diff options
author | Erlang/OTP <[email protected]> | 2009-11-20 14:54:40 +0000 |
---|---|---|
committer | Erlang/OTP <[email protected]> | 2009-11-20 14:54:40 +0000 |
commit | 84adefa331c4159d432d22840663c38f155cd4c1 (patch) | |
tree | bff9a9c66adda4df2106dfd0e5c053ab182a12bd /erts/emulator/drivers/common | |
download | otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2 otp-84adefa331c4159d432d22840663c38f155cd4c1.zip |
The R13B03 release. OTP_R13B03
Diffstat (limited to 'erts/emulator/drivers/common')
-rw-r--r-- | erts/emulator/drivers/common/efile_drv.c | 3138 | ||||
-rw-r--r-- | erts/emulator/drivers/common/erl_efile.h | 152 | ||||
-rw-r--r-- | erts/emulator/drivers/common/gzio.c | 822 | ||||
-rw-r--r-- | erts/emulator/drivers/common/gzio.h | 27 | ||||
-rw-r--r-- | erts/emulator/drivers/common/gzio_zutil.h | 82 | ||||
-rw-r--r-- | erts/emulator/drivers/common/inet_drv.c | 9949 | ||||
-rw-r--r-- | erts/emulator/drivers/common/ram_file_drv.c | 692 | ||||
-rw-r--r-- | erts/emulator/drivers/common/zlib_drv.c | 650 |
8 files changed, 15512 insertions, 0 deletions
diff --git a/erts/emulator/drivers/common/efile_drv.c b/erts/emulator/drivers/common/efile_drv.c new file mode 100644 index 0000000000..95510a16b2 --- /dev/null +++ b/erts/emulator/drivers/common/efile_drv.c @@ -0,0 +1,3138 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 1996-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* + * Purpose: Provides file and directory operations. + * + * This file is generic, and does the work of decoding the commands + * and encoding the responses. System-specific functions are found in + * the unix_efile.c and win_efile.c files. 
+ */ + +/* Operations */ + +#define FILE_OPEN 1 /* Essential for startup */ +#define FILE_READ 2 +#define FILE_LSEEK 3 +#define FILE_WRITE 4 +#define FILE_FSTAT 5 /* Essential for startup */ +#define FILE_PWD 6 /* Essential for startup */ +#define FILE_READDIR 7 /* Essential for startup */ +#define FILE_CHDIR 8 +#define FILE_FSYNC 9 +#define FILE_MKDIR 10 +#define FILE_DELETE 11 +#define FILE_RENAME 12 +#define FILE_RMDIR 13 +#define FILE_TRUNCATE 14 +#define FILE_READ_FILE 15 /* Essential for startup */ +#define FILE_WRITE_INFO 16 +#define FILE_LSTAT 19 +#define FILE_READLINK 20 +#define FILE_LINK 21 +#define FILE_SYMLINK 22 +#define FILE_CLOSE 23 +#define FILE_PWRITEV 24 +#define FILE_PREADV 25 +#define FILE_SETOPT 26 +#define FILE_IPREAD 27 +#define FILE_ALTNAME 28 +#define FILE_READ_LINE 29 + +/* Return codes */ + +#define FILE_RESP_OK 0 +#define FILE_RESP_ERROR 1 +#define FILE_RESP_DATA 2 +#define FILE_RESP_NUMBER 3 +#define FILE_RESP_INFO 4 +#define FILE_RESP_NUMERR 5 +#define FILE_RESP_LDATA 6 +#define FILE_RESP_N2DATA 7 +#define FILE_RESP_EOF 8 + +/* Options */ + +#define FILE_OPT_DELAYED_WRITE 0 +#define FILE_OPT_READ_AHEAD 1 + +/* IPREAD variants */ + +#define IPREAD_S32BU_P32BU 0 + +/* Limits */ + +#define FILE_SEGMENT_READ (256*1024) +#define FILE_SEGMENT_WRITE (256*1024) + +/* Internal */ + +/* Set to 1 to test having read_ahead implicitly for read_line */ +#define ALWAYS_READ_LINE_AHEAD 0 + + +/* Must not be possible to get from malloc()! 
*/ +#define FILE_FD_INVALID ((Sint)(-1)) + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif +#include <stdlib.h> +#include "sys.h" +#include "erl_driver.h" +#include "erl_efile.h" +#include "erl_threads.h" +#include "zlib.h" +#include "gzio.h" +#include <ctype.h> +#include <sys/types.h> + +extern void erl_exit(int n, char *fmt, _DOTS_); + +static ErlDrvSysInfo sys_info; + + +/*#define TRACE 1*/ +#ifdef TRACE +# define TRACE_C(c) (putchar(c)) +# define TRACE_S(s) (fputs((s), stdout)) +# define TRACE_F(args) (printf args) +#else +# define TRACE_C(c) ((void)(0)) +# define TRACE_S(s) ((void)(0)) +# define TRACE_F(args) ((void)(0)) +#endif + + +#ifdef USE_THREADS +#define IF_THRDS if (sys_info.async_threads > 0) +#ifdef HARDDEBUG /* HARDDEBUG in io.c is expected too */ +#define TRACE_DRIVER fprintf(stderr, "Efile: ") +#else +#define TRACE_DRIVER +#endif +#define MUTEX_INIT(m, p) do { IF_THRDS { TRACE_DRIVER; (m = driver_pdl_create(p)); } } while (0) +#define MUTEX_LOCK(m) do { IF_THRDS { TRACE_DRIVER; driver_pdl_lock(m); } } while (0) +#define MUTEX_UNLOCK(m) do { IF_THRDS { TRACE_DRIVER; driver_pdl_unlock(m); } } while (0) +#else +#define MUTEX_INIT(m, p) +#define MUTEX_LOCK(m) +#define MUTEX_UNLOCK(m) +#endif + + + +#if 0 +/* Experimental, for forcing all file operations to use the same thread. 
*/ +static unsigned file_fixed_key = 1; +#define KEY(desc) (&file_fixed_key) +#else +#define KEY(desc) (&(desc)->key) +#endif + + + +#if MAXPATHLEN >= BUFSIZ +#define RESBUFSIZE MAXPATHLEN+1 +#else +#define RESBUFSIZE BUFSIZ +#endif + +#define GET_TIME(i, b) \ + (i).year = get_int32((b) + 0 * 4); \ + (i).month = get_int32((b) + 1 * 4); \ + (i).day = get_int32((b) + 2 * 4); \ + (i).hour = get_int32((b) + 3 * 4); \ + (i).minute = get_int32((b) + 4 * 4); \ + (i).second = get_int32((b) + 5 * 4) + +#define PUT_TIME(i, b) \ + put_int32((i).year, (b) + 0 * 4); \ + put_int32((i).month, (b) + 1 * 4); \ + put_int32((i).day, (b) + 2 * 4); \ + put_int32((i).hour, (b) + 3 * 4); \ + put_int32((i).minute,(b) + 4 * 4); \ + put_int32((i).second,(b) + 5 * 4) + + +#if ALWAYS_READ_LINE_AHEAD +#define DEFAULT_LINEBUF_SIZE 2048 +#else +#define DEFAULT_LINEBUF_SIZE 512 /* Small, it's usually discarded anyway */ +#endif + +typedef unsigned char uchar; + +static ErlDrvData file_start(ErlDrvPort port, char* command); +static int file_init(void); +static void file_stop(ErlDrvData); +static void file_output(ErlDrvData, char* buf, int len); +static int file_control(ErlDrvData, unsigned int command, + char* buf, int len, char **rbuf, int rlen); +static void file_timeout(ErlDrvData); +static void file_outputv(ErlDrvData, ErlIOVec*); +static void file_async_ready(ErlDrvData, ErlDrvThreadData); +static void file_flush(ErlDrvData); + + + +enum e_timer {timer_idle, timer_again, timer_write}; + +struct t_data; + +typedef struct { + Sint fd; + ErlDrvPort port; + unsigned key; /* Async queue key */ + unsigned flags; /* Original flags from FILE_OPEN. 
*/ + void (*invoke)(void *); + struct t_data *d; + void (*free)(void *); + struct t_data *cq_head; /* Queue of incoming commands */ + struct t_data *cq_tail; /* -""- */ + enum e_timer timer_state; + size_t read_bufsize; + ErlDrvBinary *read_binp; + size_t read_offset; + size_t read_size; + size_t write_bufsize; + unsigned long write_delay; + int write_error; + Efile_error write_errInfo; + ErlDrvPDL q_mtx; /* Mutex for the driver queue, known by the emulator. Also used for + mutual exclusion when accessing field(s) below. */ + size_t write_buffered; +} file_descriptor; + + +static int reply_error(file_descriptor*, Efile_error* errInfo); + +struct erl_drv_entry efile_driver_entry = { + file_init, + file_start, + file_stop, + file_output, + NULL, + NULL, + "efile", + NULL, + NULL, + file_control, + file_timeout, + file_outputv, + file_async_ready, + file_flush, + NULL, + NULL, + ERL_DRV_EXTENDED_MARKER, + ERL_DRV_EXTENDED_MAJOR_VERSION, + ERL_DRV_EXTENDED_MINOR_VERSION, + ERL_DRV_FLAG_USE_PORT_LOCKING, + NULL +}; + + + +static int thread_short_circuit; + +#define DRIVER_ASYNC(level, desc, f_invoke, data, f_free) \ +if (thread_short_circuit >= (level)) { \ + (*(f_invoke))(data); \ + file_async_ready((ErlDrvData)(desc), (data)); \ +} else { \ + driver_async((desc)->port, KEY(desc), (f_invoke), (data), (f_free)); \ +} + + + +struct t_pbuf_spec { + Sint64 offset; + size_t size; +}; + +struct t_pwritev { + ErlDrvPort port; + ErlDrvPDL q_mtx; + size_t size; + size_t free_size; + unsigned cnt; + unsigned n; + struct t_pbuf_spec specs[1]; +}; + +struct t_preadv { + ErlIOVec eiov; + unsigned n; + unsigned cnt; + size_t size; + Sint64 offsets[1]; +}; + +#define READDIR_BUFSIZE (8*1024) +#if READDIR_BUFSIZE < (2*MAXPATHLEN) +#undef READDIR_BUFSIZE +#define READDIR_BUFSIZE (2*MAXPATHLEN) +#endif + +struct t_readdir_buf { + struct t_readdir_buf *next; + char buf[READDIR_BUFSIZE]; +}; + +struct t_data +{ + struct t_data *next; + int command; + int level; + void (*invoke)(void *); + 
void (*free)(void *); + int again; + int reply; + int result_ok; + Efile_error errInfo; + int flags; + Sint fd; + /**/ + Efile_info info; + EFILE_DIR_HANDLE dir_handle; /* Handle to open directory. */ + ErlDrvBinary *bin; + int drive; + size_t n; + /*off_t offset;*/ + /*size_t bytesRead; Bytes read from the file. */ + /**/ + union { + struct { + Sint64 offset; + int origin; + Sint64 location; + } lseek; + struct { + ErlDrvPort port; + ErlDrvPDL q_mtx; + size_t size; + size_t free_size; + size_t reply_size; + } writev; + struct t_pwritev pwritev; + struct t_preadv preadv; + struct { + ErlDrvBinary *binp; + size_t bin_offset; + size_t bin_size; + size_t size; + } read; + struct { + ErlDrvBinary *binp; /* in - out */ + size_t read_offset; /* in - out */ + size_t read_size; /* in - out */ + size_t nl_pos; /* out */ + short nl_skip; /* out, 0 or 1 */ +#if !ALWAYS_READ_LINE_AHEAD + short read_ahead; /* in, bool */ +#endif + } read_line; + struct { + ErlDrvBinary *binp; + int size; + int offset; + char name[1]; + } read_file; + struct { + struct t_readdir_buf *first_buf; + struct t_readdir_buf *last_buf; + } read_dir; + } c; + char b[1]; +}; + + +#define EF_ALLOC(S) driver_alloc((S)) +#define EF_REALLOC(P, S) driver_realloc((P), (S)) +#define EF_SAFE_ALLOC(S) ef_safe_alloc((S)) +#define EF_SAFE_REALLOC(P, S) ef_safe_realloc((P), (S)) +#define EF_FREE(P) do { if((P)) driver_free((P)); } while(0) + +static void *ef_safe_alloc(Uint s) +{ + void *p = EF_ALLOC(s); + if (!p) erl_exit(1, "efile drv: Can't allocate %d bytes of memory\n", s); + return p; +} + +#if 0 /* Currently not used */ + +static void *ef_safe_realloc(void *op, Uint s) +{ + void *p = EF_REALLOC(op, s); + if (!p) erl_exit(1, "efile drv: Can't reallocate %d bytes of memory\n", s); + return p; +} + +#endif + +/********************************************************************* + * ErlIOVec manipulation functions. 
+ */ + +/* char EV_CHAR(ErlIOVec *ev, int p, int q) */ +#define EV_CHAR_P(ev, p, q) \ + (((char *)(ev)->iov[(q)].iov_base) + (p)) + +/* int EV_GET_CHAR(ErlIOVec *ev, char *p, int *pp, int *qp) */ +#define EV_GET_CHAR(ev, p, pp, qp) \ + (*(pp)+1 <= (ev)->iov[*(qp)].iov_len \ + ? (*(p) = *EV_CHAR_P(ev, *(pp), *(qp)), \ + *(pp) = ( *(pp)+1 < (ev)->iov[*(qp)].iov_len \ + ? *(pp)+1 \ + : ((*(qp))++, 0)), \ + !0) \ + : 0) + +/* Uint32 EV_UINT32(ErlIOVec *ev, int p, int q)*/ +#define EV_UINT32(ev, p, q) \ + ((Uint32) *(((unsigned char *)(ev)->iov[(q)].iov_base) + (p))) + +/* int EV_GET_UINT32(ErlIOVec *ev, Uint32 *p, int *pp, int *qp) */ +#define EV_GET_UINT32(ev, p, pp, qp) \ + (*(pp)+4 <= (ev)->iov[*(qp)].iov_len \ + ? (*(p) = (EV_UINT32(ev, *(pp), *(qp)) << 24) \ + | (EV_UINT32(ev, *(pp)+1, *(qp)) << 16) \ + | (EV_UINT32(ev, *(pp)+2, *(qp)) << 8) \ + | (EV_UINT32(ev, *(pp)+3, *(qp))), \ + *(pp) = ( *(pp)+4 < (ev)->iov[*(qp)].iov_len \ + ? *(pp)+4 \ + : ((*(qp))++, 0)), \ + !0) \ + : 0) + +/* Uint64 EV_UINT64(ErlIOVec *ev, int p, int q)*/ +#define EV_UINT64(ev, p, q) \ + ((Uint64) *(((unsigned char *)(ev)->iov[(q)].iov_base) + (p))) + +/* int EV_GET_UINT64(ErlIOVec *ev, Uint32 *p, int *pp, int *qp) */ +#define EV_GET_UINT64(ev, p, pp, qp) \ + (*(pp)+8 <= (ev)->iov[*(qp)].iov_len \ + ? (*(p) = (EV_UINT64(ev, *(pp), *(qp)) << 56) \ + | (EV_UINT64(ev, *(pp)+1, *(qp)) << 48) \ + | (EV_UINT64(ev, *(pp)+2, *(qp)) << 40) \ + | (EV_UINT64(ev, *(pp)+3, *(qp)) << 32) \ + | (EV_UINT64(ev, *(pp)+4, *(qp)) << 24) \ + | (EV_UINT64(ev, *(pp)+5, *(qp)) << 16) \ + | (EV_UINT64(ev, *(pp)+6, *(qp)) << 8) \ + | (EV_UINT64(ev, *(pp)+7, *(qp))), \ + *(pp) = ( *(pp)+8 < (ev)->iov[*(qp)].iov_len \ + ? *(pp)+8 \ + : ((*(qp))++, 0)), \ + !0) \ + : 0) + + + +#if 0 + +static void ev_clear(ErlIOVec *ev) { + ASSERT(ev); + ev->size = 0; + ev->vsize = 0; + ev->iov = NULL; + ev->binv = NULL; +} + +/* Assumes that ->iov and ->binv were allocated with sys_alloc(). 
+ */ +static void ev_free(ErlIOVec *ev) { + if (! ev) { + return; + } + if (ev->vsize > 0) { + int i; + ASSERT(ev->iov); + ASSERT(ev->binv); + for (i = 0; i < ev->vsize; i++) { + if (ev->binv[i]) { + driver_free_binary(ev->binv[i]); + } + } + EF_FREE(ev->iov); + EF_FREE(ev->binv); + } +} + +/* Copy the contents from source to dest. + * Data in binaries is not copied, just the pointers; + * and refc is incremented. + */ +static ErlIOVec *ev_copy(ErlIOVec *dest, ErlIOVec *source) { + int *ip; + ASSERT(dest); + ASSERT(source); + if (source->vsize == 0) { + /* Empty source */ + ev_clear(dest); + return dest; + } + /* Allocate ->iov and ->binv */ + dest->iov = EF_ALLOC(sizeof(*dest->iov) * source->vsize); + if (! dest->iov) { + return NULL; + } + dest->binv = EF_ALLOC(sizeof(*dest->binv) * source->vsize); + if (! dest->binv) { + EF_FREE(dest->iov); + return NULL; + } + dest->size = source->size; + /* Copy one vector element at the time. + * Use *ip as an alias for dest->vsize to improve readabiliy. + * Keep dest consistent in every iteration by using + * dest->vsize==*ip as loop variable. + */ + for (ip = &dest->vsize, *ip = 0; *ip < source->vsize; (*ip)++) { + if (source->iov[*ip].iov_len == 0) { + /* Empty vector element */ + dest->iov[*ip].iov_len = 0; + dest->iov[*ip].iov_base = NULL; + dest->binv[*ip] = NULL; + } else { + /* Non empty vector element */ + if (source->binv[*ip]) { + /* Contents in binary - copy pointers and increment refc */ + dest->iov[*ip] = source->iov[*ip]; + dest->binv[*ip] = source->binv[*ip]; + driver_binary_inc_refc(source->binv[*ip]); + } else { + /* Contents not in binary - allocate new binary and copy data */ + if (! 
(dest->binv[*ip] = + driver_alloc_binary(source->iov[*ip].iov_len))) { + goto failed; + } + sys_memcpy(dest->binv[*ip]->orig_bytes, + source->iov[*ip].iov_base, + source->iov[*ip].iov_len); + dest->iov[*ip].iov_base = dest->binv[*ip]->orig_bytes; + dest->iov[*ip].iov_len = source->iov[*ip].iov_len; + } + } + } + return dest; + failed: + ev_free(dest); + return NULL; +} + +#endif + + + +/********************************************************************* + * Command queue functions + */ + +static void cq_enq(file_descriptor *desc, struct t_data *d) { + ASSERT(d); + if (desc->cq_head) { + ASSERT(desc->cq_tail); + ASSERT(!desc->cq_tail->next); + desc->cq_tail = desc->cq_tail->next = d; + } else { + ASSERT(desc->cq_tail == NULL); + desc->cq_head = desc->cq_tail = d; + } + d->next = NULL; +} + +static struct t_data *cq_deq(file_descriptor *desc) { + struct t_data *d = desc->cq_head; + ASSERT(d || (!d && !desc->cq_tail)); + if (d) { + ASSERT(!d->next || (d->next && desc->cq_tail != d)); + if ((desc->cq_head = d->next) == NULL) { + ASSERT(desc->cq_tail == d); + desc->cq_tail = NULL; + } + } + return d; +} + + + +/********************************************************************* + * Driver entry point -> init + */ +static int +file_init(void) +{ + char buf[21]; /* enough to hold any 64-bit integer */ + size_t bufsz = sizeof(buf); + thread_short_circuit = (erl_drv_getenv("ERL_EFILE_THREAD_SHORT_CIRCUIT", + buf, + &bufsz) == 0 + ? 
atoi(buf) + : 0); + driver_system_info(&sys_info, sizeof(ErlDrvSysInfo)); + return 0; +} + +/********************************************************************* + * Driver entry point -> start + */ +static ErlDrvData +file_start(ErlDrvPort port, char* command) + +{ + file_descriptor* desc; + + if ((desc = (file_descriptor*) EF_ALLOC(sizeof(file_descriptor))) + == NULL) { + errno = ENOMEM; + return ERL_DRV_ERROR_ERRNO; + } + desc->fd = FILE_FD_INVALID; + desc->port = port; + desc->key = (unsigned) (Uint) port; + desc->flags = 0; + desc->invoke = NULL; + desc->d = NULL; + desc->free = NULL; + desc->cq_head = NULL; + desc->cq_tail = NULL; + desc->timer_state = timer_idle; + desc->read_bufsize = 0; + desc->read_binp = NULL; + desc->read_offset = 0; + desc->read_size = 0; + desc->write_delay = 0L; + desc->write_bufsize = 0; + desc->write_error = 0; + MUTEX_INIT(desc->q_mtx, port); /* Refc is one, referenced by emulator now */ + desc->write_buffered = 0; + return (ErlDrvData) desc; +} + +static void free_data(void *data) +{ + EF_FREE(data); +} + +static void do_close(int flags, Sint fd) { + if (flags & EFILE_COMPRESSED) { + erts_gzclose((gzFile)(fd)); + } else { + efile_closefile((int) fd); + } +} + +static void invoke_close(void *data) +{ + struct t_data *d = (struct t_data *) data; + d->again = 0; + do_close(d->flags, d->fd); +} + +/********************************************************************* + * Driver entry point -> stop + */ +static void +file_stop(ErlDrvData e) +{ + file_descriptor* desc = (file_descriptor*)e; + + TRACE_C('p'); + + if (desc->fd != FILE_FD_INVALID) { + do_close(desc->flags, desc->fd); + desc->fd = FILE_FD_INVALID; + desc->flags = 0; + } + if (desc->read_binp) { + driver_free_binary(desc->read_binp); + } + EF_FREE(desc); +} + + +/* + * Sends back an error reply to Erlang. + */ + +static void reply_posix_error(file_descriptor *desc, int posix_errno) { + char response[256]; /* Response buffer. 
*/ + char* s; + char* t; + + /* + * Contents of buffer sent back: + * + * +-----------------------------------------+ + * | FILE_RESP_ERROR | Posix error id string | + * +-----------------------------------------+ + */ + + TRACE_C('E'); + + response[0] = FILE_RESP_ERROR; + for (s = erl_errno_id(posix_errno), t = response+1; *s; s++, t++) + *t = tolower(*s); + driver_output2(desc->port, response, t-response, NULL, 0); +} + +static void reply_Uint_posix_error(file_descriptor *desc, Uint num, + int posix_errno) { + char response[256]; /* Response buffer. */ + char* s; + char* t; + + /* + * Contents of buffer sent back: + * + * +----------------------------------------------------------------------+ + * | FILE_RESP_NUMERR | 64-bit number (big-endian) | Posix error id string | + * +----------------------------------------------------------------------+ + */ + + TRACE_C('N'); + + response[0] = FILE_RESP_NUMERR; +#if SIZEOF_VOID_P == 4 + put_int32(0, response+1); +#else + put_int32(num>>32, response+1); +#endif + put_int32((Uint32)num, response+1+4); + for (s = erl_errno_id(posix_errno), t = response+1+4+4; *s; s++, t++) + *t = tolower(*s); + driver_output2(desc->port, response, t-response, NULL, 0); +} + + + +static int reply_error(file_descriptor *desc, + Efile_error *errInfo) /* The error codes. */ +{ + reply_posix_error(desc, errInfo->posix_errno); + return 0; +} + +static int reply_Uint_error(file_descriptor *desc, Uint num, + Efile_error *errInfo) /* The error codes. 
*/ +{ + reply_Uint_posix_error(desc, num, errInfo->posix_errno); + return 0; +} + +static int reply_ok(file_descriptor *desc) { + char c = FILE_RESP_OK; + + driver_output2(desc->port, &c, 1, NULL, 0); + return 0; +} + +static int reply(file_descriptor *desc, int ok, Efile_error *errInfo) { + if (!ok) { + reply_error(desc, errInfo); + } else { + TRACE_C('K'); + reply_ok(desc); + } + return 0; +} + +static int reply_Uint(file_descriptor *desc, Uint result) { + char tmp[1+4+4]; + + /* + * Contents of buffer sent back: + * + * +-----------------------------------------------+ + * | FILE_RESP_NUMBER | 64-bit number (big-endian) | + * +-----------------------------------------------+ + */ + + TRACE_C('R'); + + tmp[0] = FILE_RESP_NUMBER; +#if SIZEOF_VOID_P == 4 + put_int32(0, tmp+1); +#else + put_int32(result>>32, tmp+1); +#endif + put_int32((Uint32)result, tmp+1+4); + driver_output2(desc->port, tmp, sizeof(tmp), NULL, 0); + return 0; +} + +static int reply_Sint64(file_descriptor *desc, Sint64 result) { + char tmp[1+4+4]; + + /* + * Contents of buffer sent back: + * + * +-----------------------------------------------+ + * | FILE_RESP_NUMBER | 64-bit number (big-endian) | + * +-----------------------------------------------+ + */ + + TRACE_C('R'); + + tmp[0] = FILE_RESP_NUMBER; + put_int64(result, tmp+1); + driver_output2(desc->port, tmp, sizeof(tmp), NULL, 0); + return 0; +} + +#if 0 +static void reply_again(file_descriptor *desc) { + char tmp[1]; + tmp[0] = FILE_RESP_AGAIN; + driver_output2(desc->port, tmp, sizeof(tmp), NULL, 0); +} +#endif + +static void reply_ev(file_descriptor *desc, char response, ErlIOVec *ev) { + char tmp[1]; + /* Data arriving at the Erlang process: + * [Response, Binary0, Binary1, .... 
| BinaryN-1] + */ + tmp[0] = response; + driver_outputv(desc->port, tmp, sizeof(tmp), ev, 0); +} + +static void reply_data(file_descriptor *desc, + ErlDrvBinary *binp, size_t offset, size_t len) { + char header[1+4+4]; + /* Data arriving at the Erlang process: + * [?FILE_RESP_DATA, 64-bit length (big-endian) | Data] + */ + header[0] = FILE_RESP_DATA; +#if SIZEOF_SIZE_T == 4 + put_int32(0, header+1); +#else + put_int32(len>>32, header+1); +#endif + put_int32((Uint32)len, header+1+4); + driver_output_binary(desc->port, header, sizeof(header), + binp, offset, len); +} + +static void reply_buf(file_descriptor *desc, char *buf, size_t len) { + char header[1+4+4]; + /* Data arriving at the Erlang process: + * [?FILE_RESP_DATA, 64-bit length (big-endian) | Data] + */ + header[0] = FILE_RESP_DATA; +#if SIZEOF_SIZE_T == 4 + put_int32(0, header+1); +#else + put_int32(len>>32, header+1); +#endif + put_int32((Uint32)len, header+1+4); + driver_output2(desc->port, header, sizeof(header), buf, len); +} + +static int reply_eof(file_descriptor *desc) { + char c = FILE_RESP_EOF; + + driver_output2(desc->port, &c, 1, NULL, 0); + return 0; +} + + + +static void invoke_name(void *data, int (*f)(Efile_error *, char *)) +{ + struct t_data *d = (struct t_data *) data; + char *name = (char *) d->b; + + d->again = 0; + d->result_ok = (*f)(&d->errInfo, name); +} + +static void invoke_mkdir(void *data) +{ + invoke_name(data, efile_mkdir); +} + +static void invoke_rmdir(void *data) +{ + invoke_name(data, efile_rmdir); +} + +static void invoke_delete_file(void *data) +{ + invoke_name(data, efile_delete_file); +} + +static void invoke_chdir(void *data) +{ + invoke_name(data, efile_chdir); +} + +static void invoke_fsync(void *data) +{ + struct t_data *d = (struct t_data *) data; + int fd = (int) d->fd; + + d->again = 0; + d->result_ok = efile_fsync(&d->errInfo, fd); +} + +static void invoke_truncate(void *data) +{ + struct t_data *d = (struct t_data *) data; + int fd = (int) d->fd; + + d->again = 
0; + d->result_ok = efile_truncate_file(&d->errInfo, &fd, d->flags); +} + +static void invoke_read(void *data) +{ + struct t_data *d = (struct t_data *) data; + int status, segment; + size_t size, read_size; + + segment = d->again && d->c.read.bin_size >= 2*FILE_SEGMENT_READ; + if (segment) { + size = FILE_SEGMENT_READ; + } else { + size = d->c.read.bin_size; + } + read_size = size; + if (d->flags & EFILE_COMPRESSED) { + read_size = erts_gzread((gzFile)d->fd, + d->c.read.binp->orig_bytes + d->c.read.bin_offset, + size); + status = (read_size != -1); + if (!status) { + d->errInfo.posix_errno = EIO; + } + } else { + status = efile_read(&d->errInfo, d->flags, (int) d->fd, + d->c.read.binp->orig_bytes + d->c.read.bin_offset, + size, + &read_size); + } + if ( (d->result_ok = status)) { + ASSERT(read_size <= size); + d->c.read.bin_offset += read_size; + if (read_size < size || !segment) { + d->c.read.bin_size = 0; + d->again = 0; + } else { + d->c.read.bin_size -= read_size; + } + } else { + d->again = 0; + } +} + +static void free_read(void *data) +{ + struct t_data *d = (struct t_data *) data; + + driver_free_binary(d->c.read.binp); + EF_FREE(d); +} + +static void invoke_read_line(void *data) +{ + struct t_data *d = (struct t_data *) data; + int status; + size_t read_size; + int local_loop = (d->again == 0); + + do { + size_t size = (d->c.read_line.binp)->orig_size - + d->c.read_line.read_offset - d->c.read_line.read_size; + if (size == 0) { + /* Need more place */ + size_t need = (d->c.read_line.read_size >= DEFAULT_LINEBUF_SIZE) ? 
+ d->c.read_line.read_size + DEFAULT_LINEBUF_SIZE : DEFAULT_LINEBUF_SIZE; + ErlDrvBinary *newbin = driver_alloc_binary(need); + if (newbin == NULL) { + d->result_ok = 0; + d->errInfo.posix_errno = ENOMEM; + d->again = 0; + break; + } + memcpy(newbin->orig_bytes, (d->c.read_line.binp)->orig_bytes + d->c.read_line.read_offset, + d->c.read_line.read_size); + driver_free_binary(d->c.read_line.binp); + d->c.read_line.binp = newbin; + d->c.read_line.read_offset = 0; + size = need - d->c.read_line.read_size; + } + if (d->flags & EFILE_COMPRESSED) { + read_size = erts_gzread((gzFile)d->fd, + d->c.read_line.binp->orig_bytes + + d->c.read_line.read_offset + d->c.read_line.read_size, + size); + status = (read_size != -1); + if (!status) { + d->errInfo.posix_errno = EIO; + } + } else { + status = efile_read(&d->errInfo, d->flags, (int) d->fd, + d->c.read_line.binp->orig_bytes + + d->c.read_line.read_offset + d->c.read_line.read_size, + size, + &read_size); + } + if ( (d->result_ok = status)) { + void *nl_ptr = memchr((d->c.read_line.binp)->orig_bytes + + d->c.read_line.read_offset + d->c.read_line.read_size,'\n',read_size); + ASSERT(read_size <= size); + d->c.read_line.read_size += read_size; + if (nl_ptr != NULL) { + /* If found, we're done */ + d->c.read_line.nl_pos = ((char *) nl_ptr) - + ((char *) ((d->c.read_line.binp)->orig_bytes)) + 1; + if (d->c.read_line.nl_pos > 1 && + *(((char *) nl_ptr) - 1) == '\r') { + --d->c.read_line.nl_pos; + *(((char *) nl_ptr) - 1) = '\n'; + d->c.read_line.nl_skip = 1; + } else { + d->c.read_line.nl_skip = 0; + } + d->again = 0; +#if !ALWAYS_READ_LINE_AHEAD + if (!(d->c.read_line.read_ahead)) { + /* Ouch! Undo buffering... 
*/ + size_t too_much = d->c.read_line.read_size - d->c.read_line.nl_skip - + (d->c.read_line.nl_pos - d->c.read_line.read_offset); + d->c.read_line.read_size -= too_much; + ASSERT(d->c.read_line.read_size >= 0); + if (d->flags & EFILE_COMPRESSED) { + Sint64 location = erts_gzseek((gzFile)d->fd, + -((Sint64) too_much), EFILE_SEEK_CUR); + if (location == -1) { + d->result_ok = 0; + d->errInfo.posix_errno = errno; + } + } else { + Sint64 location; + d->result_ok = efile_seek(&d->errInfo, (int) d->fd, + -((Sint64) too_much), EFILE_SEEK_CUR, + &location); + } + } +#endif + break; + } else if (read_size == 0) { + d->c.read_line.nl_pos = + d->c.read_line.read_offset + d->c.read_line.read_size; + d->c.read_line.nl_skip = 0; + d->again = 0; + break; + } + } else { + d->again = 0; + break; + } + } while (local_loop); +} + +static void free_read_line(void *data) +{ + struct t_data *d = (struct t_data *) data; + + driver_free_binary(d->c.read_line.binp); + EF_FREE(d); +} + +static void invoke_read_file(void *data) +{ + struct t_data *d = (struct t_data *) data; + size_t read_size; + int chop; + + if (! d->c.read_file.binp) { /* First invocation only */ + int fd; + Sint64 size; + + if (! (d->result_ok = + efile_openfile(&d->errInfo, d->c.read_file.name, + EFILE_MODE_READ, &fd, &size))) { + goto done; + } + d->fd = fd; + d->c.read_file.size = (int) size; + if (size < 0 || size != d->c.read_file.size || + ! (d->c.read_file.binp = + driver_alloc_binary(d->c.read_file.size))) { + d->result_ok = 0; + d->errInfo.posix_errno = ENOMEM; + goto close; + } + d->c.read_file.offset = 0; + } + /* Invariant: d->c.read_file.size >= d->c.read_file.offset */ + + read_size = (size_t) (d->c.read_file.size - d->c.read_file.offset); + if (! 
read_size) goto close; + chop = d->again && read_size >= FILE_SEGMENT_READ*2; + if (chop) read_size = FILE_SEGMENT_READ; + d->result_ok = + efile_read(&d->errInfo, + EFILE_MODE_READ, + (int) d->fd, + d->c.read_file.binp->orig_bytes + d->c.read_file.offset, + read_size, + &read_size); + if (d->result_ok) { + d->c.read_file.offset += read_size; + if (chop) return; /* again */ + } + close: + efile_closefile((int) d->fd); + done: + d->again = 0; +} + +static void free_read_file(void *data) +{ + struct t_data *d = (struct t_data *) data; + + if (d->c.read_file.binp) driver_free_binary(d->c.read_file.binp); + EF_FREE(d); +} + + + +static void invoke_preadv(void *data) +{ + struct t_data *d = (struct t_data *) data; + struct t_preadv *c = &d->c.preadv; + ErlIOVec *ev = &c->eiov; + size_t bytes_read_so_far = 0; + unsigned char *p = (unsigned char *)ev->iov[0].iov_base + 4+4+8*c->cnt; + + while (c->cnt < c->n) { + size_t read_size = ev->iov[1 + c->cnt].iov_len - c->size; + size_t bytes_read = 0; + int chop = d->again + && bytes_read_so_far + read_size >= 2*FILE_SEGMENT_READ; + if (chop) { + ASSERT(bytes_read_so_far < FILE_SEGMENT_READ); + read_size = FILE_SEGMENT_READ + FILE_SEGMENT_READ/2 + - bytes_read_so_far; + } + if ( (d->result_ok + = efile_pread(&d->errInfo, + (int) d->fd, + c->offsets[c->cnt] + c->size, + ev->iov[1 + c->cnt].iov_base + c->size, + read_size, + &bytes_read))) { + bytes_read_so_far += bytes_read; + if (chop && bytes_read == read_size) { + c->size += bytes_read; + return; + } + ASSERT(bytes_read <= read_size); + ev->iov[1 + c->cnt].iov_len = bytes_read + c->size; + ev->size += bytes_read + c->size; + put_int64(bytes_read + c->size, p); p += 8; + c->size = 0; + c->cnt++; + if (d->again + && bytes_read_so_far >= FILE_SEGMENT_READ + && c->cnt < c->n) { + return; + } + } else { + /* In case of a read error, ev->size will not be correct, + * which does not matter since no read data is returned + * to Erlang. 
+ */ + break; + } + } + d->again = 0; +} + +static void free_preadv(void *data) { + struct t_data *d = data; + int i; + ErlIOVec *ev = &d->c.preadv.eiov; + + for(i = 0; i < ev->vsize; i++) { + driver_free_binary(ev->binv[i]); + } + EF_FREE(d); +} + +static void invoke_ipread(void *data) +{ + struct t_data *d = data; + struct t_preadv *c = &d->c.preadv; + ErlIOVec *ev = &c->eiov; + size_t bytes_read = 0; + char buf[2*sizeof(Uint32)]; + Uint32 offset, size; + + /* Read indirection header */ + if (! efile_pread(&d->errInfo, (int) d->fd, c->offsets[0], + buf, sizeof(buf), &bytes_read)) { + goto error; + } + if (bytes_read != sizeof(buf)) goto done; /* eof */ + size = get_int32(buf); + offset = get_int32(buf+4); + if (size > c->size) goto done; /* eof */ + c->n = 1; + c->cnt = 0; + c->size = 0; + c->offsets[0] = offset; + if (! (ev->binv[0] = driver_alloc_binary(3*8))) { + d->errInfo.posix_errno = ENOMEM; + goto error; + } + ev->vsize = 1; + ev->iov[0].iov_len = 3*8; + ev->iov[0].iov_base = ev->binv[0]->orig_bytes; + ev->size = ev->iov[0].iov_len; + put_int64(offset, ev->iov[0].iov_base); + put_int64(size, ((char *)ev->iov[0].iov_base) + 2*8); + if (size == 0) { + put_int64(size, ((char *)ev->iov[0].iov_base) + 8); + goto done; + } + if (! (ev->binv[1] = driver_alloc_binary(size))) { + d->errInfo.posix_errno = ENOMEM; + goto error; + } + ev->vsize = 2; + ev->iov[1].iov_len = size; + ev->iov[1].iov_base = ev->binv[1]->orig_bytes; + /* Read data block */ + d->invoke = invoke_preadv; + invoke_preadv(data); + return; + error: + d->result_ok = 0; + d->again = 0; + return; + done: + d->result_ok = !0; + d->again = 0; +} + +/* invoke_writev and invoke_pwritev are the only thread functions that + * access non-thread data i.e the port queue and a mutex in the port + * structure that is used to lock the port queue. 
+ * + * The port will normally not be terminated until the port queue is + * empty, but if the port is killed, i.e., exit(Port, kill) is called, + * it will terminate regardless of the port queue state. When the + * port is invalid driver_peekq() returns NULL and set the size to -1, + * and driver_sizeq() returns -1. + */ + +static void invoke_writev(void *data) { + struct t_data *d = (struct t_data *) data; + SysIOVec *iov0; + SysIOVec *iov; + int iovlen; + int iovcnt; + size_t size; + size_t p; + int segment; + + segment = d->again && d->c.writev.size >= 2*FILE_SEGMENT_WRITE; + if (segment) { + size = FILE_SEGMENT_WRITE; + } else { + size = d->c.writev.size; + } + + /* Copy the io vector to avoid locking the port que while writing */ + MUTEX_LOCK(d->c.writev.q_mtx); /* Lock before accessing the port queue */ + iov0 = driver_peekq(d->c.writev.port, &iovlen); + + /* Calculate iovcnt */ + for (p = 0, iovcnt = 0; + p < size && iovcnt < iovlen; + p += iov0[iovcnt++].iov_len) + ; + iov = EF_ALLOC(sizeof(SysIOVec)*iovcnt); + memcpy(iov,iov0,iovcnt*sizeof(SysIOVec)); + MUTEX_UNLOCK(d->c.writev.q_mtx); + /* Let go of lock until we deque from original vector */ + + if (iovlen > 0) { + ASSERT(iov[iovcnt-1].iov_len > p - size); + iov[iovcnt-1].iov_len -= p - size; + if (d->flags & EFILE_COMPRESSED) { + int i, status = 1; + for (i = 0; i < iovcnt; i++) { + if (iov[i].iov_base && iov[i].iov_len > 0) { + /* Just in case, I do not know what gzwrite does + * with errno. + */ + errno = EINVAL; + if (! (status = + erts_gzwrite((gzFile)d->fd, + iov[i].iov_base, + iov[i].iov_len)) == iov[i].iov_len) { + d->errInfo.posix_errno = + d->errInfo.os_errno = errno; /* XXX Correct? 
*/ + break; + } + } + } + d->result_ok = status; + } else { + d->result_ok = efile_writev(&d->errInfo, + d->flags, (int) d->fd, + iov, iovcnt, size); + } + } else if (iovlen == 0) { + d->result_ok = 1; + } + else { /* Port has terminated */ + d->result_ok = 0; + d->errInfo.posix_errno = d->errInfo.os_errno = EINVAL; + } + EF_FREE(iov); + + d->c.writev.free_size = size; + d->c.writev.size -= size; + if (! d->result_ok) { + d->again = 0; + } else { + if (! segment) { + d->again = 0; + } + TRACE_F(("w%lu", (unsigned long)size)); + + } +} + +static void free_writev(void *data) { + struct t_data *d = data; + MUTEX_LOCK(d->c.writev.q_mtx); + driver_deq(d->c.writev.port, d->c.writev.size + d->c.writev.free_size); + MUTEX_UNLOCK(d->c.writev.q_mtx); + EF_FREE(d); +} + +static void invoke_pwd(void *data) +{ + struct t_data *d = (struct t_data *) data; + + d->again = 0; + d->result_ok = efile_getdcwd(&d->errInfo,d->drive, d->b+1, + RESBUFSIZE-1); +} + +static void invoke_readlink(void *data) +{ + struct t_data *d = (struct t_data *) data; + char resbuf[RESBUFSIZE]; /* Result buffer. */ + + d->again = 0; + d->result_ok = efile_readlink(&d->errInfo, d->b, resbuf+1, + RESBUFSIZE-1); + if (d->result_ok != 0) + strcpy((char *) d->b + 1, resbuf+1); +} + +static void invoke_altname(void *data) +{ + struct t_data *d = (struct t_data *) data; + char resbuf[RESBUFSIZE]; /* Result buffer. 
*/ + + d->again = 0; + d->result_ok = efile_altname(&d->errInfo, d->b, resbuf+1, + RESBUFSIZE-1); + if (d->result_ok != 0) + strcpy((char *) d->b + 1, resbuf+1); +} + +static void invoke_pwritev(void *data) { + struct t_data *d = (struct t_data *) data; + SysIOVec *iov0; + SysIOVec *iov; + int iovlen; + int iovcnt; + struct t_pwritev *c = &d->c.pwritev; + size_t p; + int segment; + size_t size, write_size; + + segment = d->again && c->size >= 2*FILE_SEGMENT_WRITE; + if (segment) { + size = FILE_SEGMENT_WRITE; + } else { + size = c->size; + } + d->result_ok = !0; + p = 0; + /* Lock the queue just for a while, we don't want it locked during write */ + MUTEX_LOCK(c->q_mtx); + iov0 = driver_peekq(c->port, &iovlen); + iov = EF_ALLOC(sizeof(SysIOVec)*iovlen); + memcpy(iov,iov0,sizeof(SysIOVec)*iovlen); + MUTEX_UNLOCK(c->q_mtx); + + if (iovlen < 0) + goto error; /* Port terminated */ + for (iovcnt = 0, c->free_size = 0; + c->cnt < c->n && iovcnt < iovlen && c->free_size < size; + c->cnt++) { + int chop; + write_size = c->specs[c->cnt].size; + if (iov[iovcnt].iov_len - p < write_size) { + /* Mismatch between pos/size spec and what is queued */ + d->errInfo.posix_errno = EINVAL; + d->result_ok = 0; + d->again = 0; + goto done; + } + chop = segment && c->free_size + write_size >= 2*FILE_SEGMENT_WRITE; + if (chop) { + ASSERT(c->free_size < FILE_SEGMENT_WRITE); + write_size = FILE_SEGMENT_WRITE + FILE_SEGMENT_WRITE/2 + - c->free_size; + } + d->result_ok = efile_pwrite(&d->errInfo, (int) d->fd, + iov[iovcnt].iov_base + p, + write_size, + c->specs[c->cnt].offset); + if (! 
d->result_ok) { + d->again = 0; + goto done; + } + c->free_size += write_size; + c->size -= write_size; + if (chop) { + c->specs[c->cnt].offset += write_size; + c->specs[c->cnt].size -= write_size; + /* Schedule out (d->again != 0) */ + goto done; + } + /* Move forward in buffer */ + p += write_size; + ASSERT(iov[iovcnt].iov_len >= p); + if (iov[iovcnt].iov_len == p) { + /* Move to next iov[], we trust that it is not a + * zero length vector, and thereby depend on that + * such are not queued. + */ + iovcnt++; p = 0; + } + } + if (! segment) { + if (c->cnt != c->n) { + /* Mismatch between number of + * pos/size specs vs number of queued buffers . + */ + error: + d->errInfo.posix_errno = EINVAL; + d->result_ok = 0; + d->again = 0; + } else { + ASSERT(c->free_size == size); + d->again = 0; + } + } + done: + EF_FREE(iov); /* Free our copy of the vector, nothing to restore */ +} + +static void free_pwritev(void *data) { + struct t_data *d = data; + + MUTEX_LOCK(d->c.writev.q_mtx); + driver_deq(d->c.pwritev.port, d->c.pwritev.free_size + d->c.pwritev.size); + MUTEX_UNLOCK(d->c.writev.q_mtx); + EF_FREE(d); +} + +static void invoke_flstat(void *data) +{ + struct t_data *d = (struct t_data *) data; + + d->again = 0; + d->result_ok = efile_fileinfo(&d->errInfo, &d->info, + d->b, d->command == FILE_LSTAT); +} + +static void invoke_link(void *data) +{ + struct t_data *d = (struct t_data *) data; + char *name = d->b; + char *new_name; + + d->again = 0; + new_name = name+strlen(name)+1; + d->result_ok = efile_link(&d->errInfo, name, new_name); +} + +static void invoke_symlink(void *data) +{ + struct t_data *d = (struct t_data *) data; + char *name = d->b; + char *new_name; + + d->again = 0; + new_name = name+strlen(name)+1; + d->result_ok = efile_symlink(&d->errInfo, name, new_name); +} + +static void invoke_rename(void *data) +{ + struct t_data *d = (struct t_data *) data; + char *name = d->b; + char *new_name; + + d->again = 0; + new_name = name+strlen(name)+1; + d->result_ok 
= efile_rename(&d->errInfo, name, new_name); +} + +static void invoke_write_info(void *data) +{ + struct t_data *d = (struct t_data *) data; + + d->again = 0; + d->result_ok = efile_write_info(&d->errInfo, &d->info, d->b); +} + +static void invoke_lseek(void *data) +{ + struct t_data *d = (struct t_data *) data; + int status; + + d->again = 0; + if (d->flags & EFILE_COMPRESSED) { + int offset = (int) d->c.lseek.offset; + + if (offset != d->c.lseek.offset) { + d->errInfo.posix_errno = EINVAL; + status = 0; + } else { + d->c.lseek.location = erts_gzseek((gzFile)d->fd, + offset, d->c.lseek.origin); + if (d->c.lseek.location == -1) { + d->errInfo.posix_errno = errno; + status = 0; + } else { + status = 1; + } + } + } else { + status = efile_seek(&d->errInfo, (int) d->fd, + d->c.lseek.offset, d->c.lseek.origin, + &d->c.lseek.location); + } + d->result_ok = status; +} + +static void invoke_readdir(void *data) +{ + struct t_data *d = (struct t_data *) data; + int s; + char *p = NULL; + int buf_sz = 0; + + d->again = 0; + d->errInfo.posix_errno = 0; + + while (1) { + char *str; + if (buf_sz < (4 /* sz */ + 1 /* cmd */ + MAXPATHLEN + 1 /* '\0' */)) { + struct t_readdir_buf *b; + if (p) { + put_int32(0, p); /* EOB */ + } + b = EF_SAFE_ALLOC(sizeof(struct t_readdir_buf)); + b->next = NULL; + if (d->c.read_dir.last_buf) + d->c.read_dir.last_buf->next = b; + else + d->c.read_dir.first_buf = b; + d->c.read_dir.last_buf = b; + p = &b->buf[0]; + buf_sz = READDIR_BUFSIZE - 4/* EOB */; + } + + p[4] = FILE_RESP_OK; + buf_sz -= 4 + 1; + str = p + 4 + 1; + ASSERT(buf_sz >= MAXPATHLEN + 1); + s = efile_readdir(&d->errInfo, d->b, &d->dir_handle, str, buf_sz); + + if (s) { + int str_sz = strlen(str); + int sz = str_sz + 1; + put_int32(sz, p); + p += 4 + sz; + buf_sz -= str_sz; + } + else { + put_int32(1, p); + p += 4 + 1; + put_int32(0, p); /* EOB */ + d->result_ok = (d->errInfo.posix_errno == 0); + break; + } + } +} + +static void invoke_open(void *data) +{ + struct t_data *d = (struct 
t_data *) data; + + int status = 1; /* Status of open call. */ + + d->again = 0; + if ((d->flags & EFILE_COMPRESSED) == 0) { + int fd; + status = efile_openfile(&d->errInfo, d->b, d->flags, &fd, NULL); + d->fd = fd; + } else { + char* mode = NULL; + + if (((d->flags & (EFILE_MODE_READ_WRITE)) == EFILE_MODE_READ_WRITE) || + (d->flags & EFILE_MODE_APPEND)) { + status = 0; + d->errInfo.posix_errno = EINVAL; + } else { + status = efile_may_openfile(&d->errInfo, d->b); + if (status || (d->errInfo.posix_errno != EISDIR)) { + mode = (d->flags & EFILE_MODE_READ) ? "rb" : "wb"; + d->fd = (Sint) erts_gzopen(d->b, mode); + if ((gzFile)d->fd) { + status = 1; + } else { + if (errno == 0) { + errno = ENOMEM; + } + d->errInfo.posix_errno = errno; + status = 0; + } + } + } + } + + d->result_ok = status; +} + +static void free_readdir(void *data) +{ + struct t_data *d = (struct t_data *) data; + struct t_readdir_buf *b1 = d->c.read_dir.first_buf; + while (b1) { + struct t_readdir_buf *b2 = b1; + b1 = b1->next; + EF_FREE(b2); + } + EF_FREE(d); +} + + + +static void try_free_read_bin(file_descriptor *desc) { + if ((desc->read_size == 0) + && (desc->read_offset >= desc->read_binp->orig_size)) { + ASSERT(desc->read_offset == desc->read_binp->orig_size); + driver_free_binary(desc->read_binp); + desc->read_binp = NULL; + desc->read_offset = 0; + desc->read_size = 0; + } +} + + + +static int try_again(file_descriptor *desc, struct t_data *d) { + if (! 
d->again) { + return 0; + } + switch (d->command) { + case FILE_WRITE: + MUTEX_LOCK(d->c.writev.q_mtx); + driver_deq(d->c.writev.port, d->c.writev.free_size); + MUTEX_UNLOCK(d->c.writev.q_mtx); + break; + case FILE_PWRITEV: + MUTEX_LOCK(d->c.writev.q_mtx); + driver_deq(d->c.pwritev.port, d->c.pwritev.free_size); + MUTEX_UNLOCK(d->c.writev.q_mtx); + break; + } + if (desc->timer_state != timer_idle) { + driver_cancel_timer(desc->port); + } + desc->timer_state = timer_again; + desc->invoke = d->invoke; + desc->d = d; + desc->free = d->free; + driver_set_timer(desc->port, 0L); + return !0; +} + + + +static void cq_execute(file_descriptor *desc) { + struct t_data *d; + register void *void_ptr; /* Soft cast variable */ + if (desc->timer_state == timer_again) + return; + if (! (d = cq_deq(desc))) + return; + TRACE_F(("x%i", (int) d->command)); + d->again = sys_info.async_threads == 0; + DRIVER_ASYNC(d->level, desc, d->invoke, void_ptr=d, d->free); +} + +static int async_write(file_descriptor *desc, int *errp, + int reply, Uint32 reply_size) { + struct t_data *d; + if (! 
(d = EF_ALLOC(sizeof(struct t_data) - 1))) { + if (errp) *errp = ENOMEM; + return -1; + } + TRACE_F(("w%lu", (unsigned long)desc->write_buffered)); + d->command = FILE_WRITE; + d->fd = desc->fd; + d->flags = desc->flags; + d->c.writev.port = desc->port; + d->c.writev.q_mtx = desc->q_mtx; + d->c.writev.size = desc->write_buffered; + d->reply = reply; + d->c.writev.free_size = 0; + d->c.writev.reply_size = reply_size; + d->invoke = invoke_writev; + d->free = free_writev; + d->level = 1; + cq_enq(desc, d); + desc->write_buffered = 0; + return 0; +} + +static int flush_write(file_descriptor *desc, int *errp) { + int result; + MUTEX_LOCK(desc->q_mtx); + if (desc->write_buffered > 0) { + result = async_write(desc, errp, 0, 0); + } else { + result = 0; + } + MUTEX_UNLOCK(desc->q_mtx); + return result; +} + +static int check_write_error(file_descriptor *desc, int *errp) { + if (desc->write_error) { + if (errp) *errp = desc->write_errInfo.posix_errno; + desc->write_error = 0; + return -1; + } + return 0; +} + +static int flush_write_check_error(file_descriptor *desc, int *errp) { + int r; + if ( (r = flush_write(desc, errp)) != 0) { + check_write_error(desc, NULL); + return r; + } else { + return check_write_error(desc, errp); + } +} + +static int async_lseek(file_descriptor *desc, int *errp, int reply, + Sint64 offset, int origin) { + struct t_data *d; + if (! 
(d = EF_ALLOC(sizeof(struct t_data)))) { + *errp = ENOMEM; + return -1; + } + d->flags = desc->flags; + d->fd = desc->fd; + d->command = FILE_LSEEK; + d->reply = reply; + d->c.lseek.offset = offset; + d->c.lseek.origin = origin; + d->invoke = invoke_lseek; + d->free = free_data; + d->level = 1; + cq_enq(desc, d); + return 0; +} + +static void flush_read(file_descriptor *desc) { + desc->read_offset = 0; + desc->read_size = 0; + if (desc->read_binp) { + driver_free_binary(desc->read_binp); + desc->read_binp = NULL; + } +} + +static int lseek_flush_read(file_descriptor *desc, int *errp) { + int r = 0; + size_t read_size = desc->read_size; + if (read_size != 0) { + flush_read(desc); + if ((r = async_lseek(desc, errp, 0, + -((ssize_t)read_size), EFILE_SEEK_CUR)) + < 0) { + return r; + } + } else { + flush_read(desc); + } + return r; +} + + + +/********************************************************************* + * Driver entry point -> ready_async + */ +static void +file_async_ready(ErlDrvData e, ErlDrvThreadData data) +{ + file_descriptor *desc = (file_descriptor*)e; + struct t_data *d = (struct t_data *) data; + char header[5]; /* result code + count */ + char resbuf[RESBUFSIZE]; /* Result buffer. */ + + + TRACE_C('r'); + + if (try_again(desc, d)) { + return; + } + + switch (d->command) + { + case FILE_READ: + if (!d->result_ok) { + reply_error(desc, &d->errInfo); + } else { + size_t available_bytes = + d->c.read.bin_offset + d->c.read.bin_size - desc->read_offset; + if (available_bytes < d->c.read.size) { + d->c.read.size = available_bytes; + } + TRACE_C('D'); + reply_data(desc, d->c.read.binp, + desc->read_offset, d->c.read.size); + desc->read_offset += d->c.read.size; + desc->read_size = + d->c.read.bin_offset + d->c.read.bin_size - desc->read_offset; + try_free_read_bin(desc); + } + free_read(data); + break; + case FILE_READ_LINE: + /* The read_line stucture differs from the read structure. 
+ The data->read_offset and d->c.read_line.read_offset are copies, as are + data->read_size and d->c.read_line.read_size + The read_line function does not kniow in advance how large the binary has to be, + why new allocation (but not reallocation of the old binary, for obvious reasons) + may happen in the worker thread. */ + if (!d->result_ok) { + reply_error(desc, &d->errInfo); + } else { + size_t len = d->c.read_line.nl_pos - d->c.read_line.read_offset; + TRACE_C('L'); + reply_data(desc, d->c.read_line.binp, + d->c.read_line.read_offset, len); + desc->read_offset = d->c.read_line.read_offset + d->c.read_line.nl_skip + len; + desc->read_size = + d->c.read_line.read_size - d->c.read_line.nl_skip - len; + if (desc->read_binp != d->c.read_line.binp) { /* New binary allocated */ + driver_free_binary(desc->read_binp); + desc->read_binp = d->c.read_line.binp; + driver_binary_inc_refc(desc->read_binp); + } +#if !ALWAYS_READ_LINE_AHEAD + ASSERT(desc->read_bufsize > 0 || desc->read_size == 0); + if (desc->read_bufsize == 0) { + desc->read_offset = desc->read_binp->orig_size; /* triggers cleanup */ + } +#endif + try_free_read_bin(desc); + } + free_read_line(data); + break; + case FILE_READ_FILE: + if (!d->result_ok) + reply_error(desc, &d->errInfo); + else { + header[0] = FILE_RESP_OK; + TRACE_C('R'); + driver_output_binary(desc->port, header, 1, + d->c.read_file.binp, + 0, d->c.read_file.offset); + } + free_read_file(data); + break; + case FILE_WRITE: + if (d->reply) { + if (! d->result_ok) { + reply_error(desc, &d->errInfo); + } else { + reply_Uint(desc, d->c.writev.reply_size); + } + } else { + if (! 
d->result_ok) { + desc->write_error = !0; + desc->write_errInfo = d->errInfo; + } + } + free_writev(data); + break; + case FILE_LSEEK: + if (d->reply) { + if (d->result_ok) + reply_Sint64(desc, d->c.lseek.location); + else + reply_error(desc, &d->errInfo); + } + free_data(data); + break; + case FILE_MKDIR: + case FILE_RMDIR: + case FILE_CHDIR: + case FILE_DELETE: + case FILE_FSYNC: + case FILE_TRUNCATE: + case FILE_LINK: + case FILE_SYMLINK: + case FILE_RENAME: + case FILE_WRITE_INFO: + reply(desc, d->result_ok, &d->errInfo); + free_data(data); + break; + case FILE_ALTNAME: + case FILE_PWD: + case FILE_READLINK: + { + int length; + char *resbuf = d->b; + + if (!d->result_ok) + reply_error(desc, &d->errInfo); + else { + resbuf[0] = FILE_RESP_OK; + length = 1+strlen((char*) resbuf+1); + TRACE_C('R'); + driver_output2(desc->port, resbuf, length, NULL, 0); + } + free_data(data); + break; + } + case FILE_OPEN: + if (!d->result_ok) { + reply_error(desc, &d->errInfo); + } else { + desc->fd = d->fd; + desc->flags = d->flags; + reply_Uint(desc, d->fd); + } + free_data(data); + break; + case FILE_FSTAT: + case FILE_LSTAT: + { + if (d->result_ok) { + resbuf[0] = FILE_RESP_INFO; + + put_int32(d->info.size_high, &resbuf[1 + (0 * 4)]); + put_int32(d->info.size_low, &resbuf[1 + (1 * 4)]); + put_int32(d->info.type, &resbuf[1 + (2 * 4)]); + + PUT_TIME(d->info.accessTime, resbuf + 1 + 3*4); + PUT_TIME(d->info.modifyTime, resbuf + 1 + 9*4); + PUT_TIME(d->info.cTime, resbuf + 1 + 15*4); + + put_int32(d->info.mode, &resbuf[1 + (21 * 4)]); + put_int32(d->info.links, &resbuf[1 + (22 * 4)]); + put_int32(d->info.major_device, &resbuf[1 + (23 * 4)]); + put_int32(d->info.minor_device, &resbuf[1 + (24 * 4)]); + put_int32(d->info.inode, &resbuf[1 + (25 * 4)]); + put_int32(d->info.uid, &resbuf[1 + (26 * 4)]); + put_int32(d->info.gid, &resbuf[1 + (27 * 4)]); + put_int32(d->info.access, &resbuf[1 + (28 * 4)]); + +#define RESULT_SIZE (1 + (29 * 4)) + TRACE_C('R'); + driver_output2(desc->port, 
resbuf, RESULT_SIZE, NULL, 0); +#undef RESULT_SIZE + } else + reply_error(desc, &d->errInfo); + } + free_data(data); + break; + case FILE_READDIR: + if (!d->result_ok) + reply_error(desc, &d->errInfo); + else { + struct t_readdir_buf *b1 = d->c.read_dir.first_buf; + TRACE_C('R'); + ASSERT(b1); + while (b1) { + struct t_readdir_buf *b2 = b1; + char *p = &b1->buf[0]; + int sz = get_int32(p); + while (sz) { /* 0 == EOB */ + p += 4; + driver_output2(desc->port, p, sz, NULL, 0); + p += sz; + sz = get_int32(p); + } + b1 = b1->next; + EF_FREE(b2); + } + d->c.read_dir.first_buf = NULL; + d->c.read_dir.last_buf = NULL; + } + free_readdir(data); + break; + /* See file_stop */ + case FILE_CLOSE: + if (d->reply) { + TRACE_C('K'); + reply_ok(desc); + } + free_data(data); + break; + case FILE_PWRITEV: + if (!d->result_ok) { + reply_Uint_error(desc, d->c.pwritev.cnt, &d->errInfo); + } else { + reply_Uint(desc, d->c.pwritev.n); + } + free_pwritev(data); + break; + case FILE_PREADV: + if (!d->result_ok) { + reply_error(desc, &d->errInfo); + } else { + reply_ev(desc, FILE_RESP_LDATA, &d->c.preadv.eiov); + } + free_preadv(data); + break; + case FILE_IPREAD: + if (!d->result_ok) { + reply_error(desc, &d->errInfo); + } else if (!d->c.preadv.eiov.vsize) { + reply_eof(desc); + } else { + reply_ev(desc, FILE_RESP_N2DATA, &d->c.preadv.eiov); + } + free_preadv(data); + break; + default: + abort(); + } + if (desc->write_buffered != 0 && desc->timer_state == timer_idle) { + desc->timer_state = timer_write; + driver_set_timer(desc->port, desc->write_delay); + } + cq_execute(desc); +} + +/********************************************************************* + * Driver entry point -> output + */ +static void +file_output(ErlDrvData e, char* buf, int count) +{ + file_descriptor* desc = (file_descriptor*)e; + Efile_error errInfo; /* The error codes for the last operation. */ + Sint fd; /* The file descriptor for this port, if any, + * -1 if none. 
+ */ + char* name; /* Points to the filename in buf. */ + int command; + struct t_data *d = NULL; + + + TRACE_C('o'); + + fd = desc->fd; + name = buf+1; + command = *(uchar*)buf++; + + switch(command) { + + case FILE_MKDIR: + { + d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + strlen(name) + 1); + + strcpy(d->b, name); + d->command = command; + d->invoke = invoke_mkdir; + d->free = free_data; + d->level = 2; + goto done; + } + case FILE_RMDIR: + { + d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + strlen(name) + 1); + + strcpy(d->b, name); + d->command = command; + d->invoke = invoke_rmdir; + d->free = free_data; + d->level = 2; + goto done; + } + case FILE_DELETE: + { + d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + strlen(name) + 1); + + strcpy(d->b, name); + d->command = command; + d->invoke = invoke_delete_file; + d->free = free_data; + d->level = 2; + goto done; + } + case FILE_RENAME: + { + char* new_name; + + new_name = name+strlen(name)+1; + d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + + strlen(name) + 1 + + strlen(new_name) + 1); + + strcpy(d->b, name); + strcpy(d->b + strlen(name) + 1, new_name); + d->flags = desc->flags; + d->fd = fd; + d->command = command; + d->invoke = invoke_rename; + d->free = free_data; + d->level = 2; + goto done; + } + case FILE_CHDIR: + { + d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + strlen(name) + 1); + + strcpy(d->b, name); + d->command = command; + d->invoke = invoke_chdir; + d->free = free_data; + d->level = 2; + goto done; + } + case FILE_PWD: + { + d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + RESBUFSIZE + 1); + + d->drive = *(uchar*)buf; + d->command = command; + d->invoke = invoke_pwd; + d->free = free_data; + d->level = 2; + goto done; + } + + case FILE_READDIR: +#ifdef USE_THREADS + if (sys_info.async_threads > 0) + { + d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + strlen(name) + 1); + + strcpy(d->b, name); + d->dir_handle = NULL; + d->command = command; + d->invoke = invoke_readdir; + d->free = free_readdir; + d->level = 
2; + d->c.read_dir.first_buf = NULL; + d->c.read_dir.last_buf = NULL; + goto done; + } + else +#endif + { + char resbuf[RESBUFSIZE+1]; + EFILE_DIR_HANDLE dir_handle; /* Handle to open directory. */ + + errInfo.posix_errno = 0; + dir_handle = NULL; + resbuf[0] = FILE_RESP_OK; + + while (efile_readdir(&errInfo, name, &dir_handle, + resbuf+1, RESBUFSIZE)) { + int length = 1 + strlen(resbuf+1); + driver_output2(desc->port, resbuf, length, NULL, 0); + } + if (errInfo.posix_errno != 0) { + reply_error(desc, &errInfo); + return; + } + TRACE_C('R'); + driver_output2(desc->port, resbuf, 1, NULL, 0); + return; + } + case FILE_OPEN: + { + d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + strlen(buf+4) + 1); + + d->flags = get_int32((uchar*)buf); + name = buf+4; + strcpy(d->b, name); + d->command = command; + d->invoke = invoke_open; + d->free = free_data; + d->level = 2; + goto done; + } + + case FILE_FSYNC: + { + d = EF_SAFE_ALLOC(sizeof(struct t_data)); + + d->fd = fd; + d->command = command; + d->invoke = invoke_fsync; + d->free = free_data; + d->level = 2; + goto done; + } + + + case FILE_FSTAT: + case FILE_LSTAT: + { + d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + strlen(name) + 1); + + strcpy(d->b, name); + d->fd = fd; + d->command = command; + d->invoke = invoke_flstat; + d->free = free_data; + d->level = 2; + goto done; + } + + case FILE_TRUNCATE: + { + d = EF_SAFE_ALLOC(sizeof(struct t_data)); + + d->flags = desc->flags; + d->fd = fd; + d->command = command; + d->invoke = invoke_truncate; + d->free = free_data; + d->level = 2; + goto done; + } + + case FILE_WRITE_INFO: + { + d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + + strlen(buf+21*4) + 1); + + d->info.mode = get_int32(buf + 0 * 4); + d->info.uid = get_int32(buf + 1 * 4); + d->info.gid = get_int32(buf + 2 * 4); + GET_TIME(d->info.accessTime, buf + 3 * 4); + GET_TIME(d->info.modifyTime, buf + 9 * 4); + GET_TIME(d->info.cTime, buf + 15 * 4); + strcpy(d->b, buf+21*4); + d->command = command; + d->invoke = 
invoke_write_info; + d->free = free_data; + d->level = 2; + goto done; + } + + case FILE_READLINK: + { + d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + RESBUFSIZE + 1); + + strcpy(d->b, name); + d->command = command; + d->invoke = invoke_readlink; + d->free = free_data; + d->level = 2; + goto done; + } + + case FILE_ALTNAME: + { + d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + RESBUFSIZE + 1); + strcpy(d->b, name); + d->command = command; + d->invoke = invoke_altname; + d->free = free_data; + d->level = 2; + goto done; + } + + + case FILE_LINK: + { + char* new_name; + + new_name = name+strlen(name)+1; + d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + + strlen(name) + 1 + + strlen(new_name) + 1); + + strcpy(d->b, name); + strcpy(d->b + strlen(name) + 1, new_name); + d->flags = desc->flags; + d->fd = fd; + d->command = command; + d->invoke = invoke_link; + d->free = free_data; + d->level = 2; + goto done; + } + + case FILE_SYMLINK: + { + char* new_name; + + new_name = name+strlen(name)+1; + d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + + strlen(name) + 1 + + strlen(new_name) + 1); + + strcpy(d->b, name); + strcpy(d->b + strlen(name) + 1, new_name); + d->flags = desc->flags; + d->fd = fd; + d->command = command; + d->invoke = invoke_symlink; + d->free = free_data; + d->level = 2; + goto done; + } + + } + + /* + * Ignore anything else -- let the caller hang. + */ + + return; + + done: + if (d) { + cq_enq(desc, d); + } +} + +/********************************************************************* + * Driver entry point -> flush + */ +static void +file_flush(ErlDrvData e) { + file_descriptor *desc = (file_descriptor *)e; + int r; + + TRACE_C('f'); + + r = flush_write(desc, NULL); + /* Only possible reason for bad return value is ENOMEM, and + * there is nobody to tell... 
+ */ + ASSERT(r == 0); + r = 0; /* Avoiding warning */ + cq_execute(desc); +} + + + +/********************************************************************* + * Driver entry point -> control + */ +static int +file_control(ErlDrvData e, unsigned int command, + char* buf, int len, char **rbuf, int rlen) { + file_descriptor *desc = (file_descriptor *)e; + switch (command) { + default: + return 0; + } /* switch (command) */ + ASSERT(0); + desc = NULL; /* XXX Avoid warning while empty switch */ + return 0; +} + +/********************************************************************* + * Driver entry point -> timeout + */ +static void +file_timeout(ErlDrvData e) { + file_descriptor *desc = (file_descriptor *)e; + enum e_timer timer_state = desc->timer_state; + + TRACE_C('t'); + + desc->timer_state = timer_idle; + switch (timer_state) { + case timer_idle: + ASSERT(0); + break; + case timer_again: + ASSERT(desc->invoke); + ASSERT(desc->free); + driver_async(desc->port, KEY(desc), desc->invoke, desc->d, desc->free); + break; + case timer_write: { + int r = flush_write(desc, NULL); + /* Only possible reason for bad return value is ENOMEM, and + * there is nobody to tell... + */ + ASSERT(r == 0); + r = 0; /* Avoiding warning */ + cq_execute(desc); + } break; + } /* case */ +} + + + +/********************************************************************* + * Driver entry point -> outputv + */ +static void +file_outputv(ErlDrvData e, ErlIOVec *ev) { + file_descriptor* desc = (file_descriptor*)e; + char command; + int p, q; + int err; + + TRACE_C('v'); + + p = 0; q = 1; + if (! 
EV_GET_CHAR(ev, &command, &p, &q)) { + /* Empty command */ + reply_posix_error(desc, EINVAL); + goto done; + } + /* 'command' contains the decoded command number, + * 'p' and 'q' point out the next byte in the command: + * ((char *)ev->iov[q].iov_base) + p; + */ + + TRACE_F(("%i", (int) command)); + + switch (command) { + + case FILE_CLOSE: { + flush_read(desc); + if (flush_write_check_error(desc, &err) < 0) { + reply_posix_error(desc, err); + goto done; + } + if (ev->size != 1) { + /* Wrong command length */ + reply_posix_error(desc, EINVAL); + goto done; + } + if (desc->fd != FILE_FD_INVALID) { + struct t_data *d; + if (! (d = EF_ALLOC(sizeof(struct t_data)))) { + reply_posix_error(desc, ENOMEM); + } else { + d->command = command; + d->reply = !0; + d->fd = desc->fd; + d->flags = desc->flags; + d->invoke = invoke_close; + d->free = free_data; + d->level = 2; + cq_enq(desc, d); + desc->fd = FILE_FD_INVALID; + desc->flags = 0; + } + } else { + reply_posix_error(desc, EBADF); + } + } goto done; + + case FILE_READ: { + Uint32 sizeH, sizeL; + size_t size, alloc_size; + struct t_data *d; + if (flush_write_check_error(desc, &err) < 0) { + reply_posix_error(desc, err); + goto done; + } +#if ALWAYS_READ_LINE_AHEAD + if (desc->read_bufsize == 0 && desc->read_binp != NULL && desc->read_size > 0) { + /* We have allocated a buffer for line mode but should not really have a + read-ahead buffer... */ + if (lseek_flush_read(desc, &err) < 0) { + reply_posix_error(desc, err); + goto done; + } + } +#endif + if (ev->size != 1+8 + || !EV_GET_UINT32(ev, &sizeH, &p, &q) + || !EV_GET_UINT32(ev, &sizeL, &p, &q)) { + /* Wrong buffer length to contain the read count */ + reply_posix_error(desc, EINVAL); + goto done; + } +#if SIZEOF_SIZE_T == 4 + if (sizeH != 0) { + reply_posix_error(desc, EINVAL); + goto done; + } + size = sizeL; +#else + size = ((size_t)sizeH << 32) | sizeL; +#endif + if ((desc->fd == FILE_FD_INVALID) + || (! 
(desc->flags & EFILE_MODE_READ)) ) { + reply_posix_error(desc, EBADF); + goto done; + } + if (size == 0) { + reply_buf(desc, &command, 0); + goto done; + } + if (desc->read_size >= size) { + /* We already have all data */ + TRACE_C('D'); + reply_data(desc, desc->read_binp, desc->read_offset, size); + desc->read_offset += size; + desc->read_size -= size; + try_free_read_bin(desc); + goto done; + } + /* We may have some of the data + */ + /* Justification for the following strange formula: + * If the read request is for such a large block as more than + * half the buffer size it may lead to a lot of unnecessary copying, + * since the tail of the old buffer is copied to the head of the + * new, and if the tail is almost half the buffer it is a lot + * to copy. Therefore allocate the exact amount needed in + * this case, giving no lingering tail. */ + alloc_size = + size > (desc->read_bufsize>>1) ? + size : desc->read_bufsize; + if (! desc->read_binp) { + /* Need to allocate a new binary for the result */ + if (! (desc->read_binp = driver_alloc_binary(alloc_size))) { + reply_posix_error(desc, ENOMEM); + goto done; + } + } else { + /* We already have a buffer */ + if (desc->read_binp->orig_size - desc->read_offset < size) { + /* Need to allocate a new binary for the result */ + ErlDrvBinary *binp; + if (! (binp = driver_alloc_binary(alloc_size))) { + reply_posix_error(desc, ENOMEM); + goto done; + } + /* Move data we already have to the new binary */ + sys_memcpy(binp->orig_bytes, + desc->read_binp->orig_bytes + desc->read_offset, + desc->read_size); + driver_free_binary(desc->read_binp); + desc->read_offset = 0; + desc->read_binp = binp; + } + } + if (! 
(d = EF_ALLOC(sizeof(struct t_data)))) { + reply_posix_error(desc, ENOMEM); + goto done; + } + d->command = command; + d->reply = !0; + d->fd = desc->fd; + d->flags = desc->flags; + d->c.read.binp = desc->read_binp; + d->c.read.bin_offset = desc->read_offset + desc->read_size; + d->c.read.bin_size = desc->read_binp->orig_size - d->c.read.bin_offset; + d->c.read.size = size; + driver_binary_inc_refc(d->c.read.binp); + d->invoke = invoke_read; + d->free = free_read; + d->level = 1; + cq_enq(desc, d); + } goto done; /* case FILE_READ: */ + + case FILE_READ_LINE: { + /* + * Icky little creature... We do mostly as ordinary file read, but with a few differences. + * 1) We have to scan for proper newline sequence if there is a buffer already, we cannot know + * in advance if the buffer contains a whole line without scanning. + * 2) We do not know how large the buffer needs to be in advance. We give a default buffer, + * but the worker may need to allocate a new one. Freeing the old and rereferencing a newly + * allocated binary + dealing with offsets and lengts are done in file_async ready + * for this OP. + */ + struct t_data *d; + if (flush_write_check_error(desc, &err) < 0) { + reply_posix_error(desc, err); + goto done; + } + if (ev->size != 1) { + /* Wrong command length */ + reply_posix_error(desc, EINVAL); + goto done; + } + if ((desc->fd == FILE_FD_INVALID) + || (! 
(desc->flags & EFILE_MODE_READ)) ) { + reply_posix_error(desc, EBADF); + goto done; + } + if (desc->read_size > 0) { + /* look for '\n' in what we'we already got */ + void *nl_ptr = memchr(desc->read_binp->orig_bytes + desc->read_offset,'\n',desc->read_size); + if (nl_ptr != NULL) { + /* If found, we're done */ + int skip = 0; + size_t size = ((char *) nl_ptr) - + ((char *) (desc->read_binp->orig_bytes + desc->read_offset)) + 1; + if (size > 1 && + *(((char *) nl_ptr) - 1) == '\r') { + *(((char *) nl_ptr) - 1) = '\n'; + skip = 1; + --size; + } + reply_data(desc, desc->read_binp, desc->read_offset, size); + desc->read_offset += (size + skip); + desc->read_size -= (size + skip); + try_free_read_bin(desc); + goto done; + } + } + /* Now, it's up to the thread to work out the need for more buffers and such, it's + no use doing it in this thread as we do not have the information required anyway. + Even a NULL buffer could be handled by the thread, but code is simplified by us + allocating it */ + if (! desc->read_binp) { + int alloc_size = (desc->read_bufsize > DEFAULT_LINEBUF_SIZE) ? desc->read_bufsize : + DEFAULT_LINEBUF_SIZE; + /* Allocate a new binary for the result */ + if (! (desc->read_binp = driver_alloc_binary(alloc_size))) { + reply_posix_error(desc, ENOMEM); + goto done; + } + } + if (! 
(d = EF_ALLOC(sizeof(struct t_data)))) { + reply_posix_error(desc, ENOMEM); + goto done; + } + + d->command = command; + d->reply = !0; + d->fd = desc->fd; + d->flags = desc->flags; + d->c.read_line.binp = desc->read_binp; + d->c.read_line.read_offset = desc->read_offset; + d->c.read_line.read_size = desc->read_size; +#if !ALWAYS_READ_LINE_AHEAD + d->c.read_line.read_ahead = (desc->read_bufsize > 0); +#endif + driver_binary_inc_refc(d->c.read.binp); + d->invoke = invoke_read_line; + d->free = free_read_line; + d->level = 1; + cq_enq(desc, d); + } goto done; + case FILE_WRITE: { + int skip = 1; + int size = ev->size - skip; + if (lseek_flush_read(desc, &err) < 0) { + reply_posix_error(desc, err); + goto done; + } + if (! (desc->flags & EFILE_MODE_WRITE)) { + reply_posix_error(desc, EBADF); + goto done; + } + if (size <= 0) { + reply_Uint(desc, size); + goto done; + } + MUTEX_LOCK(desc->q_mtx); + if (driver_enqv(desc->port, ev, skip)) { + MUTEX_UNLOCK(desc->q_mtx); + reply_posix_error(desc, ENOMEM); + goto done; + } + desc->write_buffered += size; + if (desc->write_buffered < desc->write_bufsize) { + MUTEX_UNLOCK(desc->q_mtx); + reply_Uint(desc, size); + if (desc->timer_state == timer_idle) { + desc->timer_state = timer_write; + driver_set_timer(desc->port, desc->write_delay); + } + } else { + if (async_write(desc, &err, !0, size) != 0) { + MUTEX_UNLOCK(desc->q_mtx); + reply_posix_error(desc, err); + goto done; + } else { + MUTEX_UNLOCK(desc->q_mtx); + } + } + } goto done; /* case FILE_WRITE */ + + case FILE_PWRITEV: { + Uint32 i, j, n; + size_t total; + struct t_data *d; + if (lseek_flush_read(desc, &err) < 0) { + reply_Uint_posix_error(desc, 0, err); + goto done; + } + if (flush_write_check_error(desc, &err) < 0) { + reply_Uint_posix_error(desc, 0, err); + goto done; + } + if (ev->size < 1+4 + || !EV_GET_UINT32(ev, &n, &p, &q)) { + /* Buffer too short to contain even the number of pos/size specs */ + reply_Uint_posix_error(desc, 0, EINVAL); + goto done; + } + if (n 
== 0) { + /* Trivial case - nothing to write */ + if (ev->size != 1+4) { + reply_posix_error(desc, err); + } else { + reply_Uint(desc, 0); + } + goto done; + } + if (ev->size < 1+4+8*(2*n)) { + /* Buffer too short to contain even the pos/size specs */ + reply_Uint_posix_error(desc, 0, EINVAL); + goto done; + } + d = EF_ALLOC(sizeof(struct t_data) + + (n * sizeof(struct t_pbuf_spec))); + if (! d) { + reply_Uint_posix_error(desc, 0, ENOMEM); + goto done; + } + d->command = command; + d->reply = !0; + d->fd = desc->fd; + d->flags = desc->flags; + d->c.pwritev.port = desc->port; + d->c.pwritev.q_mtx = desc->q_mtx; + d->c.pwritev.n = n; + d->c.pwritev.cnt = 0; + total = 0; + j = 0; + /* Create pos/size specs in the thread data structure + * for all non-zero size binaries. Calculate total size. + */ + for(i = 0; i < n; i++) { + Uint32 sizeH, sizeL; + size_t size; + if ( !EV_GET_UINT64(ev, &d->c.pwritev.specs[i].offset, &p, &q) + || !EV_GET_UINT32(ev, &sizeH, &p, &q) + || !EV_GET_UINT32(ev, &sizeL, &p, &q)) { + /* Misalignment in buffer */ + reply_Uint_posix_error(desc, 0, EINVAL); + EF_FREE(d); + goto done; + } +#if SIZEOF_SIZE_T == 4 + if (sizeH != 0) { + reply_Uint_posix_error(desc, 0, EINVAL); + EF_FREE(d); + goto done; + } + size = sizeL; +#else + size = ((size_t)sizeH<<32) | sizeL; +#endif + if (size > 0) { + total += size; + d->c.pwritev.specs[j].size = size; + j++; + } + } + d->c.pwritev.size = total; + d->c.pwritev.free_size = 0; + if (j == 0) { + /* Trivial case - nothing to write */ + EF_FREE(d); + reply_Uint(desc, 0); + } else { + size_t skip = 1 + 4 + 8*(2*n); + if (skip + total != ev->size) { + /* Actual amount of data does not match + * total of all pos/size specs + */ + EF_FREE(d); + reply_Uint_posix_error(desc, 0, EINVAL); + } else { + /* Enqueue the data */ + MUTEX_LOCK(desc->q_mtx); + driver_enqv(desc->port, ev, skip); + MUTEX_UNLOCK(desc->q_mtx); + /* Execute the command */ + d->invoke = invoke_pwritev; + d->free = free_pwritev; + d->level = 1; + 
cq_enq(desc, d); + } + } + } goto done; /* case FILE_PWRITEV: */ + + case FILE_PREADV: { + register void * void_ptr; + Uint32 i, n; + struct t_data *d; + ErlIOVec *res_ev; + if (lseek_flush_read(desc, &err) < 0) { + reply_posix_error(desc, err); + goto done; + } + if (flush_write_check_error(desc, &err) < 0) { + reply_posix_error(desc, err); + goto done; + } + if (ev->size < 1+8 + || !EV_GET_UINT32(ev, &n, &p, &q) + || !EV_GET_UINT32(ev, &n, &p, &q)) { + /* Buffer too short to contain even the number of pos/size specs */ + reply_posix_error(desc, EINVAL); + goto done; + } + if (ev->size != 1+8+8*(2*n)) { + /* Buffer wrong length to contain the pos/size specs */ + reply_posix_error(desc, EINVAL); + goto done; + } + /* Create the thread data structure with the contained ErlIOVec + * and corresponding binaries for the response + */ + d = EF_ALLOC(sizeof(*d) + + (n * sizeof(*d->c.preadv.offsets)) + + ((1+n) * (sizeof(*res_ev->iov) + + sizeof(*res_ev->binv)))); + if (! d) { + reply_posix_error(desc, ENOMEM); + goto done; + } + d->command = command; + d->reply = !0; + d->fd = desc->fd; + d->flags = desc->flags; + d->c.preadv.n = n; + d->c.preadv.cnt = 0; + d->c.preadv.size = 0; + res_ev = &d->c.preadv.eiov; + /* XXX possible alignment problems here for weird machines */ + res_ev->vsize = 1+d->c.preadv.n; + res_ev->iov = void_ptr = &d->c.preadv.offsets[d->c.preadv.n]; + res_ev->binv = void_ptr = &res_ev->iov[res_ev->vsize]; + /* Read in the pos/size specs and allocate binaries for the results */ + for (i = 1; i < 1+n; i++) { + Uint32 sizeH, sizeL; + size_t size; + if ( !EV_GET_UINT64(ev, &d->c.preadv.offsets[i-1], &p, &q) + || !EV_GET_UINT32(ev, &sizeH, &p, &q) + || !EV_GET_UINT32(ev, &sizeL, &p, &q)) { + reply_posix_error(desc, EINVAL); + break; + } +#if SIZEOF_SIZE_T == 4 + if (sizeH != 0) { + reply_posix_error(desc, EINVAL); + break; + } + size = sizeL; +#else + size = ((size_t)sizeH<<32) | sizeL; +#endif + if (! 
(res_ev->binv[i] = driver_alloc_binary(size))) { + reply_posix_error(desc, ENOMEM); + break; + } else { + res_ev->iov[i].iov_len = size; + res_ev->iov[i].iov_base = res_ev->binv[i]->orig_bytes; + } + } + if (i < 1+n) { + for (i--; i > 0; i--) { + driver_free_binary(res_ev->binv[i]); + } + EF_FREE(d); + goto done; + } + /* Allocate the header binary (index 0) */ + res_ev->binv[0] = driver_alloc_binary(4+4+8*n); + if (! res_ev->binv[0]) { + reply_posix_error(desc, ENOMEM); + for (i = 1; i < 1+n; i++) { + driver_free_binary(res_ev->binv[i]); + } + EF_FREE(d); + goto done; + } + res_ev->iov[0].iov_len = 4+4+8*n; + res_ev->iov[0].iov_base = res_ev->binv[0]->orig_bytes; + /* Fill in the number of buffers in the header */ + put_int32(0, res_ev->iov[0].iov_base); + put_int32(n, res_ev->iov[0].iov_base+4); + /**/ + res_ev->size = res_ev->iov[0].iov_len; + if (n == 0) { + /* Trivial case - nothing to read */ + reply_ev(desc, FILE_RESP_LDATA, res_ev); + free_preadv(d); + goto done; + } else { + d->invoke = invoke_preadv; + d->free = free_preadv; + d->level = 1; + cq_enq(desc, d); + } + } goto done; /* case FILE_PREADV: */ + + case FILE_LSEEK: { + Sint64 offset; /* Offset for seek */ + Uint32 origin; /* Origin of seek. 
*/ + if (lseek_flush_read(desc, &err) < 0) { + reply_posix_error(desc, err); + goto done; + } + if (flush_write_check_error(desc, &err) < 0) { + reply_posix_error(desc, err); + goto done; + } + if (ev->size != 1+8+4 + || !EV_GET_UINT64(ev, &offset, &p, &q) + || !EV_GET_UINT32(ev, &origin, &p, &q)) { + /* Wrong length of buffer to contain offset and origin */ + reply_posix_error(desc, EINVAL); + goto done; + } + if (async_lseek(desc, &err, !0, offset, origin) < 0) { + reply_posix_error(desc, err); + goto done; + } + } goto done; + + case FILE_READ_FILE: { + struct t_data *d; + if (ev->size < 1+1) { + /* Buffer contains empty name */ + reply_posix_error(desc, ENOENT); + goto done; + } + if (ev->size-1 != ev->iov[q].iov_len-p) { + /* Name not in one single buffer */ + reply_posix_error(desc, EINVAL); + goto done; + } + d = EF_ALLOC(sizeof(struct t_data) + ev->size); + if (! d) { + reply_posix_error(desc, ENOMEM); + goto done; + } + d->command = command; + d->reply = !0; + /* Copy name */ + memcpy(d->c.read_file.name, EV_CHAR_P(ev, p, q), ev->size-1); + d->c.read_file.name[ev->size-1] = '\0'; + d->c.read_file.binp = NULL; + d->invoke = invoke_read_file; + d->free = free_read_file; + d->level = 2; + cq_enq(desc, d); + } goto done; + + case FILE_IPREAD: { + /* This operation cheets by using invoke_preadv() and free_preadv() + * plus its own invoke_ipread. Therefore the result format is + * a bit awkward - the header binary contains one extra 64 bit + * field that invoke_preadv() fortunately ignores, + * and the first 64 bit field does not contain the number of + * data binaries which invoke_preadv() also ignores. + */ + register void * void_ptr; + char mode; + Sint64 hdr_offset; + Uint32 max_size; + struct t_data *d; + ErlIOVec *res_ev; + int vsize; + if (! 
EV_GET_CHAR(ev, &mode, &p, &q)) { + /* Empty command */ + reply_posix_error(desc, EINVAL); + goto done; + } + if (mode != IPREAD_S32BU_P32BU) { + reply_posix_error(desc, EINVAL); + goto done; + } + if (lseek_flush_read(desc, &err) < 0) { + reply_posix_error(desc, err); + goto done; + } + if (flush_write_check_error(desc, &err) < 0) { + reply_posix_error(desc, err); + goto done; + } + if (ev->size < 1+1+8+4 + || !EV_GET_UINT64(ev, &hdr_offset, &p, &q) + || !EV_GET_UINT32(ev, &max_size, &p, &q)) { + /* Buffer too short to contain + * the header offset and max size spec */ + reply_posix_error(desc, EINVAL); + goto done; + } + /* Create the thread data structure with the contained ErlIOVec + * and corresponding binaries for the response + */ + vsize = 2; + d = EF_ALLOC(sizeof(*d) + + vsize*(sizeof(*res_ev->iov) + sizeof(*res_ev->binv))); + if (! d) { + reply_posix_error(desc, ENOMEM); + goto done; + } + d->command = command; + d->reply = !0; + d->fd = desc->fd; + d->flags = desc->flags; + d->c.preadv.offsets[0] = hdr_offset; + d->c.preadv.size = max_size; + res_ev = &d->c.preadv.eiov; + /* XXX possible alignment problems here for weird machines */ + res_ev->iov = void_ptr = d + 1; + res_ev->binv = void_ptr = res_ev->iov + vsize; + res_ev->size = 0; + res_ev->vsize = 0; + d->invoke = invoke_ipread; + d->free = free_preadv; + d->level = 1; + cq_enq(desc, d); + } goto done; /* case FILE_IPREAD: */ + + case FILE_SETOPT: { + char opt; + if (ev->size < 1+1 + || !EV_GET_CHAR(ev, &opt, &p, &q)) { + /* Buffer too short to contain even the option type */ + reply_posix_error(desc, EINVAL); + goto done; + } + switch (opt) { + case FILE_OPT_DELAYED_WRITE: { + Uint32 sizeH, sizeL, delayH, delayL; + if (ev->size != 1+1+4*sizeof(Uint32) + || !EV_GET_UINT32(ev, &sizeH, &p, &q) + || !EV_GET_UINT32(ev, &sizeL, &p, &q) + || !EV_GET_UINT32(ev, &delayH, &p, &q) + || !EV_GET_UINT32(ev, &delayL, &p, &q)) { + /* Buffer has wrong length to contain the option values */ + reply_posix_error(desc, 
EINVAL); + goto done; + } +#if SIZEOF_SIZE_T == 4 + if (sizeH != 0) { + reply_posix_error(desc, EINVAL); + goto done; + } + desc->write_bufsize = sizeL; +#else + desc->write_bufsize = ((size_t)sizeH << 32) | sizeL; +#endif +#if SIZEOF_LONG == 4 + if (delayH != 0) { + reply_posix_error(desc, EINVAL); + goto done; + } + desc->write_delay = delayL; +#else + desc->write_delay = ((unsigned long)delayH << 32) | delayL; +#endif + TRACE_C('K'); + reply_ok(desc); + } goto done; + case FILE_OPT_READ_AHEAD: { + Uint32 sizeH, sizeL; + if (ev->size != 1+1+2*sizeof(Uint32) + || !EV_GET_UINT32(ev, &sizeH, &p, &q) + || !EV_GET_UINT32(ev, &sizeL, &p, &q)) { + /* Buffer has wrong length to contain the option values */ + reply_posix_error(desc, EINVAL); + goto done; + } +#if SIZEOF_SIZE_T == 4 + if (sizeH != 0) { + reply_posix_error(desc, EINVAL); + goto done; + } + desc->read_bufsize = sizeL; +#else + desc->read_bufsize = ((size_t)sizeH << 32) | sizeL; +#endif + TRACE_C('K'); + reply_ok(desc); + } goto done; + default: + reply_posix_error(desc, EINVAL); + goto done; + } /* case FILE_OPT_DELAYED_WRITE: */ + } ASSERT(0); goto done; /* case FILE_SETOPT: */ + + } /* switch(command) */ + + if (lseek_flush_read(desc, &err) < 0) { + reply_posix_error(desc, err); + goto done; + } + if (flush_write_check_error(desc, &err) < 0) { + reply_posix_error(desc, err); + goto done; + } else { + /* Flatten buffer and send it to file_output(desc, buf, len) */ + int len = ev->size; + char *buf = EF_ALLOC(len); + if (! 
buf) { + reply_posix_error(desc, ENOMEM); + goto done; + } + driver_vec_to_buf(ev, buf, len); + file_output((ErlDrvData) desc, buf, len); + EF_FREE(buf); + goto done; + } + + done: + cq_execute(desc); +} diff --git a/erts/emulator/drivers/common/erl_efile.h b/erts/emulator/drivers/common/erl_efile.h new file mode 100644 index 0000000000..9aa941e550 --- /dev/null +++ b/erts/emulator/drivers/common/erl_efile.h @@ -0,0 +1,152 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 1997-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* + * Defines the interfaces between the generic efile driver and its + * operating-system dependent helpers. + */ + +#include "sys.h" +#include "erl_driver.h" + +/* + * Open modes for efile_openfile(). + */ +#define EFILE_MODE_READ 1 +#define EFILE_MODE_WRITE 2 /* Implies truncating file when used alone. */ +#define EFILE_MODE_READ_WRITE 3 +#define EFILE_MODE_APPEND 4 +#define EFILE_COMPRESSED 8 +#define EFILE_NO_TRUNCATE 16 /* Special for reopening on VxWorks */ + +/* + * Seek modes for efile_seek(). + */ +#define EFILE_SEEK_SET 0 +#define EFILE_SEEK_CUR 1 +#define EFILE_SEEK_END 2 + +/* + * File types returned by efile_fileinfo(). + */ +#define FT_DEVICE 1 +#define FT_DIRECTORY 2 +#define FT_REGULAR 3 +#define FT_SYMLINK 4 +#define FT_OTHER 5 + +/* + * Access attributes returned by efile_fileinfo() (the bits can be ORed + * together). 
+ */ +#define FA_NONE 0 +#define FA_WRITE 1 +#define FA_READ 2 + +/* + * A handle to an open directory. To be cast to the correct type + * in the system-dependent directory functions. + */ + +typedef struct _Efile_Dir_Handle* EFILE_DIR_HANDLE; + +/* + * Error information from the last call. + */ +typedef struct _Efile_error { + int posix_errno; /* Posix error number, as in <errno.h>. */ + int os_errno; /* Os-dependent error number (not used). */ +} Efile_error; + +/* + * This structure contains date and time. + */ +typedef struct _Efile_time { + unsigned year; /* (4 digits). */ + unsigned month; /* (1..12). */ + unsigned day; /* (1..31). */ + unsigned hour; /* (0..23). */ + unsigned minute; /* (0..59). */ + unsigned second; /* (0..59). */ +} Efile_time; + + +/* + * Describes what is returned by file:file_info/1. + */ + +typedef struct _Efile_info { + Uint32 size_low; /* Size of file, lower 32 bits. */ + Uint32 size_high; /* Size of file, higher 32 bits. */ + Uint32 type; /* Type of file -- one of FT_*. */ + Uint32 access; /* Access to file -- FA_* bits, may be ORed together. */ + Uint32 mode; /* Access permissions -- bit field. */ + Uint32 links; /* Number of links to file. */ + Uint32 major_device; /* Major device or file system. */ + Uint32 minor_device; /* Minor device (for devices). */ + Uint32 inode; /* Inode number. */ + Uint32 uid; /* User id of owner. */ + Uint32 gid; /* Group id of owner. */ + Efile_time accessTime; /* Last time the file was accessed. */ + Efile_time modifyTime; /* Last time the file was modified. */ + Efile_time cTime; /* Creation time (Windows) or last + * inode change (Unix). + */ +} Efile_info; + +/* + * Functions. 
+ */ + +int efile_mkdir(Efile_error* errInfo, char* name); +int efile_rmdir(Efile_error* errInfo, char* name); +int efile_delete_file(Efile_error* errInfo, char* name); +int efile_rename(Efile_error* errInfo, char* src, char* dst); +int efile_chdir(Efile_error* errInfo, char* name); +int efile_getdcwd(Efile_error* errInfo, int drive, + char* buffer, size_t size); +int efile_readdir(Efile_error* errInfo, char* name, + EFILE_DIR_HANDLE* dir_handle, + char* buffer, size_t size); +int efile_openfile(Efile_error* errInfo, char* name, int flags, + int* pfd, Sint64* pSize); +void efile_closefile(int fd); +int efile_fsync(Efile_error* errInfo, int fd); +int efile_fileinfo(Efile_error* errInfo, Efile_info* pInfo, + char *name, int info_for_link); +int efile_write_info(Efile_error* errInfo, Efile_info* pInfo, char *name); +int efile_write(Efile_error* errInfo, int flags, int fd, + char* buf, size_t count); +int efile_writev(Efile_error* errInfo, int flags, int fd, + SysIOVec* iov, int iovcnt, size_t size); +int efile_read(Efile_error* errInfo, int flags, int fd, + char* buf, size_t count, size_t* pBytesRead); +int efile_seek(Efile_error* errInfo, int fd, + Sint64 offset, int origin, Sint64* new_location); +int efile_truncate_file(Efile_error* errInfo, int *fd, int flags); +int efile_pwrite(Efile_error* errInfo, int fd, + char* buf, size_t count, Sint64 offset); +int efile_pread(Efile_error* errInfo, int fd, + Sint64 offset, char* buf, size_t count, size_t* pBytesRead); +int efile_readlink(Efile_error* errInfo, char *name, + char* buffer, size_t size); +int efile_altname(Efile_error* errInfo, char *name, + char* buffer, size_t size); +int efile_link(Efile_error* errInfo, char* old, char* new); +int efile_symlink(Efile_error* errInfo, char* old, char* new); +int efile_may_openfile(Efile_error* errInfo, char *name); diff --git a/erts/emulator/drivers/common/gzio.c b/erts/emulator/drivers/common/gzio.c new file mode 100644 index 0000000000..801bc61d4d --- /dev/null +++ 
b/erts/emulator/drivers/common/gzio.c @@ -0,0 +1,822 @@ +/* + * Original version by Jean-loup Gailly. Modified for use by the + * Erlang run-time system and efile_driver; names of all external + * functions changed to avoid conflicts with the official gzio.c file. + * + * gzio.c -- IO on .gz files + * Copyright (C) 1995-1996 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ +/* %ExternalCopyright% */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif +#include <stdio.h> +#include <errno.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#include <ctype.h> +#include "erl_driver.h" +#include "sys.h" + +#ifdef VXWORKS +/* Drop any earlier F_OPEN; the definition used by gz_open() is expected + * to come from gzio_zutil.h (included below) -- TODO confirm. */ +#undef F_OPEN +#endif + +#ifdef __WIN32__ +#define HAVE_CONFLICTING_FREAD_DECLARATION +#endif + +#ifdef STDC +# define zstrerror(errnum) strerror(errnum) +#else +# define zstrerror(errnum) "" +#endif + +#include "gzio_zutil.h" +#include "erl_zlib.h" +#include "gzio.h" + +/********struct internal_state {int dummy;}; / * for buggy compilers */ + +#define Z_BUFSIZE 4096 /* size in bytes of the inbuf/outbuf I/O buffers */ + +#define ALLOC(size) driver_alloc(size) +#define TRYFREE(p) {if (p) driver_free(p);} + +static int gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */ + +/* gzip flag byte */ +#define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */ +#define HEAD_CRC 0x02 /* bit 1 set: header CRC present */ +#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ +#define ORIG_NAME 0x08 /* bit 3 set: original file name present */ +#define COMMENT 0x10 /* bit 4 set: file comment present */ +#define RESERVED 0xE0 /* bits 5..7: reserved */ + +typedef struct gz_stream { + z_stream stream; + int z_err; /* error code for last stream operation */ + int z_eof; /* set if end of input file */ +#ifdef UNIX + int file; /* .gz file descriptor (-1 when not open) */ +#else + FILE *file; /* .gz file (NULL when not open) */ +#endif + Byte *inbuf; /* input buffer */ + Byte *outbuf; /* output buffer */ + uLong crc; /* crc32 of uncompressed data */ + 
char *msg; /* error message */ + char *path; /* path name for debugging only */ + int transparent; /* 1 if input file is not a .gz file */ + char mode; /* 'w' or 'r' (gz_open maps 'a' to 'w') */ + int position; /* Uncompressed byte position, advanced by + * erts_gzread()/erts_gzwrite() (for seek) */ + int (*destroy)OF((struct gz_stream*)); /* Function to destroy + * this structure; gz_open() + * sets it to destroy(). */ +} gz_stream; + +local gzFile gz_open OF((const char *path, const char *mode)); +local int get_byte OF((gz_stream *s)); +local void check_header OF((gz_stream *s)); +local int destroy OF((gz_stream *s)); +local uLong getLong OF((gz_stream *s)); + +#ifdef UNIX +/* + * In Solaris 8 and earlier, fopen() and its friends cannot handle + * file descriptors larger than 255. Therefore, we use read()/write() + * on all Unix systems. + * + * Note that read()/write() return a signed count (-1 on error), so + * results must be held in a signed type before being tested. + */ +# define ERTS_GZWRITE(File, Buf, Count) write((File), (Buf), (Count)) +# define ERTS_GZREAD(File, Buf, Count) read((File), (Buf), (Count)) +#else +/* + * On all other operating systems, using fopen(), fread()/fwrite(), since + * there is not guaranteed to exist any read()/write() (not part of + * ANSI/ISO-C). + */ +# define ERTS_GZWRITE(File, Buf, Count) fwrite((Buf), 1, (Count), (File)) +# define ERTS_GZREAD(File, Buf, Count) fread((Buf), 1, (Count), (File)) +#endif + +/* =========================================================================== + Opens a gzip (.gz) file for reading or writing. The mode parameter + is as in fopen ("rb" or "wb"). The file is given either by file descriptor + or path name (if fd == -1). + gz_open return NULL if the file could not be opened or if there was + insufficient memory to allocate the (de)compression state; errno + can be checked to distinguish the two cases (if errno is zero, the + zlib error is Z_MEM_ERROR). 
+*/ +local gzFile gz_open (path, mode) + const char *path; + const char *mode; +{ + int err; + int level = Z_DEFAULT_COMPRESSION; /* compression level */ + char *p = (char*)mode; + gz_stream *s; + char fmode[80]; /* copy of mode, without the compression level */ + char *m = fmode; + + if (!path || !mode) return Z_NULL; + + s = (gz_stream *)ALLOC(sizeof(gz_stream)); + if (!s) return Z_NULL; + + erl_zlib_alloc_init(&s->stream); + s->stream.next_in = s->inbuf = Z_NULL; + s->stream.next_out = s->outbuf = Z_NULL; + s->stream.avail_in = s->stream.avail_out = 0; +#ifdef UNIX + s->file = -1; +#else + s->file = NULL; +#endif + s->z_err = Z_OK; + s->z_eof = 0; + s->crc = crc32(0L, Z_NULL, 0); + s->msg = NULL; + s->transparent = 0; + s->position = 0; + s->destroy = destroy; + + s->path = (char*)ALLOC(strlen(path)+1); + if (s->path == NULL) { + return s->destroy(s), (gzFile)Z_NULL; + } + strcpy(s->path, path); /* do this early for debugging */ + + s->mode = '\0'; + do { + if (*p == 'r') + s->mode = 'r'; + if (*p == 'w' || *p == 'a') + s->mode = 'w'; + if (isdigit((int)*p)) { + level = *p - '0'; + } else { + *m++ = *p; /* Copy the mode */ + } + } while (*p++ && m < fmode + sizeof(fmode) - 1); + *m = '\0'; + if (s->mode == '\0') + return s->destroy(s), (gzFile)Z_NULL; + + if (s->mode == 'w') { + err = deflateInit2(&(s->stream), level, + Z_DEFLATED, MAX_WBITS+16, DEF_MEM_LEVEL, 0); + /* windowBits is passed < 0 to suppress zlib header */ + + s->stream.next_out = s->outbuf = (Byte*)ALLOC(Z_BUFSIZE); + + if (err != Z_OK || s->outbuf == Z_NULL) { + return s->destroy(s), (gzFile)Z_NULL; + } + } else { + /* + * It is tempting to use the built-in support in zlib + * for handling GZIP headers, but unfortunately it + * cannot handle multiple GZIP headers (which occur when + * several GZIP files have been concatenated). 
+ */ + + err = inflateInit2(&(s->stream), -MAX_WBITS); + s->stream.next_in = s->inbuf = (Byte*)ALLOC(Z_BUFSIZE); + + if (err != Z_OK || s->inbuf == Z_NULL) { + return s->destroy(s), (gzFile)Z_NULL; + } + } + s->stream.avail_out = Z_BUFSIZE; + + errno = 0; +#ifdef UNIX + if (s->mode == 'r') { + s->file = open(path, O_RDONLY); + } else { + s->file = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0666); + } + if (s->file == -1) { + return s->destroy(s), (gzFile)Z_NULL; + } +#else + s->file = F_OPEN(path, fmode); + if (s->file == NULL) { + return s->destroy(s), (gzFile)Z_NULL; + } +#endif + if (s->mode == 'r') { + check_header(s); /* skip the .gz header */ + } + return (gzFile)s; +} + +/* =========================================================================== + Rewind a gzfile back to the beginning. +*/ + +local int gz_rewind (gz_stream *s) +{ + TRYFREE(s->msg); + +#ifdef UNIX + lseek(s->file, 0L, SEEK_SET); +#else + fseek(s->file, 0L, SEEK_SET); +#endif + inflateReset(&(s->stream)); + s->stream.next_in = Z_NULL; + s->stream.next_out = Z_NULL; + s->stream.avail_in = s->stream.avail_out = 0; + s->z_err = Z_OK; + s->z_eof = 0; + s->crc = crc32(0L, Z_NULL, 0); + s->msg = NULL; + s->position = 0; + s->stream.next_in = s->inbuf; + + s->stream.avail_out = Z_BUFSIZE; + + check_header(s); /* skip the .gz header */ + return 1; +} + +/* =========================================================================== + Opens a gzip (.gz) file for reading or writing. +*/ +gzFile erts_gzopen (path, mode) + const char *path; + const char *mode; +{ + return gz_open (path, mode); +} + + +/* =========================================================================== + Read a byte from a gz_stream; update next_in and avail_in. Return EOF + for end of file. + IN assertion: the stream s has been sucessfully opened for reading. 
+*/ +local int get_byte(s) + gz_stream *s; +{ + if (s->z_eof) return EOF; + if (s->stream.avail_in == 0) { +#ifdef UNIX + size_t res; + errno = 0; + res = ERTS_GZREAD(s->file, s->inbuf, Z_BUFSIZE); + if (res == 0) { + s->stream.avail_in = 0; + s->z_eof = 1; + return EOF; + } else if (res < 0) { + s->stream.avail_in = 0; + s->z_eof = 1; + s->z_err = Z_ERRNO; + return EOF; + } else { + s->stream.avail_in = (uInt) res; + } +#else + errno = 0; + s->stream.avail_in = ERTS_GZREAD(s->file, s->inbuf, Z_BUFSIZE); + if (s->stream.avail_in == 0) { + s->z_eof = 1; + if (s->file && ferror(s->file)) + s->z_err = Z_ERRNO; + return EOF; + } +#endif + s->stream.next_in = s->inbuf; + } + s->stream.avail_in--; + return *(s->stream.next_in)++; +} + +/* =========================================================================== + Check the gzip header of a gz_stream opened for reading. Set the stream + mode to transparent if the gzip magic header is not present; set s->err + to Z_DATA_ERROR if the magic header is present but the rest of the header + is incorrect. + IN assertion: the stream s has already been created sucessfully; + s->stream.avail_in is zero for the first time, but may be non-zero + for concatenated .gz files. +*/ +local void check_header(s) + gz_stream *s; +{ + int method; /* method byte */ + int flags; /* flags byte */ + uInt len; + int c; + + /* Check the gzip magic header */ + for (len = 0; len < 2; len++) { + c = get_byte(s); + if (c != gz_magic[len]) { + if (len != 0) s->stream.avail_in++, s->stream.next_in--; + if (c != EOF) { + s->stream.avail_in++, s->stream.next_in--; + s->transparent = 1; + } + s->z_err = s->stream.avail_in != 0 ? 
Z_OK : Z_STREAM_END; + return; + } + } + method = get_byte(s); + flags = get_byte(s); + if (method != Z_DEFLATED || (flags & RESERVED) != 0) { + s->z_err = Z_DATA_ERROR; + return; + } + + /* Discard time, xflags and OS code: */ + for (len = 0; len < 6; len++) (void)get_byte(s); + + if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */ + len = (uInt)get_byte(s); + len += ((uInt)get_byte(s))<<8; + /* len is garbage if EOF but the loop below will quit anyway */ + while (len-- != 0 && get_byte(s) != EOF) ; + } + if ((flags & ORIG_NAME) != 0) { /* skip the original file name */ + while ((c = get_byte(s)) != 0 && c != EOF) ; + } + if ((flags & COMMENT) != 0) { /* skip the .gz file comment */ + while ((c = get_byte(s)) != 0 && c != EOF) ; + } + if ((flags & HEAD_CRC) != 0) { /* skip the header crc */ + for (len = 0; len < 2; len++) (void)get_byte(s); + } + s->z_err = s->z_eof ? Z_DATA_ERROR : Z_OK; +} + + /* =========================================================================== + * Cleanup then free the given gz_stream. Return a zlib error code. + Try freeing in the reverse order of allocations. + */ +local int destroy (s) + gz_stream *s; +{ + int err = Z_OK; + + if (!s) return Z_STREAM_ERROR; + + TRYFREE(s->msg); + + if (s->stream.state != NULL) { + if (s->mode == 'w') { + err = deflateEnd(&(s->stream)); + } else if (s->mode == 'r') { + err = inflateEnd(&(s->stream)); + } + } +#ifdef UNIX + if (s->file != -1 && close(s->file)) { + err = Z_ERRNO; + } +#else + if (s->file != NULL && fclose(s->file)) { + err = Z_ERRNO; + } +#endif + if (s->z_err < 0) err = s->z_err; + + TRYFREE(s->inbuf); + TRYFREE(s->outbuf); + TRYFREE(s->path); + TRYFREE(s); + return err; +} + +/* =========================================================================== + Reads the given number of uncompressed bytes from the compressed file. + gzread returns the number of bytes actually read (0 for end of file). 
+*/ +int +erts_gzread(gzFile file, voidp buf, unsigned len) +{ + gz_stream *s = (gz_stream*)file; + Bytef *start = buf; /* starting point for crc computation */ + Byte *next_out; /* == stream.next_out but not forced far (for MSDOS) */ + + if (s == NULL || s->mode != 'r') return Z_STREAM_ERROR; + + if (s->z_err == Z_DATA_ERROR || s->z_err == Z_ERRNO) return -1; + if (s->z_err == Z_STREAM_END) return 0; /* EOF */ + + s->stream.next_out = next_out = buf; + s->stream.avail_out = len; + + while (s->stream.avail_out != 0) { + + if (s->transparent) { + /* Copy first the lookahead bytes: */ + uInt n = s->stream.avail_in; + if (n > s->stream.avail_out) n = s->stream.avail_out; + if (n > 0) { + zmemcpy(s->stream.next_out, s->stream.next_in, n); + next_out += n; + s->stream.next_out = next_out; + s->stream.next_in += n; + s->stream.avail_out -= n; + s->stream.avail_in -= n; + } + if (s->stream.avail_out > 0) { + s->stream.avail_out -= ERTS_GZREAD(s->file, next_out, + s->stream.avail_out); + } + len -= s->stream.avail_out; + s->stream.total_in += (uLong)len; + s->stream.total_out += (uLong)len; + if (len == 0) s->z_eof = 1; + s->position += (int)len; + return (int)len; + } + if (s->stream.avail_in == 0 && !s->z_eof) { +#ifdef UNIX + size_t res; + errno = 0; + res = ERTS_GZREAD(s->file, s->inbuf, Z_BUFSIZE); + if (res == 0) { + s->stream.avail_in = 0; + s->z_eof = 1; + return EOF; + } else if (res < 0) { + s->stream.avail_in = 0; + s->z_eof = 1; + s->z_err = Z_ERRNO; + return EOF; + } else { + s->stream.avail_in = (uInt) res; + } +#else + errno = 0; + s->stream.avail_in = ERTS_GZREAD(s->file, s->inbuf, Z_BUFSIZE); + if (s->stream.avail_in == 0) { + s->z_eof = 1; + if (s->file && ferror(s->file)) { + s->z_err = Z_ERRNO; + break; + } + } +#endif + s->stream.next_in = s->inbuf; + } + s->z_err = inflate(&(s->stream), Z_NO_FLUSH); + + if (s->z_err == Z_STREAM_END) { + /* Check CRC and original size */ + s->crc = crc32(s->crc, start, (uInt)(s->stream.next_out - start)); + start = 
s->stream.next_out; + + if (getLong(s) != s->crc) { + s->z_err = Z_DATA_ERROR; + } else { + (void)getLong(s); + /* The uncompressed length returned by above getlong() may + * be different from s->stream.total_out) in case of + * concatenated .gz files. Check for such files: + */ + check_header(s); + if (s->z_err == Z_OK) { + uLong total_in = s->stream.total_in; + uLong total_out = s->stream.total_out; + + inflateReset(&(s->stream)); + s->stream.total_in = total_in; + s->stream.total_out = total_out; + s->crc = crc32(0L, Z_NULL, 0); + } + } + } + if (s->z_err != Z_OK || s->z_eof) break; + } + s->crc = crc32(s->crc, start, (uInt)(s->stream.next_out - start)); + + s->position += (int)(len - s->stream.avail_out); + + return (int)(len - s->stream.avail_out); +} + +/* =========================================================================== + Writes the given number of uncompressed bytes into the compressed file. + gzwrite returns the number of bytes actually written (0 in case of error). +*/ +int +erts_gzwrite(gzFile file, voidp buf, unsigned len) +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR; + + s->stream.next_in = buf; + s->stream.avail_in = len; + + while (s->stream.avail_in != 0) { + + if (s->stream.avail_out == 0) { + + s->stream.next_out = s->outbuf; + if (ERTS_GZWRITE(s->file, s->outbuf, Z_BUFSIZE) != Z_BUFSIZE) { + s->z_err = Z_ERRNO; + break; + } + s->stream.avail_out = Z_BUFSIZE; + } + s->z_err = deflate(&(s->stream), Z_NO_FLUSH); + if (s->z_err != Z_OK) break; + } + s->position += (int)(len - s->stream.avail_in); + return (int)(len - s->stream.avail_in); +} + +/* + * For use by Erlang file driver. + * + * XXX Limitations: + * - SEEK_END is not allowed (length of file is not known). + * - When writing, only forward seek is supported. 
+ */ + +int +erts_gzseek(gzFile file, int offset, int whence) +{ + int pos; + gz_stream* s = (gz_stream *) file; + + if (s == NULL) { + errno = EINVAL; + return -1; + } + if (s->z_err == Z_DATA_ERROR || s->z_err == Z_ERRNO) { + errno = EIO; + return -1; + } + + switch (whence) { + case SEEK_SET: pos = offset; break; + case SEEK_CUR: pos = s->position+offset; break; + case SEEK_END: + default: + errno = EINVAL; return -1; + } + + if (pos == s->position) { + return pos; + } + + if (pos < s->position) { + if (s->mode == 'w') { + errno = EINVAL; + return -1; + } + gz_rewind(s); + } + + while (s->position < pos) { + char buf[512]; + int n; + + n = pos - s->position; + if (n > sizeof(buf)) + n = sizeof(buf); + + if (s->mode == 'r') { + erts_gzread(file, buf, n); + } else { + memset(buf, '\0', n); + erts_gzwrite(file, buf, n); + } + } + + return s->position; +} + +/* =========================================================================== + Flushes all pending output into the compressed file. The parameter + flush is as in the deflate() function. + gzflush should be called only when strictly necessary because it can + degrade compression. 
+*/ +int +erts_gzflush(gzFile file, int flush) +{ + uInt len; + int done = 0; + gz_stream *s = (gz_stream*)file; + + if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR; + + s->stream.avail_in = 0; /* should be zero already anyway */ + + for (;;) { + len = Z_BUFSIZE - s->stream.avail_out; + + if (len != 0) { + if ((uInt)ERTS_GZWRITE(s->file, s->outbuf, len) != len) { + s->z_err = Z_ERRNO; + return Z_ERRNO; + } + s->stream.next_out = s->outbuf; + s->stream.avail_out = Z_BUFSIZE; + } + if (done) break; + s->z_err = deflate(&(s->stream), flush); + + /* deflate has finished flushing only when it hasn't used up + * all the available space in the output buffer: + */ + done = (s->stream.avail_out != 0 || s->z_err == Z_STREAM_END); + + if (s->z_err != Z_OK && s->z_err != Z_STREAM_END) break; + } +#ifndef UNIX + fflush(s->file); +#endif + return s->z_err == Z_STREAM_END ? Z_OK : s->z_err; +} + +/* =========================================================================== + Reads a long in LSB order from the given gz_stream. Sets +*/ +local uLong getLong (s) + gz_stream *s; +{ + uLong x = (uLong)get_byte(s); + int c; + + x += ((uLong)get_byte(s))<<8; + x += ((uLong)get_byte(s))<<16; + c = get_byte(s); + if (c == EOF) s->z_err = Z_DATA_ERROR; + x += ((uLong)c)<<24; + return x; +} + +/* =========================================================================== + Flushes all pending output if necessary, closes the compressed file + and deallocates all the (de)compression state. +*/ +int +erts_gzclose(gzFile file) +{ + int err; + gz_stream *s = (gz_stream*)file; + + if (s == NULL) return Z_STREAM_ERROR; + + if (s->mode == 'w') { + err = erts_gzflush (file, Z_FINISH); + if (err != Z_OK) return s->destroy(file); + } + return s->destroy(file); +} + + +/* =========================================================================== + Uncompresses the buffer given and returns a pointer to a binary. 
+ If the buffer was not compressed with gzip, the buffer contents + will be copied unchanged into the binary. + + If a `gzip' header was found, but there were subsequent errors, + a NULL pointer is returned. +*/ + +ErlDrvBinary* +erts_gzinflate_buffer(char* start, uLong size) +{ + ErlDrvBinary* bin; + ErlDrvBinary* bin2; + z_stream zstr; + unsigned char* bptr; + + /* + * Check for the magic bytes beginning a GZIP header. + */ + bptr = (unsigned char *) start; + if (size < 2 || bptr[0] != gz_magic[0] || bptr[1] != gz_magic[1]) { + /* No GZIP header -- just copy the data into a new binary */ + if ((bin = driver_alloc_binary(size)) == NULL) { + return NULL; + } + memcpy(bin->orig_bytes, start, size); + return bin; + } + + /* + * The magic bytes for a GZIP header are there. Now try to decompress. + * It is an error if the GZIP header is not correct. + */ + + zstr.next_in = (unsigned char*) start; + zstr.avail_in = size; + erl_zlib_alloc_init(&zstr); + size *= 2; + if ((bin = driver_alloc_binary(size)) == NULL) { + return NULL; + } + if (inflateInit2(&zstr, 15+16) != Z_OK) { /* Decode GZIP format */ + driver_free(bin); + return NULL; + } + for (;;) { + int status; + + zstr.next_out = (unsigned char *) bin->orig_bytes + zstr.total_out; + zstr.avail_out = size - zstr.total_out; + status = inflate(&zstr, Z_NO_FLUSH); + if (status == Z_OK) { + size *= 2; + if ((bin2 = driver_realloc_binary(bin, size)) == NULL) { + error: + driver_free_binary(bin); + inflateEnd(&zstr); + return NULL; + } + bin = bin2; + } else if (status == Z_STREAM_END) { + if ((bin2 = driver_realloc_binary(bin, zstr.total_out)) == NULL) { + goto error; + } + inflateEnd(&zstr); + return bin2; + } else { + goto error; + } + } +} + +/* =========================================================================== + Compresses the buffer given and returns a pointer to a binary. + A NULL pointer is returned if any error occurs. + Writes a gzip header as well. 
+*/ + +#define GZIP_HD_SIZE 10 +#define GZIP_TL_SIZE 8 + +#define GZIP_X_SIZE (GZIP_HD_SIZE+GZIP_TL_SIZE) + +ErlDrvBinary* +erts_gzdeflate_buffer(char* start, uLong size) +{ + z_stream c_stream; /* compression stream */ + ErlDrvBinary* bin; + ErlDrvBinary* bin2; + uLong crc; /* crc32 of uncompressed data */ + uLong szIn; + Byte* ptr; + int comprLen = size + (size/1000) + 1 + 12; /* see zlib.h */ + + crc = crc32(0L, Z_NULL, 0); + erl_zlib_alloc_init(&c_stream); + + if (deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION, + Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, 0) != Z_OK) + return NULL; + + if ((bin = driver_alloc_binary(comprLen+GZIP_X_SIZE)) == NULL) + return NULL; + sprintf(bin->orig_bytes, "%c%c%c%c%c%c%c%c%c%c", gz_magic[0], gz_magic[1], + Z_DEFLATED, 0 /*flags*/, 0,0,0,0 /*time*/, 0 /*xflags*/, OS_CODE); + + c_stream.next_out = ((Byte*) bin->orig_bytes)+GZIP_HD_SIZE; + c_stream.avail_out = (uInt) bin->orig_size - GZIP_HD_SIZE; + c_stream.next_in = (Byte*) start; + c_stream.avail_in = (uInt) size; + + if (deflate(&c_stream, Z_FINISH) != Z_STREAM_END) { + driver_free_binary(bin); + return NULL; + } + crc = crc32(crc, (unsigned char*)start, size); + ptr = c_stream.next_out; + szIn = c_stream.total_in; + + *ptr++ = (crc & 0xff); crc >>= 8; + *ptr++ = (crc & 0xff); crc >>= 8; + *ptr++ = (crc & 0xff); crc >>= 8; + *ptr++ = (crc & 0xff); crc >>= 8; + + *ptr++ = (szIn & 0xff); szIn >>= 8; + *ptr++ = (szIn & 0xff); szIn >>= 8; + *ptr++ = (szIn & 0xff); szIn >>= 8; + *ptr++ = (szIn & 0xff); szIn >>= 8; + + if (deflateEnd(&c_stream) != Z_OK) { + driver_free_binary(bin); + return NULL; + } + size = ptr - (Byte*)bin->orig_bytes; + + if ((bin2 = driver_realloc_binary(bin, size)) == NULL) + driver_free_binary(bin); + return bin2; +} + diff --git a/erts/emulator/drivers/common/gzio.h b/erts/emulator/drivers/common/gzio.h new file mode 100644 index 0000000000..3f1e546140 --- /dev/null +++ b/erts/emulator/drivers/common/gzio.h @@ -0,0 +1,27 @@ +/* + * %CopyrightBegin% + * + * 
Copyright Ericsson AB 1999-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +gzFile erts_gzopen (const char *path, const char *mode); +int erts_gzread(gzFile file, voidp buf, unsigned len); +int erts_gzwrite(gzFile file, voidp buf, unsigned len); +int erts_gzseek(gzFile, int, int); +int erts_gzflush(gzFile file, int flush); +int erts_gzclose(gzFile file); +ErlDrvBinary* erts_gzinflate_buffer(char*, uLong); +ErlDrvBinary* erts_gzdeflate_buffer(char*, uLong); diff --git a/erts/emulator/drivers/common/gzio_zutil.h b/erts/emulator/drivers/common/gzio_zutil.h new file mode 100644 index 0000000000..00eccc80fc --- /dev/null +++ b/erts/emulator/drivers/common/gzio_zutil.h @@ -0,0 +1,82 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. 
+ * + * %CopyrightEnd% + */ + +/* A replacement for zlib internal header file zutil.h. + * Just the minimal things that our gzio.c seems to need. + * We don't want to be dependant on some internal header file + * that may change or not exist at all. + */ + +#ifndef HAVE_LIBZ +/* Use our "real" copy of zutil.h if we don't use shared zlib */ +#include "zutil.h" + +#else /* HAVE_LIBZ: Shared zlib is used */ + +#define local static +#define DEF_MEM_LEVEL 8 +#define zmemcpy sys_memcpy + +#if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32)) +# define OS_CODE 0x00 +#endif + +#ifdef AMIGA +# define OS_CODE 0x01 +#endif + +#if defined(VAXC) || defined(VMS) +# define OS_CODE 0x02 +# define F_OPEN(name, mode) \ + fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512") +#endif + +#if defined(ATARI) || defined(atarist) +# define OS_CODE 0x05 +#endif + +#ifdef OS2 +# define OS_CODE 0x06 +#endif + +#if defined(MACOS) || defined(TARGET_OS_MAC) +# define OS_CODE 0x07 +#endif + +#ifdef TOPS20 +# define OS_CODE 0x0a +#endif + +#ifdef WIN32 +# ifndef __CYGWIN__ /* Cygwin is Unix, not Win32 */ +# define OS_CODE 0x0b +# endif +#endif + +#ifdef __50SERIES /* Prime/PRIMOS */ +# define OS_CODE 0x0f +#endif + +#ifndef OS_CODE +# define OS_CODE 0x03 /* assume Unix */ +#endif + + +#endif /* HAVE_LIBZ */ + diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c new file mode 100644 index 0000000000..b7b577da5b --- /dev/null +++ b/erts/emulator/drivers/common/inet_drv.c @@ -0,0 +1,9949 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 1997-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. 
+ * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +/* If we HAVE_SCTP_H and Solaris, we need to define the following in + order to get SCTP working: +*/ +#if (defined(HAVE_SCTP_H) && defined(__sun) && defined(__SVR4)) +#define SOLARIS10 1 +/* WARNING: This is not quite correct, it may also be Solaris 11! */ +#define _XPG4_2 +#define __EXTENSIONS__ +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> +#include <sys/types.h> +#include <errno.h> + +#define IDENTITY(c) c +#define STRINGIFY_1(b) IDENTITY(#b) +#define STRINGIFY(a) STRINGIFY_1(a) + +#ifndef _OSE_ +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_SYS_UIO_H +#include <sys/uio.h> +#endif +#endif + + +/* All platforms fail on malloc errors. */ +#define FATAL_MALLOC + + +#include "erl_driver.h" + +#ifdef __WIN32__ +#define STRNCASECMP strncasecmp + +#define INCL_WINSOCK_API_TYPEDEFS 1 + +#ifndef WINDOWS_H_INCLUDES_WINSOCK2_H +#include <winsock2.h> +#endif +#include <windows.h> + +#include <Ws2tcpip.h> /* NEED VC 6.0 !!! 
*/ + +#undef WANT_NONBLOCKING +#include "sys.h" + +#undef EWOULDBLOCK +#undef ETIMEDOUT + +#define HAVE_MULTICAST_SUPPORT + +#define ERRNO_BLOCK WSAEWOULDBLOCK + +#define EWOULDBLOCK WSAEWOULDBLOCK +#define EINPROGRESS WSAEINPROGRESS +#define EALREADY WSAEALREADY +#define ENOTSOCK WSAENOTSOCK +#define EDESTADDRREQ WSAEDESTADDRREQ +#define EMSGSIZE WSAEMSGSIZE +#define EPROTOTYPE WSAEPROTOTYPE +#define ENOPROTOOPT WSAENOPROTOOPT +#define EPROTONOSUPPORT WSAEPROTONOSUPPORT +#define ESOCKTNOSUPPORT WSAESOCKTNOSUPPORT +#define EOPNOTSUPP WSAEOPNOTSUPP +#define EPFNOSUPPORT WSAEPFNOSUPPORT +#define EAFNOSUPPORT WSAEAFNOSUPPORT +#define EADDRINUSE WSAEADDRINUSE +#define EADDRNOTAVAIL WSAEADDRNOTAVAIL +#define ENETDOWN WSAENETDOWN +#define ENETUNREACH WSAENETUNREACH +#define ENETRESET WSAENETRESET +#define ECONNABORTED WSAECONNABORTED +#define ECONNRESET WSAECONNRESET +#define ENOBUFS WSAENOBUFS +#define EISCONN WSAEISCONN +#define ENOTCONN WSAENOTCONN +#define ESHUTDOWN WSAESHUTDOWN +#define ETOOMANYREFS WSAETOOMANYREFS +#define ETIMEDOUT WSAETIMEDOUT +#define ECONNREFUSED WSAECONNREFUSED +#define ELOOP WSAELOOP +#undef ENAMETOOLONG +#define ENAMETOOLONG WSAENAMETOOLONG +#define EHOSTDOWN WSAEHOSTDOWN +#define EHOSTUNREACH WSAEHOSTUNREACH +#undef ENOTEMPTY +#define ENOTEMPTY WSAENOTEMPTY +#define EPROCLIM WSAEPROCLIM +#define EUSERS WSAEUSERS +#define EDQUOT WSAEDQUOT +#define ESTALE WSAESTALE +#define EREMOTE WSAEREMOTE + +#define INVALID_EVENT WSA_INVALID_EVENT + +static BOOL (WINAPI *fpSetHandleInformation)(HANDLE,DWORD,DWORD); + +#define sock_open(af, type, proto) \ + make_noninheritable_handle(socket((af), (type), (proto))) +#define sock_close(s) closesocket((s)) +#define sock_shutdown(s, how) shutdown((s), (how)) + +#define sock_accept(s, addr, len) \ + make_noninheritable_handle(accept((s), (addr), (len))) +#define sock_connect(s, addr, len) connect((s), (addr), (len)) +#define sock_listen(s, b) listen((s), (b)) +#define sock_bind(s, addr, len) bind((s), (addr), 
(len)) +#define sock_getopt(s,t,n,v,l) getsockopt((s),(t),(n),(v),(l)) +#define sock_setopt(s,t,n,v,l) setsockopt((s),(t),(n),(v),(l)) +#define sock_name(s, addr, len) getsockname((s), (addr), (len)) +#define sock_peer(s, addr, len) getpeername((s), (addr), (len)) +#define sock_ntohs(x) ntohs((x)) +#define sock_ntohl(x) ntohl((x)) +#define sock_htons(x) htons((x)) +#define sock_htonl(x) htonl((x)) +#define sock_send(s,buf,len,flag) send((s),(buf),(len),(flag)) +#define sock_sendv(s, vec, size, np, flag) \ + WSASend((s),(WSABUF*)(vec),\ + (size),(np),(flag),NULL,NULL) +#define sock_recv(s,buf,len,flag) recv((s),(buf),(len),(flag)) + +#define sock_recvfrom(s,buf,blen,flag,addr,alen) \ + recvfrom((s),(buf),(blen),(flag),(addr),(alen)) +#define sock_sendto(s,buf,blen,flag,addr,alen) \ + sendto((s),(buf),(blen),(flag),(addr),(alen)) +#define sock_hostname(buf, len) gethostname((buf), (len)) + +#define sock_getservbyname(name,proto) getservbyname((name),(proto)) +#define sock_getservbyport(port,proto) getservbyport((port),(proto)) + +#define sock_errno() WSAGetLastError() +#define sock_create_event(d) WSACreateEvent() +#define sock_close_event(e) WSACloseEvent(e) + +#define sock_select(D, Flags, OnOff) winsock_event_select(D, Flags, OnOff) + +#define SET_BLOCKING(s) ioctlsocket(s, FIONBIO, &zero_value) +#define SET_NONBLOCKING(s) ioctlsocket(s, FIONBIO, &one_value) + + +static unsigned long zero_value = 0; +static unsigned long one_value = 1; + +#else + +#ifdef VXWORKS +#include <sockLib.h> +#include <sys/times.h> +#include <iosLib.h> +#include <taskLib.h> +#include <selectLib.h> +#include <ioLib.h> +#else +#include <sys/time.h> +#ifdef NETDB_H_NEEDS_IN_H +#include <netinet/in.h> +#endif +#include <netdb.h> +#endif + +#ifndef _OSE_ +#include <sys/socket.h> +#include <netinet/in.h> +#else +/* datatypes and macros from Solaris socket.h */ +struct linger { + int l_onoff; /* option on/off */ + int l_linger; /* linger time */ +}; +#define SO_OOBINLINE 0x0100 /* leave received 
OOB data in line */ +#define SO_LINGER 0x0080 /* linger on close if data present */ +#endif + +#ifdef VXWORKS +#include <rpc/rpctypes.h> +#endif +#ifdef DEF_INADDR_LOOPBACK_IN_RPC_TYPES_H +#include <rpc/types.h> +#endif + +#ifndef _OSE_ +#include <netinet/tcp.h> +#include <arpa/inet.h> +#endif + +#if (!defined(VXWORKS) && !defined(_OSE_)) +#include <sys/param.h> +#ifdef HAVE_ARPA_NAMESER_H +#include <arpa/nameser.h> +#endif +#endif + +#ifdef HAVE_SYS_SOCKIO_H +#include <sys/sockio.h> +#endif + +#ifdef HAVE_SYS_IOCTL_H +#include <sys/ioctl.h> +#endif + +#ifndef _OSE_ +#include <net/if.h> +#else +#define IFF_MULTICAST 0x00000800 +#endif + +#ifdef _OSE_ +#include "inet.h" +#include "ineterr.h" +#include "ose_inet_drv.h" +#include "nameser.h" +#include "resolv.h" +#define SET_ASYNC(s) setsockopt((s), SOL_SOCKET, SO_OSEEVENT, (&(s)), sizeof(int)) + +extern void select_release(void); + +#endif /* _OSE_ */ + +/* Solaris headers, only to be used with SFK */ +#ifdef _OSE_SFK_ +#include <ctype.h> +#include <string.h> +#endif + +/* SCTP support -- currently for UNIX platforms only: */ +#undef HAVE_SCTP +#if (!defined(VXWORKS) && !defined(_OSE_) && !defined(__WIN32__) && defined(HAVE_SCTP_H)) + +#include <netinet/sctp.h> + +/* SCTP Socket API Draft from version 11 on specifies that netinet/sctp.h must + explicitly define HAVE_SCTP in case when SCTP is supported, but Solaris 10 + still apparently uses Draft 10, and does not define that symbol, so we have + to define it explicitly: +*/ +#ifndef HAVE_SCTP +# define HAVE_SCTP +#endif + +/* These changed in draft 11, so SOLARIS10 uses the old MSG_* */ +#if ! HAVE_DECL_SCTP_UNORDERED +# define SCTP_UNORDERED MSG_UNORDERED +#endif +#if ! HAVE_DECL_SCTP_ADDR_OVER +# define SCTP_ADDR_OVER MSG_ADDR_OVER +#endif +#if ! HAVE_DECL_SCTP_ABORT +# define SCTP_ABORT MSG_ABORT +#endif +#if ! 
HAVE_DECL_SCTP_EOF +# define SCTP_EOF MSG_EOF +#endif + +/* New spelling in lksctp 2.6.22 or maybe even earlier: + * adaption -> adaptation + */ +#if !defined(SCTP_ADAPTATION_LAYER) && defined (SCTP_ADAPTION_LAYER) +# define SCTP_ADAPTATION_LAYER SCTP_ADAPTION_LAYER +# define SCTP_ADAPTATION_INDICATION SCTP_ADAPTION_INDICATION +# define sctp_adaptation_event sctp_adaption_event +# define sctp_setadaptation sctp_setadaption +# define sn_adaptation_event sn_adaption_event +# define sai_adaptation_ind sai_adaption_ind +# define ssb_adaptation_ind ssb_adaption_ind +# define sctp_adaptation_layer_event sctp_adaption_layer_event +#endif + +static void *h_libsctp = NULL; +#ifdef __GNUC__ +static typeof(sctp_bindx) *p_sctp_bindx = NULL; +#else +static int (*p_sctp_bindx)(int sd, struct sockaddr *addrs, + int addrcnt, int flags) = NULL; +#endif + +#endif /* SCTP supported */ + +#ifndef WANT_NONBLOCKING +#define WANT_NONBLOCKING +#endif +#include "sys.h" + +/* #define INET_DRV_DEBUG 1 */ +#ifdef INET_DRV_DEBUG +#define DEBUG 1 +#undef DEBUGF +#define DEBUGF(X) printf X +#endif + +#if !defined(__WIN32__) && !defined(HAVE_STRNCASECMP) +#define STRNCASECMP my_strncasecmp + +static int my_strncasecmp(const char *s1, const char *s2, size_t n) +{ + int i; + + for (i=0;i<n-1 && s1[i] && s2[i] && toupper(s1[i]) == toupper(s2[i]);++i) + ; + return (toupper(s1[i]) - toupper(s2[i])); +} + + +#else +#define STRNCASECMP strncasecmp +#endif + +#define INVALID_SOCKET -1 +#define INVALID_EVENT -1 +#define SOCKET_ERROR -1 +#define SOCKET int +#define HANDLE long int +#define FD_READ ERL_DRV_READ +#define FD_WRITE ERL_DRV_WRITE +#define FD_CLOSE 0 +#define FD_CONNECT ERL_DRV_WRITE +#define FD_ACCEPT ERL_DRV_READ + +#define sock_connect(s, addr, len) connect((s), (addr), (len)) +#define sock_listen(s, b) listen((s), (b)) +#define sock_bind(s, addr, len) bind((s), (addr), (len)) +#ifdef VXWORKS +#define sock_getopt(s,t,n,v,l) wrap_sockopt(&getsockopt,\ + s,t,n,v,(unsigned int)(l)) +#define 
sock_setopt(s,t,n,v,l) wrap_sockopt(&setsockopt,\ + s,t,n,v,(unsigned int)(l)) +#else +#define sock_getopt(s,t,n,v,l) getsockopt((s),(t),(n),(v),(l)) +#define sock_setopt(s,t,n,v,l) setsockopt((s),(t),(n),(v),(l)) +#endif +#define sock_name(s, addr, len) getsockname((s), (addr), (len)) +#define sock_peer(s, addr, len) getpeername((s), (addr), (len)) +#define sock_ntohs(x) ntohs((x)) +#define sock_ntohl(x) ntohl((x)) +#define sock_htons(x) htons((x)) +#define sock_htonl(x) htonl((x)) + +#ifdef _OSE_ +#define sock_accept(s, addr, len) ose_inet_accept((s), (addr), (len)) +#define sock_send(s,buf,len,flag) ose_inet_send((s),(buf),(len),(flag)) +#define sock_sendto(s,buf,blen,flag,addr,alen) \ + ose_inet_sendto((s),(buf),(blen),(flag),(addr),(alen)) +#define sock_sendv(s, vec, size, np, flag) \ + (*(np) = ose_inet_sendv((s), (SysIOVec*)(vec), (size))) +#define sock_open(af, type, proto) ose_inet_socket((af), (type), (proto)) +#define sock_close(s) ose_inet_close((s)) +#define sock_hostname(buf, len) ose_gethostname((buf), (len)) +#define sock_getservbyname(name,proto) ose_getservbyname((name), (proto)) +#define sock_getservbyport(port,proto) ose_getservbyport((port), (proto)) + +#else +#define sock_accept(s, addr, len) accept((s), (addr), (len)) +#define sock_send(s,buf,len,flag) send((s),(buf),(len),(flag)) +#define sock_sendto(s,buf,blen,flag,addr,alen) \ + sendto((s),(buf),(blen),(flag),(addr),(alen)) +#define sock_sendv(s, vec, size, np, flag) \ + (*(np) = writev((s), (struct iovec*)(vec), (size))) +#define sock_sendmsg(s,msghdr,flag) sendmsg((s),(msghdr),(flag)) + +#define sock_open(af, type, proto) socket((af), (type), (proto)) +#define sock_close(s) close((s)) +#define sock_shutdown(s, how) shutdown((s), (how)) + +#define sock_hostname(buf, len) gethostname((buf), (len)) +#define sock_getservbyname(name,proto) getservbyname((name), (proto)) +#define sock_getservbyport(port,proto) getservbyport((port), (proto)) +#endif /* _OSE_ */ + +#define 
sock_recv(s,buf,len,flag) recv((s),(buf),(len),(flag)) +#define sock_recvfrom(s,buf,blen,flag,addr,alen) \ + recvfrom((s),(buf),(blen),(flag),(addr),(alen)) +#define sock_recvmsg(s,msghdr,flag) recvmsg((s),(msghdr),(flag)) + +#define sock_errno() errno +#define sock_create_event(d) ((d)->s) /* return file descriptor */ +#define sock_close_event(e) /* do nothing */ + +#ifdef _OSE_ +#define inet_driver_select(port, e, mode, on) \ + ose_inet_select(port, e, mode, on) +#else +#define inet_driver_select(port, e, mode, on) \ + driver_select(port, e, mode | (on?ERL_DRV_USE:0), on) +#endif /* _OSE_ */ + +#define sock_select(d, flags, onoff) do { \ + (d)->event_mask = (onoff) ? \ + ((d)->event_mask | (flags)) : \ + ((d)->event_mask & ~(flags)); \ + DEBUGF(("sock_select(%ld): flags=%02X, onoff=%d, event_mask=%02lX\r\n", \ + (long) (d)->port, (flags), (onoff), (unsigned long) (d)->event_mask)); \ + inet_driver_select((d)->port, (ErlDrvEvent)(long)(d)->event, (flags), (onoff)); \ + } while(0) + + +#endif /* __WIN32__ */ + +#include "packet_parser.h" + +#define get_int24(s) ((((unsigned char*) (s))[0] << 16) | \ + (((unsigned char*) (s))[1] << 8) | \ + (((unsigned char*) (s))[2])) + +#define get_little_int32(s) ((((unsigned char*) (s))[3] << 24) | \ + (((unsigned char*) (s))[2] << 16) | \ + (((unsigned char*) (s))[1] << 8) | \ + (((unsigned char*) (s))[0])) + +/*---------------------------------------------------------------------------- +** Interface constants. 
+** +** This section must be "identical" to the corresponding inet_int.hrl +*/ + +/* general address encode/decode tag */ +#define INET_AF_INET 1 +#define INET_AF_INET6 2 +#define INET_AF_ANY 3 /* INADDR_ANY or IN6ADDR_ANY_INIT */ +#define INET_AF_LOOPBACK 4 /* INADDR_LOOPBACK or IN6ADDR_LOOPBACK_INIT */ + +/* INET_REQ_GETTYPE enumeration */ +#define INET_TYPE_STREAM 1 +#define INET_TYPE_DGRAM 2 +#define INET_TYPE_SEQPACKET 3 + +/* INET_LOPT_MODE options */ +#define INET_MODE_LIST 0 +#define INET_MODE_BINARY 1 + +/* INET_LOPT_DELIVER options */ +#define INET_DELIVER_PORT 0 +#define INET_DELIVER_TERM 1 + +/* INET_LOPT_ACTIVE options */ +#define INET_PASSIVE 0 /* false */ +#define INET_ACTIVE 1 /* true */ +#define INET_ONCE 2 /* true; active once then passive */ + +/* INET_REQ_GETSTATUS enumeration */ +#define INET_F_OPEN 0x0001 +#define INET_F_BOUND 0x0002 +#define INET_F_ACTIVE 0x0004 +#define INET_F_LISTEN 0x0008 +#define INET_F_CON 0x0010 +#define INET_F_ACC 0x0020 +#define INET_F_LST 0x0040 +#define INET_F_BUSY 0x0080 +#define INET_F_MULTI_CLIENT 0x0100 /* Multiple clients for one descriptor, i.e. multi-accept */ + +/* One numberspace for *_REC_* so if an e.g UDP request is issued +** for a TCP socket, the driver can protest. 
+*/ +#define INET_REQ_OPEN 1 +#define INET_REQ_CLOSE 2 +#define INET_REQ_CONNECT 3 +#define INET_REQ_PEER 4 +#define INET_REQ_NAME 5 +#define INET_REQ_BIND 6 +#define INET_REQ_SETOPTS 7 +#define INET_REQ_GETOPTS 8 +/* #define INET_REQ_GETIX 9 NOT USED ANY MORE */ +/* #define INET_REQ_GETIF 10 REPLACE BY NEW STUFF */ +#define INET_REQ_GETSTAT 11 +#define INET_REQ_GETHOSTNAME 12 +#define INET_REQ_FDOPEN 13 +#define INET_REQ_GETFD 14 +#define INET_REQ_GETTYPE 15 +#define INET_REQ_GETSTATUS 16 +#define INET_REQ_GETSERVBYNAME 17 +#define INET_REQ_GETSERVBYPORT 18 +#define INET_REQ_SETNAME 19 +#define INET_REQ_SETPEER 20 +#define INET_REQ_GETIFLIST 21 +#define INET_REQ_IFGET 22 +#define INET_REQ_IFSET 23 +#define INET_REQ_SUBSCRIBE 24 +/* TCP requests */ +#define TCP_REQ_ACCEPT 40 +#define TCP_REQ_LISTEN 41 +#define TCP_REQ_RECV 42 +#define TCP_REQ_UNRECV 43 +#define TCP_REQ_SHUTDOWN 44 +#define TCP_REQ_MULTI_OP 45 +/* UDP and SCTP requests */ +#define PACKET_REQ_RECV 60 /* Common for UDP and SCTP */ +#define SCTP_REQ_LISTEN 61 /* Different from TCP; not for UDP */ +#define SCTP_REQ_BINDX 62 /* Multi-home SCTP bind */ + +/* INET_REQ_SUBSCRIBE sub-requests */ +#define INET_SUBS_EMPTY_OUT_Q 1 + +/* TCP additional flags */ +#define TCP_ADDF_DELAY_SEND 1 +#define TCP_ADDF_CLOSE_SENT 2 /* Close sent (active mode only) */ +#define TCP_ADDF_DELAYED_CLOSE_RECV 4 /* If receive fails, report {error,closed} (passive mode) */ +#define TCP_ADDF_DELAYED_CLOSE_SEND 8 /* If send fails, report {error,closed} (passive mode) */ + +/* *_REQ_* replies */ +#define INET_REP_ERROR 0 +#define INET_REP_OK 1 +#define INET_REP_SCTP 2 + +/* INET_REQ_SETOPTS and INET_REQ_GETOPTS options */ +#define INET_OPT_REUSEADDR 0 /* enable/disable local address reuse */ +#define INET_OPT_KEEPALIVE 1 /* enable/disable keep connections alive */ +#define INET_OPT_DONTROUTE 2 /* enable/disable routing for messages */ +#define INET_OPT_LINGER 3 /* linger on close if data is present */ +#define INET_OPT_BROADCAST 4 
/* enable/disable transmission of broadcast */ +#define INET_OPT_OOBINLINE 5 /* enable/disable out-of-band data in band */ +#define INET_OPT_SNDBUF 6 /* set send buffer size */ +#define INET_OPT_RCVBUF 7 /* set receive buffer size */ +#define INET_OPT_PRIORITY 8 /* set priority */ +#define INET_OPT_TOS 9 /* Set type of service */ +#define TCP_OPT_NODELAY 10 /* don't delay send to coalesce packets */ +#define UDP_OPT_MULTICAST_IF 11 /* set/get IP multicast interface */ +#define UDP_OPT_MULTICAST_TTL 12 /* set/get IP multicast timetolive */ +#define UDP_OPT_MULTICAST_LOOP 13 /* set/get IP multicast loopback */ +#define UDP_OPT_ADD_MEMBERSHIP 14 /* add an IP group membership */ +#define UDP_OPT_DROP_MEMBERSHIP 15 /* drop an IP group membership */ +/* LOPT is local options */ +#define INET_LOPT_BUFFER 20 /* min buffer size hint */ +#define INET_LOPT_HEADER 21 /* list header size */ +#define INET_LOPT_ACTIVE 22 /* enable/disable active receive */ +#define INET_LOPT_PACKET 23 /* packet header type (TCP) */ +#define INET_LOPT_MODE 24 /* list or binary mode */ +#define INET_LOPT_DELIVER 25 /* port or term delivery */ +#define INET_LOPT_EXITONCLOSE 26 /* exit port on active close or not ! 
*/ +#define INET_LOPT_TCP_HIWTRMRK 27 /* set local high watermark */ +#define INET_LOPT_TCP_LOWTRMRK 28 /* set local low watermark */ +#define INET_LOPT_BIT8 29 /* set 8 bit detection */ +#define INET_LOPT_TCP_SEND_TIMEOUT 30 /* set send timeout */ +#define INET_LOPT_TCP_DELAY_SEND 31 /* Delay sends until next poll */ +#define INET_LOPT_PACKET_SIZE 32 /* Max packet size */ +#define INET_LOPT_UDP_READ_PACKETS 33 /* Number of packets to read */ +#define INET_OPT_RAW 34 /* Raw socket options */ +#define INET_LOPT_TCP_SEND_TIMEOUT_CLOSE 35 /* auto-close on send timeout or not */ +/* SCTP options: a separate range, from 100: */ +#define SCTP_OPT_RTOINFO 100 +#define SCTP_OPT_ASSOCINFO 101 +#define SCTP_OPT_INITMSG 102 +#define SCTP_OPT_AUTOCLOSE 103 +#define SCTP_OPT_NODELAY 104 +#define SCTP_OPT_DISABLE_FRAGMENTS 105 +#define SCTP_OPT_I_WANT_MAPPED_V4_ADDR 106 +#define SCTP_OPT_MAXSEG 107 +#define SCTP_OPT_SET_PEER_PRIMARY_ADDR 108 +#define SCTP_OPT_PRIMARY_ADDR 109 +#define SCTP_OPT_ADAPTATION_LAYER 110 +#define SCTP_OPT_PEER_ADDR_PARAMS 111 +#define SCTP_OPT_DEFAULT_SEND_PARAM 112 +#define SCTP_OPT_EVENTS 113 +#define SCTP_OPT_DELAYED_ACK_TIME 114 +#define SCTP_OPT_STATUS 115 +#define SCTP_OPT_GET_PEER_ADDR_INFO 116 + +/* INET_REQ_IFGET and INET_REQ_IFSET options */ +#define INET_IFOPT_ADDR 1 +#define INET_IFOPT_BROADADDR 2 +#define INET_IFOPT_DSTADDR 3 +#define INET_IFOPT_MTU 4 +#define INET_IFOPT_NETMASK 5 +#define INET_IFOPT_FLAGS 6 +#define INET_IFOPT_HWADDR 7 + +/* INET_LOPT_BIT8 options */ +#define INET_BIT8_CLEAR 0 +#define INET_BIT8_SET 1 +#define INET_BIT8_ON 2 +#define INET_BIT8_OFF 3 + +/* INET_REQ_GETSTAT enumeration */ +#define INET_STAT_RECV_CNT 1 +#define INET_STAT_RECV_MAX 2 +#define INET_STAT_RECV_AVG 3 +#define INET_STAT_RECV_DVI 4 +#define INET_STAT_SEND_CNT 5 +#define INET_STAT_SEND_MAX 6 +#define INET_STAT_SEND_AVG 7 +#define INET_STAT_SEND_PND 8 +#define INET_STAT_RECV_OCT 9 /* received octets */ +#define INET_STAT_SEND_OCT 10 /* sent octets */ 
+ +/* INET_IFOPT_FLAGS enumeration */ +#define INET_IFF_UP 0x0001 +#define INET_IFF_BROADCAST 0x0002 +#define INET_IFF_LOOPBACK 0x0004 +#define INET_IFF_POINTTOPOINT 0x0008 +#define INET_IFF_RUNNING 0x0010 +#define INET_IFF_MULTICAST 0x0020 +/* Complement flags for turning them off */ +#define INET_IFF_DOWN 0x0100 +#define INET_IFF_NBROADCAST 0x0200 +/* #define INET_IFF_NLOOPBACK 0x0400 */ +#define INET_IFF_NPOINTTOPOINT 0x0800 +/* #define INET_IFF_NRUNNING 0x1000 */ +/* #define INET_IFF_NMULTICAST 0x2000 */ + +/* Flags for "sctp_sndrcvinfo". Used in a bitmask -- must be powers of 2: +** INET_REQ_SETOPTS:SCTP_OPT_DEFAULT_SEND_PARAM +*/ +#define SCTP_FLAG_UNORDERED (1 /* am_unordered */) +#define SCTP_FLAG_ADDR_OVER (2 /* am_addr_over */) +#define SCTP_FLAG_ABORT (4 /* am_abort */) +#define SCTP_FLAG_EOF (8 /* am_eof */) +#define SCTP_FLAG_SNDALL (16 /* am_sndall, NOT YET IMPLEMENTED */) + +/* Flags for "sctp_set_opts" (actually for SCTP_OPT_PEER_ADDR_PARAMS). +** These flags are also used in a bitmask, so they must be powers of 2: +*/ +#define SCTP_FLAG_HB_ENABLE (1 /* am_hb_enable */) +#define SCTP_FLAG_HB_DISABLE (2 /* am_hb_disable */) +#define SCTP_FLAG_HB_DEMAND (4 /* am_hb_demand */) +#define SCTP_FLAG_PMTUD_ENABLE (8 /* am_pmtud_enable */) +#define SCTP_FLAG_PMTUD_DISABLE (16 /* am_pmtud_disable */) +#define SCTP_FLAG_SACDELAY_ENABLE (32 /* am_sackdelay_enable */) +#define SCTP_FLAG_SACDELAY_DISABLE (64 /* am_sackdelay_disable */) + +/* +** End of interface constants. 
+**--------------------------------------------------------------------------*/ + +#define INET_STATE_CLOSED 0 +#define INET_STATE_OPEN (INET_F_OPEN) +#define INET_STATE_BOUND (INET_STATE_OPEN | INET_F_BOUND) +#define INET_STATE_CONNECTED (INET_STATE_BOUND | INET_F_ACTIVE) + +#define IS_OPEN(d) \ + (((d)->state & INET_F_OPEN) == INET_F_OPEN) + +#define IS_BOUND(d) \ + (((d)->state & INET_F_BOUND) == INET_F_BOUND) + +#define IS_CONNECTED(d) \ + (((d)->state & INET_STATE_CONNECTED) == INET_STATE_CONNECTED) + +#define IS_CONNECTING(d) \ + (((d)->state & INET_F_CON) == INET_F_CON) + +#define IS_BUSY(d) \ + (((d)->state & INET_F_BUSY) == INET_F_BUSY) + +#define INET_DEF_BUFFER 1460 /* default buffer size */ +#define INET_MIN_BUFFER 1 /* internal min buffer */ +#define INET_MAX_BUFFER (1024*64) /* internal max buffer */ + +/* Note: INET_HIGH_WATERMARK MUST be less than 2*INET_MAX_BUFFER */ +#define INET_HIGH_WATERMARK (1024*8) /* 8k pending high => busy */ +/* Note: INET_LOW_WATERMARK MUST be less than INET_MAX_BUFFER and +** less than INET_HIGH_WATERMARK +*/ +#define INET_LOW_WATERMARK (1024*4) /* 4k pending => allow more */ + +#define INET_INFINITY 0xffffffff /* infinity value */ + +#define INET_MAX_ASYNC 1 /* max number of async queue ops */ + +/* INET_LOPT_UDP_PACKETS */ +#define INET_PACKET_POLL 5 /* maximum number of packets to poll */ + +/* Max interface name */ +#define INET_IFNAMSIZ 16 + +/* Max length of Erlang Term Buffer (for outputting structured terms): */ +#ifdef HAVE_SCTP +#define PACKET_ERL_DRV_TERM_DATA_LEN 512 +#else +#define PACKET_ERL_DRV_TERM_DATA_LEN 32 +#endif + + +#define BIN_REALLOC_LIMIT(x) (((x)*3)/4) /* 75% */ + +/* The general purpose sockaddr */ +typedef union { + struct sockaddr sa; + struct sockaddr_in sai; +#ifdef HAVE_IN6 + struct sockaddr_in6 sai6; +#endif +} inet_address; + + +/* for AF_INET & AF_INET6 */ +#define inet_address_port(x) ((x)->sai.sin_port) + +#if defined(HAVE_IN6) && defined(AF_INET6) +#define addrlen(family) \ + 
((family == AF_INET) ? sizeof(struct in_addr) : \ + ((family == AF_INET6) ? sizeof(struct in6_addr) : 0)) +#else +#define addrlen(family) \ + ((family == AF_INET) ? sizeof(struct in_addr) : 0) +#endif + +typedef struct _multi_timer_data { + ErlDrvNowData when; + ErlDrvTermData caller; + void (*timeout_function)(ErlDrvData drv_data, ErlDrvTermData caller); + struct _multi_timer_data *next; + struct _multi_timer_data *prev; +} MultiTimerData; + +static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port, + ErlDrvTermData caller, unsigned timeout, + void (*timeout_fun)(ErlDrvData drv_data, + ErlDrvTermData caller)); +static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port, + ErlDrvData data); +static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTimerData *p); + +static void tcp_inet_multi_timeout(ErlDrvData e, ErlDrvTermData caller); +static void clean_multi_timers(MultiTimerData **first, ErlDrvPort port); + +typedef struct { + int id; /* id used to identify reply */ + ErlDrvTermData caller; /* recipient of async reply */ + int req; /* Request id (CONNECT/ACCEPT/RECV) */ + union { + unsigned value; /* Request timeout (since op issued,not started) */ + MultiTimerData *mtd; + } tmo; + ErlDrvMonitor monitor; +} inet_async_op; + +typedef struct inet_async_multi_op_ { + inet_async_op op; + struct inet_async_multi_op_ *next; +} inet_async_multi_op; + + +typedef struct subs_list_ { + ErlDrvTermData subscriber; + struct subs_list_ *next; +} subs_list; + +#define NO_PROCESS 0 +#define NO_SUBSCRIBERS(SLP) ((SLP)->subscriber == NO_PROCESS) +static void send_to_subscribers(ErlDrvPort, subs_list *, int, + ErlDrvTermData [], int); +static void free_subscribers(subs_list*); +static int save_subscriber(subs_list *, ErlDrvTermData); + +typedef struct { + SOCKET s; /* the socket or INVALID_SOCKET if not open */ + HANDLE event; /* Event handle (same as s in unix) */ + long event_mask; /* current FD events */ +#ifdef __WIN32__ + 
long forced_events; /* Mask of events that are forcefully signalled + on windows see winsock_event_select + for details */ + int send_would_block; /* Last send attempt failed with "WOULDBLOCK" */ +#endif + ErlDrvPort port; /* the port identifier */ + ErlDrvTermData dport; /* the port identifier as DriverTermData */ + int state; /* status */ + int prebound; /* only set when opened with inet_fdopen */ + int mode; /* BINARY | LIST + (affect how to interpret hsz) */ + int exitf; /* exit port on close or not */ + int bit8f; /* check if data has bit number 7 set */ + int deliver; /* Delivery mode, TERM or PORT */ + + ErlDrvTermData caller; /* recipient of sync reply */ + ErlDrvTermData busy_caller; /* recipient of sync reply when caller busy. + * Only valid while INET_F_BUSY. */ + + inet_async_op* oph; /* queue head or NULL */ + inet_async_op* opt; /* queue tail or NULL */ + inet_async_op op_queue[INET_MAX_ASYNC]; /* call queue */ + + int active; /* 0 = passive, 1 = active, 2 = active once */ + int stype; /* socket type: + SOCK_STREAM/SOCK_DGRAM/SOCK_SEQPACKET */ + int sprotocol; /* socket protocol: + IPPROTO_TCP|IPPROTO_UDP|IPPROTO_SCTP */ + int sfamily; /* address family */ + enum PacketParseType htype; /* header type (TCP only?) */ + unsigned int psize; /* max packet size (TCP only?) */ + int bit8; /* set if bit8f==true and data some data + seen had the 7th bit set */ + inet_address remote; /* remote address for connected sockets */ + inet_address peer_addr; /* fake peer address */ + inet_address name_addr; /* fake local address */ + + inet_address* peer_ptr; /* fake peername or NULL */ + inet_address* name_ptr; /* fake sockname or NULL */ + + int bufsz; /* minimum buffer constraint */ + unsigned int hsz; /* the list header size, -1 is large !!! 
*/ + /* statistics */ + unsigned long recv_oct[2]; /* number of received octets >= 64 bits */ + unsigned long recv_cnt; /* number of packets received */ + unsigned long recv_max; /* maximum packet size received */ + double recv_avg; /* average packet size received */ + double recv_dvi; /* avarage deviation from avg_size */ + unsigned long send_oct[2]; /* number of octets sent >= 64 bits */ + unsigned long send_cnt; /* number of packets sent */ + unsigned long send_max; /* maximum packet send */ + double send_avg; /* average packet size sent */ + + subs_list empty_out_q_subs; /* Empty out queue subscribers */ +} inet_descriptor; + + + +#define TCP_STATE_CLOSED INET_STATE_CLOSED +#define TCP_STATE_OPEN (INET_F_OPEN) +#define TCP_STATE_BOUND (TCP_STATE_OPEN | INET_F_BOUND) +#define TCP_STATE_CONNECTED (TCP_STATE_BOUND | INET_F_ACTIVE) +#define TCP_STATE_LISTEN (TCP_STATE_BOUND | INET_F_LISTEN) +#define TCP_STATE_CONNECTING (TCP_STATE_BOUND | INET_F_CON) +#define TCP_STATE_ACCEPTING (TCP_STATE_LISTEN | INET_F_ACC) +#define TCP_STATE_MULTI_ACCEPTING (TCP_STATE_ACCEPTING | INET_F_MULTI_CLIENT) + + +#define TCP_MAX_PACKET_SIZE 0x4000000 /* 64 M */ + +#define MAX_VSIZE 16 /* Max number of entries allowed in an I/O + * vector sock_sendv(). 
+ */ + +static int tcp_inet_init(void); +static void tcp_inet_stop(ErlDrvData); +static void tcp_inet_command(ErlDrvData, char*, int); +static void tcp_inet_commandv(ErlDrvData, ErlIOVec*); +static void tcp_inet_flush(ErlDrvData drv_data); +static void tcp_inet_drv_input(ErlDrvData, ErlDrvEvent); +static void tcp_inet_drv_output(ErlDrvData data, ErlDrvEvent event); +static ErlDrvData tcp_inet_start(ErlDrvPort, char* command); +static int tcp_inet_ctl(ErlDrvData, unsigned int, char*, int, char**, int); +static void tcp_inet_timeout(ErlDrvData); +static void tcp_inet_process_exit(ErlDrvData, ErlDrvMonitor *); +static void inet_stop_select(ErlDrvEvent, void*); +#ifdef __WIN32__ +static void tcp_inet_event(ErlDrvData, ErlDrvEvent); +static void find_dynamic_functions(void); +#endif + +static struct erl_drv_entry tcp_inet_driver_entry = +{ + tcp_inet_init, /* inet_init will add this driver !! */ + tcp_inet_start, + tcp_inet_stop, + tcp_inet_command, +#ifdef __WIN32__ + tcp_inet_event, + NULL, +#else + tcp_inet_drv_input, + tcp_inet_drv_output, +#endif + "tcp_inet", + NULL, + NULL, + tcp_inet_ctl, + tcp_inet_timeout, + tcp_inet_commandv, + NULL, + tcp_inet_flush, + NULL, + NULL, + ERL_DRV_EXTENDED_MARKER, + ERL_DRV_EXTENDED_MAJOR_VERSION, + ERL_DRV_EXTENDED_MINOR_VERSION, + ERL_DRV_FLAG_USE_PORT_LOCKING|ERL_DRV_FLAG_SOFT_BUSY, + NULL, + tcp_inet_process_exit, + inet_stop_select +}; + +#define PACKET_STATE_CLOSED INET_STATE_CLOSED +#define PACKET_STATE_OPEN (INET_F_OPEN) +#define PACKET_STATE_BOUND (PACKET_STATE_OPEN | INET_F_BOUND) +#define SCTP_STATE_LISTEN (PACKET_STATE_BOUND | INET_F_LISTEN) +#define SCTP_STATE_CONNECTING (PACKET_STATE_BOUND | INET_F_CON) +#define PACKET_STATE_CONNECTED (PACKET_STATE_BOUND | INET_F_ACTIVE) + + +static int packet_inet_init(void); +static void packet_inet_stop(ErlDrvData); +static void packet_inet_command(ErlDrvData, char*, int); +static void packet_inet_drv_input(ErlDrvData data, ErlDrvEvent event); +static void 
packet_inet_drv_output(ErlDrvData data, ErlDrvEvent event); +static ErlDrvData udp_inet_start(ErlDrvPort, char* command); +#ifdef HAVE_SCTP +static ErlDrvData sctp_inet_start(ErlDrvPort, char* command); +#endif +static int packet_inet_ctl(ErlDrvData, unsigned int, char*, + int, char**, int); +static void packet_inet_timeout(ErlDrvData); +#ifdef __WIN32__ +static void packet_inet_event(ErlDrvData, ErlDrvEvent); +static SOCKET make_noninheritable_handle(SOCKET s); +static int winsock_event_select(inet_descriptor *, int, int); +#endif + +static struct erl_drv_entry udp_inet_driver_entry = +{ + packet_inet_init, /* inet_init will add this driver !! */ + udp_inet_start, + packet_inet_stop, + packet_inet_command, +#ifdef __WIN32__ + packet_inet_event, + NULL, +#else + packet_inet_drv_input, + packet_inet_drv_output, +#endif + "udp_inet", + NULL, + NULL, + packet_inet_ctl, + packet_inet_timeout, + NULL, + NULL, + NULL, + NULL, + NULL, + ERL_DRV_EXTENDED_MARKER, + ERL_DRV_EXTENDED_MAJOR_VERSION, + ERL_DRV_EXTENDED_MINOR_VERSION, + ERL_DRV_FLAG_USE_PORT_LOCKING, + NULL, + NULL, + inet_stop_select +}; + +#ifdef HAVE_SCTP +static struct erl_drv_entry sctp_inet_driver_entry = +{ + packet_inet_init, /* inet_init will add this driver !! 
/*
 * Per-socket state of the "tcp_inet" driver.
 *
 * The protocol-independent part lives in the embedded inet_descriptor
 * (reachable through INETP(d)); the remaining members are TCP-only:
 * watermarks, send-timeout settings, the input buffer currently being
 * filled, and the bookkeeping for several simultaneous acceptors.
 */
typedef struct {
    inet_descriptor inet;       /* common data structure (DON'T MOVE) */
    int   high;                 /* high watermark */
    int   low;                  /* low watermark */
    int   send_timeout;         /* timeout to use in send */
    int   send_timeout_close;   /* auto-close socket on send_timeout */
    int   busy_on_send;         /* busy on send with timeout! */
    int   i_bufsz;              /* current input buffer size (<= bufsz) */
    ErlDrvBinary* i_buf;        /* current binary buffer */
    char* i_ptr;                /* current pos in buf */
    char* i_ptr_start;          /* packet start pos in buf */
    int   i_remain;             /* remaining chars to read */
    int   tcp_add_flags;/* Additional TCP descriptor flags */
    int   http_state;           /* 0 = response|request  1=headers fields */
    /* Singly linked FIFO of queued multi-accept operations: new entries are
     * appended after multi_last, the oldest entry is taken from multi_first. */
    inet_async_multi_op *multi_first;/* NULL == no multi-accept-queue, op is in ordinary queue */
    inet_async_multi_op *multi_last; /* last element of the FIFO above */
    MultiTimerData *mtd;        /* Timer structures for multiple accept */
} tcp_descriptor;
udesc, HANDLE event); +static int packet_inet_output(udp_descriptor* udesc, HANDLE event); + +/* convert descriptor poiner to inet_descriptor pointer */ +#define INETP(d) (&(d)->inet) + +static int async_ref = 0; /* async reference id generator */ +#define NEW_ASYNC_ID() ((async_ref++) & 0xffff) + + +static ErlDrvTermData am_ok; +static ErlDrvTermData am_tcp; +static ErlDrvTermData am_udp; +static ErlDrvTermData am_error; +static ErlDrvTermData am_inet_async; +static ErlDrvTermData am_inet_reply; +static ErlDrvTermData am_timeout; +static ErlDrvTermData am_closed; +static ErlDrvTermData am_tcp_closed; +static ErlDrvTermData am_tcp_error; +static ErlDrvTermData am_udp_error; +static ErlDrvTermData am_empty_out_q; +static ErlDrvTermData am_ssl_tls; +#ifdef HAVE_SCTP +static ErlDrvTermData am_sctp; +static ErlDrvTermData am_sctp_error; +static ErlDrvTermData am_true; +static ErlDrvTermData am_false; +static ErlDrvTermData am_buffer; +static ErlDrvTermData am_mode; +static ErlDrvTermData am_list; +static ErlDrvTermData am_binary; +static ErlDrvTermData am_active; +static ErlDrvTermData am_once; +static ErlDrvTermData am_buffer; +static ErlDrvTermData am_linger; +static ErlDrvTermData am_recbuf; +static ErlDrvTermData am_sndbuf; +static ErlDrvTermData am_reuseaddr; +static ErlDrvTermData am_dontroute; +static ErlDrvTermData am_priority; +static ErlDrvTermData am_tos; +#endif + +/* speical errors for bad ports and sequences */ +#define EXBADPORT "exbadport" +#define EXBADSEQ "exbadseq" + + +static int inet_init(void); +static int ctl_reply(int, char*, int, char**, int); + +struct erl_drv_entry inet_driver_entry = +{ + inet_init, /* inet_init will add TCP, UDP and SCTP drivers */ + NULL, /* start */ + NULL, /* stop */ + NULL, /* output */ + NULL, /* ready_input */ + NULL, /* ready_output */ + "inet" +}; + +/* XXX: is this a driver interface function ??? 
*/ +extern void erl_exit(int n, char*, _DOTS_); + +/* + * Malloc wrapper, + * we would like to change the behaviour for different + * systems here. + */ + +#ifdef FATAL_MALLOC + +static void *alloc_wrapper(size_t size){ + void *ret = driver_alloc(size); + if(ret == NULL) + erl_exit(1,"Out of virtual memory in malloc (%s)", __FILE__); + return ret; +} +#define ALLOC(X) alloc_wrapper(X) + +static void *realloc_wrapper(void *current, size_t size){ + void *ret = driver_realloc(current,size); + if(ret == NULL) + erl_exit(1,"Out of virtual memory in realloc (%s)", __FILE__); + return ret; +} +#define REALLOC(X,Y) realloc_wrapper(X,Y) +#define FREE(P) driver_free((P)) +#else /* FATAL_MALLOC */ + +#define ALLOC(X) driver_alloc((X)) +#define REALLOC(X,Y) driver_realloc((X), (Y)) +#define FREE(P) driver_free((P)) + +#endif /* FATAL_MALLOC */ + +#define INIT_ATOM(NAME) am_ ## NAME = driver_mk_atom(#NAME) + +#define LOAD_ATOM_CNT 2 +#define LOAD_ATOM(vec, i, atom) \ + (((vec)[(i)] = ERL_DRV_ATOM), \ + ((vec)[(i)+1] = (atom)), \ + ((i)+LOAD_ATOM_CNT)) + +#define LOAD_INT_CNT 2 +#define LOAD_INT(vec, i, val) \ + (((vec)[(i)] = ERL_DRV_INT), \ + ((vec)[(i)+1] = (ErlDrvTermData)(val)), \ + ((i)+LOAD_INT_CNT)) + +#define LOAD_UINT_CNT 2 +#define LOAD_UINT(vec, i, val) \ + (((vec)[(i)] = ERL_DRV_UINT), \ + ((vec)[(i)+1] = (ErlDrvTermData)(val)), \ + ((i)+LOAD_UINT_CNT)) + +#define LOAD_PORT_CNT 2 +#define LOAD_PORT(vec, i, port) \ + (((vec)[(i)] = ERL_DRV_PORT), \ + ((vec)[(i)+1] = (port)), \ + ((i)+LOAD_PORT_CNT)) + +#define LOAD_PID_CNT 2 +#define LOAD_PID(vec, i, pid) \ + (((vec)[(i)] = ERL_DRV_PID), \ + ((vec)[(i)+1] = (pid)), \ + ((i)+LOAD_PID_CNT)) + +#define LOAD_BINARY_CNT 4 +#define LOAD_BINARY(vec, i, bin, offs, len) \ + (((vec)[(i)] = ERL_DRV_BINARY), \ + ((vec)[(i)+1] = (ErlDrvTermData)(bin)), \ + ((vec)[(i)+2] = (len)), \ + ((vec)[(i)+3] = (offs)), \ + ((i)+LOAD_BINARY_CNT)) + +#define LOAD_BUF2BINARY_CNT 3 +#define LOAD_BUF2BINARY(vec, i, buf, len) \ + (((vec)[(i)] = 
ERL_DRV_BUF2BINARY), \ + ((vec)[(i)+1] = (ErlDrvTermData)(buf)), \ + ((vec)[(i)+2] = (len)), \ + ((i)+LOAD_BUF2BINARY_CNT)) + +#define LOAD_STRING_CNT 3 +#define LOAD_STRING(vec, i, str, len) \ + (((vec)[(i)] = ERL_DRV_STRING), \ + ((vec)[(i)+1] = (ErlDrvTermData)(str)), \ + ((vec)[(i)+2] = (len)), \ + ((i)+LOAD_STRING_CNT)) + +#define LOAD_STRING_CONS_CNT 3 +#define LOAD_STRING_CONS(vec, i, str, len) \ + (((vec)[(i)] = ERL_DRV_STRING_CONS), \ + ((vec)[(i)+1] = (ErlDrvTermData)(str)), \ + ((vec)[(i)+2] = (len)), \ + ((i)+LOAD_STRING_CONS_CNT)) + +#define LOAD_TUPLE_CNT 2 +#define LOAD_TUPLE(vec, i, size) \ + (((vec)[(i)] = ERL_DRV_TUPLE), \ + ((vec)[(i)+1] = (size)), \ + ((i)+LOAD_TUPLE_CNT)) + +#define LOAD_NIL_CNT 1 +#define LOAD_NIL(vec, i) \ + (((vec)[(i)] = ERL_DRV_NIL), \ + ((i)+LOAD_NIL_CNT)) + +#define LOAD_LIST_CNT 2 +#define LOAD_LIST(vec, i, size) \ + (((vec)[(i)] = ERL_DRV_LIST), \ + ((vec)[(i)+1] = (size)), \ + ((i)+LOAD_LIST_CNT)) + + +#ifdef HAVE_SCTP + /* "IS_SCTP": tells the difference between a UDP and an SCTP socket: */ +# define IS_SCTP(desc)((desc)->sprotocol==IPPROTO_SCTP) + + /* For AssocID, 4 bytes should be enough -- checked by "init": */ +# define GET_ASSOC_ID get_int32 +# define ASSOC_ID_LEN 4 +# define LOAD_ASSOC_ID LOAD_INT +# define LOAD_ASSOC_ID_CNT LOAD_INT_CNT +# define SCTP_ANC_BUFF_SIZE INET_DEF_BUFFER/2 /* XXX: not very good... 
*/ +#endif + +static int load_ip_port(ErlDrvTermData* spec, int i, char* buf) +{ + spec[i++] = ERL_DRV_INT; + spec[i++] = (ErlDrvTermData) get_int16(buf); + return i; +} + +static int load_ip_address(ErlDrvTermData* spec, int i, int family, char* buf) +{ + int n; + if (family == AF_INET) { + for (n = 0; n < 4; n++) { + spec[i++] = ERL_DRV_INT; + spec[i++] = (ErlDrvTermData) ((unsigned char)buf[n]); + } + spec[i++] = ERL_DRV_TUPLE; + spec[i++] = 4; + } +#if defined(HAVE_IN6) && defined(AF_INET6) + else if (family == AF_INET6) { + for (n = 0; n < 16; n += 2) { + spec[i++] = ERL_DRV_INT; + spec[i++] = (ErlDrvTermData) get_int16(buf+n); + } + spec[i++] = ERL_DRV_TUPLE; + spec[i++] = 8; + } +#endif + else { + spec[i++] = ERL_DRV_TUPLE; + spec[i++] = 0; + } + return i; +} + + +#ifdef HAVE_SCTP +/* For SCTP, we often need to return {IP, Port} tuples: */ +static int inet_get_address + (int family, char* dst, inet_address* src, unsigned int* len); + +#define LOAD_IP_AND_PORT_CNT \ + (8*LOAD_INT_CNT + LOAD_TUPLE_CNT + LOAD_INT_CNT + LOAD_TUPLE_CNT) + +static int load_ip_and_port + (ErlDrvTermData* spec, int i, inet_descriptor* desc, + struct sockaddr_storage* addr) +{ + /* The size of the buffer used to stringify the addr is the same as + that of "sockaddr_storage" itself: only their layout is different: + */ + unsigned int len = sizeof(struct sockaddr_storage); + unsigned int alen = len; + char abuf [len]; + int res = + inet_get_address(desc->sfamily, abuf, (inet_address*) addr, &alen); + ASSERT(res==0); + res = 0; + /* Now "abuf" contains: Family(1b), Port(2b), IP(4|16b) */ + + /* NB: the following functions are safe to use, as they create tuples + of copied Ints on the "spec", and do not install any String pts -- + a ptr to "abuf" would be dangling upon exiting this function: */ + i = load_ip_address(spec, i, desc->sfamily, abuf+3); + i = load_ip_port (spec, i, abuf+1); + i = LOAD_TUPLE (spec, i, 2); + return i; +} + +/* Loading Boolean flags as Atoms: */ +#define 
LOAD_BOOL_CNT LOAD_ATOM_CNT +#define LOAD_BOOL(spec, i, flag) \ + LOAD_ATOM((spec), (i), (flag) ? am_true : am_false); +#endif /* HAVE_SCTP */ + +/* +** Binary Buffer Managment +** We keep a stack of usable buffers +*/ +#define BUFFER_STACK_SIZE 16 + +static erts_smp_spinlock_t inet_buffer_stack_lock; +static ErlDrvBinary* buffer_stack[BUFFER_STACK_SIZE]; +static int buffer_stack_pos = 0; + + +/* + * XXX + * The erts_smp_spin_* functions should not be used by drivers (but this + * driver is special). Replace when driver locking api has been implemented. + * /rickard + */ +#define BUFSTK_LOCK erts_smp_spin_lock(&inet_buffer_stack_lock); +#define BUFSTK_UNLOCK erts_smp_spin_unlock(&inet_buffer_stack_lock); + +#ifdef DEBUG +static int tot_buf_allocated = 0; /* memory in use for i_buf */ +static int tot_buf_stacked = 0; /* memory on stack */ +static int max_buf_allocated = 0; /* max allocated */ + +#define COUNT_BUF_ALLOC(sz) do { \ + BUFSTK_LOCK; \ + tot_buf_allocated += (sz); \ + if (tot_buf_allocated > max_buf_allocated) \ + max_buf_allocated = tot_buf_allocated; \ + BUFSTK_UNLOCK; \ +} while(0) + +#define COUNT_BUF_FREE(sz) do { \ + BUFSTK_LOCK; \ + tot_buf_allocated -= (sz); \ + BUFSTK_UNLOCK; \ + } while(0) + +#define COUNT_BUF_STACK(sz) do { \ + BUFSTK_LOCK; \ + tot_buf_stacked += (sz); \ + BUFSTK_UNLOCK; \ + } while(0) + +#else + +#define COUNT_BUF_ALLOC(sz) +#define COUNT_BUF_FREE(sz) +#define COUNT_BUF_STACK(sz) + +#endif + +static ErlDrvBinary* alloc_buffer(long minsz) +{ + ErlDrvBinary* buf = NULL; + + BUFSTK_LOCK; + + DEBUGF(("alloc_buffer: sz = %ld, tot = %d, max = %d\r\n", + minsz, tot_buf_allocated, max_buf_allocated)); + + if (buffer_stack_pos > 0) { + int origsz; + + buf = buffer_stack[--buffer_stack_pos]; + origsz = buf->orig_size; + BUFSTK_UNLOCK; + COUNT_BUF_STACK(-origsz); + if (origsz < minsz) { + if ((buf = driver_realloc_binary(buf, minsz)) == NULL) + return NULL; + COUNT_BUF_ALLOC(buf->orig_size - origsz); + } + } + else { + BUFSTK_UNLOCK; + 
if ((buf = driver_alloc_binary(minsz)) == NULL) + return NULL; + COUNT_BUF_ALLOC(buf->orig_size); + } + return buf; +} + +/* +** Max buffer memory "cached" BUFFER_STACK_SIZE * INET_MAX_BUFFER +** (16 * 64k ~ 1M) +*/ +/*#define CHECK_DOUBLE_RELEASE 1*/ +static void release_buffer(ErlDrvBinary* buf) +{ + DEBUGF(("release_buffer: %ld\r\n", (buf==NULL) ? 0 : buf->orig_size)); + if (buf == NULL) + return; + BUFSTK_LOCK; + if ((buf->orig_size > INET_MAX_BUFFER) || + (buffer_stack_pos >= BUFFER_STACK_SIZE)) { + BUFSTK_UNLOCK; + COUNT_BUF_FREE(buf->orig_size); + driver_free_binary(buf); + } + else { +#ifdef CHECK_DOUBLE_RELEASE +#ifdef __GNUC__ +#warning CHECK_DOUBLE_RELEASE is enabled, this is a custom build emulator +#endif + int i; + for (i = 0; i < buffer_stack_pos; ++i) { + if (buffer_stack[i] == buf) { + erl_exit(1,"Multiple buffer release in inet_drv, this is a " + "bug, save the core and send it to " + "[email protected]!"); + } + } +#endif + buffer_stack[buffer_stack_pos++] = buf; + BUFSTK_UNLOCK; + COUNT_BUF_STACK(buf->orig_size); + } +} + +static ErlDrvBinary* realloc_buffer(ErlDrvBinary* buf, long newsz) +{ + ErlDrvBinary* bin; +#ifdef DEBUG + long orig_size = buf->orig_size; +#endif + + if ((bin = driver_realloc_binary(buf,newsz)) != NULL) { + COUNT_BUF_ALLOC(newsz - orig_size); + ; + } + return bin; +} + +/* use a TRICK, access the refc field to see if any one else has + * a ref to this buffer then call driver_free_binary else + * release_buffer instead + */ +static void free_buffer(ErlDrvBinary* buf) +{ + DEBUGF(("free_buffer: %ld\r\n", (buf==NULL) ? 
0 : buf->orig_size)); + + if (buf != NULL) { + if (driver_binary_get_refc(buf) == 1) + release_buffer(buf); + else { + COUNT_BUF_FREE(buf->orig_size); + driver_free_binary(buf); + } + } +} + + +#ifdef __WIN32__ + +static ErlDrvData dummy_start(ErlDrvPort port, char* command) +{ + return (ErlDrvData)port; +} + +static int dummy_ctl(ErlDrvData data, unsigned int cmd, char* buf, int len, + char** rbuf, int rsize) +{ + static char error[] = "no_winsock2"; + + driver_failure_atom((ErlDrvPort)data, error); + return ctl_reply(INET_REP_ERROR, error, sizeof(error), rbuf, rsize); +} + +static void dummy_command(ErlDrvData data, char* buf, int len) +{ +} + +static struct erl_drv_entry dummy_tcp_driver_entry = +{ + NULL, /* init */ + dummy_start, /* start */ + NULL, /* stop */ + dummy_command, /* command */ + NULL, /* input */ + NULL, /* output */ + "tcp_inet", /* name */ + NULL, + NULL, + dummy_ctl, + NULL, + NULL +}; + +static struct erl_drv_entry dummy_udp_driver_entry = +{ + NULL, /* init */ + dummy_start, /* start */ + NULL, /* stop */ + dummy_command, /* command */ + NULL, /* input */ + NULL, /* output */ + "udp_inet", /* name */ + NULL, + NULL, + dummy_ctl, + NULL, + NULL +}; + +#ifdef HAVE_SCTP +static struct erl_drv_entry dummy_sctp_driver_entry = +{ /* Though there is no SCTP for Win32 yet... */ + NULL, /* init */ + dummy_start, /* start */ + NULL, /* stop */ + dummy_command, /* command */ + NULL, /* input */ + NULL, /* output */ + "sctp_inet", /* name */ + NULL, + NULL, + dummy_ctl, + NULL, + NULL +}; +#endif + +#endif + +/* general control reply function */ +static int ctl_reply(int rep, char* buf, int len, char** rbuf, int rsize) +{ + char* ptr; + + if ((len+1) > rsize) { + ptr = ALLOC(len+1); + *rbuf = ptr; + } + else + ptr = *rbuf; + *ptr++ = rep; + memcpy(ptr, buf, len); + return len+1; +} + +/* general control error reply function */ +static int ctl_error(int err, char** rbuf, int rsize) +{ + char response[256]; /* Response buffer. 
*/ + char* s; + char* t; + + for (s = erl_errno_id(err), t = response; *s; s++, t++) + *t = tolower(*s); + return ctl_reply(INET_REP_ERROR, response, t-response, rbuf, rsize); +} + +static int ctl_xerror(char* xerr, char** rbuf, int rsize) +{ + int n = strlen(xerr); + return ctl_reply(INET_REP_ERROR, xerr, n, rbuf, rsize); +} + + +static ErlDrvTermData error_atom(int err) +{ + char errstr[256]; + char* s; + char* t; + + for (s = erl_errno_id(err), t = errstr; *s; s++, t++) + *t = tolower(*s); + *t = '\0'; + return driver_mk_atom(errstr); +} + + +static void enq_old_multi_op(tcp_descriptor *desc, int id, int req, + ErlDrvTermData caller, MultiTimerData *timeout, + ErlDrvMonitor *monitorp) +{ + inet_async_multi_op *opp; + + opp = ALLOC(sizeof(inet_async_multi_op)); + + opp->op.id = id; + opp->op.caller = caller; + opp->op.req = req; + opp->op.tmo.mtd = timeout; + memcpy(&(opp->op.monitor), monitorp, sizeof(ErlDrvMonitor)); + opp->next = NULL; + + if (desc->multi_first == NULL) { + desc->multi_first = opp; + } else { + desc->multi_last->next = opp; + } + desc->multi_last = opp; +} + +static void enq_multi_op(tcp_descriptor *desc, char *buf, int req, + ErlDrvTermData caller, MultiTimerData *timeout, + ErlDrvMonitor *monitorp) +{ + int id = NEW_ASYNC_ID(); + enq_old_multi_op(desc,id,req,caller,timeout,monitorp); + if (buf != NULL) + put_int16(id, buf); +} + +static int deq_multi_op(tcp_descriptor *desc, int *id_p, int *req_p, + ErlDrvTermData *caller_p, MultiTimerData **timeout_p, + ErlDrvMonitor *monitorp) +{ + inet_async_multi_op *opp; + opp = desc->multi_first; + if (!opp) { + return -1; + } + desc->multi_first = opp->next; + if (desc->multi_first == NULL) { + desc->multi_last = NULL; + } + *id_p = opp->op.id; + *req_p = opp->op.req; + *caller_p = opp->op.caller; + if (timeout_p != NULL) { + *timeout_p = opp->op.tmo.mtd; + } + if (monitorp != NULL) { + memcpy(monitorp,&(opp->op.monitor),sizeof(ErlDrvMonitor)); + } + FREE(opp); + return 0; +} + +static int 
/*
 * Queue of pending asynchronous operations, kept in desc->op_queue
 * (INET_MAX_ASYNC slots) and used as a ring:
 *   desc->oph  - slot the next operation is written to
 *   desc->opt  - slot the next operation is read from
 * Both are NULL while the queue is empty; oph == opt with both non-NULL
 * means every slot is occupied.
 */

/* setup a new async id + caller (format async_id into buf) */

/*
 * Enqueue an operation of type "req" for the process returned by
 * driver_caller(), with the given timeout value and, if monitorp is not
 * NULL, a copy of *monitorp.  If buf is not NULL the new id is stored
 * there with put_int16().
 *
 * Returns 0 on success and -1 if the queue is full.  Note that an id is
 * drawn from NEW_ASYNC_ID() before the full check, so a rejected request
 * still consumes one.
 */
static int enq_async_w_tmo(inet_descriptor* desc, char* buf, int req, unsigned timeout,
			   ErlDrvMonitor *monitorp)
{
    int id = NEW_ASYNC_ID();
    inet_async_op* opp;

    if ((opp = desc->oph) == NULL)            /* queue empty */
	opp = desc->oph = desc->opt = desc->op_queue;
    else if (desc->oph == desc->opt) { /* queue full */
	DEBUGF(("enq(%ld): queue full\r\n", (long)desc->port));
	return -1;
    }

    opp->id = id;
    opp->caller = driver_caller(desc->port);
    opp->req = req;
    opp->tmo.value = timeout;
    if (monitorp != NULL) {
	memcpy(&(opp->monitor),monitorp,sizeof(ErlDrvMonitor));
    }

    DEBUGF(("enq(%ld): %d %ld %d\r\n",
	    (long) desc->port, opp->id, opp->caller, opp->req));

    /* advance the write position, wrapping at the end of op_queue */
    opp++;
    if (opp >= desc->op_queue + INET_MAX_ASYNC)
	desc->oph = desc->op_queue;
    else
	desc->oph = opp;

    if (buf != NULL)
	put_int16(id, buf);
    return 0;
}

/* Enqueue with timeout INET_INFINITY and no monitor. */
static int enq_async(inet_descriptor* desc, char* buf, int req)
{
    return enq_async_w_tmo(desc,buf,req,INET_INFINITY, NULL);
}

/*
 * Dequeue the oldest operation.
 *
 * ap, cp and rp receive its id, caller and request type and must not be
 * NULL.  tp (timeout value) and monitorp (monitor copy) are optional and
 * are skipped when NULL.
 *
 * Returns 0 on success and -1 if the queue is empty.
 */
static int deq_async_w_tmo(inet_descriptor* desc, int* ap, ErlDrvTermData* cp,
			   int* rp, unsigned *tp, ErlDrvMonitor *monitorp)
{
    inet_async_op* opp;

    if ((opp = desc->opt) == NULL) {  /* queue empty */
	DEBUGF(("deq(%ld): queue empty\r\n", (long)desc->port));
	return -1;
    }
    *ap = opp->id;
    *cp = opp->caller;
    *rp = opp->req;
    if (tp != NULL) {
	*tp = opp->tmo.value;
    }
    if (monitorp != NULL) {
	memcpy(monitorp,&(opp->monitor),sizeof(ErlDrvMonitor));
    }

    DEBUGF(("deq(%ld): %d %ld %d\r\n",
	    (long)desc->port, opp->id, opp->caller, opp->req));

    /* advance the read position, wrapping at the end of op_queue */
    opp++;
    if (opp >= desc->op_queue + INET_MAX_ASYNC)
	desc->opt = desc->op_queue;
    else
	desc->opt = opp;

    /* read position caught up with write position: queue is now empty */
    if (desc->opt == desc->oph)
	desc->opt = desc->oph = NULL;
    return 0;
}

/* Dequeue without retrieving the timeout value or the monitor. */
static int deq_async(inet_descriptor* desc, int* ap, ErlDrvTermData* cp, int* rp)
{
    return deq_async_w_tmo(desc,ap,cp,rp,NULL,NULL);
}
ErlDrvTermData Port, int Ref, + ErlDrvTermData recipient, ErlDrvTermData Reason) +{ + ErlDrvTermData spec[3*LOAD_ATOM_CNT + LOAD_PORT_CNT + + LOAD_INT_CNT + 2*LOAD_TUPLE_CNT]; + int i = 0; + + i = 0; + i = LOAD_ATOM(spec, i, am_inet_async); + i = LOAD_PORT(spec, i, Port); + i = LOAD_INT(spec, i, Ref); + { + i = LOAD_ATOM(spec, i, am_error); + i = LOAD_ATOM(spec, i, Reason); + i = LOAD_TUPLE(spec, i, 2); + } + i = LOAD_TUPLE(spec, i, 4); + ASSERT(i == sizeof(spec)/sizeof(*spec)); + DEBUGF(("send_async_error %ld %ld\r\n", recipient, Reason)); + return driver_send_term(port, recipient, spec, i); +} + + +static int async_ok(inet_descriptor* desc) +{ + int req; + int aid; + ErlDrvTermData caller; + + if (deq_async(desc, &aid, &caller, &req) < 0) + return -1; + return send_async_ok(desc->port, desc->dport, aid, caller); +} + +static int async_ok_port(inet_descriptor* desc, ErlDrvTermData Port2) +{ + int req; + int aid; + ErlDrvTermData caller; + + if (deq_async(desc, &aid, &caller, &req) < 0) + return -1; + return send_async_ok_port(desc->port, desc->dport, aid, caller, Port2); +} + +static int async_error_am(inet_descriptor* desc, ErlDrvTermData reason) +{ + int req; + int aid; + ErlDrvTermData caller; + + if (deq_async(desc, &aid, &caller, &req) < 0) + return -1; + return send_async_error(desc->port, desc->dport, aid, caller, + reason); +} + +/* dequeue all operations */ +static int async_error_am_all(inet_descriptor* desc, ErlDrvTermData reason) +{ + int req; + int aid; + ErlDrvTermData caller; + + while (deq_async(desc, &aid, &caller, &req) == 0) { + send_async_error(desc->port, desc->dport, aid, caller, + reason); + } + return 0; +} + + +static int async_error(inet_descriptor* desc, int err) +{ + return async_error_am(desc, error_atom(err)); +} + +/* send: +** {inet_reply, S, ok} +*/ + +static int inet_reply_ok(inet_descriptor* desc) +{ + ErlDrvTermData spec[2*LOAD_ATOM_CNT + LOAD_PORT_CNT + LOAD_TUPLE_CNT]; + ErlDrvTermData caller = desc->caller; + int i = 0; + + i 
= LOAD_ATOM(spec, i, am_inet_reply); + i = LOAD_PORT(spec, i, desc->dport); + i = LOAD_ATOM(spec, i, am_ok); + i = LOAD_TUPLE(spec, i, 3); + ASSERT(i == sizeof(spec)/sizeof(*spec)); + + desc->caller = 0; + return driver_send_term(desc->port, caller, spec, i); +} + +/* send: +** {inet_reply, S, {error, Reason}} +*/ +static int inet_reply_error_am(inet_descriptor* desc, ErlDrvTermData reason) +{ + ErlDrvTermData spec[3*LOAD_ATOM_CNT + LOAD_PORT_CNT + 2*LOAD_TUPLE_CNT]; + ErlDrvTermData caller = desc->caller; + int i = 0; + + i = LOAD_ATOM(spec, i, am_inet_reply); + i = LOAD_PORT(spec, i, desc->dport); + i = LOAD_ATOM(spec, i, am_error); + i = LOAD_ATOM(spec, i, reason); + i = LOAD_TUPLE(spec, i, 2); + i = LOAD_TUPLE(spec, i, 3); + ASSERT(i == sizeof(spec)/sizeof(*spec)); + desc->caller = 0; + + DEBUGF(("inet_reply_error_am %ld %ld\r\n", caller, reason)); + return driver_send_term(desc->port, caller, spec, i); +} + +/* send: +** {inet_reply, S, {error, Reason}} +*/ +static int inet_reply_error(inet_descriptor* desc, int err) +{ + return inet_reply_error_am(desc, error_atom(err)); +} + +/* +** Deliver port data from buffer +*/ +static int inet_port_data(inet_descriptor* desc, const char* buf, int len) +{ + unsigned int hsz = desc->hsz; + + DEBUGF(("inet_port_data(%ld): len = %d\r\n", (long)desc->port, len)); + + if ((desc->mode == INET_MODE_LIST) || (hsz > len)) + return driver_output2(desc->port, (char*)buf, len, NULL, 0); + else if (hsz > 0) + return driver_output2(desc->port, (char*)buf, hsz, (char*)buf+hsz, len-hsz); + else + return driver_output(desc->port, (char*)buf, len); +} + +/* +** Deliver port data from binary (for an active mode socket) +*/ +static int +inet_port_binary_data(inet_descriptor* desc, ErlDrvBinary* bin, int offs, int len) +{ + unsigned int hsz = desc->hsz; + + DEBUGF(("inet_port_binary_data(%ld): offs=%d, len = %d\r\n", + (long)desc->port, offs, len)); + + if ((desc->mode == INET_MODE_LIST) || (hsz > len)) + return driver_output2(desc->port, 
bin->orig_bytes+offs, len, NULL, 0); + else + return driver_output_binary(desc->port, bin->orig_bytes+offs, hsz, + bin, offs+hsz, len-hsz); +} + +static ErlDrvTermData am_http_eoh; +static ErlDrvTermData am_http_header; +static ErlDrvTermData am_http_request; +static ErlDrvTermData am_http_response; +static ErlDrvTermData am_http_error; +static ErlDrvTermData am_abs_path; +static ErlDrvTermData am_absoluteURI; +static ErlDrvTermData am_star; +static ErlDrvTermData am_undefined; +static ErlDrvTermData am_http; +static ErlDrvTermData am_https; +static ErlDrvTermData am_scheme; + +static int http_load_string(tcp_descriptor* desc, ErlDrvTermData* spec, int i, + const char* str, int len) +{ + if (desc->inet.htype >= TCP_PB_HTTP_BIN) { + ASSERT(desc->inet.htype == TCP_PB_HTTP_BIN || + desc->inet.htype == TCP_PB_HTTPH_BIN); + i = LOAD_BUF2BINARY(spec, i, str, len); + } else { + i = LOAD_STRING(spec, i, str, len); + } + return i; +} + +static int http_response_inetdrv(void *arg, int major, int minor, + int status, const char* phrase, int phrase_len) +{ + tcp_descriptor* desc = (tcp_descriptor*) arg; + int i = 0; + ErlDrvTermData spec[27]; + ErlDrvTermData caller; + + if (desc->inet.active == INET_PASSIVE) { + /* {inet_async,S,Ref,{ok,{http_response,Version,Status,Phrase}}} */ + int req; + int aid; + + if (deq_async(INETP(desc), &aid, &caller, &req) < 0) + return -1; + i = LOAD_ATOM(spec, i, am_inet_async); + i = LOAD_PORT(spec, i, desc->inet.dport); + i = LOAD_INT(spec, i, aid); + i = LOAD_ATOM(spec, i, am_ok); + } + else { + /* {http, S, {http_response,Version,Status,Phrase}} */ + i = LOAD_ATOM(spec, i, am_http); + i = LOAD_PORT(spec, i, desc->inet.dport); + } + i = LOAD_ATOM(spec, i, am_http_response); + i = LOAD_INT(spec, i, major); + i = LOAD_INT(spec, i, minor); + i = LOAD_TUPLE(spec, i, 2); + i = LOAD_INT(spec, i, status); + i = http_load_string(desc, spec, i, phrase, phrase_len); + i = LOAD_TUPLE(spec, i, 4); + + if (desc->inet.active == INET_PASSIVE) { + i = 
LOAD_TUPLE(spec, i, 2); + i = LOAD_TUPLE(spec, i, 4); + ASSERT(i<=27); + return driver_send_term(desc->inet.port, caller, spec, i); + } + else { + i = LOAD_TUPLE(spec, i, 3); + ASSERT(i<=27); + return driver_output_term(desc->inet.port, spec, i); + } +} + +static int http_load_uri(tcp_descriptor* desc, ErlDrvTermData* spec, int i, + const PacketHttpURI* uri) +{ + ErlDrvTermData scheme; + + switch (uri->type) { + case URI_STAR: + i = LOAD_ATOM(spec, i, am_star); + break; + case URI_ABS_PATH: + i = LOAD_ATOM(spec, i, am_abs_path); + i = http_load_string(desc, spec, i, uri->s1_ptr, uri->s1_len); + i = LOAD_TUPLE(spec, i, 2); + break; + case URI_HTTP: + scheme = am_http; + goto http_common; + case URI_HTTPS: + scheme = am_https; + http_common: + i = LOAD_ATOM(spec, i, am_absoluteURI); + i = LOAD_ATOM(spec, i, scheme); + i = http_load_string(desc, spec, i, uri->s1_ptr, uri->s1_len); + if (uri->port == 0) { + i = LOAD_ATOM(spec, i, am_undefined); + } else { + i = LOAD_INT(spec, i, uri->port); + } + i = http_load_string(desc, spec, i, uri->s2_ptr, uri->s2_len); + i = LOAD_TUPLE(spec, i, 5); + break; + + case URI_STRING: + i = http_load_string(desc, spec, i, uri->s1_ptr, uri->s1_len); + break; + case URI_SCHEME: + i = LOAD_ATOM(spec, i, am_scheme); + i = http_load_string(desc, spec, i, uri->s1_ptr, uri->s1_len); + i = http_load_string(desc, spec, i, uri->s2_ptr, uri->s2_len); + i = LOAD_TUPLE(spec, i, 3); + } + return i; +} + + +static int +http_request_inetdrv(void* arg, const http_atom_t* meth, const char* meth_ptr, + int meth_len, const PacketHttpURI* uri, + int major, int minor) +{ + tcp_descriptor* desc = (tcp_descriptor*) arg; + int i = 0; + ErlDrvTermData spec[43]; + ErlDrvTermData caller; + + if (desc->inet.active == INET_PASSIVE) { + /* {inet_async, S, Ref, {ok,{http_request,Meth,Uri,Version}}} */ + int req; + int aid; + + if (deq_async(INETP(desc), &aid, &caller, &req) < 0) + return -1; + i = LOAD_ATOM(spec, i, am_inet_async); + i = LOAD_PORT(spec, i, 
desc->inet.dport); + i = LOAD_INT(spec, i, aid); + i = LOAD_ATOM(spec, i, am_ok); + } + else { + /* {http, S, {http_request,Meth,Uri,Version}}} */ + i = LOAD_ATOM(spec, i, am_http); + i = LOAD_PORT(spec, i, desc->inet.dport); + } + + i = LOAD_ATOM(spec, i, am_http_request); + if (meth != NULL) + i = LOAD_ATOM(spec, i, meth->atom); + else + i = http_load_string(desc, spec, i, meth_ptr, meth_len); + i = http_load_uri(desc, spec, i, uri); + i = LOAD_INT(spec, i, major); + i = LOAD_INT(spec, i, minor); + i = LOAD_TUPLE(spec, i, 2); + i = LOAD_TUPLE(spec, i, 4); + + if (desc->inet.active == INET_PASSIVE) { + i = LOAD_TUPLE(spec, i, 2); + i = LOAD_TUPLE(spec, i, 4); + ASSERT(i <= 43); + return driver_send_term(desc->inet.port, caller, spec, i); + } + else { + i = LOAD_TUPLE(spec, i, 3); + ASSERT(i <= 43); + return driver_output_term(desc->inet.port, spec, i); + } +} + +static int +http_header_inetdrv(void* arg, const http_atom_t* name, const char* name_ptr, + int name_len, const char* value_ptr, int value_len) +{ + tcp_descriptor* desc = (tcp_descriptor*) arg; + int i = 0; + ErlDrvTermData spec[26]; + ErlDrvTermData caller; + + if (desc->inet.active == INET_PASSIVE) { + /* {inet_async,S,Ref,{ok,{http_header,Bit,Name,IValue,Value}} */ + int req; + int aid; + + + if (deq_async(INETP(desc), &aid, &caller, &req) < 0) + return -1; + i = LOAD_ATOM(spec, i, am_inet_async); + i = LOAD_PORT(spec, i, desc->inet.dport); + i = LOAD_INT(spec, i, aid); + i = LOAD_ATOM(spec, i, am_ok); + } + else { + /* {http, S, {http_header,Bit,Name,IValue,Value}} */ + i = LOAD_ATOM(spec, i, am_http); + i = LOAD_PORT(spec, i, desc->inet.dport); + } + + i = LOAD_ATOM(spec, i, am_http_header); + if (name != NULL) { + i = LOAD_INT(spec, i, name->index+1); + i = LOAD_ATOM(spec, i, name->atom); + } + else { + i = LOAD_INT(spec, i, 0); + i = http_load_string(desc, spec, i, name_ptr, name_len); + } + i = LOAD_ATOM(spec, i, am_undefined); + i = http_load_string(desc, spec, i, value_ptr, value_len); + i = 
LOAD_TUPLE(spec, i, 5); + + if (desc->inet.active == INET_PASSIVE) { + i = LOAD_TUPLE(spec, i, 2); + i = LOAD_TUPLE(spec, i, 4); + ASSERT(i <= 26); + return driver_send_term(desc->inet.port, caller, spec, i); + } + else { + i = LOAD_TUPLE(spec, i, 3); + ASSERT(i <= 26); + return driver_output_term(desc->inet.port, spec, i); + } +} + +static int http_eoh_inetdrv(void* arg) +{ + tcp_descriptor* desc = (tcp_descriptor*) arg; + int i = 0; + ErlDrvTermData spec[14]; + + if (desc->inet.active == INET_PASSIVE) { + /* {inet_async,S,Ref,{ok,http_eoh}} */ + int req; + int aid; + ErlDrvTermData caller; + + if (deq_async(INETP(desc), &aid, &caller, &req) < 0) + return -1; + i = LOAD_ATOM(spec, i, am_inet_async); + i = LOAD_PORT(spec, i, desc->inet.dport); + i = LOAD_INT(spec, i, aid); + i = LOAD_ATOM(spec, i, am_ok); + i = LOAD_ATOM(spec, i, am_http_eoh); + i = LOAD_TUPLE(spec, i, 2); + i = LOAD_TUPLE(spec, i, 4); + ASSERT(i <= 14); + return driver_send_term(desc->inet.port, caller, spec, i); + } + else { + /* {http, S, http_eoh} */ + i = LOAD_ATOM(spec, i, am_http); + i = LOAD_PORT(spec, i, desc->inet.dport); + i = LOAD_ATOM(spec, i, am_http_eoh); + i = LOAD_TUPLE(spec, i, 3); + ASSERT(i <= 14); + return driver_output_term(desc->inet.port, spec, i); + } +} + +static int http_error_inetdrv(void* arg, const char* buf, int len) +{ + tcp_descriptor* desc = (tcp_descriptor*) arg; + int i = 0; + ErlDrvTermData spec[19]; + + if (desc->inet.active == INET_PASSIVE) { + /* {inet_async,S,Ref,{error,{http_error,Line}}} */ + int req; + int aid; + ErlDrvTermData caller; + + if (deq_async(INETP(desc), &aid, &caller, &req) < 0) + return -1; + i = LOAD_ATOM(spec, i, am_inet_async); + i = LOAD_PORT(spec, i, desc->inet.dport); + i = LOAD_INT(spec, i, aid); + i = LOAD_ATOM(spec, i, am_error); + i = LOAD_ATOM(spec, i, am_http_error); + i = http_load_string(desc, spec, i, buf, len); + i = LOAD_TUPLE(spec, i, 2); + i = LOAD_TUPLE(spec, i, 2); + i = LOAD_TUPLE(spec, i, 4); + ASSERT(i <= 19); + 
return driver_send_term(desc->inet.port, caller, spec, i); + } + else { + /* {http, S, {http_error,Line} */ + i = LOAD_ATOM(spec, i, am_http); + i = LOAD_PORT(spec, i, desc->inet.dport); + i = LOAD_ATOM(spec, i, am_http_error); + i = http_load_string(desc, spec, i, buf, len); + i = LOAD_TUPLE(spec, i, 2); + i = LOAD_TUPLE(spec, i, 3); + ASSERT(i <= 19); + return driver_output_term(desc->inet.port, spec, i); + } +} + + +static +int ssl_tls_inetdrv(void* arg, unsigned type, unsigned major, unsigned minor, + const char* buf, int len, const char* prefix, int plen) +{ + tcp_descriptor* desc = (tcp_descriptor*) arg; + int i = 0; + ErlDrvTermData spec[28]; + ErlDrvTermData caller; + ErlDrvBinary* bin; + int ret; + + if ((bin = driver_alloc_binary(plen+len)) == NULL) + return async_error(&desc->inet, ENOMEM); + memcpy(bin->orig_bytes+plen, buf, len); + if (plen) { + memcpy(bin->orig_bytes, prefix, plen); + len += plen; + } + + if (desc->inet.active == INET_PASSIVE) { + /* {inet_async,S,Ref,{ok,{ssl_tls,...}}} */ + int req; + int aid; + + if (deq_async(INETP(desc), &aid, &caller, &req) < 0) { + ret = -1; + goto done; + } + i = LOAD_ATOM(spec, i, am_inet_async); + i = LOAD_PORT(spec, i, desc->inet.dport); + i = LOAD_INT(spec, i, aid); + i = LOAD_ATOM(spec, i, am_ok); + } + + /* {ssl_tls,S,ContentType,{Major,Minor},Bin} */ + i = LOAD_ATOM(spec, i, am_ssl_tls); + i = LOAD_PORT(spec, i, desc->inet.dport); + i = LOAD_INT(spec, i, type); + i = LOAD_INT(spec, i, major); + i = LOAD_INT(spec, i, minor); + i = LOAD_TUPLE(spec, i, 2); + i = LOAD_BINARY(spec, i, bin, 0, len); + i = LOAD_TUPLE(spec, i, 5); + + if (desc->inet.active == INET_PASSIVE) { + i = LOAD_TUPLE(spec, i, 2); + i = LOAD_TUPLE(spec, i, 4); + ASSERT(i <= 28); + ret = driver_send_term(desc->inet.port, caller, spec, i); + } + else { + ASSERT(i <= 28); + ret = driver_output_term(desc->inet.port, spec, i); + } +done: + driver_free_binary(bin); + return ret; +} + + +static PacketCallbacks packet_callbacks = +{ + 
http_response_inetdrv, + http_request_inetdrv, + http_eoh_inetdrv, + http_header_inetdrv, + http_error_inetdrv, + ssl_tls_inetdrv +}; + + +/* +** passive mode reply: +** {inet_async, S, Ref, {ok,[H1,...Hsz | Data]}} +** NB: this is for TCP only; +** UDP and SCTP use inet_async_binary_data . +*/ +static int inet_async_data(inet_descriptor* desc, const char* buf, int len) +{ + unsigned int hsz = desc->hsz; + ErlDrvTermData spec[20]; + ErlDrvTermData caller; + int req; + int aid; + int i = 0; + + DEBUGF(("inet_async_data(%ld): len = %d\r\n", (long)desc->port, len)); + + if (deq_async(desc, &aid, &caller, &req) < 0) + return -1; + + i = LOAD_ATOM(spec, i, am_inet_async); + i = LOAD_PORT(spec, i, desc->dport); + i = LOAD_INT(spec, i, aid); + + i = LOAD_ATOM(spec, i, am_ok); + if ((desc->mode == INET_MODE_LIST) || (hsz > len)) { + i = LOAD_STRING(spec, i, buf, len); /* => [H1,H2,...Hn] */ + i = LOAD_TUPLE(spec, i, 2); + i = LOAD_TUPLE(spec, i, 4); + ASSERT(i == 15); + desc->caller = 0; + return driver_send_term(desc->port, caller, spec, i); + } + else { + /* INET_MODE_BINARY => [H1,H2,...HSz | Binary] */ + int sz = len - hsz; + int code; + + i = LOAD_BUF2BINARY(spec, i, buf+hsz, sz); + if (hsz > 0) + i = LOAD_STRING_CONS(spec, i, buf, hsz); + i = LOAD_TUPLE(spec, i, 2); + i = LOAD_TUPLE(spec, i, 4); + ASSERT(i <= 20); + desc->caller = 0; + code = driver_send_term(desc->port, caller, spec, i); + return code; + } +} + +#ifdef HAVE_SCTP +/* +** SCTP-related atoms: +*/ +static ErlDrvTermData am_sctp_rtoinfo, /* Option names */ + am_sctp_associnfo, am_sctp_initmsg, + am_sctp_autoclose, am_sctp_nodelay, + am_sctp_disable_fragments, am_sctp_i_want_mapped_v4_addr, + am_sctp_maxseg, am_sctp_set_peer_primary_addr, + am_sctp_primary_addr, am_sctp_adaptation_layer, + am_sctp_peer_addr_params, am_sctp_default_send_param, + am_sctp_events, am_sctp_delayed_ack_time, + am_sctp_status, am_sctp_get_peer_addr_info, + + /* Record names */ + am_sctp_sndrcvinfo, am_sctp_assoc_change, + 
am_sctp_paddr_change, am_sctp_remote_error, + am_sctp_send_failed, am_sctp_shutdown_event, + am_sctp_adaptation_event, am_sctp_pdapi_event, + am_sctp_assocparams, am_sctp_prim, + am_sctp_setpeerprim, am_sctp_setadaptation, + am_sctp_paddrparams, am_sctp_event_subscribe, + am_sctp_assoc_value, am_sctp_paddrinfo, + + /* For #sctp_sndrcvinfo{}: */ + am_unordered, am_addr_over, + am_abort, am_eof, + + /* For #sctp_assoc_change{}: */ + am_comm_up, am_comm_lost, + am_restart, am_shutdown_comp, + am_cant_assoc, + + /* For #sctp_paddr_change{}: */ + am_addr_available, am_addr_unreachable, + am_addr_removed, am_addr_added, + am_addr_made_prim, am_addr_confirmed, + + /* For #sctp_remote_error{}: */ + am_short_recv, am_wrong_anc_data, + + /* For #sctp_pdap_event{}: */ + am_partial_delivery_aborted, + + /* For #sctp_paddrparams{}: */ + am_hb_enable, am_hb_disable, + am_hb_demand, am_pmtud_enable, + am_pmtud_disable, am_sackdelay_enable, + am_sackdelay_disable, + + /* For #sctp_paddrinfo{}: */ + am_active, am_inactive, + + /* For #sctp_status{}: */ + am_empty, am_closed, + am_cookie_wait, am_cookie_echoed, + am_established, am_shutdown_pending, + am_shutdown_sent, am_shutdown_received, + am_shutdown_ack_sent; + /* Not yet implemented in the Linux kernel: + ** am_bound, am_listen; + */ + +/* +** Parsing of "sctp_sndrcvinfo": ancillary data coming with received msgs. 
+** This function is mainly used by "sctp_parse_ancillary_data", but also +** by "sctp_parse_async_event" in case of SCTP_SEND_FAILED: +*/ +#define SCTP_PARSE_SNDRCVINFO_CNT \ + (5*LOAD_ATOM_CNT + 5*LOAD_INT_CNT + 2*LOAD_UINT_CNT + \ + LOAD_NIL_CNT + LOAD_LIST_CNT + LOAD_ASSOC_ID_CNT + LOAD_TUPLE_CNT) +static int sctp_parse_sndrcvinfo + (ErlDrvTermData * spec, int i, struct sctp_sndrcvinfo * sri) +{ + int n; + + i = LOAD_ATOM (spec, i, am_sctp_sndrcvinfo); + i = LOAD_INT (spec, i, sri->sinfo_stream); + i = LOAD_INT (spec, i, sri->sinfo_ssn); + /* Now Flags, as a list: */ + n = 0; + if (sri->sinfo_flags & SCTP_UNORDERED) + { i = LOAD_ATOM (spec, i, am_unordered); n++; } + + if (sri->sinfo_flags & SCTP_ADDR_OVER) + { i = LOAD_ATOM (spec, i, am_addr_over); n++; } + + if (sri->sinfo_flags & SCTP_ABORT) + { i = LOAD_ATOM (spec, i, am_abort); n++; } + + if (sri->sinfo_flags & SCTP_EOF) + { i = LOAD_ATOM (spec, i, am_eof); n++; } + + /* SCTP_SENDALL is not yet supported by the Linux kernel */ + i = LOAD_NIL (spec, i); + i = LOAD_LIST (spec, i, n+1); + + /* Continue with other top-level fields: */ + i = LOAD_INT (spec, i, sock_ntohl(sri->sinfo_ppid)); + i = LOAD_INT (spec, i, sri->sinfo_context); + i = LOAD_INT (spec, i, sri->sinfo_timetolive); + i = LOAD_UINT (spec, i, sri->sinfo_tsn); + i = LOAD_UINT (spec, i, sri->sinfo_cumtsn); + i = LOAD_ASSOC_ID (spec, i, sri->sinfo_assoc_id); + + /* Close up the record: */ + i = LOAD_TUPLE (spec, i, 10); + return i; +} + +/* +** This function skips non-SCTP ancillary data, returns SCTP-specific anc.data +** (currently "sctp_sndrcvinfo" only) as a list of records: +*/ +static int sctp_parse_ancillary_data + (ErlDrvTermData * spec, int i, struct msghdr * mptr) +{ + /* First of all, check for ancillary data: */ + struct cmsghdr * cmsg, * frst_msg = CMSG_FIRSTHDR(mptr); + int s = 0; + for (cmsg = frst_msg; cmsg != NULL; cmsg = CMSG_NXTHDR(mptr,cmsg)) + { + struct sctp_sndrcvinfo * sri; + + /* Skip other possible ancillary data, e.g. 
from IPv6: */ + if (cmsg->cmsg_level != IPPROTO_SCTP || + cmsg->cmsg_type != SCTP_SNDRCV) + continue; + + if (((char*)cmsg + cmsg->cmsg_len) - (char*)frst_msg > + mptr->msg_controllen) + /* MUST check this in Linux -- the returned "cmsg" may actually + go too far! */ + break; + + /* The ONLY kind of ancillary SCTP data which can occur on receiving + is "sctp_sndrcvinfo" (on sending, "sctp_initmsg" can be specified + by the user). So parse this type: + */ + sri = (struct sctp_sndrcvinfo*) CMSG_DATA(cmsg); + i = sctp_parse_sndrcvinfo (spec, i, sri); + s ++; + } + /* Now make the list of tuples created above. Normally, it will be [] or + a singleton list. The list must first be closed with NIL, otherwise + traversing it in Erlang would be problematic: + */ + i = LOAD_NIL (spec, i); + i = LOAD_LIST(spec, i, s+1); + return i; +} + +/* +** Parsing of ERROR and ABORT SCTP chunks. The function returns a list of error +** causes (as atoms). The chunks also contain some extended cause info, but it +** is not very detailed anyway, and of no interest at the user level (it only +** concerns the protocol implementation), so we omit it: +*/ +static int sctp_parse_error_chunk + (ErlDrvTermData * spec, int i, char * chunk, int chlen) +{ + /* The "chunk" itself contains its length, which must not be greater than + the "chlen" derived from the over-all msg size: + */ + char *causes, *cause; + int coff, /* Cause offset */ + ccode, /* Cause code */ + clen, /* cause length */ + s; + int len = sock_ntohs (*((uint16_t*)(chunk+2))); + ASSERT(len >= 4 && len <= chlen); + + causes = chunk + 4; + coff = 0; + len -= 4; /* Total length of the "causes" fields */ + cause = causes; + s = 0; + + while (coff < len) + { + ccode = sock_ntohs (*((uint16_t*)(cause))); + clen = sock_ntohs (*((uint16_t*)(cause + 2))); + if (clen <= 0) + /* Strange, but must guard against that! 
*/ + break; + + /* Install the corresp atom for this "ccode": */ + i = LOAD_INT (spec, i, ccode); + cause += clen; + coff += clen; + s ++; + } + i = LOAD_NIL (spec, i); + i = LOAD_LIST(spec, i, s+1); + return i; +} + +/* +** Parsing of SCTP notification events. NB: they are NOT ancillary data: they +** are sent IN PLACE OF, not in conjunction with, the normal data: +*/ +static int sctp_parse_async_event + (ErlDrvTermData * spec, int i, int ok_pos, + ErlDrvTermData error_atom, inet_descriptor* desc, + ErlDrvBinary * bin, int offs, int sz) +{ + char* body = bin->orig_bytes + offs; + union sctp_notification * nptr = (union sctp_notification *) body; + + switch (nptr->sn_header.sn_type) + { + case SCTP_ASSOC_CHANGE: + { /* {sctp_assoc_change, + State : Atom(), + Error : Atom(), + OutBoundStreams : Int(), + InBoundStreams : Int(), + AssocID : Int(), + // AbortCauses : [Atom()] // NOT YET IMPLEMENTED + } + */ + struct sctp_assoc_change* sptr = &(nptr->sn_assoc_change); + ASSERT(sptr->sac_length <= sz); /* No buffer overrun */ + + i = LOAD_ATOM (spec, i, am_sctp_assoc_change); + + switch (sptr->sac_state) + { + case SCTP_COMM_UP: + i = LOAD_ATOM (spec, i, am_comm_up); + break; + case SCTP_COMM_LOST: + i = LOAD_ATOM (spec, i, am_comm_lost); + break; + case SCTP_RESTART: + i = LOAD_ATOM (spec, i, am_restart); + break; + case SCTP_SHUTDOWN_COMP: + i = LOAD_ATOM (spec, i, am_shutdown_comp); + break; + case SCTP_CANT_STR_ASSOC: + i = LOAD_ATOM (spec, i, am_cant_assoc); + break; + default: + ASSERT(0); + } + i = LOAD_INT (spec, i, sptr->sac_error); + i = LOAD_INT (spec, i, sptr->sac_outbound_streams); + i = LOAD_INT (spec, i, sptr->sac_inbound_streams); + i = LOAD_INT (spec, i, sptr->sac_assoc_id); + + /* The ABORT chunk may or may not be present at the end, depending + on whether there was really an ABORT. In the Linux Kernel SCTP + implementation, this chunk is not delivered anyway, so we leave + it out. 
Just close up the tuple: + */ + i = LOAD_TUPLE (spec, i, 6); + break; + } + + case SCTP_PEER_ADDR_CHANGE: + { /* {sctp_paddr_change, + AffectedAddr : String(), + State : Atom(), + Error : Atom(), + AssocID : Int() + } + */ + struct sctp_paddr_change* sptr = &(nptr->sn_paddr_change); + ASSERT(sptr->spc_length <= sz); /* No buffer overrun */ + + i = LOAD_ATOM (spec, i, am_sctp_paddr_change); + i = load_ip_and_port(spec, i, desc, &sptr->spc_aaddr); + + switch (sptr->spc_state) + { + case SCTP_ADDR_AVAILABLE: + i = LOAD_ATOM (spec, i, am_addr_available); + break; + case SCTP_ADDR_UNREACHABLE: + i = LOAD_ATOM (spec, i, am_addr_unreachable); + break; + case SCTP_ADDR_REMOVED: + i = LOAD_ATOM (spec, i, am_addr_removed); + break; + case SCTP_ADDR_ADDED: + i = LOAD_ATOM (spec, i, am_addr_added); + break; + case SCTP_ADDR_MADE_PRIM: + i = LOAD_ATOM (spec, i, am_addr_made_prim); + break; +#if HAVE_DECL_SCTP_ADDR_CONFIRMED + case SCTP_ADDR_CONFIRMED: + i = LOAD_ATOM (spec, i, am_addr_confirmed); + break; +#endif + default: + ASSERT(0); + } + i = LOAD_INT (spec, i, sptr->spc_error); + i = LOAD_INT (spec, i, sptr->spc_assoc_id); + i = LOAD_TUPLE (spec, i, 5); + break; + } + + case SCTP_REMOTE_ERROR: + { /* This is an error condition, so we return an error term + {sctp_remote_error, + Error : Int(), + AssocID : Int(), + RemoteCauses : [Atom()] // Remote Error flags + } + */ + char *chunk; + int chlen; + struct sctp_remote_error * sptr = &(nptr->sn_remote_error); + ASSERT(sptr->sre_length <= sz); /* No buffer overrun */ + + /* Over-write the prev part of the response with an error: */ + (void)LOAD_ATOM(spec, ok_pos, error_atom); + + /* Continue from the curr pos: */ + i = LOAD_ATOM (spec, i, am_sctp_remote_error); + + i = LOAD_INT (spec, i, sock_ntohs(sptr->sre_error)); + i = LOAD_INT (spec, i, sptr->sre_assoc_id); + +# ifdef HAVE_STRUCT_SCTP_REMOTE_ERROR_SRE_DATA + chunk = (char*) (&(sptr->sre_data)); +# else + chunk = ((char*)sptr) + sizeof(*sptr); +# endif + chlen = 
sptr->sre_length - (chunk - (char *)sptr); + i = sctp_parse_error_chunk(spec, i, chunk, chlen); + + i = LOAD_TUPLE (spec, i, 4); + /* The {error, {...}} will be closed by the caller */ + break; + } + + case SCTP_SEND_FAILED: + { /* {sctp_send_failed, + DataSent : Atom() // true or false + Error : Atom(), + OrigInfo : Tuple(), + AssocID : Int(), + OrigData : Binary() + } + This is also an ERROR condition -- overwrite the 'ok': + */ + char *chunk; + int chlen, choff; + struct sctp_send_failed * sptr = &(nptr->sn_send_failed); + ASSERT(sptr->ssf_length <= sz); /* No buffer overrun */ + + /* Over-write 'ok' with 'error', continue from curr "i": */ + (void)LOAD_ATOM(spec, ok_pos, error_atom); + + i = LOAD_ATOM (spec, i, am_sctp_send_failed); + switch (sptr->ssf_flags) { + case SCTP_DATA_SENT: + i = LOAD_ATOM (spec, i, am_true); + break; + case SCTP_DATA_UNSENT: + i = LOAD_ATOM (spec, i, am_false); + break; + default: + ASSERT(0); + } + i = LOAD_INT (spec, i, sptr->ssf_error); + /* Now parse the orig SCTP_SNDRCV info */ + i = sctp_parse_sndrcvinfo (spec, i, &sptr->ssf_info); + i = LOAD_ASSOC_ID (spec, i, sptr->ssf_assoc_id); + + /* Load the orig data chunk, as an unparsed binary. Note that + in LOAD_BINARY below, we must specify the offset wrt bin-> + orig_bytes. 
In Solaris 10, we don't have ssf_data: + */ +# ifdef HAVE_STRUCT_SCTP_SEND_FAILED_SSF_DATA + chunk = (char*) (&(sptr->ssf_data)); +# else + chunk = ((char*)sptr) + sizeof(*sptr); +# endif + chlen = sptr->ssf_length - (chunk - (char*) sptr); + choff = chunk - bin->orig_bytes; + + i = LOAD_BINARY(spec, i, bin, choff, chlen); + i = LOAD_TUPLE (spec, i, 6); + /* The {error, {...}} tuple is not yet closed */ + break; + } + + case SCTP_SHUTDOWN_EVENT: + { /* {sctp_shutdown_event, + AssocID : Int() + } + */ + struct sctp_shutdown_event * sptr = &(nptr->sn_shutdown_event); + + ASSERT (sptr->sse_length == sizeof(struct sctp_shutdown_event) && + sptr->sse_length <= sz); /* No buffer overrun */ + + i = LOAD_ATOM (spec, i, am_sctp_shutdown_event); + i = LOAD_INT (spec, i, sptr->sse_assoc_id); + i = LOAD_TUPLE (spec, i, 2); + break; + } + + case SCTP_ADAPTATION_INDICATION: + { /* {sctp_adaptation_event, + Indication : Atom(), + AssocID : Int() + } + */ + struct sctp_adaptation_event * sptr = + &(nptr->sn_adaptation_event); + ASSERT (sptr->sai_length == sizeof(struct sctp_adaptation_event) + && sptr->sai_length <= sz); /* No buffer overrun */ + + i = LOAD_ATOM (spec, i, am_sctp_adaptation_event); + i = LOAD_INT (spec, i, sock_ntohl(sptr->sai_adaptation_ind)); + i = LOAD_INT (spec, i, sptr->sai_assoc_id); + i = LOAD_TUPLE (spec, i, 3); + break; + } + + case SCTP_PARTIAL_DELIVERY_EVENT: + { /* It is not clear whether this event is sent to the sender + (when the receiver gets only a part of a message), or to + the receiver itself. 
In any case, we do not support partial + delivery of msgs in this implementation, so this is an error + condition: + {sctp_pdapi_event, sctp_partial_delivery_aborted, AssocID}: + */ + struct sctp_pdapi_event * sptr; + (void) LOAD_ATOM (spec, ok_pos, error_atom); + + sptr = &(nptr->sn_pdapi_event); + ASSERT (sptr->pdapi_length == sizeof(struct sctp_pdapi_event) && + sptr->pdapi_length <= sz); /* No buffer overrun */ + + i = LOAD_ATOM (spec, i, am_sctp_pdapi_event); + + /* Currently, there is only one indication possible: */ + ASSERT (sptr->pdapi_indication == SCTP_PARTIAL_DELIVERY_ABORTED); + + i = LOAD_ATOM (spec, i, am_partial_delivery_aborted); + i = LOAD_INT (spec, i, sptr->pdapi_assoc_id); + i = LOAD_TUPLE (spec, i, 3); + /* The {error, {...}} tuple is not yet closed */ + break; + } + + /* XXX: No more supported SCTP Event types. The standard also provides + SCTP_AUTHENTICATION_EVENT, but it is not implemented in the Linux + kernel, hence not supported here either. It is not possible to + request delivery of such events in this implementation, so they + cannot occur: + */ + default: ASSERT(0); + } + return i; +} +#endif /* HAVE_SCTP */ + +/* +** passive mode reply: +** for UDP: +** {inet_async, S, Ref, {ok, Data=[H1,...,Hsz | BinData]}} +** or (in the list mode) +** {inet_async, S, Ref, {ok, Data=[H1,...,Hsz]}} +** +** for SCTP: +** {inet_async, S, Ref, {ok, {[H1,...,HSz], [AncilData], Data_OR_Event}}} +** where each AncilDatum:Tuple(); +** Data:List() or Binary(), but if List(), then without the Addr part, +** which is moved in front; +** Event:Tuple(); +** or +** {inet_async, S, Ref, {error, {[H1,...,HSz], [AncilData], ErrorTerm}}} +** +** Cf: the output of send_async_error() is +** {inet_async, S, Ref, {error, Cause:Atom()}} +*/ +static int +inet_async_binary_data + (inet_descriptor* desc, unsigned int phsz, + ErlDrvBinary * bin, int offs, int len, void * extra) +{ + unsigned int hsz = desc->hsz + phsz; + ErlDrvTermData spec [PACKET_ERL_DRV_TERM_DATA_LEN]; + 
ErlDrvTermData caller = desc->caller; + int aid; + int req; + int i = 0; +#ifdef HAVE_SCTP + int ok_pos; +#endif + + DEBUGF(("inet_async_binary_data(%ld): offs=%d, len=%d\r\n", + (long)desc->port, offs, len)); + + if (deq_async(desc, &aid, &caller, &req) < 0) + return -1; + + i = LOAD_ATOM(spec, i, am_inet_async); /* 'inet_async' */ + i = LOAD_PORT(spec, i, desc->dport); /* S */ + i = LOAD_INT (spec, i, aid); /* Ref */ + +#ifdef HAVE_SCTP + /* Need to memoise the position of the 'ok' atom written, as it may + later be overridden by an 'error': */ + ok_pos = i; +#endif + i = LOAD_ATOM(spec, i, am_ok); + +#ifdef HAVE_SCTP + if (IS_SCTP(desc)) + { /* For SCTP we always have desc->hsz==0 (i.e., no application-level + headers are used), so hsz==phsz (see above): */ + struct msghdr* mptr; + int sz; + + ASSERT (hsz == phsz && hsz != 0); + sz = len - hsz; /* Size of the msg data proper, w/o the addr */ + + /* We always put the Addr as a list in front */ + i = LOAD_STRING(spec, i, bin->orig_bytes+offs, hsz); + + /* Put in the list (possibly empty) of Ancillary Data: */ + mptr = (struct msghdr *) extra; + i = sctp_parse_ancillary_data (spec, i, mptr); + + /* Then: Data or Event (Notification)? */ + if (mptr->msg_flags & MSG_NOTIFICATION) + /* This is an Event, parse it. It may indicate a normal or an error + condition; in the latter case, the 'ok' above is overridden by + an 'error', and the Event we receive contains the error term: */ + i = sctp_parse_async_event + (spec, i, ok_pos, am_error, desc, bin, offs+hsz, sz); + else + /* This is SCTP data, not a notification event. 
The data can be + returned as a List or as a Binary, similar to the generic case: + */ + if (desc->mode == INET_MODE_LIST) + /* INET_MODE_LIST => [H1,H2,...Hn], addr and data together, + butthe Addr has already been parsed, so start at offs+hsz: + */ + i = LOAD_STRING(spec, i, bin->orig_bytes+offs+hsz, sz); + else + /* INET_MODE_BINARY => Binary */ + i = LOAD_BINARY(spec, i, bin, offs+hsz, sz); + + /* Close up the {[H1,...,HSz], [AncilData], Event_OR_Data} tuple. This + is valid even in the case when Event is a error notification: */ + i = LOAD_TUPLE (spec, i, 3); + } + else +#endif /* HAVE_SCTP */ + /* Generic case. Both Addr and Data (or a single list of them together) are + returned: */ + + if ((desc->mode == INET_MODE_LIST) || (hsz > len)) { + /* INET_MODE_LIST => [H1,H2,...Hn] */ + i = LOAD_STRING(spec, i, bin->orig_bytes+offs, len); + } + else { + /* INET_MODE_BINARY => [H1,H2,...HSz | Binary] or [Binary]: */ + int sz = len - hsz; + i = LOAD_BINARY(spec, i, bin, offs+hsz, sz); + if (hsz > 0) + i = LOAD_STRING_CONS(spec, i, bin->orig_bytes+offs, hsz); + } + /* Close up the {ok, ...} or {error, ...} tuple: */ + i = LOAD_TUPLE(spec, i, 2); + + /* Close up the outer {inet_async, S, Ref, {ok|error, ...}} tuple: */ + i = LOAD_TUPLE(spec, i, 4); + + ASSERT(i <= PACKET_ERL_DRV_TERM_DATA_LEN); + desc->caller = 0; + return driver_send_term(desc->port, caller, spec, i); +} + +/* +** active mode message: +** {tcp, S, [H1,...Hsz | Data]} +*/ +static int tcp_message(inet_descriptor* desc, const char* buf, int len) +{ + unsigned int hsz = desc->hsz; + ErlDrvTermData spec[20]; + int i = 0; + + DEBUGF(("tcp_message(%ld): len = %d\r\n", (long)desc->port, len)); + + i = LOAD_ATOM(spec, i, am_tcp); + i = LOAD_PORT(spec, i, desc->dport); + + if ((desc->mode == INET_MODE_LIST) || (hsz > len)) { + i = LOAD_STRING(spec, i, buf, len); /* => [H1,H2,...Hn] */ + i = LOAD_TUPLE(spec, i, 3); + ASSERT(i <= 20); + return driver_output_term(desc->port, spec, i); + } + else { + /* 
INET_MODE_BINARY => [H1,H2,...HSz | Binary] */ + int sz = len - hsz; + int code; + + i = LOAD_BUF2BINARY(spec, i, buf+hsz, sz); + if (hsz > 0) + i = LOAD_STRING_CONS(spec, i, buf, hsz); + i = LOAD_TUPLE(spec, i, 3); + ASSERT(i <= 20); + code = driver_output_term(desc->port, spec, i); + return code; + } +} + +/* +** active mode message: +** {tcp, S, [H1,...Hsz | Data]} +*/ +static int +tcp_binary_message(inet_descriptor* desc, ErlDrvBinary* bin, int offs, int len) +{ + unsigned int hsz = desc->hsz; + ErlDrvTermData spec[20]; + int i = 0; + + DEBUGF(("tcp_binary_message(%ld): len = %d\r\n", (long)desc->port, len)); + + i = LOAD_ATOM(spec, i, am_tcp); + i = LOAD_PORT(spec, i, desc->dport); + + if ((desc->mode == INET_MODE_LIST) || (hsz > len)) { + /* INET_MODE_LIST => [H1,H2,...Hn] */ + i = LOAD_STRING(spec, i, bin->orig_bytes+offs, len); + } + else { + /* INET_MODE_BINARY => [H1,H2,...HSz | Binary] */ + int sz = len - hsz; + + i = LOAD_BINARY(spec, i, bin, offs+hsz, sz); + if (hsz > 0) + i = LOAD_STRING_CONS(spec, i, bin->orig_bytes+offs, hsz); + } + i = LOAD_TUPLE(spec, i, 3); + ASSERT(i <= 20); + return driver_output_term(desc->port, spec, i); +} + +/* +** send: active mode {tcp_closed, S} +*/ +static int tcp_closed_message(tcp_descriptor* desc) +{ + ErlDrvTermData spec[6]; + int i = 0; + + DEBUGF(("tcp_closed_message(%ld):\r\n", (long)desc->inet.port)); + if (!(desc->tcp_add_flags & TCP_ADDF_CLOSE_SENT)) { + desc->tcp_add_flags |= TCP_ADDF_CLOSE_SENT; + + i = LOAD_ATOM(spec, i, am_tcp_closed); + i = LOAD_PORT(spec, i, desc->inet.dport); + i = LOAD_TUPLE(spec, i, 2); + ASSERT(i <= 6); + return driver_output_term(desc->inet.port, spec, i); + } + return 0; +} + +/* +** send active message {tcp_error, S, Error} +*/ +static int tcp_error_message(tcp_descriptor* desc, int err) +{ + ErlDrvTermData spec[8]; + ErlDrvTermData am_err = error_atom(err); + int i = 0; + + DEBUGF(("tcp_error_message(%ld): %d\r\n", (long)desc->inet.port, err)); + + i = LOAD_ATOM(spec, i, 
am_tcp_error); + i = LOAD_PORT(spec, i, desc->inet.dport); + i = LOAD_ATOM(spec, i, am_err); + i = LOAD_TUPLE(spec, i, 3); + ASSERT(i <= 8); + return driver_output_term(desc->inet.port, spec, i); +} + +/* +** active mode message: +** {udp, S, IP, Port, [H1,...Hsz | Data]} or +** {sctp, S, IP, Port, {[AncilData], Event_or_Data}} +** where +** [H1,...,HSz] are msg headers (without IP/Port, UDP only), +** Data : List() | Binary() +*/ +static int packet_binary_message + (inet_descriptor* desc, ErlDrvBinary* bin, int offs, int len, void* extra) +{ + unsigned int hsz = desc->hsz; + ErlDrvTermData spec [PACKET_ERL_DRV_TERM_DATA_LEN]; + int i = 0; + int alen; + + DEBUGF(("packet_binary_message(%ld): len = %d\r\n", + (long)desc->port, len)); +# ifdef HAVE_SCTP + i = LOAD_ATOM(spec, i, IS_SCTP(desc) ? am_sctp : am_udp); /* UDP|SCTP */ +# else + i = LOAD_ATOM(spec, i, am_udp ); /* UDP only */ +# endif + i = LOAD_PORT(spec, i, desc->dport); /* S */ + + alen = addrlen(desc->sfamily); + i = load_ip_address(spec, i, desc->sfamily, bin->orig_bytes+offs+3); + i = load_ip_port(spec, i, bin->orig_bytes+offs+1); /* IP, Port */ + + offs += (alen + 3); + len -= (alen + 3); + +# ifdef HAVE_SCTP + if (!IS_SCTP(desc)) + { +# endif + if ((desc->mode == INET_MODE_LIST) || (hsz > len)) + /* INET_MODE_LIST, or only headers => [H1,H2,...Hn] */ + i = LOAD_STRING(spec, i, bin->orig_bytes+offs, len); + else { + /* INET_MODE_BINARY => [H1,H2,...HSz | Binary] */ + int sz = len - hsz; + + i = LOAD_BINARY(spec, i, bin, offs+hsz, sz); + if (hsz > 0) + i = LOAD_STRING_CONS(spec, i, bin->orig_bytes+offs, hsz); + } +# ifdef HAVE_SCTP + } + else + { /* For SCTP we always have desc->hsz==0 (i.e., no application-level + headers are used): */ + struct msghdr* mptr; + ASSERT(hsz == 0); + + /* Put in the list (possibly empty) of Ancillary Data: */ + mptr = (struct msghdr *) extra; + i = sctp_parse_ancillary_data (spec, i, mptr); + + /* Then: Data or Event (Notification)? 
*/ + if (mptr->msg_flags & MSG_NOTIFICATION) + /* This is an Event, parse it. It may indicate a normal or an error + condition; in the latter case, the initial 'sctp' atom is over- + ridden by 'sctp_error', and the Event we receive contains the + error term: */ + i = sctp_parse_async_event + (spec, i, 0, am_sctp_error, desc, bin, offs, len); + else + /* This is SCTP data, not a notification event. The data can be + returned as a List or as a Binary, similar to the generic case: + */ + if (desc->mode == INET_MODE_LIST) + /* INET_MODE_LIST => [H1,H2,...Hn], addr and data together, + but the Addr has already been parsed, so start at offs: + */ + i = LOAD_STRING(spec, i, bin->orig_bytes+offs, len); + else + /* INET_MODE_BINARY => Binary */ + i = LOAD_BINARY(spec, i, bin, offs, len); + + /* Close up the {[AncilData], Event_OR_Data} tuple: */ + i = LOAD_TUPLE (spec, i, 2); + } +# endif /* HAVE_SCTP */ + + /* Close up the outer 5-tuple: */ + i = LOAD_TUPLE(spec, i, 5); + ASSERT(i <= PACKET_ERL_DRV_TERM_DATA_LEN); + return driver_output_term(desc->port, spec, i); +} + +/* +** send active message {udp_error|sctp_error, S, Error} +*/ +static int packet_error_message(udp_descriptor* udesc, int err) +{ + inet_descriptor* desc = INETP(udesc); + ErlDrvTermData spec[2*LOAD_ATOM_CNT + LOAD_PORT_CNT + LOAD_TUPLE_CNT]; + ErlDrvTermData am_err = error_atom(err); + int i = 0; + + DEBUGF(("packet_error_message(%ld): %d\r\n", + (long)desc->port, err)); + +# ifdef HAVE_SCTP + if (IS_SCTP(desc) ) + i = LOAD_ATOM(spec, i, am_sctp_error); + else +# endif + i = LOAD_ATOM(spec, i, am_udp_error); + + i = LOAD_PORT(spec, i, desc->dport); + i = LOAD_ATOM(spec, i, am_err); + i = LOAD_TUPLE(spec, i, 3); + ASSERT(i == sizeof(spec)/sizeof(*spec)); + return driver_output_term(desc->port, spec, i); +} + + +/* scan buffer for bit 7 */ +static void scanbit8(inet_descriptor* desc, const char* buf, int len) +{ + int c; + + if (!desc->bit8f || desc->bit8) return; + c = 0; + while(len--) c |= *buf++; + 
desc->bit8 = ((c & 0x80) != 0); +} + +/* +** active=TRUE: +** (NOTE! distribution MUST use active=TRUE, deliver=PORT) +** deliver=PORT {S, {data, [H1,..Hsz | Data]}} +** deliver=TERM {tcp, S, [H1..Hsz | Data]} +** +** active=FALSE: +** {async, S, Ref, {ok,[H1,...Hsz | Data]}} +*/ +static int tcp_reply_data(tcp_descriptor* desc, char* buf, int len) +{ + int code; + const char* body = buf; + int bodylen = len; + + packet_get_body(desc->inet.htype, &body, &bodylen); + + scanbit8(INETP(desc), body, bodylen); + + if (desc->inet.deliver == INET_DELIVER_PORT) { + code = inet_port_data(INETP(desc), body, bodylen); + } + else if ((code=packet_parse(desc->inet.htype, buf, len, + &desc->http_state, &packet_callbacks, + desc)) == 0) { + /* No body parsing, return raw binary */ + if (desc->inet.active == INET_PASSIVE) + return inet_async_data(INETP(desc), body, bodylen); + else + code = tcp_message(INETP(desc), body, bodylen); + } + + if (code < 0) + return code; + if (desc->inet.active == INET_ONCE) + desc->inet.active = INET_PASSIVE; + return code; +} + +static int +tcp_reply_binary_data(tcp_descriptor* desc, ErlDrvBinary* bin, int offs, int len) +{ + int code; + const char* buf = bin->orig_bytes + offs; + const char* body = buf; + int bodylen = len; + + packet_get_body(desc->inet.htype, &body, &bodylen); + offs = body - bin->orig_bytes; /* body offset now */ + + scanbit8(INETP(desc), body, bodylen); + + if (desc->inet.deliver == INET_DELIVER_PORT) + code = inet_port_binary_data(INETP(desc), bin, offs, bodylen); + else if ((code=packet_parse(desc->inet.htype, buf, len, &desc->http_state, + &packet_callbacks,desc)) == 0) { + /* No body parsing, return raw data */ + if (desc->inet.active == INET_PASSIVE) + return inet_async_binary_data(INETP(desc), 0, bin, offs, bodylen, NULL); + else + code = tcp_binary_message(INETP(desc), bin, offs, bodylen); + } + if (code < 0) + return code; + if (desc->inet.active == INET_ONCE) + desc->inet.active = INET_PASSIVE; + return code; +} + + 
+static int +packet_reply_binary_data(inet_descriptor* desc, unsigned int hsz, + ErlDrvBinary * bin, int offs, int len, + void * extra) +{ + int code; + + scanbit8(desc, bin->orig_bytes+offs, len); + + if (desc->active == INET_PASSIVE) + /* "inet" is actually for both UDP and SCTP, as well as TCP! */ + return inet_async_binary_data(desc, hsz, bin, offs, len, extra); + else + { /* INET_ACTIVE or INET_ONCE: */ + if (desc->deliver == INET_DELIVER_PORT) + code = inet_port_binary_data(desc, bin, offs, len); + else + code = packet_binary_message(desc, bin, offs, len, extra); + if (code < 0) + return code; + if (desc->active == INET_ONCE) + desc->active = INET_PASSIVE; + return code; + } +} + +/* ---------------------------------------------------------------------------- + + INET + +---------------------------------------------------------------------------- */ + +static int +sock_init(void) /* May be called multiple times. */ +{ +#ifdef __WIN32__ + WORD wVersionRequested; + WSADATA wsaData; + static int res = -1; /* res < 0 == initialization never attempted */ + + if (res >= 0) + return res; + + wVersionRequested = MAKEWORD(2,0); + if (WSAStartup(wVersionRequested, &wsaData) != 0) + goto error; + + if ((LOBYTE(wsaData.wVersion) != 2) || (HIBYTE(wsaData.wVersion) != 0)) + goto error; + + find_dynamic_functions(); + + return res = 1; + + error: + + WSACleanup(); + return res = 0; +#else + return 1; +#endif +} + +#ifdef HAVE_SCTP +static void inet_init_sctp(void) { + INIT_ATOM(sctp); + INIT_ATOM(sctp_error); + INIT_ATOM(true); + INIT_ATOM(false); + INIT_ATOM(buffer); + INIT_ATOM(mode); + INIT_ATOM(list); + INIT_ATOM(binary); + INIT_ATOM(active); + INIT_ATOM(once); + INIT_ATOM(buffer); + INIT_ATOM(linger); + INIT_ATOM(recbuf); + INIT_ATOM(sndbuf); + INIT_ATOM(reuseaddr); + INIT_ATOM(dontroute); + INIT_ATOM(priority); + INIT_ATOM(tos); + + /* Option names */ + INIT_ATOM(sctp_rtoinfo); + INIT_ATOM(sctp_associnfo); + INIT_ATOM(sctp_initmsg); + INIT_ATOM(sctp_autoclose); + 
INIT_ATOM(sctp_nodelay); + INIT_ATOM(sctp_disable_fragments); + INIT_ATOM(sctp_i_want_mapped_v4_addr); + INIT_ATOM(sctp_maxseg); + INIT_ATOM(sctp_set_peer_primary_addr); + INIT_ATOM(sctp_primary_addr); + INIT_ATOM(sctp_adaptation_layer); + INIT_ATOM(sctp_peer_addr_params); + INIT_ATOM(sctp_default_send_param); + INIT_ATOM(sctp_events); + INIT_ATOM(sctp_delayed_ack_time); + INIT_ATOM(sctp_status); + INIT_ATOM(sctp_get_peer_addr_info); + + /* Record names */ + INIT_ATOM(sctp_sndrcvinfo); + INIT_ATOM(sctp_assoc_change); + INIT_ATOM(sctp_paddr_change); + INIT_ATOM(sctp_remote_error); + INIT_ATOM(sctp_send_failed); + INIT_ATOM(sctp_shutdown_event); + INIT_ATOM(sctp_adaptation_event); + INIT_ATOM(sctp_pdapi_event); + INIT_ATOM(sctp_assocparams); + INIT_ATOM(sctp_prim); + INIT_ATOM(sctp_setpeerprim); + INIT_ATOM(sctp_setadaptation); + INIT_ATOM(sctp_paddrparams); + INIT_ATOM(sctp_event_subscribe); + INIT_ATOM(sctp_assoc_value); + INIT_ATOM(sctp_paddrinfo); + + /* For #sctp_sndrcvinfo{}: */ + INIT_ATOM(unordered); + INIT_ATOM(addr_over); + INIT_ATOM(abort); + INIT_ATOM(eof); + + /* For #sctp_assoc_change{}: */ + INIT_ATOM(comm_up); + INIT_ATOM(comm_lost); + INIT_ATOM(restart); + INIT_ATOM(shutdown_comp); + INIT_ATOM(cant_assoc); + + /* For #sctp_paddr_change{}: */ + INIT_ATOM(addr_available); + INIT_ATOM(addr_unreachable); + INIT_ATOM(addr_removed); + INIT_ATOM(addr_added); + INIT_ATOM(addr_made_prim); + INIT_ATOM(addr_confirmed); + + INIT_ATOM(short_recv); + INIT_ATOM(wrong_anc_data); + + /* For #sctp_pdap_event{}: */ + INIT_ATOM(partial_delivery_aborted); + + /* For #sctp_paddrparams{}: */ + INIT_ATOM(hb_enable); + INIT_ATOM(hb_disable); + INIT_ATOM(hb_demand); + INIT_ATOM(pmtud_enable); + INIT_ATOM(pmtud_disable); + INIT_ATOM(sackdelay_enable); + INIT_ATOM(sackdelay_disable); + + /* For #sctp_paddrinfo{}: */ + INIT_ATOM(active); + INIT_ATOM(inactive); + + /* For #sctp_status{}: */ + INIT_ATOM(empty); + INIT_ATOM(closed); + INIT_ATOM(cookie_wait); + 
INIT_ATOM(cookie_echoed); + INIT_ATOM(established); + INIT_ATOM(shutdown_pending); + INIT_ATOM(shutdown_sent); + INIT_ATOM(shutdown_received); + INIT_ATOM(shutdown_ack_sent); + /* Not yet implemented in the Linux kernel: + ** INIT_ATOM(bound); + ** INIT_ATOM(listen); + */ +} +#endif /* HAVE_SCTP */ + +static int inet_init() +{ + if (!sock_init()) + goto error; + + buffer_stack_pos = 0; + + erts_smp_spinlock_init(&inet_buffer_stack_lock, "inet_buffer_stack_lock"); + + ASSERT(sizeof(struct in_addr) == 4); +# if defined(HAVE_IN6) && defined(AF_INET6) + ASSERT(sizeof(struct in6_addr) == 16); +# endif + +#ifdef DEBUG + tot_buf_allocated = 0; + max_buf_allocated = 0; + tot_buf_stacked = 0; +#endif + INIT_ATOM(ok); + INIT_ATOM(tcp); + INIT_ATOM(udp); + INIT_ATOM(error); + INIT_ATOM(inet_async); + INIT_ATOM(inet_reply); + INIT_ATOM(timeout); + INIT_ATOM(closed); + INIT_ATOM(tcp_closed); + INIT_ATOM(tcp_error); + INIT_ATOM(udp_error); + INIT_ATOM(empty_out_q); + INIT_ATOM(ssl_tls); + + INIT_ATOM(http_eoh); + INIT_ATOM(http_header); + INIT_ATOM(http_request); + INIT_ATOM(http_response); + INIT_ATOM(http_error); + INIT_ATOM(abs_path); + INIT_ATOM(absoluteURI); + am_star = driver_mk_atom("*"); + INIT_ATOM(undefined); + INIT_ATOM(http); + INIT_ATOM(https); + INIT_ATOM(scheme); + + /* add TCP, UDP and SCTP drivers */ +#ifdef _OSE_ + add_ose_tcp_drv_entry(&tcp_inet_driver_entry); + add_ose_udp_drv_entry(&udp_inet_driver_entry); +#else + add_driver_entry(&tcp_inet_driver_entry); + add_driver_entry(&udp_inet_driver_entry); +# ifdef HAVE_SCTP + /* Check the size of SCTP AssocID -- currently both this driver and the + Erlang part require 32 bit: */ + ASSERT(sizeof(sctp_assoc_t)==ASSOC_ID_LEN); +# ifndef LIBSCTP +# error LIBSCTP not defined +# endif + if (erts_sys_ddll_open_noext(STRINGIFY(LIBSCTP), &h_libsctp, NULL) == 0) { + void *ptr; + if (erts_sys_ddll_sym(h_libsctp, "sctp_bindx", &ptr) == 0) { + p_sctp_bindx = ptr; + inet_init_sctp(); + add_driver_entry(&sctp_inet_driver_entry); 
+ } + } +# endif +#endif /* _OSE_ */ + /* remove the dummy inet driver */ + remove_driver_entry(&inet_driver_entry); + return 0; + + error: + remove_driver_entry(&inet_driver_entry); + return -1; +} + + +/* +** Set a inaddr structure: +** src = [P1,P0,X1,X2,.....] +** dst points to a structure large enugh to keep any kind +** of inaddr. +** *len is set to length of src on call +** and is set to actual length of dst on return +** return NULL on error and ptr after port address on success +*/ +static char* inet_set_address(int family, inet_address* dst, char* src, int* len) +{ + short port; + + if ((family == AF_INET) && (*len >= 2+4)) { + sys_memzero((char*)dst, sizeof(struct sockaddr_in)); + port = get_int16(src); + dst->sai.sin_family = family; + dst->sai.sin_port = sock_htons(port); + sys_memcpy(&dst->sai.sin_addr, src+2, 4); + *len = sizeof(struct sockaddr_in); + return src + 2+4; + } +#if defined(HAVE_IN6) && defined(AF_INET6) + else if ((family == AF_INET6) && (*len >= 2+16)) { + sys_memzero((char*)dst, sizeof(struct sockaddr_in6)); + port = get_int16(src); + dst->sai6.sin6_family = family; + dst->sai6.sin6_port = sock_htons(port); + dst->sai6.sin6_flowinfo = 0; /* XXX this may be set as well ?? */ + sys_memcpy(&dst->sai6.sin6_addr, src+2, 16); + *len = sizeof(struct sockaddr_in6); + return src + 2+16; + } +#endif + return NULL; +} +#ifdef HAVE_SCTP +/* +** Set an inaddr structure, address family comes from source data, +** or from argument if source data specifies constant address. +** +** src = [TAG,P1,P0] when TAG = INET_AF_ANY | INET_AF_LOOPBACK +** src = [TAG,P1,P0,X1,X2,...] 
when TAG = INET_AF_INET | INET_AF_INET6 +*/ +static char *inet_set_faddress(int family, inet_address* dst, + char *src, int* len) { + int tag; + + if (*len < 1) return NULL; + (*len) --; + tag = *(src ++); + switch (tag) { + case INET_AF_INET: + family = AF_INET; + break; +# if defined(HAVE_IN6) && defined(AF_INET6) + case INET_AF_INET6: + family = AF_INET6; + break; +# endif + case INET_AF_ANY: + case INET_AF_LOOPBACK: { + int port; + + if (*len < 2) return NULL; + port = get_int16(src); + switch (family) { + case AF_INET: { + struct in_addr addr; + switch (tag) { + case INET_AF_ANY: + addr.s_addr = sock_htonl(INADDR_ANY); + break; + case INET_AF_LOOPBACK: + addr.s_addr = sock_htonl(INADDR_LOOPBACK); + break; + default: + return NULL; + } + sys_memzero((char*)dst, sizeof(struct sockaddr_in)); + dst->sai.sin_family = family; + dst->sai.sin_port = sock_htons(port); + dst->sai.sin_addr.s_addr = addr.s_addr; + *len = sizeof(struct sockaddr_in); + } break; +# if defined(HAVE_IN6) && defined(AF_INET6) + case AF_INET6: { + const struct in6_addr* paddr; + switch (tag) { + case INET_AF_ANY: + paddr = &in6addr_any; + break; + case INET_AF_LOOPBACK: + paddr = &in6addr_loopback; + break; + default: + return NULL; + } + sys_memzero((char*)dst, sizeof(struct sockaddr_in6)); + dst->sai6.sin6_family = family; + dst->sai6.sin6_port = sock_htons(port); + dst->sai6.sin6_flowinfo = 0; /* XXX this may be set as well ?? */ + dst->sai6.sin6_addr = *paddr; + *len = sizeof(struct sockaddr_in6); + } break; +# endif + default: + return NULL; + } + return src + 2; + } break; + default: + return NULL; + } + return inet_set_address(family, dst, src, len); +} +#endif /* HAVE_SCTP */ + +/* Get a inaddr structure +** src = inaddr structure +** *len is the lenght of structure +** dst is filled with [F,P1,P0,X1,....] 
+** where F is the family code (coded) +** and *len is the length of dst on return +** (suitable to deliver to erlang) +*/ +static int inet_get_address(int family, char* dst, inet_address* src, unsigned int* len) +{ + short port; + + if ((family == AF_INET) && (*len >= sizeof(struct sockaddr_in))) { + dst[0] = INET_AF_INET; + port = sock_ntohs(src->sai.sin_port); + put_int16(port, dst+1); + sys_memcpy(dst+3, (char*)&src->sai.sin_addr, sizeof(struct in_addr)); + *len = 3 + sizeof(struct in_addr); + return 0; + } +#if defined(HAVE_IN6) && defined(AF_INET6) + else if ((family == AF_INET6) && (*len >= sizeof(struct sockaddr_in6))) { + dst[0] = INET_AF_INET6; + port = sock_ntohs(src->sai6.sin6_port); + put_int16(port, dst+1); + sys_memcpy(dst+3, (char*)&src->sai6.sin6_addr,sizeof(struct in6_addr)); + *len = 3 + sizeof(struct in6_addr); + return 0; + } +#endif + return -1; +} + +static void desc_close(inet_descriptor* desc) +{ + if (desc->s != INVALID_SOCKET) { +#ifdef __WIN32__ + winsock_event_select(desc, FD_READ|FD_WRITE|FD_CLOSE, 0); + sock_close(desc->s); + desc->forced_events = 0; + desc->send_would_block = 0; +#endif + driver_select(desc->port, (ErlDrvEvent)(long)desc->event, ERL_DRV_USE, 0); + desc->event = INVALID_EVENT; /* closed by stop_select callback */ + desc->s = INVALID_SOCKET; + desc->event_mask = 0; + } +} + +static void desc_close_read(inet_descriptor* desc) +{ + if (desc->s != INVALID_SOCKET) { +#ifdef __WIN32__ + /* This call can not be right??? + * We want to turn off read events but keep any write events. + * But on windows driver_select(...,READ,1) is only used as a + * way to hook into the pollset. sock_select is used to control + * which events to wait for. + * It seems we used to disabled all events for the socket here. 
+ * + driver_select(desc->port, desc->event, DO_READ, 0); REMOVED */ +#endif + sock_select(desc, FD_READ | FD_CLOSE, 0); + } +} + + +static int erl_inet_close(inet_descriptor* desc) +{ + free_subscribers(&desc->empty_out_q_subs); + if ((desc->prebound == 0) && (desc->state & INET_F_OPEN)) { + desc_close(desc); + desc->state = INET_STATE_CLOSED; + } else if (desc->prebound && (desc->s != INVALID_SOCKET)) { + sock_select(desc, FD_READ | FD_WRITE | FD_CLOSE, 0); + desc->event_mask = 0; +#ifdef __WIN32__ + desc->forced_events = 0; + desc->send_would_block = 0; +#endif + } + return 0; +} + + +static int inet_ctl_open(inet_descriptor* desc, int domain, int type, + char** rbuf, int rsize) +{ + if (desc->state != INET_STATE_CLOSED) + return ctl_xerror(EXBADSEQ, rbuf, rsize); + if ((desc->s = sock_open(domain, type, desc->sprotocol)) == INVALID_SOCKET) + return ctl_error(sock_errno(), rbuf, rsize); + if ((desc->event = sock_create_event(desc)) == INVALID_EVENT) + return ctl_error(sock_errno(), rbuf, rsize); + SET_NONBLOCKING(desc->s); +#ifdef __WIN32__ + driver_select(desc->port, desc->event, ERL_DRV_READ, 1); +#endif + desc->state = INET_STATE_OPEN; + desc->stype = type; + desc->sfamily = domain; + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); +} + + +/* as inet_open but pass in an open socket (MUST BE OF RIGHT TYPE) */ +static int inet_ctl_fdopen(inet_descriptor* desc, int domain, int type, + SOCKET s, char** rbuf, int rsize) +{ + inet_address name; + unsigned int sz = sizeof(name); + + /* check that it is a socket and that the socket is bound */ + if (sock_name(s, (struct sockaddr*) &name, &sz) == SOCKET_ERROR) + return ctl_error(sock_errno(), rbuf, rsize); + desc->s = s; + if ((desc->event = sock_create_event(desc)) == INVALID_EVENT) + return ctl_error(sock_errno(), rbuf, rsize); + SET_NONBLOCKING(desc->s); +#ifdef __WIN32__ + driver_select(desc->port, desc->event, ERL_DRV_READ, 1); +#endif + desc->state = INET_STATE_BOUND; /* assume bound */ + if (type == 
SOCK_STREAM) { /* check if connected */ + sz = sizeof(name); + if (sock_peer(s, (struct sockaddr*) &name, &sz) != SOCKET_ERROR) + desc->state = INET_STATE_CONNECTED; + } + + desc->prebound = 1; /* used to prevent a real close since + * the fd probably comes from an + * external wrapper program, so it is + * not certain that we can open it again */ + desc->stype = type; + desc->sfamily = domain; + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); +} + +/* +** store interface info as: (bytes) +** [Len] Name(Len) Flags(1) addr(4) baddr(4) mask(4) bw(4) +*/ +struct addr_if { + char name[INET_IFNAMSIZ]; + long flags; /* coded flags */ + struct in_addr addr; /* interface address */ + struct in_addr baddr; /* broadcast address */ + struct in_addr mask; /* netmask */ +}; + + +#ifndef SIOCGIFNETMASK +static struct in_addr net_mask(in) +struct in_addr in; +{ + register u_long i = sock_ntohl(in.s_addr); + + if (IN_CLASSA(i)) + in.s_addr = sock_htonl(IN_CLASSA_NET); + else if (IN_CLASSB(i)) + in.s_addr = sock_htonl(IN_CLASSB_NET); + else + in.s_addr = sock_htonl(IN_CLASSC_NET); + return in; +} +#endif + +#if defined(__WIN32__) && defined(SIO_GET_INTERFACE_LIST) + +/* format address in dot notation */ +static char* fmt_addr(unsigned long x, char* ptr) +{ + int i; + for (i = 0; i < 4; i++) { + int nb[3]; + int y = (x >> 24) & 0xff; + x <<= 8; + nb[0] = y % 10; y /= 10; + nb[1] = y % 10; y /= 10; + nb[2] = y % 10; y /= 10; + switch((nb[2] ? 3 : (nb[1] ? 
2 : 1))) { + case 3: *ptr++ = nb[2] + '0'; + case 2: *ptr++ = nb[1] + '0'; + case 1: *ptr++ = nb[0] + '0'; + } + *ptr++ = '.'; + } + *(ptr-1) = '\0'; + return ptr; +} + +static int parse_addr(char* ptr, int n, long* x) +{ + long addr = 0; + int dots = 0; + int digs = 0; + int v = 0; + + while(n--) { + switch(*ptr) { + case '0': case '1': case '2':case '3':case '4':case '5': + case '6': case '7': case '8':case '9': + v = v*10 + *ptr - '0'; + if (++digs > 3) return -1; + break; + case '.': + if ((dots>2) || (digs==0) || (digs > 3) || (v > 0xff)) return -1; + dots++; + digs = 0; + addr = (addr << 8) | v; + v = 0; + break; + default: + return -1; + } + ptr++; + } + if ((dots!=3) || (digs==0) || (digs > 3) || (v > 0xff)) return -1; + addr = (addr << 8) | v; + *x = addr; + return 0; +} + +#endif + +#define buf_check(ptr, end, n) \ +do { if ((end)-(ptr) < (n)) goto error; } while(0) + +static char* sockaddr_to_buf(struct sockaddr* addr, char* ptr, char* end) +{ + if (addr->sa_family == AF_INET || addr->sa_family == 0) { + struct in_addr a; + buf_check(ptr,end,sizeof(struct in_addr)); + a = ((struct sockaddr_in*) addr)->sin_addr; + sys_memcpy(ptr, (char*)&a, sizeof(struct in_addr)); + return ptr + sizeof(struct in_addr); + } +#if defined(HAVE_IN6) && defined(AF_INET6) + else if (addr->sa_family == AF_INET6) { + struct in6_addr a; + buf_check(ptr,end,sizeof(struct in6_addr)); + a = ((struct sockaddr_in6*) addr)->sin6_addr; + sys_memcpy(ptr, (char*)&a, sizeof(struct in6_addr)); + return ptr + sizeof(struct in6_addr); + } +#endif + error: + return NULL; + +} + +static char* buf_to_sockaddr(char* ptr, char* end, struct sockaddr* addr) +{ + buf_check(ptr,end,sizeof(struct in_addr)); + sys_memcpy((char*) &((struct sockaddr_in*)addr)->sin_addr, ptr, + sizeof(struct in_addr)); + addr->sa_family = AF_INET; + return ptr + sizeof(struct in_addr); + + error: + return NULL; +} + + + +#if defined(__WIN32__) && defined(SIO_GET_INTERFACE_LIST) + +static int 
inet_ctl_getiflist(inet_descriptor* desc, char** rbuf, int rsize) +{ + char ifbuf[BUFSIZ]; + char sbuf[BUFSIZ]; + char* sptr; + INTERFACE_INFO* ifp; + DWORD len; + int n; + int err; + + ifp = (INTERFACE_INFO*) ifbuf; + len = 0; + err = WSAIoctl(desc->s, SIO_GET_INTERFACE_LIST, NULL, 0, + (LPVOID) ifp, BUFSIZ, (LPDWORD) &len, + NULL, NULL); + + if (err == SOCKET_ERROR) + return ctl_error(sock_errno(), rbuf, rsize); + + n = (len + sizeof(INTERFACE_INFO) - 1) / sizeof(INTERFACE_INFO); + sptr = sbuf; + + while(n--) { + if (((struct sockaddr*)&ifp->iiAddress)->sa_family == desc->sfamily) { + struct in_addr sina = ((struct sockaddr_in*)&ifp->iiAddress)->sin_addr; + /* discard INADDR_ANY interface address */ + if (sina.s_addr != INADDR_ANY) + sptr = fmt_addr(sock_ntohl(sina.s_addr), sptr); + } + ifp++; + } + return ctl_reply(INET_REP_OK, sbuf, sptr - sbuf, rbuf, rsize); +} + + +/* input is an ip-address in string format i.e A.B.C.D +** scan the INTERFACE_LIST to get the options +*/ +static int inet_ctl_ifget(inet_descriptor* desc, char* buf, int len, + char** rbuf, int rsize) +{ + char ifbuf[BUFSIZ]; + int n; + char sbuf[BUFSIZ]; + char* sptr; + char* s_end = sbuf + BUFSIZ; + int namlen; + int err; + INTERFACE_INFO* ifp; + long namaddr; + + if ((len == 0) || ((namlen = buf[0]) > len)) + goto error; + if (parse_addr(buf+1, namlen, &namaddr) < 0) + goto error; + namaddr = sock_ntohl(namaddr); + buf += (namlen+1); + len -= (namlen+1); + + ifp = (INTERFACE_INFO*) ifbuf; + err = WSAIoctl(desc->s, SIO_GET_INTERFACE_LIST, NULL, 0, + (LPVOID) ifp, BUFSIZ, (LPDWORD) &n, + NULL, NULL); + if (err == SOCKET_ERROR) { + return ctl_error(sock_errno(), rbuf, rsize); + } + + n = (n + sizeof(INTERFACE_INFO) - 1) / sizeof(INTERFACE_INFO); + + /* find interface */ + while(n) { + if (((struct sockaddr_in*)&ifp->iiAddress)->sin_addr.s_addr == namaddr) + break; + ifp++; + n--; + } + if (n == 0) + goto error; + + sptr = sbuf; + + while (len--) { + switch(*buf++) { + case INET_IFOPT_ADDR: + 
buf_check(sptr, s_end, 1); + *sptr++ = INET_IFOPT_ADDR; + if ((sptr = sockaddr_to_buf((struct sockaddr *)&ifp->iiAddress, + sptr, s_end)) == NULL) + goto error; + break; + + case INET_IFOPT_HWADDR: + break; + + case INET_IFOPT_BROADADDR: +#ifdef SIOCGIFBRDADDR + buf_check(sptr, s_end, 1); + *sptr++ = INET_IFOPT_BROADADDR; + if ((sptr=sockaddr_to_buf((struct sockaddr *) + &ifp->iiBroadcastAddress,sptr,s_end)) + == NULL) + goto error; +#endif + break; + + case INET_IFOPT_DSTADDR: + break; + + case INET_IFOPT_NETMASK: + buf_check(sptr, s_end, 1); + *sptr++ = INET_IFOPT_NETMASK; + if ((sptr = sockaddr_to_buf((struct sockaddr *) + &ifp->iiNetmask,sptr,s_end)) == NULL) + goto error; + break; + + case INET_IFOPT_MTU: + break; + + case INET_IFOPT_FLAGS: { + long eflags = 0; + int flags = ifp->iiFlags; + /* just enumerate the interfaces (no names) */ + + /* translate flags */ + if (flags & IFF_UP) + eflags |= INET_IFF_UP; + if (flags & IFF_BROADCAST) + eflags |= INET_IFF_BROADCAST; + if (flags & IFF_LOOPBACK) + eflags |= INET_IFF_LOOPBACK; + if (flags & IFF_POINTTOPOINT) + eflags |= INET_IFF_POINTTOPOINT; + if (flags & IFF_UP) /* emulate runnign ? */ + eflags |= INET_IFF_RUNNING; + if (flags & IFF_MULTICAST) + eflags |= INET_IFF_MULTICAST; + + buf_check(sptr, s_end, 5); + *sptr++ = INET_IFOPT_FLAGS; + put_int32(eflags, sptr); + sptr += 4; + break; + } + default: + goto error; + } + } + return ctl_reply(INET_REP_OK, sbuf, sptr - sbuf, rbuf, rsize); + + error: + return ctl_error(EINVAL, rbuf, rsize); +} + +/* not supported */ +static int inet_ctl_ifset(inet_descriptor* desc, char* buf, int len, + char** rbuf, int rsize) +{ + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); +} + + +#elif defined(SIOCGIFCONF) && defined(SIOCSIFFLAGS) +/* cygwin has SIOCGIFCONF but not SIOCSIFFLAGS (Nov 2002) */ + +#define VOIDP(x) ((void*)(x)) +#if defined(AF_LINK) && !defined(NO_SA_LEN) +#define SIZEA(p) (((p).sa_len > sizeof(p)) ? 
(p).sa_len : sizeof(p)) +#else +#define SIZEA(p) (sizeof (p)) +#endif + + +static int inet_ctl_getiflist(inet_descriptor* desc, char** rbuf, int rsize) +{ + struct ifconf ifc; + struct ifreq *ifr; + char *buf; + int buflen, ifc_len, i; + char *sbuf, *sp; + + /* Courtesy of Per Bergqvist and W. Richard Stevens */ + + ifc_len = 0; + buflen = 100 * sizeof(struct ifreq); + buf = ALLOC(buflen); + + for (;;) { + ifc.ifc_len = buflen; + ifc.ifc_buf = buf; + if (ioctl(desc->s, SIOCGIFCONF, (char *)&ifc) < 0) { + int res = sock_errno(); + if (res != EINVAL || ifc_len) { + FREE(buf); + return ctl_error(res, rbuf, rsize); + } + } else { + if (ifc.ifc_len == ifc_len) break; /* buf large enough */ + ifc_len = ifc.ifc_len; + } + buflen += 10 * sizeof(struct ifreq); + buf = (char *)REALLOC(buf, buflen); + } + + sp = sbuf = ALLOC(ifc_len+1); + *sp++ = INET_REP_OK; + i = 0; + for (;;) { + int n; + + ifr = (struct ifreq *) VOIDP(buf + i); + n = sizeof(ifr->ifr_name) + SIZEA(ifr->ifr_addr); + if (n < sizeof(*ifr)) n = sizeof(*ifr); + if (i+n > ifc_len) break; + i += n; + + switch (ifr->ifr_addr.sa_family) { +#if defined(HAVE_IN6) && defined(AF_INET6) + case AF_INET6: +#endif + case AF_INET: + ASSERT(sp+IFNAMSIZ+1 < sbuf+buflen+1) + strncpy(sp, ifr->ifr_name, IFNAMSIZ); + sp[IFNAMSIZ] = '\0'; + sp += strlen(sp), ++sp; + } + + if (i >= ifc_len) break; + } + FREE(buf); + *rbuf = sbuf; + return sp - sbuf; +} + + + +static int inet_ctl_ifget(inet_descriptor* desc, char* buf, int len, + char** rbuf, int rsize) +{ + char sbuf[BUFSIZ]; + char* sptr; + char* s_end = sbuf + BUFSIZ; + struct ifreq ifreq; + int namlen; + + if ((len == 0) || ((namlen = buf[0]) > len)) + goto error; + sys_memset(ifreq.ifr_name, '\0', IFNAMSIZ); + sys_memcpy(ifreq.ifr_name, buf+1, + (namlen > IFNAMSIZ) ? 
IFNAMSIZ : namlen); + buf += (namlen+1); + len -= (namlen+1); + sptr = sbuf; + + while (len--) { + switch(*buf++) { + case INET_IFOPT_ADDR: + if (ioctl(desc->s, SIOCGIFADDR, (char *)&ifreq) < 0) + break; + buf_check(sptr, s_end, 1); + *sptr++ = INET_IFOPT_ADDR; + if ((sptr = sockaddr_to_buf(&ifreq.ifr_addr, sptr, s_end)) == NULL) + goto error; + break; + + case INET_IFOPT_HWADDR: { +#ifdef SIOCGIFHWADDR + if (ioctl(desc->s, SIOCGIFHWADDR, (char *)&ifreq) < 0) + break; + buf_check(sptr, s_end, 1+IFHWADDRLEN); + *sptr++ = INET_IFOPT_HWADDR; + /* raw memcpy (fix include autoconf later) */ + sys_memcpy(sptr, (char*)(&ifreq.ifr_hwaddr.sa_data), IFHWADDRLEN); + sptr += IFHWADDRLEN; +#endif + break; + } + + + case INET_IFOPT_BROADADDR: +#ifdef SIOCGIFBRDADDR + if (ioctl(desc->s, SIOCGIFBRDADDR, (char *)&ifreq) < 0) + break; + buf_check(sptr, s_end, 1); + *sptr++ = INET_IFOPT_BROADADDR; + if ((sptr=sockaddr_to_buf(&ifreq.ifr_broadaddr,sptr,s_end)) == NULL) + goto error; +#endif + break; + + case INET_IFOPT_DSTADDR: +#ifdef SIOCGIFDSTADDR + if (ioctl(desc->s, SIOCGIFDSTADDR, (char *)&ifreq) < 0) + break; + buf_check(sptr, s_end, 1); + *sptr++ = INET_IFOPT_DSTADDR; + if ((sptr = sockaddr_to_buf(&ifreq.ifr_dstaddr,sptr,s_end)) == NULL) + goto error; +#endif + break; + + case INET_IFOPT_NETMASK: +#if defined(SIOCGIFNETMASK) + if (ioctl(desc->s, SIOCGIFNETMASK, (char *)&ifreq) < 0) + break; + buf_check(sptr, s_end, 1); + *sptr++ = INET_IFOPT_NETMASK; +#if defined(ifr_netmask) + sptr = sockaddr_to_buf(&ifreq.ifr_netmask,sptr,s_end); +#else + /* SIOCGNETMASK exist but not macro ??? 
*/ + sptr = sockaddr_to_buf(&ifreq.ifr_addr,sptr,s_end); +#endif + if (sptr == NULL) + goto error; +#else + if (ioctl(desc->s, SIOCGIFADDR, (char *)&ifreq) < 0) + break; + else { + struct sockadd_in* ap; + /* emulate netmask, + * (wasted stuff since noone uses classes) + */ + buf_check(sptr, s_end, 1); + *sptr++ = INET_IFOPT_NETMASK; + ap = (struct sockaddr_in*) VOIDP(&ifreq.ifr_addr); + ap->sin_addr = net_mask(ap->sin_addr); + if ((sptr = sockaddr_to_buf(&ifreq.ifr_addr,sptr,s_end)) == NULL) + goto error; + } +#endif + break; + + case INET_IFOPT_MTU: { +#if defined(SIOCGIFMTU) && defined(ifr_mtu) + int n; + + if (ioctl(desc->s, SIOCGIFMTU, (char *)&ifreq) < 0) + break; + buf_check(sptr, s_end, 5); + *sptr++ = INET_IFOPT_MTU; + n = ifreq.ifr_mtu; + put_int32(n, sptr); + sptr += 4; +#endif + break; + } + + case INET_IFOPT_FLAGS: { + int flags; + int eflags = 0; + + if (ioctl(desc->s, SIOCGIFFLAGS, (char*)&ifreq) < 0) + flags = 0; + else + flags = ifreq.ifr_flags; + /* translate flags */ + if (flags & IFF_UP) + eflags |= INET_IFF_UP; + if (flags & IFF_BROADCAST) + eflags |= INET_IFF_BROADCAST; + if (flags & IFF_LOOPBACK) + eflags |= INET_IFF_LOOPBACK; + if (flags & IFF_POINTOPOINT) + eflags |= INET_IFF_POINTTOPOINT; + if (flags & IFF_RUNNING) + eflags |= INET_IFF_RUNNING; + if (flags & IFF_MULTICAST) + eflags |= INET_IFF_MULTICAST; + + buf_check(sptr, s_end, 5); + *sptr++ = INET_IFOPT_FLAGS; + put_int32(eflags, sptr); + sptr += 4; + break; + } + default: + goto error; + } + } + return ctl_reply(INET_REP_OK, sbuf, sptr - sbuf, rbuf, rsize); + + error: + return ctl_error(EINVAL, rbuf, rsize); +} + +/* FIXME: temporary hack */ +#ifndef IFHWADDRLEN +#define IFHWADDRLEN 6 +#endif + +static int inet_ctl_ifset(inet_descriptor* desc, char* buf, int len, + char** rbuf, int rsize) +{ + struct ifreq ifreq; + int namlen; + char* b_end = buf + len; + + if ((len == 0) || ((namlen = buf[0]) > len)) + goto error; + sys_memset(ifreq.ifr_name, '\0', IFNAMSIZ); + 
sys_memcpy(ifreq.ifr_name, buf+1, + (namlen > IFNAMSIZ) ? IFNAMSIZ : namlen); + buf += (namlen+1); + len -= (namlen+1); + + while(buf < b_end) { + switch(*buf++) { + case INET_IFOPT_ADDR: + if ((buf = buf_to_sockaddr(buf, b_end, &ifreq.ifr_addr)) == NULL) + goto error; + (void) ioctl(desc->s, SIOCSIFADDR, (char*)&ifreq); + break; + + case INET_IFOPT_HWADDR: + buf_check(buf, b_end, IFHWADDRLEN); +#ifdef SIOCSIFHWADDR + /* raw memcpy (fix include autoconf later) */ + sys_memcpy((char*)(&ifreq.ifr_hwaddr.sa_data), buf, IFHWADDRLEN); + + (void) ioctl(desc->s, SIOCSIFHWADDR, (char *)&ifreq); +#endif + buf += IFHWADDRLEN; + break; + + + case INET_IFOPT_BROADADDR: +#ifdef SIOCSIFBRDADDR + if ((buf = buf_to_sockaddr(buf, b_end, &ifreq.ifr_broadaddr)) == NULL) + goto error; + (void) ioctl(desc->s, SIOCSIFBRDADDR, (char *)&ifreq); +#endif + break; + + case INET_IFOPT_DSTADDR: +#ifdef SIOCSIFDSTADDR + if ((buf = buf_to_sockaddr(buf, b_end, &ifreq.ifr_dstaddr)) == NULL) + goto error; + (void) ioctl(desc->s, SIOCSIFDSTADDR, (char *)&ifreq); +#endif + break; + + case INET_IFOPT_NETMASK: +#ifdef SIOCSIFNETMASK + +#if defined(ifr_netmask) + buf = buf_to_sockaddr(buf,b_end, &ifreq.ifr_netmask); +#else + buf = buf_to_sockaddr(buf,b_end, &ifreq.ifr_addr); +#endif + if (buf == NULL) + goto error; + (void) ioctl(desc->s, SIOCSIFNETMASK, (char *)&ifreq); +#endif + break; + + case INET_IFOPT_MTU: + buf_check(buf, b_end, 4); +#if defined(SIOCSIFMTU) && defined(ifr_mtu) + ifreq.ifr_mtu = get_int32(buf); + (void) ioctl(desc->s, SIOCSIFMTU, (char *)&ifreq); +#endif + buf += 4; + break; + + case INET_IFOPT_FLAGS: { + int flags0; + int flags; + int eflags; + + buf_check(buf, b_end, 4); + eflags = get_int32(buf); + + /* read current flags */ + if (ioctl(desc->s, SIOCGIFFLAGS, (char*)&ifreq) < 0) + flags0 = flags = 0; + else + flags0 = flags = ifreq.ifr_flags; + + /* update flags */ + if (eflags & INET_IFF_UP) flags |= IFF_UP; + if (eflags & INET_IFF_DOWN) flags &= ~IFF_UP; + if (eflags & 
INET_IFF_BROADCAST) flags |= IFF_BROADCAST; + if (eflags & INET_IFF_NBROADCAST) flags &= ~IFF_BROADCAST; + if (eflags & INET_IFF_POINTTOPOINT) flags |= IFF_POINTOPOINT; + if (eflags & INET_IFF_NPOINTTOPOINT) flags &= ~IFF_POINTOPOINT; + + if (flags != flags0) { + ifreq.ifr_flags = flags; + (void) ioctl(desc->s, SIOCSIFFLAGS, (char*)&ifreq); + } + buf += 4; + break; + } + + default: + goto error; + } + } + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); + + error: + return ctl_error(EINVAL, rbuf, rsize); +} + +#else + + +static int inet_ctl_getiflist(inet_descriptor* desc, char** rbuf, int rsize) +{ + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); +} + + +static int inet_ctl_ifget(inet_descriptor* desc, char* buf, int len, + char** rbuf, int rsize) +{ + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); +} + + +static int inet_ctl_ifset(inet_descriptor* desc, char* buf, int len, + char** rbuf, int rsize) +{ + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); +} + +#endif + +#ifdef VXWORKS +/* +** THIS is a terrible creature, a bug in the TCP part +** of the old VxWorks stack (non SENS) created a race. +** If (and only if?) a socket got closed from the other +** end and we tried a set/getsockopt on the TCP level, +** the task would generate a bus error... +*/ +static STATUS wrap_sockopt(STATUS (*function)() /* Yep, no parameter + check */, + int s, int level, int optname, + char *optval, unsigned int optlen + /* optlen is a pointer if function + is getsockopt... */) +{ + fd_set rs; + struct timeval timeout; + int to_read; + int ret; + + FD_ZERO(&rs); + FD_SET(s,&rs); + memset(&timeout,0,sizeof(timeout)); + if (level == IPPROTO_TCP) { + taskLock(); + if (select(s+1,&rs,NULL,NULL,&timeout)) { + if (ioctl(s,FIONREAD,(int)&to_read) == ERROR || + to_read == 0) { /* End of file, other end closed? 
*/ + sock_errno() = EBADF; + taskUnlock(); + return ERROR; + } + } + ret = (*function)(s,level,optname,optval,optlen); + taskUnlock(); + } else { + ret = (*function)(s,level,optname,optval,optlen); + } + return ret; +} +#endif + +#if defined(IP_TOS) && defined(SOL_IP) && defined(SO_PRIORITY) +static int setopt_prio_tos_trick + (int fd, int proto, int type, char* arg_ptr, int arg_sz) +{ + /* The relations between SO_PRIORITY, TOS and other options + is not what you (or at least I) would expect...: + If TOS is set after priority, priority is zeroed. + If any other option is set after tos, tos might be zeroed. + Therefore, save tos and priority. If something else is set, + restore both after setting, if tos is set, restore only + prio and if prio is set restore none... All to keep the + user feeling socket options are independent. /PaN */ + int tmp_ival_prio; + int tmp_ival_tos; + int res; +#ifdef HAVE_SOCKLEN_T + socklen_t +#else + int +#endif + tmp_arg_sz_prio = sizeof(tmp_ival_prio), + tmp_arg_sz_tos = sizeof(tmp_ival_tos); + + res = sock_getopt(fd, SOL_SOCKET, SO_PRIORITY, + (char *) &tmp_ival_prio, &tmp_arg_sz_prio); + if (res == 0) { + res = sock_getopt(fd, SOL_IP, IP_TOS, + (char *) &tmp_ival_tos, &tmp_arg_sz_tos); + if (res == 0) { + res = sock_setopt(fd, proto, type, arg_ptr, arg_sz); + if (res == 0) { + if (type != SO_PRIORITY) { + if (type != IP_TOS) { + res = sock_setopt(fd, + SOL_IP, + IP_TOS, + (char *) &tmp_ival_tos, + tmp_arg_sz_tos); + } + if (res == 0) { + res = sock_setopt(fd, + SOL_SOCKET, + SO_PRIORITY, + (char *) &tmp_ival_prio, + tmp_arg_sz_prio); + } + } + } + } + } + return (res); +} +#endif + +/* set socket options: +** return -1 on error +** 0 if ok +** 1 if ok force deliver of queued data +*/ +#ifdef HAVE_SCTP +static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len); +#endif + +static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) +{ + int type; + int proto; + int opt; + struct linger li_val; +#ifdef 
HAVE_MULTICAST_SUPPORT + struct ip_mreq mreq_val; +#endif + int ival; + char* arg_ptr; + int arg_sz; + enum PacketParseType old_htype = desc->htype; + int old_active = desc->active; + int propagate = 0; /* Set to 1 if failure to set this option + should be propagated to erlang (not all + errors can be propagated for BC reasons) */ + int res; +#ifdef HAVE_SCTP + /* SCTP sockets are treated completely separately: */ + if (IS_SCTP(desc)) + return sctp_set_opts(desc, ptr, len); +#endif + + while(len >= 5) { + opt = *ptr++; + ival = get_int32(ptr); + ptr += 4; + len -= 5; + arg_ptr = (char*) &ival; + arg_sz = sizeof(ival); + proto = SOL_SOCKET; + + switch(opt) { + case INET_LOPT_HEADER: + DEBUGF(("inet_set_opts(%ld): s=%d, HEADER=%d\r\n", + (long)desc->port, desc->s,ival)); + desc->hsz = ival; + continue; + + case INET_LOPT_MODE: + /* List or Binary: */ + DEBUGF(("inet_set_opts(%ld): s=%d, MODE=%d\r\n", + (long)desc->port, desc->s, ival)); + desc->mode = ival; + continue; + + case INET_LOPT_DELIVER: + DEBUGF(("inet_set_opts(%ld): s=%d, DELIVER=%d\r\n", + (long)desc->port, desc->s, ival)); + desc->deliver = ival; + continue; + + case INET_LOPT_BUFFER: + DEBUGF(("inet_set_opts(%ld): s=%d, BUFFER=%d\r\n", + (long)desc->port, desc->s, ival)); + if (ival > INET_MAX_BUFFER) ival = INET_MAX_BUFFER; + else if (ival < INET_MIN_BUFFER) ival = INET_MIN_BUFFER; + desc->bufsz = ival; + continue; + + case INET_LOPT_ACTIVE: + DEBUGF(("inet_set_opts(%ld): s=%d, ACTIVE=%d\r\n", + (long)desc->port, desc->s,ival)); + desc->active = ival; + if ((desc->stype == SOCK_STREAM) && (desc->active != INET_PASSIVE) && + (desc->state == INET_STATE_CLOSED)) { + tcp_closed_message((tcp_descriptor *) desc); + if (desc->exitf) { + driver_exit(desc->port, 0); + return 0; /* Give up on this socket, descriptor lost */ + } else { + desc_close_read(desc); + } + } + continue; + + case INET_LOPT_PACKET: + DEBUGF(("inet_set_opts(%ld): s=%d, PACKET=%d\r\n", + (long)desc->port, desc->s, ival)); + desc->htype = 
ival; + continue; + + case INET_LOPT_PACKET_SIZE: + DEBUGF(("inet_set_opts(%ld): s=%d, PACKET_SIZE=%d\r\n", + (long)desc->port, desc->s, ival)); + desc->psize = (unsigned int)ival; + continue; + + case INET_LOPT_EXITONCLOSE: + DEBUGF(("inet_set_opts(%ld): s=%d, EXITONCLOSE=%d\r\n", + (long)desc->port, desc->s, ival)); + desc->exitf = ival; + continue; + + case INET_LOPT_BIT8: + DEBUGF(("inet_set_opts(%ld): s=%d, BIT8=%d\r\n", + (long)desc->port, desc->s, ival)); + switch(ival) { + case INET_BIT8_ON: + desc->bit8f = 1; + desc->bit8 = 0; + break; + case INET_BIT8_OFF: + desc->bit8f = 0; + desc->bit8 = 0; + break; + case INET_BIT8_CLEAR: + desc->bit8f = 1; + desc->bit8 = 0; + break; + case INET_BIT8_SET: + desc->bit8f = 1; + desc->bit8 = 1; + break; + } + continue; + + case INET_LOPT_TCP_HIWTRMRK: + if (desc->stype == SOCK_STREAM) { + tcp_descriptor* tdesc = (tcp_descriptor*) desc; + if (ival < 0) ival = 0; + else if (ival > INET_MAX_BUFFER*2) ival = INET_MAX_BUFFER*2; + if (tdesc->low > ival) + tdesc->low = ival; + tdesc->high = ival; + } + continue; + + case INET_LOPT_TCP_LOWTRMRK: + if (desc->stype == SOCK_STREAM) { + tcp_descriptor* tdesc = (tcp_descriptor*) desc; + if (ival < 0) ival = 0; + else if (ival > INET_MAX_BUFFER) ival = INET_MAX_BUFFER; + if (tdesc->high < ival) + tdesc->high = ival; + tdesc->low = ival; + } + continue; + + case INET_LOPT_TCP_SEND_TIMEOUT: + if (desc->stype == SOCK_STREAM) { + tcp_descriptor* tdesc = (tcp_descriptor*) desc; + tdesc->send_timeout = ival; + } + continue; + + case INET_LOPT_TCP_SEND_TIMEOUT_CLOSE: + if (desc->stype == SOCK_STREAM) { + tcp_descriptor* tdesc = (tcp_descriptor*) desc; + tdesc->send_timeout_close = ival; + } + continue; + + + case INET_LOPT_TCP_DELAY_SEND: + if (desc->stype == SOCK_STREAM) { + tcp_descriptor* tdesc = (tcp_descriptor*) desc; + if (ival) + tdesc->tcp_add_flags |= TCP_ADDF_DELAY_SEND; + else + tdesc->tcp_add_flags &= ~TCP_ADDF_DELAY_SEND; + } + continue; + + case INET_LOPT_UDP_READ_PACKETS: + if 
(desc->stype == SOCK_DGRAM) { + udp_descriptor* udesc = (udp_descriptor*) desc; + if (ival <= 0) return -1; + udesc->read_packets = ival; + } + continue; + + case INET_OPT_REUSEADDR: +#ifdef __WIN32__ + continue; /* Bjorn says */ +#else + type = SO_REUSEADDR; + DEBUGF(("inet_set_opts(%ld): s=%d, SO_REUSEADDR=%d\r\n", + (long)desc->port, desc->s,ival)); + break; +#endif + case INET_OPT_KEEPALIVE: type = SO_KEEPALIVE; + DEBUGF(("inet_set_opts(%ld): s=%d, SO_KEEPALIVE=%d\r\n", + (long)desc->port, desc->s, ival)); + break; + case INET_OPT_DONTROUTE: type = SO_DONTROUTE; + DEBUGF(("inet_set_opts(%ld): s=%d, SO_DONTROUTE=%d\r\n", + (long)desc->port, desc->s, ival)); + break; + case INET_OPT_BROADCAST: type = SO_BROADCAST; + DEBUGF(("inet_set_opts(%ld): s=%d, SO_BROADCAST=%d\r\n", + (long)desc->port, desc->s,ival)); + break; + case INET_OPT_OOBINLINE: type = SO_OOBINLINE; + DEBUGF(("inet_set_opts(%ld): s=%d, SO_OOBINLINE=%d\r\n", + (long)desc->port, desc->s, ival)); + break; + case INET_OPT_SNDBUF: type = SO_SNDBUF; + DEBUGF(("inet_set_opts(%ld): s=%d, SO_SNDBUF=%d\r\n", + (long)desc->port, desc->s, ival)); + /* + * Setting buffer sizes in VxWorks gives unexpected results + * our workaround is to leave it at default. 
+ */ +#ifdef VXWORKS + goto skip_os_setopt; +#else + break; +#endif + case INET_OPT_RCVBUF: type = SO_RCVBUF; + DEBUGF(("inet_set_opts(%ld): s=%d, SO_RCVBUF=%d\r\n", + (long)desc->port, desc->s, ival)); +#ifdef VXWORKS + goto skip_os_setopt; +#else + break; +#endif + case INET_OPT_LINGER: type = SO_LINGER; + if (len < 4) + return -1; + li_val.l_onoff = ival; + li_val.l_linger = get_int32(ptr); + ptr += 4; + len -= 4; + arg_ptr = (char*) &li_val; + arg_sz = sizeof(li_val); + DEBUGF(("inet_set_opts(%ld): s=%d, SO_LINGER=%d,%d", + (long)desc->port, desc->s, li_val.l_onoff,li_val.l_linger)); + break; + + case INET_OPT_PRIORITY: +#ifdef SO_PRIORITY + type = SO_PRIORITY; + propagate = 1; /* We do want to know if this fails */ + DEBUGF(("inet_set_opts(%ld): s=%d, SO_PRIORITY=%d\r\n", + (long)desc->port, desc->s, ival)); + break; +#else + continue; +#endif + case INET_OPT_TOS: +#if defined(IP_TOS) && defined(SOL_IP) + proto = SOL_IP; + type = IP_TOS; + propagate = 1; + DEBUGF(("inet_set_opts(%ld): s=%d, IP_TOS=%d\r\n", + (long)desc->port, desc->s, ival)); + break; +#else + continue; +#endif + + case TCP_OPT_NODELAY: + proto = IPPROTO_TCP; + type = TCP_NODELAY; + DEBUGF(("inet_set_opts(%ld): s=%d, TCP_NODELAY=%d\r\n", + (long)desc->port, desc->s, ival)); + break; + +#ifdef HAVE_MULTICAST_SUPPORT + + case UDP_OPT_MULTICAST_TTL: + proto = IPPROTO_IP; + type = IP_MULTICAST_TTL; + DEBUGF(("inet_set_opts(%ld): s=%d, IP_MULTICAST_TTL=%d\r\n", + (long)desc->port,desc->s,ival)); + break; + + case UDP_OPT_MULTICAST_LOOP: + proto = IPPROTO_IP; + type = IP_MULTICAST_LOOP; + DEBUGF(("inet_set_opts(%ld): s=%d, IP_MULTICAST_LOOP=%d\r\n", + (long)desc->port,desc->s,ival)); + break; + + case UDP_OPT_MULTICAST_IF: + proto = IPPROTO_IP; + type = IP_MULTICAST_IF; + DEBUGF(("inet_set_opts(%ld): s=%d, IP_MULTICAST_IF=%x\r\n", + (long)desc->port, desc->s, ival)); + ival = sock_htonl(ival); + break; + + case UDP_OPT_ADD_MEMBERSHIP: + proto = IPPROTO_IP; + type = IP_ADD_MEMBERSHIP; + 
DEBUGF(("inet_set_opts(%ld): s=%d, IP_ADD_MEMBERSHIP=%d\r\n", + (long)desc->port, desc->s,ival)); + goto L_set_mreq; + + case UDP_OPT_DROP_MEMBERSHIP: + proto = IPPROTO_IP; + type = IP_DROP_MEMBERSHIP; + DEBUGF(("inet_set_opts(%ld): s=%d, IP_DROP_MEMBERSHIP=%x\r\n", + (long)desc->port, desc->s, ival)); + L_set_mreq: + mreq_val.imr_multiaddr.s_addr = sock_htonl(ival); + ival = get_int32(ptr); + mreq_val.imr_interface.s_addr = sock_htonl(ival); + ptr += 4; + len -= 4; + arg_ptr = (char*)&mreq_val; + arg_sz = sizeof(mreq_val); + break; + +#endif /* HAVE_MULTICAST_SUPPORT */ + + case INET_OPT_RAW: + if (len < 8) { + return -1; + } + proto = ival; + type = get_int32(ptr); + ptr += 4; + arg_sz = get_int32(ptr); + ptr += 4; + len -= 8; + if (len < arg_sz) { + return -1; + } + arg_ptr = ptr; + ptr += arg_sz; + len -= arg_sz; + break; + + default: + return -1; + } +#if defined(IP_TOS) && defined(SOL_IP) && defined(SO_PRIORITY) + res = setopt_prio_tos_trick (desc->s, proto, type, arg_ptr, arg_sz); +#else + res = sock_setopt (desc->s, proto, type, arg_ptr, arg_sz); +#endif + if (propagate && res != 0) { + return -1; + } + DEBUGF(("inet_set_opts(%ld): s=%d returned %d\r\n", + (long)desc->port, desc->s, res)); +#ifdef VXWORKS +skip_os_setopt: +#endif + if (type == SO_RCVBUF) { + /* make sure we have desc->bufsz >= SO_RCVBUF */ + if (ival > desc->bufsz) + desc->bufsz = ival; + } + } + + if ( ((desc->stype == SOCK_STREAM) && IS_CONNECTED(desc)) || + ((desc->stype == SOCK_DGRAM) && IS_OPEN(desc))) { + + if (desc->active != old_active) + sock_select(desc, (FD_READ|FD_CLOSE), (desc->active>0)); + + if ((desc->stype==SOCK_STREAM) && desc->active) { + if (!old_active || (desc->htype != old_htype)) { + /* passive => active change OR header type change in active mode */ + return 1; + } + return 0; + } + } + return 0; +} + +#ifdef HAVE_SCTP + +/* "sctp_get_initmsg": +** Used by both "send*" and "setsockopt". 
Gets the 4 fields of "sctp_initmsg" +** from the input buffer: +*/ +#define SCTP_GET_INITMSG_LEN (4*2) +static char* sctp_get_initmsg(struct sctp_initmsg* ini, char* curr) +{ + ini->sinit_num_ostreams = get_int16 (curr); curr += 2; + ini->sinit_max_instreams = get_int16 (curr); curr += 2; + ini->sinit_max_attempts = get_int16 (curr); curr += 2; + ini->sinit_max_init_timeo = get_int16 (curr); curr += 2; + return curr; +} + +/* "sctp_get_sendparams": +** Parses (from the command buffer) the 6 user-sprcified parms of +** "sctp_sndrcvinfo": +** stream(u16), flags(u16), ppid(u32), context(u32), +** timetoleave(u32), assoc_id +** Is used by both "send*" and "setsockopt": +*/ +#define SCTP_GET_SENDPARAMS_LEN (2*2 + 3*4 + ASSOC_ID_LEN) +static char* sctp_get_sendparams (struct sctp_sndrcvinfo* sri, char* curr) +{ + int eflags; + int cflags; + + sri->sinfo_stream = get_int16(curr); curr += 2; + sri->sinfo_ssn = 0; + + /* The "flags" are already ORed at the Erlang side, here we + reconstruct the real SCTP flags: + */ + eflags = get_int16(curr); curr += 2; + cflags = 0; + if (eflags & SCTP_FLAG_UNORDERED) cflags |= SCTP_UNORDERED; + if (eflags & SCTP_FLAG_ADDR_OVER) cflags |= SCTP_ADDR_OVER; + if (eflags & SCTP_FLAG_ABORT) cflags |= SCTP_ABORT; + if (eflags & SCTP_FLAG_EOF) cflags |= SCTP_EOF; + + sri->sinfo_flags = cflags; + sri->sinfo_ppid = sock_htonl(get_int32(curr)); + curr += 4; + sri->sinfo_context = get_int32(curr); curr += 4; + sri->sinfo_timetolive = get_int32(curr); curr += 4; + sri->sinfo_tsn = 0; + sri->sinfo_cumtsn = 0; + sri->sinfo_assoc_id = GET_ASSOC_ID (curr); curr += ASSOC_ID_LEN; + + return curr; +} + +/* Set SCTP options: +** return -1 on error +** 0 if ok +** NB: unlike inet_set_opts(), we don't have an active mode here, so there is no +** mode change which could force data delivery on setting an option. +** Arg: "ptr": [(erlang_encoded_opt(u8), value(...)), ...]; thus, multiple opts +** can be set at a time. 
+*/ +static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len) +{ +# define CHKLEN(Ptr, Len) \ + do { \ + if ((Ptr) + (Len) > ptr + len) return -1; \ + } while (0) + + char * curr = ptr; + int proto, type, res; + + /* The following union is used to hold any arg to "setsockopt": */ + union opts_union + { + int ival; + struct sctp_rtoinfo rtoi; + struct sctp_assocparams ap; + struct sctp_initmsg im; + struct linger lin; + struct sctp_setpeerprim prim; + struct sctp_setadaptation ad; + struct sctp_paddrparams pap; + struct sctp_sndrcvinfo sri; + struct sctp_event_subscribe es; +# ifdef SCTP_DELAYED_ACK_TIME + struct sctp_assoc_value av; /* Not in SOLARIS10 */ +# endif + } + arg; + + char * arg_ptr = NULL; + int arg_sz = 0; + int old_active = desc->active; + + while (curr < ptr + len) + { + /* Get the Erlang-encoded option type -- always 1 byte: */ + int eopt = *curr; + curr++; + + /* Get the option value. XXX: The condition (curr < ptr + len) + does not preclude us from reading from beyond the buffer end, + if the Erlang part of the driver specifies its input wrongly! 
+ */ + CHKLEN(curr, 4); /* All options need at least 4 bytes */ + switch(eopt) + { + /* Local INET options: */ + + case INET_LOPT_BUFFER: + desc->bufsz = get_int32(curr); curr += 4; + + if (desc->bufsz > INET_MAX_BUFFER) + desc->bufsz = INET_MAX_BUFFER; + else + if (desc->bufsz < INET_MIN_BUFFER) + desc->bufsz = INET_MIN_BUFFER; + res = 0; /* This does not affect the kernel buffer size */ + continue; + + case INET_LOPT_MODE: + desc->mode = get_int32(curr); curr += 4; + res = 0; + continue; + + case INET_LOPT_ACTIVE: + desc->active = get_int32(curr); curr += 4; + res = 0; + continue; + + /* SCTP options and applicable generic INET options: */ + + case SCTP_OPT_RTOINFO: + { + CHKLEN(curr, ASSOC_ID_LEN + 3*4); + arg.rtoi.srto_assoc_id = GET_ASSOC_ID(curr); curr += ASSOC_ID_LEN; + arg.rtoi.srto_initial = get_int32 (curr); curr += 4; + arg.rtoi.srto_max = get_int32 (curr); curr += 4; + arg.rtoi.srto_min = get_int32 (curr); curr += 4; + + proto = IPPROTO_SCTP; + type = SCTP_RTOINFO; + arg_ptr = (char*) (&arg.rtoi); + arg_sz = sizeof ( arg.rtoi); + break; + } + case SCTP_OPT_ASSOCINFO: + { + CHKLEN(curr, ASSOC_ID_LEN + 2*2 + 3*4); + + arg.ap.sasoc_assoc_id = GET_ASSOC_ID(curr); curr += ASSOC_ID_LEN; + arg.ap.sasoc_asocmaxrxt = get_int16 (curr); curr += 2; + arg.ap.sasoc_number_peer_destinations = + get_int16 (curr); curr += 2; + arg.ap.sasoc_peer_rwnd = get_int32 (curr); curr += 4; + arg.ap.sasoc_local_rwnd = get_int32 (curr); curr += 4; + arg.ap.sasoc_cookie_life = get_int32 (curr); curr += 4; + + proto = IPPROTO_SCTP; + type = SCTP_ASSOCINFO; + arg_ptr = (char*) (&arg.ap); + arg_sz = sizeof ( arg.ap); + break; + } + case SCTP_OPT_INITMSG: + { + CHKLEN(curr, SCTP_GET_INITMSG_LEN); + curr = sctp_get_initmsg (&arg.im, curr); + + proto = IPPROTO_SCTP; + type = SCTP_INITMSG; + arg_ptr = (char*) (&arg.im); + arg_sz = sizeof ( arg.im); + break; + } + case INET_OPT_LINGER: + { + CHKLEN(curr, ASSOC_ID_LEN + 2 + 4); + arg.lin.l_onoff = get_int16 (curr); curr += 2; + 
arg.lin.l_linger = get_int32 (curr); curr += 4; + + proto = SOL_SOCKET; + type = SO_LINGER; + arg_ptr = (char*) (&arg.lin); + arg_sz = sizeof ( arg.lin); + break; + } + case SCTP_OPT_NODELAY: + { + arg.ival= get_int32 (curr); curr += 4; + proto = IPPROTO_SCTP; + type = SCTP_NODELAY; + arg_ptr = (char*) (&arg.ival); + arg_sz = sizeof ( arg.ival); + break; + } + case INET_OPT_RCVBUF: + { + arg.ival= get_int32 (curr); curr += 4; + proto = SOL_SOCKET; + type = SO_RCVBUF; + arg_ptr = (char*) (&arg.ival); + arg_sz = sizeof ( arg.ival); + + /* Adjust the size of the user-level recv buffer, so it's not + smaller than the kernel one: */ + if (desc->bufsz <= arg.ival) + desc->bufsz = arg.ival; + break; + } + case INET_OPT_SNDBUF: + { + arg.ival= get_int32 (curr); curr += 4; + proto = SOL_SOCKET; + type = SO_SNDBUF; + arg_ptr = (char*) (&arg.ival); + arg_sz = sizeof ( arg.ival); + + /* Adjust the size of the user-level recv buffer, so it's not + smaller than the kernel one: */ + if (desc->bufsz <= arg.ival) + desc->bufsz = arg.ival; + break; + } + case INET_OPT_REUSEADDR: + { + arg.ival= get_int32 (curr); curr += 4; + proto = SOL_SOCKET; + type = SO_REUSEADDR; + arg_ptr = (char*) (&arg.ival); + arg_sz = sizeof ( arg.ival); + break; + } + case INET_OPT_DONTROUTE: + { + arg.ival= get_int32 (curr); curr += 4; + proto = SOL_SOCKET; + type = SO_DONTROUTE; + arg_ptr = (char*) (&arg.ival); + arg_sz = sizeof ( arg.ival); + break; + } + case INET_OPT_PRIORITY: +# ifdef SO_PRIORITY + { + arg.ival= get_int32 (curr); curr += 4; + proto = SOL_SOCKET; + type = SO_PRIORITY; + arg_ptr = (char*) (&arg.ival); + arg_sz = sizeof ( arg.ival); + break; + } +# else + continue; /* Option not supported -- ignore it */ +# endif + + case INET_OPT_TOS: +# if defined(IP_TOS) && defined(SOL_IP) + { + arg.ival= get_int32 (curr); curr += 4; + proto = SOL_IP; + type = IP_TOS; + arg_ptr = (char*) (&arg.ival); + arg_sz = sizeof ( arg.ival); + break; + } +# else + continue; /* Option not supported -- ignore it 
*/ +# endif + + case SCTP_OPT_AUTOCLOSE: + { + arg.ival= get_int32 (curr); curr += 4; + proto = IPPROTO_SCTP; + type = SCTP_AUTOCLOSE; + arg_ptr = (char*) (&arg.ival); + arg_sz = sizeof ( arg.ival); + break; + } + case SCTP_OPT_DISABLE_FRAGMENTS: + { + arg.ival= get_int32 (curr); curr += 4; + proto = IPPROTO_SCTP; + type = SCTP_DISABLE_FRAGMENTS; + arg_ptr = (char*) (&arg.ival); + arg_sz = sizeof ( arg.ival); + break; + } + case SCTP_OPT_I_WANT_MAPPED_V4_ADDR: + { + arg.ival= get_int32 (curr); curr += 4; + proto = IPPROTO_SCTP; + type = SCTP_I_WANT_MAPPED_V4_ADDR; + arg_ptr = (char*) (&arg.ival); + arg_sz = sizeof ( arg.ival); + break; + } + case SCTP_OPT_MAXSEG: + { + arg.ival= get_int32 (curr); curr += 4; + proto = IPPROTO_SCTP; + type = SCTP_MAXSEG; + arg_ptr = (char*) (&arg.ival); + arg_sz = sizeof ( arg.ival); + break; + } + case SCTP_OPT_PRIMARY_ADDR: + case SCTP_OPT_SET_PEER_PRIMARY_ADDR: + { + int alen; + char *after; + + CHKLEN(curr, ASSOC_ID_LEN); + /* XXX: These 2 opts have isomorphic value data structures, + "sctp_setpeerprim" and "sctp_prim" (in Solaris 10, the latter + is called "sctp_setprim"), so we grouped them together: + */ + arg.prim.sspp_assoc_id = GET_ASSOC_ID(curr); curr += ASSOC_ID_LEN; + + /* Fill in "arg.prim.sspp_addr": */ + alen = ptr + len - curr; + after = inet_set_faddress(desc->sfamily, + (inet_address*) (&arg.prim.sspp_addr), + curr, &alen); + if (after == NULL) + return -1; + curr = after; + + proto = IPPROTO_SCTP; + if (eopt == SCTP_OPT_PRIMARY_ADDR) + type = SCTP_PRIMARY_ADDR; + else + type = SCTP_SET_PEER_PRIMARY_ADDR; + + arg_ptr = (char*) (&arg.prim); + arg_sz = sizeof ( arg.prim); + break; + } + case SCTP_OPT_ADAPTATION_LAYER: + { + /* XXX: do we need to convert the Ind into network byte order??? 
*/ + arg.ad.ssb_adaptation_ind = sock_htonl (get_int32(curr)); curr += 4; + + proto = IPPROTO_SCTP; + type = SCTP_ADAPTATION_LAYER; + arg_ptr = (char*) (&arg.ad); + arg_sz = sizeof ( arg.ad); + break; + } + case SCTP_OPT_PEER_ADDR_PARAMS: + { + int alen; + char *after; +# ifdef HAVE_STRUCT_SCTP_PADDRPARAMS_SPP_FLAGS + int eflags, cflags, hb_enable, hb_disable, + pmtud_enable, pmtud_disable, + sackdelay_enable, sackdelay_disable; +# endif + + CHKLEN(curr, ASSOC_ID_LEN); + arg.pap.spp_assoc_id = GET_ASSOC_ID(curr); curr += ASSOC_ID_LEN; + + /* Fill in "pap.spp_address": */ + alen = ptr + len - curr; + after = inet_set_faddress(desc->sfamily, + (inet_address*) (&arg.pap.spp_address), + curr, &alen); + if (after == NULL) + return -1; + curr = after; + + CHKLEN(curr, 4 + 2 + 3*4); + + arg.pap.spp_hbinterval = get_int32(curr); curr += 4; + arg.pap.spp_pathmaxrxt = get_int16(curr); curr += 2; + + /* The following are missing in Solaris 10: */ +# ifdef HAVE_STRUCT_SCTP_PADDRPARAMS_SPP_PATHMTU + arg.pap.spp_pathmtu = get_int32(curr); +# endif + curr += 4; +# ifdef HAVE_STRUCT_SCTP_PADDRPARAMS_SPP_SACKDELAY + arg.pap.spp_sackdelay = get_int32(curr); +# endif + curr += 4; + +# ifdef HAVE_STRUCT_SCTP_PADDRPARAMS_SPP_FLAGS + /* Now re-construct the flags: */ + eflags = get_int32(curr); + cflags = 0; + + hb_enable = eflags & SCTP_FLAG_HB_ENABLE; + hb_disable = eflags & SCTP_FLAG_HB_DISABLE; + if (hb_enable && hb_disable) + return -1; + if (hb_enable) cflags |= SPP_HB_ENABLE; + if (hb_disable) cflags |= SPP_HB_DISABLE; + if (eflags & SCTP_FLAG_HB_DEMAND) cflags |= SPP_HB_DEMAND; + + pmtud_enable = eflags & SCTP_FLAG_PMTUD_ENABLE; + pmtud_disable = eflags & SCTP_FLAG_PMTUD_DISABLE; + if (pmtud_enable && pmtud_disable) + return -1; + if (pmtud_enable) cflags |= SPP_PMTUD_ENABLE; + if (pmtud_disable) cflags |= SPP_PMTUD_DISABLE; + + sackdelay_enable =eflags& SCTP_FLAG_SACDELAY_ENABLE; + sackdelay_disable=eflags& SCTP_FLAG_SACDELAY_DISABLE; + if (sackdelay_enable && 
sackdelay_disable) + return -1; + if (sackdelay_enable) cflags |= SPP_SACKDELAY_ENABLE; + if (sackdelay_disable) cflags |= SPP_SACKDELAY_DISABLE; + + arg.pap.spp_flags = cflags; +# endif + curr += 4; + + proto = IPPROTO_SCTP; + type = SCTP_PEER_ADDR_PARAMS; + arg_ptr = (char*) (&arg.pap); + arg_sz = sizeof ( arg.pap); + break; + } + case SCTP_OPT_DEFAULT_SEND_PARAM: + { + CHKLEN(curr, SCTP_GET_SENDPARAMS_LEN); + curr = sctp_get_sendparams (&arg.sri, curr); + + proto = IPPROTO_SCTP; + type = SCTP_DEFAULT_SEND_PARAM; + arg_ptr = (char*) (&arg.sri); + arg_sz = sizeof ( arg.sri); + break; + } + case SCTP_OPT_EVENTS: + { + CHKLEN(curr, 9); + /* We do not support "sctp_authentication_event" -- it is not + implemented in Linux Kernel SCTP anyway. Just in case if + the above structure has more fields than we support, zero + it out -- the extraneous events will NOT be used: + */ + memset (&arg.es, 0, sizeof(arg.es)); + + /* The input "buf" must contain the full definition of all the + supported event fields, 1 byte per each, as each event is + either explicitly subscribed or cleared: + */ + arg.es.sctp_data_io_event = get_int8(curr); curr++; + arg.es.sctp_association_event = get_int8(curr); curr++; + arg.es.sctp_address_event = get_int8(curr); curr++; + arg.es.sctp_send_failure_event = get_int8(curr); curr++; + arg.es.sctp_peer_error_event = get_int8(curr); curr++; + arg.es.sctp_shutdown_event = get_int8(curr); curr++; + arg.es.sctp_partial_delivery_event = get_int8(curr); curr++; + arg.es.sctp_adaptation_layer_event = get_int8(curr); curr++; + /* sctp_authentication_event not implemented */ curr++; + + proto = IPPROTO_SCTP; + type = SCTP_EVENTS; + arg_ptr = (char*) (&arg.es); + arg_sz = sizeof ( arg.es); + break; + } + /* The following is not available on Solaris 10: */ +# ifdef SCTP_DELAYED_ACK_TIME + case SCTP_OPT_DELAYED_ACK_TIME: + { + CHKLEN(curr, ASSOC_ID_LEN + 4); + arg.av.assoc_id = GET_ASSOC_ID(curr); curr += ASSOC_ID_LEN; + arg.av.assoc_value = get_int32(curr); 
curr += 4; + + proto = IPPROTO_SCTP; + type = SCTP_DELAYED_ACK_TIME; + arg_ptr = (char*) (&arg.av); + arg_sz = sizeof ( arg.es); + break; + } +# endif + default: + /* XXX: No more supported SCTP options. In particular, authentica- + tion options (SCTP_AUTH_CHUNK, SCTP_AUTH_KEY, SCTP_PEER_AUTH_ + CHUNKS, SCTP_LOCAL_AUTH_CHUNKS, SCTP_AUTH_SETKEY_ACTIVE) are + not yet implemented in the Linux kernel, hence not supported + here. Also not supported are SCTP_HMAC_IDENT, as well as any + "generic" options except "INET_LOPT_MODE". Raise an error: + */ + return -1; + } +#if defined(IP_TOS) && defined(SOL_IP) && defined(SO_PRIORITY) + res = setopt_prio_tos_trick (desc->s, proto, type, arg_ptr, arg_sz); +#else + res = sock_setopt (desc->s, proto, type, arg_ptr, arg_sz); +#endif + /* The return values of "sock_setopt" can only be 0 or -1: */ + ASSERT(res == 0 || res == -1); + if (res == -1) + { /* Got an error, DO NOT continue with other options. However, on + Solaris 10, we DO allow SO_SNDBUF and SO_RCVBUF to fail, assu- + min that the default kernel versions are good enough: + */ +# ifdef SOLARIS10 + if (type != SO_SNDBUF && type != SO_RCVBUF) +# endif + return res; + } + } + /* If we got here, all "sock_setopt"s above were successful: */ + if (IS_OPEN(desc) && desc->active != old_active) { + sock_select(desc, (FD_READ|FD_CLOSE), (desc->active > 0)); + } + return 0; +# undef CHKLEN +} +#endif /* HAVE_SCTP */ + +/* load all option values into the buf and reply +** return total length of reply filled into ptr +** ptr should point to a buffer with 9*len +1 to be safe!! 
+*/ + +static int inet_fill_opts(inet_descriptor* desc, + char* buf, int len, char** dest, int destlen) +{ + int type; + int proto; + int opt; + struct linger li_val; + int ival; + char* arg_ptr; + unsigned int arg_sz; + char *ptr = NULL; + int dest_used = 0; + int dest_allocated = destlen; + char *orig_dest = *dest; + + /* Ptr is a name parameter */ +#define RETURN_ERROR() \ + do { \ + if (dest_allocated > destlen) { \ + FREE(*dest); \ + *dest = orig_dest; \ + } \ + return -1; \ + } while(0) + +#define PLACE_FOR(Size,Ptr) \ + do { \ + int need = dest_used + (Size); \ + if (need > INET_MAX_BUFFER) { \ + RETURN_ERROR(); \ + } \ + if (need > dest_allocated) { \ + char *new_buffer; \ + if (dest_allocated == destlen) { \ + new_buffer = ALLOC((dest_allocated = need + 10)); \ + memcpy(new_buffer,*dest,dest_used); \ + } else { \ + new_buffer = REALLOC(*dest, (dest_allocated = need + 10)); \ + } \ + *dest = new_buffer; \ + } \ + (Ptr) = (*dest) + dest_used; \ + dest_used = need; \ + } while (0) + + /* Ptr is a name parameter */ +#define TRUNCATE_TO(Size,Ptr) \ + do { \ + int new_need = ((Ptr) - (*dest)) + (Size); \ + if (new_need > dest_used) { \ + erl_exit(1,"Internal error in inet_drv, " \ + "miscalculated buffer size"); \ + } \ + dest_used = new_need; \ + } while(0) + + + PLACE_FOR(1,ptr); + *ptr = INET_REP_OK; + + while(len--) { + opt = *buf++; + proto = SOL_SOCKET; + ival = 0; /* Windows Vista needs this (only writes part of it) */ + arg_sz = sizeof(ival); + arg_ptr = (char*) &ival; + + PLACE_FOR(5,ptr); + + switch(opt) { + case INET_LOPT_BUFFER: + *ptr++ = opt; + put_int32(desc->bufsz, ptr); + continue; + case INET_LOPT_HEADER: + *ptr++ = opt; + put_int32(desc->hsz, ptr); + continue; + case INET_LOPT_MODE: + *ptr++ = opt; + put_int32(desc->mode, ptr); + continue; + case INET_LOPT_DELIVER: + *ptr++ = opt; + put_int32(desc->deliver, ptr); + continue; + case INET_LOPT_ACTIVE: + *ptr++ = opt; + put_int32(desc->active, ptr); + continue; + case INET_LOPT_PACKET: + *ptr++ = 
opt; + put_int32(desc->htype, ptr); + continue; + case INET_LOPT_PACKET_SIZE: + *ptr++ = opt; + put_int32(desc->psize, ptr); + continue; + case INET_LOPT_EXITONCLOSE: + *ptr++ = opt; + put_int32(desc->exitf, ptr); + continue; + + case INET_LOPT_BIT8: + *ptr++ = opt; + if (desc->bit8f) { + put_int32(desc->bit8, ptr); + } else { + put_int32(INET_BIT8_OFF, ptr); + } + continue; + + case INET_LOPT_TCP_HIWTRMRK: + if (desc->stype == SOCK_STREAM) { + *ptr++ = opt; + ival = ((tcp_descriptor*)desc)->high; + put_int32(ival, ptr); + } else { + TRUNCATE_TO(0,ptr); + } + continue; + + case INET_LOPT_TCP_LOWTRMRK: + if (desc->stype == SOCK_STREAM) { + *ptr++ = opt; + ival = ((tcp_descriptor*)desc)->low; + put_int32(ival, ptr); + } else { + TRUNCATE_TO(0,ptr); + } + continue; + + case INET_LOPT_TCP_SEND_TIMEOUT: + if (desc->stype == SOCK_STREAM) { + *ptr++ = opt; + ival = ((tcp_descriptor*)desc)->send_timeout; + put_int32(ival, ptr); + } else { + TRUNCATE_TO(0,ptr); + } + continue; + + case INET_LOPT_TCP_SEND_TIMEOUT_CLOSE: + if (desc->stype == SOCK_STREAM) { + *ptr++ = opt; + ival = ((tcp_descriptor*)desc)->send_timeout_close; + put_int32(ival, ptr); + } else { + TRUNCATE_TO(0,ptr); + } + continue; + + case INET_LOPT_TCP_DELAY_SEND: + if (desc->stype == SOCK_STREAM) { + *ptr++ = opt; + ival = !!(((tcp_descriptor*)desc)->tcp_add_flags & TCP_ADDF_DELAY_SEND); + put_int32(ival, ptr); + } else { + TRUNCATE_TO(0,ptr); + } + continue; + + case INET_LOPT_UDP_READ_PACKETS: + if (desc->stype == SOCK_DGRAM) { + *ptr++ = opt; + ival = ((udp_descriptor*)desc)->read_packets; + put_int32(ival, ptr); + } else { + TRUNCATE_TO(0,ptr); + } + continue; + + case INET_OPT_PRIORITY: +#ifdef SO_PRIORITY + type = SO_PRIORITY; + break; +#else + *ptr++ = opt; + put_int32(0, ptr); + continue; +#endif + case INET_OPT_TOS: +#if defined(IP_TOS) && defined(SOL_IP) + proto = SOL_IP; + type = IP_TOS; + break; +#else + *ptr++ = opt; + put_int32(0, ptr); + continue; +#endif + case INET_OPT_REUSEADDR: + type = 
SO_REUSEADDR; + break; + case INET_OPT_KEEPALIVE: + type = SO_KEEPALIVE; + break; + case INET_OPT_DONTROUTE: + type = SO_DONTROUTE; + break; + case INET_OPT_BROADCAST: + type = SO_BROADCAST; + break; + case INET_OPT_OOBINLINE: + type = SO_OOBINLINE; + break; + case INET_OPT_SNDBUF: + type = SO_SNDBUF; + break; + case INET_OPT_RCVBUF: + type = SO_RCVBUF; + break; + case TCP_OPT_NODELAY: + proto = IPPROTO_TCP; + type = TCP_NODELAY; + break; + +#ifdef HAVE_MULTICAST_SUPPORT + case UDP_OPT_MULTICAST_TTL: + proto = IPPROTO_IP; + type = IP_MULTICAST_TTL; + break; + case UDP_OPT_MULTICAST_LOOP: + proto = IPPROTO_IP; + type = IP_MULTICAST_LOOP; + break; + case UDP_OPT_MULTICAST_IF: + proto = IPPROTO_IP; + type = IP_MULTICAST_IF; + break; + case INET_OPT_LINGER: + arg_sz = sizeof(li_val); + sys_memzero((void *) &li_val, sizeof(li_val)); + arg_ptr = (char*) &li_val; + type = SO_LINGER; + break; +#endif /* HAVE_MULTICAST_SUPPORT */ + + case INET_OPT_RAW: + { + int data_provided; + /* Raw options are icky, handle directly... 
*/ + if (len < 13) { + RETURN_ERROR(); + } + len -= 13; + proto = get_int32(buf); + buf += 4; + type = get_int32(buf); + buf += 4; + data_provided = (int) *buf++; + arg_sz = get_int32(buf); + if (arg_sz > INET_MAX_BUFFER) { + RETURN_ERROR(); + } + buf += 4; + TRUNCATE_TO(0,ptr); + PLACE_FOR(13 + arg_sz,ptr); + arg_ptr = ptr + 13; + if (data_provided) { + if (len < arg_sz) { + RETURN_ERROR(); + } + memcpy(arg_ptr,buf,arg_sz); + buf += arg_sz; + len -= arg_sz; + } + if (sock_getopt(desc->s,proto,type,arg_ptr,&arg_sz) == + SOCKET_ERROR) { + TRUNCATE_TO(0,ptr); + continue; + } + TRUNCATE_TO(arg_sz + 13,ptr); + *ptr++ = opt; + put_int32(proto,ptr); + ptr += 4; + put_int32(type,ptr); + ptr += 4; + put_int32(arg_sz,ptr); + continue; + } + default: + RETURN_ERROR(); + } + /* We have 5 bytes allocated to ptr */ + if (sock_getopt(desc->s,proto,type,arg_ptr,&arg_sz) == SOCKET_ERROR) { + TRUNCATE_TO(0,ptr); + continue; + } + *ptr++ = opt; + if (arg_ptr == (char*)&ival) { + put_int32(ival, ptr); + } + else { + put_int32(((Uint32) li_val.l_onoff), ptr); + PLACE_FOR(4,ptr); + put_int32(((Uint32) li_val.l_linger), ptr); + } + } + return (dest_used); +#undef PLACE_FOR +#undef TRUNCATE_TO +#undef RETURN_ERROR +} + +#ifdef HAVE_SCTP +#define LOAD_PADDRINFO_CNT \ + (2*LOAD_ATOM_CNT + LOAD_ASSOC_ID_CNT + LOAD_IP_AND_PORT_CNT + \ + 4*LOAD_INT_CNT + LOAD_TUPLE_CNT) +static int load_paddrinfo (ErlDrvTermData * spec, int i, + inet_descriptor* desc, struct sctp_paddrinfo* pai) +{ + i = LOAD_ATOM (spec, i, am_sctp_paddrinfo); + i = LOAD_ASSOC_ID (spec, i, pai->spinfo_assoc_id); + i = load_ip_and_port(spec, i, desc, &pai->spinfo_address); + switch(pai->spinfo_state) + { + case SCTP_ACTIVE: + i = LOAD_ATOM (spec, i, am_active); + break; + case SCTP_INACTIVE: + i = LOAD_ATOM (spec, i, am_inactive); + break; + default: + ASSERT(0); /* NB: SCTP_UNCONFIRMED modifier not yet supported */ + } + i = LOAD_INT (spec, i, pai->spinfo_cwnd); + i = LOAD_INT (spec, i, pai->spinfo_srtt); + i = LOAD_INT 
(spec, i, pai->spinfo_rto ); + i = LOAD_INT (spec, i, pai->spinfo_mtu ); + /* Close up the record: */ + i = LOAD_TUPLE (spec, i, 8); + return i; +} + +/* +** "sctp_fill_opts": Returns {ok, Results}, or an error: +*/ +static int sctp_fill_opts(inet_descriptor* desc, char* buf, int buflen, + char** dest, int destlen) +{ + /* In contrast to the generic "inet_fill_opts", the output here is + represented by tuples/records, which are formed in the "spec": + */ + ErlDrvTermData *spec; + int i = 0; + int length = 0; /* Number of result list entries */ + + int spec_allocated = PACKET_ERL_DRV_TERM_DATA_LEN; + spec = ALLOC(sizeof(* spec) * spec_allocated); + +# define RETURN_ERROR(Spec, Errno) \ + do { \ + FREE(Spec); \ + return (Errno); \ + } while(0) + + /* Spec is a name parmeter */ +# define PLACE_FOR(Spec, Index, N) \ + do { \ + int need; \ + if ((Index) > spec_allocated) { \ + erl_exit(1,"Internal error in inet_drv, " \ + "miscalculated buffer size"); \ + } \ + need = (Index) + (N); \ + if (need > INET_MAX_BUFFER/sizeof(ErlDrvTermData)) { \ + RETURN_ERROR((Spec), -ENOMEM); \ + } \ + if (need > spec_allocated) { \ + (Spec) = REALLOC((Spec), \ + sizeof(* (Spec)) \ + * (spec_allocated = need + 20)); \ + } \ + } while (0) + + PLACE_FOR(spec, i, 2*LOAD_ATOM_CNT + LOAD_PORT_CNT); + i = LOAD_ATOM (spec, i, am_inet_reply); + i = LOAD_PORT (spec, i, desc->dport); + i = LOAD_ATOM (spec, i, am_ok); + + while (buflen > 0) { + int eopt = *buf; /* "eopt" is 1-byte encoded */ + buf ++; buflen --; + + switch(eopt) + { + /* Local options allowed for SCTP. 
For TCP and UDP, the values of + these options are returned via "res" using integer encoding, + but here, we encode them as proper terms the same way as we do + it for all other SCTP options: + */ + case INET_LOPT_BUFFER: + { + PLACE_FOR(spec, i, LOAD_ATOM_CNT + LOAD_INT_CNT + LOAD_TUPLE_CNT); + i = LOAD_ATOM (spec, i, am_buffer); + i = LOAD_INT (spec, i, desc->bufsz); + i = LOAD_TUPLE(spec, i, 2); + break; + } + case INET_LOPT_MODE: + { + PLACE_FOR(spec, i, 2*LOAD_ATOM_CNT + LOAD_TUPLE_CNT); + i = LOAD_ATOM (spec, i, am_mode); + switch (desc->mode) + { + case INET_MODE_LIST : + { i = LOAD_ATOM (spec, i, am_list); break; } + + case INET_MODE_BINARY: + { i = LOAD_ATOM (spec, i, am_binary); break; } + + default: ASSERT (0); + } + i = LOAD_TUPLE (spec, i, 2); + break; + } + case INET_LOPT_ACTIVE: + { + PLACE_FOR(spec, i, 2*LOAD_ATOM_CNT + LOAD_TUPLE_CNT); + i = LOAD_ATOM (spec, i, am_active); + switch (desc->active) + { + case INET_ACTIVE : + { i = LOAD_ATOM (spec, i, am_true); break; } + + case INET_PASSIVE: + { i = LOAD_ATOM (spec, i, am_false); break; } + + case INET_ONCE : + { i = LOAD_ATOM (spec, i, am_once); break; } + + default: ASSERT (0); + } + i = LOAD_TUPLE (spec, i, 2); + break; + } + + /* SCTP and generic INET options: */ + + case SCTP_OPT_RTOINFO: + { + struct sctp_rtoinfo rti; + unsigned int sz = sizeof(rti); + + if (buflen < ASSOC_ID_LEN) RETURN_ERROR(spec, -EINVAL); + rti.srto_assoc_id = GET_ASSOC_ID(buf); + buf += ASSOC_ID_LEN; + buflen -= ASSOC_ID_LEN; + + if (sock_getopt(desc->s, IPPROTO_SCTP, SCTP_RTOINFO, + &rti, &sz) < 0) continue; + /* Fill in the response: */ + PLACE_FOR(spec, i, + 2*LOAD_ATOM_CNT + LOAD_ASSOC_ID_CNT + + 3*LOAD_INT_CNT + 2*LOAD_TUPLE_CNT); + i = LOAD_ATOM (spec, i, am_sctp_rtoinfo); + i = LOAD_ATOM (spec, i, am_sctp_rtoinfo); + i = LOAD_ASSOC_ID (spec, i, rti.srto_assoc_id); + i = LOAD_INT (spec, i, rti.srto_initial); + i = LOAD_INT (spec, i, rti.srto_max); + i = LOAD_INT (spec, i, rti.srto_min); + i = LOAD_TUPLE (spec, i, 5); 
+ i = LOAD_TUPLE (spec, i, 2); + break; + } + case SCTP_OPT_ASSOCINFO: + { + struct sctp_assocparams ap; + unsigned int sz = sizeof(ap); + + if (buflen < ASSOC_ID_LEN) RETURN_ERROR(spec, -EINVAL); + ap.sasoc_assoc_id = GET_ASSOC_ID(buf); + buf += ASSOC_ID_LEN; + buflen -= ASSOC_ID_LEN; + + if (sock_getopt(desc->s, IPPROTO_SCTP, SCTP_ASSOCINFO, + &ap, &sz) < 0) continue; + /* Fill in the response: */ + PLACE_FOR(spec, i, + 2*LOAD_ATOM_CNT + LOAD_ASSOC_ID_CNT + + 5*LOAD_INT_CNT + 2*LOAD_TUPLE_CNT); + i = LOAD_ATOM (spec, i, am_sctp_associnfo); + i = LOAD_ATOM (spec, i, am_sctp_assocparams); + i = LOAD_ASSOC_ID (spec, i, ap.sasoc_assoc_id); + i = LOAD_INT (spec, i, ap.sasoc_asocmaxrxt); + i = LOAD_INT (spec, i, ap.sasoc_number_peer_destinations); + i = LOAD_INT (spec, i, ap.sasoc_peer_rwnd); + i = LOAD_INT (spec, i, ap.sasoc_local_rwnd); + i = LOAD_INT (spec, i, ap.sasoc_cookie_life); + i = LOAD_TUPLE (spec, i, 7); + i = LOAD_TUPLE (spec, i, 2); + break; + } + case SCTP_OPT_INITMSG: + { + struct sctp_initmsg im; + unsigned int sz = sizeof(im); + + if (sock_getopt(desc->s, IPPROTO_SCTP, SCTP_INITMSG, + &im, &sz) < 0) continue; + /* Fill in the response: */ + PLACE_FOR(spec, i, + 2*LOAD_ATOM_CNT + + 4*LOAD_INT_CNT + 2*LOAD_TUPLE_CNT); + i = LOAD_ATOM (spec, i, am_sctp_initmsg); + i = LOAD_ATOM (spec, i, am_sctp_initmsg); + i = LOAD_INT (spec, i, im.sinit_num_ostreams); + i = LOAD_INT (spec, i, im.sinit_max_instreams); + i = LOAD_INT (spec, i, im.sinit_max_attempts); + i = LOAD_INT (spec, i, im.sinit_max_init_timeo); + i = LOAD_TUPLE (spec, i, 5); + i = LOAD_TUPLE (spec, i, 2); + break; + } + /* The following option returns a tuple {bool, int}: */ + case INET_OPT_LINGER: + { + struct linger lg; + unsigned int sz = sizeof(lg); + + if (sock_getopt(desc->s, IPPROTO_SCTP, SO_LINGER, + &lg, &sz) < 0) continue; + /* Fill in the response: */ + PLACE_FOR(spec, i, + LOAD_ATOM_CNT + LOAD_BOOL_CNT + + LOAD_INT_CNT + 2*LOAD_TUPLE_CNT); + i = LOAD_ATOM (spec, i, am_linger); + i = 
LOAD_BOOL (spec, i, lg.l_onoff); + i = LOAD_INT (spec, i, lg.l_linger); + i = LOAD_TUPLE (spec, i, 2); + i = LOAD_TUPLE (spec, i, 2); + break; + } + /* The following options just return an integer value: */ + case INET_OPT_RCVBUF : + case INET_OPT_SNDBUF : + case INET_OPT_REUSEADDR: + case INET_OPT_DONTROUTE: + case INET_OPT_PRIORITY : + case INET_OPT_TOS : + case SCTP_OPT_AUTOCLOSE: + case SCTP_OPT_MAXSEG : + /* The following options return true or false: */ + case SCTP_OPT_NODELAY : + case SCTP_OPT_DISABLE_FRAGMENTS: + case SCTP_OPT_I_WANT_MAPPED_V4_ADDR: + { + int res = 0; + unsigned int sz = sizeof(res); + int proto = 0, type = 0, is_int = 0; + ErlDrvTermData tag = am_sctp_error; + + switch(eopt) + { + case INET_OPT_RCVBUF : + { + proto = IPPROTO_SCTP; + type = SO_RCVBUF; + is_int = 1; + tag = am_recbuf; + break; + } + case INET_OPT_SNDBUF : + { + proto = IPPROTO_SCTP; + type = SO_SNDBUF; + is_int = 1; + tag = am_sndbuf; + break; + } + case INET_OPT_REUSEADDR: + { + proto = SOL_SOCKET; + type = SO_REUSEADDR; + is_int = 0; + tag = am_reuseaddr; + break; + } + case INET_OPT_DONTROUTE: + { + proto = SOL_SOCKET; + type = SO_DONTROUTE; + is_int = 0; + tag = am_dontroute; + break; + } + case INET_OPT_PRIORITY: + { +# if defined(SO_PRIORITY) + proto = SOL_SOCKET; + type = SO_PRIORITY; + is_int = 1; + tag = am_priority; + break; +# else + /* Not supported -- ignore */ + continue; +# endif + } + case INET_OPT_TOS: + { +# if defined(IP_TOS) && defined(SOL_IP) + proto = SOL_IP; + type = IP_TOS; + is_int = 1; + tag = am_tos; + break; +# else + /* Not supported -- ignore */ + continue; +# endif + } + case SCTP_OPT_AUTOCLOSE: + { + proto = IPPROTO_SCTP; + type = SCTP_AUTOCLOSE; + is_int = 1; + tag = am_sctp_autoclose; + break; + } + case SCTP_OPT_MAXSEG : + { + proto = IPPROTO_SCTP; + type = SCTP_MAXSEG; + is_int = 1; + tag = am_sctp_maxseg; + break; + } + case SCTP_OPT_NODELAY : + { + proto = IPPROTO_SCTP; + type = SCTP_NODELAY; + is_int = 0; + tag = am_sctp_nodelay; + 
break; + } + case SCTP_OPT_DISABLE_FRAGMENTS: + { + proto = IPPROTO_SCTP; + type = SCTP_DISABLE_FRAGMENTS; + is_int = 0; + tag = am_sctp_disable_fragments; + break; + } + case SCTP_OPT_I_WANT_MAPPED_V4_ADDR: + { + proto = IPPROTO_SCTP; + type = SCTP_I_WANT_MAPPED_V4_ADDR; + is_int = 0; + tag = am_sctp_i_want_mapped_v4_addr; + break; + } + default: ASSERT(0); + } + if (sock_getopt (desc->s, proto, type, &res, &sz) < 0) continue; + /* Form the result: */ + PLACE_FOR(spec, i, LOAD_ATOM_CNT + + (is_int ? LOAD_INT_CNT : LOAD_BOOL_CNT) + + LOAD_TUPLE_CNT); + i = LOAD_ATOM (spec, i, tag); + if (is_int) + i = LOAD_INT (spec, i, res); + else + i = LOAD_BOOL (spec, i, res); + i = LOAD_TUPLE (spec, i, 2); + break; + } + case SCTP_OPT_PRIMARY_ADDR: + case SCTP_OPT_SET_PEER_PRIMARY_ADDR: + { + /* These 2 options use completely isomorphic data structures: */ + struct sctp_setpeerprim sp; + unsigned int sz = sizeof(sp); + + if (buflen < ASSOC_ID_LEN) RETURN_ERROR(spec, -EINVAL); + sp.sspp_assoc_id = GET_ASSOC_ID(buf); + buf += ASSOC_ID_LEN; + buflen -= ASSOC_ID_LEN; + + if (sock_getopt(desc->s, IPPROTO_SCTP, + (eopt == SCTP_OPT_PRIMARY_ADDR) ? 
+ SCTP_PRIMARY_ADDR : SCTP_SET_PEER_PRIMARY_ADDR, + &sp, &sz) < 0) continue; + /* Fill in the response: */ + PLACE_FOR(spec, i, + 2*LOAD_ATOM_CNT + LOAD_ASSOC_ID_CNT + + LOAD_IP_AND_PORT_CNT + 2*LOAD_TUPLE_CNT); + switch (eopt) { + case SCTP_OPT_PRIMARY_ADDR: + i = LOAD_ATOM(spec, i, am_sctp_primary_addr); + i = LOAD_ATOM(spec, i, am_sctp_prim); + break; + case SCTP_OPT_SET_PEER_PRIMARY_ADDR: + i = LOAD_ATOM(spec, i, am_sctp_set_peer_primary_addr); + i = LOAD_ATOM(spec, i, am_sctp_setpeerprim); + break; + default: + ASSERT(0); + } + i = LOAD_ASSOC_ID (spec, i, sp.sspp_assoc_id); + i = load_ip_and_port(spec, i, desc, &sp.sspp_addr); + i = LOAD_TUPLE (spec, i, 3); + i = LOAD_TUPLE (spec, i, 2); + break; + } + case SCTP_OPT_ADAPTATION_LAYER: + { + struct sctp_setadaptation ad; + unsigned int sz = sizeof (ad); + + if (sock_getopt(desc->s, IPPROTO_SCTP, SCTP_ADAPTATION_LAYER, + &ad, &sz) < 0) continue; + /* Fill in the response: */ + PLACE_FOR(spec, i, + 2*LOAD_ATOM_CNT + LOAD_INT_CNT + 2*LOAD_TUPLE_CNT); + i = LOAD_ATOM (spec, i, am_sctp_adaptation_layer); + i = LOAD_ATOM (spec, i, am_sctp_setadaptation); + i = LOAD_INT (spec, i, ad.ssb_adaptation_ind); + i = LOAD_TUPLE (spec, i, 2); + i = LOAD_TUPLE (spec, i, 2); + break; + } + case SCTP_OPT_PEER_ADDR_PARAMS: + { + struct sctp_paddrparams ap; + unsigned int sz = sizeof(ap); + int n; + char *after; + int alen; + + if (buflen < ASSOC_ID_LEN) RETURN_ERROR(spec, -EINVAL); + ap.spp_assoc_id = GET_ASSOC_ID(buf); + buf += ASSOC_ID_LEN; + buflen -= ASSOC_ID_LEN; + alen = buflen; + after = inet_set_faddress(desc->sfamily, + (inet_address*) (&ap.spp_address), + buf, &alen); + if (after == NULL) RETURN_ERROR(spec, -EINVAL); + buflen -= after - buf; + buf = after; + + if (sock_getopt(desc->s, IPPROTO_SCTP, SCTP_PEER_ADDR_PARAMS, + &ap, &sz) < 0) continue; + /* Fill in the response: */ + PLACE_FOR(spec, i, + 2*LOAD_ATOM_CNT + LOAD_ASSOC_ID_CNT + + LOAD_IP_AND_PORT_CNT + 4*LOAD_INT_CNT); + i = LOAD_ATOM (spec, i, 
am_sctp_peer_addr_params); + i = LOAD_ATOM (spec, i, am_sctp_paddrparams); + i = LOAD_ASSOC_ID (spec, i, ap.spp_assoc_id); + i = load_ip_and_port(spec, i, desc, &ap.spp_address); + i = LOAD_INT (spec, i, ap.spp_hbinterval); + i = LOAD_INT (spec, i, ap.spp_pathmaxrxt); + + /* The following fields are not suported in SOLARIS10, + ** so put 0s for "spp_pathmtu", "spp_sackdelay", + ** and empty list for "spp_flags": + */ + +# ifdef HAVE_STRUCT_SCTP_PADDRPARAMS_SPP_PATHMTU + i = LOAD_INT (spec, i, ap.spp_pathmtu); +# else + i = LOAD_INT (spec, i, 0); +# endif + +# ifdef HAVE_STRUCT_SCTP_PADDRPARAMS_SPP_SACKDELAY + i = LOAD_INT (spec, i, ap.spp_sackdelay); +# else + i = LOAD_INT (spec, i, 0); +# endif + + n = 0; +# ifdef HAVE_STRUCT_SCTP_PADDRPARAMS_SPP_FLAGS + PLACE_FOR(spec, i, 7*LOAD_ATOM_CNT); + /* Now Flags, as a list: */ + if (ap.spp_flags & SPP_HB_ENABLE) + { i = LOAD_ATOM (spec, i, am_hb_enable); n++; } + + if (ap.spp_flags & SPP_HB_DISABLE) + { i = LOAD_ATOM (spec, i, am_hb_disable); n++; } + + if (ap.spp_flags & SPP_HB_DEMAND) + { i = LOAD_ATOM (spec, i, am_hb_demand); n++; } + + if (ap.spp_flags & SPP_PMTUD_ENABLE) + { i = LOAD_ATOM (spec, i, am_pmtud_enable); n++; } + + if (ap.spp_flags & SPP_PMTUD_DISABLE) + { i = LOAD_ATOM (spec, i, am_pmtud_disable); n++; } + + if (ap.spp_flags & SPP_SACKDELAY_ENABLE) + { i = LOAD_ATOM (spec, i, am_sackdelay_enable); n++; } + + if (ap.spp_flags & SPP_SACKDELAY_DISABLE) + { i = LOAD_ATOM (spec, i, am_sackdelay_disable); n++; } +# endif + + PLACE_FOR(spec, i, + LOAD_NIL_CNT + LOAD_LIST_CNT + 2*LOAD_TUPLE_CNT); + + /* Close up the Flags list: */ + i = LOAD_NIL (spec, i); + i = LOAD_LIST (spec, i, n+1); + + /* Close up the record: */ + i = LOAD_TUPLE (spec, i, 8); + /* Close up the result tuple: */ + i = LOAD_TUPLE (spec, i, 2); + break; + } + case SCTP_OPT_DEFAULT_SEND_PARAM: + { + struct sctp_sndrcvinfo sri; + unsigned int sz = sizeof(sri); + + if (sock_getopt(desc->s, IPPROTO_SCTP, SCTP_DEFAULT_SEND_PARAM, + &sri, &sz) < 0) 
continue; + /* Fill in the response: */ + PLACE_FOR(spec, i, LOAD_ATOM_CNT + + SCTP_PARSE_SNDRCVINFO_CNT + LOAD_TUPLE_CNT); + i = LOAD_ATOM(spec, i, am_sctp_default_send_param); + i = sctp_parse_sndrcvinfo(spec, i, &sri); + i = LOAD_TUPLE(spec, i, 2); + break; + } + case SCTP_OPT_EVENTS: + { + struct sctp_event_subscribe evs; + unsigned int sz = sizeof(evs); + + if (sock_getopt(desc->s, IPPROTO_SCTP, SCTP_EVENTS, + &evs, &sz) < 0) continue; + /* Fill in the response: */ + PLACE_FOR(spec, i, + 2*LOAD_ATOM_CNT + 9*LOAD_BOOL_CNT + 2*LOAD_TUPLE_CNT); + i = LOAD_ATOM (spec, i, am_sctp_events); + i = LOAD_ATOM (spec, i, am_sctp_event_subscribe); + i = LOAD_BOOL (spec, i, evs.sctp_data_io_event); + i = LOAD_BOOL (spec, i, evs.sctp_association_event); + i = LOAD_BOOL (spec, i, evs.sctp_address_event); + i = LOAD_BOOL (spec, i, evs.sctp_send_failure_event); + i = LOAD_BOOL (spec, i, evs.sctp_peer_error_event); + i = LOAD_BOOL (spec, i, evs.sctp_shutdown_event); + i = LOAD_BOOL (spec, i, evs.sctp_partial_delivery_event); + i = LOAD_BOOL (spec, i, evs.sctp_adaptation_layer_event); + i = LOAD_BOOL (spec, i, 0);/* NB: sctp_authentication_event + * is not yet supported in Linux + */ + i = LOAD_TUPLE (spec, i, 10); + i = LOAD_TUPLE (spec, i, 2); + break; + } + /* The following option is not available in Solaris 10: */ +# ifdef SCTP_DELAYED_ACK_TIME + case SCTP_OPT_DELAYED_ACK_TIME: + { + struct sctp_assoc_value av; + unsigned int sz = sizeof(av); + + if (buflen < ASSOC_ID_LEN) RETURN_ERROR(spec, -EINVAL); + av.assoc_id = GET_ASSOC_ID(buf); + buf += ASSOC_ID_LEN; + buflen -= ASSOC_ID_LEN; + + if (sock_getopt(desc->s, IPPROTO_SCTP, SCTP_DELAYED_ACK_TIME, + &av, &sz) < 0) continue; + /* Fill in the response: */ + PLACE_FOR(spec, i, 2*LOAD_ATOM_CNT + LOAD_ASSOC_ID_CNT + + LOAD_INT_CNT + 2*LOAD_TUPLE_CNT); + i = LOAD_ATOM (spec, i, am_sctp_delayed_ack_time); + i = LOAD_ATOM (spec, i, am_sctp_assoc_value); + i = LOAD_ASSOC_ID (spec, i, av.assoc_id); + i = LOAD_INT (spec, i, 
av.assoc_value); + i = LOAD_TUPLE (spec, i, 3); + i = LOAD_TUPLE (spec, i, 2); + break; + } +# endif + case SCTP_OPT_STATUS: + { + struct sctp_status st; + unsigned int sz = sizeof(st); + + if (buflen < ASSOC_ID_LEN) RETURN_ERROR(spec, -EINVAL); + st.sstat_assoc_id = GET_ASSOC_ID(buf); + buf += ASSOC_ID_LEN; + buflen -= ASSOC_ID_LEN; + + if (sock_getopt(desc->s, IPPROTO_SCTP, SCTP_STATUS, + &st, &sz) < 0) continue; + /* Fill in the response: */ + PLACE_FOR(spec, i, 3*LOAD_ATOM_CNT + LOAD_ASSOC_ID_CNT + + 6*LOAD_INT_CNT + LOAD_PADDRINFO_CNT + + 2*LOAD_TUPLE_CNT); + i = LOAD_ATOM (spec, i, am_sctp_status); + i = LOAD_ATOM (spec, i, am_sctp_status); + i = LOAD_ASSOC_ID (spec, i, st.sstat_assoc_id); + switch(st.sstat_state) + { + /* SCTP_EMPTY is not supported on SOLARIS10: */ +# ifdef SCTP_EMPTY + case SCTP_EMPTY: + i = LOAD_ATOM (spec, i, am_empty); + break; +# endif + case SCTP_CLOSED: + i = LOAD_ATOM (spec, i, am_closed); + break; + /* The following states are not supported by Linux Kernel SCTP yet: + case SCTP_BOUND: + i = LOAD_ATOM (spec, i, am_bound); + break; + case SCTP_LISTEN: + i = LOAD_ATOM (spec, i, am_listen); + break; + */ + case SCTP_COOKIE_WAIT: + i = LOAD_ATOM (spec, i, am_cookie_wait); + break; + case SCTP_COOKIE_ECHOED: + i = LOAD_ATOM (spec, i, am_cookie_echoed); + break; + case SCTP_ESTABLISHED: + i = LOAD_ATOM (spec, i, am_established); + break; + case SCTP_SHUTDOWN_PENDING: + i = LOAD_ATOM (spec, i, am_shutdown_pending); + break; + case SCTP_SHUTDOWN_SENT: + i = LOAD_ATOM (spec, i, am_shutdown_sent); + break; + case SCTP_SHUTDOWN_RECEIVED: + i = LOAD_ATOM (spec, i, am_shutdown_received); + break; + case SCTP_SHUTDOWN_ACK_SENT: + i = LOAD_ATOM (spec, i, am_shutdown_ack_sent); + break; + default: + i = LOAD_ATOM (spec, i, am_undefined); + break; + } + i = LOAD_INT (spec, i, st.sstat_rwnd); + i = LOAD_INT (spec, i, st.sstat_unackdata); + i = LOAD_INT (spec, i, st.sstat_penddata); + i = LOAD_INT (spec, i, st.sstat_instrms); + i = LOAD_INT (spec, i, 
st.sstat_outstrms); + i = LOAD_INT (spec, i, st.sstat_fragmentation_point); + i = load_paddrinfo (spec, i, desc, &st.sstat_primary); + /* Close up the record: */ + i = LOAD_TUPLE (spec, i, 10); + /* Close up the result tuple: */ + i = LOAD_TUPLE (spec, i, 2); + break; + } + case SCTP_OPT_GET_PEER_ADDR_INFO: + { + struct sctp_paddrinfo pai; + unsigned int sz = sizeof(pai); + char *after; + int alen; + + if (buflen < ASSOC_ID_LEN) RETURN_ERROR(spec, -EINVAL); + pai.spinfo_assoc_id = GET_ASSOC_ID(buf); + buf += ASSOC_ID_LEN; + buflen -= ASSOC_ID_LEN; + alen = buflen; + after = inet_set_faddress(desc->sfamily, + (inet_address*) (&pai.spinfo_address), + buf, &alen); + if (after == NULL) RETURN_ERROR(spec, -EINVAL); + buflen -= after - buf; + buf = after; + + if (sock_getopt(desc->s, IPPROTO_SCTP, SCTP_GET_PEER_ADDR_INFO, + &pai, &sz) < 0) continue; + /* Fill in the response: */ + PLACE_FOR(spec, i, + LOAD_ATOM_CNT + LOAD_PADDRINFO_CNT + LOAD_TUPLE_CNT); + i = LOAD_ATOM (spec, i, am_sctp_get_peer_addr_info); + i = load_paddrinfo (spec, i, desc, &pai); + i = LOAD_TUPLE (spec, i, 2); + break; + } + default: + RETURN_ERROR(spec, -EINVAL); /* No more valid options */ + } + /* If we get here one result has been succesfully loaded */ + length ++; + } + if (buflen != 0) RETURN_ERROR(spec, -EINVAL); /* Optparam mismatch */ + + PLACE_FOR(spec, i, LOAD_NIL_CNT + LOAD_LIST_CNT + 2*LOAD_TUPLE_CNT); + + /* If we get here, we have "length" options: */ + i = LOAD_NIL (spec, i); + i = LOAD_LIST (spec, i, length+1); + + /* Close up the {ok, List} response: */ + i = LOAD_TUPLE(spec, i, 2); + /* Close up the {inet_reply, S, {ok, List}} response: */ + i = LOAD_TUPLE(spec, i, 3); + + /* Now, convert "spec" into the returnable term: */ + /* desc->caller = 0; What does it mean? 
*/ + driver_output_term(desc->port, spec, i); + FREE(spec); + + (*dest)[0] = INET_REP_SCTP; + return 1; /* Response length */ +# undef PLACE_FOR +# undef RETURN_ERROR +} +#endif + +/* fill statistics reply, op codes from src and result in dest +** dst area must be a least 5*len + 1 bytes +*/ +static int inet_fill_stat(inet_descriptor* desc, char* src, int len, char* dst) +{ + unsigned long val; + int op; + char* dst_start = dst; + + *dst++ = INET_REP_OK; /* put reply code */ + while (len--) { + op = *src++; + *dst++ = op; /* copy op code */ + switch(op) { + case INET_STAT_RECV_CNT: + val = desc->recv_cnt; + break; + case INET_STAT_RECV_MAX: + val = (unsigned long) desc->recv_max; + break; + case INET_STAT_RECV_AVG: + val = (unsigned long) desc->recv_avg; + break; + case INET_STAT_RECV_DVI: + val = (unsigned long) fabs(desc->recv_dvi); + break; + case INET_STAT_SEND_CNT: + val = desc->send_cnt; + break; + case INET_STAT_SEND_MAX: + val = desc->send_max; + break; + case INET_STAT_SEND_AVG: + val = (unsigned long) desc->send_avg; + break; + case INET_STAT_SEND_PND: + val = driver_sizeq(desc->port); + break; + case INET_STAT_RECV_OCT: + put_int32(desc->recv_oct[1], dst); /* write high 32bit */ + put_int32(desc->recv_oct[0], dst+4); /* write low 32bit */ + dst += 8; + continue; + case INET_STAT_SEND_OCT: + put_int32(desc->send_oct[1], dst); /* write high 32bit */ + put_int32(desc->send_oct[0], dst+4); /* write low 32bit */ + dst += 8; + continue; + default: return -1; /* invalid argument */ + } + put_int32(val, dst); /* write 32bit value */ + dst += 4; + } + return dst - dst_start; /* actual length */ +} + +static void +send_empty_out_q_msgs(inet_descriptor* desc) +{ + ErlDrvTermData msg[6]; + int msg_len = 0; + + if(NO_SUBSCRIBERS(&desc->empty_out_q_subs)) + return; + + msg_len = LOAD_ATOM(msg, msg_len, am_empty_out_q); + msg_len = LOAD_PORT(msg, msg_len, desc->dport); + msg_len = LOAD_TUPLE(msg, msg_len, 2); + + ASSERT(msg_len == sizeof(msg)/sizeof(*msg)); + + 
send_to_subscribers(desc->port, + &desc->empty_out_q_subs, + 1, + msg, + msg_len); +} + +/* subscribe and fill subscription reply, op codes from src and +** result in dest dst area must be a least 5*len + 1 bytes +*/ +static int inet_subscribe(inet_descriptor* desc, char* src, int len, char* dst) +{ + unsigned long val; + int op; + char* dst_start = dst; + + *dst++ = INET_REP_OK; /* put reply code */ + while (len--) { + op = *src++; + *dst++ = op; /* copy op code */ + switch(op) { + case INET_SUBS_EMPTY_OUT_Q: + val = driver_sizeq(desc->port); + if(val > 0) + if(!save_subscriber(&desc->empty_out_q_subs, + driver_caller(desc->port))) + return 0; + break; + default: return -1; /* invalid argument */ + } + put_int32(val, dst); /* write 32bit value */ + dst += 4; + } + return dst - dst_start; /* actual length */ +} + +/* Terminate socket */ +static void inet_stop(inet_descriptor* desc) +{ + erl_inet_close(desc); + FREE(desc); +} + + +/* Allocate descriptor */ +static ErlDrvData inet_start(ErlDrvPort port, int size, int protocol) +{ + inet_descriptor* desc; + + if ((desc = (inet_descriptor*) ALLOC(size)) == NULL) + return NULL; + + desc->s = INVALID_SOCKET; + desc->event = INVALID_EVENT; + desc->event_mask = 0; +#ifdef __WIN32__ + desc->forced_events = 0; + desc->send_would_block = 0; +#endif + desc->port = port; + desc->dport = driver_mk_port(port); + desc->state = INET_STATE_CLOSED; + desc->prebound = 0; + desc->bufsz = INET_DEF_BUFFER; + desc->hsz = 0; /* list header size */ + desc->htype = TCP_PB_RAW; /* default packet type */ + desc->psize = 0; /* no size check */ + desc->stype = -1; /* bad stype */ + desc->sfamily = -1; + desc->sprotocol = protocol; + desc->mode = INET_MODE_LIST; /* list mode */ + desc->exitf = 1; /* exit port when close on active + socket */ + desc->bit8f = 0; + desc->bit8 = 0; + desc->deliver = INET_DELIVER_TERM; /* standard term format */ + desc->active = INET_PASSIVE; /* start passive */ + desc->oph = NULL; + desc->opt = NULL; + + 
desc->peer_ptr = NULL; + desc->name_ptr = NULL; + + desc->recv_oct[0] = desc->recv_oct[1] = 0; + desc->recv_cnt = 0; + desc->recv_max = 0; + desc->recv_avg = 0.0; + desc->recv_dvi = 0.0; + desc->send_oct[0] = desc->send_oct[1] = 0; + desc->send_cnt = 0; + desc->send_max = 0; + desc->send_avg = 0.0; + desc->empty_out_q_subs.subscriber = NO_PROCESS; + desc->empty_out_q_subs.next = NULL; + + sys_memzero((char *)&desc->remote,sizeof(desc->remote)); + + return (ErlDrvData)desc; +} + + +#ifndef MAXHOSTNAMELEN +#define MAXHOSTNAMELEN 256 +#endif + +/* +** common TCP/UDP/SCTP control command +*/ +static int inet_ctl(inet_descriptor* desc, int cmd, char* buf, int len, + char** rbuf, int rsize) +{ + switch (cmd) { + + case INET_REQ_GETSTAT: { + char* dst; + int i; + int dstlen = 1; /* Reply code */ + + for (i = 0; i < len; i++) { + switch(buf[i]) { + case INET_STAT_SEND_OCT: dstlen += 9; break; + case INET_STAT_RECV_OCT: dstlen += 9; break; + default: dstlen += 5; break; + } + } + DEBUGF(("inet_ctl(%ld): GETSTAT\r\n", (long) desc->port)); + if (dstlen > INET_MAX_BUFFER) /* sanity check */ + return 0; + if (dstlen > rsize) { + if ((dst = (char*) ALLOC(dstlen)) == NULL) + return 0; + *rbuf = dst; /* call will free this buffer */ + } + else + dst = *rbuf; /* ok we fit in buffer given */ + return inet_fill_stat(desc, buf, len, dst); + } + + case INET_REQ_SUBSCRIBE: { + char* dst; + int dstlen = 1 /* Reply code */ + len*5; + DEBUGF(("inet_ctl(%ld): INET_REQ_SUBSCRIBE\r\n", (long) desc->port)); + if (dstlen > INET_MAX_BUFFER) /* sanity check */ + return 0; + if (dstlen > rsize) { + if ((dst = (char*) ALLOC(dstlen)) == NULL) + return 0; + *rbuf = dst; /* call will free this buffer */ + } + else + dst = *rbuf; /* ok we fit in buffer given */ + return inet_subscribe(desc, buf, len, dst); + } + + case INET_REQ_GETOPTS: { /* get options */ + int replen; + DEBUGF(("inet_ctl(%ld): GETOPTS\r\n", (long)desc->port)); +#ifdef HAVE_SCTP + if (IS_SCTP(desc)) + { + if ((replen = 
sctp_fill_opts(desc, buf, len, rbuf, rsize)) < 0) + return ctl_error(-replen, rbuf, rsize); + } else +#endif + if ((replen = inet_fill_opts(desc, buf, len, rbuf, rsize)) < 0) { + return ctl_error(EINVAL, rbuf, rsize); + } + return replen; + } + + case INET_REQ_GETIFLIST: { + DEBUGF(("inet_ctl(%ld): GETIFLIST\r\n", (long)desc->port)); + if (!IS_OPEN(desc)) + return ctl_xerror(EXBADPORT, rbuf, rsize); + return inet_ctl_getiflist(desc, rbuf, rsize); + } + + case INET_REQ_IFGET: { + DEBUGF(("inet_ctl(%ld): IFGET\r\n", (long)desc->port)); + if (!IS_OPEN(desc)) + return ctl_xerror(EXBADPORT, rbuf, rsize); + return inet_ctl_ifget(desc, buf, len, rbuf, rsize); + } + + case INET_REQ_IFSET: { + DEBUGF(("inet_ctl(%ld): IFSET\r\n", (long)desc->port)); + if (!IS_OPEN(desc)) + return ctl_xerror(EXBADPORT, rbuf, rsize); + return inet_ctl_ifset(desc, buf, len, rbuf, rsize); + } + + case INET_REQ_SETOPTS: { /* set options */ + DEBUGF(("inet_ctl(%ld): SETOPTS\r\n", (long)desc->port)); + switch(inet_set_opts(desc, buf, len)) { + case -1: + return ctl_error(EINVAL, rbuf, rsize); + case 0: + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); + default: /* active/passive change!! */ + /* + * Let's hope that the descriptor really is a tcp_descriptor here. 
+ */ + tcp_deliver((tcp_descriptor *) desc, 0); + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); + } + } + + case INET_REQ_GETSTATUS: { + char tbuf[4]; + + DEBUGF(("inet_ctl(%ld): GETSTATUS\r\n", (long)desc->port)); + put_int32(desc->state, tbuf); + return ctl_reply(INET_REP_OK, tbuf, 4, rbuf, rsize); + } + + case INET_REQ_GETTYPE: { + char tbuf[8]; + + DEBUGF(("inet_ctl(%ld): GETTYPE\r\n", (long)desc->port)); + if (desc->sfamily == AF_INET) { + put_int32(INET_AF_INET, &tbuf[0]); + } +#if defined(HAVE_IN6) && defined(AF_INET6) + else if (desc->sfamily == AF_INET6) { + put_int32(INET_AF_INET6, &tbuf[0]); + } +#endif + else + return ctl_error(EINVAL, rbuf, rsize); + + if (desc->stype == SOCK_STREAM) { + put_int32(INET_TYPE_STREAM, &tbuf[4]); + } + else if (desc->stype == SOCK_DGRAM) { + put_int32(INET_TYPE_DGRAM, &tbuf[4]); + } +#ifdef HAVE_SCTP + else if (desc->stype == SOCK_SEQPACKET) { + put_int32(INET_TYPE_SEQPACKET, &tbuf[4]); + } +#endif + else + return ctl_error(EINVAL, rbuf, rsize); + return ctl_reply(INET_REP_OK, tbuf, 8, rbuf, rsize); + } + + + case INET_REQ_GETFD: { + char tbuf[4]; + + DEBUGF(("inet_ctl(%ld): GETFD\r\n", (long)desc->port)); + if (!IS_OPEN(desc)) + return ctl_error(EINVAL, rbuf, rsize); + put_int32((long)desc->s, tbuf); + return ctl_reply(INET_REP_OK, tbuf, 4, rbuf, rsize); + } + + case INET_REQ_GETHOSTNAME: { /* get host name */ + char tbuf[MAXHOSTNAMELEN]; + + DEBUGF(("inet_ctl(%ld): GETHOSTNAME\r\n", (long)desc->port)); + if (len != 0) + return ctl_error(EINVAL, rbuf, rsize); + + if (sock_hostname(tbuf, MAXHOSTNAMELEN) == SOCKET_ERROR) + return ctl_error(sock_errno(), rbuf, rsize); + return ctl_reply(INET_REP_OK, tbuf, strlen(tbuf), rbuf, rsize); + } + + case INET_REQ_PEER: { /* get peername */ + char tbuf[sizeof(inet_address)]; + inet_address peer; + inet_address* ptr; + unsigned int sz = sizeof(peer); + + DEBUGF(("inet_ctl(%ld): PEER\r\n", (long)desc->port)); + + if (!(desc->state & INET_F_ACTIVE)) + return ctl_error(ENOTCONN, 
rbuf, rsize); + if ((ptr = desc->peer_ptr) == NULL) { + ptr = &peer; + if (sock_peer(desc->s, (struct sockaddr*)ptr,&sz) == SOCKET_ERROR) + return ctl_error(sock_errno(), rbuf, rsize); + } + if (inet_get_address(desc->sfamily, tbuf, ptr, &sz) < 0) + return ctl_error(EINVAL, rbuf, rsize); + return ctl_reply(INET_REP_OK, tbuf, sz, rbuf, rsize); + } + + case INET_REQ_SETPEER: { /* set fake peername Port Address */ + if (len == 0) { + desc->peer_ptr = NULL; + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); + } + else if (len < 2) + return ctl_error(EINVAL, rbuf, rsize); + else if (inet_set_address(desc->sfamily, &desc->peer_addr, + buf, &len) == NULL) + return ctl_error(EINVAL, rbuf, rsize); + else { + desc->peer_ptr = &desc->peer_addr; + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); + } + } + + case INET_REQ_NAME: { /* get sockname */ + char tbuf[sizeof(inet_address)]; + inet_address name; + inet_address* ptr; + unsigned int sz = sizeof(name); + + DEBUGF(("inet_ctl(%ld): NAME\r\n", (long)desc->port)); + + if (!IS_BOUND(desc)) + return ctl_error(EINVAL, rbuf, rsize); /* address is not valid */ + + if ((ptr = desc->name_ptr) == NULL) { + ptr = &name; + if (sock_name(desc->s, (struct sockaddr*)ptr, &sz) == SOCKET_ERROR) + return ctl_error(sock_errno(), rbuf, rsize); + } + if (inet_get_address(desc->sfamily, tbuf, ptr, &sz) < 0) + return ctl_error(EINVAL, rbuf, rsize); + return ctl_reply(INET_REP_OK, tbuf, sz, rbuf, rsize); + } + + case INET_REQ_SETNAME: { /* set fake peername Port Address */ + if (len == 0) { + desc->name_ptr = NULL; + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); + } + else if (len < 2) + return ctl_error(EINVAL, rbuf, rsize); + else if (inet_set_address(desc->sfamily, &desc->name_addr, + buf, &len) == NULL) + return ctl_error(EINVAL, rbuf, rsize); + else { + desc->name_ptr = &desc->name_addr; + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); + } + } + + case INET_REQ_BIND: { /* bind socket */ + char tbuf[2]; + inet_address 
local; + short port; + + DEBUGF(("inet_ctl(%ld): BIND\r\n", (long)desc->port)); + + if (len < 2) + return ctl_error(EINVAL, rbuf, rsize); + if (desc->state != INET_STATE_OPEN) + return ctl_xerror(EXBADPORT, rbuf, rsize); + + if (inet_set_address(desc->sfamily, &local, buf, &len) == NULL) + return ctl_error(EINVAL, rbuf, rsize); + + if (sock_bind(desc->s,(struct sockaddr*) &local, len) == SOCKET_ERROR) + return ctl_error(sock_errno(), rbuf, rsize); + + desc->state = INET_STATE_BOUND; + + if ((port = inet_address_port(&local)) == 0) { + len = sizeof(local); + sock_name(desc->s, (struct sockaddr*) &local, (unsigned int*)&len); + port = inet_address_port(&local); + } + port = sock_ntohs(port); + put_int16(port, tbuf); + return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize); + } + +#ifndef VXWORKS + + case INET_REQ_GETSERVBYNAME: { /* L1 Name-String L2 Proto-String */ + char namebuf[256]; + char protobuf[256]; + char tbuf[2]; + struct servent* srv; + short port; + int n; + + if (len < 2) + return ctl_error(EINVAL, rbuf, rsize); + n = buf[0]; buf++; len--; + if (n >= len) /* the = sign makes the test inklude next length byte */ + return ctl_error(EINVAL, rbuf, rsize); + memcpy(namebuf, buf, n); + namebuf[n] = '\0'; + len -= n; buf += n; + n = buf[0]; buf++; len--; + if (n > len) + return ctl_error(EINVAL, rbuf, rsize); + memcpy(protobuf, buf, n); + protobuf[n] = '\0'; + if ((srv = sock_getservbyname(namebuf, protobuf)) == NULL) + return ctl_error(EINVAL, rbuf, rsize); + port = sock_ntohs(srv->s_port); + put_int16(port, tbuf); + return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize); + } + + case INET_REQ_GETSERVBYPORT: { /* P1 P0 L1 Proto-String */ + char protobuf[256]; + unsigned short port; + int n; + struct servent* srv; + + if (len < 3) + return ctl_error(EINVAL, rbuf, rsize); + port = get_int16(buf); + port = sock_htons(port); + buf += 2; + n = buf[0]; buf++; len -= 3; + if (n > len) + return ctl_error(EINVAL, rbuf, rsize); + memcpy(protobuf, buf, n); + protobuf[n] = 
'\0'; + if ((srv = sock_getservbyport(port, protobuf)) == NULL) + return ctl_error(EINVAL, rbuf, rsize); + len = strlen(srv->s_name); + return ctl_reply(INET_REP_OK, srv->s_name, len, rbuf, rsize); + } + +#endif /* !VXWORKS */ + + default: + return ctl_xerror(EXBADPORT, rbuf, rsize); + } +} + +/* update statistics on output packets */ +static void inet_output_count(inet_descriptor* desc, int len) +{ + unsigned long n = desc->send_cnt + 1; + unsigned long t = desc->send_oct[0] + len; + int c = (t < desc->send_oct[0]); + double avg = desc->send_avg; + + /* at least 64 bit octet count */ + desc->send_oct[0] = t; + desc->send_oct[1] += c; + + if (n == 0) /* WRAP, use old avg as input to a new sequence */ + n = 1; + desc->send_avg += (len - avg) / n; + if (len > desc->send_max) + desc->send_max = len; + desc->send_cnt = n; +} + +/* update statistics on input packets */ +static void inet_input_count(inet_descriptor* desc, int len) +{ + unsigned long n = desc->recv_cnt + 1; + unsigned long t = desc->recv_oct[0] + len; + int c = (t < desc->recv_oct[0]); + double avg = desc->recv_avg; + double dvi; + + /* at least 64 bit octet count */ + desc->recv_oct[0] = t; + desc->recv_oct[1] += c; + + if (n == 0) /* WRAP */ + n = 1; + + /* average packet length */ + avg = avg + (len - avg) / n; + desc->recv_avg = avg; + + if (len > desc->recv_max) + desc->recv_max = len; + + /* average deviation from average packet length */ + dvi = desc->recv_dvi; + desc->recv_dvi = dvi + ((len - avg) - dvi) / n; + desc->recv_cnt = n; +} + +/*---------------------------------------------------------------------------- + + TCP + +-----------------------------------------------------------------------------*/ + +/* +** Set new size on buffer, used when packet size is determined +** and the buffer is to small. +** buffer must have a size of at least len bytes (counting from ptr_start!) 
+*/ +static int tcp_expand_buffer(tcp_descriptor* desc, int len) +{ + ErlDrvBinary* bin; + int offs1; + int offs2; + int used = desc->i_ptr_start - desc->i_buf->orig_bytes; + int ulen = used + len; + + if (desc->i_bufsz >= ulen) /* packet will fit */ + return 0; + else if (desc->i_buf->orig_size >= ulen) { /* buffer is large enough */ + desc->i_bufsz = ulen; /* set "virtual" size */ + return 0; + } + + DEBUGF(("tcp_expand_buffer(%ld): s=%d, from %ld to %d\r\n", + (long)desc->inet.port, desc->inet.s, desc->i_buf->orig_size, ulen)); + + offs1 = desc->i_ptr_start - desc->i_buf->orig_bytes; + offs2 = desc->i_ptr - desc->i_ptr_start; + + if ((bin = driver_realloc_binary(desc->i_buf, ulen)) == NULL) + return -1; + + desc->i_buf = bin; + desc->i_ptr_start = bin->orig_bytes + offs1; + desc->i_ptr = desc->i_ptr_start + offs2; + desc->i_bufsz = ulen; + return 0; +} + +/* push data into i_buf */ +static int tcp_push_buffer(tcp_descriptor* desc, char* buf, int len) +{ + ErlDrvBinary* bin; + + if (desc->i_buf == NULL) { + bin = alloc_buffer(len); + sys_memcpy(bin->orig_bytes, buf, len); + desc->i_buf = bin; + desc->i_bufsz = len; + desc->i_ptr_start = desc->i_buf->orig_bytes; + desc->i_ptr = desc->i_ptr_start + len; + } + else { + char* start = desc->i_buf->orig_bytes; + int sz_before = desc->i_ptr_start - start; + int sz_filled = desc->i_ptr - desc->i_ptr_start; + + if (len <= sz_before) { + sys_memcpy(desc->i_ptr_start - len, buf, len); + desc->i_ptr_start -= len; + } + else { + bin = alloc_buffer(desc->i_bufsz+len); + sys_memcpy(bin->orig_bytes, buf, len); + sys_memcpy(bin->orig_bytes+len, desc->i_ptr_start, sz_filled); + free_buffer(desc->i_buf); + desc->i_bufsz += len; + desc->i_buf = bin; + desc->i_ptr_start = bin->orig_bytes; + desc->i_ptr = desc->i_ptr_start + sz_filled + len; + } + } + desc->i_remain = 0; + return 0; +} + +/* clear CURRENT input buffer */ +static void tcp_clear_input(tcp_descriptor* desc) +{ + if (desc->i_buf != NULL) + free_buffer(desc->i_buf); + 
desc->i_buf = NULL; + desc->i_remain = 0; + desc->i_ptr = NULL; + desc->i_ptr_start = NULL; + desc->i_bufsz = 0; +} + +/* clear QUEUED output */ +static void tcp_clear_output(tcp_descriptor* desc) +{ + ErlDrvPort ix = desc->inet.port; + int qsz = driver_sizeq(ix); + + driver_deq(ix, qsz); + send_empty_out_q_msgs(INETP(desc)); +} + + +/* Move data so that ptr_start point at buf->orig_bytes */ +static void tcp_restart_input(tcp_descriptor* desc) +{ + if (desc->i_ptr_start != desc->i_buf->orig_bytes) { + int n = desc->i_ptr - desc->i_ptr_start; + + DEBUGF(("tcp_restart_input: move %d bytes\r\n", n)); + sys_memmove(desc->i_buf->orig_bytes, desc->i_ptr_start, n); + desc->i_ptr_start = desc->i_buf->orig_bytes; + desc->i_ptr = desc->i_ptr_start + n; + } +} + + +static int tcp_inet_init(void) +{ + DEBUGF(("tcp_inet_init() {}\r\n")); + return 0; +} + +/* initialize the TCP descriptor */ + +static ErlDrvData tcp_inet_start(ErlDrvPort port, char* args) +{ + tcp_descriptor* desc; + DEBUGF(("tcp_inet_start(%ld) {\r\n", (long)port)); + + desc = (tcp_descriptor*) + inet_start(port, sizeof(tcp_descriptor), IPPROTO_TCP); + if (desc == NULL) + return ERL_DRV_ERROR_ERRNO; + desc->high = INET_HIGH_WATERMARK; + desc->low = INET_LOW_WATERMARK; + desc->send_timeout = INET_INFINITY; + desc->send_timeout_close = 0; + desc->busy_on_send = 0; + desc->i_buf = NULL; + desc->i_ptr = NULL; + desc->i_ptr_start = NULL; + desc->i_remain = 0; + desc->i_bufsz = 0; + desc->tcp_add_flags = 0; + desc->http_state = 0; + desc->mtd = NULL; + desc->multi_first = desc->multi_last = NULL; + DEBUGF(("tcp_inet_start(%ld) }\r\n", (long)port)); + return (ErlDrvData) desc; +} + +/* Copy a descriptor, by creating a new port with same settings + * as the descriptor desc. 
+ * return NULL on error (ENFILE no ports avail) + */ +static tcp_descriptor* tcp_inet_copy(tcp_descriptor* desc,SOCKET s, + ErlDrvTermData owner, int* err) +{ + ErlDrvPort port = desc->inet.port; + tcp_descriptor* copy_desc; + + copy_desc = (tcp_descriptor*) tcp_inet_start(port, NULL); + + /* Setup event if needed */ + if ((copy_desc->inet.s = s) != INVALID_SOCKET) { + if ((copy_desc->inet.event = sock_create_event(INETP(copy_desc))) == + INVALID_EVENT) { + *err = sock_errno(); + FREE(copy_desc); + return NULL; + } + } + + /* Some flags must be inherited at this point */ + copy_desc->inet.mode = desc->inet.mode; + copy_desc->inet.exitf = desc->inet.exitf; + copy_desc->inet.bit8f = desc->inet.bit8f; + copy_desc->inet.deliver = desc->inet.deliver; + copy_desc->inet.htype = desc->inet.htype; + copy_desc->inet.psize = desc->inet.psize; + copy_desc->inet.stype = desc->inet.stype; + copy_desc->inet.sfamily = desc->inet.sfamily; + copy_desc->inet.hsz = desc->inet.hsz; + copy_desc->inet.bufsz = desc->inet.bufsz; + copy_desc->high = desc->high; + copy_desc->low = desc->low; + copy_desc->send_timeout = desc->send_timeout; + copy_desc->send_timeout_close = desc->send_timeout_close; + + /* The new port will be linked and connected to the original caller */ + port = driver_create_port(port, owner, "tcp_inet", (ErlDrvData) copy_desc); + if ((long)port == -1) { + *err = ENFILE; + FREE(copy_desc); + return NULL; + } + copy_desc->inet.port = port; + copy_desc->inet.dport = driver_mk_port(port); + *err = 0; + return copy_desc; +} + +/* +** Check Special cases: +** 1. we are a listener doing nb accept -> report error on accept ! +** 2. we are doing accept -> restore listener state +*/ +static void tcp_close_check(tcp_descriptor* desc) +{ + /* XXX:PaN - multiple clients to handle! 
*/ + if (desc->inet.state == TCP_STATE_ACCEPTING) { + inet_async_op *this_op = desc->inet.opt; + sock_select(INETP(desc), FD_ACCEPT, 0); + desc->inet.state = TCP_STATE_LISTEN; + if (this_op != NULL) { + driver_demonitor_process(desc->inet.port, &(this_op->monitor)); + } + async_error_am(INETP(desc), am_closed); + } + else if (desc->inet.state == TCP_STATE_MULTI_ACCEPTING) { + int id,req; + ErlDrvTermData caller; + ErlDrvMonitor monitor; + + sock_select(INETP(desc), FD_ACCEPT, 0); + desc->inet.state = TCP_STATE_LISTEN; + while (deq_multi_op(desc,&id,&req,&caller,NULL,&monitor) == 0) { + driver_demonitor_process(desc->inet.port, &monitor); + send_async_error(desc->inet.port, desc->inet.dport, id, caller, am_closed); + } + clean_multi_timers(&(desc->mtd), desc->inet.port); + } + + else if (desc->inet.state == TCP_STATE_CONNECTING) { + async_error_am(INETP(desc), am_closed); + } + else if (desc->inet.state == TCP_STATE_CONNECTED) { + async_error_am_all(INETP(desc), am_closed); + } +} + +/* +** Cleanup & Free +*/ +static void tcp_inet_stop(ErlDrvData e) +{ + tcp_descriptor* desc = (tcp_descriptor*)e; + DEBUGF(("tcp_inet_stop(%ld) {s=%d\r\n", + (long)desc->inet.port, desc->inet.s)); + tcp_close_check(desc); + /* free input buffer & output buffer */ + if (desc->i_buf != NULL) + release_buffer(desc->i_buf); + desc->i_buf = NULL; /* net_mess2 may call this function recursively when + faulty messages arrive on dist ports*/ + DEBUGF(("tcp_inet_stop(%ld) }\r\n", (long)desc->inet.port)); + inet_stop(INETP(desc)); +} + + + + +/* TCP requests from Erlang */ +static int tcp_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf, int len, + char** rbuf, int rsize) +{ + tcp_descriptor* desc = (tcp_descriptor*)e; + switch(cmd) { + case INET_REQ_OPEN: /* open socket and return internal index */ + DEBUGF(("tcp_inet_ctl(%ld): OPEN\r\n", (long)desc->inet.port)); + if ((len == 1) && (buf[0] == INET_AF_INET)) + return + inet_ctl_open(INETP(desc), AF_INET, SOCK_STREAM, rbuf, rsize); +#if 
defined(HAVE_IN6) && defined(AF_INET6) + else if ((len == 1) && (buf[0] == INET_AF_INET6)) + return + inet_ctl_open(INETP(desc), AF_INET6, SOCK_STREAM, rbuf, rsize); +#else + else if ((len == 1) && (buf[0] == INET_AF_INET6)) + return ctl_xerror("eafnosupport",rbuf,rsize); +#endif + else + return ctl_error(EINVAL, rbuf, rsize); + + case INET_REQ_FDOPEN: /* pass in an open socket */ + DEBUGF(("tcp_inet_ctl(%ld): FDOPEN\r\n", (long)desc->inet.port)); + if ((len == 5) && (buf[0] == INET_AF_INET)) + return inet_ctl_fdopen(INETP(desc), AF_INET, SOCK_STREAM, + (SOCKET) get_int32(buf+1), rbuf, rsize); +#if defined(HAVE_IN6) && defined(AF_INET6) + else if ((len == 5) && (buf[0] == INET_AF_INET6)) + return inet_ctl_fdopen(INETP(desc), AF_INET6, SOCK_STREAM, + (SOCKET) get_int32(buf+1), rbuf, rsize); +#endif + else + return ctl_error(EINVAL, rbuf, rsize); + + case TCP_REQ_LISTEN: { /* argument backlog */ + + int backlog; + DEBUGF(("tcp_inet_ctl(%ld): LISTEN\r\n", (long)desc->inet.port)); + if (desc->inet.state == TCP_STATE_CLOSED) + return ctl_xerror(EXBADPORT, rbuf, rsize); + if (!IS_OPEN(INETP(desc))) + return ctl_xerror(EXBADPORT, rbuf, rsize); + if (!IS_BOUND(INETP(desc))) + return ctl_xerror(EXBADSEQ, rbuf, rsize); + if (len != 2) + return ctl_error(EINVAL, rbuf, rsize); + backlog = get_int16(buf); + if (sock_listen(desc->inet.s, backlog) == SOCKET_ERROR) + return ctl_error(sock_errno(), rbuf, rsize); + desc->inet.state = TCP_STATE_LISTEN; + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); + } + + + case INET_REQ_CONNECT: { /* do async connect */ + int code; + char tbuf[2]; + unsigned timeout; + + DEBUGF(("tcp_inet_ctl(%ld): CONNECT\r\n", (long)desc->inet.port)); + /* INPUT: Timeout(4), Port(2), Address(N) */ + + if (!IS_OPEN(INETP(desc))) + return ctl_xerror(EXBADPORT, rbuf, rsize); + if (IS_CONNECTED(INETP(desc))) + return ctl_error(EISCONN, rbuf, rsize); + if (!IS_BOUND(INETP(desc))) + return ctl_xerror(EXBADSEQ, rbuf, rsize); + if (IS_CONNECTING(INETP(desc))) + 
return ctl_error(EINVAL, rbuf, rsize); + if (len < 6) + return ctl_error(EINVAL, rbuf, rsize); + timeout = get_int32(buf); + buf += 4; + len -= 4; + if (inet_set_address(desc->inet.sfamily, &desc->inet.remote, + buf, &len) == NULL) + return ctl_error(EINVAL, rbuf, rsize); + + code = sock_connect(desc->inet.s, + (struct sockaddr*) &desc->inet.remote, len); + if ((code == SOCKET_ERROR) && + ((sock_errno() == ERRNO_BLOCK) || /* Winsock2 */ + (sock_errno() == EINPROGRESS))) { /* Unix & OSE!! */ + sock_select(INETP(desc), FD_CONNECT, 1); + desc->inet.state = TCP_STATE_CONNECTING; + if (timeout != INET_INFINITY) + driver_set_timer(desc->inet.port, timeout); + enq_async(INETP(desc), tbuf, INET_REQ_CONNECT); + } + else if (code == 0) { /* ok we are connected */ + desc->inet.state = TCP_STATE_CONNECTED; + if (desc->inet.active) + sock_select(INETP(desc), (FD_READ|FD_CLOSE), 1); + enq_async(INETP(desc), tbuf, INET_REQ_CONNECT); + async_ok(INETP(desc)); + } + else { + return ctl_error(sock_errno(), rbuf, rsize); + } + return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize); + } + + case TCP_REQ_ACCEPT: { /* do async accept */ + char tbuf[2]; + unsigned timeout; + inet_address remote; + unsigned int n; + SOCKET s; + + DEBUGF(("tcp_inet_ctl(%ld): ACCEPT\r\n", (long)desc->inet.port)); + /* INPUT: Timeout(4) */ + + if ((desc->inet.state != TCP_STATE_LISTEN && desc->inet.state != TCP_STATE_ACCEPTING && + desc->inet.state != TCP_STATE_MULTI_ACCEPTING) || len != 4) { + return ctl_error(EINVAL, rbuf, rsize); + } + + timeout = get_int32(buf); + + if (desc->inet.state == TCP_STATE_ACCEPTING) { + unsigned long time_left; + int oid; + ErlDrvTermData ocaller; + int oreq; + unsigned otimeout; + ErlDrvTermData caller = driver_caller(desc->inet.port); + MultiTimerData *mtd = NULL,*omtd = NULL; + ErlDrvMonitor monitor, omonitor; + + + if (driver_monitor_process(desc->inet.port, caller ,&monitor) != 0) { + return ctl_xerror("noproc", rbuf, rsize); + } + 
deq_async_w_tmo(INETP(desc),&oid,&ocaller,&oreq,&otimeout,&omonitor); + if (otimeout != INET_INFINITY) { + driver_read_timer(desc->inet.port, &time_left); + driver_cancel_timer(desc->inet.port); + if (time_left <= 0) { + time_left = 1; + } + omtd = add_multi_timer(&(desc->mtd), desc->inet.port, ocaller, + time_left, &tcp_inet_multi_timeout); + } + enq_old_multi_op(desc, oid, oreq, ocaller, omtd, &omonitor); + if (timeout != INET_INFINITY) { + mtd = add_multi_timer(&(desc->mtd), desc->inet.port, caller, + timeout, &tcp_inet_multi_timeout); + } + enq_multi_op(desc, tbuf, TCP_REQ_ACCEPT, caller, mtd, &monitor); + desc->inet.state = TCP_STATE_MULTI_ACCEPTING; + return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize); + } else if (desc->inet.state == TCP_STATE_MULTI_ACCEPTING) { + ErlDrvTermData caller = driver_caller(desc->inet.port); + MultiTimerData *mtd = NULL; + ErlDrvMonitor monitor; + + if (driver_monitor_process(desc->inet.port, caller ,&monitor) != 0) { + return ctl_xerror("noproc", rbuf, rsize); + } + if (timeout != INET_INFINITY) { + mtd = add_multi_timer(&(desc->mtd), desc->inet.port, caller, + timeout, &tcp_inet_multi_timeout); + } + enq_multi_op(desc, tbuf, TCP_REQ_ACCEPT, caller, mtd, &monitor); + return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize); + } else { + n = sizeof(desc->inet.remote); + s = sock_accept(desc->inet.s, (struct sockaddr*) &remote, &n); + if (s == INVALID_SOCKET) { + if (sock_errno() == ERRNO_BLOCK) { + ErlDrvMonitor monitor; + if (driver_monitor_process(desc->inet.port, driver_caller(desc->inet.port), + &monitor) != 0) { + return ctl_xerror("noproc", rbuf, rsize); + } + enq_async_w_tmo(INETP(desc), tbuf, TCP_REQ_ACCEPT, timeout, &monitor); + desc->inet.state = TCP_STATE_ACCEPTING; + sock_select(INETP(desc),FD_ACCEPT,1); + if (timeout != INET_INFINITY) { + driver_set_timer(desc->inet.port, timeout); + } + } else { + return ctl_error(sock_errno(), rbuf, rsize); + } + } else { + ErlDrvTermData caller = driver_caller(desc->inet.port); + 
tcp_descriptor* accept_desc; + int err; + + if ((accept_desc = tcp_inet_copy(desc,s,caller,&err)) == NULL) { + sock_close(s); + return ctl_error(err, rbuf, rsize); + } + /* FIXME: may MUST lock access_port + * 1 - Port is accessible via the erlang:ports() + * 2 - Port is accessible via callers process_info(links) + */ + accept_desc->inet.remote = remote; + SET_NONBLOCKING(accept_desc->inet.s); +#ifdef __WIN32__ + driver_select(accept_desc->inet.port, accept_desc->inet.event, + ERL_DRV_READ, 1); +#endif + accept_desc->inet.state = TCP_STATE_CONNECTED; + enq_async(INETP(desc), tbuf, TCP_REQ_ACCEPT); + async_ok_port(INETP(desc), accept_desc->inet.dport); + } + return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize); + } + } + case INET_REQ_CLOSE: + DEBUGF(("tcp_inet_ctl(%ld): CLOSE\r\n", (long)desc->inet.port)); + tcp_close_check(desc); + erl_inet_close(INETP(desc)); + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); + + + case TCP_REQ_RECV: { + unsigned timeout; + char tbuf[2]; + int n; + + DEBUGF(("tcp_inet_ctl(%ld): RECV\r\n", (long)desc->inet.port)); + /* INPUT: Timeout(4), Length(4) */ + if (!IS_CONNECTED(INETP(desc))) { + if (desc->tcp_add_flags & TCP_ADDF_DELAYED_CLOSE_RECV) { + desc->tcp_add_flags &= ~(TCP_ADDF_DELAYED_CLOSE_RECV| + TCP_ADDF_DELAYED_CLOSE_SEND); + return ctl_reply(INET_REP_ERROR, "closed", 6, rbuf, rsize); + } + return ctl_error(ENOTCONN, rbuf, rsize); + } + if (desc->inet.active || (len != 8)) + return ctl_error(EINVAL, rbuf, rsize); + timeout = get_int32(buf); + buf += 4; + n = get_int32(buf); + DEBUGF(("tcp_inet_ctl(%ld) timeout = %d, n = %d\r\n", + (long)desc->inet.port,timeout,n)); + if ((desc->inet.htype != TCP_PB_RAW) && (n != 0)) + return ctl_error(EINVAL, rbuf, rsize); + if (n > TCP_MAX_PACKET_SIZE) + return ctl_error(ENOMEM, rbuf, rsize); + if (enq_async(INETP(desc), tbuf, TCP_REQ_RECV) < 0) + return ctl_error(EALREADY, rbuf, rsize); + + if (tcp_recv(desc, n) == 0) { + if (timeout == 0) + async_error_am(INETP(desc), am_timeout); + 
else { + if (timeout != INET_INFINITY) + driver_set_timer(desc->inet.port, timeout); + sock_select(INETP(desc),(FD_READ|FD_CLOSE),1); + } + } + return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize); + } + + case TCP_REQ_UNRECV: { + DEBUGF(("tcp_inet_ctl(%ld): UNRECV\r\n", (long)desc->inet.port)); + if (!IS_CONNECTED(INETP(desc))) + return ctl_error(ENOTCONN, rbuf, rsize); + tcp_push_buffer(desc, buf, len); + if (desc->inet.active) + tcp_deliver(desc, 0); + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); + } +#ifndef _OSE_ + case TCP_REQ_SHUTDOWN: { + int how; + DEBUGF(("tcp_inet_ctl(%ld): FDOPEN\r\n", (long)desc->inet.port)); + if (!IS_CONNECTED(INETP(desc))) { + return ctl_error(ENOTCONN, rbuf, rsize); + } + if (len != 1) { + return ctl_error(EINVAL, rbuf, rsize); + } + how = buf[0]; + if (sock_shutdown(INETP(desc)->s, how) == 0) { + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); + } else { + return ctl_error(sock_errno(), rbuf, rsize); + } + } +#endif + default: + DEBUGF(("tcp_inet_ctl(%ld): %u\r\n", (long)desc->inet.port, cmd)); + return inet_ctl(INETP(desc), cmd, buf, len, rbuf, rsize); + } + +} + +/* +** tcp_inet_timeout: +** called when timer expire: +** TCP socket may be: +** +** a) receiving -- deselect +** b) connecting -- close socket +** c) accepting -- reset listener +** +*/ + +static void tcp_inet_timeout(ErlDrvData e) +{ + tcp_descriptor* desc = (tcp_descriptor*)e; + int state = desc->inet.state; + + DEBUGF(("tcp_inet_timeout(%ld) {s=%d\r\n", + (long)desc->inet.port, desc->inet.s)); + if ((state & INET_F_MULTI_CLIENT)) { /* Multi-client always means multi-timers */ + fire_multi_timers(&(desc->mtd), desc->inet.port, e); + } else if ((state & TCP_STATE_CONNECTED) == TCP_STATE_CONNECTED) { + if (desc->busy_on_send) { + ASSERT(IS_BUSY(INETP(desc))); + desc->inet.caller = desc->inet.busy_caller; + desc->inet.state &= ~INET_F_BUSY; + desc->busy_on_send = 0; + set_busy_port(desc->inet.port, 0); + inet_reply_error_am(INETP(desc), am_timeout); + if 
(desc->send_timeout_close) { + erl_inet_close(INETP(desc)); + } + } + else { + /* assume recv timeout */ + ASSERT(!desc->inet.active); + sock_select(INETP(desc),(FD_READ|FD_CLOSE),0); + desc->i_remain = 0; + async_error_am(INETP(desc), am_timeout); + } + } + else if ((state & TCP_STATE_CONNECTING) == TCP_STATE_CONNECTING) { + /* assume connect timeout */ + /* close the socket since it's not usable (see man pages) */ + erl_inet_close(INETP(desc)); + async_error_am(INETP(desc), am_timeout); + } + else if ((state & TCP_STATE_ACCEPTING) == TCP_STATE_ACCEPTING) { + inet_async_op *this_op = desc->inet.opt; + /* timer is set on accept */ + sock_select(INETP(desc), FD_ACCEPT, 0); + if (this_op != NULL) { + driver_demonitor_process(desc->inet.port, &(this_op->monitor)); + } + desc->inet.state = TCP_STATE_LISTEN; + async_error_am(INETP(desc), am_timeout); + } + DEBUGF(("tcp_inet_timeout(%ld) }\r\n", (long)desc->inet.port)); +} + +static void tcp_inet_multi_timeout(ErlDrvData e, ErlDrvTermData caller) +{ + tcp_descriptor* desc = (tcp_descriptor*)e; + int id,req; + ErlDrvMonitor monitor; + + if (remove_multi_op(desc, &id, &req, caller, NULL, &monitor) != 0) { + return; + } + driver_demonitor_process(desc->inet.port, &monitor); + if (desc->multi_first == NULL) { + sock_select(INETP(desc),FD_ACCEPT,0); + desc->inet.state = TCP_STATE_LISTEN; /* restore state */ + } + send_async_error(desc->inet.port, desc->inet.dport, id, caller, am_timeout); +} + + + +/* +** command: +** output on a socket only ! +** a reply code will be sent to connected (caller later) +** {inet_reply, S, Status} +** NOTE! normal sockets use the the tcp_inet_commandv +** but distribution still uses the tcp_inet_command!! 
+*/ + +static void tcp_inet_command(ErlDrvData e, char *buf, int len) +{ + tcp_descriptor* desc = (tcp_descriptor*)e; + desc->inet.caller = driver_caller(desc->inet.port); + + DEBUGF(("tcp_inet_command(%ld) {s=%d\r\n", + (long)desc->inet.port, desc->inet.s)); + if (!IS_CONNECTED(INETP(desc))) + inet_reply_error(INETP(desc), ENOTCONN); + else if (tcp_send(desc, buf, len) == 0) + inet_reply_ok(INETP(desc)); + DEBUGF(("tcp_inet_command(%ld) }\r\n", (long)desc->inet.port)); +} + + +static void tcp_inet_commandv(ErlDrvData e, ErlIOVec* ev) +{ + tcp_descriptor* desc = (tcp_descriptor*)e; + desc->inet.caller = driver_caller(desc->inet.port); + + DEBUGF(("tcp_inet_commanv(%ld) {s=%d\r\n", + (long)desc->inet.port, desc->inet.s)); + if (!IS_CONNECTED(INETP(desc))) { + if (desc->tcp_add_flags & TCP_ADDF_DELAYED_CLOSE_SEND) { + desc->tcp_add_flags &= ~TCP_ADDF_DELAYED_CLOSE_SEND; + inet_reply_error_am(INETP(desc), am_closed); + } + else + inet_reply_error(INETP(desc), ENOTCONN); + } + else if (tcp_sendv(desc, ev) == 0) + inet_reply_ok(INETP(desc)); + DEBUGF(("tcp_inet_commandv(%ld) }\r\n", (long)desc->inet.port)); +} + +static void tcp_inet_flush(ErlDrvData e) +{ + tcp_descriptor* desc = (tcp_descriptor*)e; + if (!(desc->inet.event_mask & FD_WRITE)) { + /* Discard send queue to avoid hanging port (OTP-7615) */ + tcp_clear_output(desc); + } +} + +static void tcp_inet_process_exit(ErlDrvData e, ErlDrvMonitor *monitorp) +{ + tcp_descriptor* desc = (tcp_descriptor*)e; + ErlDrvTermData who = driver_get_monitored_process(desc->inet.port,monitorp); + int state = desc->inet.state; + + if ((state & TCP_STATE_MULTI_ACCEPTING) == TCP_STATE_MULTI_ACCEPTING) { + int id,req; + MultiTimerData *timeout; + if (remove_multi_op(desc, &id, &req, who, &timeout, NULL) != 0) { + return; + } + if (timeout != NULL) { + remove_multi_timer(&(desc->mtd), desc->inet.port, timeout); + } + if (desc->multi_first == NULL) { + sock_select(INETP(desc),FD_ACCEPT,0); + desc->inet.state = TCP_STATE_LISTEN; /* 
restore state */ + } + } else if ((state & TCP_STATE_ACCEPTING) == TCP_STATE_ACCEPTING) { + int did,drid; + ErlDrvTermData dcaller; + deq_async(INETP(desc), &did, &dcaller, &drid); + driver_cancel_timer(desc->inet.port); + sock_select(INETP(desc),FD_ACCEPT,0); + desc->inet.state = TCP_STATE_LISTEN; /* restore state */ + } +} + +static void inet_stop_select(ErlDrvEvent event, void* _) +{ +#ifdef __WIN32__ + WSACloseEvent((HANDLE)event); +#else + sock_close((SOCKET)(long)event); +#endif +} + +/* The peer socket has closed, cleanup and send event */ +static int tcp_recv_closed(tcp_descriptor* desc) +{ +#ifdef DEBUG + long port = (long) desc->inet.port; /* Used after driver_exit() */ +#endif + DEBUGF(("tcp_recv_closed(%ld): s=%d, in %s, line %d\r\n", + port, desc->inet.s, __FILE__, __LINE__)); + if (IS_BUSY(INETP(desc))) { + /* A send is blocked */ + desc->inet.caller = desc->inet.busy_caller; + tcp_clear_output(desc); + if (desc->busy_on_send) { + driver_cancel_timer(desc->inet.port); + desc->busy_on_send = 0; + DEBUGF(("tcp_recv_closed(%ld): busy on send\r\n", port)); + } + desc->inet.state &= ~INET_F_BUSY; + set_busy_port(desc->inet.port, 0); + inet_reply_error_am(INETP(desc), am_closed); + DEBUGF(("tcp_recv_closed(%ld): busy reply 'closed'\r\n", port)); + } + if (!desc->inet.active) { + /* We must cancel any timer here ! */ + driver_cancel_timer(desc->inet.port); + /* passive mode do not terminate port ! 
*/ + tcp_clear_input(desc); + if (desc->inet.exitf) { + tcp_clear_output(desc); + desc_close(INETP(desc)); + } else { + desc_close_read(INETP(desc)); + } + async_error_am_all(INETP(desc), am_closed); + /* next time EXBADSEQ will be delivered */ + DEBUGF(("tcp_recv_closed(%ld): passive reply all 'closed'\r\n", port)); + } else { + tcp_clear_input(desc); + tcp_closed_message(desc); + if (desc->inet.exitf) { + driver_exit(desc->inet.port, 0); + } else { + desc_close_read(INETP(desc)); + } + DEBUGF(("tcp_recv_closed(%ld): active close\r\n", port)); + } + DEBUGF(("tcp_recv_closed(%ld): done\r\n", port)); + return -1; +} + + +/* We have a read error determine the action */ +static int tcp_recv_error(tcp_descriptor* desc, int err) +{ + if (err != ERRNO_BLOCK) { + if (IS_BUSY(INETP(desc))) { + /* A send is blocked */ + desc->inet.caller = desc->inet.busy_caller; + tcp_clear_output(desc); + if (desc->busy_on_send) { + driver_cancel_timer(desc->inet.port); + desc->busy_on_send = 0; + } + desc->inet.state &= ~INET_F_BUSY; + set_busy_port(desc->inet.port, 0); + inet_reply_error_am(INETP(desc), am_closed); + } + if (!desc->inet.active) { + /* We must cancel any timer here ! 
*/ + driver_cancel_timer(desc->inet.port); + tcp_clear_input(desc); + if (desc->inet.exitf) { + desc_close(INETP(desc)); + } else { + desc_close_read(INETP(desc)); + } + async_error_am_all(INETP(desc), error_atom(err)); + } else { + tcp_clear_input(desc); + tcp_error_message(desc, err); /* first error */ + tcp_closed_message(desc); /* then closed */ + if (desc->inet.exitf) + driver_exit(desc->inet.port, err); + else + desc_close(INETP(desc)); + } + return -1; + } + return 0; +} + + + +/* +** Calculate number of bytes that remain to read before deliver +** Assume buf, ptr_start, ptr has been setup +** +** return > 0 if more to read +** = 0 if holding complete packet +** < 0 on error +** +** if return value == 0 then *len will hold the length of the first packet +** return value > 0 then if *len == 0 then value means upperbound +** *len > 0 then value means exact +** +*/ +static int tcp_remain(tcp_descriptor* desc, int* len) +{ + char* ptr = desc->i_ptr_start; + int nfill = (desc->i_ptr - desc->i_buf->orig_bytes); /* filled */ + int nsz = desc->i_bufsz - nfill; /* remain */ + int n = desc->i_ptr - ptr; /* number of bytes read */ + int tlen; + + DEBUGF(("tcp_remain(%ld): s=%d, n=%d, nfill=%d nsz=%d\r\n", + (long)desc->inet.port, desc->inet.s, n, nfill, nsz)); + + tlen = packet_get_length(desc->inet.htype, ptr, n, + desc->inet.psize, desc->i_bufsz, + &desc->http_state); + if (tlen > 0) { + if (tlen <= n) { /* got a packet */ + *len = tlen; + DEBUGF((" => nothing remain packet=%d\r\n", tlen)); + return 0; + } + else { /* need known more */ + if (tcp_expand_buffer(desc, tlen) < 0) + return -1; + *len = tlen - n; + DEBUGF((" => remain=%d\r\n", *len)); + return *len; + } + } + else if (tlen == 0) { /* need unknown more */ + *len = 0; + if (nsz == 0) { + if (nfill == n) + goto error; + DEBUGF((" => restart more=%d\r\n", nfill - n)); + return nfill - n; + } + else { + DEBUGF((" => more=%d \r\n", nsz)); + return nsz; + } + } + +error: + DEBUGF((" => packet error\r\n")); + 
return -1; +} + +/* +** Deliver all packets ready +** if len == 0 then check start with a check for ready packet +*/ +static int tcp_deliver(tcp_descriptor* desc, int len) +{ + int count = 0; + int n; + + /* Poll for ready packet */ + if (len == 0) { + /* empty buffer or waiting for more input */ + if ((desc->i_buf == NULL) || (desc->i_remain > 0)) + return count; + if ((n = tcp_remain(desc, &len)) != 0) { + if (n < 0) /* packet error */ + return n; + if (len > 0) /* more data pending */ + desc->i_remain = len; + return count; + } + } + + while (len > 0) { + int code = 0; + + inet_input_count(INETP(desc), len); + + /* deliver binary? */ + if (len*4 >= desc->i_buf->orig_size*3) { /* >=75% */ + /* something after? */ + if (desc->i_ptr_start + len == desc->i_ptr) { /* no */ + code = tcp_reply_binary_data(desc, desc->i_buf, + (desc->i_ptr_start - + desc->i_buf->orig_bytes), + len); + tcp_clear_input(desc); + } + else { /* move trail to beginning of a new buffer */ + ErlDrvBinary* bin; + char* ptr_end = desc->i_ptr_start + len; + int sz = desc->i_ptr - ptr_end; + + bin = alloc_buffer(desc->i_bufsz); + memcpy(bin->orig_bytes, ptr_end, sz); + + code = tcp_reply_binary_data(desc, desc->i_buf, + (desc->i_ptr_start- + desc->i_buf->orig_bytes), + len); + free_buffer(desc->i_buf); + desc->i_buf = bin; + desc->i_ptr_start = desc->i_buf->orig_bytes; + desc->i_ptr = desc->i_ptr_start + sz; + desc->i_remain = 0; + } + } + else { + code = tcp_reply_data(desc, desc->i_ptr_start, len); + /* XXX The buffer gets thrown away on error (code < 0) */ + /* Windows needs workaround for this in tcp_inet_event... 
*/ + desc->i_ptr_start += len; + if (desc->i_ptr_start == desc->i_ptr) + tcp_clear_input(desc); + else + desc->i_remain = 0; + + } + + if (code < 0) + return code; + + count++; + len = 0; + + if (!desc->inet.active) { + driver_cancel_timer(desc->inet.port); + sock_select(INETP(desc),(FD_READ|FD_CLOSE),0); + if (desc->i_buf != NULL) + tcp_restart_input(desc); + } + else if (desc->i_buf != NULL) { + if ((n = tcp_remain(desc, &len)) != 0) { + if (n < 0) /* packet error */ + return n; + tcp_restart_input(desc); + if (len > 0) + desc->i_remain = len; + len = 0; + } + } + } + return count; +} + + +static int tcp_recv(tcp_descriptor* desc, int request_len) +{ + int n; + int len; + int nread; + + if (desc->i_buf == NULL) { /* allocte a read buffer */ + int sz = (request_len > 0) ? request_len : desc->inet.bufsz; + + if ((desc->i_buf = alloc_buffer(sz)) == NULL) + return -1; + /* XXX: changing bufsz during recv SHOULD/MAY? affect + * ongoing operation but is not now + */ + desc->i_bufsz = sz; /* use i_bufsz not i_buf->orig_size ! 
*/ + desc->i_ptr_start = desc->i_buf->orig_bytes; + desc->i_ptr = desc->i_ptr_start; + nread = sz; + if (request_len > 0) + desc->i_remain = request_len; + else + desc->i_remain = 0; + } + else if (request_len > 0) { /* we have a data in buffer and a request */ + n = desc->i_ptr - desc->i_ptr_start; + if (n >= request_len) + return tcp_deliver(desc, request_len); + else if (tcp_expand_buffer(desc, request_len) < 0) + return tcp_recv_error(desc, ENOMEM); + else + desc->i_remain = nread = request_len - n; + } + else if (desc->i_remain == 0) { /* poll remain from buffer data */ + if ((nread = tcp_remain(desc, &len)) < 0) + return tcp_recv_error(desc, EMSGSIZE); + else if (nread == 0) + return tcp_deliver(desc, len); + else if (len > 0) + desc->i_remain = len; /* set remain */ + } + else /* remain already set use it */ + nread = desc->i_remain; + + DEBUGF(("tcp_recv(%ld): s=%d about to read %d bytes...\r\n", + (long)desc->inet.port, desc->inet.s, nread)); + + n = sock_recv(desc->inet.s, desc->i_ptr, nread, 0); + + if (n == SOCKET_ERROR) { + int err = sock_errno(); + if (err == ECONNRESET) { + DEBUGF((" => detected close (connreset)\r\n")); + return tcp_recv_closed(desc); + } + if (err == ERRNO_BLOCK) { + DEBUGF((" => would block\r\n")); + return 0; + } + else { + DEBUGF((" => error: %d\r\n", err)); + return tcp_recv_error(desc, err); + } + } + else if (n == 0) { + DEBUGF((" => detected close\r\n")); + return tcp_recv_closed(desc); + } + + DEBUGF((" => got %d bytes\r\n", n)); + desc->i_ptr += n; + if (desc->i_remain > 0) { + desc->i_remain -= n; + if (desc->i_remain == 0) + return tcp_deliver(desc, desc->i_ptr - desc->i_ptr_start); + } + else { + if ((nread = tcp_remain(desc, &len)) < 0) + return tcp_recv_error(desc, EMSGSIZE); + else if (nread == 0) + return tcp_deliver(desc, len); + else if (len > 0) + desc->i_remain = len; /* set remain */ + } + return 0; +} + + +#ifdef __WIN32__ + + +static int winsock_event_select(inet_descriptor *desc, int flags, int on) +{ + int 
save_event_mask = desc->event_mask; + + desc->forced_events = 0; + if (on) + desc->event_mask |= flags; + else + desc->event_mask &= (~flags); + DEBUGF(("port %d: winsock_event_select: " + "flags=%02X, on=%d, event_mask=%02X\n", + desc->port, flags, on, desc->event_mask)); + /* The RIGHT WAY (TM) to do this is to make sure: + A) The cancelling of all network events is done with + NULL as the event parameter (bug in NT's winsock), + B) The actual event handle is reset so that it is only + raised if one of the requested network events is active, + C) Avoid race conditions by making sure that the event cannot be set + while we are preparing to set the correct network event mask. + The simplest way to do it is to turn off all events, reset the + event handle and then, if event_mask != 0, turn on the appropriate + events again. */ + if (WSAEventSelect(desc->s, NULL, 0) != 0) { + DEBUGF(("port %d: winsock_event_select: " + "WSAEventSelect returned error, code %d.\n", + sock_errno())); + desc->event_mask = save_event_mask; + return -1; + } + if (!ResetEvent(desc->event)) { + DEBUGF(("port %d: winsock_event_select: " + "ResetEvent returned error, code %d.\n", + GetLastError())); + desc->event_mask = 0; + return -1; + } + if (desc->event_mask != 0) { + if (WSAEventSelect(desc->s, + desc->event, + desc->event_mask) != 0) { + DEBUGF(("port %d: winsock_event_select: " + "WSAEventSelect returned error, code %d.\n", + sock_errno())); + desc->event_mask = 0; + return -1; + } + + /* Now, WSAEventSelect() is trigged only when the queue goes from + full to empty or from empty to full; therefore we need an extra test + to see whether it is writeable, readable or closed... 
*/ + if ((desc->event_mask & FD_WRITE)) { + int do_force = 1; + if (desc->send_would_block) { + TIMEVAL tmo = {0,0}; + FD_SET fds; + int ret; + + FD_ZERO(&fds); + FD_SET(desc->s,&fds); + do_force = (select(desc->s+1,0,&fds,0,&tmo) > 0); + } + if (do_force) { + SetEvent(desc->event); + desc->forced_events |= FD_WRITE; + } + } + if ((desc->event_mask & (FD_READ|FD_CLOSE))) { + int readable = 0; + int closed = 0; + TIMEVAL tmo = {0,0}; + FD_SET fds; + int ret; + unsigned long arg; + + FD_ZERO(&fds); + FD_SET(desc->s,&fds); + ret = select(desc->s+1,&fds,0,0,&tmo); + if (ret > 0) { + ++readable; + if (ioctlsocket(desc->s,FIONREAD,&arg) != 0) { + ++closed; /* Which gives a FD_CLOSE event */ + } else { + closed = (arg == 0); + } + } + if ((desc->event_mask & FD_READ) && readable && !closed) { + SetEvent(desc->event); + desc->forced_events |= FD_READ; + } + if ((desc->event_mask & FD_CLOSE) && closed) { + SetEvent(desc->event); + desc->forced_events |= FD_CLOSE; + } + } + } + return 0; +} + +static void tcp_inet_event(ErlDrvData e, ErlDrvEvent event) +{ + tcp_descriptor* desc = (tcp_descriptor*)e; + WSANETWORKEVENTS netEv; + int err; + + DEBUGF(("tcp_inet_event(%ld) {s=%d\r\n", + (long)desc->inet.port, desc->inet.s)); + if (WSAEnumNetworkEvents(desc->inet.s, desc->inet.event, + &netEv) != 0) { + DEBUGF((" => EnumNetworkEvents = %d\r\n", sock_errno() )); + goto error; + } + + DEBUGF((" => event=%02X, mask=%02X\r\n", + netEv.lNetworkEvents, desc->inet.event_mask)); + + /* Add the forced events. */ + + netEv.lNetworkEvents |= desc->inet.forced_events; + + /* + * Calling WSAEventSelect() with a mask of 0 doesn't always turn off + * all events. To avoid acting on events we don't want, we mask + * the events with mask for the events we really want. + */ + +#ifdef DEBUG + if ((netEv.lNetworkEvents & ~(desc->inet.event_mask)) != 0) { + DEBUGF(("port %d: ... 
unexpected event: %d\r\n", + desc->inet.port, netEv.lNetworkEvents & ~(desc->inet.event_mask))); + } +#endif + netEv.lNetworkEvents &= desc->inet.event_mask; + + if (netEv.lNetworkEvents & FD_READ) { + if (tcp_inet_input(desc, event) < 0) { + goto error; + } + if (netEv.lNetworkEvents & FD_CLOSE) { + /* + * We must loop to read out the remaining packets (if any). + */ + for (;;) { + DEBUGF(("Retrying read due to closed port\r\n")); + /* XXX The buffer will be thrown away on error (empty que). + Possible SMP FIXME. */ + if (!desc->inet.active && (desc->inet.opt) == NULL) { + goto error; + } + if (tcp_inet_input(desc, event) < 0) { + goto error; + } + } + } + } + if (netEv.lNetworkEvents & FD_WRITE) { + desc->inet.send_would_block = 0; + if (tcp_inet_output(desc, event) < 0) + goto error; + } + if (netEv.lNetworkEvents & FD_CONNECT) { + if ((err = netEv.iErrorCode[FD_CONNECT_BIT]) != 0) { + async_error(INETP(desc), err); + } else { + tcp_inet_output(desc, event); + } + } else if (netEv.lNetworkEvents & FD_ACCEPT) { + if ((err = netEv.iErrorCode[FD_ACCEPT_BIT]) != 0) + async_error(INETP(desc), err); + else + tcp_inet_input(desc, event); + } + if (netEv.lNetworkEvents & FD_CLOSE) { + /* error in err = netEv.iErrorCode[FD_CLOSE_BIT] */ + DEBUGF(("Detected close in %s, line %d\r\n", __FILE__, __LINE__)); + tcp_recv_closed(desc); + } + DEBUGF(("tcp_inet_event(%ld) }\r\n", (long)desc->inet.port)); + return; + + error: + DEBUGF(("tcp_inet_event(%ld) error}\r\n", (long)desc->inet.port)); + return; +} + +#endif /* WIN32 */ + + +/* socket has input: +** 1. TCP_STATE_ACCEPTING => non block accept ? +** 2. 
TCP_STATE_CONNECTED => read input +*/ +static int tcp_inet_input(tcp_descriptor* desc, HANDLE event) +{ + int ret = 0; +#ifdef DEBUG + long port = (long) desc->inet.port; /* Used after driver_exit() */ +#endif + DEBUGF(("tcp_inet_input(%ld) {s=%d\r\n", port, desc->inet.s)); + if (desc->inet.state == TCP_STATE_ACCEPTING) { + SOCKET s; + unsigned int len; + inet_address remote; + inet_async_op *this_op = desc->inet.opt; + + len = sizeof(desc->inet.remote); + s = sock_accept(desc->inet.s, (struct sockaddr*) &remote, &len); + if (s == INVALID_SOCKET && sock_errno() == ERRNO_BLOCK) { + /* Just try again, no real error, just a ghost trigger from poll, + keep the default return code and everything else as is */ + goto done; + } + + sock_select(INETP(desc),FD_ACCEPT,0); + desc->inet.state = TCP_STATE_LISTEN; /* restore state */ + + if (this_op != NULL) { + driver_demonitor_process(desc->inet.port, &(this_op->monitor)); + } + + + driver_cancel_timer(desc->inet.port); /* posssibly cancel a timer */ + + if (s == INVALID_SOCKET) { + ret = async_error(INETP(desc), sock_errno()); + goto done; + } + else { + ErlDrvTermData caller; + tcp_descriptor* accept_desc; + int err; + + if (desc->inet.opt == NULL) { + /* No caller setup */ + sock_close(s); + ret = async_error(INETP(desc), EINVAL); + goto done; + } + caller = desc->inet.opt->caller; + if ((accept_desc = tcp_inet_copy(desc,s,caller,&err)) == NULL) { + sock_close(s); + ret = async_error(INETP(desc), err); + goto done; + } + /* FIXME: may MUST lock port + * 1 - Port is accessible via the erlang:ports() + * 2 - Port is accessible via callers process_info(links) + */ + accept_desc->inet.remote = remote; + SET_NONBLOCKING(accept_desc->inet.s); +#ifdef __WIN32__ + driver_select(accept_desc->inet.port, accept_desc->inet.event, + ERL_DRV_READ, 1); +#endif + accept_desc->inet.state = TCP_STATE_CONNECTED; + ret = async_ok_port(INETP(desc), accept_desc->inet.dport); + goto done; + } + } else if (desc->inet.state == 
TCP_STATE_MULTI_ACCEPTING) { + SOCKET s; + unsigned int len; + inet_address remote; + int id,req; + ErlDrvTermData caller; + MultiTimerData *timeout; + ErlDrvMonitor monitor; +#ifdef HARDDEBUG + int times = 0; +#endif + + while (desc->inet.state == TCP_STATE_MULTI_ACCEPTING) { + len = sizeof(desc->inet.remote); + s = sock_accept(desc->inet.s, (struct sockaddr*) &remote, &len); + + if (s == INVALID_SOCKET && sock_errno() == ERRNO_BLOCK) { + /* Just try again, no real error, keep the last return code */ + goto done; + } +#ifdef HARDDEBUG + if (++times > 1) { + erts_fprintf(stderr,"Accepts in one suite: %d :-)\r\n",times); + } +#endif + if (deq_multi_op(desc,&id,&req,&caller,&timeout,&monitor) != 0) { + ret = -1; + goto done; + } + + if (desc->multi_first == NULL) { + sock_select(INETP(desc),FD_ACCEPT,0); + desc->inet.state = TCP_STATE_LISTEN; /* restore state */ + } + + if (timeout != NULL) { + remove_multi_timer(&(desc->mtd), desc->inet.port, timeout); + } + + driver_demonitor_process(desc->inet.port, &monitor); + + + if (s == INVALID_SOCKET) { /* Not ERRNO_BLOCK, that's handled right away */ + ret = send_async_error(desc->inet.port, desc->inet.dport, + id, caller, error_atom(sock_errno())); + goto done; + } + else { + tcp_descriptor* accept_desc; + int err; + + if ((accept_desc = tcp_inet_copy(desc,s,caller,&err)) == NULL) { + sock_close(s); + ret = send_async_error(desc->inet.port, desc->inet.dport, + id, caller, error_atom(err)); + goto done; + } + accept_desc->inet.remote = remote; + SET_NONBLOCKING(accept_desc->inet.s); +#ifdef __WIN32__ + driver_select(accept_desc->inet.port, accept_desc->inet.event, + ERL_DRV_READ, 1); +#endif + accept_desc->inet.state = TCP_STATE_CONNECTED; + ret = send_async_ok_port(desc->inet.port, desc->inet.dport, + id, caller, accept_desc->inet.dport); + } + } + } + else if (IS_CONNECTED(INETP(desc))) { + ret = tcp_recv(desc, 0); + goto done; + } + else { + /* maybe a close op from connection attempt?? 
*/ + sock_select(INETP(desc),FD_ACCEPT,0); + DEBUGF(("tcp_inet_input(%ld): s=%d bad state: %04x\r\n", + port, desc->inet.s, desc->inet.state)); + } + done: + DEBUGF(("tcp_inet_input(%ld) }\r\n", port)); + return ret; +} + +static int tcp_send_error(tcp_descriptor* desc, int err) +{ + /* + * If the port is busy, we must do some clean-up before proceeding. + */ + if (IS_BUSY(INETP(desc))) { + desc->inet.caller = desc->inet.busy_caller; + if (desc->busy_on_send) { + driver_cancel_timer(desc->inet.port); + desc->busy_on_send = 0; + } + desc->inet.state &= ~INET_F_BUSY; + set_busy_port(desc->inet.port, 0); + } + + /* + * We used to handle "expected errors" differently from unexpected ones. + * Now we handle all errors in the same way. We just have to distinguish + * between passive and active sockets. + */ + DEBUGF(("driver_failure_eof(%ld) in %s, line %d\r\n", + (long)desc->inet.port, __FILE__, __LINE__)); + if (desc->inet.active) { + tcp_closed_message(desc); + inet_reply_error_am(INETP(desc), am_closed); + if (desc->inet.exitf) + driver_exit(desc->inet.port, 0); + else + desc_close(INETP(desc)); + } else { + tcp_clear_output(desc); + tcp_clear_input(desc); + tcp_close_check(desc); + erl_inet_close(INETP(desc)); + + if (desc->inet.caller) { + inet_reply_error_am(INETP(desc), am_closed); + } + else { + /* No blocking send op to reply to right now. + * If next op is a send, make sure it returns {error,closed} + * rather than {error,enotconn}. + */ + desc->tcp_add_flags |= TCP_ADDF_DELAYED_CLOSE_SEND; + } + + /* + * Make sure that the next receive operation gets an {error,closed} + * result rather than {error,enotconn}. That means that the caller + * can safely ignore errors in the send operations and handle them + * in the receive operation. 
+ */ + desc->tcp_add_flags |= TCP_ADDF_DELAYED_CLOSE_RECV; + } + return -1; +} + +/* +** Send non-blocking vector data +*/ +static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev) +{ + int sz; + char buf[4]; + int h_len; + int n; + ErlDrvPort ix = desc->inet.port; + int len = ev->size; + + switch(desc->inet.htype) { + case TCP_PB_1: + put_int8(len, buf); + h_len = 1; + break; + case TCP_PB_2: + put_int16(len, buf); + h_len = 2; + break; + case TCP_PB_4: + put_int32(len, buf); + h_len = 4; + break; + default: + if (len == 0) + return 0; + h_len = 0; + break; + } + + inet_output_count(INETP(desc), len+h_len); + + if (h_len > 0) { + ev->iov[0].iov_base = buf; + ev->iov[0].iov_len = h_len; + ev->size += h_len; + } + + if ((sz = driver_sizeq(ix)) > 0) { + driver_enqv(ix, ev, 0); + if (sz+ev->size >= desc->high) { + DEBUGF(("tcp_sendv(%ld): s=%d, sender forced busy\r\n", + (long)desc->inet.port, desc->inet.s)); + desc->inet.state |= INET_F_BUSY; /* mark for low-watermark */ + desc->inet.busy_caller = desc->inet.caller; + set_busy_port(desc->inet.port, 1); + if (desc->send_timeout != INET_INFINITY) { + desc->busy_on_send = 1; + driver_set_timer(desc->inet.port, desc->send_timeout); + } + return 1; + } + } + else { + int vsize = (ev->vsize > MAX_VSIZE) ? 
MAX_VSIZE : ev->vsize; + + DEBUGF(("tcp_sendv(%ld): s=%d, about to send %d,%d bytes\r\n", + (long)desc->inet.port, desc->inet.s, h_len, len)); + if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND) { + n = 0; + } else if (sock_sendv(desc->inet.s, ev->iov, vsize, &n, 0) + == SOCKET_ERROR) { + if ((sock_errno() != ERRNO_BLOCK) && (sock_errno() != EINTR)) { + int err = sock_errno(); + DEBUGF(("tcp_sendv(%ld): s=%d, " + "sock_sendv(size=2) errno = %d\r\n", + (long)desc->inet.port, desc->inet.s, err)); + return tcp_send_error(desc, err); + } +#ifdef __WIN32__ + desc->inet.send_would_block = 1; +#endif + n = 0; + } + else if (n == ev->size) { + ASSERT(NO_SUBSCRIBERS(&INETP(desc)->empty_out_q_subs)); + return 0; + } + else { + DEBUGF(("tcp_sendv(%ld): s=%d, only sent %d/%d of %d/%d bytes/items\r\n", + (long)desc->inet.port, desc->inet.s, n, vsize, ev->size, ev->vsize)); + } + + DEBUGF(("tcp_sendv(%ld): s=%d, Send failed, queuing\r\n", + (long)desc->inet.port, desc->inet.s)); + driver_enqv(ix, ev, n); + sock_select(INETP(desc),(FD_WRITE|FD_CLOSE), 1); + } + return 0; +} + +/* +** Send non blocking data +*/ +static int tcp_send(tcp_descriptor* desc, char* ptr, int len) +{ + int sz; + char buf[4]; + int h_len; + int n; + ErlDrvPort ix = desc->inet.port; + SysIOVec iov[2]; + + switch(desc->inet.htype) { + case TCP_PB_1: + put_int8(len, buf); + h_len = 1; + break; + case TCP_PB_2: + put_int16(len, buf); + h_len = 2; + break; + case TCP_PB_4: + put_int32(len, buf); + h_len = 4; + break; + default: + if (len == 0) + return 0; + h_len = 0; + break; + } + + inet_output_count(INETP(desc), len+h_len); + + + if ((sz = driver_sizeq(ix)) > 0) { + if (h_len > 0) + driver_enq(ix, buf, h_len); + driver_enq(ix, ptr, len); + if (sz+h_len+len >= desc->high) { + DEBUGF(("tcp_send(%ld): s=%d, sender forced busy\r\n", + (long)desc->inet.port, desc->inet.s)); + desc->inet.state |= INET_F_BUSY; /* mark for low-watermark */ + desc->inet.busy_caller = desc->inet.caller; + set_busy_port(desc->inet.port, 
1); + if (desc->send_timeout != INET_INFINITY) { + desc->busy_on_send = 1; + driver_set_timer(desc->inet.port, desc->send_timeout); + } + return 1; + } + } + else { + iov[0].iov_base = buf; + iov[0].iov_len = h_len; + iov[1].iov_base = ptr; + iov[1].iov_len = len; + + DEBUGF(("tcp_send(%ld): s=%d, about to send %d,%d bytes\r\n", + (long)desc->inet.port, desc->inet.s, h_len, len)); + if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND) { + sock_send(desc->inet.s, buf, 0, 0); + n = 0; + } else if (sock_sendv(desc->inet.s,iov,2,&n,0) == SOCKET_ERROR) { + if ((sock_errno() != ERRNO_BLOCK) && (sock_errno() != EINTR)) { + int err = sock_errno(); + DEBUGF(("tcp_send(%ld): s=%d,sock_sendv(size=2) errno = %d\r\n", + (long)desc->inet.port, desc->inet.s, err)); + return tcp_send_error(desc, err); + } +#ifdef __WIN32__ + desc->inet.send_would_block = 1; +#endif + n = 0; + } + else if (n == len+h_len) { + ASSERT(NO_SUBSCRIBERS(&INETP(desc)->empty_out_q_subs)); + return 0; + } + + DEBUGF(("tcp_send(%ld): s=%d, Send failed, queuing", + (long)desc->inet.port, desc->inet.s)); + + if (n < h_len) { + driver_enq(ix, buf+n, h_len-n); + driver_enq(ix, ptr, len); + } + else { + n -= h_len; + driver_enq(ix, ptr+n, len-n); + } + sock_select(INETP(desc),(FD_WRITE|FD_CLOSE), 1); + } + return 0; +} + +static void tcp_inet_drv_output(ErlDrvData data, ErlDrvEvent event) +{ + (void)tcp_inet_output((tcp_descriptor*)data, (HANDLE)event); +} + +static void tcp_inet_drv_input(ErlDrvData data, ErlDrvEvent event) +{ + (void)tcp_inet_input((tcp_descriptor*)data, (HANDLE)event); +} + +/* socket ready for ouput: +** 1. TCP_STATE_CONNECTING => non block connect ? +** 2. 
TCP_STATE_CONNECTED => write output +*/ +static int tcp_inet_output(tcp_descriptor* desc, HANDLE event) +{ + int ret = 0; + ErlDrvPort ix = desc->inet.port; + + DEBUGF(("tcp_inet_output(%ld) {s=%d\r\n", + (long)desc->inet.port, desc->inet.s)); + if (desc->inet.state == TCP_STATE_CONNECTING) { + sock_select(INETP(desc),FD_CONNECT,0); + + driver_cancel_timer(ix); /* posssibly cancel a timer */ +#ifndef __WIN32__ + /* + * XXX This is strange. This *should* work on Windows NT too, + * but doesn't. An bug in Winsock 2.0 for Windows NT? + * + * See "Unix Netwok Programming", W.R.Stevens, p 412 for a + * discussion about Unix portability and non blocking connect. + */ + +#ifndef SO_ERROR + { + int sz = sizeof(desc->inet.remote); + int code = sock_peer(desc->inet.s, + (struct sockaddr*) &desc->inet.remote, &sz); + + if (code == SOCKET_ERROR) { + desc->inet.state = TCP_STATE_BOUND; /* restore state */ + ret = async_error(INETP(desc), sock_errno()); + goto done; + } + } +#else + { + int error = 0; /* Has to be initiated, we check it */ + unsigned int sz = sizeof(error); /* even if we get -1 */ + int code = sock_getopt(desc->inet.s, SOL_SOCKET, SO_ERROR, + (void *)&error, &sz); + + if ((code < 0) || error) { + desc->inet.state = TCP_STATE_BOUND; /* restore state */ + ret = async_error(INETP(desc), error); + goto done; + } + } +#endif /* SOCKOPT_CONNECT_STAT */ +#endif /* !__WIN32__ */ + + desc->inet.state = TCP_STATE_CONNECTED; + if (desc->inet.active) + sock_select(INETP(desc),(FD_READ|FD_CLOSE),1); + async_ok(INETP(desc)); + } + else if (IS_CONNECTED(INETP(desc))) { + for (;;) { + int vsize; + int n; + SysIOVec* iov; + + if ((iov = driver_peekq(ix, &vsize)) == NULL) { + sock_select(INETP(desc), FD_WRITE, 0); + send_empty_out_q_msgs(INETP(desc)); + goto done; + } + vsize = vsize > MAX_VSIZE ? 
MAX_VSIZE : vsize; + DEBUGF(("tcp_inet_output(%ld): s=%d, About to send %d items\r\n", + (long)desc->inet.port, desc->inet.s, vsize)); + if (sock_sendv(desc->inet.s, iov, vsize, &n, 0)==SOCKET_ERROR) { + if ((sock_errno() != ERRNO_BLOCK) && (sock_errno() != EINTR)) { + DEBUGF(("tcp_inet_output(%ld): sock_sendv(%d) errno = %d\r\n", + (long)desc->inet.port, vsize, sock_errno())); + ret = tcp_send_error(desc, sock_errno()); + goto done; + } +#ifdef __WIN32__ + desc->inet.send_would_block = 1; +#endif + goto done; + } + if (driver_deq(ix, n) <= desc->low) { + if (IS_BUSY(INETP(desc))) { + desc->inet.caller = desc->inet.busy_caller; + desc->inet.state &= ~INET_F_BUSY; + set_busy_port(desc->inet.port, 0); + /* if we have a timer then cancel and send ok to client */ + if (desc->busy_on_send) { + driver_cancel_timer(desc->inet.port); + desc->busy_on_send = 0; + } + inet_reply_ok(INETP(desc)); + } + } + } + } + else { + sock_select(INETP(desc),FD_CONNECT,0); + DEBUGF(("tcp_inet_output(%ld): bad state: %04x\r\n", + (long)desc->inet.port, desc->inet.state)); + } + done: + DEBUGF(("tcp_inet_output(%ld) }\r\n", (long)desc->inet.port)); + return ret; +} + +/*----------------------------------------------------------------------------- + + UDP & SCTP (the latter in a 1<->M Mode) + +-----------------------------------------------------------------------------*/ + +#if defined(HAVE_SO_BSDCOMPAT) +#if defined(__linux__) +#include <sys/utsname.h> +static int should_use_so_bsdcompat(void) +{ + /* SMP: FIXME this is probably not SMP safe but may be ok anyway? */ + static int init_done; + static int so_bsdcompat_is_obsolete; + + if (!init_done) { + struct utsname utsname; + unsigned int version, patchlevel; + + init_done = 1; + if (uname(&utsname) < 0) { + fprintf(stderr, "uname: %s\r\n", strerror(sock_errno())); + return 1; + } + /* Format is <version>.<patchlevel>.<sublevel><extraversion> + where the first three are unsigned integers and the last + is an arbitrary string. 
We only care about the first two. */ + if (sscanf(utsname.release, "%u.%u", &version, &patchlevel) != 2) { + fprintf(stderr, "uname: unexpected release '%s'\r\n", + utsname.release); + return 1; + } + /* SO_BSDCOMPAT is deprecated and triggers warnings in 2.5 + kernels. It is a no-op in 2.4 but not in 2.2 kernels. */ + if (version > 2 || (version == 2 && patchlevel >= 5)) + so_bsdcompat_is_obsolete = 1; + } + return !so_bsdcompat_is_obsolete; +} +#else /* __linux__ */ +#define should_use_so_bsdcompat() 1 +#endif /* __linux__ */ +#endif /* HAVE_SO_BSDCOMPAT */ + +static int packet_inet_init() +{ + return 0; +} + +static ErlDrvData packet_inet_start(ErlDrvPort port, char* args, int protocol) +{ + /* "inet_start" returns "ErlDrvData", but in fact it is "inet_descriptor*", + so we can preserve it as "ErlDrvData": + */ + ErlDrvData drvd = inet_start(port, sizeof(udp_descriptor), + protocol); + udp_descriptor* desc = (udp_descriptor*) drvd; + + if (desc == NULL) + return ERL_DRV_ERROR_ERRNO; + + desc->read_packets = INET_PACKET_POLL; + return drvd; +} + +static ErlDrvData udp_inet_start(ErlDrvPort port, char *args) +{ + return packet_inet_start(port, args, IPPROTO_UDP); +} + +#ifdef HAVE_SCTP +static ErlDrvData sctp_inet_start(ErlDrvPort port, char *args) +{ + return packet_inet_start(port, args, IPPROTO_SCTP); +} +#endif + +static void packet_inet_stop(ErlDrvData e) +{ + /* There should *never* be any "empty out q" subscribers on + an UDP or SCTP socket! 
+ NB: as in "inet_start", we can always cast "ErlDRvData" + into "udp_descriptor*" or "inet_descriptor*": + */ + udp_descriptor * udesc = (udp_descriptor*) e; + inet_descriptor* descr = INETP(udesc); + + ASSERT(NO_SUBSCRIBERS(&(descr->empty_out_q_subs))); + inet_stop(descr); +} + +static int packet_error(udp_descriptor* udesc, int err) +{ + inet_descriptor * desc = INETP(udesc); + if (!desc->active) + async_error(desc, err); + driver_failure_posix(desc->port, err); + return -1; +} + +/* +** Various functions accessible via "port_control" on the Erlang side: +*/ +static int packet_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf, int len, + char** rbuf, int rsize) +{ + int replen; + udp_descriptor * udesc = (udp_descriptor *) e; + inet_descriptor* desc = INETP(udesc); + int type = SOCK_DGRAM; + int af; +#ifdef HAVE_SCTP + if (IS_SCTP(desc)) type = SOCK_SEQPACKET; +#endif + + switch(cmd) { + case INET_REQ_OPEN: /* open socket and return internal index */ + DEBUGF(("packet_inet_ctl(%ld): OPEN\r\n", (long)desc->port)); + if (len != 1) { + return ctl_error(EINVAL, rbuf, rsize); + } + switch (buf[0]) { + case INET_AF_INET: af = AF_INET; break; +#if defined(HAVE_IN6) && defined(AF_INET6) + case INET_AF_INET6: af = AF_INET6; break; +#endif + default: + return ctl_error(EINVAL, rbuf, rsize); + } + replen = inet_ctl_open(desc, af, type, rbuf, rsize); + + if ((*rbuf)[0] != INET_REP_ERROR) { + if (desc->active) + sock_select(desc,FD_READ,1); +#ifdef HAVE_SO_BSDCOMPAT + /* + * Make sure that sending UDP packets to a non existing port on an + * existing machine doesn't close the socket. 
(Linux behaves this + * way) + */ + if (should_use_so_bsdcompat()) { + int one = 1; + /* Ignore errors */ + sock_setopt(desc->s, SOL_SOCKET, SO_BSDCOMPAT, &one, + sizeof(one)); + } +#endif + } + return replen; + + + case INET_REQ_FDOPEN: /* pass in an open (and bound) socket */ + DEBUGF(("packet inet_ctl(%ld): FDOPEN\r\n", (long)desc->port)); + if ((len == 5) && (buf[0] == INET_AF_INET)) + replen = inet_ctl_fdopen(desc, AF_INET, SOCK_DGRAM, + (SOCKET)get_int32(buf+1),rbuf,rsize); +#if defined(HAVE_IN6) && defined(AF_INET6) + else if ((len == 5) && (buf[0] == INET_AF_INET6)) + replen = inet_ctl_fdopen(desc, AF_INET6, SOCK_DGRAM, + (SOCKET)get_int32(buf+1),rbuf,rsize); +#endif + else + return ctl_error(EINVAL, rbuf, rsize); + + if ((*rbuf)[0] != INET_REP_ERROR) { + if (desc->active) + sock_select(desc,FD_READ,1); +#ifdef HAVE_SO_BSDCOMPAT + /* + * Make sure that sending UDP packets to a non existing port on an + * existing machine doesn't close the socket. (Linux behaves this + * way) + */ + if (should_use_so_bsdcompat()) { + int one = 1; + /* Ignore errors */ + sock_setopt(desc->s, SOL_SOCKET, SO_BSDCOMPAT, &one, + sizeof(one)); + } +#endif + } + return replen; + + + case INET_REQ_CLOSE: + DEBUGF(("packet_inet_ctl(%ld): CLOSE\r\n", (long)desc->port)); + erl_inet_close(desc); + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); + return 0; + + + case INET_REQ_CONNECT: { + /* UDP and SCTP connect operations are completely different. UDP + connect means only setting the default peer addr locally, so + it is always synchronous. SCTP connect means actual establish- + ing of an SCTP association with a remote peer, so it is async- + ronous, and similar to TCP connect. 
However, unlike TCP, SCTP + allows the socket to have multiple simultaneous associations: + */ + int code; + char tbuf[2]; + unsigned timeout; + + DEBUGF(("packet_inet_ctl(%ld): CONNECT\r\n", (long)desc->port)); + + /* INPUT: [ Timeout(4), Port(2), Address(N) ] */ + + if (!IS_OPEN(desc)) + return ctl_xerror(EXBADPORT, rbuf, rsize); + + if (!IS_BOUND(desc)) + return ctl_xerror(EXBADSEQ, rbuf, rsize); +#ifdef HAVE_SCTP + if (IS_SCTP(desc)) { + inet_address remote; + + if (IS_CONNECTING(desc)) + return ctl_error(EINVAL, rbuf, rsize); + if (len < 6) + return ctl_error(EINVAL, rbuf, rsize); + timeout = get_int32(buf); + buf += 4; + len -= 4; + + /* For SCTP, we do not set the peer's addr in desc->remote, as + multiple peers are possible: */ + if (inet_set_address(desc->sfamily, &remote, buf, &len) == NULL) + return ctl_error(EINVAL, rbuf, rsize); + + sock_select(desc, FD_CONNECT, 1); + code = sock_connect(desc->s, &remote.sa, len); + + if ((code == SOCKET_ERROR) && (sock_errno() == EINPROGRESS)) { + /* XXX: Unix only -- WinSock would have a different cond! */ + desc->state = SCTP_STATE_CONNECTING; + if (timeout != INET_INFINITY) + driver_set_timer(desc->port, timeout); + enq_async(desc, tbuf, INET_REQ_CONNECT); + } + else if (code == 0) { /* OK we are connected */ + sock_select(desc, FD_CONNECT, 0); + desc->state = PACKET_STATE_CONNECTED; + enq_async(desc, tbuf, INET_REQ_CONNECT); + async_ok(desc); + } + else { + sock_select(desc, FD_CONNECT, 0); + return ctl_error(sock_errno(), rbuf, rsize); + } + return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize); + } +#endif + /* UDP */ + if (len == 0) { + /* What does it mean??? NULL sockaddr??? 
*/ + sock_connect(desc->s, (struct sockaddr*) NULL, 0); + desc->state &= ~INET_F_ACTIVE; + enq_async(desc, tbuf, INET_REQ_CONNECT); + async_ok (desc); + } + else if (len < 6) + return ctl_error(EINVAL, rbuf, rsize); + else { + timeout = get_int32(buf); /* IGNORED */ + buf += 4; + len -= 4; + if (inet_set_address(desc->sfamily, + &desc->remote, buf, &len) == NULL) + return ctl_error(EINVAL, rbuf, rsize); + + code = sock_connect(desc->s, + (struct sockaddr*) &desc->remote, len); + if (code == SOCKET_ERROR) { + sock_connect(desc->s, (struct sockaddr*) NULL, 0); + desc->state &= ~INET_F_ACTIVE; + return ctl_error(sock_errno(), rbuf, rsize); + } + else /* ok we are connected */ { + enq_async(desc, tbuf, INET_REQ_CONNECT); + desc->state |= INET_F_ACTIVE; + async_ok (desc); + } + } + return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize); + } + +#ifdef HAVE_SCTP + case SCTP_REQ_LISTEN: + { /* LISTEN is only for SCTP sockets, not UDP. This code is borrowed + from the TCP section. Returns: {ok,[]} on success. + */ + int flag; + + DEBUGF(("packet_inet_ctl(%ld): LISTEN\r\n", (long)desc->port)); + if (!IS_SCTP(desc)) + return ctl_xerror(EXBADPORT, rbuf, rsize); + if (!IS_OPEN(desc)) + return ctl_xerror(EXBADPORT, rbuf, rsize); + if (!IS_BOUND(desc)) + return ctl_xerror(EXBADSEQ, rbuf, rsize); + + /* The arg is a binary value: 1:enable, 0:disable */ + if (len != 1) + return ctl_error(EINVAL, rbuf, rsize); + flag = get_int8(buf); + + if (sock_listen(desc->s, flag) == SOCKET_ERROR) + return ctl_error(sock_errno(), rbuf, rsize); + + desc->state = SCTP_STATE_LISTEN; /* XXX: not used? */ + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); + } + + case SCTP_REQ_BINDX: + { /* Multi-homing bind for SCTP: */ + /* Construct the list of addresses we bind to. The curr limit is + 256 addrs. 
Buff structure: Flags(1), ListItem,...: + */ + struct sockaddr addrs[256]; + char* curr; + int add_flag, n, rflag; + + if (!IS_SCTP(desc)) + return ctl_xerror(EXBADPORT, rbuf, rsize); + + curr = buf; + add_flag = get_int8(curr); + curr++; + + for(n=0; n < 256 && curr < buf+len; n++) + { + /* List item format: Port(2), IP(4|16) -- compatible with + "inet_set_address": */ + inet_address tmp; + int alen = buf + len - curr; + curr = inet_set_address(desc->sfamily, &tmp, curr, &alen); + if (curr == NULL) + return ctl_error(EINVAL, rbuf, rsize); + + /* Now: we need to squeeze "tmp" into the size of "sockaddr", + which is smaller than "tmp" for IPv6 (extra IN6 info will + be cut off): */ + memcpy(addrs + n, &tmp, sizeof(struct sockaddr)); + } + /* Make the real flags: */ + rflag = add_flag ? SCTP_BINDX_ADD_ADDR : SCTP_BINDX_REM_ADDR; + + /* Invoke the call: */ + if (p_sctp_bindx(desc->s, addrs, n, rflag) < 0) + return ctl_error(sock_errno(), rbuf, rsize); + + desc->state = INET_STATE_BOUND; + + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); + } +#endif /* HAVE_SCTP */ + + case PACKET_REQ_RECV: + { /* THIS IS A FRONT-END for "recv*" requests. It only enqueues the + request and possibly returns the data immediately available. + The actual data returning function is the back-end ("*input"): + */ + unsigned timeout; + char tbuf[2]; + + DEBUGF(("packet_inet_ctl(%ld): RECV\r\n", (long)desc->port)); + /* INPUT: Timeout(4), Length(4) */ + if (!IS_OPEN(desc)) + return ctl_xerror(EXBADPORT, rbuf, rsize); + if (!IS_BOUND(desc)) + return ctl_error(EINVAL, rbuf, rsize); + if (desc->active || (len != 8)) + return ctl_error(EINVAL, rbuf, rsize); + timeout = get_int32(buf); + /* The 2nd arg, Length(4), is ignored for both UDP ans SCTP protocols, + since they are msg-oriented. 
*/ + + if (enq_async(desc, tbuf, PACKET_REQ_RECV) < 0) + return ctl_error(EALREADY, rbuf, rsize); + + if (packet_inet_input(udesc, desc->event) == 0) { + if (timeout == 0) + async_error_am(desc, am_timeout); + else { + if (timeout != INET_INFINITY) + driver_set_timer(desc->port, timeout); + } + } + return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize); + } + + default: + /* Delegate the request to the INET layer. In particular, + INET_REQ_BIND goes here. If the req is not recognised + there either, an error is returned: + */ + return inet_ctl(desc, cmd, buf, len, rbuf, rsize); + } +} + +static void packet_inet_timeout(ErlDrvData e) +{ + udp_descriptor * udesc = (udp_descriptor*) e; + inet_descriptor * desc = INETP(udesc); + if (!(desc->active)) + sock_select(desc, FD_READ, 0); + async_error_am (desc, am_timeout); +} + + +/* THIS IS A "send*" REQUEST; on the Erlang side: "port_command". +** input should be: P1 P0 Address buffer . +** For UDP, buffer (after Address) is just data to be sent. +** For SCTP, buffer contains a list representing 2 items: +** (1) 6 parms for sctp_sndrcvinfo, as in sctp_get_sendparams(); +** (2) 0+ real data bytes. +** There is no destination address -- SCTYP send is performed over +** an existing association, using "sctp_sndrcvinfo" specified. 
+*/ +static void packet_inet_command(ErlDrvData e, char* buf, int len) +{ + udp_descriptor * udesc= (udp_descriptor*) e; + inet_descriptor* desc = INETP(udesc); + char* ptr = buf; + char* qtr; + int sz; + int code; + inet_address other; + + desc->caller = driver_caller(desc->port); + + if (!IS_OPEN(desc)) { + inet_reply_error(desc, EINVAL); + return; + } + if (!IS_BOUND(desc)) { + inet_reply_error(desc, EINVAL); + return; + } + +#ifdef HAVE_SCTP + if (IS_SCTP(desc)) + { + int data_len; + struct iovec iov[1]; /* For real data */ + struct msghdr mhdr; /* Message wrapper */ + struct sctp_sndrcvinfo *sri; /* The actual ancilary data */ + union { /* For ancilary data */ + struct cmsghdr hdr; + char ancd[CMSG_SPACE(sizeof(*sri))]; + } cmsg; + + if (len < SCTP_GET_SENDPARAMS_LEN) { + inet_reply_error(desc, EINVAL); + return; + } + + /* The ancilary data */ + sri = (struct sctp_sndrcvinfo *) (CMSG_DATA(&cmsg.hdr)); + /* Get the "sndrcvinfo" from the buffer, advancing the "ptr": */ + ptr = sctp_get_sendparams(sri, ptr); + + /* The ancilary data wrapper */ + cmsg.hdr.cmsg_level = IPPROTO_SCTP; + cmsg.hdr.cmsg_type = SCTP_SNDRCV; + cmsg.hdr.cmsg_len = CMSG_LEN(sizeof(*sri)); + + data_len = (buf + len) - ptr; + /* The whole msg. + * Solaris (XPG 4.2) requires iovlen >= 1 even for data_len == 0. + */ + mhdr.msg_name = NULL; /* Already connected */ + mhdr.msg_namelen = 0; + iov[0].iov_len = data_len; + iov[0].iov_base = ptr; /* The real data */ + mhdr.msg_iov = iov; + mhdr.msg_iovlen = 1; + mhdr.msg_control = cmsg.ancd; /* For ancilary data */ + mhdr.msg_controllen = cmsg.hdr.cmsg_len; + mhdr.msg_flags = 0; /* Not used with "sendmsg" */ + + /* Now do the actual sending. NB: "flags" in "sendmsg" itself are NOT + used: */ + code = sock_sendmsg(desc->s, &mhdr, 0); + goto check_result_code; + } +#endif + /* UDP socket. 
Even if it is connected, there is an address prefix + here -- ignored for connected sockets: */ + sz = len; + qtr = inet_set_address(desc->sfamily, &other, ptr, &sz); + if (qtr == NULL) { + inet_reply_error(desc, EINVAL); + return; + } + len -= (qtr - ptr); + ptr = qtr; + /* Now "ptr" is the user data ptr, "len" is data length: */ + inet_output_count(desc, len); + + if (desc->state & INET_F_ACTIVE) { /* connected (ignore address) */ + code = sock_send(desc->s, ptr, len, 0); + } + else { + code = sock_sendto(desc->s, ptr, len, 0, &other.sa, sz); + } + +#ifdef HAVE_SCTP + check_result_code: + /* "code" analysis is the same for both SCTP and UDP cases above: */ +#endif + if (code == SOCKET_ERROR) { + int err = sock_errno(); + inet_reply_error(desc, err); + } + else + inet_reply_ok(desc); +} + + +#ifdef __WIN32__ +static void packet_inet_event(ErlDrvData e, ErlDrvEvent event) +{ + udp_descriptor * udesc = (udp_descriptor*)e; + inet_descriptor* desc = INETP(udesc); + WSANETWORKEVENTS netEv; + + if ((WSAEnumNetworkEvents)(desc->s, desc->event, &netEv) != 0) { + DEBUGF(( "port %d: EnumNetwrokEvents = %d\r\n", + desc->port, sock_errno() )); + return; /* -1; */ + } + netEv.lNetworkEvents |= desc->forced_events; + if (netEv.lNetworkEvents & FD_READ) { + packet_inet_input(udesc, (HANDLE)event); + } +} + +#endif + +static void packet_inet_drv_input(ErlDrvData e, ErlDrvEvent event) +{ + (void) packet_inet_input((udp_descriptor*)e, (HANDLE)event); +} + +/* +** THIS IS A BACK-END FOR "recv*" REQUEST, which actually receives the +** data requested, and delivers them to the caller: +*/ +static int packet_inet_input(udp_descriptor* udesc, HANDLE event) +{ + inet_descriptor* desc = INETP(udesc); + int n; + unsigned int len; + inet_address other; + char abuf[sizeof(inet_address)]; /* buffer address; enough??? 
*/ + int sz; + char* ptr; + ErlDrvBinary* buf; /* binary */ + int packet_count = udesc->read_packets; + int count = 0; /* number of packets delivered to owner */ +#ifdef HAVE_SCTP + struct msghdr mhdr; /* Top-level msg structure */ + struct iovec iov[1]; /* Data or Notification Event */ + char ancd[SCTP_ANC_BUFF_SIZE]; /* Ancillary Data */ + int short_recv = 0; +#endif + + while(packet_count--) { + len = sizeof(other); + sz = desc->bufsz; + /* Allocate space for message and address. NB: "bufsz" is in "desc", + but the "buf" itself is allocated separately: + */ + if ((buf = alloc_buffer(sz+len)) == NULL) + return packet_error(udesc, ENOMEM); + ptr = buf->orig_bytes + len; /* pointer to message part */ + + /* Note: On Windows NT, recvfrom() fails if the socket is connected. */ +#ifdef HAVE_SCTP + /* For SCTP we must use recvmsg() */ + if (IS_SCTP(desc)) { + iov->iov_base = ptr; /* Data will come here */ + iov->iov_len = sz; /* Remaining buffer space */ + + mhdr.msg_name = &other; /* Peer addr comes into "other" */ + mhdr.msg_namelen = len; + mhdr.msg_iov = iov; + mhdr.msg_iovlen = 1; + mhdr.msg_control = ancd; + mhdr.msg_controllen = SCTP_ANC_BUFF_SIZE; + mhdr.msg_flags = 0; /* To be filled by "recvmsg" */ + + /* Do the actual SCTP receive: */ + n = sock_recvmsg(desc->s, &mhdr, 0); + goto check_result; + } +#endif + /* Use recv() instead on connected sockets. 
*/ + if ((desc->state & INET_F_ACTIVE)) { + n = sock_recv(desc->s, ptr, sz, 0); + other = desc->remote; + } + else + n = sock_recvfrom(desc->s, ptr, sz, 0, &other.sa, &len); + +#ifdef HAVE_SCTP + check_result: +#endif + /* Analyse the result: */ + if (n == SOCKET_ERROR +#ifdef HAVE_SCTP + || (short_recv = (IS_SCTP(desc) && !(mhdr.msg_flags & MSG_EOR))) + /* NB: here we check for EOR not being set -- this is an error as + well, we don't support partial msgs: + */ +#endif + ) { + int err = sock_errno(); + release_buffer(buf); + if (err != ERRNO_BLOCK) { + if (!desc->active) { +#ifdef HAVE_SCTP + if (short_recv) + async_error_am(desc, am_short_recv); + else +#else + async_error(desc, err); +#endif + driver_cancel_timer(desc->port); + sock_select(desc,FD_READ,0); + } + else { + /* This is for an active desc only: */ + packet_error_message(udesc, err); + } + } + else if (!desc->active) + sock_select(desc,FD_READ,1); + return count; /* strange, not ready */ + } + else { + int offs; + int nsz; + int code; + unsigned int alen = len; + void * extra = NULL; + + inet_input_count(desc, n); + inet_get_address(desc->sfamily, abuf, &other, &alen); + /* Copy formatted address to the buffer allocated; "alen" is the + actual length which must be <= than the original reserved "len". 
+ This means that the addr + data in the buffer are contiguous, + but they may start not at the "orig_bytes", but with some "offs" + from them: + */ + ASSERT (alen <= len); + sys_memcpy(ptr - alen, abuf, alen); + ptr -= alen; + nsz = n + alen; /* nsz = data + address */ + offs = ptr - buf->orig_bytes; /* initial pointer offset */ + + /* Check if we need to reallocate binary */ + if ((desc->mode == INET_MODE_BINARY) && + (desc->hsz < n) && (nsz < BIN_REALLOC_LIMIT(sz))) { + ErlDrvBinary* tmp; + if ((tmp = realloc_buffer(buf,nsz+offs)) != NULL) + buf = tmp; + } +#ifdef HAVE_SCTP + if (IS_SCTP(desc)) extra = &mhdr; +#endif + /* Actual parsing and return of the data received, occur here: */ + code = packet_reply_binary_data(desc, (unsigned int)alen, + buf, offs, nsz, extra); + free_buffer(buf); + if (code < 0) + return count; + count++; + if (!desc->active) { + driver_cancel_timer(desc->port); /* possibly cancel */ + sock_select(desc,FD_READ,0); + return count; /* passive mode (read one packet only) */ + } + } + } + return count; +} + +static void packet_inet_drv_output(ErlDrvData e, ErlDrvEvent event) +{ + (void) packet_inet_output((udp_descriptor*)e, (HANDLE)event); +} + +/* UDP/SCTP socket ready for output: +** This is a Back-End for Non-Block SCTP Connect (SCTP_STATE_CONNECTING) +*/ +static int packet_inet_output(udp_descriptor* udesc, HANDLE event) +{ + inet_descriptor* desc = INETP(udesc); + int ret = 0; + ErlDrvPort ix = desc->port; + + DEBUGF(("packet_inet_output(%ld) {s=%d\r\n", + (long)desc->port, desc->s)); + + if (desc->state == SCTP_STATE_CONNECTING) { + sock_select(desc, FD_CONNECT, 0); + + driver_cancel_timer(ix); /* posssibly cancel a timer */ +#ifndef __WIN32__ + /* + * XXX This is strange. This *should* work on Windows NT too, + * but doesn't. An bug in Winsock 2.0 for Windows NT? + * + * See "Unix Netwok Programming", W.R.Stevens, p 412 for a + * discussion about Unix portability and non blocking connect. 
+ */ + +#ifndef SO_ERROR + { + int sz = sizeof(desc->remote); + int code = sock_peer(desc->s, + (struct sockaddr*) &desc->remote, &sz); + + if (code == SOCKET_ERROR) { + desc->state = PACKET_STATE_BOUND; /* restore state */ + ret = async_error(desc, sock_errno()); + goto done; + } + } +#else + { + int error = 0; /* Has to be initiated, we check it */ + unsigned int sz = sizeof(error); /* even if we get -1 */ + int code = sock_getopt(desc->s, SOL_SOCKET, SO_ERROR, + (void *)&error, &sz); + + if ((code < 0) || error) { + desc->state = PACKET_STATE_BOUND; /* restore state */ + ret = async_error(desc, error); + goto done; + } + } +#endif /* SOCKOPT_CONNECT_STAT */ +#endif /* !__WIN32__ */ + + desc->state = PACKET_STATE_CONNECTED; + async_ok(desc); + } + else { + sock_select(desc,FD_CONNECT,0); + + DEBUGF(("packet_inet_output(%ld): bad state: %04x\r\n", + (long)desc->port, desc->state)); + } + done: + DEBUGF(("packet_inet_output(%ld) }\r\n", (long)desc->port)); + return ret; +} + +/*---------------------------------------------------------------------------*/ + +#ifdef __WIN32__ + +/* + * Although we no longer need to lookup all of winsock2 dynamically, + * there are still some function(s) we need to look up. + */ +static void find_dynamic_functions(void) +{ + char kernel_dll_name[] = "kernel32"; + HMODULE module; + module = GetModuleHandle(kernel_dll_name); + fpSetHandleInformation = (module != NULL) ? + (BOOL (WINAPI *)(HANDLE,DWORD,DWORD)) + GetProcAddress(module,"SetHandleInformation") : + NULL; +} + + + +/* + * We must make sure that the socket handles are not inherited + * by port programs (if there are inherited, the sockets will not + * get closed when the emulator terminates, and epmd and other Erlang + * nodes will not notice that we have exited). + * + * XXX It is not clear whether this works/is necessary in Windows 95. + * There could also be problems with Winsock implementations from other + * suppliers than Microsoft. 
+ */ + +static SOCKET +make_noninheritable_handle(SOCKET s) +{ + if (s != INVALID_SOCKET) { + if (fpSetHandleInformation != NULL) { + (*fpSetHandleInformation)((HANDLE) s, HANDLE_FLAG_INHERIT, 0); + } else { + HANDLE non_inherited; + HANDLE this_process = GetCurrentProcess(); + if (DuplicateHandle(this_process, (HANDLE) s, + this_process, &non_inherited, 0, + FALSE, DUPLICATE_SAME_ACCESS)) { + sock_close(s); + s = (SOCKET) non_inherited; + } + } + } + return s; +} + +#endif /* UDP for __WIN32__ */ + +/* + * Multi-timers + */ + +static void absolute_timeout(unsigned millis, ErlDrvNowData *out) +{ + unsigned rest; + unsigned long millipart; + unsigned long secpart; + unsigned long megasecpart; + unsigned tmo_secs = (millis / 1000U); + unsigned tmo_millis = (millis % 1000); + driver_get_now(out); + rest = (out->microsecs) % 1000; + millipart = ((out->microsecs) / 1000UL); + if (rest >= 500) { + ++millipart; + } + secpart = out->secs; + megasecpart = out->megasecs; + millipart += tmo_millis; + secpart += (millipart / 1000000UL); + millipart %= 1000000UL; + secpart += tmo_secs; + megasecpart += (secpart / 1000000UL); + secpart %= 1000000UL; + out->megasecs = megasecpart; + out->secs = secpart; + out->microsecs = (millipart * 1000UL); +} + +static unsigned relative_timeout(ErlDrvNowData *in) +{ + ErlDrvNowData now; + unsigned rest; + unsigned long millipart, in_millis, in_secs, in_megasecs; + + driver_get_now(&now); + + in_secs = in->secs; + in_megasecs = in->megasecs; + + rest = (now.microsecs) % 1000; + millipart = ((now.microsecs) / 1000UL); + if (rest >= 500) { + ++millipart; + } + in_millis = ((in->microsecs) / 1000UL); + if ( in_millis < millipart ) { + if (in_secs > 0) { + --in_secs; + } else { + in_secs = (1000000UL - 1UL); + if (in_megasecs <= now.megasecs) { + return 0; + } else { + --in_megasecs; + } + } + in_millis += 1000UL; + } + in_millis -= millipart; + + if (in_secs < now.secs) { + if (in_megasecs <= now.megasecs) { + return 0; + } else { + 
--in_megasecs; + } + in_secs += 1000000; + } + in_secs -= now.secs; + if (in_megasecs < now.megasecs) { + return 0; + } else { + in_megasecs -= now.megasecs; + } + return (unsigned) ((in_megasecs * 1000000000UL) + + (in_secs * 1000UL) + + in_millis); +} + +#ifdef DEBUG +static int nowcmp(ErlDrvNowData *d1, ErlDrvNowData *d2) +{ + /* Assume it's not safe to do signed conversion on megasecs... */ + if (d1->megasecs < d2->megasecs) { + return -1; + } else if (d1->megasecs > d2->megasecs) { + return 1; + } else if (d1->secs != d2->secs) { + return ((int) d1->secs) - ((int) d2->secs); + } + return ((int) d1->microsecs) - ((int) d2->microsecs); +} +#endif + +static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port, + ErlDrvData data) +{ + unsigned next_timeout; + if (!*first) { + ASSERT(0); + return; + } +#ifdef DEBUG + { + ErlDrvNowData chk; + driver_get_now(&chk); + chk.microsecs /= 10000UL; + chk.microsecs *= 10000UL; + chk.microsecs += 10000; + ASSERT(nowcmp(&chk,&((*first)->when)) >= 0); + } +#endif + do { + MultiTimerData *save = *first; + *first = save->next; + (*(save->timeout_function))(data,save->caller); + FREE(save); + if (*first == NULL) { + return; + } + (*first)->prev = NULL; + next_timeout = relative_timeout(&((*first)->when)); + } while (next_timeout == 0); + driver_set_timer(port,next_timeout); +} + +static void clean_multi_timers(MultiTimerData **first, ErlDrvPort port) +{ + MultiTimerData *p; + if (*first) { + driver_cancel_timer(port); + } + while (*first) { + p = *first; + *first = p->next; + FREE(p); + } +} +static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTimerData *p) +{ + if (p->prev != NULL) { + p->prev->next = p->next; + } else { + driver_cancel_timer(port); + *first = p->next; + if (*first) { + unsigned ntmo = relative_timeout(&((*first)->when)); + driver_set_timer(port,ntmo); + } + } + if (p->next != NULL) { + p->next->prev = p->prev; + } + FREE(p); +} + +static MultiTimerData 
*add_multi_timer(MultiTimerData **first, ErlDrvPort port, + ErlDrvTermData caller, unsigned timeout, + void (*timeout_fun)(ErlDrvData drv_data, + ErlDrvTermData caller)) +{ + MultiTimerData *mtd, *p, *s; + mtd = ALLOC(sizeof(MultiTimerData)); + absolute_timeout(timeout, &(mtd->when)); + mtd->timeout_function = timeout_fun; + mtd->caller = caller; + mtd->next = mtd->prev = NULL; + for(p = *first,s = NULL; p != NULL; s = p, p = p->next) { + if (p->when.megasecs >= mtd->when.megasecs) { + break; + } + } + if (!p || p->when.megasecs > mtd->when.megasecs) { + goto found; + } + for (; p!= NULL; s = p, p = p->next) { + if (p->when.secs >= mtd->when.secs) { + break; + } + } + if (!p || p->when.secs > mtd->when.secs) { + goto found; + } + for (; p!= NULL; s = p, p = p->next) { + if (p->when.microsecs >= mtd->when.microsecs) { + break; + } + } + found: + if (!p) { + if (!s) { + *first = mtd; + } else { + s->next = mtd; + mtd->prev = s; + } + } else { + if (!s) { + *first = mtd; + } else { + s->next = mtd; + mtd->prev = s; + } + mtd->next = p; + p->prev = mtd; + } + if (!s) { + if (mtd->next) { + driver_cancel_timer(port); + } + driver_set_timer(port,timeout); + } + return mtd; +} + + + + + +/*----------------------------------------------------------------------------- + + Subscription + +-----------------------------------------------------------------------------*/ + +static int +save_subscriber(subs, subs_pid) +subs_list *subs; ErlDrvTermData subs_pid; +{ + subs_list *tmp; + + if(NO_SUBSCRIBERS(subs)) { + subs->subscriber = subs_pid; + subs->next = NULL; + } + else { + tmp = subs->next; + subs->next = ALLOC(sizeof(subs_list)); + if(subs->next == NULL) { + subs->next = tmp; + return 0; + } + subs->next->subscriber = subs_pid; + subs->next->next = tmp; + } + return 1; +} + +static void +free_subscribers(subs) +subs_list *subs; +{ + subs_list *this; + subs_list *next; + + this = subs->next; + while(this) { + next = this->next; + FREE((void *) this); + this = next; + } + + 
subs->subscriber = NO_PROCESS; + subs->next = NULL; +} + +static void send_to_subscribers +( + ErlDrvPort port, + subs_list *subs, + int free_subs, + ErlDrvTermData msg[], + int msg_len +) +{ + subs_list *this; + subs_list *next; + int first = 1; + + if(NO_SUBSCRIBERS(subs)) + return; + + this = subs; + while(this) { + + (void) driver_send_term(port, this->subscriber, msg, msg_len); + + if(free_subs && !first) { + next = this->next; + FREE((void *) this); + this = next; + } + else + this = this->next; + first = 0; + } + + if(free_subs) { + subs->subscriber = NO_PROCESS; + subs->next = NULL; + } + +} + +/* + * A *very* limited socket interface. Used by the memory tracer + * (erl_mtrace.c). + */ +#include "erl_sock.h" + +erts_sock_t erts_sock_open(void) +{ + SOCKET s; + + if(!sock_init()) + return ERTS_SOCK_INVALID_SOCKET; + + s = sock_open(AF_INET, SOCK_STREAM, 0); + + if (s == INVALID_SOCKET) + return ERTS_SOCK_INVALID_SOCKET; + + return (erts_sock_t) s; +} + +void erts_sock_close(erts_sock_t socket) +{ + if (socket != ERTS_SOCK_INVALID_SOCKET) + sock_close((SOCKET) socket); +} + + +int erts_sock_connect(erts_sock_t socket, byte *ip_addr, int len, Uint16 port) +{ + SOCKET s = (SOCKET) socket; + char buf[2 + 4]; + int blen = 6; + inet_address addr; + + if (socket == ERTS_SOCK_INVALID_SOCKET || len != 4) + return 0; + + put_int16(port, buf); + memcpy((void *) (buf + 2), (void *) ip_addr, 4); + + if (!inet_set_address(AF_INET, &addr, buf, &blen)) + return 0; + + if (SOCKET_ERROR == sock_connect(s, + (struct sockaddr *) &addr, + sizeof(struct sockaddr_in))) + return 0; + return 1; +} + +Sint erts_sock_send(erts_sock_t socket, const void *buf, Sint len) +{ + return (Sint) sock_send((SOCKET) socket, buf, (size_t) len, 0); +} + + +int erts_sock_gethostname(char *buf, int bufsz) +{ + if (sock_hostname(buf, bufsz) == SOCKET_ERROR) + return -1; + return 0; +} + + +int erts_sock_errno() +{ + return sock_errno(); +} diff --git a/erts/emulator/drivers/common/ram_file_drv.c 
b/erts/emulator/drivers/common/ram_file_drv.c new file mode 100644 index 0000000000..2e3aeb981e --- /dev/null +++ b/erts/emulator/drivers/common/ram_file_drv.c @@ -0,0 +1,692 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 1997-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* + * RAM File operations + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +/* Operations */ + +/* defined "file" functions */ +#define RAM_FILE_OPEN 1 +#define RAM_FILE_READ 2 +#define RAM_FILE_LSEEK 3 +#define RAM_FILE_WRITE 4 +#define RAM_FILE_FSYNC 9 +#define RAM_FILE_TRUNCATE 14 +#define RAM_FILE_PREAD 17 +#define RAM_FILE_PWRITE 18 + +/* other operations */ +#define RAM_FILE_GET 30 +#define RAM_FILE_SET 31 +#define RAM_FILE_GET_CLOSE 32 /* get_file/close */ +#define RAM_FILE_COMPRESS 33 /* compress file */ +#define RAM_FILE_UNCOMPRESS 34 /* uncompress file */ +#define RAM_FILE_UUENCODE 35 /* uuencode file */ +#define RAM_FILE_UUDECODE 36 /* uudecode file */ +#define RAM_FILE_SIZE 37 /* get file size */ +/* possible new operations include: + DES_ENCRYPT + DES_DECRYPT + CRC-32, CRC-16, CRC-CCITT + IP-CHECKSUM +*/ + +/* + * Open modes for RAM_FILE_OPEN. + */ +#define RAM_FILE_MODE_READ 1 +#define RAM_FILE_MODE_WRITE 2 /* Implies truncating file + * when used alone. */ +#define RAM_FILE_MODE_READ_WRITE 3 + +/* + * Seek modes for RAM_FILE_LSEEK. 
+ */ +#define RAM_FILE_SEEK_SET 0 +#define RAM_FILE_SEEK_CUR 1 +#define RAM_FILE_SEEK_END 2 + +/* Return codes */ + +#define RAM_FILE_RESP_OK 0 +#define RAM_FILE_RESP_ERROR 1 +#define RAM_FILE_RESP_DATA 2 +#define RAM_FILE_RESP_NUMBER 3 +#define RAM_FILE_RESP_INFO 4 + +#include <stdio.h> +#include <ctype.h> +#include <limits.h> + +#include "sys.h" +#include "erl_driver.h" +#include "zlib.h" +#include "gzio.h" + +#ifndef NULL +#define NULL ((void*)0) +#endif + +#define BFILE_BLOCK 1024 + +typedef unsigned char uchar; + +static ErlDrvData rfile_start(ErlDrvPort, char*); +static int rfile_init(void); +static void rfile_stop(ErlDrvData); +static void rfile_command(ErlDrvData, char*, int); + + +struct erl_drv_entry ram_file_driver_entry = { + rfile_init, + rfile_start, + rfile_stop, + rfile_command, + NULL, + NULL, + "ram_file_drv" +}; + +/* A File is represented as a array of bytes, this array is + reallocated when needed. A possibly better implementation + whould be to have a vector of blocks. 
This may be implemented + when we have the commandv/driver_outputv +*/ +typedef struct ram_file { + ErlDrvPort port; /* the associcated port */ + int flags; /* flags read/write */ + ErlDrvBinary* bin; /* binary to hold binary file */ + char* buf; /* buffer start (in binary) */ + int size; /* buffer size (allocated) */ + int cur; /* current position in buffer */ + int end; /* end position in buffer */ +} RamFile; + +#ifdef LOADABLE +static int rfile_finish(DriverEntry* drv) +{ + return 0; +} + +DriverEntry* driver_init(void *handle) +{ + ram_file_driver_entry.handle = handle; + ram_file_driver_entry.driver_name = "ram_file_drv"; + ram_file_driver_entry.finish = rfile_finish; + ram_file_driver_entry.init = rfile_init; + ram_file_driver_entry.start = rfile_start; + ram_file_driver_entry.stop = rfile_stop; + ram_file_driver_entry.output = rfile_command; + ram_file_driver_entry.ready_input = NULL; + ram_file_driver_entry.ready_output = NULL; + return &ram_file_driver_entry; +} +#endif + +static int rfile_init(void) +{ + return 0; +} + +static ErlDrvData rfile_start(ErlDrvPort port, char* buf) +{ + RamFile* f; + + if ((f = (RamFile*) driver_alloc(sizeof(RamFile))) == NULL) { + errno = ENOMEM; + return ERL_DRV_ERROR_ERRNO; + } + f->port = port; + f->flags = 0; + f->bin = NULL; + f->buf = NULL; + f->size = f->cur = f->end = 0; + return (ErlDrvData)f; +} + +static void rfile_stop(ErlDrvData e) +{ + RamFile* f = (RamFile*)e; + if (f->bin != NULL) + driver_free_binary(f->bin); + driver_free(f); +} + +/* + * Sends back an error reply to Erlang. + */ + +static int error_reply(RamFile *f, int err) +{ + char response[256]; /* Response buffer. 
*/ + char* s; + char* t; + + /* + * Contents of buffer sent back: + * + * +-----------------------------------------+ + * | RAM_FILE_RESP_ERROR | Posix error id string | + * +-----------------------------------------+ + */ + response[0] = RAM_FILE_RESP_ERROR; + for (s = erl_errno_id(err), t = response+1; *s; s++, t++) + *t = tolower(*s); + driver_output2(f->port, response, t-response, NULL, 0); + return 0; +} + +static int reply(RamFile *f, int ok, int err) +{ + if (!ok) + error_reply(f, err); + else { + char c = RAM_FILE_RESP_OK; + driver_output2(f->port, &c, 1, NULL, 0); + } + return 0; +} + +static int numeric_reply(RamFile *f, int result) +{ + char tmp[5]; + + /* + * Contents of buffer sent back: + * + * +-----------------------------------------------+ + * | RAM_FILE_RESP_NUMBER | 32-bit number (big-endian) | + * +-----------------------------------------------+ + */ + + tmp[0] = RAM_FILE_RESP_NUMBER; + put_int32(result, tmp+1); + driver_output2(f->port, tmp, sizeof(tmp), NULL, 0); + return 0; +} + +/* install bin as the new binary reset all pointer */ + +static void ram_file_set(RamFile *f, ErlDrvBinary *bin, int bsize, int len) +{ + f->size = bsize; + f->buf = bin->orig_bytes; + f->cur = 0; + f->end = len; + f->bin = bin; +} + +static int ram_file_init(RamFile *f, char *buf, int count, int *error) +{ + int bsize; + ErlDrvBinary* bin; + + if (count < 0) { + *error = EINVAL; + return -1; + } + if ((bsize = (count+BFILE_BLOCK+(BFILE_BLOCK>>1)) & ~(BFILE_BLOCK-1)) + < 0) { + bsize = INT_MAX; + } + + if (f->bin == NULL) + bin = driver_alloc_binary(bsize); + else + bin = driver_realloc_binary(f->bin, bsize); + if (bin == NULL) { + *error = ENOMEM; + return -1; + } + sys_memzero(bin->orig_bytes, bsize); + sys_memcpy(bin->orig_bytes, buf, count); + ram_file_set(f, bin, bsize, count); + return count; +} + +static int ram_file_expand(RamFile *f, int size, int *error) +{ + int bsize; + ErlDrvBinary* bin; + + if (size < 0) { + *error = EINVAL; + return -1; + } + if 
((bsize = (size+BFILE_BLOCK+(BFILE_BLOCK>>1)) & ~(BFILE_BLOCK-1)) + < 0) { + bsize = INT_MAX; + } + + if (bsize <= f->size) + return f->size; + else { + if ((bin = driver_realloc_binary(f->bin, bsize)) == NULL) { + *error = ENOMEM; + return -1; + } + sys_memzero(bin->orig_bytes+f->size, bsize - f->size); + f->size = bsize; + f->buf = bin->orig_bytes; + f->bin = bin; + return bsize; + } +} + + +static int ram_file_write(RamFile *f, char *buf, int len, + int *location, int *error) +{ + int cur = f->cur; + + if (!(f->flags & RAM_FILE_MODE_WRITE)) { + *error = EBADF; + return -1; + } + if (location) cur = *location; + if (cur < 0 || len < 0 || cur+len < 0) { + *error = EINVAL; + return -1; + } + if (cur+len > f->size && ram_file_expand(f, cur+len, error) < 0) { + return -1; + } + if (len) sys_memcpy(f->buf+cur, buf, len); + cur += len; + if (cur > f->end) f->end = cur; + if (! location) f->cur = cur; + return len; +} + +static int ram_file_read(RamFile *f, int len, ErlDrvBinary **bp, + int *location, int *error) +{ + ErlDrvBinary* bin; + int cur = f->cur; + + if (!(f->flags & RAM_FILE_MODE_READ)) { + *error = EBADF; + return -1; + } + if (location) cur = *location; + if (cur < 0 || len < 0) { + *error = EINVAL; + return -1; + } + if (cur < f->end) { + if (len > f->end-cur) len = f->end - cur; + } else { + len = 0; /* eof */ + } + if ((bin = driver_alloc_binary(len)) == NULL) { + *error = ENOMEM; + return -1; + } + if (len) sys_memcpy(bin->orig_bytes, f->buf+cur, len); + *bp = bin; + if (! 
location) f->cur = cur + len; + return len; +} + +static int ram_file_seek(RamFile *f, int offset, int whence, int *error) +{ + int pos; + + if (f->flags == 0) { + *error = EBADF; + return -1; + } + switch(whence) { + case RAM_FILE_SEEK_SET: pos = offset; break; + case RAM_FILE_SEEK_CUR: pos = f->cur + offset; break; + case RAM_FILE_SEEK_END: pos = f->end + offset; break; + default: *error = EINVAL; return -1; + } + if (pos < 0) { + *error = EINVAL; + return -1; + } + return f->cur = pos; +} + +#define UUMASK(x) ((x)&0x3F) +#define uu_encode(x) (UUMASK(x)+32) + +/* calculate max number of quadrauple bytes given max line length */ +#define UULINE(n) ( (((n)-1) / 4) * 3) + +#define UNIX_LINE 61 /* 61 character lines => 45 uncoded => 60 coded */ + +#define uu_pack(p, c1, c2, c3) \ + (p)[0] = uu_encode((c1) >> 2), \ + (p)[1] = uu_encode(((c1) << 4) | ((c2) >> 4)), \ + (p)[2] = uu_encode(((c2) << 2) | ((c3) >> 6)), \ + (p)[3] = uu_encode(c3) + +static int ram_file_uuencode(RamFile *f) +{ + int code_len = UULINE(UNIX_LINE); + int len = f->end; + int usize = (len*4+2)/3 + 2*(len/code_len+1) + 2 + 1; + ErlDrvBinary* bin; + uchar* inp; + uchar* outp; + int count = 0; + + if ((bin = driver_alloc_binary(usize)) == NULL) + return error_reply(f, ENOMEM); + outp = (uchar*)bin->orig_bytes; + inp = (uchar*)f->buf; + + while(len > 0) { + int c1, c2, c3; + int n = (len >= code_len) ? code_len : len; + + len -= n; + *outp++ = uu_encode(UUMASK(n)); + count++; + while (n >= 3) { + c1 = inp[0]; + c2 = inp[1]; + c3 = inp[2]; + uu_pack(outp, c1, c2, c3); + inp += 3; n -= 3; + outp += 4; count += 4; + } + if (n == 2) { + c1 = inp[0]; + c2 = inp[1]; + uu_pack(outp, c1, c2, 0); + inp += 2; + outp += 4; count += 4; + } + else if (n == 1) { + c1 = inp[0]; + uu_pack(outp, c1, 0, 0); + inp += 1; + outp += 4; count += 4; + } + *outp++ = '\n'; + count++; + } + *outp++ = ' '; /* this end of file 0 length !!! 
*/ + *outp++ = '\n'; + count += 2; + + driver_free_binary(f->bin); + ram_file_set(f, bin, usize, count); + return numeric_reply(f, count); +} + + +#define uu_decode(x) ((x)-32) + +static int ram_file_uudecode(RamFile *f) +{ + int len = f->end; + int usize = ( (len+3) / 4 ) * 3; + ErlDrvBinary* bin; + uchar* inp; + uchar* outp; + int count = 0; + int n; + + if ((bin = driver_alloc_binary(usize)) == NULL) + return error_reply(f, ENOMEM); + outp = (uchar*)bin->orig_bytes; + inp = (uchar*)f->buf; + + while(len > 0) { + if ((n = uu_decode(*inp++)) < 0) + goto error; + len--; + if ((n == 0) && (*inp == '\n')) + break; + count += n; /* count characters */ + while((n > 0) && (len >= 4)) { + int c1, c2, c3, c4; + c1 = uu_decode(inp[0]); + c2 = uu_decode(inp[1]); + c3 = uu_decode(inp[2]); + c4 = uu_decode(inp[3]); + inp += 4; + len -= 4; + + switch(n) { + case 1: + *outp++ = (c1 << 2) | (c2 >> 4); + n = 0; + break; + case 2: + *outp++ = (c1 << 2) | (c2 >> 4); + *outp++ = (c2 << 4) | (c3 >> 2); + n = 0; + break; + default: + *outp++ = (c1 << 2) | (c2 >> 4); + *outp++ = (c2 << 4) | (c3 >> 2); + *outp++ = (c3 << 6) | c4; + n -= 3; + break; + } + } + if ((n != 0) || (*inp++ != '\n')) + goto error; + len--; + } + driver_free_binary(f->bin); + ram_file_set(f, bin, usize, count); + return numeric_reply(f, count); + + error: + driver_free_binary(bin); + return error_reply(f, EINVAL); +} + + +static int ram_file_compress(RamFile *f) +{ + int size = f->end; + ErlDrvBinary* bin; + + if ((bin = erts_gzdeflate_buffer(f->buf, size)) == NULL) { + return error_reply(f, EINVAL); + } + driver_free_binary(f->bin); + size = bin->orig_size; + ram_file_set(f, bin, size, size); + return numeric_reply(f, size); +} + +/* Tricky since we dont know the expanded size !!! 
*/ +/* First attempt is to double the size of input */ +/* loop until we don't get Z_BUF_ERROR */ + +static int ram_file_uncompress(RamFile *f) +{ + int size = f->end; + ErlDrvBinary* bin; + + if ((bin = erts_gzinflate_buffer(f->buf, size)) == NULL) { + return error_reply(f, EINVAL); + } + driver_free_binary(f->bin); + size = bin->orig_size; + ram_file_set(f, bin, size, size); + return numeric_reply(f, size); +} + + +static void rfile_command(ErlDrvData e, char* buf, int count) +{ + RamFile* f = (RamFile*)e; + int error = 0; + ErlDrvBinary* bin; + char header[5]; /* result code + count */ + int offset; + int origin; /* Origin of seek. */ + int n; + + count--; + switch(*(uchar*)buf++) { + case RAM_FILE_OPEN: /* args is initial data */ + f->flags = get_int32(buf); + if (ram_file_init(f, buf+4, count-4, &error) < 0) + error_reply(f, error); + else + numeric_reply(f, 0); /* 0 is not used */ + break; + + case RAM_FILE_FSYNC: + if (f->flags == 0) + error_reply(f, EBADF); + else + reply(f, 1, 0); + break; + + case RAM_FILE_WRITE: + if (ram_file_write(f, buf, count, NULL, &error) < 0) + error_reply(f, error); + else + numeric_reply(f, count); + break; + + case RAM_FILE_PWRITE: + if ((offset = get_int32(buf)) < 0) + error_reply(f, EINVAL); + else if (ram_file_write(f, buf+4, count-4, &offset, &error) < 0) + error_reply(f, error); + else + numeric_reply(f, count-4); + break; + + case RAM_FILE_LSEEK: + offset = get_int32(buf); + origin = get_int32(buf+4); + if ((offset = ram_file_seek(f, offset, origin, &error)) < 0) + error_reply(f, error); + else + numeric_reply(f, offset); + break; + + case RAM_FILE_PREAD: + if ((offset = get_int32(buf)) < 0) { + error_reply(f, EINVAL); + break; + } + + count = get_int32(buf+4); + if ((n = ram_file_read(f, count, &bin, &offset, &error)) < 0) { + error_reply(f, error); + } else { + header[0] = RAM_FILE_RESP_DATA; + put_int32(n, header+1); + driver_output_binary(f->port, header, sizeof(header), + bin, 0, n); + driver_free_binary(bin); + } + 
break; + + case RAM_FILE_READ: + count = get_int32(buf); + if ((n = ram_file_read(f, count, &bin, NULL, &error)) < 0) + error_reply(f, error); + else { + header[0] = RAM_FILE_RESP_DATA; + put_int32(n, header+1); + driver_output_binary(f->port, header, sizeof(header), + bin, 0, n); + driver_free_binary(bin); + } + break; + + case RAM_FILE_TRUNCATE: + if (!(f->flags & RAM_FILE_MODE_WRITE)) { + error_reply(f, EACCES); + break; + } + if (f->end > f->cur) + sys_memzero(f->buf + f->cur, f->end - f->cur); + f->end = f->cur; + reply(f, 1, 0); + break; + + case RAM_FILE_GET: /* return a copy of the file */ + n = f->end; /* length */ + if ((bin = driver_alloc_binary(n)) == NULL) { + error_reply(f, ENOMEM); + break; + } + sys_memcpy(bin->orig_bytes, f->buf, n); + + header[0] = RAM_FILE_RESP_DATA; + put_int32(n, header+1); + driver_output_binary(f->port, header, sizeof(header), + bin, 0, n); + driver_free_binary(bin); + break; + + case RAM_FILE_GET_CLOSE: /* return the file and close driver */ + n = f->end; /* length */ + bin = f->bin; + f->bin = NULL; /* NUKE IT */ + header[0] = RAM_FILE_RESP_DATA; + put_int32(n, header+1); + driver_output_binary(f->port, header, sizeof(header), + bin, 0, n); + driver_free_binary(bin); + driver_failure(f->port, 0); + break; + + case RAM_FILE_SIZE: + numeric_reply(f, f->end); + break; + + case RAM_FILE_SET: /* re-init file with new data */ + if ((n = ram_file_init(f, buf, count, &error)) < 0) + error_reply(f, error); + else + numeric_reply(f, n); /* 0 is not used */ + break; + + case RAM_FILE_COMPRESS: /* inline compress the file */ + ram_file_compress(f); + break; + + case RAM_FILE_UNCOMPRESS: /* inline uncompress file */ + ram_file_uncompress(f); + break; + + case RAM_FILE_UUENCODE: /* uuencode file */ + ram_file_uuencode(f); + break; + + case RAM_FILE_UUDECODE: /* uudecode file */ + ram_file_uudecode(f); + break; + } + /* + * Ignore anything else -- let the caller hang. 
+ */ +} diff --git a/erts/emulator/drivers/common/zlib_drv.c b/erts/emulator/drivers/common/zlib_drv.c new file mode 100644 index 0000000000..723efeaa13 --- /dev/null +++ b/erts/emulator/drivers/common/zlib_drv.c @@ -0,0 +1,650 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2003-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +/* + * ZLib interface for erlang + * + */ +#include <stdio.h> +#include <zlib.h> +#include <errno.h> +#include <string.h> + +#include "erl_driver.h" + + +#define DEFLATE_INIT 1 +#define DEFLATE_INIT2 2 +#define DEFLATE_SETDICT 3 +#define DEFLATE_RESET 4 +#define DEFLATE_END 5 +#define DEFLATE_PARAMS 6 +#define DEFLATE 7 + +#define INFLATE_INIT 8 +#define INFLATE_INIT2 9 +#define INFLATE_SETDICT 10 +#define INFLATE_SYNC 11 +#define INFLATE_RESET 12 +#define INFLATE_END 13 +#define INFLATE 14 + +#define CRC32_0 15 +#define CRC32_1 16 +#define CRC32_2 17 + +#define SET_BUFSZ 18 +#define GET_BUFSZ 19 +#define GET_QSIZE 20 + +#define ADLER32_1 21 +#define ADLER32_2 22 + +#define CRC32_COMBINE 23 +#define ADLER32_COMBINE 24 + +#define DEFAULT_BUFSZ 4000 + +static int zlib_init(void); +static ErlDrvData zlib_start(ErlDrvPort port, char* buf); +static void zlib_stop(ErlDrvData e); +static int zlib_ctl(ErlDrvData drv_data, unsigned int command, char *buf, + int len, char **rbuf, int rlen); +static void zlib_outputv(ErlDrvData drv_data, ErlIOVec *ev); + 
+ErlDrvEntry zlib_driver_entry = { + zlib_init, + zlib_start, + zlib_stop, + NULL, /* output */ + NULL, /* ready_input */ + NULL, /* ready_output */ + "zlib_drv", + NULL, /* finish */ + NULL, /* handle */ + zlib_ctl, + NULL, /* timeout */ + zlib_outputv, + NULL, /* read_async */ + NULL, /* flush */ + NULL, /* call */ + NULL, /* event */ + ERL_DRV_EXTENDED_MARKER, + ERL_DRV_EXTENDED_MAJOR_VERSION, + ERL_DRV_EXTENDED_MINOR_VERSION, + ERL_DRV_FLAG_USE_PORT_LOCKING, + NULL, /* handle2 */ + NULL, /* process_exit */ +}; + +typedef enum { + ST_NONE = 0, + ST_DEFLATE = 1, + ST_INFLATE = 2 +} ZLibState; + + +typedef struct { + z_stream s; + ZLibState state; + ErlDrvBinary* bin; + int binsz; + int binsz_need; + uLong crc; + int inflate_eos_seen; + int want_crc; /* 1 if crc is calculated on clear text */ + ErlDrvPort port; /* the associcated port */ +} ZLibData; + +static int zlib_inflate(ZLibData* d, int flush); +static int zlib_deflate(ZLibData* d, int flush); + +#if defined(_OSE_) || defined(__WIN32__) +static int i32(char* buf) +#else +static inline int i32(char* buf) +#endif +{ + return (int) ( + (((int)((unsigned char*)buf)[0]) << 24) | + (((int)((unsigned char*)buf)[1]) << 16) | + (((int)((unsigned char*)buf)[2]) << 8) | + (((int)((unsigned char*)buf)[3]) << 0)); +} + +static char* zlib_reason(int code, int* err) +{ + switch(code) { + case Z_OK: + *err = 0; + return "ok"; + case Z_STREAM_END: + *err = 0; + return "stream_end"; + case Z_ERRNO: + *err = 1; + return erl_errno_id(errno); + case Z_STREAM_ERROR: + *err = 1; + return "stream_error"; + case Z_DATA_ERROR: + *err = 1; + return "data_error"; + case Z_MEM_ERROR: + *err = 1; + return "mem_error"; + case Z_BUF_ERROR: + *err = 1; + return "buf_error"; + case Z_VERSION_ERROR: + *err = 1; + return "version_error"; + default: + *err = 1; + return "unknown_error"; + } +} + + +static int zlib_return(int code, char** rbuf, int rlen) +{ + int msg_code = 0; /* 0=ok, 1=error */ + char* dst = *rbuf; + char* src; + int len = 0; 
+ + src = zlib_reason(code, &msg_code); + *dst++ = msg_code; + rlen--; + len = 1; + + while((rlen > 0) && *src) { + *dst++ = *src++; + rlen--; + len++; + } + return len; +} + +static int zlib_value2(int msg_code, int value, char** rbuf, int rlen) +{ + char* dst = *rbuf; + + if (rlen < 5) { + return -1; + } + *dst++ = msg_code; + *dst++ = (value >> 24) & 0xff; + *dst++ = (value >> 16) & 0xff; + *dst++ = (value >> 8) & 0xff; + *dst++ = value & 0xff; + return 5; +} + +static int zlib_value(int value, char** rbuf, int rlen) +{ + return zlib_value2(2, value, rbuf, rlen); +} + +static int zlib_output_init(ZLibData* d) +{ + if (d->bin != NULL) + driver_free_binary(d->bin); + if ((d->bin = driver_alloc_binary(d->binsz_need)) == NULL) + return -1; + d->binsz = d->binsz_need; + d->s.next_out = (unsigned char*)d->bin->orig_bytes; + d->s.avail_out = d->binsz; + return 0; +} + +/* + * Send compressed or uncompressed data + * and restart output procesing + */ +static int zlib_output(ZLibData* d) +{ + if (d->bin != NULL) { + int len = d->binsz - d->s.avail_out; + if (len > 0) { + if (driver_output_binary(d->port, NULL, 0, d->bin, 0, len) < 0) + return -1; + } + driver_free_binary(d->bin); + d->bin = NULL; + d->binsz = 0; + } + return zlib_output_init(d); +} + +static int zlib_inflate(ZLibData* d, int flush) +{ + int res = Z_OK; + + if ((d->bin == NULL) && (zlib_output_init(d) < 0)) { + errno = ENOMEM; + return Z_ERRNO; + } + + while ((driver_sizeq(d->port) > 0) && (res != Z_STREAM_END)) { + int vlen; + SysIOVec* iov = driver_peekq(d->port, &vlen); + int len; + int possibly_more_output = 0; + + d->s.next_in = iov[0].iov_base; + d->s.avail_in = iov[0].iov_len; + while((possibly_more_output || (d->s.avail_in > 0)) && (res != Z_STREAM_END)) { + res = inflate(&d->s, Z_NO_FLUSH); + if (res == Z_NEED_DICT) { + /* Essential to eat the header bytes that zlib has looked at */ + len = iov[0].iov_len - d->s.avail_in; + driver_deq(d->port, len); + return res; + } + if (res == Z_BUF_ERROR) { + 
/* Was possible more output, but actually not */ + res = Z_OK; + } + else if (res < 0) { + return res; + } + if (d->s.avail_out != 0) { + possibly_more_output = 0; + } else { + if (d->want_crc) + d->crc = crc32(d->crc, (unsigned char*)d->bin->orig_bytes, + d->binsz - d->s.avail_out); + zlib_output(d); + possibly_more_output = 1; + } + } + len = iov[0].iov_len - d->s.avail_in; + driver_deq(d->port, len); + } + + if (d->want_crc) { + d->crc = crc32(d->crc, (unsigned char*) d->bin->orig_bytes, + d->binsz - d->s.avail_out); + } + zlib_output(d); + if (res == Z_STREAM_END) { + d->inflate_eos_seen = 1; + } + return res; +} + +static int zlib_deflate(ZLibData* d, int flush) +{ + int res = Z_OK; + + if ((d->bin == NULL) && (zlib_output_init(d) < 0)) { + errno = ENOMEM; + return Z_ERRNO; + } + + while ((driver_sizeq(d->port) > 0) && (res != Z_STREAM_END)) { + int vlen; + SysIOVec* iov = driver_peekq(d->port, &vlen); + int len; + + d->s.next_in = iov[0].iov_base; + d->s.avail_in = iov[0].iov_len; + + while((d->s.avail_in > 0) && (res != Z_STREAM_END)) { + if ((res = deflate(&d->s, Z_NO_FLUSH)) < 0) { + return res; + } + if (d->s.avail_out == 0) { + zlib_output(d); + } + } + len = iov[0].iov_len - d->s.avail_in; + if (d->want_crc) { + d->crc = crc32(d->crc, iov[0].iov_base, len); + } + driver_deq(d->port, len); + } + + if (flush != Z_NO_FLUSH) { + if ((res = deflate(&d->s, flush)) < 0) { + return res; + } + if (flush == Z_FINISH) { + while (d->s.avail_out < d->binsz) { + zlib_output(d); + if (res == Z_STREAM_END) { + break; + } + if ((res = deflate(&d->s, flush)) < 0) { + return res; + } + } + } else { + while (d->s.avail_out == 0) { + zlib_output(d); + if ((res = deflate(&d->s, flush)) < 0) { + return res; + } + } + if (d->s.avail_out < d->binsz) { + zlib_output(d); + } + } + } + return res; +} + + + +static void* zlib_alloc(void* data, unsigned int items, unsigned int size) +{ + return (void*) driver_alloc(items*size); +} + +static void zlib_free(void* data, void* addr) +{ 
+ driver_free(addr); +} + +static int zlib_init() +{ + return 0; +} + +static ErlDrvData zlib_start(ErlDrvPort port, char* buf) +{ + ZLibData* d; + + if ((d = (ZLibData*) driver_alloc(sizeof(ZLibData))) == NULL) + return ERL_DRV_ERROR_GENERAL; + + memset(&d->s, 0, sizeof(z_stream)); + + d->s.zalloc = zlib_alloc; + d->s.zfree = zlib_free; + d->s.opaque = d; + d->s.data_type = Z_BINARY; + + d->port = port; + d->state = ST_NONE; + d->bin = NULL; + d->binsz = 0; + d->binsz_need = DEFAULT_BUFSZ; + d->crc = crc32(0L, Z_NULL, 0); + d->inflate_eos_seen = 0; + d->want_crc = 0; + return (ErlDrvData)d; +} + + +static void zlib_stop(ErlDrvData e) +{ + ZLibData* d = (ZLibData*)e; + + if (d->state == ST_DEFLATE) + deflateEnd(&d->s); + else if (d->state == ST_INFLATE) + inflateEnd(&d->s); + + if (d->bin != NULL) + driver_free_binary(d->bin); + + driver_free(d); +} + +static int zlib_ctl(ErlDrvData drv_data, unsigned int command, char *buf, + int len, char **rbuf, int rlen) +{ + ZLibData* d = (ZLibData*)drv_data; + int res; + + switch(command) { + case DEFLATE_INIT: + if (len != 4) goto badarg; + if (d->state != ST_NONE) goto badarg; + res = deflateInit(&d->s, i32(buf)); + if (res == Z_OK) { + d->state = ST_DEFLATE; + d->want_crc = 0; + d->crc = crc32(0L, Z_NULL, 0); + } + return zlib_return(res, rbuf, rlen); + + case DEFLATE_INIT2: { + int wbits; + + if (len != 20) goto badarg; + if (d->state != ST_NONE) goto badarg; + wbits = i32(buf+8); + res = deflateInit2(&d->s, i32(buf), i32(buf+4), wbits, + i32(buf+12), i32(buf+16)); + if (res == Z_OK) { + d->state = ST_DEFLATE; + d->want_crc = (wbits < 0); + d->crc = crc32(0L, Z_NULL, 0); + } + return zlib_return(res, rbuf, rlen); + } + + case DEFLATE_SETDICT: + if (d->state != ST_DEFLATE) goto badarg; + res = deflateSetDictionary(&d->s, (unsigned char*)buf, len); + if (res == Z_OK) { + return zlib_value(d->s.adler, rbuf, rlen); + } else { + return zlib_return(res, rbuf, rlen); + } + + case DEFLATE_RESET: + if (len != 0) goto badarg; + if 
(d->state != ST_DEFLATE) goto badarg; + driver_deq(d->port, driver_sizeq(d->port)); + res = deflateReset(&d->s); + return zlib_return(res, rbuf, rlen); + + case DEFLATE_END: + if (len != 0) goto badarg; + if (d->state != ST_DEFLATE) goto badarg; + driver_deq(d->port, driver_sizeq(d->port)); + res = deflateEnd(&d->s); + d->state = ST_NONE; + return zlib_return(res, rbuf, rlen); + + case DEFLATE_PARAMS: + if (len != 8) goto badarg; + if (d->state != ST_DEFLATE) goto badarg; + res = deflateParams(&d->s, i32(buf), i32(buf+4)); + return zlib_return(res, rbuf, rlen); + + case DEFLATE: + if (d->state != ST_DEFLATE) goto badarg; + if (len != 4) goto badarg; + res = zlib_deflate(d, i32(buf)); + return zlib_return(res, rbuf, rlen); + + case INFLATE_INIT: + if (len != 0) goto badarg; + if (d->state != ST_NONE) goto badarg; + res = inflateInit(&d->s); + if (res == Z_OK) { + d->state = ST_INFLATE; + d->inflate_eos_seen = 0; + d->want_crc = 0; + d->crc = crc32(0L, Z_NULL, 0); + } + return zlib_return(res, rbuf, rlen); + + case INFLATE_INIT2: { + int wbits; + + if (len != 4) goto badarg; + if (d->state != ST_NONE) goto badarg; + wbits = i32(buf); + res = inflateInit2(&d->s, wbits); + if (res == Z_OK) { + d->state = ST_INFLATE; + d->inflate_eos_seen = 0; + d->want_crc = (wbits < 0); + d->crc = crc32(0L, Z_NULL, 0); + } + return zlib_return(res, rbuf, rlen); + } + + case INFLATE_SETDICT: + if (d->state != ST_INFLATE) goto badarg; + res = inflateSetDictionary(&d->s, (unsigned char*)buf, len); + return zlib_return(res, rbuf, rlen); + + case INFLATE_SYNC: + if (d->state != ST_INFLATE) goto badarg; + if (len != 0) goto badarg; + if (driver_sizeq(d->port) == 0) { + res = Z_BUF_ERROR; + } else { + int vlen; + SysIOVec* iov = driver_peekq(d->port, &vlen); + + d->s.next_in = iov[0].iov_base; + d->s.avail_in = iov[0].iov_len; + res = inflateSync(&d->s); + } + return zlib_return(res, rbuf, rlen); + + case INFLATE_RESET: + if (d->state != ST_INFLATE) goto badarg; + if (len != 0) goto badarg; 
+ driver_deq(d->port, driver_sizeq(d->port)); + res = inflateReset(&d->s); + d->inflate_eos_seen = 0; + return zlib_return(res, rbuf, rlen); + + case INFLATE_END: + if (d->state != ST_INFLATE) goto badarg; + if (len != 0) goto badarg; + driver_deq(d->port, driver_sizeq(d->port)); + res = inflateEnd(&d->s); + if (res == Z_OK && d->inflate_eos_seen == 0) { + res = Z_DATA_ERROR; + } + d->state = ST_NONE; + return zlib_return(res, rbuf, rlen); + + case INFLATE: + if (d->state != ST_INFLATE) goto badarg; + if (len != 4) goto badarg; + res = zlib_inflate(d, i32(buf)); + if (res == Z_NEED_DICT) { + return zlib_value2(3, d->s.adler, rbuf, rlen); + } else { + return zlib_return(res, rbuf, rlen); + } + + case GET_QSIZE: + return zlib_value(driver_sizeq(d->port), rbuf, rlen); + + case GET_BUFSZ: + return zlib_value(d->binsz_need, rbuf, rlen); + + case SET_BUFSZ: { + int need; + if (len != 4) goto badarg; + need = i32(buf); + if ((need < 16) || (need > 0x00ffffff)) + goto badarg; + if (d->binsz_need != need) { + d->binsz_need = need; + if (d->bin != NULL) { + if (d->s.avail_out == d->binsz) { + driver_free_binary(d->bin); + d->bin = NULL; + d->binsz = 0; + } + else + zlib_output(d); + } + } + return zlib_return(Z_OK, rbuf, rlen); + } + + case CRC32_0: + return zlib_value(d->crc, rbuf, rlen); + + case CRC32_1: { + uLong crc = crc32(0L, Z_NULL, 0); + crc = crc32(crc, (unsigned char*) buf, len); + return zlib_value(crc, rbuf, rlen); + } + + case CRC32_2: { + uLong crc; + if (len < 4) goto badarg; + crc = (unsigned int) i32(buf); + crc = crc32(crc, (unsigned char*) buf+4, len-4); + return zlib_value(crc, rbuf, rlen); + } + + case ADLER32_1: { + uLong adler = adler32(0L, Z_NULL, 0); + adler = adler32(adler, (unsigned char*) buf, len); + return zlib_value(adler, rbuf, rlen); + } + + case ADLER32_2: { + uLong adler; + if (len < 4) goto badarg; + adler = (unsigned int) i32(buf); + adler = adler32(adler, (unsigned char*) buf+4, len-4); + return zlib_value(adler, rbuf, rlen); + } + + 
case CRC32_COMBINE: { + uLong crc, crc1, crc2, len2; + if (len != 12) goto badarg; + crc1 = (unsigned int) i32(buf); + crc2 = (unsigned int) i32(buf+4); + len2 = (unsigned int) i32(buf+8); + crc = crc32_combine(crc1, crc2, len2); + return zlib_value(crc, rbuf, rlen); + } + + case ADLER32_COMBINE: { + uLong adler, adler1, adler2, len2; + if (len != 12) goto badarg; + adler1 = (unsigned int) i32(buf); + adler2 = (unsigned int) i32(buf+4); + len2 = (unsigned int) i32(buf+8); + adler = adler32_combine(adler1, adler2, len2); + return zlib_value(adler, rbuf, rlen); + } + } + + badarg: + errno = EINVAL; + return zlib_return(Z_ERRNO, rbuf, rlen); +} + + + +static void zlib_outputv(ErlDrvData drv_data, ErlIOVec *ev) +{ + ZLibData* d = (ZLibData*) drv_data; + + driver_enqv(d->port, ev, 0); +} |