aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/drivers/common
diff options
context:
space:
mode:
authorSverker Eriksson <[email protected]>2019-02-06 19:10:26 +0100
committerSverker Eriksson <[email protected]>2019-02-06 19:10:26 +0100
commit98cfd6016f8b40fc97e03b31177d14318349040f (patch)
treec0fcdd768071c36bfbcbf186d369d9ca14c47421 /erts/emulator/drivers/common
parente2ca71b6e7172b320b5b171359d53a161383fb19 (diff)
parent3825199794da28d79b21052a2e69e2335921d55e (diff)
downloadotp-98cfd6016f8b40fc97e03b31177d14318349040f.tar.gz
otp-98cfd6016f8b40fc97e03b31177d14318349040f.tar.bz2
otp-98cfd6016f8b40fc97e03b31177d14318349040f.zip
Merge tag 'OTP-21.2' into sverker/map-from-ks-vs-bug
Diffstat (limited to 'erts/emulator/drivers/common')
-rw-r--r--erts/emulator/drivers/common/efile_drv.c4247
-rw-r--r--erts/emulator/drivers/common/erl_efile.h176
-rw-r--r--erts/emulator/drivers/common/gzio.c712
-rw-r--r--erts/emulator/drivers/common/gzio.h10
-rw-r--r--erts/emulator/drivers/common/inet_drv.c1837
-rw-r--r--erts/emulator/drivers/common/zlib_drv.c736
6 files changed, 1501 insertions, 6217 deletions
diff --git a/erts/emulator/drivers/common/efile_drv.c b/erts/emulator/drivers/common/efile_drv.c
deleted file mode 100644
index 3adb8db661..0000000000
--- a/erts/emulator/drivers/common/efile_drv.c
+++ /dev/null
@@ -1,4247 +0,0 @@
-/*
- * %CopyrightBegin%
- *
- * Copyright Ericsson AB 1996-2016. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * %CopyrightEnd%
- */
-/*
- * Purpose: Provides file and directory operations.
- *
- * This file is generic, and does the work of decoding the commands
- * and encoding the responses. System-specific functions are found in
- * the unix_efile.c and win_efile.c files.
- */
-
-/* Operations */
-
-#define FILE_OPEN 1 /* Essential for startup */
-#define FILE_READ 2
-#define FILE_LSEEK 3
-#define FILE_WRITE 4
-#define FILE_FSTAT 5 /* Essential for startup */
-#define FILE_PWD 6 /* Essential for startup */
-#define FILE_READDIR 7 /* Essential for startup */
-#define FILE_CHDIR 8
-#define FILE_FSYNC 9
-#define FILE_MKDIR 10
-#define FILE_DELETE 11
-#define FILE_RENAME 12
-#define FILE_RMDIR 13
-#define FILE_TRUNCATE 14
-#define FILE_READ_FILE 15 /* Essential for startup */
-#define FILE_WRITE_INFO 16
-#define FILE_LSTAT 19
-#define FILE_READLINK 20
-#define FILE_LINK 21
-#define FILE_SYMLINK 22
-#define FILE_CLOSE 23
-#define FILE_PWRITEV 24
-#define FILE_PREADV 25
-#define FILE_SETOPT 26
-#define FILE_IPREAD 27
-#define FILE_ALTNAME 28
-#define FILE_READ_LINE 29
-#define FILE_FDATASYNC 30
-#define FILE_FADVISE 31
-#define FILE_SENDFILE 32
-#define FILE_FALLOCATE 33
-#define FILE_CLOSE_ON_PORT_EXIT 34
-/* Return codes */
-
-#define FILE_RESP_OK 0
-#define FILE_RESP_ERROR 1
-#define FILE_RESP_DATA 2
-#define FILE_RESP_NUMBER 3
-#define FILE_RESP_INFO 4
-#define FILE_RESP_NUMERR 5
-#define FILE_RESP_LDATA 6
-#define FILE_RESP_N2DATA 7
-#define FILE_RESP_EOF 8
-#define FILE_RESP_FNAME 9
-#define FILE_RESP_ALL_DATA 10
-#define FILE_RESP_LFNAME 11
-
-/* Options */
-
-#define FILE_OPT_DELAYED_WRITE 0
-#define FILE_OPT_READ_AHEAD 1
-
-/* IPREAD variants */
-
-#define IPREAD_S32BU_P32BU 0
-
-/* Limits */
-
-#define FILE_SEGMENT_READ (256*1024)
-#define FILE_SEGMENT_WRITE (256*1024)
-
-/* Internal */
-
-/* Set to 1 to test having read_ahead implicitly for read_line */
-#define ALWAYS_READ_LINE_AHEAD 0
-
-
-/* Must not be possible to get from malloc()! */
-#define FILE_FD_INVALID ((Sint)(-1))
-
-#ifdef HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#include <ctype.h>
-#include <sys/types.h>
-#include <stdlib.h>
-
-/* Need (NON)BLOCKING macros for sendfile */
-#ifndef WANT_NONBLOCKING
-#define WANT_NONBLOCKING
-#endif
-
-#include "sys.h"
-
-#include "erl_driver.h"
-#include "erl_efile.h"
-#include "erl_threads.h"
-#include "gzio.h"
-#include "dtrace-wrapper.h"
-
-
-static ErlDrvSysInfo sys_info;
-
-/* For explanation of this var, see comment for same var in erl_async.c */
-static unsigned gcc_optimizer_hack = 0;
-
-#ifdef USE_VM_PROBES
-
-#define DTRACE_EFILE_BUFSIZ 128
-
-#define DTRACE_INVOKE_SETUP(op) \
- do { DTRACE3(efile_drv_int_entry, d->sched_i1, d->sched_i2, op); } while (0)
-#define DTRACE_INVOKE_SETUP_BY_NAME(op) \
- struct t_data *d = (struct t_data *) data ; \
- DTRACE_INVOKE_SETUP(op)
-#define DTRACE_INVOKE_RETURN(op) \
- do { DTRACE3(efile_drv_int_return, d->sched_i1, d->sched_i2, \
- op); } while (0) ; gcc_optimizer_hack++ ;
-
-/* Assign human-friendlier id numbers to scheduler & I/O worker threads */
-int dt_driver_idnum = 0;
-int dt_driver_io_worker_base = 5000;
-erts_mtx_t dt_driver_mutex;
-pthread_key_t dt_driver_key;
-
-typedef struct {
- int thread_num;
- Uint64 tag;
-} dt_private;
-
-dt_private *get_dt_private(int);
-#else /* USE_VM_PROBES */
-#define DTRACE_INVOKE_SETUP(op) do {} while (0)
-#define DTRACE_INVOKE_SETUP_BY_NAME(op) do {} while (0)
-#define DTRACE_INVOKE_RETURN(op) do {} while (0)
-#endif /* USE_VM_PROBES */
-
-/* #define TRACE 1 */
-#ifdef TRACE
-# define TRACE_C(c) do { putchar(c); fflush(stdout); } while (0)
-# define TRACE_S(s) do { fputs((s), stdout); fflush(stdout); } while (0)
-# define TRACE_F(args) do { printf args ;fflush(stdout); } while (0)
-#else
-# define TRACE_C(c) ((void)(0))
-# define TRACE_S(s) ((void)(0))
-# define TRACE_F(args) ((void)(0))
-#endif
-
-
-#ifdef USE_THREADS
-#define THRDS_AVAILABLE (sys_info.async_threads > 0)
-#ifdef HARDDEBUG /* HARDDEBUG in io.c is expected too */
-#define TRACE_DRIVER fprintf(stderr, "Efile: ")
-#else
-#define TRACE_DRIVER
-#endif
-#define MUTEX_INIT(m, p) do { IF_THRDS { TRACE_DRIVER; (m = driver_pdl_create(p)); } } while (0)
-#define MUTEX_LOCK(m) do { IF_THRDS { TRACE_DRIVER; driver_pdl_lock(m); } } while (0)
-#define MUTEX_UNLOCK(m) do { IF_THRDS { TRACE_DRIVER; driver_pdl_unlock(m); } } while (0)
-#else
-#define THRDS_AVAILABLE (0)
-#define MUTEX_INIT(m, p)
-#define MUTEX_LOCK(m)
-#define MUTEX_UNLOCK(m)
-#endif
-#define IF_THRDS if (THRDS_AVAILABLE)
-
-
-#define SENDFILE_FLGS_USE_THREADS (1 << 0)
-/**
- * On DARWIN sendfile can deadlock with close if called in
- * different threads. So until Apple fixes so that sendfile
- * is not buggy we disable usage of the async pool for
- * DARWIN. The testcase t_sendfile_crashduring reproduces
- * this error when using +A 10 and enabling SENDFILE_FLGS_USE_THREADS.
- */
-#if defined(__APPLE__) && defined(__MACH__)
-#define USE_THRDS_FOR_SENDFILE(DATA) 0
-#else
-#define USE_THRDS_FOR_SENDFILE(DATA) (DATA->flags & SENDFILE_FLGS_USE_THREADS)
-#endif /* defined(__APPLE__) && defined(__MACH__) */
-
-
-
-#if 0
-/* Experimental, for forcing all file operations to use the same thread. */
- static unsigned file_fixed_key = 1;
-# define KEY(desc) (&file_fixed_key)
-#else
-# define KEY(desc) (&(desc)->key)
-#endif
-
-#ifndef MAX
-# define MAX(x, y) (((x) > (y)) ? (x) : (y))
-#endif
-
-#ifdef FILENAMES_16BIT
-#ifdef USE_VM_PROBES
-#error 16bit characters in filenames and dtrace in combination is not supported.
-#endif
-# define FILENAME_BYTELEN(Str) filename_len_16bit(Str)
-# define FILENAME_COPY(To,From) filename_cpy_16bit((To),(From))
-# define FILENAME_CHARSIZE 2
-
- static int filename_len_16bit(char *str)
- {
- char *p = str;
- while(*p != '\0' || p[1] != '\0') {
- p += 2;
- }
- return (p - str);
- }
-
- static void filename_cpy_16bit(char *to, char *from)
- {
- while(*from != '\0' || from[1] != '\0') {
- *to++ = *from++;
- *to++ = *from++;
- }
- *to++ = *from++;
- *to++ = *from++;
- }
-
-#else
-# define FILENAME_BYTELEN(Str) strlen(Str)
-# define FILENAME_COPY(To,From) strcpy(To,From)
-# define FILENAME_CHARSIZE 1
-#endif
-
-#if (MAXPATHLEN+1)*FILENAME_CHARSIZE+1 > BUFSIZ
-# define RESBUFSIZE ((MAXPATHLEN+1)*FILENAME_CHARSIZE+1)
-#else
-# define RESBUFSIZE BUFSIZ
-#endif
-
-#define READDIR_CHUNKS (5)
-
-
-
-#if ALWAYS_READ_LINE_AHEAD
-#define DEFAULT_LINEBUF_SIZE 2048
-#else
-#define DEFAULT_LINEBUF_SIZE 512 /* Small, it's usually discarded anyway */
-#endif
-
-typedef unsigned char uchar;
-
-static ErlDrvData file_start(ErlDrvPort port, char* command);
-static int file_init(void);
-static void file_stop(ErlDrvData);
-static void file_output(ErlDrvData, char* buf, ErlDrvSizeT len);
-static ErlDrvSSizeT file_control(ErlDrvData, unsigned int command,
- char* buf, ErlDrvSizeT len,
- char **rbuf, ErlDrvSizeT rlen);
-static void file_timeout(ErlDrvData);
-static void file_outputv(ErlDrvData, ErlIOVec*);
-static void file_async_ready(ErlDrvData, ErlDrvThreadData);
-static void file_flush(ErlDrvData);
-
-#ifdef HAVE_SENDFILE
-static void file_ready_output(ErlDrvData data, ErlDrvEvent event);
-static void file_stop_select(ErlDrvEvent event, void* _);
-#endif /* HAVE_SENDFILE */
-
-
-enum e_timer {timer_idle, timer_again, timer_write};
-#ifdef HAVE_SENDFILE
-enum e_sendfile {sending, not_sending};
-#define SENDFILE_USE_THREADS (1 << 0)
-#endif /* HAVE_SENDFILE */
-
-struct t_data;
-
-typedef struct {
- SWord fd;
- ErlDrvPort port;
- unsigned int key; /* Async queue key */
- unsigned flags; /* Original flags from FILE_OPEN. */
- void (*invoke)(void *);
- struct t_data *d;
- void (*free)(void *);
- struct t_data *cq_head; /* Queue of incoming commands */
- struct t_data *cq_tail; /* -""- */
- enum e_timer timer_state;
-#ifdef HAVE_SENDFILE
- enum e_sendfile sendfile_state;
-#endif /* HAVE_SENDFILE */
- size_t read_bufsize;
- ErlDrvBinary *read_binp;
- size_t read_offset;
- size_t read_size;
- size_t write_bufsize;
- unsigned long write_delay;
- int write_error;
- Efile_error write_errInfo;
- ErlDrvPDL q_mtx; /* Mutex for the driver queue, known by the emulator. Also used for
- mutual exclusion when accessing field(s) below. */
- size_t write_buffered;
-#ifdef USE_VM_PROBES
- int idnum; /* Unique ID # for this driver thread/desc */
- char port_str[DTRACE_TERM_BUF_SIZE];
-#endif
-} file_descriptor;
-
-
-static int reply_error(file_descriptor*, Efile_error* errInfo);
-
-struct erl_drv_entry efile_driver_entry = {
- file_init,
- file_start,
- file_stop,
- file_output,
- NULL,
-#ifdef HAVE_SENDFILE
- file_ready_output,
-#else
- NULL,
-#endif /* HAVE_SENDFILE */
- "efile",
- NULL,
- NULL,
- file_control,
- file_timeout,
- file_outputv,
- file_async_ready,
- file_flush,
- NULL,
- NULL,
- ERL_DRV_EXTENDED_MARKER,
- ERL_DRV_EXTENDED_MAJOR_VERSION,
- ERL_DRV_EXTENDED_MINOR_VERSION,
- ERL_DRV_FLAG_USE_PORT_LOCKING,
- NULL,
- NULL,
-#ifdef HAVE_SENDFILE
- file_stop_select
-#else
- NULL
-#endif /* HAVE_SENDFILE */
-};
-
-
-
-static int thread_short_circuit;
-
-#define DRIVER_ASYNC(level, desc, f_invoke, data, f_free) \
-if (thread_short_circuit >= (level)) { \
- (*(f_invoke))(data); \
- file_async_ready((ErlDrvData)(desc), (data)); \
-} else { \
- driver_async((desc)->port, KEY(desc), (f_invoke), (data), (f_free)); \
-}
-
-
-
-struct t_pbuf_spec {
- Sint64 offset;
- size_t size;
-};
-
-struct t_pwritev {
- ErlDrvPort port;
- ErlDrvPDL q_mtx;
- size_t size;
- unsigned cnt;
- unsigned n;
- struct t_pbuf_spec specs[1];
-};
-
-struct t_preadv {
- ErlIOVec eiov;
- unsigned n;
- unsigned cnt;
- size_t size;
- Sint64 offsets[1];
-};
-
-#define READDIR_BUFSIZE (8*1024)*READDIR_CHUNKS
-#if READDIR_BUFSIZE < (1 + (2 + MAXPATHLEN)*FILENAME_CHARSIZE*READDIR_CHUNKS)
-# undef READDIR_BUFSIZE
-# define READDIR_BUFSIZE (1 + (2 + MAXPATHLEN)*FILENAME_CHARSIZE*READDIR_CHUNKS)
-#endif
-
-struct t_readdir_buf {
- struct t_readdir_buf *next;
- size_t n;
- char buf[READDIR_BUFSIZE];
-};
-
-struct t_data
-{
- struct t_data *next;
- int command;
- int level;
- void (*invoke)(void *);
- void (*free)(void *);
- void *data_to_free; /* used by FILE_CLOSE_ON_PORT_EXIT only */
- int again;
- int reply;
-#ifdef USE_VM_PROBES
- int sched_i1;
- Uint64 sched_i2;
- char sched_utag[DTRACE_EFILE_BUFSIZ+1];
-#endif
- int result_ok;
- Efile_error errInfo;
- int flags;
- SWord fd;
- int is_fd_unused;
- /**/
- Efile_info info;
- EFILE_DIR_HANDLE dir_handle; /* Handle to open directory. */
- ErlDrvBinary *bin;
- int drive;
- size_t n;
- /*off_t offset;*/
- /*size_t bytesRead; Bytes read from the file. */
- /**/
- union {
- struct {
- Sint64 offset;
- int origin;
- Sint64 location;
- } lseek;
- struct {
- ErlDrvPort port;
- ErlDrvPDL q_mtx;
- size_t size;
- size_t reply_size;
- } writev;
- struct t_pwritev pwritev;
- struct t_preadv preadv;
- struct {
- ErlDrvBinary *binp;
- size_t bin_offset;
- size_t bin_size;
- size_t size;
- } read;
- struct {
- ErlDrvBinary *binp; /* in - out */
- size_t read_offset; /* in - out */
- size_t read_size; /* in - out */
- size_t nl_pos; /* out */
- short nl_skip; /* out, 0 or 1 */
-#if !ALWAYS_READ_LINE_AHEAD
- short read_ahead; /* in, bool */
-#endif
- } read_line;
- struct {
- ErlDrvBinary *binp;
- int size;
- int offset;
- } read_file;
- struct {
- struct t_readdir_buf *first_buf;
- struct t_readdir_buf *last_buf;
- } read_dir;
- struct {
- Sint64 offset;
- Sint64 length;
- int advise;
- } fadvise;
-#ifdef HAVE_SENDFILE
- struct {
- ErlDrvPort port;
- ErlDrvPDL q_mtx;
- int out_fd;
- off_t offset;
- Uint64 nbytes;
- Uint64 written;
- } sendfile;
-#endif /* HAVE_SENDFILE */
- struct {
- Sint64 offset;
- Sint64 length;
- } fallocate;
- } c;
- char b[1];
-};
-
-#define EF_ALLOC(S) driver_alloc((S))
-#define EF_REALLOC(P, S) driver_realloc((P), (S))
-#define EF_SAFE_ALLOC(S) ef_safe_alloc((S))
-#define EF_SAFE_REALLOC(P, S) ef_safe_realloc((P), (S))
-#define EF_FREE(P) do { if((P)) driver_free((P)); } while(0)
-
-static void *ef_safe_alloc(Uint s)
-{
- void *p = EF_ALLOC(s);
- if (!p) erts_exit(ERTS_ERROR_EXIT, "efile drv: Can't allocate %lu bytes of memory\n", (unsigned long)s);
- return p;
-}
-
-/*********************************************************************
- * ErlIOVec manipulation functions.
- */
-
-/* char EV_CHAR_P(ErlIOVec *ev, int p, int q) */
-#define EV_CHAR_P(ev, p, q) \
- (((char *)(ev)->iov[q].iov_base) + (p))
-
-/* int EV_GET_CHAR(ErlIOVec *ev, char *p, int *pp, int *qp) */
-#define EV_GET_CHAR(ev, p, pp, qp) efile_ev_get_char(ev, p ,pp, qp)
-static int
-efile_ev_get_char(ErlIOVec *ev, char *p, size_t *pp, size_t *qp) {
- if (*pp + 1 <= ev->iov[*qp].iov_len) {
- *p = *EV_CHAR_P(ev, *pp, *qp);
- if (*pp + 1 < ev->iov[*qp].iov_len)
- *pp += 1;
- else {
- *qp += 1;
- *pp = 0;
- }
- return !0;
- }
- return 0;
-}
-
-/* Uint32 EV_UINT32(ErlIOVec *ev, int p, int q)*/
-#define EV_UINT32(ev, p, q) \
- ((Uint32) ((unsigned char *)(ev)->iov[q].iov_base)[p])
-
-/* int EV_GET_UINT32(ErlIOVec *ev, Uint32 *p, int *pp, int *qp) */
-#define EV_GET_UINT32(ev, p, pp, qp) efile_ev_get_uint32(ev, p, pp, qp)
-static int
-efile_ev_get_uint32(ErlIOVec *ev, Uint32 *p, size_t *pp, size_t *qp) {
- if (*pp + 4 <= ev->iov[*qp].iov_len) {
- *p = (EV_UINT32(ev, *pp, *qp) << 24)
- | (EV_UINT32(ev, *pp + 1, *qp) << 16)
- | (EV_UINT32(ev, *pp + 2, *qp) << 8)
- | (EV_UINT32(ev, *pp + 3, *qp));
- if (*pp + 4 < ev->iov[*qp].iov_len)
- *pp += 4;
- else {
- *qp += 1;
- *pp = 0;
- }
- return !0;
- }
- return 0;
-}
-
-/* Uint64 EV_UINT64(ErlIOVec *ev, int p, int q)*/
-#define EV_UINT64(ev, p, q) \
- ((Uint64) ((unsigned char *)(ev)->iov[q].iov_base)[p])
-
-/* int EV_GET_UINT64(ErlIOVec *ev, Uint64 *p, int *pp, int *qp) */
-#define EV_GET_UINT64(ev, p, pp, qp) efile_ev_get_uint64(ev, p, pp, qp)
-static int
-efile_ev_get_uint64(ErlIOVec *ev, Uint64 *p, size_t *pp, size_t *qp) {
- if (*pp + 8 <= ev->iov[*qp].iov_len) {
- *p = (EV_UINT64(ev, *pp, *qp) << 56)
- | (EV_UINT64(ev, *pp + 1, *qp) << 48)
- | (EV_UINT64(ev, *pp + 2, *qp) << 40)
- | (EV_UINT64(ev, *pp + 3, *qp) << 32)
- | (EV_UINT64(ev, *pp + 4, *qp) << 24)
- | (EV_UINT64(ev, *pp + 5, *qp) << 16)
- | (EV_UINT64(ev, *pp + 6, *qp) << 8)
- | (EV_UINT64(ev, *pp + 7, *qp));
- if (*pp + 8 < ev->iov[*qp].iov_len)
- *pp += 8;
- else {
- *qp += 1;
- *pp = 0;
- }
- return !0;
- }
- return 0;
-}
-
-/* int EV_GET_SINT64(ErlIOVec *ev, Uint64 *p, int *pp, int *qp) */
-#define EV_GET_SINT64(ev, p, pp, qp) efile_ev_get_sint64(ev, p, pp, qp)
-static int
-efile_ev_get_sint64(ErlIOVec *ev, Sint64 *p, size_t *pp, size_t *qp) {
- Uint64 *tmp = (Uint64*)p;
- return EV_GET_UINT64(ev, tmp, pp, qp);
-}
-
-#if 0
-
-static void ev_clear(ErlIOVec *ev) {
- ASSERT(ev);
- ev->size = 0;
- ev->vsize = 0;
- ev->iov = NULL;
- ev->binv = NULL;
-}
-
-/* Assumes that ->iov and ->binv were allocated with sys_alloc().
- */
-static void ev_free(ErlIOVec *ev) {
- if (! ev) {
- return;
- }
- if (ev->vsize > 0) {
- int i;
- ASSERT(ev->iov);
- ASSERT(ev->binv);
- for (i = 0; i < ev->vsize; i++) {
- if (ev->binv[i]) {
- driver_free_binary(ev->binv[i]);
- }
- }
- EF_FREE(ev->iov);
- EF_FREE(ev->binv);
- }
-}
-
-/* Copy the contents from source to dest.
- * Data in binaries is not copied, just the pointers;
- * and refc is incremented.
- */
-static ErlIOVec *ev_copy(ErlIOVec *dest, ErlIOVec *source) {
- int *ip;
- ASSERT(dest);
- ASSERT(source);
- if (source->vsize == 0) {
- /* Empty source */
- ev_clear(dest);
- return dest;
- }
- /* Allocate ->iov and ->binv */
- dest->iov = EF_ALLOC(sizeof(*dest->iov) * source->vsize);
- if (! dest->iov) {
- return NULL;
- }
- dest->binv = EF_ALLOC(sizeof(*dest->binv) * source->vsize);
- if (! dest->binv) {
- EF_FREE(dest->iov);
- return NULL;
- }
- dest->size = source->size;
- /* Copy one vector element at the time.
- * Use *ip as an alias for dest->vsize to improve readabiliy.
- * Keep dest consistent in every iteration by using
- * dest->vsize==*ip as loop variable.
- */
- for (ip = &dest->vsize, *ip = 0; *ip < source->vsize; (*ip)++) {
- if (source->iov[*ip].iov_len == 0) {
- /* Empty vector element */
- dest->iov[*ip].iov_len = 0;
- dest->iov[*ip].iov_base = NULL;
- dest->binv[*ip] = NULL;
- } else {
- /* Non empty vector element */
- if (source->binv[*ip]) {
- /* Contents in binary - copy pointers and increment refc */
- dest->iov[*ip] = source->iov[*ip];
- dest->binv[*ip] = source->binv[*ip];
- driver_binary_inc_refc(source->binv[*ip]);
- } else {
- /* Contents not in binary - allocate new binary and copy data */
- if (! (dest->binv[*ip] =
- driver_alloc_binary(source->iov[*ip].iov_len))) {
- goto failed;
- }
- sys_memcpy(dest->binv[*ip]->orig_bytes,
- source->iov[*ip].iov_base,
- source->iov[*ip].iov_len);
- dest->iov[*ip].iov_base = dest->binv[*ip]->orig_bytes;
- dest->iov[*ip].iov_len = source->iov[*ip].iov_len;
- }
- }
- }
- return dest;
- failed:
- ev_free(dest);
- return NULL;
-}
-
-#endif
-
-
-
-/*********************************************************************
- * Command queue functions
- */
-
-static void cq_enq(file_descriptor *desc, struct t_data *d) {
- ASSERT(d);
- if (desc->cq_head) {
- ASSERT(desc->cq_tail);
- ASSERT(!desc->cq_tail->next);
- desc->cq_tail = desc->cq_tail->next = d;
- } else {
- ASSERT(desc->cq_tail == NULL);
- desc->cq_head = desc->cq_tail = d;
- }
- d->next = NULL;
-}
-
-static struct t_data *cq_deq(file_descriptor *desc) {
- struct t_data *d = desc->cq_head;
- ASSERT(d || (!d && !desc->cq_tail));
- if (d) {
- ASSERT(!d->next || (d->next && desc->cq_tail != d));
- if ((desc->cq_head = d->next) == NULL) {
- ASSERT(desc->cq_tail == d);
- desc->cq_tail = NULL;
- }
- }
- return d;
-}
-
-
-/*********************************************************************
- * Driver entry point -> init
- */
-static int
-file_init(void)
-{
- char buf[21]; /* enough to hold any 64-bit integer */
- size_t bufsz = sizeof(buf);
- thread_short_circuit = (erl_drv_getenv("ERL_EFILE_THREAD_SHORT_CIRCUIT",
- buf,
- &bufsz) == 0
- ? atoi(buf)
- : 0);
- driver_system_info(&sys_info, sizeof(ErlDrvSysInfo));
-
- /* run initiation of efile_driver if needed */
- efile_init();
-
-#ifdef USE_VM_PROBES
- erts_mtx_init(&dt_driver_mutex, "efile_drv dtrace mutex");
- pthread_key_create(&dt_driver_key, NULL);
-#endif /* USE_VM_PROBES */
-
- return 0;
-}
-
-
-/*********************************************************************
- * Driver entry point -> start
- */
-static ErlDrvData
-file_start(ErlDrvPort port, char* command)
-
-{
- file_descriptor* desc;
-
- if ((desc = (file_descriptor*) EF_ALLOC(sizeof(file_descriptor)))
- == NULL) {
- errno = ENOMEM;
- return ERL_DRV_ERROR_ERRNO;
- }
- desc->fd = FILE_FD_INVALID;
- desc->port = port;
- desc->key = driver_async_port_key(port);
- desc->flags = 0;
- desc->invoke = NULL;
- desc->d = NULL;
- desc->free = NULL;
- desc->cq_head = NULL;
- desc->cq_tail = NULL;
- desc->timer_state = timer_idle;
-#ifdef HAVE_SENDFILE
- desc->sendfile_state = not_sending;
-#endif
- desc->read_bufsize = 0;
- desc->read_binp = NULL;
- desc->read_offset = 0;
- desc->read_size = 0;
- desc->write_delay = 0L;
- desc->write_bufsize = 0;
- desc->write_error = 0;
- MUTEX_INIT(desc->q_mtx, port); /* Refc is one, referenced by emulator now */
- desc->write_buffered = 0;
-#ifdef USE_VM_PROBES
- dtrace_drvport_str(port, desc->port_str);
- get_dt_private(0); /* throw away return value */
-#endif /* USE_VM_PROBES */
- return (ErlDrvData) desc;
-}
-
-static void do_close(int flags, SWord fd) {
- if (flags & EFILE_COMPRESSED) {
- erts_gzclose((ErtsGzFile)(fd));
- } else {
- efile_closefile((int) fd);
- }
-}
-
-static void invoke_close(void *data)
-{
- struct t_data *d = (struct t_data *) data;
- DTRACE_INVOKE_SETUP(FILE_CLOSE);
- d->again = 0;
- do_close(d->flags, d->fd);
- DTRACE_INVOKE_RETURN(FILE_CLOSE);
-}
-
-static void free_data(void *data)
-{
- struct t_data *d = (struct t_data *) data;
-
- switch (d->command) {
- case FILE_OPEN:
- if (d->is_fd_unused && d->fd != FILE_FD_INVALID) {
- /* This is OK to do in scheduler thread because there can be no async op
- ongoing for this fd here, as we exited during async open.
- Ideally, this close should happen in an async thread too, but that would
- require a substantial rewrite, as we are here because of a dead port and
- cannot schedule async jobs for that port any more... */
- do_close(d->flags, d->fd);
- }
- break;
- case FILE_CLOSE_ON_PORT_EXIT:
- EF_FREE(d->data_to_free);
- break;
- }
-
- EF_FREE(data);
-}
-
-
-/*
- * Sends back an error reply to Erlang.
- */
-
-static void reply_posix_error(file_descriptor *desc, int posix_errno) {
- char response[256]; /* Response buffer. */
- char* s;
- char* t;
-
- /*
- * Contents of buffer sent back:
- *
- * +-----------------------------------------+
- * | FILE_RESP_ERROR | Posix error id string |
- * +-----------------------------------------+
- */
-
- TRACE_C('E');
-
- response[0] = FILE_RESP_ERROR;
- for (s = erl_errno_id(posix_errno), t = response+1; *s; s++, t++)
- *t = tolower(*s);
- driver_output2(desc->port, response, t-response, NULL, 0);
-}
-
-static void reply_Uint_posix_error(file_descriptor *desc, Uint num,
- int posix_errno) {
- char response[256]; /* Response buffer. */
- char* s;
- char* t;
-
- /*
- * Contents of buffer sent back:
- *
- * +----------------------------------------------------------------------+
- * | FILE_RESP_NUMERR | 64-bit number (big-endian) | Posix error id string |
- * +----------------------------------------------------------------------+
- */
-
- TRACE_C('N');
-
- response[0] = FILE_RESP_NUMERR;
-#if SIZEOF_VOID_P == 4
- put_int32(0, response+1);
-#else
- put_int32(num>>32, response+1);
-#endif
- put_int32((Uint32)num, response+1+4);
- for (s = erl_errno_id(posix_errno), t = response+1+4+4; *s; s++, t++)
- *t = tolower(*s);
- driver_output2(desc->port, response, t-response, NULL, 0);
-}
-
-#ifdef HAVE_SENDFILE
-static void reply_string_error(file_descriptor *desc, char* str) {
- char response[256]; /* Response buffer. */
- char* s;
- char* t;
-
- response[0] = FILE_RESP_ERROR;
- for (s = str, t = response+1; *s; s++, t++)
- *t = tolower(*s);
- driver_output2(desc->port, response, t-response, NULL, 0);
-}
-#endif
-
-static int reply_error(file_descriptor *desc,
- Efile_error *errInfo) /* The error codes. */
-{
- reply_posix_error(desc, errInfo->posix_errno);
- return 0;
-}
-
-static int reply_Uint_error(file_descriptor *desc, Uint num,
- Efile_error *errInfo) /* The error codes. */
-{
- reply_Uint_posix_error(desc, num, errInfo->posix_errno);
- return 0;
-}
-
-static int reply_ok(file_descriptor *desc) {
- char c = FILE_RESP_OK;
-
- driver_output2(desc->port, &c, 1, NULL, 0);
- return 0;
-}
-
-static int reply(file_descriptor *desc, int ok, Efile_error *errInfo) {
- if (!ok) {
- reply_error(desc, errInfo);
- } else {
- TRACE_C('K');
- reply_ok(desc);
- }
- return 0;
-}
-
-static int reply_Uint(file_descriptor *desc, Uint result) {
- char tmp[1+4+4];
-
- /*
- * Contents of buffer sent back:
- *
- * +-----------------------------------------------+
- * | FILE_RESP_NUMBER | 64-bit number (big-endian) |
- * +-----------------------------------------------+
- */
-
- TRACE_C('R');
-
- tmp[0] = FILE_RESP_NUMBER;
-#if SIZEOF_VOID_P == 4
- put_int32(0, tmp+1);
-#else
- put_int32(result>>32, tmp+1);
-#endif
- put_int32((Uint32)result, tmp+1+4);
- driver_output2(desc->port, tmp, sizeof(tmp), NULL, 0);
- return 0;
-}
-
-static int reply_Sint64(file_descriptor *desc, Sint64 result) {
- char tmp[1+4+4];
-
- /*
- * Contents of buffer sent back:
- *
- * +-----------------------------------------------+
- * | FILE_RESP_NUMBER | 64-bit number (big-endian) |
- * +-----------------------------------------------+
- */
-
- TRACE_C('R');
-
- tmp[0] = FILE_RESP_NUMBER;
- put_int64(result, tmp+1);
- driver_output2(desc->port, tmp, sizeof(tmp), NULL, 0);
- return 0;
-}
-
-#if 0
-static void reply_again(file_descriptor *desc) {
- char tmp[1];
- tmp[0] = FILE_RESP_AGAIN;
- driver_output2(desc->port, tmp, sizeof(tmp), NULL, 0);
-}
-#endif
-
-static void reply_ev(file_descriptor *desc, char response, ErlIOVec *ev) {
- char tmp[1];
- /* Data arriving at the Erlang process:
- * [Response, Binary0, Binary1, .... | BinaryN-1]
- */
- tmp[0] = response;
- driver_outputv(desc->port, tmp, sizeof(tmp), ev, 0);
-}
-
-static void reply_data(file_descriptor *desc,
- ErlDrvBinary *binp, size_t offset, size_t len) {
- char header[1+4+4];
- /* Data arriving at the Erlang process:
- * [?FILE_RESP_DATA, 64-bit length (big-endian) | Data]
- */
- header[0] = FILE_RESP_DATA;
-#if SIZEOF_SIZE_T == 4
- put_int32(0, header+1);
-#else
- put_int32(len>>32, header+1);
-#endif
- put_int32((Uint32)len, header+1+4);
- driver_output_binary(desc->port, header, sizeof(header),
- binp, offset, len);
-}
-
-static void reply_buf(file_descriptor *desc, char *buf, size_t len) {
- char header[1+4+4];
- /* Data arriving at the Erlang process:
- * [?FILE_RESP_DATA, 64-bit length (big-endian) | Data]
- */
- header[0] = FILE_RESP_DATA;
-#if SIZEOF_SIZE_T == 4
- put_int32(0, header+1);
-#else
- put_int32(len>>32, header+1);
-#endif
- put_int32((Uint32)len, header+1+4);
- driver_output2(desc->port, header, sizeof(header), buf, len);
-}
-
-static int reply_eof(file_descriptor *desc) {
- char c = FILE_RESP_EOF;
-
- driver_output2(desc->port, &c, 1, NULL, 0);
- return 0;
-}
-
-static void invoke_name(void *data, int (*f)(Efile_error *, char *))
-{
- struct t_data *d = (struct t_data *) data;
- char *name = (char *) d->b;
-
- d->again = 0;
- d->result_ok = (*f)(&d->errInfo, name);
-}
-
-static void invoke_mkdir(void *data)
-{
- DTRACE_INVOKE_SETUP_BY_NAME(FILE_MKDIR);
- invoke_name(data, efile_mkdir);
- DTRACE_INVOKE_RETURN(FILE_MKDIR);
-}
-
-static void invoke_rmdir(void *data)
-{
- DTRACE_INVOKE_SETUP_BY_NAME(FILE_RMDIR);
- invoke_name(data, efile_rmdir);
- DTRACE_INVOKE_RETURN(FILE_RMDIR);
-}
-
-static void invoke_delete_file(void *data)
-{
- DTRACE_INVOKE_SETUP_BY_NAME(FILE_DELETE);
- invoke_name(data, efile_delete_file);
- DTRACE_INVOKE_RETURN(FILE_DELETE);
-}
-
-static void invoke_chdir(void *data)
-{
- DTRACE_INVOKE_SETUP_BY_NAME(FILE_CHDIR);
- invoke_name(data, efile_chdir);
- DTRACE_INVOKE_RETURN(FILE_CHDIR);
-}
-
-static void invoke_fdatasync(void *data)
-{
- struct t_data *d = (struct t_data *) data;
- int fd = (int) d->fd;
- DTRACE_INVOKE_SETUP(FILE_FDATASYNC);
-
- d->again = 0;
- d->result_ok = efile_fdatasync(&d->errInfo, fd);
- DTRACE_INVOKE_RETURN(FILE_FDATASYNC);
-}
-
-static void invoke_fsync(void *data)
-{
- struct t_data *d = (struct t_data *) data;
- int fd = (int) d->fd;
- DTRACE_INVOKE_SETUP(FILE_FSYNC);
-
- d->again = 0;
- d->result_ok = efile_fsync(&d->errInfo, fd);
- DTRACE_INVOKE_RETURN(FILE_FSYNC);
-}
-
-static void invoke_truncate(void *data)
-{
- struct t_data *d = (struct t_data *) data;
- int fd = (int) d->fd;
- DTRACE_INVOKE_SETUP(FILE_TRUNCATE);
-
- d->again = 0;
- d->result_ok = efile_truncate_file(&d->errInfo, &fd, d->flags);
- DTRACE_INVOKE_RETURN(FILE_TRUNCATE);
-}
-
-static void invoke_read(void *data)
-{
- struct t_data *d = (struct t_data *) data;
- int status, segment;
- size_t size, read_size;
- DTRACE_INVOKE_SETUP(FILE_READ);
-
- segment = d->again && d->c.read.bin_size >= 2*FILE_SEGMENT_READ;
- if (segment) {
- size = FILE_SEGMENT_READ;
- } else {
- size = d->c.read.bin_size;
- }
- read_size = size;
- if (d->flags & EFILE_COMPRESSED) {
- read_size = erts_gzread((ErtsGzFile)d->fd,
- d->c.read.binp->orig_bytes + d->c.read.bin_offset,
- size);
- status = (read_size != (size_t) -1);
- if (!status) {
- d->errInfo.posix_errno = EIO;
- }
- } else {
- status = efile_read(&d->errInfo, d->flags, (int) d->fd,
- d->c.read.binp->orig_bytes + d->c.read.bin_offset,
- size,
- &read_size);
- }
- if ( (d->result_ok = status)) {
- ASSERT(read_size <= size);
- d->c.read.bin_offset += read_size;
- if (read_size < size || !segment) {
- d->c.read.bin_size = 0;
- d->again = 0;
- } else {
- d->c.read.bin_size -= read_size;
- }
- } else {
- d->again = 0;
- }
- DTRACE_INVOKE_RETURN(FILE_READ);
-}
-
-static void free_read(void *data)
-{
- struct t_data *d = (struct t_data *) data;
-
- driver_free_binary(d->c.read.binp);
- EF_FREE(d);
-}
-
-static void invoke_read_line(void *data)
-{
- struct t_data *d = (struct t_data *) data;
- int status;
- size_t read_size = 0;
- int local_loop = (d->again == 0);
- DTRACE_INVOKE_SETUP(FILE_READ_LINE);
-
- do {
- size_t size = (d->c.read_line.binp)->orig_size -
- d->c.read_line.read_offset - d->c.read_line.read_size;
- if (size == 0) {
- /* Need more place */
- ErlDrvSizeT need = (d->c.read_line.read_size >= DEFAULT_LINEBUF_SIZE) ?
- d->c.read_line.read_size + DEFAULT_LINEBUF_SIZE : DEFAULT_LINEBUF_SIZE;
- ErlDrvBinary *newbin;
-#if !ALWAYS_READ_LINE_AHEAD
- /* Use read_ahead size if need does not exceed it */
- if (need < (d->c.read_line.binp)->orig_size &&
- d->c.read_line.read_ahead)
- need = (d->c.read_line.binp)->orig_size;
-#endif
- newbin = driver_alloc_binary(need);
- if (newbin == NULL) {
- d->result_ok = 0;
- d->errInfo.posix_errno = ENOMEM;
- d->again = 0;
- break;
- }
- memcpy(newbin->orig_bytes, (d->c.read_line.binp)->orig_bytes + d->c.read_line.read_offset,
- d->c.read_line.read_size);
- driver_free_binary(d->c.read_line.binp);
- d->c.read_line.binp = newbin;
- d->c.read_line.read_offset = 0;
- size = need - d->c.read_line.read_size;
- }
- if (d->flags & EFILE_COMPRESSED) {
- read_size = erts_gzread((ErtsGzFile)d->fd,
- d->c.read_line.binp->orig_bytes +
- d->c.read_line.read_offset + d->c.read_line.read_size,
- size);
- status = (read_size != (size_t) -1);
- if (!status) {
- d->errInfo.posix_errno = EIO;
- }
- } else {
- status = efile_read(&d->errInfo, d->flags, (int) d->fd,
- d->c.read_line.binp->orig_bytes +
- d->c.read_line.read_offset + d->c.read_line.read_size,
- size,
- &read_size);
- }
- if ( (d->result_ok = status)) {
- void *nl_ptr = memchr((d->c.read_line.binp)->orig_bytes +
- d->c.read_line.read_offset + d->c.read_line.read_size,'\n',read_size);
- ASSERT(read_size <= size);
- d->c.read_line.read_size += read_size;
- if (nl_ptr != NULL) {
- /* If found, we're done */
- d->c.read_line.nl_pos = ((char *) nl_ptr) -
- ((char *) ((d->c.read_line.binp)->orig_bytes)) + 1;
- if (d->c.read_line.nl_pos > 1 &&
- *(((char *) nl_ptr) - 1) == '\r') {
- --d->c.read_line.nl_pos;
- *(((char *) nl_ptr) - 1) = '\n';
- d->c.read_line.nl_skip = 1;
- } else {
- d->c.read_line.nl_skip = 0;
- }
- d->again = 0;
-#if !ALWAYS_READ_LINE_AHEAD
- if (!(d->c.read_line.read_ahead)) {
- /* Ouch! Undo buffering... */
- size_t too_much = d->c.read_line.read_size - d->c.read_line.nl_skip -
- (d->c.read_line.nl_pos - d->c.read_line.read_offset);
- d->c.read_line.read_size -= too_much;
- ASSERT(d->c.read_line.read_size >= 0);
- if (d->flags & EFILE_COMPRESSED) {
- Sint64 location = erts_gzseek((ErtsGzFile)d->fd,
- -((Sint64) too_much), EFILE_SEEK_CUR);
- if (location == -1) {
- d->result_ok = 0;
- d->errInfo.posix_errno = errno;
- }
- } else {
- Sint64 location;
- d->result_ok = efile_seek(&d->errInfo, (int) d->fd,
- -((Sint64) too_much), EFILE_SEEK_CUR,
- &location);
- }
- }
-#endif
- break;
- } else if (read_size == 0) {
- d->c.read_line.nl_pos =
- d->c.read_line.read_offset + d->c.read_line.read_size;
- d->c.read_line.nl_skip = 0;
- d->again = 0;
- break;
- }
- } else {
- d->again = 0;
- break;
- }
- } while (local_loop);
- DTRACE_INVOKE_RETURN(FILE_READ_LINE);
-}
-
-static void free_read_line(void *data)
-{
- struct t_data *d = (struct t_data *) data;
-
- driver_free_binary(d->c.read_line.binp);
- EF_FREE(d);
-}
-
-static void invoke_read_file(void *data)
-{
- struct t_data *d = (struct t_data *) data;
- size_t read_size;
- int chop;
- DTRACE_INVOKE_SETUP(FILE_READ_FILE);
-
- if (! d->c.read_file.binp) { /* First invocation only */
- int fd;
- Sint64 size;
-
- if (! (d->result_ok =
- efile_openfile(&d->errInfo, d->b,
- EFILE_MODE_READ, &fd, &size))) {
- goto done;
- }
- d->fd = fd;
- d->c.read_file.size = (int) size;
- if (size < 0 || size != d->c.read_file.size ||
- ! (d->c.read_file.binp =
- driver_alloc_binary(d->c.read_file.size))) {
- d->result_ok = 0;
- d->errInfo.posix_errno = ENOMEM;
- goto close;
- }
- d->c.read_file.offset = 0;
- }
- /* Invariant: d->c.read_file.size >= d->c.read_file.offset */
-
- read_size = (size_t) (d->c.read_file.size - d->c.read_file.offset);
- if (! read_size) goto close;
- chop = d->again && read_size >= FILE_SEGMENT_READ*2;
- if (chop) read_size = FILE_SEGMENT_READ;
- d->result_ok =
- efile_read(&d->errInfo,
- EFILE_MODE_READ,
- (int) d->fd,
- d->c.read_file.binp->orig_bytes + d->c.read_file.offset,
- read_size,
- &read_size);
- if (d->result_ok) {
- d->c.read_file.offset += read_size;
- if (chop) goto chop_done; /* again */
- }
- close:
- efile_closefile((int) d->fd);
- done:
- d->again = 0;
- chop_done:
- DTRACE_INVOKE_RETURN(FILE_READ_FILE);
-}
-
-static void free_read_file(void *data)
-{
- struct t_data *d = (struct t_data *) data;
-
- if (d->c.read_file.binp) driver_free_binary(d->c.read_file.binp);
- EF_FREE(d);
-}
-
-
-
-static void invoke_preadv(void *data)
-{
- struct t_data *d = (struct t_data *) data;
- struct t_preadv *c = &d->c.preadv;
- ErlIOVec *ev = &c->eiov;
- size_t bytes_read_so_far = 0;
- unsigned char *p = (unsigned char *)ev->iov[0].iov_base + 4+4+8*c->cnt;
- DTRACE_INVOKE_SETUP(FILE_PREADV);
-
- while (c->cnt < c->n) {
- size_t read_size = ev->iov[1 + c->cnt].iov_len - c->size;
- size_t bytes_read = 0;
- int chop = d->again
- && bytes_read_so_far + read_size >= 2*FILE_SEGMENT_READ;
- if (chop) {
- ASSERT(bytes_read_so_far < FILE_SEGMENT_READ);
- read_size = FILE_SEGMENT_READ + FILE_SEGMENT_READ/2
- - bytes_read_so_far;
- }
- if ( (d->result_ok
- = efile_pread(&d->errInfo,
- (int) d->fd,
- c->offsets[c->cnt] + c->size,
- ((char *)ev->iov[1 + c->cnt].iov_base) + c->size,
- read_size,
- &bytes_read))) {
- bytes_read_so_far += bytes_read;
- if (chop && bytes_read == read_size) {
- c->size += bytes_read;
- goto done;
- }
- ASSERT(bytes_read <= read_size);
- ev->iov[1 + c->cnt].iov_len = bytes_read + c->size;
- ev->size += bytes_read + c->size;
- put_int64(bytes_read + c->size, p); p += 8;
- c->size = 0;
- c->cnt++;
- if (d->again
- && bytes_read_so_far >= FILE_SEGMENT_READ
- && c->cnt < c->n) {
- goto done;
- }
- } else {
- /* In case of a read error, ev->size will not be correct,
- * which does not matter since no read data is returned
- * to Erlang.
- */
- break;
- }
- }
- d->again = 0;
- done:
- DTRACE_INVOKE_RETURN(FILE_PREADV);
-}
-
-static void free_preadv(void *data) {
- struct t_data *d = data;
- int i;
- ErlIOVec *ev = &d->c.preadv.eiov;
-
- for(i = 0; i < ev->vsize; i++) {
- driver_free_binary(ev->binv[i]);
- }
- EF_FREE(d);
-}
-
-static void invoke_ipread(void *data)
-{
- struct t_data *d = data;
- struct t_preadv *c = &d->c.preadv;
- ErlIOVec *ev = &c->eiov;
- size_t bytes_read = 0;
- char buf[2*sizeof(Uint32)];
- Uint32 offset, size;
- DTRACE_INVOKE_SETUP(FILE_IPREAD);
-
- /* Read indirection header */
- if (! efile_pread(&d->errInfo, (int) d->fd, c->offsets[0],
- buf, sizeof(buf), &bytes_read)) {
- goto error;
- }
- if (bytes_read != sizeof(buf)) goto done; /* eof */
- size = get_int32(buf);
- offset = get_int32(buf+4);
- if (size > c->size) goto done; /* eof */
- c->n = 1;
- c->cnt = 0;
- c->size = 0;
- c->offsets[0] = offset;
- if (! (ev->binv[0] = driver_alloc_binary(3*8))) {
- d->errInfo.posix_errno = ENOMEM;
- goto error;
- }
- ev->vsize = 1;
- ev->iov[0].iov_len = 3*8;
- ev->iov[0].iov_base = ev->binv[0]->orig_bytes;
- ev->size = ev->iov[0].iov_len;
- put_int64(offset, ev->iov[0].iov_base);
- put_int64(size, ((char *)ev->iov[0].iov_base) + 2*8);
- if (size == 0) {
- put_int64(size, ((char *)ev->iov[0].iov_base) + 8);
- goto done;
- }
- if (! (ev->binv[1] = driver_alloc_binary(size))) {
- d->errInfo.posix_errno = ENOMEM;
- goto error;
- }
- ev->vsize = 2;
- ev->iov[1].iov_len = size;
- ev->iov[1].iov_base = ev->binv[1]->orig_bytes;
- /* Read data block */
- d->invoke = invoke_preadv;
- invoke_preadv(data);
- DTRACE_INVOKE_RETURN(FILE_IPREAD);
- return;
- error:
- d->result_ok = 0;
- d->again = 0;
- DTRACE_INVOKE_RETURN(FILE_IPREAD);
- return;
- done:
- d->result_ok = !0;
- d->again = 0;
- DTRACE_INVOKE_RETURN(FILE_IPREAD);
-}
-
-/* invoke_writev and invoke_pwritev are the only thread functions that
- * access non-thread data i.e the port queue and a mutex in the port
- * structure that is used to lock the port queue.
- *
- * The port will normally not be terminated until the port queue is
- * empty, but if the port is killed, i.e., exit(Port, kill) is called,
- * it will terminate regardless of the port queue state. When the
- * port is invalid driver_peekq() returns NULL and set the size to -1,
- * and driver_sizeq() returns -1.
- */
-
-static void invoke_writev(void *data) {
- struct t_data *d = (struct t_data *) data;
- SysIOVec *iov0;
- SysIOVec *iov;
- int iovlen;
- int iovcnt;
- size_t size;
- size_t p;
- int segment;
- DTRACE_INVOKE_SETUP(FILE_WRITE);
-
- segment = d->again && d->c.writev.size >= 2*FILE_SEGMENT_WRITE;
- if (segment) {
- size = FILE_SEGMENT_WRITE;
- } else {
- size = d->c.writev.size;
- }
-
- /* Copy the io vector to avoid locking the port que while writing,
- * also, both we and efile_writev might/will change the SysIOVec
- * when segmenting or due to partial write and we do not want to
- * tamper with the actual queue that we get from driver_peekq
- */
- MUTEX_LOCK(d->c.writev.q_mtx); /* Lock before accessing the port queue */
- iov0 = driver_peekq(d->c.writev.port, &iovlen);
-
- /* Calculate iovcnt */
- for (p = 0, iovcnt = 0;
- p < size && iovcnt < iovlen;
- p += iov0[iovcnt++].iov_len)
- ;
- iov = EF_SAFE_ALLOC(sizeof(SysIOVec)*iovcnt);
- memcpy(iov,iov0,iovcnt*sizeof(SysIOVec));
- MUTEX_UNLOCK(d->c.writev.q_mtx);
- /* Let go of lock until we deque from original vector */
-
- if (iovlen > 0) {
- ASSERT(iov[iovcnt-1].iov_len > p - size);
- iov[iovcnt-1].iov_len -= p - size;
- if (d->flags & EFILE_COMPRESSED) {
- int i, status = 1;
- for (i = 0; i < iovcnt; i++) {
- if (iov[i].iov_base && iov[i].iov_len > 0) {
- /* Just in case, I do not know what gzwrite does
- * with errno.
- */
- errno = EINVAL;
- status = erts_gzwrite((ErtsGzFile)d->fd,
- iov[i].iov_base,
- iov[i].iov_len) == iov[i].iov_len;
- if (! status) {
- d->errInfo.posix_errno =
- d->errInfo.os_errno = errno; /* XXX Correct? */
- break;
- }
- }
- }
- d->result_ok = status;
- } else {
- d->result_ok = efile_writev(&d->errInfo,
- d->flags, (int) d->fd,
- iov, iovcnt);
- }
- } else if (iovlen == 0) {
- d->result_ok = 1;
- }
- else { /* Port has terminated */
- d->result_ok = 0;
- d->errInfo.posix_errno = d->errInfo.os_errno = EINVAL;
- }
- EF_FREE(iov);
-
- if (! d->result_ok) {
- d->again = 0;
- MUTEX_LOCK(d->c.writev.q_mtx);
- driver_deq(d->c.writev.port, d->c.writev.size);
- MUTEX_UNLOCK(d->c.writev.q_mtx);
- } else {
- if (! segment) {
- d->again = 0;
- }
- d->c.writev.size -= size;
- TRACE_F(("w%lu", (unsigned long)size));
- MUTEX_LOCK(d->c.writev.q_mtx);
- driver_deq(d->c.writev.port, size);
- MUTEX_UNLOCK(d->c.writev.q_mtx);
- }
-
-
- DTRACE_INVOKE_RETURN(FILE_WRITE);
-}
-
-static void invoke_pwd(void *data)
-{
- struct t_data *d = (struct t_data *) data;
- DTRACE_INVOKE_SETUP(FILE_PWD);
-
- d->again = 0;
- d->result_ok = efile_getdcwd(&d->errInfo,d->drive, d->b+1,
- RESBUFSIZE-1);
- DTRACE_INVOKE_RETURN(FILE_PWD);
-}
-
-static void invoke_readlink(void *data)
-{
- struct t_data *d = (struct t_data *) data;
- char resbuf[RESBUFSIZE]; /* Result buffer. */
- DTRACE_INVOKE_SETUP(FILE_READLINK);
-
- d->again = 0;
- d->result_ok = efile_readlink(&d->errInfo, d->b, resbuf+1,
- RESBUFSIZE-1);
- if (d->result_ok != 0)
- FILENAME_COPY((char *) d->b + 1, resbuf+1);
- DTRACE_INVOKE_RETURN(FILE_READLINK);
-}
-
-static void invoke_altname(void *data)
-{
- struct t_data *d = (struct t_data *) data;
- char resbuf[RESBUFSIZE]; /* Result buffer. */
- DTRACE_INVOKE_SETUP(FILE_ALTNAME);
-
- d->again = 0;
- d->result_ok = efile_altname(&d->errInfo, d->b, resbuf+1,
- RESBUFSIZE-1);
- if (d->result_ok != 0)
- FILENAME_COPY((char *) d->b + 1, resbuf+1);
- DTRACE_INVOKE_RETURN(FILE_ALTNAME);
-}
-
-static void invoke_pwritev(void *data) {
- struct t_data* const d = (struct t_data *) data;
- struct t_pwritev * const c = &d->c.pwritev;
- SysIOVec *iov0;
- SysIOVec *iov;
- int iovlen;
- int iovcnt;
- size_t p;
- int segment;
- size_t size, write_size, written;
- DTRACE_INVOKE_SETUP(FILE_PWRITEV);
-
- segment = d->again && c->size >= 2*FILE_SEGMENT_WRITE;
- if (segment) {
- size = FILE_SEGMENT_WRITE;
- } else {
- size = c->size;
- }
- d->result_ok = !0;
- p = 0;
- /* Lock the queue just for a while, we don't want it locked during write */
- MUTEX_LOCK(c->q_mtx);
- iov0 = driver_peekq(c->port, &iovlen);
- iov = EF_SAFE_ALLOC(sizeof(SysIOVec)*iovlen);
- memcpy(iov,iov0,sizeof(SysIOVec)*iovlen);
- MUTEX_UNLOCK(c->q_mtx);
-
- if (iovlen < 0)
- goto error; /* Port terminated */
- for (iovcnt = 0, written = 0;
- c->cnt < c->n && iovcnt < iovlen && written < size;
- c->cnt++) {
- int chop;
- write_size = c->specs[c->cnt].size;
- if (iov[iovcnt].iov_len - p < write_size) {
- goto error;
- }
- chop = segment && written + write_size >= 2*FILE_SEGMENT_WRITE;
- if (chop) {
- ASSERT(written < FILE_SEGMENT_WRITE);
- write_size = FILE_SEGMENT_WRITE + FILE_SEGMENT_WRITE/2
- - written;
- }
- d->result_ok = efile_pwrite(&d->errInfo, (int) d->fd,
- (char *)(iov[iovcnt].iov_base) + p,
- write_size,
- c->specs[c->cnt].offset);
- if (! d->result_ok) {
- d->again = 0;
- goto deq_error;
- }
- written += write_size;
- c->size -= write_size;
- if (chop) {
- c->specs[c->cnt].offset += write_size;
- c->specs[c->cnt].size -= write_size;
- /* Schedule out (d->again != 0) */
- break;
- }
- /* Move forward in buffer */
- p += write_size;
- ASSERT(iov[iovcnt].iov_len >= p);
- if (iov[iovcnt].iov_len == p) {
- /* Move to next iov[], we trust that it is not a
- * zero length vector, and thereby depend on that
- * such are not queued.
- */
- iovcnt++; p = 0;
- }
- }
- if (! segment) {
- if (c->cnt != c->n) {
- /* Mismatch between number of
- * pos/size specs vs number of queued buffers .
- */
- error:
- d->errInfo.posix_errno = EINVAL;
- d->result_ok = 0;
- d->again = 0;
- deq_error:
- MUTEX_LOCK(c->q_mtx);
- driver_deq(c->port, c->size);
- MUTEX_UNLOCK(c->q_mtx);
-
- goto done;
- } else {
- ASSERT(written == size);
- d->again = 0;
- }
- } else {
- ASSERT(written >= FILE_SEGMENT_WRITE);
- }
-
- MUTEX_LOCK(c->q_mtx);
- driver_deq(c->port, written);
- MUTEX_UNLOCK(c->q_mtx);
- done:
- EF_FREE(iov); /* Free our copy of the vector, nothing to restore */
-
- DTRACE_INVOKE_RETURN(FILE_PWRITEV);
-}
-
-static void invoke_flstat(void *data)
-{
- struct t_data *d = (struct t_data *) data;
-
- DTRACE3(efile_drv_int_entry, d->sched_i1, d->sched_i2,
- d->command == FILE_LSTAT ? FILE_LSTAT : FILE_FSTAT);
- d->again = 0;
- d->result_ok = efile_fileinfo(&d->errInfo, &d->info,
- d->b, d->command == FILE_LSTAT);
- DTRACE3(efile_drv_int_entry, d->sched_i1, d->sched_i2,
- d->command == FILE_LSTAT ? FILE_LSTAT : FILE_FSTAT);
- gcc_optimizer_hack++;
-}
-
-static void invoke_link(void *data)
-{
- struct t_data *d = (struct t_data *) data;
- char *name = d->b;
- char *new_name;
- DTRACE_INVOKE_SETUP(FILE_LINK);
-
- d->again = 0;
- new_name = name+FILENAME_BYTELEN(name)+FILENAME_CHARSIZE;
- d->result_ok = efile_link(&d->errInfo, name, new_name);
- DTRACE_INVOKE_RETURN(FILE_LINK);
-}
-
-static void invoke_symlink(void *data)
-{
- struct t_data *d = (struct t_data *) data;
- char *name = d->b;
- char *new_name;
- DTRACE_INVOKE_SETUP(FILE_SYMLINK);
-
- d->again = 0;
- new_name = name+FILENAME_BYTELEN(name)+FILENAME_CHARSIZE;
- d->result_ok = efile_symlink(&d->errInfo, name, new_name);
- DTRACE_INVOKE_RETURN(FILE_SYMLINK);
-}
-
-static void invoke_rename(void *data)
-{
- struct t_data *d = (struct t_data *) data;
- char *name = d->b;
- char *new_name;
- DTRACE_INVOKE_SETUP(FILE_RENAME);
-
- d->again = 0;
- new_name = name+FILENAME_BYTELEN(name)+FILENAME_CHARSIZE;
- d->result_ok = efile_rename(&d->errInfo, name, new_name);
- DTRACE_INVOKE_RETURN(FILE_RENAME);
-}
-
-static void invoke_write_info(void *data)
-{
- struct t_data *d = (struct t_data *) data;
- DTRACE_INVOKE_SETUP(FILE_WRITE_INFO);
-
- d->again = 0;
- d->result_ok = efile_write_info(&d->errInfo, &d->info, d->b);
- DTRACE_INVOKE_RETURN(FILE_WRITE_INFO);
-}
-
-static void invoke_lseek(void *data)
-{
- struct t_data *d = (struct t_data *) data;
- int status;
- DTRACE_INVOKE_SETUP(FILE_LSEEK);
-
- d->again = 0;
- if (d->flags & EFILE_COMPRESSED) {
- int offset = (int) d->c.lseek.offset;
-
- if (offset != d->c.lseek.offset) {
- d->errInfo.posix_errno = EINVAL;
- status = 0;
- } else {
- d->c.lseek.location = erts_gzseek((ErtsGzFile)d->fd,
- offset, d->c.lseek.origin);
- if (d->c.lseek.location == -1) {
- d->errInfo.posix_errno = errno;
- status = 0;
- } else {
- status = 1;
- }
- }
- } else {
- status = efile_seek(&d->errInfo, (int) d->fd,
- d->c.lseek.offset, d->c.lseek.origin,
- &d->c.lseek.location);
- }
- d->result_ok = status;
- DTRACE_INVOKE_RETURN(FILE_LSEEK);
-}
-
-static void invoke_readdir(void *data)
-{
- struct t_data *d = (struct t_data *) data;
- char *p = NULL;
- size_t file_bs;
- size_t n = 0, total = 0;
- struct t_readdir_buf *b = NULL;
- int res = 0;
- DTRACE_INVOKE_SETUP(FILE_READDIR);
-
- d->again = 0;
- d->errInfo.posix_errno = 0;
-
- do {
- total = READDIR_BUFSIZE;
- n = 1;
- b = EF_SAFE_ALLOC(sizeof(struct t_readdir_buf));
- b->next = NULL;
-
- if (d->c.read_dir.last_buf) {
- d->c.read_dir.last_buf->next = b;
- } else {
- d->c.read_dir.first_buf = b;
- }
- d->c.read_dir.last_buf = b;
-
- p = &b->buf[0];
- p[0] = FILE_RESP_LFNAME;
- file_bs = READDIR_BUFSIZE - n;
-
- do {
- res = efile_readdir(&d->errInfo, d->b, &d->dir_handle, p + n + 2, &file_bs);
-
- if (res) {
- put_int16((Uint16)file_bs, p + n);
- n += 2 + file_bs;
- file_bs = READDIR_BUFSIZE - n;
- }
- } while( res && ((total - n - 2) >= MAXPATHLEN*FILENAME_CHARSIZE));
-
- b->n = n;
- } while(res);
-
- d->result_ok = (d->errInfo.posix_errno == 0);
- DTRACE_INVOKE_RETURN(FILE_READDIR);
-}
-
-static void invoke_open(void *data)
-{
- struct t_data *d = (struct t_data *) data;
- int status = 1; /* Status of open call. */
- DTRACE_INVOKE_SETUP(FILE_OPEN);
-
- d->again = 0;
- if ((d->flags & EFILE_COMPRESSED) == 0) {
- int fd;
- status = efile_openfile(&d->errInfo, d->b, d->flags, &fd, NULL);
- d->fd = fd;
- } else {
- char* mode = NULL;
-
- if (((d->flags & (EFILE_MODE_READ_WRITE)) == EFILE_MODE_READ_WRITE) ||
- (d->flags & EFILE_MODE_APPEND)) {
- status = 0;
- d->errInfo.posix_errno = EINVAL;
- } else {
- status = efile_may_openfile(&d->errInfo, d->b);
- if (status || (d->errInfo.posix_errno != EISDIR)) {
- mode = (d->flags & EFILE_MODE_READ) ? "rb" : "wb";
- d->fd = (SWord) erts_gzopen(d->b, mode);
- if ((ErtsGzFile)d->fd) {
- status = 1;
- } else {
- if (errno == 0) {
- errno = ENOMEM;
- }
- d->errInfo.posix_errno = errno;
- status = 0;
- }
- }
- }
- }
-
- d->result_ok = status;
- if (!status) {
- d->fd = FILE_FD_INVALID;
- }
- DTRACE_INVOKE_RETURN(FILE_OPEN);
-}
-
-static void invoke_fadvise(void *data)
-{
- struct t_data *d = (struct t_data *) data;
- int fd = (int) d->fd;
- off_t offset = (off_t) d->c.fadvise.offset;
- off_t length = (off_t) d->c.fadvise.length;
- int advise = (int) d->c.fadvise.advise;
- DTRACE_INVOKE_SETUP(FILE_FADVISE);
-
- d->again = 0;
- d->result_ok = efile_fadvise(&d->errInfo, fd, offset, length, advise);
- DTRACE_INVOKE_RETURN(FILE_FADVISE);
-}
-
-#ifdef HAVE_SENDFILE
-static void invoke_sendfile(void *data)
-{
- struct t_data *d = (struct t_data *)data;
- int fd = d->fd;
- int out_fd = (int)d->c.sendfile.out_fd;
- Uint64 nbytes = d->c.sendfile.nbytes;
- int result = 0;
- d->again = 0;
-
- result = efile_sendfile(&d->errInfo, fd, out_fd, &d->c.sendfile.offset, &nbytes, NULL);
-
- d->c.sendfile.written += nbytes;
-
- if (result == 1 || (result == 0 && USE_THRDS_FOR_SENDFILE(d))) {
- d->result_ok = 0;
- } else if (result == 0 && (d->errInfo.posix_errno == EAGAIN
- || d->errInfo.posix_errno == EINTR)) {
- if ((d->c.sendfile.nbytes - nbytes) != 0) {
- d->result_ok = 1;
- if (d->c.sendfile.nbytes != 0)
- d->c.sendfile.nbytes -= nbytes;
- } else if (nbytes == 0 && d->c.sendfile.nbytes == 0) {
- d->result_ok = 1;
- } else
- d->result_ok = 0;
- } else {
- d->result_ok = -1;
- }
-}
-
-static void free_sendfile(void *data) {
- struct t_data *d = (struct t_data *)data;
- if (USE_THRDS_FOR_SENDFILE(d)) {
- SET_NONBLOCKING(d->c.sendfile.out_fd);
- } else {
- MUTEX_LOCK(d->c.sendfile.q_mtx);
- driver_deq(d->c.sendfile.port,1);
- MUTEX_UNLOCK(d->c.sendfile.q_mtx);
- driver_select(d->c.sendfile.port, (ErlDrvEvent)(long)d->c.sendfile.out_fd, ERL_DRV_USE_NO_CALLBACK|ERL_DRV_WRITE, 0);
- }
- EF_FREE(data);
-}
-
-static void file_ready_output(ErlDrvData data, ErlDrvEvent event)
-{
- file_descriptor* fd = (file_descriptor*) data;
-
- switch (fd->d->command) {
- case FILE_SENDFILE:
- driver_select(fd->d->c.sendfile.port, event,
- (int)ERL_DRV_WRITE,(int) 0);
- invoke_sendfile((void *)fd->d);
- file_async_ready(data, (ErlDrvThreadData)fd->d);
- break;
- default:
- break;
- }
-}
-
-static void file_stop_select(ErlDrvEvent event, void* _)
-{
-
-}
-
-static int flush_sendfile(file_descriptor *desc,void *_) {
- if (desc->sendfile_state == sending) {
- desc->d->result_ok = -1;
- desc->d->errInfo.posix_errno = ECONNABORTED;
- file_async_ready((ErlDrvData)desc,(ErlDrvThreadData)desc->d);
- }
- return 1;
-}
-#endif /* HAVE_SENDFILE */
-
-
-static void invoke_fallocate(void *data)
-{
- struct t_data *d = (struct t_data *) data;
- int fd = (int) d->fd;
- Sint64 offset = d->c.fallocate.offset;
- Sint64 length = d->c.fallocate.length;
-
- d->again = 0;
- d->result_ok = efile_fallocate(&d->errInfo, fd, offset, length);
-}
-
-static void free_readdir(void *data)
-{
- struct t_data *d = (struct t_data *) data;
- struct t_readdir_buf *b1 = d->c.read_dir.first_buf;
-
- while (b1) {
- struct t_readdir_buf *b2 = b1;
- b1 = b1->next;
- EF_FREE(b2);
- }
- EF_FREE(d);
-}
-
-
-
-static void try_free_read_bin(file_descriptor *desc) {
- if ((desc->read_size == 0)
- && (desc->read_offset >= desc->read_binp->orig_size)) {
- ASSERT(desc->read_offset == desc->read_binp->orig_size);
- driver_free_binary(desc->read_binp);
- desc->read_binp = NULL;
- desc->read_offset = 0;
- desc->read_size = 0;
- }
-}
-
-
-
-static int try_again(file_descriptor *desc, struct t_data *d) {
- if (! d->again)
- return 0;
- if (desc->timer_state != timer_idle) {
- driver_cancel_timer(desc->port);
- }
- desc->timer_state = timer_again;
- desc->invoke = d->invoke;
- desc->d = d;
- desc->free = d->free;
- driver_set_timer(desc->port, 0L);
- return !0;
-}
-
-
-
-static void cq_execute(file_descriptor *desc) {
- struct t_data *d;
- register void *void_ptr; /* Soft cast variable */
- if (desc->timer_state == timer_again)
- return;
-#ifdef HAVE_SENDFILE
- if (desc->sendfile_state == sending)
- return;
-#endif
- if (! (d = cq_deq(desc)))
- return;
- TRACE_F(("x%i", (int) d->command));
- d->again = sys_info.async_threads == 0;
- DRIVER_ASYNC(d->level, desc, d->invoke, void_ptr=d, d->free);
-}
-
-static struct t_data *async_write(file_descriptor *desc, int *errp,
- int reply, Uint32 reply_size
-#ifdef USE_VM_PROBES
- ,Sint64 *dt_i1, Sint64 *dt_i2, Sint64 *dt_i3
-#endif
-) {
- struct t_data *d;
- if (! (d = EF_ALLOC(sizeof(struct t_data) - 1))) {
- if (errp) *errp = ENOMEM;
- return NULL;
- }
- TRACE_F(("w%lu", (unsigned long)desc->write_buffered));
- d->command = FILE_WRITE;
- d->fd = desc->fd;
- d->flags = desc->flags;
- d->c.writev.port = desc->port;
- d->c.writev.q_mtx = desc->q_mtx;
- d->c.writev.size = desc->write_buffered;
-#ifdef USE_VM_PROBES
- if (dt_i1 != NULL) {
- *dt_i1 = d->fd;
- *dt_i2 = d->flags;
- *dt_i3 = d->c.writev.size;
- }
-#endif
- d->reply = reply;
- d->c.writev.reply_size = reply_size;
- d->invoke = invoke_writev;
- d->free = free_data;
- d->level = 1;
- cq_enq(desc, d);
- desc->write_buffered = 0;
- return d;
-}
-
-static int flush_write(file_descriptor *desc, int *errp
-#ifdef USE_VM_PROBES
- , dt_private *dt_priv, char *dt_utag
-#endif
-) {
- int result = 0;
-#ifdef USE_VM_PROBES
- Sint64 dt_i1 = 0, dt_i2 = 0, dt_i3 = 0;
-#endif
- struct t_data *d = NULL;
-
- MUTEX_LOCK(desc->q_mtx);
- if (desc->write_buffered > 0) {
- if ((d = async_write(desc, errp, 0, 0
-#ifdef USE_VM_PROBES
- ,&dt_i1, &dt_i2, &dt_i3
-#endif
- )) == NULL) {
- result = -1;
- }
- }
- MUTEX_UNLOCK(desc->q_mtx);
-#ifdef USE_VM_PROBES
- if (d != NULL) {
- d->sched_i1 = dt_priv->thread_num;
- d->sched_i2 = dt_priv->tag;
- d->sched_utag[0] = '\0';
- if (dt_utag != NULL) {
- if (dt_utag[0] == '\0') {
- dt_utag = NULL;
- } else {
- strncpy(d->sched_utag, dt_utag, sizeof(d->sched_utag) - 1);
- d->sched_utag[sizeof(d->sched_utag) - 1] = '\0';
- }
- }
- DTRACE11(efile_drv_entry, dt_priv->thread_num, dt_priv->tag++,
- dt_utag, FILE_WRITE,
- NULL, NULL, dt_i1, dt_i2, dt_i3, 0, desc->port_str);
- }
-#endif /* USE_VM_PROBES */
- return result;
-}
-
-static int check_write_error(file_descriptor *desc, int *errp) {
- if (desc->write_error) {
- if (errp) *errp = desc->write_errInfo.posix_errno;
- desc->write_error = 0;
- return -1;
- }
- return 0;
-}
-
-static int flush_write_check_error(file_descriptor *desc, int *errp
-#ifdef USE_VM_PROBES
- , dt_private *dt_priv, char *dt_utag
-#endif
- ) {
- int r;
- if ( (r = flush_write(desc, errp
-#ifdef USE_VM_PROBES
- , dt_priv, dt_utag
-#endif
- )) != 0) {
- check_write_error(desc, NULL);
- return r;
- } else {
- return check_write_error(desc, errp);
- }
-}
-
-static struct t_data *async_lseek(file_descriptor *desc, int *errp, int reply,
- Sint64 offset, int origin
-#ifdef USE_VM_PROBES
- , Sint64 *dt_i1, Sint64 *dt_i2, Sint64 *dt_i3
-#endif
- ) {
- struct t_data *d;
- if (! (d = EF_ALLOC(sizeof(struct t_data)))) {
- *errp = ENOMEM;
- return NULL;
- }
- d->flags = desc->flags;
- d->fd = desc->fd;
- d->command = FILE_LSEEK;
- d->reply = reply;
- d->c.lseek.offset = offset;
- d->c.lseek.origin = origin;
-#ifdef USE_VM_PROBES
- if (dt_i1 != NULL) {
- *dt_i1 = d->fd;
- *dt_i2 = d->c.lseek.offset;
- *dt_i3 = d->c.lseek.origin;
- }
-#endif
- d->invoke = invoke_lseek;
- d->free = free_data;
- d->level = 1;
- cq_enq(desc, d);
- return d;
-}
-
-static void flush_read(file_descriptor *desc) {
- desc->read_offset = 0;
- desc->read_size = 0;
- if (desc->read_binp) {
- driver_free_binary(desc->read_binp);
- desc->read_binp = NULL;
- }
-}
-
-static int lseek_flush_read(file_descriptor *desc, int *errp
-#ifdef USE_VM_PROBES
- ,dt_private *dt_priv, char *dt_utag
-#endif
- ) {
- int r = 0;
- size_t read_size = desc->read_size;
-#ifdef USE_VM_PROBES
- Sint64 dt_i1 = 0, dt_i2 = 0, dt_i3 = 0;
-#endif
- struct t_data *d;
-
- flush_read(desc);
- if (read_size != 0) {
- if ((d = async_lseek(desc, errp, 0,
- -((ssize_t)read_size), EFILE_SEEK_CUR
-#ifdef USE_VM_PROBES
- , &dt_i1, &dt_i2, &dt_i3
-#endif
- )) == NULL) {
- r = -1;
- } else {
-#ifdef USE_VM_PROBES
- d->sched_i1 = dt_priv->thread_num;
- d->sched_i2 = dt_priv->tag;
- d->sched_utag[0] = '\0';
- if (dt_utag != NULL) {
- if (dt_utag[0] == '\0') {
- dt_utag = NULL;
- } else {
- strncpy(d->sched_utag, dt_utag, sizeof(d->sched_utag) - 1);
- d->sched_utag[sizeof(d->sched_utag) - 1] = '\0';
- }
- }
- DTRACE11(efile_drv_entry, dt_priv->thread_num, dt_priv->tag++,
- dt_utag, FILE_LSEEK,
- NULL, NULL, dt_i1, dt_i2, dt_i3, 0, desc->port_str);
-#endif /* USE_VM_PROBES */
- }
- }
- return r;
-}
-
-
-/*********************************************************************
- * Driver entry point -> stop
- * The close has to be scheduled on async thread, so that currently active
- * async operation does not suddenly have the ground disappearing under their feet...
- */
-static void
-file_stop(ErlDrvData e)
-{
- file_descriptor* desc = (file_descriptor*)e;
-
- TRACE_C('p');
-
- IF_THRDS {
- flush_read(desc);
- if (desc->fd != FILE_FD_INVALID) {
- struct t_data *d = EF_SAFE_ALLOC(sizeof(struct t_data));
- d->command = FILE_CLOSE_ON_PORT_EXIT;
- d->reply = !0;
- d->fd = desc->fd;
- d->flags = desc->flags;
- d->invoke = invoke_close;
- d->free = free_data;
- d->level = 2;
- d->data_to_free = (void *) desc;
- cq_enq(desc, d);
- desc->fd = FILE_FD_INVALID;
- desc->flags = 0;
- cq_execute(desc);
- } else {
- EF_FREE(desc);
- }
- } else {
- if (desc->fd != FILE_FD_INVALID) {
- do_close(desc->flags, desc->fd);
- desc->fd = FILE_FD_INVALID;
- desc->flags = 0;
- }
- if (desc->read_binp) {
- driver_free_binary(desc->read_binp);
- }
- EF_FREE(desc);
- }
-}
-
-/*********************************************************************
- * Driver entry point -> ready_async
- */
-static void
-file_async_ready(ErlDrvData e, ErlDrvThreadData data)
-{
- file_descriptor *desc = (file_descriptor*)e;
- struct t_data *d = (struct t_data *) data;
- char header[5]; /* result code + count */
- char resbuf[RESBUFSIZE]; /* Result buffer. */
-#ifdef USE_VM_PROBES
- int sched_i1 = d->sched_i1, sched_i2 = d->sched_i2, command = d->command,
- result_ok = d->result_ok,
- posix_errno = d->result_ok ? 0 : d->errInfo.posix_errno;
- DTRACE_CHARBUF(sched_utag, DTRACE_EFILE_BUFSIZ+1);
-
- sched_utag[0] = '\0';
- if (DTRACE_ENABLED(efile_drv_return)) {
- strncpy(sched_utag, d->sched_utag, DTRACE_EFILE_BUFSIZ);
- sched_utag[DTRACE_EFILE_BUFSIZ] = '\0';
- }
-#endif /* USE_VM_PROBES */
-
- TRACE_C('r');
-
- if (try_again(desc, d)) {
- /* DTRACE TODO: what kind of probe makes sense here? */
- return;
- }
-
- switch (d->command)
- {
- case FILE_READ:
- if (!d->result_ok) {
- reply_error(desc, &d->errInfo);
- } else {
- size_t available_bytes =
- d->c.read.bin_offset + d->c.read.bin_size - desc->read_offset;
- if (available_bytes < d->c.read.size) {
- d->c.read.size = available_bytes;
- }
- TRACE_C('D');
- reply_data(desc, d->c.read.binp,
- desc->read_offset, d->c.read.size);
- desc->read_offset += d->c.read.size;
- desc->read_size =
- d->c.read.bin_offset + d->c.read.bin_size - desc->read_offset;
- try_free_read_bin(desc);
- }
- free_read(data);
- break;
- case FILE_READ_LINE:
- /* The read_line stucture differs from the read structure.
- The data->read_offset and d->c.read_line.read_offset are copies, as are
- data->read_size and d->c.read_line.read_size
- The read_line function does not kniow in advance how large the binary has to be,
- why new allocation (but not reallocation of the old binary, for obvious reasons)
- may happen in the worker thread. */
- if (!d->result_ok) {
- reply_error(desc, &d->errInfo);
- } else {
- size_t len = d->c.read_line.nl_pos - d->c.read_line.read_offset;
- TRACE_C('L');
- reply_data(desc, d->c.read_line.binp,
- d->c.read_line.read_offset, len);
- desc->read_offset = d->c.read_line.read_offset + d->c.read_line.nl_skip + len;
- desc->read_size =
- d->c.read_line.read_size - d->c.read_line.nl_skip - len;
- if (desc->read_binp != d->c.read_line.binp) { /* New binary allocated */
- driver_free_binary(desc->read_binp);
- desc->read_binp = d->c.read_line.binp;
- driver_binary_inc_refc(desc->read_binp);
- }
-#if !ALWAYS_READ_LINE_AHEAD
- ASSERT(desc->read_bufsize > 0 || desc->read_size == 0);
- if (desc->read_bufsize == 0) {
- desc->read_offset = desc->read_binp->orig_size; /* triggers cleanup */
- }
-#endif
- try_free_read_bin(desc);
- }
- free_read_line(data);
- break;
- case FILE_READ_FILE:
- if (!d->result_ok)
- reply_error(desc, &d->errInfo);
- else {
- header[0] = FILE_RESP_ALL_DATA;
- TRACE_C('R');
- driver_output_binary(desc->port, header, 1,
- d->c.read_file.binp,
- 0, d->c.read_file.offset);
- }
- free_read_file(data);
- break;
- case FILE_WRITE:
- if (d->reply) {
- if (! d->result_ok) {
- reply_error(desc, &d->errInfo);
- } else {
- reply_Uint(desc, d->c.writev.reply_size);
- }
- } else {
- if (! d->result_ok) {
- desc->write_error = !0;
- desc->write_errInfo = d->errInfo;
- }
- }
- free_data(data);
- break;
- case FILE_LSEEK:
- if (d->reply) {
- if (d->result_ok)
- reply_Sint64(desc, d->c.lseek.location);
- else
- reply_error(desc, &d->errInfo);
- }
- free_data(data);
- break;
- case FILE_MKDIR:
- case FILE_RMDIR:
- case FILE_CHDIR:
- case FILE_DELETE:
- case FILE_FDATASYNC:
- case FILE_FSYNC:
- case FILE_TRUNCATE:
- case FILE_LINK:
- case FILE_SYMLINK:
- case FILE_RENAME:
- case FILE_WRITE_INFO:
- case FILE_FADVISE:
- case FILE_FALLOCATE:
- reply(desc, d->result_ok, &d->errInfo);
- free_data(data);
- break;
- case FILE_ALTNAME:
- case FILE_PWD:
- case FILE_READLINK:
- {
- int length;
- char *resbuf = d->b;
-
- if (!d->result_ok)
- reply_error(desc, &d->errInfo);
- else {
- resbuf[0] = FILE_RESP_FNAME;
- length = 1+FILENAME_BYTELEN((char*) resbuf+1);
- TRACE_C('R');
- driver_output2(desc->port, resbuf, 1, resbuf+1, length-1);
- }
- free_data(data);
- break;
- }
- case FILE_OPEN:
- if (!d->result_ok) {
- reply_error(desc, &d->errInfo);
- } else {
- ASSERT(d->is_fd_unused);
- desc->fd = d->fd;
- desc->flags = d->flags;
- d->is_fd_unused = 0;
- reply_Uint(desc, d->fd);
- }
- free_data(data);
- break;
- case FILE_FSTAT:
- case FILE_LSTAT:
- {
- if (d->result_ok) {
- resbuf[0] = FILE_RESP_INFO;
-
- put_int32(d->info.size_high, &resbuf[1 + ( 0 * 4)]);
- put_int32(d->info.size_low, &resbuf[1 + ( 1 * 4)]);
- put_int32(d->info.type, &resbuf[1 + ( 2 * 4)]);
-
- /* Note 64 bit indexing in resbuf here */
- put_int64(d->info.accessTime, &resbuf[1 + ( 3 * 4)]);
- put_int64(d->info.modifyTime, &resbuf[1 + ( 5 * 4)]);
- put_int64(d->info.cTime, &resbuf[1 + ( 7 * 4)]);
-
- put_int32(d->info.mode, &resbuf[1 + ( 9 * 4)]);
- put_int32(d->info.links, &resbuf[1 + (10 * 4)]);
- put_int32(d->info.major_device, &resbuf[1 + (11 * 4)]);
- put_int32(d->info.minor_device, &resbuf[1 + (12 * 4)]);
- put_int32(d->info.inode, &resbuf[1 + (13 * 4)]);
- put_int32(d->info.uid, &resbuf[1 + (14 * 4)]);
- put_int32(d->info.gid, &resbuf[1 + (15 * 4)]);
- put_int32(d->info.access, &resbuf[1 + (16 * 4)]);
-
-#define RESULT_SIZE (1 + (17 * 4))
- TRACE_C('R');
- driver_output2(desc->port, resbuf, RESULT_SIZE, NULL, 0);
-#undef RESULT_SIZE
- } else
- reply_error(desc, &d->errInfo);
- }
- free_data(data);
- break;
- case FILE_READDIR:
- if (!d->result_ok) {
- reply_error(desc, &d->errInfo);
- } else {
- struct t_readdir_buf *b1 = d->c.read_dir.first_buf;
- char op = FILE_RESP_LFNAME;
-
- TRACE_C('R');
- ASSERT(b1);
-
- while (b1) {
- struct t_readdir_buf *b2 = b1;
- char *p = &b1->buf[0];
- driver_output2(desc->port, p, 1, p + 1, b1->n - 1);
- b1 = b1->next;
- EF_FREE(b2);
- }
- driver_output2(desc->port, &op, 1, NULL, 0);
-
- d->c.read_dir.first_buf = NULL;
- d->c.read_dir.last_buf = NULL;
- }
- free_readdir(data);
- break;
- case FILE_CLOSE:
- if (d->reply) {
- TRACE_C('K');
- reply_ok(desc);
-#ifdef USE_VM_PROBES
- result_ok = 1;
-#endif
- }
- free_data(data);
- break;
- case FILE_PWRITEV:
- if (!d->result_ok) {
- reply_Uint_error(desc, d->c.pwritev.cnt, &d->errInfo);
- } else {
- reply_Uint(desc, d->c.pwritev.n);
- }
- free_data(data);
- break;
- case FILE_PREADV:
- if (!d->result_ok) {
- reply_error(desc, &d->errInfo);
- } else {
- reply_ev(desc, FILE_RESP_LDATA, &d->c.preadv.eiov);
- }
- free_preadv(data);
- break;
- case FILE_IPREAD:
- if (!d->result_ok) {
- reply_error(desc, &d->errInfo);
- } else if (!d->c.preadv.eiov.vsize) {
- reply_eof(desc);
- } else {
- reply_ev(desc, FILE_RESP_N2DATA, &d->c.preadv.eiov);
- }
- free_preadv(data);
- break;
-#ifdef HAVE_SENDFILE
- case FILE_SENDFILE:
- if (d->result_ok == -1) {
- if (d->errInfo.posix_errno == ECONNRESET ||
- d->errInfo.posix_errno == ENOTCONN ||
- d->errInfo.posix_errno == EPIPE)
- reply_string_error(desc,"closed");
- else
- reply_error(desc, &d->errInfo);
- desc->sendfile_state = not_sending;
- free_sendfile(data);
- } else if (d->result_ok == 0) {
- reply_Sint64(desc, d->c.sendfile.written);
- desc->sendfile_state = not_sending;
- free_sendfile(data);
- } else if (d->result_ok == 1) { /* If we are using select to send the rest of the data */
- desc->sendfile_state = sending;
- desc->d = d;
- driver_select(desc->port, (ErlDrvEvent)(long)d->c.sendfile.out_fd,
- ERL_DRV_USE_NO_CALLBACK|ERL_DRV_WRITE, 1);
- }
- break;
-#endif
- case FILE_CLOSE_ON_PORT_EXIT:
- /* See file_stop. However this is never invoked after the port is killed. */
- free_data(data);
- desc = NULL;
- /* This is it for this port, so just send dtrace and return, avoid doing anything to the freed data */
- DTRACE6(efile_drv_return, sched_i1, sched_i2, sched_utag,
- command, result_ok, posix_errno);
- return;
- default:
- abort();
- }
- DTRACE6(efile_drv_return, sched_i1, sched_i2, sched_utag,
- command, result_ok, posix_errno);
- if (desc->write_buffered != 0 && desc->timer_state == timer_idle ) {
- desc->timer_state = timer_write;
- driver_set_timer(desc->port, desc->write_delay);
- }
- cq_execute(desc);
-
-}
-
-
-/*********************************************************************
- * Driver entry point -> output
- */
-static void
-file_output(ErlDrvData e, char* buf, ErlDrvSizeT count)
-{
- file_descriptor* desc = (file_descriptor*)e;
- Efile_error errInfo; /* The error codes for the last operation. */
- Sint fd; /* The file descriptor for this port, if any,
- * -1 if none.
- */
- char* name; /* Points to the filename in buf. */
- int command;
- struct t_data *d = NULL;
-#ifdef USE_VM_PROBES
- char *dt_utag = NULL;
- char *dt_s1 = NULL, *dt_s2 = NULL;
- Sint64 dt_i1 = 0;
- Sint64 dt_i2 = 0;
- Sint64 dt_i3 = 0;
- Sint64 dt_i4 = 0;
- dt_private *dt_priv = get_dt_private(0);
-#endif /* USE_VM_PROBES */
-
- TRACE_C('o');
-
- fd = desc->fd;
- name = buf+1;
- command = *(uchar*)buf++;
-
- switch(command) {
-
- case FILE_MKDIR:
- {
- d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE);
-
- FILENAME_COPY(d->b, name);
-#ifdef USE_VM_PROBES
- dt_s1 = d->b;
- dt_utag = name + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE;
-#endif
- d->command = command;
- d->invoke = invoke_mkdir;
- d->free = free_data;
- d->level = 2;
- goto done;
- }
- case FILE_RMDIR:
- {
- d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE);
-
- FILENAME_COPY(d->b, name);
-#ifdef USE_VM_PROBES
- dt_s1 = d->b;
- dt_utag = name + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE;
-#endif
- d->command = command;
- d->invoke = invoke_rmdir;
- d->free = free_data;
- d->level = 2;
- goto done;
- }
- case FILE_DELETE:
- {
- d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE);
-
- FILENAME_COPY(d->b, name);
-#ifdef USE_VM_PROBES
- dt_s1 = d->b;
- dt_utag = name + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE;
-#endif
- d->command = command;
- d->invoke = invoke_delete_file;
- d->free = free_data;
- d->level = 2;
- goto done;
- }
- case FILE_RENAME:
- {
- char* new_name;
- int namelen = FILENAME_BYTELEN(name)+FILENAME_CHARSIZE;
- new_name = name+namelen;
- d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1
- + namelen
- + FILENAME_BYTELEN(new_name) + FILENAME_CHARSIZE);
-
- FILENAME_COPY(d->b, name);
- FILENAME_COPY(d->b + namelen, new_name);
-#ifdef USE_VM_PROBES
- dt_s1 = d->b;
- dt_s2 = d->b + namelen;
- dt_utag = buf + namelen + FILENAME_BYTELEN(new_name) + FILENAME_CHARSIZE;
-#endif
- d->flags = desc->flags;
- d->fd = fd;
- d->command = command;
- d->invoke = invoke_rename;
- d->free = free_data;
- d->level = 2;
- goto done;
- }
- case FILE_CHDIR:
- {
- d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE);
-
- FILENAME_COPY(d->b, name);
-#ifdef USE_VM_PROBES
- dt_s1 = d->b;
- dt_utag = name + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE;
-#endif
- d->command = command;
- d->invoke = invoke_chdir;
- d->free = free_data;
- d->level = 2;
- goto done;
- }
- case FILE_PWD:
- {
- d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + RESBUFSIZE + 1);
-
- d->drive = *(uchar*)buf;
-#ifdef USE_VM_PROBES
- dt_utag = buf + 1;
-#endif
- d->command = command;
- d->invoke = invoke_pwd;
- d->free = free_data;
- d->level = 2;
- goto done;
- }
-
- case FILE_READDIR:
-#ifdef USE_THREADS
- if (sys_info.async_threads > 0)
- {
- d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + FILENAME_BYTELEN(name) +
- FILENAME_CHARSIZE);
-
- FILENAME_COPY(d->b, name);
-#ifdef USE_VM_PROBES
- dt_s1 = d->b;
- dt_utag = name + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE;
-#endif
- d->dir_handle = NULL;
- d->command = command;
- d->invoke = invoke_readdir;
- d->free = free_readdir;
- d->level = 2;
- d->c.read_dir.first_buf = NULL;
- d->c.read_dir.last_buf = NULL;
- goto done;
- }
- else
-#endif
- {
- size_t resbufsize;
- size_t n = 0, total = 0;
- int res = 0;
- char resbuf[READDIR_BUFSIZE];
-
- EFILE_DIR_HANDLE dir_handle; /* Handle to open directory. */
-
- total = READDIR_BUFSIZE;
- errInfo.posix_errno = 0;
- dir_handle = NULL;
- resbuf[0] = FILE_RESP_LFNAME;
-
-#ifdef USE_VM_PROBES
- dt_s1 = name;
- dt_utag = name + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE;
-#endif
- /* Fill the buffer with multiple directory listings before sending it to the
- * receiving process. READDIR_CHUNKS is minimum number of files sent to the
- * receiver.
- * Format for each driver_output2:
- * ------------------------------------
- * | Type | Len | Filename | ...
- * | 1 byte | 2 bytes | Len bytes | ...
- * ------------------------------------
- */
-
- do {
- n = 1;
- resbufsize = READDIR_BUFSIZE - n;
-
- do {
- res = efile_readdir(&errInfo, name, &dir_handle, resbuf + n + 2, &resbufsize);
-
- if (res) {
- put_int16((Uint16)resbufsize, resbuf + n);
- n += 2 + resbufsize;
- resbufsize = READDIR_BUFSIZE - n;
- }
- } while( res && ((total - n - 2) >= MAXPATHLEN*FILENAME_CHARSIZE));
-
- if (n > 1) {
- driver_output2(desc->port, resbuf, 1, resbuf + 1, n - 1);
- }
- } while(res);
-
- if (errInfo.posix_errno != 0) {
- reply_error(desc, &errInfo);
- return;
- }
-#ifdef USE_VM_PROBES
- if (dt_utag != NULL && dt_utag[0] == '\0') {
- dt_utag = NULL;
- }
-
- DTRACE11(efile_drv_entry, dt_priv->thread_num, dt_priv->tag,
- dt_utag, command, name, dt_s2,
- dt_i1, dt_i2, dt_i3, dt_i4, desc->port_str);
- DTRACE6(efile_drv_return, dt_priv->thread_num, dt_priv->tag++,
- dt_utag, command, 1, 0);
-#endif
- TRACE_C('R');
- driver_output2(desc->port, resbuf, 1, NULL, 0);
- return;
- }
- case FILE_OPEN:
- {
- d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + FILENAME_BYTELEN(buf+4) +
- FILENAME_CHARSIZE);
-
- d->flags = get_int32((uchar*)buf);
- name = buf+4;
- FILENAME_COPY(d->b, name);
-#ifdef USE_VM_PROBES
- dt_i1 = d->flags;
- dt_s1 = d->b;
- dt_utag = name + FILENAME_BYTELEN(d->b) + FILENAME_CHARSIZE;
-#endif
- d->command = command;
- d->invoke = invoke_open;
- d->free = free_data;
- d->level = 2;
- d->is_fd_unused = 1;
- goto done;
- }
-
- case FILE_FDATASYNC:
- {
- d = EF_SAFE_ALLOC(sizeof(struct t_data));
-
- d->fd = fd;
-#ifdef USE_VM_PROBES
- dt_utag = name;
- dt_i1 = fd;
-#endif
- d->command = command;
- d->invoke = invoke_fdatasync;
- d->free = free_data;
- d->level = 2;
- goto done;
- }
-
- case FILE_FSYNC:
- {
- d = EF_SAFE_ALLOC(sizeof(struct t_data));
-
- d->fd = fd;
-#ifdef USE_VM_PROBES
- dt_utag = name;
- dt_i1 = fd;
-#endif
- d->command = command;
- d->invoke = invoke_fsync;
- d->free = free_data;
- d->level = 2;
- goto done;
- }
-
-
- case FILE_FSTAT:
- case FILE_LSTAT:
- {
- d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + FILENAME_BYTELEN(name) +
- FILENAME_CHARSIZE);
-
- FILENAME_COPY(d->b, name);
- d->fd = fd;
-#ifdef USE_VM_PROBES
- dt_utag = name + FILENAME_BYTELEN(d->b) + FILENAME_CHARSIZE;
- if (command == FILE_LSTAT) {
- dt_s1 = d->b;
- } else {
- dt_i1 = fd;
- }
-#endif
- d->command = command;
- d->invoke = invoke_flstat;
- d->free = free_data;
- d->level = 2;
- goto done;
- }
-
- case FILE_TRUNCATE:
- {
- d = EF_SAFE_ALLOC(sizeof(struct t_data));
-
- d->flags = desc->flags;
- d->fd = fd;
-#ifdef USE_VM_PROBES
- dt_utag = name;
- dt_i1 = fd;
- dt_i2 = d->flags;
-#endif
- d->command = command;
- d->invoke = invoke_truncate;
- d->free = free_data;
- d->level = 2;
- goto done;
- }
-
- case FILE_WRITE_INFO:
- {
- d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1
- + FILENAME_BYTELEN(buf + 9*4) + FILENAME_CHARSIZE);
-
- d->info.mode = get_int32(buf + 0 * 4);
- d->info.uid = get_int32(buf + 1 * 4);
- d->info.gid = get_int32(buf + 2 * 4);
- d->info.accessTime = get_int64(buf + 3 * 4);
- d->info.modifyTime = get_int64(buf + 5 * 4);
- d->info.cTime = get_int64(buf + 7 * 4);
-
- FILENAME_COPY(d->b, buf + 9*4);
-#ifdef USE_VM_PROBES
- dt_i1 = d->info.mode;
- dt_i2 = d->info.uid;
- dt_i3 = d->info.gid;
- dt_s1 = d->b;
- dt_utag = buf + 9 * 4 + FILENAME_BYTELEN(d->b) + FILENAME_CHARSIZE;
-#endif
- d->command = command;
- d->invoke = invoke_write_info;
- d->free = free_data;
- d->level = 2;
- goto done;
- }
-
- case FILE_READLINK:
- {
- d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 +
- MAX(RESBUFSIZE, (FILENAME_BYTELEN(name) +
- FILENAME_CHARSIZE)) + 1);
- FILENAME_COPY(d->b, name);
-#ifdef USE_VM_PROBES
- dt_s1 = d->b;
- dt_utag = name + FILENAME_BYTELEN(d->b) + FILENAME_CHARSIZE;
-#endif
- d->command = command;
- d->invoke = invoke_readlink;
- d->free = free_data;
- d->level = 2;
- goto done;
- }
-
- case FILE_ALTNAME:
- {
- d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 +
- MAX(RESBUFSIZE, (FILENAME_BYTELEN(name) +
- FILENAME_CHARSIZE)) + 1);
- FILENAME_COPY(d->b, name);
-#ifdef USE_VM_PROBES
- dt_s1 = d->b;
- dt_utag = name + FILENAME_BYTELEN(d->b) + FILENAME_CHARSIZE;
-#endif
- d->command = command;
- d->invoke = invoke_altname;
- d->free = free_data;
- d->level = 2;
- goto done;
- }
-
-
- case FILE_LINK:
- {
- char* new_name;
- int namelen = FILENAME_BYTELEN(name) + FILENAME_CHARSIZE;
-
- new_name = name+namelen;
- d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1
- + namelen
- + FILENAME_BYTELEN(new_name) + FILENAME_CHARSIZE);
-
- FILENAME_COPY(d->b, name);
- FILENAME_COPY(d->b + namelen, new_name);
-#ifdef USE_VM_PROBES
- dt_s1 = d->b;
- dt_s2 = d->b + namelen;
- dt_utag = buf + namelen + FILENAME_BYTELEN(dt_s2) + FILENAME_CHARSIZE;
-#endif
- d->flags = desc->flags;
- d->fd = fd;
- d->command = command;
- d->invoke = invoke_link;
- d->free = free_data;
- d->level = 2;
- goto done;
- }
-
- case FILE_SYMLINK:
- {
- char* new_name;
- int namelen = FILENAME_BYTELEN(name) + FILENAME_CHARSIZE;
-
- new_name = name+namelen;
- d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1
- + namelen
- + FILENAME_BYTELEN(new_name) + FILENAME_CHARSIZE);
-
- FILENAME_COPY(d->b, name);
- FILENAME_COPY(d->b + namelen, new_name);
-#ifdef USE_VM_PROBES
- dt_s1 = d->b;
- dt_s2 = d->b + namelen;
- dt_utag = buf + namelen + FILENAME_BYTELEN(dt_s2) + FILENAME_CHARSIZE;
-#endif
- d->flags = desc->flags;
- d->fd = fd;
- d->command = command;
- d->invoke = invoke_symlink;
- d->free = free_data;
- d->level = 2;
- goto done;
- }
-
- case FILE_FADVISE:
- {
- d = EF_SAFE_ALLOC(sizeof(struct t_data));
-
- d->fd = fd;
- d->command = command;
- d->invoke = invoke_fadvise;
- d->free = free_data;
- d->level = 2;
- d->c.fadvise.offset = get_int64((uchar*) buf);
- d->c.fadvise.length = get_int64(((uchar*) buf) + sizeof(Sint64));
- d->c.fadvise.advise = get_int32(((uchar*) buf) + 2 * sizeof(Sint64));
-#ifdef USE_VM_PROBES
- dt_i1 = d->fd;
- dt_i2 = d->c.fadvise.offset;
- dt_i3 = d->c.fadvise.length;
- dt_i4 = d->c.fadvise.advise;
- dt_utag = buf + 3 * sizeof(Sint64);
-#endif
- goto done;
- }
-
- case FILE_FALLOCATE:
- {
- d = EF_SAFE_ALLOC(sizeof(struct t_data));
-
- d->fd = fd;
- d->command = command;
- d->invoke = invoke_fallocate;
- d->free = free_data;
- d->level = 2;
- d->c.fallocate.offset = get_int64((uchar*) buf);
- d->c.fallocate.length = get_int64(((uchar*) buf) + sizeof(Sint64));
- goto done;
- }
-
- }
-
- /*
- * Ignore anything else -- let the caller hang.
- */
-
- return;
-
- done:
- if (d) {
-#ifdef USE_VM_PROBES
- d->sched_i1 = dt_priv->thread_num;
- d->sched_i2 = dt_priv->tag;
- d->sched_utag[0] = '\0';
- if (dt_utag != NULL) {
- if (dt_utag[0] == '\0') {
- dt_utag = NULL;
- } else {
- strncpy(d->sched_utag, dt_utag, sizeof(d->sched_utag) - 1);
- d->sched_utag[sizeof(d->sched_utag) - 1] = '\0';
- }
- }
- DTRACE11(efile_drv_entry, dt_priv->thread_num, dt_priv->tag++,
- dt_utag, command, dt_s1, dt_s2,
- dt_i1, dt_i2, dt_i3, dt_i4, desc->port_str);
-#endif
- cq_enq(desc, d);
- }
-}
-
-/*********************************************************************
- * Driver entry point -> flush
- */
-static void
-file_flush(ErlDrvData e) {
- file_descriptor *desc = (file_descriptor *)e;
-#ifdef DEBUG
- int r;
-#endif
-#ifdef USE_VM_PROBES
- dt_private *dt_priv = get_dt_private(dt_driver_io_worker_base);
-#endif
-
- TRACE_C('f');
-
-#ifdef HAVE_SENDFILE
- flush_sendfile(desc, NULL);
-#endif
-
-#ifdef DEBUG
- r =
-#endif
- flush_write(desc, NULL
-#ifdef USE_VM_PROBES
- , dt_priv, (desc->d == NULL) ? NULL : desc->d->sched_utag
-#endif
- );
- /* Only possible reason for bad return value is ENOMEM, and
- * there is nobody to tell...
- */
-#ifdef DEBUG
- ASSERT(r == 0);
-#endif
- cq_execute(desc);
-}
-
-
-
-/*********************************************************************
- * Driver entry point -> control
- * Only debug functionality...
- */
-static ErlDrvSSizeT
-file_control(ErlDrvData e, unsigned int command,
- char* buf, ErlDrvSizeT len, char **rbuf, ErlDrvSizeT rlen) {
- file_descriptor *desc = (file_descriptor *)e;
- switch (command) {
- case 'K' :
- if (rlen < 4) {
- *rbuf = EF_ALLOC(4);
- }
- (*rbuf)[0] = ((desc->key) >> 24) & 0xFF;
- (*rbuf)[1] = ((desc->key) >> 16) & 0xFF;
- (*rbuf)[2] = ((desc->key) >> 8) & 0xFF;
- (*rbuf)[3] = (desc->key) & 0xFF;
- return 4;
- default:
- return 0;
- }
-}
-
-/*********************************************************************
- * Driver entry point -> timeout
- */
-static void
-file_timeout(ErlDrvData e) {
- file_descriptor *desc = (file_descriptor *)e;
- enum e_timer timer_state = desc->timer_state;
-#ifdef USE_VM_PROBES
- dt_private *dt_priv = get_dt_private(dt_driver_io_worker_base);
-#endif
-
- TRACE_C('t');
-
- desc->timer_state = timer_idle;
- switch (timer_state) {
- case timer_idle:
- ASSERT(0);
- break;
- case timer_again:
- ASSERT(desc->invoke);
- ASSERT(desc->free);
- driver_async(desc->port, KEY(desc), desc->invoke, desc->d, desc->free);
- break;
- case timer_write: {
-#ifdef DEBUG
- int r =
-#endif
- flush_write(desc, NULL
-#ifdef USE_VM_PROBES
- , dt_priv, (desc->d == NULL) ? NULL : desc->d->sched_utag
-#endif
- );
- /* Only possible reason for bad return value is ENOMEM, and
- * there is nobody to tell...
- */
- ASSERT(r == 0);
- cq_execute(desc);
- } break;
- } /* case */
-}
-
-
-
-/*********************************************************************
- * Driver entry point -> outputv
- */
-static void
-file_outputv(ErlDrvData e, ErlIOVec *ev) {
- file_descriptor* desc = (file_descriptor*)e;
- char command;
- size_t p, q;
- int err;
- struct t_data *d = NULL;
-#ifdef USE_VM_PROBES
- Sint64 dt_i1 = 0, dt_i2 = 0, dt_i3 = 0;
- Sint64 dt_i4 = 0;
- char *dt_utag = NULL;
- char *dt_s1 = NULL;
- dt_private *dt_priv = get_dt_private(dt_driver_io_worker_base);
-#endif
-
- TRACE_C('v');
-
- p = 0; q = 1;
- if (! EV_GET_CHAR(ev, &command, &p, &q)) {
- /* Empty command */
- reply_posix_error(desc, EINVAL);
- goto done;
- }
- /* 'command' contains the decoded command number,
- * 'p' and 'q' point out the next byte in the command:
- * ((char *)ev->iov[q].iov_base) + p;
- */
-
- TRACE_F(("%i", (int) command));
-
- switch (command) {
-
- case FILE_CLOSE: {
-#ifdef USE_VM_PROBES
- dt_utag = EV_CHAR_P(ev, p, q);
-#endif
- flush_read(desc);
- if (flush_write_check_error(desc, &err
-#ifdef USE_VM_PROBES
- , dt_priv, dt_utag
-#endif
- ) < 0) {
- reply_posix_error(desc, err);
- goto done;
- }
- if (desc->fd != FILE_FD_INVALID) {
- if (! (d = EF_ALLOC(sizeof(struct t_data)))) {
- reply_posix_error(desc, ENOMEM);
- } else {
- d->command = command;
- d->reply = !0;
- d->fd = desc->fd;
- d->flags = desc->flags;
-#ifdef USE_VM_PROBES
- dt_i1 = d->fd;
- dt_i2 = d->flags;
-#endif
- d->invoke = invoke_close;
- d->free = free_data;
- d->level = 2;
- cq_enq(desc, d);
- desc->fd = FILE_FD_INVALID;
- desc->flags = 0;
- }
- } else {
- reply_posix_error(desc, EBADF);
- }
- } goto done;
-
- case FILE_READ: {
- Uint32 sizeH, sizeL;
- size_t size, alloc_size;
-
- if (!EV_GET_UINT32(ev, &sizeH, &p, &q)
- || !EV_GET_UINT32(ev, &sizeL, &p, &q)) {
- /* Wrong buffer length to contain the read count */
- reply_posix_error(desc, EINVAL);
- goto done;
- }
-#ifdef USE_VM_PROBES
- dt_utag = EV_CHAR_P(ev, p, q);
-#endif
- if (flush_write_check_error(desc, &err
-#ifdef USE_VM_PROBES
- , dt_priv, dt_utag
-#endif
- ) < 0) {
- reply_posix_error(desc, err);
- goto done;
- }
-#if ALWAYS_READ_LINE_AHEAD
- if (desc->read_bufsize == 0 && desc->read_binp != NULL && desc->read_size > 0) {
- /* We have allocated a buffer for line mode but should not really have a
- read-ahead buffer... */
- if (lseek_flush_read(desc, &err
-#ifdef USE_VM_PROBES
- , dt_priv, dt_utag
-#endif
- ) < 0) {
- reply_posix_error(desc, err);
- goto done;
- }
- }
-#endif
-#if SIZEOF_SIZE_T == 4
- if (sizeH != 0) {
- reply_posix_error(desc, EINVAL);
- goto done;
- }
- size = sizeL;
-#else
- size = ((size_t)sizeH << 32) | sizeL;
-#endif
- if ((desc->fd == FILE_FD_INVALID)
- || (! (desc->flags & EFILE_MODE_READ)) ) {
- reply_posix_error(desc, EBADF);
- goto done;
- }
- if (size == 0) {
- reply_buf(desc, &command, 0);
- goto done;
- }
- if (desc->read_size >= size) {
- /* We already have all data */
- TRACE_C('D');
- reply_data(desc, desc->read_binp, desc->read_offset, size);
- desc->read_offset += size;
- desc->read_size -= size;
- try_free_read_bin(desc);
- goto done;
- }
- /* We may have some of the data
- */
- /* Justification for the following strange formula:
- * If the read request is for such a large block as more than
- * half the buffer size it may lead to a lot of unnecessary copying,
- * since the tail of the old buffer is copied to the head of the
- * new, and if the tail is almost half the buffer it is a lot
- * to copy. Therefore allocate the exact amount needed in
- * this case, giving no lingering tail. */
- alloc_size =
- size > (desc->read_bufsize>>1) ?
- size : desc->read_bufsize;
- if (! desc->read_binp) {
- /* Need to allocate a new binary for the result */
- if (! (desc->read_binp = driver_alloc_binary(alloc_size))) {
- reply_posix_error(desc, ENOMEM);
- goto done;
- }
- } else {
- /* We already have a buffer */
- if (desc->read_binp->orig_size - desc->read_offset < size) {
- /* Need to allocate a new binary for the result */
- ErlDrvBinary *binp;
- if (! (binp = driver_alloc_binary(alloc_size))) {
- reply_posix_error(desc, ENOMEM);
- goto done;
- }
- /* Move data we already have to the new binary */
- sys_memcpy(binp->orig_bytes,
- desc->read_binp->orig_bytes + desc->read_offset,
- desc->read_size);
- driver_free_binary(desc->read_binp);
- desc->read_offset = 0;
- desc->read_binp = binp;
- }
- }
- if (! (d = EF_ALLOC(sizeof(struct t_data)))) {
- reply_posix_error(desc, ENOMEM);
- goto done;
- }
- d->command = command;
- d->reply = !0;
- d->fd = desc->fd;
- d->flags = desc->flags;
- d->c.read.binp = desc->read_binp;
- d->c.read.bin_offset = desc->read_offset + desc->read_size;
- d->c.read.bin_size = desc->read_binp->orig_size - d->c.read.bin_offset;
- d->c.read.size = size;
-#ifdef USE_VM_PROBES
- dt_i1 = d->fd;
- dt_i2 = d->flags;
- dt_i3 = d->c.read.size;
-#endif
- driver_binary_inc_refc(d->c.read.binp);
- d->invoke = invoke_read;
- d->free = free_read;
- d->level = 1;
- cq_enq(desc, d);
- } goto done; /* case FILE_READ: */
-
- case FILE_READ_LINE: {
- /*
- * Icky little creature... We do mostly as ordinary file read, but with a few differences.
- * 1) We have to scan for proper newline sequence if there is a buffer already, we cannot know
- * in advance if the buffer contains a whole line without scanning.
- * 2) We do not know how large the buffer needs to be in advance. We give a default buffer,
- * but the worker may need to allocate a new one. Freeing the old and rereferencing a newly
- * allocated binary + dealing with offsets and lengts are done in file_async ready
- * for this OP.
- */
-#ifdef USE_VM_PROBES
- dt_utag = EV_CHAR_P(ev, p, q);
-#endif
- if (flush_write_check_error(desc, &err
-#ifdef USE_VM_PROBES
- , dt_priv, dt_utag
-#endif
- ) < 0) {
- reply_posix_error(desc, err);
- goto done;
- }
- if (ev->size != 1
-#ifdef USE_VM_PROBES
- + FILENAME_BYTELEN(dt_utag) + FILENAME_CHARSIZE
-#endif
- ) {
- /* Wrong command length */
- reply_posix_error(desc, EINVAL);
- goto done;
- }
- if ((desc->fd == FILE_FD_INVALID)
- || (! (desc->flags & EFILE_MODE_READ)) ) {
- reply_posix_error(desc, EBADF);
- goto done;
- }
- if (desc->read_size > 0) {
- /* look for '\n' in what we'we already got */
- void *nl_ptr = memchr(desc->read_binp->orig_bytes + desc->read_offset,'\n',desc->read_size);
- if (nl_ptr != NULL) {
- /* If found, we're done */
- int skip = 0;
- size_t size = ((char *) nl_ptr) -
- ((char *) (desc->read_binp->orig_bytes + desc->read_offset)) + 1;
- if (size > 1 &&
- *(((char *) nl_ptr) - 1) == '\r') {
- *(((char *) nl_ptr) - 1) = '\n';
- skip = 1;
- --size;
- }
- reply_data(desc, desc->read_binp, desc->read_offset, size);
- desc->read_offset += (size + skip);
- desc->read_size -= (size + skip);
- try_free_read_bin(desc);
- goto done;
- }
- }
- /* Now, it's up to the thread to work out the need for more buffers and such, it's
- no use doing it in this thread as we do not have the information required anyway.
- Even a NULL buffer could be handled by the thread, but code is simplified by us
- allocating it */
- if (! desc->read_binp) {
- int alloc_size = (desc->read_bufsize > DEFAULT_LINEBUF_SIZE) ? desc->read_bufsize :
- DEFAULT_LINEBUF_SIZE;
- /* Allocate a new binary for the result */
- if (! (desc->read_binp = driver_alloc_binary(alloc_size))) {
- reply_posix_error(desc, ENOMEM);
- goto done;
- }
- }
- if (! (d = EF_ALLOC(sizeof(struct t_data)))) {
- reply_posix_error(desc, ENOMEM);
- goto done;
- }
-
- d->command = command;
- d->reply = !0;
- d->fd = desc->fd;
- d->flags = desc->flags;
- d->c.read_line.binp = desc->read_binp;
- d->c.read_line.read_offset = desc->read_offset;
- d->c.read_line.read_size = desc->read_size;
-#ifdef USE_VM_PROBES
- dt_i1 = d->fd;
- dt_i2 = d->flags;
- dt_i3 = d->c.read_line.read_offset;
-#endif
-#if !ALWAYS_READ_LINE_AHEAD
- d->c.read_line.read_ahead = (desc->read_bufsize > 0);
-#ifdef USE_VM_PROBES
- dt_i4 = d->c.read_line.read_ahead;
-#endif
-#endif
- driver_binary_inc_refc(d->c.read.binp);
- d->invoke = invoke_read_line;
- d->free = free_read_line;
- d->level = 1;
- cq_enq(desc, d);
- } goto done;
- case FILE_WRITE: { /* Dtrace: The dtrace user tag is not last in message,
- but follows the message tag directly.
- This is handled specially in prim_file.erl */
- ErlDrvSizeT skip = 1;
- ErlDrvSizeT size = ev->size - skip;
-
-#ifdef USE_VM_PROBES
- dt_utag = EV_CHAR_P(ev, p, q);
- skip += FILENAME_BYTELEN(dt_utag) + FILENAME_CHARSIZE;
- size = ev->size - skip;
-#endif
- if (lseek_flush_read(desc, &err
-#ifdef USE_VM_PROBES
- , dt_priv, dt_utag
-#endif
- ) < 0) {
- reply_posix_error(desc, err);
- goto done;
- }
- if (! (desc->flags & EFILE_MODE_WRITE)) {
- reply_posix_error(desc, EBADF);
- goto done;
- }
- if (size == 0) {
- reply_Uint(desc, size);
- goto done;
- }
- MUTEX_LOCK(desc->q_mtx);
- if (driver_enqv(desc->port, ev, skip)) {
- MUTEX_UNLOCK(desc->q_mtx);
- reply_posix_error(desc, ENOMEM);
- goto done;
- }
- desc->write_buffered += size;
- if (desc->write_buffered < desc->write_bufsize) {
- MUTEX_UNLOCK(desc->q_mtx);
- reply_Uint(desc, size);
- if (desc->timer_state == timer_idle) {
- desc->timer_state = timer_write;
- driver_set_timer(desc->port, desc->write_delay);
- }
- } else {
- if ((d = async_write(desc, &err, !0, size
-#ifdef USE_VM_PROBES
- , &dt_i1, &dt_i2, &dt_i3
-#endif
- )) == NULL) {
- MUTEX_UNLOCK(desc->q_mtx);
- reply_posix_error(desc, err);
- goto done;
- } else {
- MUTEX_UNLOCK(desc->q_mtx);
- }
- }
- } goto done; /* case FILE_WRITE */
-
- case FILE_PWRITEV: { /* Dtrace: The dtrace user tag is not last in message,
- but follows the message tag directly.
- This is handled specially in prim_file.erl */
- Uint32 i, j, n;
- size_t total;
-#ifdef USE_VM_PROBES
- char dt_tmp;
- int dt_utag_bytes = 1;
-
- dt_utag = EV_CHAR_P(ev, p, q);
- /* This will work for UTF-8, but not for UTF-16 - extra reminder here */
-#ifdef FILENAMES_16BIT
-#error 16bit characters in filenames and dtrace in combination is not supported.
-#endif
- while (EV_GET_CHAR(ev, &dt_tmp, &p, &q) && dt_tmp != '\0') {
- dt_utag_bytes++;
- }
-#endif
- if (ev->size < 1+4
-#ifdef USE_VM_PROBES
- + dt_utag_bytes
-#endif
- || !EV_GET_UINT32(ev, &n, &p, &q)) {
- /* Buffer too short to contain even the number of pos/size specs */
- reply_Uint_posix_error(desc, 0, EINVAL);
- goto done;
- }
- if (lseek_flush_read(desc, &err
-#ifdef USE_VM_PROBES
- , dt_priv, dt_utag
-#endif
- ) < 0) {
- reply_Uint_posix_error(desc, 0, err);
- goto done;
- }
- if (flush_write_check_error(desc, &err
-#ifdef USE_VM_PROBES
- , dt_priv, dt_utag
-#endif
- ) < 0) {
- reply_Uint_posix_error(desc, 0, err);
- goto done;
- }
- if (n == 0) {
- /* Trivial case - nothing to write */
- if (ev->size != 1+4) {
- reply_posix_error(desc, err);
- } else {
- reply_Uint(desc, 0);
- }
- goto done;
- }
- if (ev->size < 1+4+8*(2*n)
-#ifdef USE_VM_PROBES
- + dt_utag_bytes
-#endif
- ) {
- /* Buffer too short to contain even the pos/size specs */
- reply_Uint_posix_error(desc, 0, EINVAL);
- goto done;
- }
- d = EF_ALLOC(sizeof(struct t_data)
- + (n * sizeof(struct t_pbuf_spec)));
- if (! d) {
- reply_Uint_posix_error(desc, 0, ENOMEM);
- goto done;
- }
- d->command = command;
- d->reply = !0;
- d->fd = desc->fd;
- d->flags = desc->flags;
-#ifdef USE_VM_PROBES
- dt_i1 = d->fd;
- dt_i2 = d->flags;
-#endif
- d->c.pwritev.port = desc->port;
- d->c.pwritev.q_mtx = desc->q_mtx;
- d->c.pwritev.n = n;
- d->c.pwritev.cnt = 0;
- total = 0;
- j = 0;
- /* Create pos/size specs in the thread data structure
- * for all non-zero size binaries. Calculate total size.
- */
- for(i = 0; i < n; i++) {
- Uint32 sizeH, sizeL;
- size_t size;
- if ( !EV_GET_SINT64(ev, &d->c.pwritev.specs[i].offset, &p, &q)
- || !EV_GET_UINT32(ev, &sizeH, &p, &q)
- || !EV_GET_UINT32(ev, &sizeL, &p, &q)) {
- /* Misalignment in buffer */
- reply_Uint_posix_error(desc, 0, EINVAL);
- EF_FREE(d);
- goto done;
- }
-#if SIZEOF_SIZE_T == 4
- if (sizeH != 0) {
- reply_Uint_posix_error(desc, 0, EINVAL);
- EF_FREE(d);
- goto done;
- }
- size = sizeL;
-#else
- size = ((size_t)sizeH<<32) | sizeL;
-#endif
- if (size > 0) {
- total += size;
- d->c.pwritev.specs[j].size = size;
- j++;
- }
- }
- d->c.pwritev.size = total;
-#ifdef USE_VM_PROBES
- dt_i3 = d->c.pwritev.size;
-#endif
- if (j == 0) {
- /* Trivial case - nothing to write */
- EF_FREE(d);
- reply_Uint(desc, 0);
- } else {
- ErlDrvSizeT skip = 1 + 4 + 8 * (2*n)
-#ifdef USE_VM_PROBES
- + dt_utag_bytes
-#endif
- ;
- if (skip + total != ev->size) {
- /* Actual amount of data does not match
- * total of all pos/size specs
- */
- EF_FREE(d);
- reply_Uint_posix_error(desc, 0, EINVAL);
- } else {
- /* Enqueue the data */
- MUTEX_LOCK(desc->q_mtx);
- driver_enqv(desc->port, ev, skip);
- MUTEX_UNLOCK(desc->q_mtx);
- /* Execute the command */
- d->invoke = invoke_pwritev;
- d->free = free_data;
- d->level = 1;
- cq_enq(desc, d);
- }
- }
- } goto done; /* case FILE_PWRITEV: */
-
- case FILE_PREADV: { /* Dtrace: The dtrace user tag is not last in message,
- but follows the message tag directly.
- This is handled specially in prim_file.erl */
- register void * void_ptr;
- Uint32 i, n;
- ErlIOVec *res_ev;
-#ifdef USE_VM_PROBES
- char dt_tmp;
- int dt_utag_bytes = 1;
- /* This will work for UTF-8, but not for UTF-16 - extra reminder here */
-#ifdef FILENAMES_16BIT
-#error 16bit characters in filenames and dtrace in combination is not supported.
-#endif
- dt_utag = EV_CHAR_P(ev, p, q);
- while (EV_GET_CHAR(ev, &dt_tmp, &p, &q) && dt_tmp != '\0') {
- dt_utag_bytes++;
- }
-#endif
- if (lseek_flush_read(desc, &err
-#ifdef USE_VM_PROBES
- , dt_priv, dt_utag
-#endif
- ) < 0) {
- reply_posix_error(desc, err);
- goto done;
- }
- if (flush_write_check_error(desc, &err
-#ifdef USE_VM_PROBES
- , dt_priv, dt_utag
-#endif
- ) < 0) {
- reply_posix_error(desc, err);
- goto done;
- }
- if (ev->size < 1+8
-#ifdef USE_VM_PROBES
- + dt_utag_bytes
-#endif
- || !EV_GET_UINT32(ev, &n, &p, &q)
- || !EV_GET_UINT32(ev, &n, &p, &q)) {
- /* Buffer too short to contain even the number of pos/size specs */
- reply_posix_error(desc, EINVAL);
- goto done;
- }
- if (ev->size < 1+8+8*(2*n)
-#ifdef USE_VM_PROBES
- + dt_utag_bytes
-#endif
- ) {
- /* Buffer wrong length to contain the pos/size specs */
- reply_posix_error(desc, EINVAL);
- goto done;
- }
- /* Create the thread data structure with the contained ErlIOVec
- * and corresponding binaries for the response
- */
- d = EF_ALLOC(sizeof(*d)
- + (n * sizeof(*d->c.preadv.offsets))
- + ((1+n) * (sizeof(*res_ev->iov)
- + sizeof(*res_ev->binv))));
- if (! d) {
- reply_posix_error(desc, ENOMEM);
- goto done;
- }
- d->command = command;
- d->reply = !0;
- d->fd = desc->fd;
- d->flags = desc->flags;
-#ifdef USE_VM_PROBES
- dt_i1 = d->fd;
- dt_i2 = d->flags;
-#endif
- d->c.preadv.n = n;
- d->c.preadv.cnt = 0;
- d->c.preadv.size = 0;
- res_ev = &d->c.preadv.eiov;
- /* XXX possible alignment problems here for weird machines */
- res_ev->vsize = 1+d->c.preadv.n;
- res_ev->iov = void_ptr = &d->c.preadv.offsets[d->c.preadv.n];
- res_ev->binv = void_ptr = &res_ev->iov[res_ev->vsize];
- /* Read in the pos/size specs and allocate binaries for the results */
- for (i = 1; i < 1+n; i++) {
- Uint32 sizeH, sizeL;
- size_t size;
- if ( !EV_GET_SINT64(ev, &d->c.preadv.offsets[i-1], &p, &q)
- || !EV_GET_UINT32(ev, &sizeH, &p, &q)
- || !EV_GET_UINT32(ev, &sizeL, &p, &q)) {
- reply_posix_error(desc, EINVAL);
- break;
- }
-#if SIZEOF_SIZE_T == 4
- if (sizeH != 0) {
- reply_posix_error(desc, EINVAL);
- break;
- }
- size = sizeL;
-#else
- size = ((size_t)sizeH<<32) | sizeL;
-#endif
-#ifdef USE_VM_PROBES
- dt_i3 += size;
-#endif
- if (! (res_ev->binv[i] = driver_alloc_binary(size))) {
- reply_posix_error(desc, ENOMEM);
- break;
- } else {
- res_ev->iov[i].iov_len = size;
- res_ev->iov[i].iov_base = res_ev->binv[i]->orig_bytes;
- }
- }
- if (i < 1+n) {
- for (i--; i > 0; i--) {
- driver_free_binary(res_ev->binv[i]);
- }
- EF_FREE(d);
- goto done;
- }
- /* Allocate the header binary (index 0) */
- res_ev->binv[0] = driver_alloc_binary(4+4+8*n);
- if (! res_ev->binv[0]) {
- reply_posix_error(desc, ENOMEM);
- for (i = 1; i < 1+n; i++) {
- driver_free_binary(res_ev->binv[i]);
- }
- EF_FREE(d);
- goto done;
- }
- res_ev->iov[0].iov_len = 4+4+8*n;
- res_ev->iov[0].iov_base = res_ev->binv[0]->orig_bytes;
- /* Fill in the number of buffers in the header */
- put_int32(0, res_ev->iov[0].iov_base);
- put_int32(n, (char *)(res_ev->iov[0].iov_base) + 4);
- /**/
- res_ev->size = res_ev->iov[0].iov_len;
- if (n == 0) {
- /* Trivial case - nothing to read */
- reply_ev(desc, FILE_RESP_LDATA, res_ev);
- free_preadv(d);
- goto done;
- } else {
- d->invoke = invoke_preadv;
- d->free = free_preadv;
- d->level = 1;
- cq_enq(desc, d);
- }
- } goto done; /* case FILE_PREADV: */
-
- case FILE_LSEEK: {
- Sint64 offset; /* Offset for seek */
- Uint32 origin; /* Origin of seek. */
-
- if (ev->size < 1+8+4
- || !EV_GET_SINT64(ev, &offset, &p, &q)
- || !EV_GET_UINT32(ev, &origin, &p, &q)) {
- /* Wrong length of buffer to contain offset and origin */
- reply_posix_error(desc, EINVAL);
- goto done;
- }
-#ifdef USE_VM_PROBES
- dt_utag = EV_CHAR_P(ev, p, q);
-#endif
- if (lseek_flush_read(desc, &err
-#ifdef USE_VM_PROBES
- , dt_priv, dt_utag
-#endif
- ) < 0) {
- reply_posix_error(desc, err);
- goto done;
- }
- if (flush_write_check_error(desc, &err
-#ifdef USE_VM_PROBES
- , dt_priv, dt_utag
-#endif
- ) < 0) {
- reply_posix_error(desc, err);
- goto done;
- }
- if ((d = async_lseek(desc, &err, !0, offset, origin
-#ifdef USE_VM_PROBES
- , &dt_i1, &dt_i2, &dt_i3
-#endif
- )) == NULL) {
- reply_posix_error(desc, err);
- goto done;
- }
- } goto done;
-
- case FILE_READ_FILE: {
- char *filename;
- if (ev->size < 1+1) {
- /* Buffer contains empty name */
- reply_posix_error(desc, ENOENT);
- goto done;
- }
-#ifndef USE_VM_PROBES
- /* In the dtrace case, the iov has an extra element, the dtrace utag - we will need
- another test to see that
- the filename is in a single buffer: */
- if (ev->size-1 != ev->iov[q].iov_len-p) {
- /* Name not in one single buffer */
- reply_posix_error(desc, EINVAL);
- goto done;
- }
-#else
- if (((byte *)ev->iov[q].iov_base)[ev->iov[q].iov_len-1] != '\0') {
- /* Name not in one single buffer */
- reply_posix_error(desc, EINVAL);
- goto done;
- }
-#endif
- filename = EV_CHAR_P(ev, p, q);
- d = EF_ALLOC(sizeof(struct t_data) -1 + FILENAME_BYTELEN(filename) + FILENAME_CHARSIZE);
- if (! d) {
- reply_posix_error(desc, ENOMEM);
- goto done;
- }
- d->command = command;
- d->reply = !0;
- /* Copy name */
- FILENAME_COPY(d->b, filename);
-#ifdef USE_VM_PROBES
- {
- char dt_tmp;
-
- /* This will work for UTF-8, but not for UTF-16 - extra reminder here */
-#ifdef FILENAMES_16BIT
-#error 16bit characters in filenames and dtrace in combination is not supported.
-#endif
- while (EV_GET_CHAR(ev, &dt_tmp, &p, &q) && dt_tmp != '\0')
- ;
- dt_s1 = d->b;
- dt_utag = EV_CHAR_P(ev, p, q);
- }
-#endif
- d->c.read_file.binp = NULL;
- d->invoke = invoke_read_file;
- d->free = free_read_file;
- d->level = 2;
- cq_enq(desc, d);
- } goto done;
-
- case FILE_IPREAD: {
- /* This operation cheets by using invoke_preadv() and free_preadv()
- * plus its own invoke_ipread. Therefore the result format is
- * a bit awkward - the header binary contains one extra 64 bit
- * field that invoke_preadv() fortunately ignores,
- * and the first 64 bit field does not contain the number of
- * data binaries which invoke_preadv() also ignores.
- */
- register void * void_ptr;
- char mode;
- Sint64 hdr_offset;
- Uint32 max_size;
- ErlIOVec *res_ev;
- int vsize;
- if (! EV_GET_CHAR(ev, &mode, &p, &q)) {
- /* Empty command */
- reply_posix_error(desc, EINVAL);
- goto done;
- }
- if (mode != IPREAD_S32BU_P32BU) {
- reply_posix_error(desc, EINVAL);
- goto done;
- }
- if (ev->size < 1+1+8+4
- || !EV_GET_SINT64(ev, &hdr_offset, &p, &q)
- || !EV_GET_UINT32(ev, &max_size, &p, &q)) {
- /* Buffer too short to contain
- * the header offset and max size spec */
- reply_posix_error(desc, EINVAL);
- goto done;
- }
-#ifdef USE_VM_PROBES
- dt_utag = EV_CHAR_P(ev, p, q);
-#endif
- if (lseek_flush_read(desc, &err
-#ifdef USE_VM_PROBES
- , dt_priv, dt_utag
-#endif
- ) < 0) {
- reply_posix_error(desc, err);
- goto done;
- }
- if (flush_write_check_error(desc, &err
-#ifdef USE_VM_PROBES
- , dt_priv, dt_utag
-#endif
- ) < 0) {
- reply_posix_error(desc, err);
- goto done;
- }
- /* Create the thread data structure with the contained ErlIOVec
- * and corresponding binaries for the response
- */
- vsize = 2;
- d = EF_ALLOC(sizeof(*d) +
- vsize*(sizeof(*res_ev->iov) + sizeof(*res_ev->binv)));
- if (! d) {
- reply_posix_error(desc, ENOMEM);
- goto done;
- }
- d->command = command;
- d->reply = !0;
- d->fd = desc->fd;
- d->flags = desc->flags;
- d->c.preadv.offsets[0] = hdr_offset;
- d->c.preadv.size = max_size;
-#ifdef USE_VM_PROBES
- dt_i1 = d->fd;
- dt_i2 = d->flags;
- dt_i3 = d->c.preadv.offsets[0];
- dt_i4 = d->c.preadv.size;
-#endif
- res_ev = &d->c.preadv.eiov;
- /* XXX possible alignment problems here for weird machines */
- res_ev->iov = void_ptr = d + 1;
- res_ev->binv = void_ptr = res_ev->iov + vsize;
- res_ev->size = 0;
- res_ev->vsize = 0;
- d->invoke = invoke_ipread;
- d->free = free_preadv;
- d->level = 1;
- cq_enq(desc, d);
- } goto done; /* case FILE_IPREAD: */
-
- case FILE_SETOPT: {
- char opt;
-
- if (ev->size < 1+1
- || !EV_GET_CHAR(ev, &opt, &p, &q)) {
- /* Buffer too short to contain even the option type */
- reply_posix_error(desc, EINVAL);
- goto done;
- }
-#ifdef USE_VM_PROBES
- dt_i1 = opt;
- dt_utag = EV_CHAR_P(ev, p, q);
-#endif
- switch (opt) {
- case FILE_OPT_DELAYED_WRITE: {
- Uint32 sizeH, sizeL, delayH, delayL;
- if (ev->size != 1+1+4*sizeof(Uint32)
-#ifdef USE_VM_PROBES
- + FILENAME_BYTELEN(dt_utag) + FILENAME_CHARSIZE
-#endif
- || !EV_GET_UINT32(ev, &sizeH, &p, &q)
- || !EV_GET_UINT32(ev, &sizeL, &p, &q)
- || !EV_GET_UINT32(ev, &delayH, &p, &q)
- || !EV_GET_UINT32(ev, &delayL, &p, &q)) {
- /* Buffer has wrong length to contain the option values */
- reply_posix_error(desc, EINVAL);
- goto done;
- }
-#if SIZEOF_SIZE_T == 4
- if (sizeH != 0) {
- reply_posix_error(desc, EINVAL);
- goto done;
- }
- desc->write_bufsize = sizeL;
-#else
- desc->write_bufsize = ((size_t)sizeH << 32) | sizeL;
-#endif
-#if SIZEOF_LONG == 4
- if (delayH != 0) {
- reply_posix_error(desc, EINVAL);
- goto done;
- }
- desc->write_delay = delayL;
-#else
- desc->write_delay = ((unsigned long)delayH << 32) | delayL;
-#endif
-#ifdef USE_VM_PROBES
- dt_i2 = desc->write_delay;
-#endif
- TRACE_C('K');
- reply_ok(desc);
- } goto done;
- case FILE_OPT_READ_AHEAD: {
- Uint32 sizeH, sizeL;
- if (ev->size != 1+1+2*sizeof(Uint32)
-#ifdef USE_VM_PROBES
- + FILENAME_BYTELEN(dt_utag)+FILENAME_CHARSIZE
-#endif
- || !EV_GET_UINT32(ev, &sizeH, &p, &q)
- || !EV_GET_UINT32(ev, &sizeL, &p, &q)) {
- /* Buffer has wrong length to contain the option values */
- reply_posix_error(desc, EINVAL);
- goto done;
- }
-#if SIZEOF_SIZE_T == 4
- if (sizeH != 0) {
- reply_posix_error(desc, EINVAL);
- goto done;
- }
- desc->read_bufsize = sizeL;
-#else
- desc->read_bufsize = ((size_t)sizeH << 32) | sizeL;
-#endif
-#ifdef USE_VM_PROBES
- dt_i2 = desc->read_bufsize;
-#endif
- TRACE_C('K');
- reply_ok(desc);
- } goto done;
- default:
- reply_posix_error(desc, EINVAL);
- goto done;
- } /* case FILE_OPT_DELAYED_WRITE: */
- } ASSERT(0); goto done; /* case FILE_SETOPT: */
-
- case FILE_SENDFILE: {
-
-#ifdef HAVE_SENDFILE
- struct t_data *d;
- Uint32 out_fd, offsetH, offsetL, hd_len, tl_len;
- Uint64 nbytes;
- char flags;
-
- if (ev->size < 1 + 7 * sizeof(Uint32) + sizeof(char)
- || !EV_GET_UINT32(ev, &out_fd, &p, &q)
- || !EV_GET_CHAR(ev, &flags, &p, &q)
- || !EV_GET_UINT32(ev, &offsetH, &p, &q)
- || !EV_GET_UINT32(ev, &offsetL, &p, &q)
- || !EV_GET_UINT64(ev, &nbytes, &p, &q)
- || !EV_GET_UINT32(ev, &hd_len, &p, &q)
- || !EV_GET_UINT32(ev, &tl_len, &p, &q)) {
- /* Buffer has wrong length to contain all the needed values */
- reply_posix_error(desc, EINVAL);
- goto done;
- }
-
- if (hd_len != 0 || tl_len != 0) {
- /* We do not allow header, trailers */
- reply_posix_error(desc, EINVAL);
- goto done;
- }
-
-
- if (flags & SENDFILE_FLGS_USE_THREADS && !THRDS_AVAILABLE) {
- /* We do not allow use_threads flag on a system where
- no threads are available. */
- reply_posix_error(desc, EINVAL);
- goto done;
- }
-
- d = EF_SAFE_ALLOC(sizeof(struct t_data));
- d->fd = desc->fd;
- d->command = command;
- d->invoke = invoke_sendfile;
- d->free = free_sendfile;
- d->flags = flags;
- d->level = 2;
-
- d->c.sendfile.out_fd = (int) out_fd;
- d->c.sendfile.written = 0;
- d->c.sendfile.port = desc->port;
- d->c.sendfile.q_mtx = desc->q_mtx;
-
- #if SIZEOF_OFF_T == 4
- if (offsetH != 0) {
- reply_posix_error(desc, EINVAL);
- goto done;
- }
- d->c.sendfile.offset = (off_t) offsetL;
- #else
- d->c.sendfile.offset = ((off_t) offsetH << 32) | offsetL;
- #endif
-
- d->c.sendfile.nbytes = nbytes;
-
- if (USE_THRDS_FOR_SENDFILE(d)) {
- SET_BLOCKING(d->c.sendfile.out_fd);
- } else {
- /**
- * Write a place holder to queue in order to force file_flush
- * to be called before the driver is closed.
- */
- char tmp[1] = "";
- MUTEX_LOCK(d->c.sendfile.q_mtx);
- if (driver_enq(d->c.sendfile.port, tmp, 1)) {
- MUTEX_UNLOCK(d->c.sendfile.q_mtx);
- reply_posix_error(desc, ENOMEM);
- goto done;
- }
- MUTEX_UNLOCK(d->c.sendfile.q_mtx);
- }
-
- cq_enq(desc, d);
-#else
- reply_posix_error(desc, ENOTSUP);
-#endif
- goto done;
- } /* case FILE_SENDFILE: */
-
- } /* switch(command) */
-
- if (lseek_flush_read(desc, &err
-#ifdef USE_VM_PROBES
- , dt_priv, dt_utag
-#endif
- ) < 0) {
- reply_posix_error(desc, err);
- goto done;
- }
- if (flush_write_check_error(desc, &err
-#ifdef USE_VM_PROBES
- , dt_priv, dt_utag
-#endif
- ) < 0) {
- reply_posix_error(desc, err);
- goto done;
- } else {
- /* Flatten buffer and send it to file_output(desc, buf, len) */
- int len = ev->size;
- char *buf = EF_ALLOC(len);
- if (! buf) {
- reply_posix_error(desc, ENOMEM);
- goto done;
- }
- driver_vec_to_buf(ev, buf, len);
- file_output((ErlDrvData) desc, buf, len);
- EF_FREE(buf);
- goto done;
- }
-
- done:
- if (d != NULL) {
-#ifdef USE_VM_PROBES
- /*
- * If d == NULL, then either:
- * 1). There was an error of some sort, or
- * 2). The command given to us is actually implemented
- * by file_output() instead.
- *
- * Case #1 is probably a TODO item, perhaps?
- * Case #2 we definitely don't want to activate a probe.
- */
- d->sched_i1 = dt_priv->thread_num;
- d->sched_i2 = dt_priv->tag;
- d->sched_utag[0] = '\0';
- if (dt_utag != NULL) {
- if (dt_utag[0] == '\0') {
- dt_utag = NULL;
- } else {
- strncpy(d->sched_utag, dt_utag, sizeof(d->sched_utag) - 1);
- d->sched_utag[sizeof(d->sched_utag) - 1] = '\0';
- }
- }
- DTRACE11(efile_drv_entry, dt_priv->thread_num, dt_priv->tag++,
- dt_utag, command, dt_s1, NULL, dt_i1, dt_i2, dt_i3, dt_i4,
- desc->port_str);
-#endif
- }
- cq_execute(desc);
-}
-
-#ifdef USE_VM_PROBES
-dt_private *
-get_dt_private(int base)
-{
- dt_private *dt_priv = (dt_private *) pthread_getspecific(dt_driver_key);
-
- if (dt_priv == NULL) {
- dt_priv = EF_SAFE_ALLOC(sizeof(dt_private));
- erts_mtx_lock(&dt_driver_mutex);
- dt_priv->thread_num = (base + dt_driver_idnum++);
- erts_mtx_unlock(&dt_driver_mutex);
- dt_priv->tag = 0;
- pthread_setspecific(dt_driver_key, dt_priv);
- }
- return dt_priv;
-}
-#endif /* USE_VM_PROBES */
diff --git a/erts/emulator/drivers/common/erl_efile.h b/erts/emulator/drivers/common/erl_efile.h
deleted file mode 100644
index b7f063b4f2..0000000000
--- a/erts/emulator/drivers/common/erl_efile.h
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * %CopyrightBegin%
- *
- * Copyright Ericsson AB 1997-2016. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * %CopyrightEnd%
- */
-/*
- * Defines the interfaces between the generic efile driver and its
- * operating-system dependent helpers.
- */
-
-#include "sys.h"
-#include "erl_driver.h"
-
-/*
- * Open modes for efile_openfile().
- */
-#define EFILE_MODE_READ 1
-#define EFILE_MODE_WRITE 2 /* Implies truncating file when used alone. */
-#define EFILE_MODE_READ_WRITE 3
-#define EFILE_MODE_APPEND 4
-#define EFILE_COMPRESSED 8
-#define EFILE_MODE_EXCL 16
-#define EFILE_NO_TRUNCATE 32 /* Special for reopening on VxWorks */
-#define EFILE_MODE_SYNC 64
-
-/*
- * Seek modes for efile_seek().
- */
-#define EFILE_SEEK_SET 0
-#define EFILE_SEEK_CUR 1
-#define EFILE_SEEK_END 2
-
-/*
- * File types returned by efile_fileinfo().
- */
-#define FT_DEVICE 1
-#define FT_DIRECTORY 2
-#define FT_REGULAR 3
-#define FT_SYMLINK 4
-#define FT_OTHER 5
-
-/*
- * Access attributes returned by efile_fileinfo() (the bits can be ORed
- * together).
- */
-#define FA_NONE 0
-#define FA_WRITE 1
-#define FA_READ 2
-
-/* Some OS'es (i.e. Windows) has filenames in wide charaqcters. That requires special handling */
-/* Note that we do *not* honor alignment in the communication to the OS specific driver, */
-/* which is not a problem on x86, but might be on other platforms. The OS specific efile */
-/* implementation is expected to align if needed */
-#ifdef __WIN32__
-#define FILENAMES_16BIT 1
-#endif
-
-/* We use sendfilev if it exist on solaris */
-#if !defined(HAVE_SENDFILE) && defined(HAVE_SENDFILEV)
-#define HAVE_SENDFILE
-#endif
-
-/*
- * An handle to an open directory. To be cast to the correct type
- * in the system-dependent directory functions.
- */
-
-typedef struct _Efile_Dir_Handle* EFILE_DIR_HANDLE;
-
-/*
- * Error information from the last call.
- */
-typedef struct _Efile_error {
- int posix_errno; /* Posix error number, as in <errno.h>. */
- int os_errno; /* Os-dependent error number (not used). */
-} Efile_error;
-
-/*
- * Describes what is returned by file:file_info/1.
- */
-
-typedef struct _Efile_info {
- Uint32 size_low; /* Size of file, lower 32 bits.. */
- Uint32 size_high; /* Size of file, higher 32 bits. */
- Uint32 type; /* Type of file -- one of FT_*. */
- Uint32 access; /* Access to file -- one of FA_*. */
- Uint32 mode; /* Access permissions -- bit field. */
- Uint32 links; /* Number of links to file. */
- Uint32 major_device; /* Major device or file system. */
- Uint32 minor_device; /* Minor device (for devices). */
- Uint32 inode; /* Inode number. */
- Uint32 uid; /* User id of owner. */
- Uint32 gid; /* Group id of owner. */
- Sint64 accessTime; /* Last time the file was accessed. */
- Sint64 modifyTime; /* Last time the file was modified. */
- Sint64 cTime; /* Creation time (Windows) or last
- * inode change (Unix).
- */
-} Efile_info;
-
-
-#ifdef HAVE_SENDFILE
-/*
- * Describes the structure of headers/trailers for sendfile
- */
-struct t_sendfile_hdtl {
- SysIOVec *headers;
- int hdr_cnt;
- SysIOVec *trailers;
- int trl_cnt;
-};
-#endif /* HAVE_SENDFILE */
-
-/*
- * Functions.
- */
-int efile_init(void);
-int efile_mkdir(Efile_error* errInfo, char* name);
-int efile_rmdir(Efile_error* errInfo, char* name);
-int efile_delete_file(Efile_error* errInfo, char* name);
-int efile_rename(Efile_error* errInfo, char* src, char* dst);
-int efile_chdir(Efile_error* errInfo, char* name);
-int efile_getdcwd(Efile_error* errInfo, int drive,
- char* buffer, size_t size);
-int efile_readdir(Efile_error* errInfo, char* name,
- EFILE_DIR_HANDLE* dir_handle,
- char* buffer, size_t *size);
-int efile_openfile(Efile_error* errInfo, char* name, int flags,
- int* pfd, Sint64* pSize);
-void efile_closefile(int fd);
-int efile_fdatasync(Efile_error* errInfo, int fd);
-int efile_fsync(Efile_error* errInfo, int fd);
-int efile_fileinfo(Efile_error* errInfo, Efile_info* pInfo,
- char *name, int info_for_link);
-int efile_write_info(Efile_error* errInfo, Efile_info* pInfo, char *name);
-int efile_write(Efile_error* errInfo, int flags, int fd,
- char* buf, size_t count);
-int efile_writev(Efile_error* errInfo, int flags, int fd,
- SysIOVec* iov, int iovcnt);
-int efile_read(Efile_error* errInfo, int flags, int fd,
- char* buf, size_t count, size_t* pBytesRead);
-int efile_seek(Efile_error* errInfo, int fd,
- Sint64 offset, int origin, Sint64* new_location);
-int efile_truncate_file(Efile_error* errInfo, int *fd, int flags);
-int efile_pwrite(Efile_error* errInfo, int fd,
- char* buf, size_t count, Sint64 offset);
-int efile_pread(Efile_error* errInfo, int fd,
- Sint64 offset, char* buf, size_t count, size_t* pBytesRead);
-int efile_readlink(Efile_error* errInfo, char *name,
- char* buffer, size_t size);
-int efile_altname(Efile_error* errInfo, char *name,
- char* buffer, size_t size);
-int efile_link(Efile_error* errInfo, char* old, char* new);
-int efile_symlink(Efile_error* errInfo, char* old, char* new);
-int efile_may_openfile(Efile_error* errInfo, char *name);
-int efile_fadvise(Efile_error* errInfo, int fd, Sint64 offset, Sint64 length,
- int advise);
-#ifdef HAVE_SENDFILE
-int efile_sendfile(Efile_error* errInfo, int in_fd, int out_fd,
- off_t *offset, Uint64 *nbytes, struct t_sendfile_hdtl *hdtl);
-#endif /* HAVE_SENDFILE */
-int efile_fallocate(Efile_error* errInfo, int fd, Sint64 offset, Sint64 length);
diff --git a/erts/emulator/drivers/common/gzio.c b/erts/emulator/drivers/common/gzio.c
index 1ef1602ec9..86c3b07cea 100644
--- a/erts/emulator/drivers/common/gzio.c
+++ b/erts/emulator/drivers/common/gzio.c
@@ -19,726 +19,16 @@
#include <unistd.h>
#endif
#include <ctype.h>
+
#include "erl_driver.h"
-#include "erl_efile.h"
#include "sys.h"
-#ifdef __WIN32__
-#ifndef HAVE_CONFLICTING_FREAD_DECLARATION
-#define HAVE_CONFLICTING_FREAD_DECLARATION
-#endif
-#define FILENAMES_16BIT 1
-#endif
-
-#ifdef STDC
-# define zstrerror(errnum) strerror(errnum)
-#else
-# define zstrerror(errnum) ""
-#endif
-
#include "gzio_zutil.h"
#include "erl_zlib.h"
#include "gzio.h"
-/********struct internal_state {int dummy;}; / * for buggy compilers */
-
-#define Z_BUFSIZE 4096
-
-#define ALLOC(size) driver_alloc(size)
-#define TRYFREE(p) {if (p) driver_free(p);}
-
static int gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */
-/* gzip flag byte */
-#define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */
-#define HEAD_CRC 0x02 /* bit 1 set: header CRC present */
-#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */
-#define ORIG_NAME 0x08 /* bit 3 set: original file name present */
-#define COMMENT 0x10 /* bit 4 set: file comment present */
-#define RESERVED 0xE0 /* bits 5..7: reserved */
-
-typedef struct gz_stream {
- z_stream stream;
- int z_err; /* error code for last stream operation */
- int z_eof; /* set if end of input file */
-#ifdef UNIX
- int file; /* .gz file descriptor */
-#else
- FILE *file; /* .gz file */
-#endif
- Byte *inbuf; /* input buffer */
- Byte *outbuf; /* output buffer */
- uLong crc; /* crc32 of uncompressed data */
- char *msg; /* error message */
- char *path; /* path name for debugging only */
- int transparent; /* 1 if input file is not a .gz file */
- char mode; /* 'w' or 'r' */
- int position; /* Position (for seek) */
- int (*destroy)(struct gz_stream*); /* Function to destroy
- * this structure. */
-} gz_stream;
-
-local ErtsGzFile gz_open (const char *path, const char *mode);
-local int get_byte (gz_stream *s);
-local void check_header (gz_stream *s);
-local int destroy (gz_stream *s);
-local uLong getLong (gz_stream *s);
-
-#ifdef UNIX
-/*
- * In Solaris 8 and earlier, fopen() and its friends cannot handle
- * file descriptors larger than 255. Therefore, we use read()/write()
- * on all Unix systems.
- */
-# define ERTS_GZWRITE(File, Buf, Count) write((File), (Buf), (Count))
-# define ERTS_GZREAD(File, Buf, Count) read((File), (Buf), (Count))
-#else
-/*
- * On all other operating systems, using fopen(), fread()/fwrite(), since
- * there is not guaranteed to exist any read()/write() (not part of
- * ANSI/ISO-C).
- */
-# define ERTS_GZWRITE(File, Buf, Count) fwrite((Buf), 1, (Count), (File))
-# define ERTS_GZREAD(File, Buf, Count) fread((Buf), 1, (Count), (File))
-#endif
-
-/*
- * Ripped from efile_drv.c
- */
-
-#ifdef FILENAMES_16BIT
-# define FILENAME_BYTELEN(Str) filename_len_16bit(Str)
-# define FILENAME_COPY(To,From) filename_cpy_16bit((To),(From))
-# define FILENAME_CHARSIZE 2
-
- static int filename_len_16bit(const char *str)
- {
- const char *p = str;
- while(*p != '\0' || p[1] != '\0') {
- p += 2;
- }
- return (p - str);
- }
-
- static void filename_cpy_16bit(char *to, const char *from)
- {
- while(*from != '\0' || from[1] != '\0') {
- *to++ = *from++;
- *to++ = *from++;
- }
- *to++ = *from++;
- *to++ = *from++;
- }
-
-#else
-# define FILENAME_BYTELEN(Str) strlen(Str)
-# define FILENAME_COPY(To,From) strcpy(To,From)
-# define FILENAME_CHARSIZE 1
-#endif
-
-/* ===========================================================================
- Opens a gzip (.gz) file for reading or writing. The mode parameter
- is as in fopen ("rb" or "wb"). The file is given either by file descriptor
- or path name (if fd == -1).
- gz_open return NULL if the file could not be opened or if there was
- insufficient memory to allocate the (de)compression state; errno
- can be checked to distinguish the two cases (if errno is zero, the
- zlib error is Z_MEM_ERROR).
-*/
-local ErtsGzFile gz_open (path, mode)
- const char *path;
- const char *mode;
-{
- int err;
- int level = Z_DEFAULT_COMPRESSION; /* compression level */
- char *p = (char*)mode;
- gz_stream *s;
- char fmode[80]; /* copy of mode, without the compression level */
- char *m = fmode;
-
- if (!path || !mode) return Z_NULL;
-
- s = (gz_stream *)ALLOC(sizeof(gz_stream));
- if (!s) return Z_NULL;
-
- erl_zlib_alloc_init(&s->stream);
- s->stream.next_in = s->inbuf = Z_NULL;
- s->stream.next_out = s->outbuf = Z_NULL;
- s->stream.avail_in = s->stream.avail_out = 0;
-#ifdef UNIX
- s->file = -1;
-#else
- s->file = NULL;
-#endif
- s->z_err = Z_OK;
- s->z_eof = 0;
- s->crc = crc32(0L, Z_NULL, 0);
- s->msg = NULL;
- s->transparent = 0;
- s->position = 0;
- s->destroy = destroy;
-
- s->path = (char*)ALLOC(FILENAME_BYTELEN(path)+FILENAME_CHARSIZE);
- if (s->path == NULL) {
- return s->destroy(s), (ErtsGzFile)Z_NULL;
- }
- FILENAME_COPY(s->path, path); /* do this early for debugging */
-
- s->mode = '\0';
- do {
- if (*p == 'r')
- s->mode = 'r';
- if (*p == 'w' || *p == 'a')
- s->mode = 'w';
- if (isdigit((int)*p)) {
- level = *p - '0';
- } else {
- *m++ = *p; /* Copy the mode */
- }
- } while (*p++ && m < fmode + sizeof(fmode) - 1);
- *m = '\0';
- if (s->mode == '\0')
- return s->destroy(s), (ErtsGzFile)Z_NULL;
-
- if (s->mode == 'w') {
- err = deflateInit2(&(s->stream), level,
- Z_DEFLATED, MAX_WBITS+16, DEF_MEM_LEVEL, 0);
- /* windowBits is passed < 0 to suppress zlib header */
-
- s->stream.next_out = s->outbuf = (Byte*)ALLOC(Z_BUFSIZE);
-
- if (err != Z_OK || s->outbuf == Z_NULL) {
- return s->destroy(s), (ErtsGzFile)Z_NULL;
- }
- } else {
- /*
- * It is tempting to use the built-in support in zlib
- * for handling GZIP headers, but unfortunately it
- * cannot handle multiple GZIP headers (which occur when
- * several GZIP files have been concatenated).
- */
-
- err = inflateInit2(&(s->stream), -MAX_WBITS);
- s->stream.next_in = s->inbuf = (Byte*)ALLOC(Z_BUFSIZE);
-
- if (err != Z_OK || s->inbuf == Z_NULL) {
- return s->destroy(s), (ErtsGzFile)Z_NULL;
- }
- }
- s->stream.avail_out = Z_BUFSIZE;
-
- errno = 0;
-#if defined(FILENAMES_16BIT)
- {
- FILE* efile_wfopen(const WCHAR* name, const WCHAR* mode);
- WCHAR wfmode[80];
- int i = 0;
- int j;
- for(j = 0; fmode[j] != '\0'; ++j) {
- wfmode[i++] = (WCHAR) fmode[j];
- }
- wfmode[i++] = L'\0';
- s->file = efile_wfopen((WCHAR *)path, wfmode);
- if (s->file == NULL) {
- return s->destroy(s), (ErtsGzFile)Z_NULL;
- }
- }
-#elif defined(UNIX)
- if (s->mode == 'r') {
- s->file = open(path, O_RDONLY);
- } else {
- s->file = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0666);
- }
- if (s->file == -1) {
- return s->destroy(s), (ErtsGzFile)Z_NULL;
- }
-#else
- s->file = fopen(path, fmode);
- if (s->file == NULL) {
- return s->destroy(s), (ErtsGzFile)Z_NULL;
- }
-#endif
- if (s->mode == 'r') {
- check_header(s); /* skip the .gz header */
- }
- return (ErtsGzFile)s;
-}
-
-/* ===========================================================================
- Rewind a gzfile back to the beginning.
-*/
-
-local int gz_rewind (gz_stream *s)
-{
- TRYFREE(s->msg);
-
-#ifdef UNIX
- lseek(s->file, 0L, SEEK_SET);
-#else
- fseek(s->file, 0L, SEEK_SET);
-#endif
- inflateReset(&(s->stream));
- s->stream.next_in = Z_NULL;
- s->stream.next_out = Z_NULL;
- s->stream.avail_in = s->stream.avail_out = 0;
- s->z_err = Z_OK;
- s->z_eof = 0;
- s->crc = crc32(0L, Z_NULL, 0);
- s->msg = NULL;
- s->position = 0;
- s->stream.next_in = s->inbuf;
-
- s->stream.avail_out = Z_BUFSIZE;
-
- check_header(s); /* skip the .gz header */
- return 1;
-}
-
-/* ===========================================================================
- Opens a gzip (.gz) file for reading or writing.
-*/
-ErtsGzFile erts_gzopen (path, mode)
- const char *path;
- const char *mode;
-{
- return gz_open (path, mode);
-}
-
-
-/* ===========================================================================
- Read a byte from a gz_stream; update next_in and avail_in. Return EOF
- for end of file.
- IN assertion: the stream s has been sucessfully opened for reading.
-*/
-local int get_byte(s)
- gz_stream *s;
-{
- if (s->z_eof) return EOF;
- if (s->stream.avail_in == 0) {
-#ifdef UNIX
- ssize_t res;
- errno = 0;
- res = ERTS_GZREAD(s->file, s->inbuf, Z_BUFSIZE);
- if (res == 0) {
- s->stream.avail_in = 0;
- s->z_eof = 1;
- return EOF;
- } else if (res < 0) {
- s->stream.avail_in = 0;
- s->z_eof = 1;
- s->z_err = Z_ERRNO;
- return EOF;
- } else {
- s->stream.avail_in = (uInt) res;
- }
-#else
- errno = 0;
- s->stream.avail_in = ERTS_GZREAD(s->file, s->inbuf, Z_BUFSIZE);
- if (s->stream.avail_in == 0) {
- s->z_eof = 1;
- if (s->file && ferror(s->file))
- s->z_err = Z_ERRNO;
- return EOF;
- }
-#endif
- s->stream.next_in = s->inbuf;
- }
- s->stream.avail_in--;
- return *(s->stream.next_in)++;
-}
-
-/* ===========================================================================
- Check the gzip header of a gz_stream opened for reading. Set the stream
- mode to transparent if the gzip magic header is not present; set s->err
- to Z_DATA_ERROR if the magic header is present but the rest of the header
- is incorrect.
- IN assertion: the stream s has already been created sucessfully;
- s->stream.avail_in is zero for the first time, but may be non-zero
- for concatenated .gz files.
-*/
-local void check_header(s)
- gz_stream *s;
-{
- int method; /* method byte */
- int flags; /* flags byte */
- uInt len;
- int c;
-
- /* Check the gzip magic header */
- for (len = 0; len < 2; len++) {
- c = get_byte(s);
- if (c != gz_magic[len]) {
- if (len != 0) s->stream.avail_in++, s->stream.next_in--;
- if (c != EOF) {
- s->stream.avail_in++, s->stream.next_in--;
- s->transparent = 1;
- }
- s->z_err = s->stream.avail_in != 0 ? Z_OK : Z_STREAM_END;
- return;
- }
- }
- method = get_byte(s);
- flags = get_byte(s);
- if (method != Z_DEFLATED || (flags & RESERVED) != 0) {
- s->z_err = Z_DATA_ERROR;
- return;
- }
-
- /* Discard time, xflags and OS code: */
- for (len = 0; len < 6; len++) (void)get_byte(s);
-
- if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */
- len = (uInt)get_byte(s);
- len += ((uInt)get_byte(s))<<8;
- /* len is garbage if EOF but the loop below will quit anyway */
- while (len-- != 0 && get_byte(s) != EOF) ;
- }
- if ((flags & ORIG_NAME) != 0) { /* skip the original file name */
- while ((c = get_byte(s)) != 0 && c != EOF) ;
- }
- if ((flags & COMMENT) != 0) { /* skip the .gz file comment */
- while ((c = get_byte(s)) != 0 && c != EOF) ;
- }
- if ((flags & HEAD_CRC) != 0) { /* skip the header crc */
- for (len = 0; len < 2; len++) (void)get_byte(s);
- }
- s->z_err = s->z_eof ? Z_DATA_ERROR : Z_OK;
-}
-
- /* ===========================================================================
- * Cleanup then free the given gz_stream. Return a zlib error code.
- Try freeing in the reverse order of allocations.
- */
-local int destroy (s)
- gz_stream *s;
-{
- int err = Z_OK;
-
- if (!s) return Z_STREAM_ERROR;
-
- TRYFREE(s->msg);
-
- if (s->stream.state != NULL) {
- if (s->mode == 'w') {
- err = deflateEnd(&(s->stream));
- } else if (s->mode == 'r') {
- err = inflateEnd(&(s->stream));
- }
- }
-#ifdef UNIX
- if (s->file != -1 && close(s->file)) {
- err = Z_ERRNO;
- }
-#else
- if (s->file != NULL && fclose(s->file)) {
- err = Z_ERRNO;
- }
-#endif
- if (s->z_err < 0) err = s->z_err;
-
- TRYFREE(s->inbuf);
- TRYFREE(s->outbuf);
- TRYFREE(s->path);
- TRYFREE(s);
- return err;
-}
-
-/* ===========================================================================
- Reads the given number of uncompressed bytes from the compressed file.
- gzread returns the number of bytes actually read (0 for end of file).
-*/
-int
-erts_gzread(ErtsGzFile file, voidp buf, unsigned len)
-{
- gz_stream *s = (gz_stream*)file;
- Bytef *start = buf; /* starting point for crc computation */
- Byte *next_out; /* == stream.next_out but not forced far (for MSDOS) */
-
- if (s == NULL || s->mode != 'r') return Z_STREAM_ERROR;
-
- if (s->z_err == Z_DATA_ERROR || s->z_err == Z_ERRNO) return -1;
- if (s->z_err == Z_STREAM_END) return 0; /* EOF */
-
- s->stream.next_out = next_out = buf;
- s->stream.avail_out = len;
-
- while (s->stream.avail_out != 0) {
-
- if (s->transparent) {
- /* Copy first the lookahead bytes: */
- uInt n = s->stream.avail_in;
- if (n > s->stream.avail_out) n = s->stream.avail_out;
- if (n > 0) {
- zmemcpy(s->stream.next_out, s->stream.next_in, n);
- next_out += n;
- s->stream.next_out = next_out;
- s->stream.next_in += n;
- s->stream.avail_out -= n;
- s->stream.avail_in -= n;
- }
- if (s->stream.avail_out > 0) {
- s->stream.avail_out -= ERTS_GZREAD(s->file, next_out,
- s->stream.avail_out);
- }
- len -= s->stream.avail_out;
- s->stream.total_in += (uLong)len;
- s->stream.total_out += (uLong)len;
- if (len == 0) s->z_eof = 1;
- s->position += (int)len;
- return (int)len;
- }
- if (s->stream.avail_in == 0 && !s->z_eof) {
-#ifdef UNIX
- ssize_t res;
- errno = 0;
- res = ERTS_GZREAD(s->file, s->inbuf, Z_BUFSIZE);
- if (res == 0) {
- s->stream.avail_in = 0;
- s->z_eof = 1;
- return EOF;
- } else if (res < 0) {
- s->stream.avail_in = 0;
- s->z_eof = 1;
- s->z_err = Z_ERRNO;
- return EOF;
- } else {
- s->stream.avail_in = (uInt) res;
- }
-#else
- errno = 0;
- s->stream.avail_in = ERTS_GZREAD(s->file, s->inbuf, Z_BUFSIZE);
- if (s->stream.avail_in == 0) {
- s->z_eof = 1;
- if (s->file && ferror(s->file)) {
- s->z_err = Z_ERRNO;
- break;
- }
- }
-#endif
- s->stream.next_in = s->inbuf;
- }
- s->z_err = inflate(&(s->stream), Z_NO_FLUSH);
-
- if (s->z_err == Z_STREAM_END) {
- /* Check CRC and original size */
- s->crc = crc32(s->crc, start, (uInt)(s->stream.next_out - start));
- start = s->stream.next_out;
-
- if (getLong(s) != s->crc) {
- s->z_err = Z_DATA_ERROR;
- } else {
- (void)getLong(s);
- /* The uncompressed length returned by above getlong() may
- * be different from s->stream.total_out) in case of
- * concatenated .gz files. Check for such files:
- */
- check_header(s);
- if (s->z_err == Z_OK) {
- uLong total_in = s->stream.total_in;
- uLong total_out = s->stream.total_out;
-
- inflateReset(&(s->stream));
- s->stream.total_in = total_in;
- s->stream.total_out = total_out;
- s->crc = crc32(0L, Z_NULL, 0);
- }
- }
- }
- if (s->z_err != Z_OK || s->z_eof) break;
- }
- s->crc = crc32(s->crc, start, (uInt)(s->stream.next_out - start));
-
- s->position += (int)(len - s->stream.avail_out);
-
- return (int)(len - s->stream.avail_out);
-}
-
-/* ===========================================================================
- Writes the given number of uncompressed bytes into the compressed file.
- gzwrite returns the number of bytes actually written (0 in case of error).
-*/
-int
-erts_gzwrite(ErtsGzFile file, voidp buf, unsigned len)
-{
- gz_stream *s = (gz_stream*)file;
-
- if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR;
-
- s->stream.next_in = buf;
- s->stream.avail_in = len;
-
- while (s->stream.avail_in != 0) {
-
- if (s->stream.avail_out == 0) {
-
- s->stream.next_out = s->outbuf;
- if (ERTS_GZWRITE(s->file, s->outbuf, Z_BUFSIZE) != Z_BUFSIZE) {
- s->z_err = Z_ERRNO;
- break;
- }
- s->stream.avail_out = Z_BUFSIZE;
- }
- s->z_err = deflate(&(s->stream), Z_NO_FLUSH);
- if (s->z_err != Z_OK) break;
- }
- s->position += (int)(len - s->stream.avail_in);
- return (int)(len - s->stream.avail_in);
-}
-
-/*
- * For use by Erlang file driver.
- *
- * XXX Limitations:
- * - SEEK_END is not allowed (length of file is not known).
- * - When writing, only forward seek is supported.
- */
-
-int
-erts_gzseek(ErtsGzFile file, int offset, int whence)
-{
- int pos;
- gz_stream* s = (gz_stream *) file;
-
- switch (whence) {
- case EFILE_SEEK_SET: whence = SEEK_SET; break;
- case EFILE_SEEK_CUR: whence = SEEK_CUR; break;
- case EFILE_SEEK_END: whence = SEEK_END; break;
- default:
- errno = EINVAL;
- return -1;
- }
-
- if (s == NULL) {
- errno = EINVAL;
- return -1;
- }
- if (s->z_err == Z_DATA_ERROR || s->z_err == Z_ERRNO) {
- errno = EIO;
- return -1;
- }
-
- switch (whence) {
- case SEEK_SET: pos = offset; break;
- case SEEK_CUR: pos = s->position+offset; break;
- case SEEK_END:
- default:
- errno = EINVAL; return -1;
- }
-
- if (pos == s->position) {
- return pos;
- }
-
- if (pos < s->position) {
- if (s->mode == 'w') {
- errno = EINVAL;
- return -1;
- }
- gz_rewind(s);
- }
-
- while (s->position < pos) {
- char buf[512];
- int n;
- int save_pos = s->position;
-
- n = pos - s->position;
- if (n > sizeof(buf))
- n = sizeof(buf);
-
- if (s->mode == 'r') {
- erts_gzread(file, buf, n);
- } else {
- memset(buf, '\0', n);
- erts_gzwrite(file, buf, n);
- }
- if (save_pos == s->position) break;
- }
-
- return s->position;
-}
-
-/* ===========================================================================
- Flushes all pending output into the compressed file. The parameter
- flush is as in the deflate() function.
- gzflush should be called only when strictly necessary because it can
- degrade compression.
-*/
-int
-erts_gzflush(ErtsGzFile file, int flush)
-{
- uInt len;
- int done = 0;
- gz_stream *s = (gz_stream*)file;
-
- if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR;
-
- s->stream.avail_in = 0; /* should be zero already anyway */
-
- for (;;) {
- len = Z_BUFSIZE - s->stream.avail_out;
-
- if (len != 0) {
- if ((uInt)ERTS_GZWRITE(s->file, s->outbuf, len) != len) {
- s->z_err = Z_ERRNO;
- return Z_ERRNO;
- }
- s->stream.next_out = s->outbuf;
- s->stream.avail_out = Z_BUFSIZE;
- }
- if (done) break;
- s->z_err = deflate(&(s->stream), flush);
-
- /* deflate has finished flushing only when it hasn't used up
- * all the available space in the output buffer:
- */
- done = (s->stream.avail_out != 0 || s->z_err == Z_STREAM_END);
-
- if (s->z_err != Z_OK && s->z_err != Z_STREAM_END) break;
- }
-#ifndef UNIX
- fflush(s->file);
-#endif
- return s->z_err == Z_STREAM_END ? Z_OK : s->z_err;
-}
-
-/* ===========================================================================
- Reads a long in LSB order from the given gz_stream. Sets
-*/
-local uLong getLong (s)
- gz_stream *s;
-{
- uLong x = (uLong)get_byte(s);
- int c;
-
- x += ((uLong)get_byte(s))<<8;
- x += ((uLong)get_byte(s))<<16;
- c = get_byte(s);
- if (c == EOF) s->z_err = Z_DATA_ERROR;
- x += ((uLong)c)<<24;
- return x;
-}
-
-/* ===========================================================================
- Flushes all pending output if necessary, closes the compressed file
- and deallocates all the (de)compression state.
-*/
-int
-erts_gzclose(ErtsGzFile file)
-{
- int err;
- gz_stream *s = (gz_stream*)file;
-
- if (s == NULL) return Z_STREAM_ERROR;
-
- if (s->mode == 'w') {
- err = erts_gzflush (file, Z_FINISH);
- if (err != Z_OK) return s->destroy(s);
- }
- return s->destroy(s);
-}
-
-
/* ===========================================================================
Uncompresses the buffer given and returns a pointer to a binary.
If the buffer was not compressed with gzip, the buffer contents
diff --git a/erts/emulator/drivers/common/gzio.h b/erts/emulator/drivers/common/gzio.h
index ee0ebe7bd8..20433a1a17 100644
--- a/erts/emulator/drivers/common/gzio.h
+++ b/erts/emulator/drivers/common/gzio.h
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 1999-2016. All Rights Reserved.
+ * Copyright Ericsson AB 1999-2018. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -20,13 +20,5 @@
#include "zlib.h"
-typedef struct erts_gzFile* ErtsGzFile;
-
-ErtsGzFile erts_gzopen (const char *path, const char *mode);
-int erts_gzread(ErtsGzFile file, voidp buf, unsigned len);
-int erts_gzwrite(ErtsGzFile file, voidp buf, unsigned len);
-int erts_gzseek(ErtsGzFile, int, int);
-int erts_gzflush(ErtsGzFile file, int flush);
-int erts_gzclose(ErtsGzFile file);
ErlDrvBinary* erts_gzinflate_buffer(char*, uLong);
ErlDrvBinary* erts_gzdeflate_buffer(char*, uLong);
diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c
index 1885338ce5..47eb5df7dd 100644
--- a/erts/emulator/drivers/common/inet_drv.c
+++ b/erts/emulator/drivers/common/inet_drv.c
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 1997-2016. All Rights Reserved.
+ * Copyright Ericsson AB 1997-2018. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -38,6 +38,7 @@
#include <ctype.h>
#include <sys/types.h>
#include <errno.h>
+#include <stdint.h>
#define IDENTITY(c) c
#define STRINGIFY_1(b) IDENTITY(#b)
@@ -63,6 +64,20 @@
#include <sys/un.h>
#endif
+#ifdef HAVE_SENDFILE
+#if defined(__linux__) || (defined(__sun) && defined(__SVR4))
+ #include <sys/sendfile.h>
+#elif defined(__FreeBSD__) || defined(__DragonFly__)
+ /* Need to define __BSD_VISIBLE in order to expose prototype of sendfile */
+ #define __BSD_VISIBLE 1
+ #include <sys/socket.h>
+#endif
+#endif
+
+#if defined(__APPLE__) && defined(__MACH__) && !defined(__DARWIN__)
+ #define __DARWIN__ 1
+#endif
+
/* All platforms fail on malloc errors. */
#define FATAL_MALLOC
@@ -591,7 +606,7 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n)
(((unsigned char*) (s))[1] << 8) | \
(((unsigned char*) (s))[0]))
-#ifdef HAVE_SYS_UN_H
+#if defined(HAVE_SYS_UN_H) || defined(SO_BINDTODEVICE)
/* strnlen doesn't exist everywhere */
static size_t my_strnlen(const char *s, size_t maxlen)
@@ -602,14 +617,6 @@ static size_t my_strnlen(const char *s, size_t maxlen)
return i;
}
-/* Check that some character in the buffer != '\0' */
-static int is_nonzero(const char *s, size_t n)
-{
- size_t i;
- for (i = 0; i < n; i++) if (s[i] != '\0') return !0;
- return 0;
-}
-
#endif
#ifdef VALGRIND
@@ -618,6 +625,26 @@ static int is_nonzero(const char *s, size_t n)
# define VALGRIND_MAKE_MEM_DEFINED(ptr,size)
#endif
+#ifndef __WIN32__
+/* Calculate CMSG_NXTHDR without having a struct msghdr*.
+ * CMSG_LEN only caters for alignment for start of data.
+ * To get how much to advance we need to use CMSG_SPACE
+ * on the payload length. To get the payload length we
+ * take the calculated cmsg->cmsg_len and subtract the
+ * header length. To get the header length we use
+ * the pointer difference from the cmsg start pointer
+ * to the CMSG_DATA(cmsg) pointer.
+ */
+#define LEN_CMSG_DATA(cmsg) \
+ ((cmsg)->cmsg_len - ((char*)CMSG_DATA(cmsg) - (char*)(cmsg)))
+#define NXT_CMSG_HDR(cmsg) \
+ ((struct cmsghdr*)(((char*)(cmsg)) + CMSG_SPACE(LEN_CMSG_DATA(cmsg))))
+#endif
+
+#if !defined(IPV6_PKTOPTIONS) && defined(IPV6_2292PKTOPTIONS)
+#define IPV6_PKTOPTIONS IPV6_2292PKTOPTIONS
+#endif
+
/*
Magic errno value used locally for return of {error, system_limit}
- the emulator definition of SYSTEM_LIMIT is not available here.
@@ -709,6 +736,7 @@ static int is_nonzero(const char *s, size_t n)
#define TCP_REQ_RECV 42
#define TCP_REQ_UNRECV 43
#define TCP_REQ_SHUTDOWN 44
+#define TCP_REQ_SENDFILE 45
/* UDP and SCTP requests */
#define PACKET_REQ_RECV 60 /* Common for UDP and SCTP */
/* #define SCTP_REQ_LISTEN 61 MERGED Different from TCP; not for UDP */
@@ -728,9 +756,10 @@ static int is_nonzero(const char *s, size_t n)
#define TCP_ADDF_PENDING_SHUTDOWN \
(TCP_ADDF_PENDING_SHUT_WR | TCP_ADDF_PENDING_SHUT_RDWR)
#define TCP_ADDF_SHOW_ECONNRESET 64 /* Tell user about incoming RST */
-#define TCP_ADDF_DELAYED_ECONNRESET 128 /* An ECONNRESET error occured on send or shutdown */
+#define TCP_ADDF_DELAYED_ECONNRESET 128 /* An ECONNRESET error occurred on send or shutdown */
#define TCP_ADDF_SHUTDOWN_WR_DONE 256 /* A shutdown(sock, SHUT_WR) or SHUT_RDWR was made */
#define TCP_ADDF_LINGER_ZERO 512 /* Discard driver queue on port close */
+#define TCP_ADDF_SENDFILE 1024 /* Send from an fd instead of the driver queue */
/* *_REQ_* replies */
#define INET_REP_ERROR 0
@@ -778,6 +807,13 @@ static int is_nonzero(const char *s, size_t n)
#define INET_LOPT_TCP_SHOW_ECONNRESET 39 /* tell user about incoming RST */
#define INET_LOPT_LINE_DELIM 40 /* Line delimiting char */
#define INET_OPT_TCLASS 41 /* IPv6 transport class */
+#define INET_OPT_BIND_TO_DEVICE 42 /* get/set network device the socket is bound to */
+#define INET_OPT_RECVTOS 43 /* IP_RECVTOS ancillary data */
+#define INET_OPT_RECVTCLASS 44 /* IPV6_RECVTCLASS ancillary data */
+#define INET_OPT_PKTOPTIONS 45 /* IP(V6)_PKTOPTIONS get ancillary data */
+#define INET_OPT_TTL 46 /* IP_TTL */
+#define INET_OPT_RECVTTL 47 /* IP_RECVTTL ancillary data */
+#define TCP_OPT_NOPUSH 48 /* super-Nagle, aka TCP_CORK */
/* SCTP options: a separate range, from 100: */
#define SCTP_OPT_RTOINFO 100
#define SCTP_OPT_ASSOCINFO 101
@@ -853,6 +889,11 @@ static int is_nonzero(const char *s, size_t n)
#define SCTP_FLAG_SACDELAY_ENABLE (32 /* am_sackdelay_enable */)
#define SCTP_FLAG_SACDELAY_DISABLE (64 /* am_sackdelay_disable */)
+/* Flags for recv_cmsgflags */
+#define INET_CMSG_RECVTOS (1 << 0) /* am_recvtos, am_tos */
+#define INET_CMSG_RECVTCLASS (1 << 1) /* am_recvtclass, am_tclass */
+#define INET_CMSG_RECVTTL (1 << 2) /* am_recvttl, am_ttl */
+
/*
** End of interface constants.
**--------------------------------------------------------------------------*/
@@ -907,9 +948,21 @@ static int is_nonzero(const char *s, size_t n)
#ifdef HAVE_SCTP
#define PACKET_ERL_DRV_TERM_DATA_LEN 512
#else
+#ifndef __WIN32__
+/* Assume we have recvmsg() and might need room for ancillary data */
+#define PACKET_ERL_DRV_TERM_DATA_LEN 64
+#else
#define PACKET_ERL_DRV_TERM_DATA_LEN 32
#endif
+#endif
+typedef struct _tcp_descriptor tcp_descriptor;
+
+#if defined(TCP_CORK)
+#define INET_TCP_NOPUSH TCP_CORK
+#elif defined(TCP_NOPUSH) && !defined(__DARWIN__)
+#define INET_TCP_NOPUSH TCP_NOPUSH
+#endif
#define BIN_REALLOC_MARGIN(x) ((x)/4) /* 25% */
@@ -959,16 +1012,19 @@ typedef struct _multi_timer_data {
struct _multi_timer_data *prev;
} MultiTimerData;
-static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port,
- ErlDrvTermData caller, unsigned timeout,
- void (*timeout_fun)(ErlDrvData drv_data,
- ErlDrvTermData caller));
-static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port,
+static MultiTimerData *add_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
+ ErlDrvTermData caller, unsigned timeout,
+ void (*timeout_fun)(ErlDrvData drv_data,
+ ErlDrvTermData caller));
+static void fire_multi_timers(tcp_descriptor *desc, ErlDrvPort port,
ErlDrvData data);
-static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTimerData *p);
+static void remove_multi_timer(tcp_descriptor *desc, ErlDrvPort port, MultiTimerData *p);
+static void cancel_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
+ void (*timeout_fun)(ErlDrvData drv_data,
+ ErlDrvTermData caller));
static void tcp_inet_multi_timeout(ErlDrvData e, ErlDrvTermData caller);
-static void clean_multi_timers(MultiTimerData **first, ErlDrvPort port);
+static void clean_multi_timers(tcp_descriptor *desc, ErlDrvPort port);
typedef struct {
int id; /* id used to identify reply */
@@ -1025,6 +1081,7 @@ typedef struct {
inet_async_op* oph; /* queue head or NULL */
inet_async_op* opt; /* queue tail or NULL */
inet_async_op op_queue[INET_MAX_ASYNC]; /* call queue */
+ int op_ref; /* queue reference generator */
int active; /* 0 = passive, 1 = active, 2 = active once */
Sint16 active_count; /* counter for {active,N} */
@@ -1074,6 +1131,7 @@ typedef struct {
char *netns; /* Socket network namespace name
as full file path */
#endif
+ int recv_cmsgflags; /* Which ancillary data to expect */
} inet_descriptor;
@@ -1143,7 +1201,6 @@ static int packet_inet_init(void);
static void packet_inet_stop(ErlDrvData);
static void packet_inet_command(ErlDrvData, char*, ErlDrvSizeT);
static void packet_inet_drv_input(ErlDrvData data, ErlDrvEvent event);
-static void packet_inet_drv_output(ErlDrvData data, ErlDrvEvent event);
static ErlDrvData udp_inet_start(ErlDrvPort, char* command);
#ifdef HAVE_SCTP
static ErlDrvData sctp_inet_start(ErlDrvPort, char* command);
@@ -1168,7 +1225,7 @@ static struct erl_drv_entry udp_inet_driver_entry =
NULL,
#else
packet_inet_drv_input,
- packet_inet_drv_output,
+ NULL,
#endif
"udp_inet",
NULL,
@@ -1203,7 +1260,7 @@ static struct erl_drv_entry sctp_inet_driver_entry =
NULL,
#else
packet_inet_drv_input,
- packet_inet_drv_output,
+ NULL,
#endif
"sctp_inet",
NULL,
@@ -1226,7 +1283,7 @@ static struct erl_drv_entry sctp_inet_driver_entry =
};
#endif
-typedef struct {
+struct _tcp_descriptor {
inet_descriptor inet; /* common data structure (DON'T MOVE) */
int high; /* high watermark */
int low; /* low watermark */
@@ -1242,8 +1299,24 @@ typedef struct {
int http_state; /* 0 = response|request 1=headers fields */
inet_async_multi_op *multi_first;/* NULL == no multi-accept-queue, op is in ordinary queue */
inet_async_multi_op *multi_last;
- MultiTimerData *mtd; /* Timer structures for multiple accept */
-} tcp_descriptor;
+ MultiTimerData *mtd; /* Timer structures for multiple accept */
+ MultiTimerData *mtd_cache; /* A cache for timer allocations */
+#ifdef HAVE_SENDFILE
+ struct {
+ ErlDrvSizeT ioq_skip; /* The number of bytes in the queue at the time
+ * sendfile was issued, which must be sent
+ * before issuing the sendfile call itself. */
+ int dup_file_fd; /* The file handle to send from; this is
+ * duplicated when sendfile is issued to
+ * reduce (but not eliminate) the impact of a
+ * nasty race, so we have to remember to close
+ * it. */
+ Uint64 bytes_sent;
+ Uint64 offset;
+ Uint64 length;
+ } sendfile;
+#endif
+};
/* send function */
static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len);
@@ -1253,9 +1326,16 @@ static int tcp_deliver(tcp_descriptor* desc, int len);
static int tcp_shutdown_error(tcp_descriptor* desc, int err);
+#ifdef HAVE_SENDFILE
+static int tcp_inet_sendfile(tcp_descriptor* desc);
+static int tcp_sendfile_aborted(tcp_descriptor* desc, int socket_error);
+#endif
+
static int tcp_inet_output(tcp_descriptor* desc, HANDLE event);
static int tcp_inet_input(tcp_descriptor* desc, HANDLE event);
+static void tcp_desc_close(tcp_descriptor*);
+
#ifdef HAVE_UDP
typedef struct {
inet_descriptor inet; /* common data structure (DON'T MOVE) */
@@ -1267,14 +1347,12 @@ typedef struct {
static int packet_inet_input(udp_descriptor* udesc, HANDLE event);
-static int packet_inet_output(udp_descriptor* udesc, HANDLE event);
#endif
/* convert descriptor pointer to inet_descriptor pointer */
#define INETP(d) (&(d)->inet)
-static int async_ref = 0; /* async reference id generator */
-#define NEW_ASYNC_ID() ((async_ref++) & 0xffff)
+#define NEW_ASYNC_ID(desc) ((desc)->op_ref++ & 0xffff)
/* check for transition from active to passive */
#define INET_CHECK_ACTIVE_TO_PASSIVE(inet) \
@@ -1310,6 +1388,11 @@ static ErlDrvTermData am_udp_error;
#ifdef HAVE_SYS_UN_H
static ErlDrvTermData am_local;
#endif
+#ifndef __WIN32__
+static ErlDrvTermData am_tos;
+static ErlDrvTermData am_tclass;
+static ErlDrvTermData am_ttl;
+#endif
#ifdef HAVE_SCTP
static ErlDrvTermData am_sctp;
static ErlDrvTermData am_sctp_passive;
@@ -1330,10 +1413,15 @@ static ErlDrvTermData am_sndbuf;
static ErlDrvTermData am_reuseaddr;
static ErlDrvTermData am_dontroute;
static ErlDrvTermData am_priority;
-static ErlDrvTermData am_tos;
-static ErlDrvTermData am_tclass;
+static ErlDrvTermData am_recvtos;
+static ErlDrvTermData am_recvtclass;
+static ErlDrvTermData am_recvttl;
static ErlDrvTermData am_ipv6_v6only;
static ErlDrvTermData am_netns;
+static ErlDrvTermData am_bind_to_device;
+#endif
+#ifdef HAVE_SENDFILE
+static ErlDrvTermData am_sendfile;
#endif
static char str_eafnosupport[] = "eafnosupport";
@@ -1518,8 +1606,10 @@ static void *realloc_wrapper(void *current, ErlDrvSizeT size){
# define ASSOC_ID_LEN 4
# define LOAD_ASSOC_ID LOAD_UINT
# define LOAD_ASSOC_ID_CNT LOAD_UINT_CNT
-# define SCTP_ANC_BUFF_SIZE INET_DEF_BUFFER/2 /* XXX: not very good... */
+#else
+# define IS_SCTP(desc) 0
#endif
+# define ANC_BUFF_SIZE INET_DEF_BUFFER/2 /* XXX: not very good... */
#ifdef HAVE_UDP
static int load_address(ErlDrvTermData* spec, int i, char* buf)
@@ -1755,6 +1845,7 @@ static void release_buffer(ErlDrvBinary* buf)
#ifdef HAVE_UDP
static ErlDrvBinary* realloc_buffer(ErlDrvBinary* buf, ErlDrvSizeT newsz)
{
+ DEBUGF(("realloc_buffer: %ld -> %ld\r\n", (buf==NULL) ? 0 : buf->orig_size, newsz));
return driver_realloc_binary(buf, newsz);
}
#endif
@@ -1925,7 +2016,7 @@ static void enq_multi_op(tcp_descriptor *desc, char *buf, int req,
ErlDrvTermData caller, MultiTimerData *timeout,
ErlDrvMonitor *monitorp)
{
- int id = NEW_ASYNC_ID();
+ int id = NEW_ASYNC_ID(INETP(desc));
enq_old_multi_op(desc,id,req,caller,timeout,monitorp);
if (buf != NULL)
put_int16(id, buf);
@@ -1994,7 +2085,7 @@ static int remove_multi_op(tcp_descriptor *desc, int *id_p, int *req_p,
static int enq_async_w_tmo(inet_descriptor* desc, char* buf, int req, unsigned timeout,
ErlDrvMonitor *monitorp)
{
- int id = NEW_ASYNC_ID();
+ int id = NEW_ASYNC_ID(desc);
inet_async_op* opp;
if ((opp = desc->oph) == NULL) /* queue empty */
@@ -2203,13 +2294,16 @@ static int inet_reply_ok(inet_descriptor* desc)
ErlDrvTermData caller = desc->caller;
int i = 0;
+ desc->caller = 0;
+ if (is_not_internal_pid(caller))
+ return 0;
+
i = LOAD_ATOM(spec, i, am_inet_reply);
i = LOAD_PORT(spec, i, desc->dport);
i = LOAD_ATOM(spec, i, am_ok);
i = LOAD_TUPLE(spec, i, 3);
ASSERT(i == sizeof(spec)/sizeof(*spec));
- desc->caller = 0;
return erl_drv_send_term(desc->dport, caller, spec, i);
}
@@ -2716,6 +2810,66 @@ static int inet_async_data(inet_descriptor* desc, const char* buf, int len)
}
}
+#ifndef __WIN32__
+static int load_cmsg_int(ErlDrvTermData *spec, int i,
+ struct cmsghdr *cmsg) {
+ union u {
+ byte uint8;
+ Uint16 uint16;
+ Uint32 uint32;
+ Uint64 uint64;
+ } *p;
+ p = (union u*) CMSG_DATA(cmsg);
+ switch (LEN_CMSG_DATA(cmsg) * CHAR_BIT) {
+ case 8:
+ return LOAD_INT(spec, i, p->uint8);
+ case 16:
+ return LOAD_INT(spec, i, p->uint16);
+
+ case 32:
+ return LOAD_INT(spec, i, p->uint32);
+
+ case 64:
+ return LOAD_INT(spec, i, p->uint64);
+ }
+ return LOAD_INT(spec, i, 0);
+}
+
+static int parse_ancillary_data_item(ErlDrvTermData *spec, int i,
+ struct cmsghdr *cmsg, int *n) {
+#define LOAD_CMSG_INT(proto, type, am) \
+ if (cmsg->cmsg_level == (proto) && \
+ cmsg->cmsg_type == (type)) { \
+ i = LOAD_ATOM(spec, i, (am)); \
+ i = load_cmsg_int(spec, i, cmsg); \
+ i = LOAD_TUPLE(spec, i, 2); \
+ (*n)++; \
+ return i; \
+ }
+#if defined(IPPROTO_IP) && defined(IP_TOS)
+ LOAD_CMSG_INT(IPPROTO_IP, IP_TOS, am_tos);
+#endif
+#if defined(IPPROTO_IPV6) && defined(IPV6_TCLASS)
+ LOAD_CMSG_INT(IPPROTO_IPV6, IPV6_TCLASS, am_tclass);
+#endif
+#if defined(IPPROTO_IP) && defined(IP_TTL)
+ LOAD_CMSG_INT(IPPROTO_IP, IP_TTL, am_ttl);
+#endif
+ /* BSD uses the RECV* names in CMSG fields */
+#if defined(IPPROTO_IP) && defined(IP_RECVTOS)
+ LOAD_CMSG_INT(IPPROTO_IP, IP_RECVTOS, am_tos);
+#endif
+#if defined(IPPROTO_IPV6) && defined(IPV6_RECVTCLASS)
+ LOAD_CMSG_INT(IPPROTO_IPV6, IPV6_RECVTCLASS, am_tclass);
+#endif
+#if defined(IPPROTO_IP) && defined(IP_RECVTTL)
+ LOAD_CMSG_INT(IPPROTO_IP, IP_RECVTTL, am_ttl);
+#endif
+#undef LOAD_CMSG_INT
+ return i;
+}
+#endif /* #ifndef __WIN32__ */
+
#ifdef HAVE_SCTP
/*
** SCTP-related atoms:
@@ -2847,11 +3001,18 @@ static int sctp_parse_ancillary_data
for (cmsg = frst_msg; cmsg != NULL; cmsg = CMSG_NXTHDR(mptr,cmsg))
{
struct sctp_sndrcvinfo * sri;
-
+#ifndef __WIN32
+ int old_s;
+
+ /* Parse ancillary data common to UDP */
+ old_s = s;
+ i = parse_ancillary_data_item(spec, i, cmsg, &s);
+ if (s > old_s) continue;
/* Skip other possible ancillary data, e.g. from IPv6: */
if (cmsg->cmsg_level != IPPROTO_SCTP ||
cmsg->cmsg_type != SCTP_SNDRCV)
continue;
+#endif
if (((char*)cmsg + cmsg->cmsg_len) - (char*)frst_msg >
mptr->msg_controllen)
@@ -3191,6 +3352,23 @@ static int sctp_parse_async_event
}
#endif /* HAVE_SCTP */
+#ifndef __WIN32__
+static int udp_parse_ancillary_data(ErlDrvTermData *spec, int i,
+ struct msghdr *mptr) {
+ struct cmsghdr *cmsg;
+ int n;
+
+ n = 0;
+ for (cmsg = CMSG_FIRSTHDR(mptr);
+ cmsg != NULL;
+ cmsg = CMSG_NXTHDR(mptr, cmsg)) {
+ i = parse_ancillary_data_item(spec, i, cmsg, &n);
+ }
+ i = LOAD_NIL(spec, i);
+ return LOAD_LIST(spec, i, n+1);
+}
+#endif /* ifndef __WIN32__ */
+
/*
** passive mode reply:
** for UDP:
@@ -3213,7 +3391,7 @@ static int sctp_parse_async_event
static int
inet_async_binary_data
(inet_descriptor* desc, unsigned int phsz,
- ErlDrvBinary * bin, int offs, int len, void * extra)
+ ErlDrvBinary * bin, int offs, int len, void *mp)
{
unsigned int hsz = desc->hsz + phsz;
ErlDrvTermData spec [PACKET_ERL_DRV_TERM_DATA_LEN];
@@ -3246,9 +3424,10 @@ inet_async_binary_data
if (IS_SCTP(desc))
{ /* For SCTP we always have desc->hsz==0 (i.e., no application-level
headers are used), so hsz==phsz (see above): */
- struct msghdr* mptr;
int sz;
-
+ struct msghdr *mptr;
+
+ mptr = mp;
ASSERT (hsz == phsz && hsz != 0);
sz = len - hsz; /* Size of the msg data proper, w/o the addr */
@@ -3256,7 +3435,6 @@ inet_async_binary_data
i = LOAD_STRING(spec, i, bin->orig_bytes+offs, hsz);
/* Put in the list (possibly empty) of Ancillary Data: */
- mptr = (struct msghdr *) extra;
i = sctp_parse_ancillary_data (spec, i, mptr);
/* Then: Data or Event (Notification)? */
@@ -3285,20 +3463,32 @@ inet_async_binary_data
}
else
#endif /* HAVE_SCTP */
- /* Generic case. Both Addr and Data (or a single list of them together) are
- returned: */
+ {
+ /* Generic case. Both Addr and Data
+ * (or a single list of them together) are returned: */
- if ((desc->mode == INET_MODE_LIST) || (hsz > len)) {
- /* INET_MODE_LIST => [H1,H2,...Hn] */
- i = LOAD_STRING(spec, i, bin->orig_bytes+offs, len);
- }
- else {
- /* INET_MODE_BINARY => [H1,H2,...HSz | Binary] or [Binary]: */
- int sz = len - hsz;
- i = LOAD_BINARY(spec, i, bin, offs+hsz, sz);
- if (hsz > 0)
- i = LOAD_STRING_CONS(spec, i, bin->orig_bytes+offs, hsz);
+ if ((desc->mode == INET_MODE_LIST) || (hsz > len)) {
+ /* INET_MODE_LIST => [H1,H2,...Hn] */
+ i = LOAD_STRING(spec, i, bin->orig_bytes+offs, len);
+ }
+ else {
+ /* INET_MODE_BINARY => [H1,H2,...HSz | Binary] or [Binary]: */
+ int sz = len - hsz;
+ i = LOAD_BINARY(spec, i, bin, offs+hsz, sz);
+ if (hsz > 0)
+ i = LOAD_STRING_CONS(spec, i, bin->orig_bytes+offs, hsz);
+ }
+
+#ifndef __WIN32__
+ if (mp) {
+ /* We got ancillary data from an UDP recvmsg.
+ * Insert an additional tuple level {[F|AddrData],AncData} */
+ i = udp_parse_ancillary_data(spec, i, (struct msghdr*)mp);
+ i = LOAD_TUPLE(spec, i, 2);
+ }
+#endif
}
+
/* Close up the {ok, ...} or {error, ...} tuple: */
i = LOAD_TUPLE(spec, i, 2);
@@ -3430,8 +3620,9 @@ static int tcp_error_message(tcp_descriptor* desc, int err)
** [AddrLen, H2,...,HSz] are msg headers for UDP AF_UNIX only
** Data : List() | Binary()
*/
-static int packet_binary_message
- (inet_descriptor* desc, ErlDrvBinary* bin, int offs, int len, void* extra)
+static int packet_binary_message(inet_descriptor* desc,
+ ErlDrvBinary* bin, int offs, int len,
+ void *mp)
{
unsigned int hsz = desc->hsz;
ErlDrvTermData spec [PACKET_ERL_DRV_TERM_DATA_LEN];
@@ -3456,8 +3647,14 @@ static int packet_binary_message
# ifdef HAVE_SCTP
if (!IS_SCTP(desc))
- {
# endif
+ {
+#ifndef __WIN32__
+ if (mp) i = udp_parse_ancillary_data(spec, i, (struct msghdr*)mp);
+#endif
+ /* We got ancillary data from an UDP recvmsg.
+ * Insert an additional tuple level {AncData,[F|AddrData]}
+ */
if ((desc->mode == INET_MODE_LIST) || (hsz > len))
/* INET_MODE_LIST, or only headers => [H1,H2,...Hn] */
i = LOAD_STRING(spec, i, bin->orig_bytes+offs, len);
@@ -3469,16 +3666,24 @@ static int packet_binary_message
if (hsz > 0)
i = LOAD_STRING_CONS(spec, i, bin->orig_bytes+offs, hsz);
}
-# ifdef HAVE_SCTP
+ /* Close up the outer 5-or-6-tuple */
+#ifndef __WIN32__
+ if (mp) i = LOAD_TUPLE(spec, i, 6);
+ else
+#endif
+ i = LOAD_TUPLE(spec, i, 5);
}
+# ifdef HAVE_SCTP
else
- { /* For SCTP we always have desc->hsz==0 (i.e., no application-level
+ {
+ struct msghdr *mptr;
+
+ mptr = mp;
+ /* For SCTP we always have desc->hsz==0 (i.e., no application-level
headers are used): */
- struct msghdr* mptr;
ASSERT(hsz == 0);
/* Put in the list (possibly empty) of Ancillary Data: */
- mptr = (struct msghdr *) extra;
i = sctp_parse_ancillary_data (spec, i, mptr);
/* Then: Data or Event (Notification)? */
@@ -3504,11 +3709,11 @@ static int packet_binary_message
/* Close up the {[AncilData], Event_OR_Data} tuple: */
i = LOAD_TUPLE (spec, i, 2);
+ /* Close up the outer 5-tuple: */
+ i = LOAD_TUPLE(spec, i, 5);
}
# endif /* HAVE_SCTP */
- /* Close up the outer 5-tuple: */
- i = LOAD_TUPLE(spec, i, 5);
ASSERT(i <= PACKET_ERL_DRV_TERM_DATA_LEN);
return erl_drv_output_term(desc->dport, spec, i);
}
@@ -3642,19 +3847,19 @@ tcp_reply_binary_data(tcp_descriptor* desc, ErlDrvBinary* bin, int offs, int len
static int
packet_reply_binary_data(inet_descriptor* desc, unsigned int hsz,
ErlDrvBinary * bin, int offs, int len,
- void * extra)
+ void *mp)
{
int code;
if (desc->active == INET_PASSIVE)
/* "inet" is actually for both UDP and SCTP, as well as TCP! */
- return inet_async_binary_data(desc, hsz, bin, offs, len, extra);
+ return inet_async_binary_data(desc, hsz, bin, offs, len, mp);
else
{ /* INET_ACTIVE or INET_ONCE: */
if (desc->deliver == INET_DELIVER_PORT)
code = inet_port_binary_data(desc, bin, offs, len);
else
- code = packet_binary_message(desc, bin, offs, len, extra);
+ code = packet_binary_message(desc, bin, offs, len, mp);
if (code < 0)
return code;
INET_CHECK_ACTIVE_TO_PASSIVE(desc);
@@ -3721,10 +3926,12 @@ static void inet_init_sctp(void) {
INIT_ATOM(reuseaddr);
INIT_ATOM(dontroute);
INIT_ATOM(priority);
- INIT_ATOM(tos);
- INIT_ATOM(tclass);
+ INIT_ATOM(recvtos);
+ INIT_ATOM(recvtclass);
+ INIT_ATOM(recvttl);
INIT_ATOM(ipv6_v6only);
INIT_ATOM(netns);
+ INIT_ATOM(bind_to_device);
/* Option names */
INIT_ATOM(sctp_rtoinfo);
@@ -3864,6 +4071,11 @@ static int inet_init()
#endif
INIT_ATOM(empty_out_q);
INIT_ATOM(ssl_tls);
+#ifndef __WIN32__
+ INIT_ATOM(tos);
+ INIT_ATOM(tclass);
+ INIT_ATOM(ttl);
+#endif
INIT_ATOM(http_eoh);
INIT_ATOM(http_header);
@@ -3877,6 +4089,10 @@ static int inet_init()
INIT_ATOM(https);
INIT_ATOM(scheme);
+#ifdef HAVE_SENDFILE
+ INIT_ATOM(sendfile);
+#endif
+
/* add TCP, UDP and SCTP drivers */
add_driver_entry(&tcp_inet_driver_entry);
#ifdef HAVE_UDP
@@ -4018,13 +4234,30 @@ static char* inet_set_address(int family, inet_address* dst,
int n;
if (*len == 0) return str_einval;
n = *((unsigned char*)(*src)); /* Length field */
- if ((*len < 1+n) || (sizeof(dst->sal.sun_path) < n+1)) {
+ if (*len < 1+n) return str_einval;
+ if (n +
+#ifdef __linux__
+ /* Make sure the address gets zero terminated
+ * except when the first byte is \0 because then it is
+ * sort of zero terminated although the zero termination
+ * comes before the address...
+ * This fix handles Linux's nonportable
+ * abstract socket address extension.
+ */
+ ((*len) > 1 && (*src)[1] == '\0' ? 0 : 1)
+#else
+ 1
+#endif
+ > sizeof(dst->sal.sun_path)) {
return str_einval;
}
sys_memzero((char*)dst, sizeof(struct sockaddr_un));
dst->sal.sun_family = family;
sys_memcpy(dst->sal.sun_path, (*src)+1, n);
*len = offsetof(struct sockaddr_un, sun_path) + n;
+#ifndef NO_SA_LEN
+ dst->sal.sun_len = *len;
+#endif
*src += 1 + n;
return NULL;
}
@@ -4132,8 +4365,8 @@ static char *inet_set_faddress(int family, inet_address* dst,
/* Get a inaddr structure
** src = inaddr structure
-** *len is the lenght of structure
** dst is filled with [F,P1,P0,X1,....]
+** *len is the length of structure
** where F is the family code (coded)
** and *len is the length of dst on return
** (suitable to deliver to erlang)
@@ -4169,15 +4402,16 @@ static int inet_get_address(char* dst, inet_address* src, unsigned int* len)
if (*len < offsetof(struct sockaddr_un, sun_path)) return -1;
n = *len - offsetof(struct sockaddr_un, sun_path);
if (255 < n) return -1;
- /* Portability fix: Assume that the address is a zero terminated
- * string, except when the first byte is \0 i.e the
- * string length is 0. Then use the reported length instead.
- * This fix handles Linux's abstract socket address
- * nonportable extension.
- */
m = my_strnlen(src->sal.sun_path, n);
- if ((m == 0) && is_nonzero(src->sal.sun_path, n))
- m = n;
+#ifdef __linux__
+ /* Assume that the address is a zero terminated string,
+ * except when the first byte is \0 i.e the string length is 0,
+ * then use the reported length instead.
+ * This fix handles Linux's nonportable
+ * abstract socket address extension.
+ */
+ if (m == 0) m = n;
+#endif
dst[0] = INET_AF_LOCAL;
dst[1] = (char) ((unsigned char) m);
sys_memcpy(dst+2, src->sal.sun_path, m);
@@ -4234,15 +4468,16 @@ inet_address_to_erlang(char *dst, inet_address **src, SOCKLEN_T sz) {
if (sz < offsetof(struct sockaddr_un, sun_path)) return -1;
n = sz - offsetof(struct sockaddr_un, sun_path);
if (255 < n) return -1;
- /* Portability fix: Assume that the address is a zero terminated
- * string, except when the first byte is \0 i.e the
- * string length is 0. Then use the reported length instead.
- * This fix handles Linux's abstract socket address
- * nonportable extension.
- */
m = my_strnlen((*src)->sal.sun_path, n);
- if ((m == 0) && is_nonzero((*src)->sal.sun_path, n))
- m = n;
+#ifdef __linux__
+ /* Assume that the address is a zero terminated string,
+ * except when the first byte is \0 i.e the string length is 0,
+ * Then use the reported length instead.
+ * This fix handles Linux's nonportable
+ * abstract socket address extension.
+ */
+ if (m == 0) m = n;
+#endif
if (dst) {
dst[0] = INET_AF_LOCAL;
dst[1] = (char) ((unsigned char) m);
@@ -4320,6 +4555,12 @@ static void desc_close(inet_descriptor* desc)
desc->event = INVALID_EVENT; /* closed by stop_select callback */
desc->s = INVALID_SOCKET;
desc->event_mask = 0;
+
+ /* mark as disconnected in case when socket is left lingering due to
+ * {exit_on_close, false} option in gen_tcp socket creation. Next
+ * write to socket should produce {error, enotconn} and send a
+ * message {tcp_error,#Port<>,econnreset} */
+ desc->state &= ~INET_STATE_CONNECTED;
}
}
@@ -4953,6 +5194,71 @@ static int hwaddr_libdlpi_lookup(const char *ifnm,
}
#endif
+#ifdef HAVE_GETIFADDRS
+/* Returns 0 for success and errno() for failure */
+static int call_getifaddrs(inet_descriptor* desc_p, struct ifaddrs **ifa_pp)
+{
+ int result, save_errno;
+#ifdef HAVE_SETNS
+ int current_ns;
+
+ current_ns = 0;
+ if (desc_p->netns != NULL) {
+ int new_ns;
+ /* Temporarily change network namespace for this thread
+ * over the getifaddrs() call
+ */
+ current_ns = open("/proc/self/ns/net", O_RDONLY);
+ if (current_ns == INVALID_SOCKET)
+ return sock_errno();
+ new_ns = open(desc_p->netns, O_RDONLY);
+ if (new_ns == INVALID_SOCKET) {
+ save_errno = sock_errno();
+ while (close(current_ns) == INVALID_SOCKET &&
+ sock_errno() == EINTR);
+ return save_errno;
+ }
+ if (setns(new_ns, CLONE_NEWNET) != 0) {
+ save_errno = sock_errno();
+ while (close(new_ns) == INVALID_SOCKET &&
+ sock_errno() == EINTR);
+ while (close(current_ns) == INVALID_SOCKET &&
+ sock_errno() == EINTR);
+ return save_errno;
+ }
+ else {
+ while (close(new_ns) == INVALID_SOCKET &&
+ sock_errno() == EINTR);
+ }
+ }
+#endif
+ save_errno = 0;
+ result = getifaddrs(ifa_pp);
+ if (result < 0)
+ save_errno = sock_errno();
+#ifdef HAVE_SETNS
+ if (desc_p->netns != NULL) {
+ /* Restore network namespace */
+ if (setns(current_ns, CLONE_NEWNET) != 0) {
+ /* XXX Failed to restore network namespace.
+ * What to do? Tidy up and return an error...
+ * Note that the thread now might still be in the set namespace.
+ * Can this even happen? Should the emulator be aborted?
+ */
+ if (result >= 0) {
+ /* We got a result but have to waste it */
+ save_errno = sock_errno();
+ freeifaddrs(*ifa_pp);
+ }
+ }
+ while (close(current_ns) == INVALID_SOCKET &&
+ sock_errno() == EINTR);
+ }
+#endif
+ return save_errno;
+}
+#endif /* #ifdef HAVE_GETIFADDRS */
+
/* FIXME: temporary hack */
#ifndef IFHWADDRLEN
#define IFHWADDRLEN 6
@@ -5030,8 +5336,8 @@ static ErlDrvSSizeT inet_ctl_ifget(inet_descriptor* desc,
struct sockaddr_dl *sdlp;
int found = 0;
- if (getifaddrs(&ifa) == -1)
- goto error;
+ if (call_getifaddrs(desc, &ifa) != 0)
+ goto error;
for (ifp = ifa; ifp; ifp = ifp->ifa_next) {
if ((ifp->ifa_addr->sa_family == AF_LINK) &&
@@ -5053,8 +5359,8 @@ static ErlDrvSSizeT inet_ctl_ifget(inet_descriptor* desc,
sys_memcpy(sptr,
sdlp->sdl_data + sdlp->sdl_nlen,
sdlp->sdl_alen);
- freeifaddrs(ifa);
sptr += sdlp->sdl_alen;
+ freeifaddrs(ifa);
#endif
break;
}
@@ -5749,6 +6055,7 @@ static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p,
ErlDrvSizeT buf_size;
char *buf_p;
char *buf_alloc_p;
+ int save_errno;
buf_size = GETIFADDRS_BUFSZ;
buf_alloc_p = ALLOC(GETIFADDRS_BUFSZ);
@@ -5783,9 +6090,9 @@ static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p,
} \
} while (0)
- if (getifaddrs(&ifa_p) < 0) {
- return ctl_error(sock_errno(), rbuf_pp, rsize);
- }
+ if ((save_errno = call_getifaddrs(desc_p, &ifa_p)) != 0)
+ return ctl_error(save_errno, rbuf_pp, rsize);
+
ifa_free_p = ifa_p;
*buf_p++ = INET_REP_OK;
for (; ifa_p; ifa_p = ifa_p->ifa_next) {
@@ -5867,7 +6174,8 @@ static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p,
but ditto for the other worked and that was actually the requested
option, failure was still reported to erlang. */
-#if defined(IP_TOS) && defined(SOL_IP) && defined(SO_PRIORITY)
+#if defined(IP_TOS) && defined(IPPROTO_IP) \
+ && defined(SO_PRIORITY) && !defined(__WIN32__)
static int setopt_prio_tos_trick
(int fd, int proto, int type, char* arg_ptr, int arg_sz, int propagate)
{
@@ -5889,14 +6197,14 @@ static int setopt_prio_tos_trick
res_prio = sock_getopt(fd, SOL_SOCKET, SO_PRIORITY,
(char *) &tmp_ival_prio, &tmp_arg_sz_prio);
- res_tos = sock_getopt(fd, SOL_IP, IP_TOS,
+ res_tos = sock_getopt(fd, IPPROTO_IP, IP_TOS,
(char *) &tmp_ival_tos, &tmp_arg_sz_tos);
res = sock_setopt(fd, proto, type, arg_ptr, arg_sz);
if (res == 0) {
if (type != SO_PRIORITY) {
if (type != IP_TOS && res_tos == 0) {
res_tos = sock_setopt(fd,
- SOL_IP,
+ IPPROTO_IP,
IP_TOS,
(char *) &tmp_ival_tos,
tmp_arg_sz_tos);
@@ -5940,12 +6248,15 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
int proto;
int opt;
struct linger li_val;
-#ifdef HAVE_MULTICAST_SUPPORT
+#if defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP)
struct ip_mreq mreq_val;
#endif
int ival;
char* arg_ptr;
int arg_sz;
+#ifdef SO_BINDTODEVICE
+ char ifname[IFNAMSIZ];
+#endif
enum PacketParseType old_htype = desc->htype;
int old_active = desc->active;
int propagate; /* Set to 1 if failure to set this option
@@ -5960,6 +6271,8 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
/* XXX { int i; for(i=0;i<len;++i) fprintf(stderr,"0x%02X, ", (unsigned) ptr[i]); fprintf(stderr,"\r\n");} */
while(len >= 5) {
+ int recv_cmsgflags;
+
opt = *ptr++;
ival = get_int32(ptr);
ptr += 4;
@@ -5968,6 +6281,7 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
arg_sz = sizeof(ival);
proto = SOL_SOCKET;
propagate = 0;
+ recv_cmsgflags = desc->recv_cmsgflags;
switch(opt) {
case INET_LOPT_HEADER:
@@ -6218,28 +6532,80 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
(long)desc->port, desc->s, ival));
break;
#else
+ /* inet_fill_opts always returns a value for this option,
+ * so we need to ignore it if not implemented */
continue;
#endif
case INET_OPT_TOS:
-#if defined(IP_TOS) && defined(SOL_IP)
- proto = SOL_IP;
+#if defined(IP_TOS) && defined(IPPROTO_IP)
+ proto = IPPROTO_IP;
type = IP_TOS;
propagate = 1;
DEBUGF(("inet_set_opts(%ld): s=%d, IP_TOS=%d\r\n",
(long)desc->port, desc->s, ival));
break;
#else
+ /* inet_fill_opts always returns a value for this option,
+ * so we need to ignore it if not implemented. */
continue;
#endif
-#if defined(IPV6_TCLASS) && defined(SOL_IPV6)
+#if defined(IPV6_TCLASS) && defined(IPPROTO_IPV6)
case INET_OPT_TCLASS:
- proto = SOL_IPV6;
+ proto = IPPROTO_IPV6;
type = IPV6_TCLASS;
propagate = 1;
DEBUGF(("inet_set_opts(%ld): s=%d, IPV6_TCLASS=%d\r\n",
(long)desc->port, desc->s, ival));
break;
#endif
+#if defined(IP_TTL) && defined(IPPROTO_IP)
+ case INET_OPT_TTL:
+ proto = IPPROTO_IP;
+ type = IP_TTL;
+ propagate = 1;
+ DEBUGF(("inet_set_opts(%ld): s=%d, IP_TTL=%d\r\n",
+ (long)desc->port, desc->s, ival));
+ break;
+#endif
+#if defined(IP_RECVTOS) && defined(IPPROTO_IP)
+ case INET_OPT_RECVTOS:
+ proto = IPPROTO_IP;
+ type = IP_RECVTOS;
+ propagate = 1;
+ recv_cmsgflags =
+ ival ?
+ (desc->recv_cmsgflags | INET_CMSG_RECVTOS) :
+ (desc->recv_cmsgflags & ~INET_CMSG_RECVTOS);
+ DEBUGF(("inet_set_opts(%ld): s=%d, IP_RECVTOS=%d\r\n",
+ (long)desc->port, desc->s, ival));
+ break;
+#endif
+#if defined(IPV6_RECVTCLASS) && defined(IPPROTO_IPV6)
+ case INET_OPT_RECVTCLASS:
+ proto = IPPROTO_IPV6;
+ type = IPV6_RECVTCLASS;
+ propagate = 1;
+ recv_cmsgflags =
+ ival ?
+ (desc->recv_cmsgflags | INET_CMSG_RECVTCLASS) :
+ (desc->recv_cmsgflags & ~INET_CMSG_RECVTCLASS);
+ DEBUGF(("inet_set_opts(%ld): s=%d, IPV6_RECVTCLASS=%d\r\n",
+ (long)desc->port, desc->s, ival));
+ break;
+#endif
+#if defined(IP_RECVTTL) && defined(IPPROTO_IP)
+ case INET_OPT_RECVTTL:
+ proto = IPPROTO_IP;
+ type = IP_RECVTTL;
+ propagate = 1;
+ recv_cmsgflags =
+ ival ?
+ (desc->recv_cmsgflags | INET_CMSG_RECVTTL) :
+ (desc->recv_cmsgflags & ~INET_CMSG_RECVTTL);
+ DEBUGF(("inet_set_opts(%ld): s=%d, IP_RECVTTL=%d\r\n",
+ (long)desc->port, desc->s, ival));
+ break;
+#endif
case TCP_OPT_NODELAY:
proto = IPPROTO_TCP;
@@ -6248,7 +6614,20 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
(long)desc->port, desc->s, ival));
break;
-#ifdef HAVE_MULTICAST_SUPPORT
+ case TCP_OPT_NOPUSH:
+#if defined(INET_TCP_NOPUSH)
+ proto = IPPROTO_TCP;
+ type = INET_TCP_NOPUSH;
+ DEBUGF(("inet_set_opts(%ld): s=%d, t=%d TCP_NOPUSH=%d\r\n",
+ (long)desc->port, desc->s, type, ival));
+ break;
+#else
+ /* inet_fill_opts always returns a value for this option,
+ * so we need to ignore it if not implemented, just in case */
+ continue;
+#endif
+
+#if defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP)
case UDP_OPT_MULTICAST_TTL:
proto = IPPROTO_IP;
@@ -6294,10 +6673,10 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
arg_sz = sizeof(mreq_val);
break;
-#endif /* HAVE_MULTICAST_SUPPORT */
+#endif /* defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP) */
case INET_OPT_IPV6_V6ONLY:
-#if HAVE_DECL_IPV6_V6ONLY
+#if HAVE_DECL_IPV6_V6ONLY && defined(IPPROTO_IPV6)
proto = IPPROTO_IPV6;
type = IPV6_V6ONLY;
propagate = 1;
@@ -6331,14 +6710,39 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
len -= arg_sz;
break;
+#ifdef SO_BINDTODEVICE
+ case INET_OPT_BIND_TO_DEVICE:
+ if (ival < 0) return -1;
+ if (len < ival) return -1;
+ if (ival > sizeof(ifname)) {
+ return -1;
+ }
+ memcpy(ifname, ptr, ival);
+ ifname[ival] = '\0';
+ ptr += ival;
+ len -= ival;
+
+ proto = SOL_SOCKET;
+ type = SO_BINDTODEVICE;
+ arg_ptr = (char*)&ifname;
+ arg_sz = sizeof(ifname);
+ propagate = 1; /* We do want to know if this fails */
+
+ DEBUGF(("inet_set_opts(%ld): s=%d, SO_BINDTODEVICE=%s\r\n",
+ (long)desc->port, desc->s, ifname));
+ break;
+#endif
+
default:
return -1;
}
-#if defined(IP_TOS) && defined(SOL_IP) && defined(SO_PRIORITY)
+#if defined(IP_TOS) && defined(IPPROTO_IP) \
+ && defined(SO_PRIORITY) && !defined(__WIN32__)
res = setopt_prio_tos_trick (desc->s, proto, type, arg_ptr, arg_sz, propagate);
#else
res = sock_setopt (desc->s, proto, type, arg_ptr, arg_sz);
#endif
+ if (res == 0) desc->recv_cmsgflags = recv_cmsgflags;
if (propagate && res != 0) {
return -1;
}
@@ -6346,7 +6750,12 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
(long)desc->port, desc->s, res));
if (type == SO_RCVBUF) {
/* make sure we have desc->bufsz >= SO_RCVBUF */
- if (ival > desc->bufsz)
+ if (ival > (1 << 16) && desc->stype == SOCK_DGRAM && !IS_SCTP(desc))
+ /* For UDP we don't want to automatically
+ set the buffer size to be larger than
+ the theoretical max MTU */
+ desc->bufsz = 1 << 16;
+ else if (ival > desc->bufsz)
desc->bufsz = ival;
}
}
@@ -6463,6 +6872,9 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
# ifdef SCTP_DELAYED_ACK_TIME
struct sctp_assoc_value av; /* Not in SOLARIS10 */
# endif
+# ifdef SO_BINDTODEVICE
+ char ifname[IFNAMSIZ];
+# endif
}
arg;
@@ -6472,10 +6884,14 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
while (curr < ptr + len)
{
+ int recv_cmsgflags;
/* Get the Erlang-encoded option type -- always 1 byte: */
- int eopt = *curr;
+ int eopt;
+
+ eopt = *curr;
curr++;
+ recv_cmsgflags = desc->recv_cmsgflags;
/* Get the option value. XXX: The condition (curr < ptr + len)
does not preclude us from reading from beyond the buffer end,
if the Erlang part of the driver specifies its input wrongly!
@@ -6656,28 +7072,32 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
break;
}
# else
- continue; /* Option not supported -- ignore it */
+ /* inet_fill_opts always returns a value for this option,
+ * so we need to ignore it if not implemented, just in case */
+ continue;
# endif
case INET_OPT_TOS:
-# if defined(IP_TOS) && defined(SOL_IP)
+# if defined(IP_TOS) && defined(IPPROTO_IP)
{
arg.ival= get_int32 (curr); curr += 4;
- proto = SOL_IP;
+ proto = IPPROTO_IP;
type = IP_TOS;
arg_ptr = (char*) (&arg.ival);
arg_sz = sizeof ( arg.ival);
break;
}
# else
- continue; /* Option not supported -- ignore it */
+ /* inet_fill_opts always returns a value for this option,
+ * so we need to ignore it if not implemented, just in case */
+ continue;
# endif
-# if defined(IPV6_TCLASS) && defined(SOL_IPV6)
+# if defined(IPV6_TCLASS) && defined(IPPROTO_IPV6)
case INET_OPT_TCLASS:
{
arg.ival= get_int32 (curr); curr += 4;
- proto = SOL_IPV6;
+ proto = IPPROTO_IPV6;
type = IPV6_TCLASS;
arg_ptr = (char*) (&arg.ival);
arg_sz = sizeof ( arg.ival);
@@ -6685,9 +7105,69 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
}
# endif
+# if defined(IP_TTL) && defined(IPPROTO_IP)
+ case INET_OPT_TTL:
+ {
+ arg.ival= get_int32 (curr); curr += 4;
+ proto = IPPROTO_IP;
+ type = IP_TTL;
+ arg_ptr = (char*) (&arg.ival);
+ arg_sz = sizeof ( arg.ival);
+ break;
+ }
+# endif
+
+# if defined(IP_RECVTOS) && defined(IPPROTO_IP)
+ case INET_OPT_RECVTOS:
+ {
+ arg.ival= get_int32 (curr); curr += 4;
+ proto = IPPROTO_IP;
+ type = IP_RECVTOS;
+ arg_ptr = (char*) (&arg.ival);
+ arg_sz = sizeof ( arg.ival);
+ recv_cmsgflags =
+ arg.ival ?
+ (desc->recv_cmsgflags | INET_CMSG_RECVTOS) :
+ (desc->recv_cmsgflags & ~INET_CMSG_RECVTOS);
+ break;
+ }
+# endif
+
+# if defined(IPV6_RECVTCLASS) && defined(IPPROTO_IPV6)
+ case INET_OPT_RECVTCLASS:
+ {
+ arg.ival= get_int32 (curr); curr += 4;
+ proto = IPPROTO_IPV6;
+ type = IPV6_RECVTCLASS;
+ arg_ptr = (char*) (&arg.ival);
+ arg_sz = sizeof ( arg.ival);
+ recv_cmsgflags =
+ arg.ival ?
+ (desc->recv_cmsgflags | INET_CMSG_RECVTCLASS) :
+ (desc->recv_cmsgflags & ~INET_CMSG_RECVTCLASS);
+ break;
+ }
+# endif
+
+# if defined(IP_RECVTTL) && defined(IPPROTO_IP)
+ case INET_OPT_RECVTTL:
+ {
+ arg.ival= get_int32 (curr); curr += 4;
+ proto = IPPROTO_IP;
+ type = IP_RECVTTL;
+ arg_ptr = (char*) (&arg.ival);
+ arg_sz = sizeof ( arg.ival);
+ recv_cmsgflags =
+ arg.ival ?
+ (desc->recv_cmsgflags | INET_CMSG_RECVTTL) :
+ (desc->recv_cmsgflags & ~INET_CMSG_RECVTTL);
+ break;
+ }
+# endif
+
case INET_OPT_IPV6_V6ONLY:
-# if HAVE_DECL_IPV6_V6ONLY
+# if HAVE_DECL_IPV6_V6ONLY && defined(IPPROTO_IPV6)
{
arg.ival= get_int32 (curr); curr += 4;
proto = IPPROTO_IPV6;
@@ -6702,6 +7182,23 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
continue; /* Option not supported -- ignore it */
# endif
+#ifdef SO_BINDTODEVICE
+ case INET_OPT_BIND_TO_DEVICE:
+ arg_sz = get_int32(curr); curr += 4;
+ CHKLEN(curr, arg_sz);
+ if (arg_sz >= sizeof(arg.ifname))
+ return -1;
+ memcpy(arg.ifname, curr, arg_sz);
+ arg.ifname[arg_sz] = '\0';
+ curr += arg_sz;
+
+ proto = SOL_SOCKET;
+ type = SO_BINDTODEVICE;
+ arg_ptr = (char*) (&arg.ifname);
+ arg_sz = sizeof ( arg.ifname);
+ break;
+#endif
+
case SCTP_OPT_AUTOCLOSE:
{
arg.ival= get_int32 (curr); curr += 4;
@@ -6920,13 +7417,15 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
*/
return -1;
}
-#if defined(IP_TOS) && defined(SOL_IP) && defined(SO_PRIORITY)
+#if defined(IP_TOS) && defined(IPPROTO_IP) \
+ && defined(SO_PRIORITY) && !defined(__WIN32__)
res = setopt_prio_tos_trick (desc->s, proto, type, arg_ptr, arg_sz, 1);
#else
res = sock_setopt (desc->s, proto, type, arg_ptr, arg_sz);
#endif
/* The return values of "sock_setopt" can only be 0 or -1: */
ASSERT(res == 0 || res == -1);
+ if (res == 0) desc->recv_cmsgflags = recv_cmsgflags;
if (res == -1)
{ /* Got an error, DO NOT continue with other options. However, on
Solaris 10, we DO allow SO_SNDBUF and SO_RCVBUF to fail, assu-
@@ -6947,6 +7446,35 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
}
#endif /* HAVE_SCTP */
+#ifndef __WIN32__
+static void put_cmsg_int32(struct cmsghdr *cmsg, char *ptr) {
+ union u {
+ byte uint8;
+ Uint16 uint16;
+ Uint32 uint32;
+ Uint64 uint64;
+ } *p;
+ p = (union u*) CMSG_DATA(cmsg);
+ switch (LEN_CMSG_DATA(cmsg) * CHAR_BIT) {
+ case 8:
+ put_int32((Uint32) p->uint8, ptr);
+ break;
+ case 16:
+ put_int32((Uint32) p->uint16, ptr);
+ break;
+ case 32:
+ put_int32(p->uint32, ptr);
+ break;
+ case 64:
+ put_int32((Uint32) p->uint64, ptr);
+ break;
+ default:
+ put_int32(0, ptr);
+ }
+ return;
+}
+#endif
+
/* load all option values into the buf and reply
** return total length of reply filled into ptr
** ptr should point to a buffer with 9*len +1 to be safe!!
@@ -6967,6 +7495,9 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
ErlDrvSizeT dest_used = 0;
ErlDrvSizeT dest_allocated = destlen;
char *orig_dest = *dest;
+#ifdef SO_BINDTODEVICE
+ char ifname[IFNAMSIZ];
+#endif
/* Ptr is a name parameter */
#define RETURN_ERROR() \
@@ -7178,8 +7709,8 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
continue;
#endif
case INET_OPT_TOS:
-#if defined(IP_TOS) && defined(SOL_IP)
- proto = SOL_IP;
+#if defined(IP_TOS) && defined(IPPROTO_IP)
+ proto = IPPROTO_IP;
type = IP_TOS;
break;
#else
@@ -7188,14 +7719,50 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
continue;
#endif
case INET_OPT_TCLASS:
-#if defined(IPV6_TCLASS) && defined(SOL_IPV6)
- proto = SOL_IPV6;
+#if defined(IPV6_TCLASS) && defined(IPPROTO_IPV6)
+ proto = IPPROTO_IPV6;
type = IPV6_TCLASS;
break;
#else
TRUNCATE_TO(0,ptr);
continue;
#endif
+ case INET_OPT_TTL:
+#if defined(IP_TTL) && defined(IPPROTO_IP)
+ proto = IPPROTO_IP;
+ type = IP_TTL;
+ break;
+#else
+ TRUNCATE_TO(0,ptr);
+ continue;
+#endif
+ case INET_OPT_RECVTOS:
+#if defined(IP_RECVTOS) && defined(IPPROTO_IP)
+ proto = IPPROTO_IP;
+ type = IP_RECVTOS;
+ break;
+#else
+ TRUNCATE_TO(0,ptr);
+ continue;
+#endif
+ case INET_OPT_RECVTCLASS:
+#if defined(IPV6_RECVTCLASS) && defined(IPPROTO_IPV6)
+ proto = IPPROTO_IPV6;
+ type = IPV6_RECVTCLASS;
+ break;
+#else
+ TRUNCATE_TO(0,ptr);
+ continue;
+#endif
+ case INET_OPT_RECVTTL:
+#if defined(IP_RECVTTL) && defined(IPPROTO_IP)
+ proto = IPPROTO_IP;
+ type = IP_RECVTTL;
+ break;
+#else
+ TRUNCATE_TO(0,ptr);
+ continue;
+#endif
case INET_OPT_REUSEADDR:
type = SO_REUSEADDR;
break;
@@ -7221,8 +7788,18 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
proto = IPPROTO_TCP;
type = TCP_NODELAY;
break;
+ case TCP_OPT_NOPUSH:
+#if defined(INET_TCP_NOPUSH)
+ proto = IPPROTO_TCP;
+ type = INET_TCP_NOPUSH;
+ break;
+#else
+ *ptr++ = opt;
+ put_int32(0, ptr);
+ continue;
+#endif
-#ifdef HAVE_MULTICAST_SUPPORT
+#if defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP)
case UDP_OPT_MULTICAST_TTL:
proto = IPPROTO_IP;
type = IP_MULTICAST_TTL;
@@ -7241,10 +7818,10 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
arg_ptr = (char*) &li_val;
type = SO_LINGER;
break;
-#endif /* HAVE_MULTICAST_SUPPORT */
+#endif /* defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP) */
case INET_OPT_IPV6_V6ONLY:
-#if HAVE_DECL_IPV6_V6ONLY
+#if HAVE_DECL_IPV6_V6ONLY && defined(IPPROTO_IPV6)
proto = IPPROTO_IPV6;
type = IPV6_V6ONLY;
break;
@@ -7302,6 +7879,113 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
put_int32(arg_sz,ptr);
continue;
}
+
+#ifdef SO_BINDTODEVICE
+ case INET_OPT_BIND_TO_DEVICE:
+ arg_sz = sizeof(ifname);
+ TRUNCATE_TO(0,ptr);
+ PLACE_FOR(5 + arg_sz,ptr);
+ arg_ptr = ptr + 5;
+ if (IS_SOCKET_ERROR(sock_getopt(desc->s,SOL_SOCKET,SO_BINDTODEVICE,
+ arg_ptr,&arg_sz))) {
+ TRUNCATE_TO(0,ptr);
+ continue;
+ }
+ arg_sz = my_strnlen(arg_ptr, arg_sz);
+ TRUNCATE_TO(arg_sz + 5,ptr);
+ *ptr++ = opt;
+ put_int32(arg_sz,ptr);
+ ptr += arg_sz;
+ continue;
+#endif
+
+#ifndef __WIN32__
+ /* Winsock does not have struct cmsghdr */
+ case INET_OPT_PKTOPTIONS: {
+ struct cmsghdr *cmsg, *cmsg_top;
+ SOCKLEN_T cmsg_sz;
+ union {
+ /* Ensure alignment */
+ struct cmsghdr hdr;
+ /* Room for (IP_TOS | IPV6_TCLASS) + IP_TTL */
+ char buf[2*CMSG_SPACE(sizeof(int))];
+ } cmsgbuf;
+ /* Select between IPv4 or IPv6 PKTOPTIONS
+ * depending on the socket protocol family
+ */
+ switch (desc->sfamily) {
+#if defined(IPPROTO_IP) && defined(IP_PKTOPTIONS)
+ case AF_INET: {
+ proto = IPPROTO_IP;
+ type = IP_PKTOPTIONS;
+ }
+ break;
+#endif
+#if defined(IPPROTO_IPV6) && defined(IPV6_PKTOPTIONS) && defined(AF_INET6)
+ case AF_INET6: {
+ proto = IPPROTO_IPV6;
+ type = IPV6_PKTOPTIONS;
+ }
+ break;
+#endif
+ default: {
+ RETURN_ERROR();
+ }
+ } /* switch */
+ TRUNCATE_TO(0, ptr);
+ /* Fetch a cmsg buffer from the socket */
+ cmsg_sz = sizeof(cmsgbuf.buf);
+ if (IS_SOCKET_ERROR(sock_getopt(desc->s, proto, type,
+ cmsgbuf.buf, &cmsg_sz))) {
+ continue;
+ }
+ /* Reply with Opt/8, Length/32, [COpt/8, Value/32]*
+ * i.e opt, total length and then all returned
+ * cmsg options and values
+ */
+ PLACE_FOR(1+4, ptr);
+ *ptr++ = opt;
+ arg_ptr = ptr; /* Where to put total length */
+ arg_sz = 0; /* Total length */
+ for (cmsg_top = (struct cmsghdr*)(cmsgbuf.buf + cmsg_sz),
+ cmsg = (struct cmsghdr*)cmsgbuf.buf;
+ cmsg < cmsg_top;
+ cmsg = NXT_CMSG_HDR(cmsg)) {
+#define PUT_CMSG_INT32(CMSG_LEVEL, CMSG_TYPE, OPT) \
+ if ((cmsg->cmsg_level == CMSG_LEVEL) && \
+ (cmsg->cmsg_type == CMSG_TYPE)) { \
+ PLACE_FOR(1+4, ptr); \
+ *ptr++ = OPT; \
+ put_cmsg_int32(cmsg, ptr); \
+ arg_sz += 1+4; \
+ continue; \
+ }
+#if defined(IPPROTO_IP) && defined(IP_TOS)
+ PUT_CMSG_INT32(IPPROTO_IP, IP_TOS, INET_OPT_TOS);
+#endif
+#if defined(IPPROTO_IPV6) && defined(IPV6_TCLASS)
+ PUT_CMSG_INT32(IPPROTO_IPV6, IPV6_TCLASS, INET_OPT_TCLASS);
+#endif
+#if defined(IPPROTO_IP) && defined(IP_TTL)
+ PUT_CMSG_INT32(IPPROTO_IP, IP_TTL, INET_OPT_TTL);
+#endif
+ /* BSD uses the RECV* names in CMSG fields */
+#if defined(IPPROTO_IP) && defined(IP_RECVTOS)
+ PUT_CMSG_INT32(IPPROTO_IP, IP_RECVTOS, INET_OPT_TOS);
+#endif
+#if defined(IPPROTO_IPV6) && defined(IPV6_RECVTCLASS)
+ PUT_CMSG_INT32(IPPROTO_IPV6, IPV6_RECVTCLASS, INET_OPT_TCLASS);
+#endif
+#if defined(IPPROTO_IP) && defined(IP_RECVTTL)
+ PUT_CMSG_INT32(IPPROTO_IP, IP_RECVTTL, INET_OPT_TTL);
+#endif
+#undef PUT_CMSG_INT32
+ }
+ put_int32(arg_sz, arg_ptr); /* Put total length */
+ continue;
+ }
+#endif /* #ifdef __WIN32__ */
+
default:
RETURN_ERROR();
}
@@ -7361,6 +8045,7 @@ static int load_paddrinfo (ErlDrvTermData * spec, int i,
return i;
}
+
/*
** "sctp_fill_opts": Returns {ok, Results}, or an error:
*/
@@ -7583,6 +8268,25 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc,
i = LOAD_TUPLE (spec, i, 2);
break;
}
+
+#ifdef SO_BINDTODEVICE
+ /* The following option returns a binary: */
+ case INET_OPT_BIND_TO_DEVICE: {
+ char ifname[IFNAMSIZ];
+ unsigned int sz = sizeof(ifname);
+
+ if (sock_getopt(desc->s, SOL_SOCKET, SO_BINDTODEVICE,
+ &ifname, &sz) < 0) continue;
+ /* Fill in the response: */
+ PLACE_FOR(spec, i,
+ LOAD_ATOM_CNT + LOAD_BUF2BINARY_CNT + LOAD_TUPLE_CNT);
+ i = LOAD_ATOM (spec, i, am_bind_to_device);
+ i = LOAD_BUF2BINARY(spec, i, ifname, my_strnlen(ifname, sz));
+ i = LOAD_TUPLE (spec, i, 2);
+ break;
+ }
+#endif
+
/* The following options just return an integer value: */
case INET_OPT_RCVBUF :
case INET_OPT_SNDBUF :
@@ -7591,6 +8295,7 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc,
case INET_OPT_PRIORITY :
case INET_OPT_TOS :
case INET_OPT_TCLASS :
+ case INET_OPT_TTL :
case INET_OPT_IPV6_V6ONLY:
case SCTP_OPT_AUTOCLOSE:
case SCTP_OPT_MAXSEG :
@@ -7598,6 +8303,9 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc,
case SCTP_OPT_NODELAY :
case SCTP_OPT_DISABLE_FRAGMENTS:
case SCTP_OPT_I_WANT_MAPPED_V4_ADDR:
+ case INET_OPT_RECVTOS :
+ case INET_OPT_RECVTCLASS :
+ case INET_OPT_RECVTTL :
{
int res = 0;
unsigned int sz = sizeof(res);
@@ -7653,8 +8361,8 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc,
}
case INET_OPT_TOS:
{
-# if defined(IP_TOS) && defined(SOL_IP)
- proto = SOL_IP;
+# if defined(IP_TOS) && defined(IPPROTO_IP)
+ proto = IPPROTO_IP;
type = IP_TOS;
is_int = 1;
tag = am_tos;
@@ -7666,8 +8374,8 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc,
}
case INET_OPT_TCLASS:
{
-# if defined(IPV6_TCLASS) && defined(SOL_IPV6)
- proto = SOL_IPV6;
+# if defined(IPV6_TCLASS) && defined(IPPROTO_IPV6)
+ proto = IPPROTO_IPV6;
type = IPV6_TCLASS;
is_int = 1;
tag = am_tclass;
@@ -7677,8 +8385,60 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc,
continue;
# endif
}
+ case INET_OPT_TTL:
+ {
+# if defined(IP_TTL) && defined(IPPROTO_IP)
+ proto = IPPROTO_IP;
+ type = IP_TTL;
+ is_int = 1;
+ tag = am_ttl;
+ break;
+# else
+ /* Not supported -- ignore */
+ continue;
+# endif
+ }
+ case INET_OPT_RECVTOS:
+ {
+# if defined(IP_RECVTOS) && defined(IPPROTO_IP)
+ proto = IPPROTO_IP;
+ type = IP_RECVTOS;
+ is_int = 0;
+ tag = am_recvtos;
+ break;
+# else
+ /* Not supported -- ignore */
+ continue;
+# endif
+ }
+ case INET_OPT_RECVTCLASS:
+ {
+# if defined(IPV6_RECVTCLASS) && defined(IPPROTO_IPV6)
+ proto = IPPROTO_IPV6;
+ type = IPV6_RECVTCLASS;
+ is_int = 0;
+ tag = am_recvtclass;
+ break;
+# else
+ /* Not supported -- ignore */
+ continue;
+# endif
+ }
+ case INET_OPT_RECVTTL:
+ {
+# if defined(IP_RECVTTL) && defined(IPPROTO_IP)
+ proto = IPPROTO_IP;
+ type = IP_RECVTTL;
+ is_int = 0;
+ tag = am_recvttl;
+ break;
+# else
+ /* Not supported -- ignore */
+ continue;
+# endif
+ }
case INET_OPT_IPV6_V6ONLY:
-# if HAVE_DECL_IPV6_V6ONLY
+# if HAVE_DECL_IPV6_V6ONLY && defined(IPPROTO_IPV6)
{
proto = IPPROTO_IPV6;
type = IPV6_V6ONLY;
@@ -8310,6 +9070,7 @@ static ErlDrvData inet_start(ErlDrvPort port, int size, int protocol)
desc->delimiter = '\n'; /* line delimiting char */
desc->oph = NULL;
desc->opt = NULL;
+ desc->op_ref = 0;
desc->peer_ptr = NULL;
desc->name_ptr = NULL;
@@ -8342,13 +9103,15 @@ static ErlDrvData inet_start(ErlDrvPort port, int size, int protocol)
desc->netns = NULL;
#endif
+ desc->recv_cmsgflags = 0;
+
return (ErlDrvData)desc;
}
-
-#ifndef MAXHOSTNAMELEN
-#define MAXHOSTNAMELEN 256
-#endif
+/* MAXHOSTNAMELEN could be 64 or 255 depending
+on the platform. Instead, use INET_MAXHOSTNAMELEN
+which is always 255 across all platforms */
+#define INET_MAXHOSTNAMELEN 255
/*
** common TCP/UDP/SCTP control command
@@ -8525,13 +9288,14 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf,
}
case INET_REQ_GETHOSTNAME: { /* get host name */
- char tbuf[MAXHOSTNAMELEN];
+ char tbuf[INET_MAXHOSTNAMELEN + 1];
DEBUGF(("inet_ctl(%ld): GETHOSTNAME\r\n", (long)desc->port));
if (len != 0)
return ctl_error(EINVAL, rbuf, rsize);
- if (IS_SOCKET_ERROR(sock_hostname(tbuf, MAXHOSTNAMELEN)))
+ /* gethostname requires len to be max(hostname) + 1 */
+ if (IS_SOCKET_ERROR(sock_hostname(tbuf, INET_MAXHOSTNAMELEN + 1)))
return ctl_error(sock_errno(), rbuf, rsize);
return ctl_reply(INET_REP_OK, tbuf, strlen(tbuf), rbuf, rsize);
}
@@ -8584,6 +9348,7 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf,
else {
ptr = &peer;
sz = sizeof(peer);
+ sys_memzero((char *) &peer, sz);
if (IS_SOCKET_ERROR
(sock_peer
(desc->s, (struct sockaddr*)ptr, &sz)))
@@ -9016,6 +9781,7 @@ static ErlDrvData prep_tcp_inet_start(ErlDrvPort port, char* args)
desc->tcp_add_flags = 0;
desc->http_state = 0;
desc->mtd = NULL;
+ desc->mtd_cache = NULL;
desc->multi_first = desc->multi_last = NULL;
DEBUGF(("tcp_inet_start(%ld) }\r\n", (long)port));
return (ErlDrvData) desc;
@@ -9119,15 +9885,14 @@ static void tcp_close_check(tcp_descriptor* desc)
driver_demonitor_process(desc->inet.port, &monitor);
send_async_error(desc->inet.dport, id, caller, am_closed);
}
- clean_multi_timers(&(desc->mtd), desc->inet.port);
}
-
else if (desc->inet.state == INET_STATE_CONNECTING) {
async_error_am(INETP(desc), am_closed);
}
else if (desc->inet.state == INET_STATE_CONNECTED) {
async_error_am_all(INETP(desc), am_closed);
}
+ clean_multi_timers(desc, desc->inet.port);
}
/*
@@ -9138,16 +9903,47 @@ static void tcp_inet_stop(ErlDrvData e)
tcp_descriptor* desc = (tcp_descriptor*)e;
DEBUGF(("tcp_inet_stop(%ld) {s=%d\r\n",
(long)desc->inet.port, desc->inet.s));
+
tcp_close_check(desc);
- /* free input buffer & output buffer */
- if (desc->i_buf != NULL)
- release_buffer(desc->i_buf);
- desc->i_buf = NULL; /* net_mess2 may call this function recursively when
- faulty messages arrive on dist ports*/
+ tcp_clear_input(desc);
+
DEBUGF(("tcp_inet_stop(%ld) }\r\n", (long)desc->inet.port));
inet_stop(INETP(desc));
}
+/* Closes a tcp descriptor without leaving things hanging; the VM keeps trying
+ * to flush IO queues as long as it contains anything even after the port has
+ * been closed from the erlang side, which is desired behavior (Think escripts
+ * writing to files) but pretty hopeless if the underlying fd has been set to
+ * INVALID_SOCKET through desc_close.
+ *
+ * This function should be used in place of desc_close/erl_inet_close in all
+ * TCP-related operations. Note that this only closes the desc cleanly; it
+ * will be freed through tcp_inet_stop later on. */
+static void tcp_desc_close(tcp_descriptor* desc)
+{
+#ifdef HAVE_SENDFILE
+ if(desc->tcp_add_flags & TCP_ADDF_SENDFILE) {
+ desc->tcp_add_flags &= ~TCP_ADDF_SENDFILE;
+ close(desc->sendfile.dup_file_fd);
+ }
+#endif
+
+ tcp_clear_input(desc);
+ tcp_clear_output(desc);
+
+ erl_inet_close(INETP(desc));
+}
+
+static void tcp_inet_recv_timeout(ErlDrvData e, ErlDrvTermData dummy)
+{
+ tcp_descriptor* desc = (tcp_descriptor*)e;
+ ASSERT(!desc->inet.active);
+ sock_select(INETP(desc),(FD_READ|FD_CLOSE),0);
+ desc->i_remain = 0;
+ async_error_am(INETP(desc), am_timeout);
+}
+
/* TCP requests from Erlang */
static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
char* buf, ErlDrvSizeT len,
@@ -9318,12 +10114,12 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
if (time_left <= 0) {
time_left = 1;
}
- omtd = add_multi_timer(&(desc->mtd), desc->inet.port, ocaller,
+ omtd = add_multi_timer(desc, desc->inet.port, ocaller,
time_left, &tcp_inet_multi_timeout);
}
enq_old_multi_op(desc, oid, oreq, ocaller, omtd, &omonitor);
if (timeout != INET_INFINITY) {
- mtd = add_multi_timer(&(desc->mtd), desc->inet.port, caller,
+ mtd = add_multi_timer(desc, desc->inet.port, caller,
timeout, &tcp_inet_multi_timeout);
}
enq_multi_op(desc, tbuf, INET_REQ_ACCEPT, caller, mtd, &monitor);
@@ -9338,7 +10134,7 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
return ctl_xerror("noproc", rbuf, rsize);
}
if (timeout != INET_INFINITY) {
- mtd = add_multi_timer(&(desc->mtd), desc->inet.port, caller,
+ mtd = add_multi_timer(desc, desc->inet.port, caller,
timeout, &tcp_inet_multi_timeout);
}
enq_multi_op(desc, tbuf, INET_REQ_ACCEPT, caller, mtd, &monitor);
@@ -9392,7 +10188,7 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
case INET_REQ_CLOSE:
DEBUGF(("tcp_inet_ctl(%ld): CLOSE\r\n", (long)desc->inet.port));
tcp_close_check(desc);
- erl_inet_close(INETP(desc));
+ tcp_desc_close(desc);
return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
@@ -9435,7 +10231,8 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
async_error_am(INETP(desc), am_timeout);
else {
if (timeout != INET_INFINITY)
- driver_set_timer(desc->inet.port, timeout);
+ add_multi_timer(desc, INETP(desc)->port, 0,
+ timeout, &tcp_inet_recv_timeout);
if (!INETP(desc)->is_ignored)
sock_select(INETP(desc),(FD_READ|FD_CLOSE),1);
else
@@ -9480,6 +10277,59 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
}
}
+
+ case TCP_REQ_SENDFILE: {
+#ifdef HAVE_SENDFILE
+ const ErlDrvSizeT required_len =
+ sizeof(desc->sendfile.dup_file_fd) +
+ sizeof(Uint64) * 2;
+
+ int raw_file_fd;
+
+ DEBUGF(("tcp_inet_ctl(%ld): SENDFILE\r\n", (long)desc->inet.port));
+
+ if (len != required_len) {
+ return ctl_error(EINVAL, rbuf, rsize);
+ } else if (!IS_CONNECTED(INETP(desc))) {
+ return ctl_error(ENOTCONN, rbuf, rsize);
+ }
+
+ sys_memcpy(&raw_file_fd, buf, sizeof(raw_file_fd));
+ buf += sizeof(raw_file_fd);
+
+ desc->sendfile.dup_file_fd = dup(raw_file_fd);
+
+ if(desc->sendfile.dup_file_fd == -1) {
+ return ctl_error(errno, rbuf, rsize);
+ }
+
+ desc->sendfile.offset = get_int64(buf);
+ buf += sizeof(Uint64);
+
+ desc->sendfile.length = get_int64(buf);
+ buf += sizeof(Uint64);
+
+ ASSERT(desc->sendfile.offset >= 0);
+ ASSERT(desc->sendfile.length >= 0);
+
+ desc->sendfile.ioq_skip = driver_sizeq(desc->inet.port);
+ desc->sendfile.bytes_sent = 0;
+
+ desc->inet.caller = driver_caller(desc->inet.port);
+ desc->tcp_add_flags |= TCP_ADDF_SENDFILE;
+
+ /* See if we can finish sending without selecting & rescheduling. */
+ if (tcp_inet_sendfile(desc) == 0) {
+ if(desc->sendfile.length > 0) {
+ sock_select(INETP(desc), FD_WRITE, 1);
+ }
+ }
+ return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
+#else
+ return ctl_error(ENOTSUP, rbuf, rsize);
+#endif
+ }
+
default:
DEBUGF(("tcp_inet_ctl(%ld): %u\r\n", (long)desc->inet.port, cmd));
return inet_ctl(INETP(desc), cmd, buf, len, rbuf, rsize);
@@ -9487,12 +10337,27 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
}
+static void tcp_inet_send_timeout(ErlDrvData e, ErlDrvTermData dummy)
+{
+ tcp_descriptor* desc = (tcp_descriptor*)e;
+ ASSERT(IS_BUSY(INETP(desc)));
+ ASSERT(desc->busy_on_send);
+ desc->inet.caller = desc->inet.busy_caller;
+ desc->inet.state &= ~INET_F_BUSY;
+ desc->busy_on_send = 0;
+ set_busy_port(desc->inet.port, 0);
+ inet_reply_error_am(INETP(desc), am_timeout);
+ if (desc->send_timeout_close) {
+ tcp_desc_close(desc);
+ }
+}
+
/*
** tcp_inet_timeout:
** called when timer expire:
** TCP socket may be:
**
-** a) receiving -- deselect
+** a) receiving -- send timeout
** b) connecting -- close socket
** c) accepting -- reset listener
**
@@ -9506,31 +10371,14 @@ static void tcp_inet_timeout(ErlDrvData e)
DEBUGF(("tcp_inet_timeout(%ld) {s=%d\r\n",
(long)desc->inet.port, desc->inet.s));
if ((state & INET_F_MULTI_CLIENT)) { /* Multi-client always means multi-timers */
- fire_multi_timers(&(desc->mtd), desc->inet.port, e);
+ fire_multi_timers(desc, desc->inet.port, e);
} else if ((state & INET_STATE_CONNECTED) == INET_STATE_CONNECTED) {
- if (desc->busy_on_send) {
- ASSERT(IS_BUSY(INETP(desc)));
- desc->inet.caller = desc->inet.busy_caller;
- desc->inet.state &= ~INET_F_BUSY;
- desc->busy_on_send = 0;
- set_busy_port(desc->inet.port, 0);
- inet_reply_error_am(INETP(desc), am_timeout);
- if (desc->send_timeout_close) {
- erl_inet_close(INETP(desc));
- }
- }
- else {
- /* assume recv timeout */
- ASSERT(!desc->inet.active);
- sock_select(INETP(desc),(FD_READ|FD_CLOSE),0);
- desc->i_remain = 0;
- async_error_am(INETP(desc), am_timeout);
- }
+ fire_multi_timers(desc, desc->inet.port, e);
}
else if ((state & INET_STATE_CONNECTING) == INET_STATE_CONNECTING) {
/* assume connect timeout */
/* close the socket since it's not usable (see man pages) */
- erl_inet_close(INETP(desc));
+ tcp_desc_close(desc);
async_error_am(INETP(desc), am_timeout);
}
else if ((state & INET_STATE_ACCEPTING) == INET_STATE_ACCEPTING) {
@@ -9619,12 +10467,27 @@ static void tcp_inet_commandv(ErlDrvData e, ErlIOVec* ev)
static void tcp_inet_flush(ErlDrvData e)
{
tcp_descriptor* desc = (tcp_descriptor*)e;
- if (!(desc->inet.event_mask & FD_WRITE)) {
- /* Discard send queue to avoid hanging port (OTP-7615) */
- tcp_clear_output(desc);
+ int discard_output;
+
+ /* Discard send queue to avoid hanging port (OTP-7615) */
+ discard_output = !(desc->inet.event_mask & FD_WRITE);
+
+ discard_output |= desc->tcp_add_flags & TCP_ADDF_LINGER_ZERO;
+
+#ifdef HAVE_SENDFILE
+ /* The old file driver aborted when it was stopped during sendfile, so
+ * we'll clear the flag and discard all output. */
+ if(desc->tcp_add_flags & TCP_ADDF_SENDFILE) {
+ desc->tcp_add_flags &= ~TCP_ADDF_SENDFILE;
+ close(desc->sendfile.dup_file_fd);
+
+ discard_output = 1;
+ }
+#endif
+
+ if (discard_output) {
+ tcp_clear_output(desc);
}
- if (desc->tcp_add_flags & TCP_ADDF_LINGER_ZERO)
- tcp_clear_output(desc);
}
static void tcp_inet_process_exit(ErlDrvData e, ErlDrvMonitor *monitorp)
@@ -9640,7 +10503,7 @@ static void tcp_inet_process_exit(ErlDrvData e, ErlDrvMonitor *monitorp)
return;
}
if (timeout != NULL) {
- remove_multi_timer(&(desc->mtd), desc->inet.port, timeout);
+ remove_multi_timer(desc, desc->inet.port, timeout);
}
if (desc->multi_first == NULL) {
sock_select(INETP(desc),FD_ACCEPT,0);
@@ -9671,6 +10534,7 @@ static int tcp_recv_closed(tcp_descriptor* desc)
#ifdef DEBUG
long port = (long) desc->inet.port; /* Used after driver_exit() */
#endif
+ int blocking_send = 0;
DEBUGF(("tcp_recv_closed(%ld): s=%d, in %s, line %d\r\n",
port, desc->inet.s, __FILE__, __LINE__));
if (IS_BUSY(INETP(desc))) {
@@ -9678,7 +10542,7 @@ static int tcp_recv_closed(tcp_descriptor* desc)
desc->inet.caller = desc->inet.busy_caller;
tcp_clear_output(desc);
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
desc->busy_on_send = 0;
DEBUGF(("tcp_recv_closed(%ld): busy on send\r\n", port));
}
@@ -9686,15 +10550,29 @@ static int tcp_recv_closed(tcp_descriptor* desc)
set_busy_port(desc->inet.port, 0);
inet_reply_error_am(INETP(desc), am_closed);
DEBUGF(("tcp_recv_closed(%ld): busy reply 'closed'\r\n", port));
+ blocking_send = 1;
+ }
+#ifdef HAVE_SENDFILE
+ if (desc->tcp_add_flags & TCP_ADDF_SENDFILE) {
+ tcp_sendfile_aborted(desc, ENOTCONN);
+ blocking_send = 1;
}
+#endif
+ if (!blocking_send) {
+ /* No blocking send op to reply to right now.
+ * If next op is a send, make sure it returns {error,closed}
+ * rather than {error,enotconn}.
+ */
+ desc->tcp_add_flags |= TCP_ADDF_DELAYED_CLOSE_SEND;
+ }
+
if (!desc->inet.active) {
- /* We must cancel any timer here ! */
- driver_cancel_timer(desc->inet.port);
+ /* We must cancel any timer here ! */
+ clean_multi_timers(desc, INETP(desc)->port);
/* passive mode do not terminate port ! */
tcp_clear_input(desc);
if (desc->inet.exitf) {
- tcp_clear_output(desc);
- desc_close(INETP(desc));
+ tcp_desc_close(desc);
} else {
desc_close_read(INETP(desc));
}
@@ -9725,19 +10603,24 @@ static int tcp_recv_error(tcp_descriptor* desc, int err)
desc->inet.caller = desc->inet.busy_caller;
tcp_clear_output(desc);
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
desc->busy_on_send = 0;
}
desc->inet.state &= ~INET_F_BUSY;
set_busy_port(desc->inet.port, 0);
inet_reply_error_am(INETP(desc), am_closed);
}
+#ifdef HAVE_SENDFILE
+ if (desc->tcp_add_flags & TCP_ADDF_SENDFILE) {
+ tcp_sendfile_aborted(desc, err);
+ }
+#endif
if (!desc->inet.active) {
/* We must cancel any timer here ! */
- driver_cancel_timer(desc->inet.port);
+ clean_multi_timers(desc, INETP(desc)->port);
tcp_clear_input(desc);
if (desc->inet.exitf) {
- desc_close(INETP(desc));
+ tcp_desc_close(desc);
} else {
desc_close_read(INETP(desc));
}
@@ -9839,13 +10722,13 @@ static int tcp_deliver(tcp_descriptor* desc, int len)
if (len == 0) {
/* empty buffer or waiting for more input */
if ((desc->i_buf == NULL) || (desc->i_remain > 0))
- return count;
+ return 0;
if ((n = tcp_remain(desc, &len)) != 0) {
if (n < 0) /* packet error */
return n;
if (len > 0) /* more data pending */
desc->i_remain = len;
- return count;
+ return 0;
}
}
@@ -9897,9 +10780,7 @@ static int tcp_deliver(tcp_descriptor* desc, int len)
len = 0;
if (!desc->inet.active) {
- if (!desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
- }
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_recv_timeout);
sock_select(INETP(desc),(FD_READ|FD_CLOSE),0);
if (desc->i_buf != NULL)
tcp_restart_input(desc);
@@ -9925,7 +10806,7 @@ static int tcp_recv(tcp_descriptor* desc, int request_len)
int len;
int nread;
- if (desc->i_buf == NULL) { /* allocte a read buffer */
+ if (desc->i_buf == NULL) { /* allocate a read buffer */
int sz = (request_len > 0) ? request_len : desc->inet.bufsz;
if ((desc->i_buf = alloc_buffer(sz)) == NULL)
@@ -9998,10 +10879,11 @@ static int tcp_recv(tcp_descriptor* desc, int request_len)
return tcp_deliver(desc, desc->i_ptr - desc->i_ptr_start);
}
else {
- if ((nread = tcp_remain(desc, &len)) < 0)
+ nread = tcp_remain(desc, &len);
+ if (nread < 0)
return tcp_recv_error(desc, EMSGSIZE);
else if (nread == 0)
- return tcp_deliver(desc, len);
+ return tcp_deliver(desc, len);
else if (len > 0)
desc->i_remain = len; /* set remain */
}
@@ -10320,7 +11202,7 @@ static int tcp_inet_input(tcp_descriptor* desc, HANDLE event)
}
if (timeout != NULL) {
- remove_multi_timer(&(desc->mtd), desc->inet.port, timeout);
+ remove_multi_timer(desc, desc->inet.port, timeout);
}
driver_demonitor_process(desc->inet.port, &monitor);
@@ -10379,8 +11261,8 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err)
if (IS_BUSY(INETP(desc))) {
desc->inet.caller = desc->inet.busy_caller;
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
- desc->busy_on_send = 0;
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
+ desc->busy_on_send = 0;
}
desc->inet.state &= ~INET_F_BUSY;
set_busy_port(desc->inet.port, 0);
@@ -10395,29 +11277,31 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err)
DEBUGF(("driver_failure_eof(%ld) in %s, line %d\r\n",
(long)desc->inet.port, __FILE__, __LINE__));
if (desc->inet.active) {
+ ErlDrvTermData err_atom;
if (show_econnreset) {
tcp_error_message(desc, err);
- tcp_closed_message(desc);
- inet_reply_error(INETP(desc), err);
+ err_atom = error_atom(err);
} else {
- tcp_closed_message(desc);
- inet_reply_error_am(INETP(desc), am_closed);
+ err_atom = am_closed;
}
+ tcp_closed_message(desc);
+ if (!(desc->tcp_add_flags & TCP_ADDF_SENDFILE))
+ inet_reply_error_am(INETP(desc), err_atom);
+
if (desc->inet.exitf)
driver_exit(desc->inet.port, 0);
else
- desc_close(INETP(desc));
+ tcp_desc_close(desc);
} else {
- tcp_clear_output(desc);
- tcp_clear_input(desc);
tcp_close_check(desc);
- erl_inet_close(INETP(desc));
if (desc->inet.caller) {
- if (show_econnreset)
- inet_reply_error(INETP(desc), err);
- else
- inet_reply_error_am(INETP(desc), am_closed);
+ if (!(desc->tcp_add_flags & TCP_ADDF_SENDFILE)) {
+ if (show_econnreset)
+ inet_reply_error(INETP(desc), err);
+ else
+ inet_reply_error_am(INETP(desc), am_closed);
+ }
}
else {
/* No blocking send op to reply to right now.
@@ -10426,6 +11310,7 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err)
*/
desc->tcp_add_flags |= TCP_ADDF_DELAYED_CLOSE_SEND;
}
+ tcp_desc_close(desc);
/*
* Make sure that the next receive operation gets an {error,closed}
@@ -10482,6 +11367,12 @@ static int tcp_shutdown_error(tcp_descriptor* desc, int err)
return tcp_send_or_shutdown_error(desc, err);
}
+static void tcp_inet_delay_send(ErlDrvData data, ErlDrvTermData dummy)
+{
+ tcp_descriptor *desc = (tcp_descriptor*)data;
+ (void)tcp_inet_output(desc, INETP(desc)->s);
+}
+
/*
** Send non-blocking vector data
*/
@@ -10522,7 +11413,9 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev)
ev->size += h_len;
}
- if ((sz = driver_sizeq(ix)) > 0) {
+ sz = driver_sizeq(ix);
+
+ if ((desc->tcp_add_flags & TCP_ADDF_SENDFILE) || sz > 0) {
driver_enqv(ix, ev, 0);
if (sz+ev->size >= desc->high) {
DEBUGF(("tcp_sendv(%ld): s=%d, sender forced busy\r\n",
@@ -10532,7 +11425,9 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev)
set_busy_port(desc->inet.port, 1);
if (desc->send_timeout != INET_INFINITY) {
desc->busy_on_send = 1;
- driver_set_timer(desc->inet.port, desc->send_timeout);
+ add_multi_timer(desc, INETP(desc)->port,
+ 0 /* arg */, desc->send_timeout /* timeout */,
+ &tcp_inet_send_timeout);
}
return 1;
}
@@ -10547,7 +11442,10 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev)
INETP(desc)->is_ignored |= INET_IGNORE_WRITE;
n = 0;
} else if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND) {
- n = 0;
+ driver_enqv(ix, ev, 0);
+ add_multi_timer(desc, INETP(desc)->port, 0,
+ 0, &tcp_inet_delay_send);
+ return 0;
} else if (IS_SOCKET_ERROR(sock_sendv(desc->inet.s, ev->iov,
vsize, &n, 0))) {
if ((sock_errno() != ERRNO_BLOCK) && (sock_errno() != EINTR)) {
@@ -10616,8 +11514,9 @@ static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len)
inet_output_count(INETP(desc), len+h_len);
+ sz = driver_sizeq(ix);
- if ((sz = driver_sizeq(ix)) > 0) {
+ if ((desc->tcp_add_flags & TCP_ADDF_SENDFILE) || sz > 0) {
if (h_len > 0)
driver_enq(ix, buf, h_len);
driver_enq(ix, ptr, len);
@@ -10629,7 +11528,9 @@ static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len)
set_busy_port(desc->inet.port, 1);
if (desc->send_timeout != INET_INFINITY) {
desc->busy_on_send = 1;
- driver_set_timer(desc->inet.port, desc->send_timeout);
+ add_multi_timer(desc, INETP(desc)->port,
+ 0 /* arg */, desc->send_timeout /* timeout */,
+ &tcp_inet_send_timeout);
}
return 1;
}
@@ -10707,6 +11608,247 @@ static void tcp_inet_drv_input(ErlDrvData data, ErlDrvEvent event)
(void)tcp_inet_input((tcp_descriptor*)data, (HANDLE)event);
}
+#ifdef HAVE_SENDFILE
+static int tcp_sendfile_completed(tcp_descriptor* desc) {
+ ErlDrvTermData spec[LOAD_PORT_CNT + LOAD_TUPLE_CNT * 2 +
+ LOAD_ATOM_CNT * 2 + LOAD_UINT_CNT * 2];
+ Uint32 sent_low, sent_high;
+ int i;
+
+ desc->tcp_add_flags &= ~TCP_ADDF_SENDFILE;
+ close(desc->sendfile.dup_file_fd);
+
+ /* While we flushed the output queue prior to sending the file, we've
+ * deferred clearing busy status until now as there's no point in doing so
+ * while we still have a file to send.
+ *
+ * The watermark is checked since more data may have been added while we
+ * were sending the file. */
+
+ if (driver_sizeq(desc->inet.port) <= desc->low) {
+ if (IS_BUSY(INETP(desc))) {
+ desc->inet.caller = desc->inet.busy_caller;
+ desc->inet.state &= ~INET_F_BUSY;
+
+ set_busy_port(desc->inet.port, 0);
+
+ /* if we have a timer then cancel and send ok to client */
+ if (desc->busy_on_send) {
+ cancel_multi_timer(desc, INETP(desc)->port,
+ &tcp_inet_send_timeout);
+ desc->busy_on_send = 0;
+ }
+
+ inet_reply_ok(INETP(desc));
+ }
+ }
+
+ if (driver_sizeq(desc->inet.port) == 0) {
+ sock_select(INETP(desc), FD_WRITE, 0);
+ send_empty_out_q_msgs(INETP(desc));
+
+ if (desc->tcp_add_flags & TCP_ADDF_PENDING_SHUTDOWN) {
+ tcp_shutdown_async(desc);
+ }
+ }
+
+ sent_low = ((Uint64)desc->sendfile.bytes_sent >> 0) & 0xFFFFFFFF;
+ sent_high = ((Uint64)desc->sendfile.bytes_sent >> 32) & 0xFFFFFFFF;
+
+ i = LOAD_ATOM(spec, 0, am_sendfile);
+ i = LOAD_PORT(spec, i, desc->inet.dport);
+ i = LOAD_ATOM(spec, i, am_ok);
+ i = LOAD_UINT(spec, i, sent_low);
+ i = LOAD_UINT(spec, i, sent_high);
+ i = LOAD_TUPLE(spec, i, 3);
+ i = LOAD_TUPLE(spec, i, 3);
+
+ ASSERT(i == sizeof(spec)/sizeof(*spec));
+
+ return erl_drv_output_term(desc->inet.dport, spec, i);
+}
+
+static int tcp_sendfile_aborted(tcp_descriptor* desc, int socket_error) {
+ ErlDrvTermData spec[LOAD_PORT_CNT + LOAD_TUPLE_CNT * 2 + LOAD_ATOM_CNT * 3];
+ int i;
+
+ /* We don't clean up sendfile state here, as that's done in tcp_desc_close
+ * following normal error handling. All we do here is report the failure. */
+
+ i = LOAD_ATOM(spec, 0, am_sendfile);
+ i = LOAD_PORT(spec, i, desc->inet.dport);
+ i = LOAD_ATOM(spec, i, am_error);
+
+ switch (socket_error) {
+ case ECONNRESET:
+ case ENOTCONN:
+ case EPIPE:
+ i = LOAD_ATOM(spec, i, am_closed);
+ break;
+ default:
+ i = LOAD_ATOM(spec, i, error_atom(socket_error));
+ }
+
+ i = LOAD_TUPLE(spec, i, 2);
+ i = LOAD_TUPLE(spec, i, 3);
+
+ ASSERT(i == sizeof(spec)/sizeof(*spec));
+
+ return erl_drv_output_term(desc->inet.dport, spec, i);
+}
+
+static int tcp_inet_sendfile(tcp_descriptor* desc) {
+ ErlDrvPort ix = desc->inet.port;
+ int result = 0;
+ ssize_t n;
+
+ DEBUGF(("tcp_inet_sendfile(%ld) {s=%d\r\n", (long)ix, desc->inet.s));
+
+ /* If there was any data in the queue by the time sendfile was issued,
+ * we'll need to skip it first. Note that we don't clear busy status until
+ * we're finished sending the file. */
+ while (desc->sendfile.ioq_skip > 0) {
+ ssize_t bytes_to_send;
+ SysIOVec* iov;
+ int vsize;
+
+ ASSERT(driver_sizeq(ix) >= desc->sendfile.ioq_skip);
+
+ if ((iov = driver_peekq(ix, &vsize)) == NULL) {
+ ERTS_INTERNAL_ERROR("ioq empty when sendfile.ioq_skip > 0");
+ }
+
+ bytes_to_send = MIN(desc->sendfile.ioq_skip, iov[0].iov_len);
+ n = sock_send(desc->inet.s, iov[0].iov_base, bytes_to_send, 0);
+
+ if (!IS_SOCKET_ERROR(n)) {
+ desc->sendfile.ioq_skip -= n;
+ driver_deq(ix, n);
+ } else if (sock_errno() == ERRNO_BLOCK) {
+#ifdef __WIN32__
+ desc->inet.send_would_block = 1;
+#endif
+ goto done;
+ } else if (sock_errno() != EINTR) {
+ goto socket_error;
+ }
+ }
+
+ while (desc->sendfile.length > 0) {
+ /* For some reason the maximum ssize_t cannot be used as the max size.
+ * 1GB seems to work on all platforms */
+ const Sint64 SENDFILE_CHUNK_SIZE = ((1UL << 30) - 1);
+
+ ssize_t bytes_to_send = MIN(SENDFILE_CHUNK_SIZE, desc->sendfile.length);
+ off_t offset = desc->sendfile.offset;
+
+#if defined(__linux__)
+ n = sendfile(desc->inet.s, desc->sendfile.dup_file_fd, &offset,
+ bytes_to_send);
+#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__DARWIN__)
+ {
+ off_t bytes_sent;
+ int error;
+
+ #if defined(__DARWIN__)
+ bytes_sent = bytes_to_send;
+
+ error = sendfile(desc->sendfile.dup_file_fd, desc->inet.s, offset,
+ &bytes_sent, NULL, 0);
+ n = bytes_sent;
+ #else
+ error = sendfile(desc->sendfile.dup_file_fd, desc->inet.s, offset,
+ bytes_to_send, NULL, &bytes_sent, 0);
+ n = bytes_sent;
+ #endif
+
+ if(error < 0) {
+ /* EAGAIN/EINTR report partial success by setting bytes_sent,
+ * so we have to skip error handling if nonzero, and skip EOF
+ * handling if zero, as it's possible that we didn't manage to
+ * send anything at all before being interrupted by a
+ * signal. */
+ if((errno != EAGAIN && errno != EINTR) || bytes_sent == 0) {
+ n = -1;
+ }
+ }
+ }
+#elif defined(__sun) && defined(__SVR4) && defined(HAVE_SENDFILEV)
+ {
+ sendfilevec_t sfvec[1];
+ size_t bytes_sent;
+ ssize_t error;
+
+ sfvec[0].sfv_fd = desc->sendfile.dup_file_fd;
+ sfvec[0].sfv_len = bytes_to_send;
+ sfvec[0].sfv_off = offset;
+ sfvec[0].sfv_flag = 0;
+
+ error = sendfilev(desc->inet.s, sfvec, 1, &bytes_sent);
+ n = bytes_sent;
+
+ if(error < 0) {
+ if(errno == EINVAL) {
+ /* On some solaris versions (I've seen it on SunOS 5.10),
+ * using a sfv_len larger than the filesize will result in
+ * a (-1 && errno == EINVAL). We translate this to a
+ * successful send of the data.*/
+ } else {
+ /* EAGAIN/EINTR behavior is identical to *BSD. */
+ if((errno != EAGAIN && errno != EINTR) || bytes_sent == 0) {
+ n = -1;
+ }
+ }
+ }
+ }
+#else
+ #error "Unsupported sendfile syscall; update configure test."
+#endif
+
+ if (n > 0) {
+ desc->sendfile.bytes_sent += n;
+ desc->sendfile.offset += n;
+ desc->sendfile.length -= n;
+ } else if (n == 0) {
+ /* EOF. */
+ desc->sendfile.length = 0;
+ break;
+ } else if (IS_SOCKET_ERROR(n) && sock_errno() != EINTR) {
+ if (sock_errno() != ERRNO_BLOCK) {
+ goto socket_error;
+ }
+
+#ifdef __WIN32__
+ desc->inet.send_would_block = 1;
+#endif
+ break;
+ }
+ }
+
+ if (desc->sendfile.length == 0) {
+ tcp_sendfile_completed(desc);
+ }
+
+ goto done;
+
+socket_error: {
+ int socket_errno = sock_errno();
+
+ DEBUGF(("tcp_inet_sendfile(%ld): send errno = %d (errno %d)\r\n",
+ (long)desc->inet.port, socket_errno, errno));
+
+ tcp_sendfile_aborted(desc, socket_errno);
+ result = tcp_send_error(desc, socket_errno);
+
+ goto done;
+ }
+
+done:
+ DEBUGF(("tcp_inet_sendfile(%ld) }\r\n", (long)desc->inet.port));
+ return result;
+}
+#endif /* HAVE_SENDFILE */
+
/* socket ready for ouput:
** 1. INET_STATE_CONNECTING => non block connect ?
** 2. INET_STATE_CONNECTED => write output
@@ -10734,10 +11876,11 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event)
#ifndef SO_ERROR
{
- int sz = sizeof(desc->inet.remote);
- int code = sock_peer(desc->inet.s,
- (struct sockaddr*) &desc->inet.remote, &sz);
-
+ int sz, code;
+ sz = sizeof(desc->inet.remote);
+ sys_memzero((char *) &desc->inet.remote, sz);
+ code = sock_peer(desc->inet.s,
+ (struct sockaddr*) &desc->inet.remote, &sz);
if (IS_SOCKET_ERROR(code)) {
desc->inet.state = INET_STATE_OPEN; /* restore state */
ret = async_error(INETP(desc), sock_errno());
@@ -10766,7 +11909,14 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event)
async_ok(INETP(desc));
}
else if (IS_CONNECTED(INETP(desc))) {
- for (;;) {
+
+#ifdef HAVE_SENDFILE
+ if(desc->tcp_add_flags & TCP_ADDF_SENDFILE) {
+ return tcp_inet_sendfile(desc);
+ }
+#endif
+
+ for (;;) {
int vsize;
ssize_t n;
SysIOVec* iov;
@@ -10792,6 +11942,12 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event)
#ifdef __WIN32__
desc->inet.send_would_block = 1;
#endif
+ /* If DELAY_SEND is set ready_output may have
+ been called without doing select so we do
+ a select in order to get into the correct
+ state */
+ if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND)
+ sock_select(INETP(desc), FD_WRITE, 1);
goto done;
} else if (n == 0) { /* Workaround for redhat/CentOS 6.3 returning
0 when sending packets with
@@ -10817,7 +11973,7 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event)
set_busy_port(desc->inet.port, 0);
/* if we have a timer then cancel and send ok to client */
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
desc->busy_on_send = 0;
}
inet_reply_ok(INETP(desc));
@@ -11172,24 +12328,20 @@ static ErlDrvSSizeT packet_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf,
(desc->sfamily, &remote, &buf, &len)) != NULL)
return ctl_xerror(xerror, rbuf, rsize);
- sock_select(desc, FD_CONNECT, 1);
code = sock_connect(desc->s, &remote.sa, len);
if (IS_SOCKET_ERROR(code) && (sock_errno() == EINPROGRESS)) {
/* XXX: Unix only -- WinSock would have a different cond! */
- desc->state = INET_STATE_CONNECTING;
if (timeout != INET_INFINITY)
driver_set_timer(desc->port, timeout);
enq_async(desc, tbuf, INET_REQ_CONNECT);
+ async_ok(desc);
}
else if (code == 0) { /* OK we are connected */
- sock_select(desc, FD_CONNECT, 0);
- desc->state = INET_STATE_CONNECTED;
enq_async(desc, tbuf, INET_REQ_CONNECT);
async_ok(desc);
}
else {
- sock_select(desc, FD_CONNECT, 0);
return ctl_error(sock_errno(), rbuf, rsize);
}
return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize);
@@ -11417,10 +12569,10 @@ static void packet_inet_command(ErlDrvData e, char* buf, ErlDrvSizeT len)
if (IS_SCTP(desc))
{
ErlDrvSizeT data_len;
- struct iovec iov[1]; /* For real data */
- struct msghdr mhdr; /* Message wrapper */
- struct sctp_sndrcvinfo *sri; /* The actual ancilary data */
- union { /* For ancilary data */
+ struct iovec iov[1]; /* For real data */
+ struct msghdr mhdr; /* Message wrapper */
+ struct sctp_sndrcvinfo *sri; /* The actual ancillary data */
+ union { /* For ancillary data */
struct cmsghdr hdr;
char ancd[CMSG_SPACE(sizeof(*sri))];
} cmsg;
@@ -11430,12 +12582,12 @@ static void packet_inet_command(ErlDrvData e, char* buf, ErlDrvSizeT len)
return;
}
- /* The ancilary data */
+ /* The ancillary data */
sri = (struct sctp_sndrcvinfo *) (CMSG_DATA(&cmsg.hdr));
/* Get the "sndrcvinfo" from the buffer, advancing the "ptr": */
ptr = sctp_get_sendparams(sri, ptr);
- /* The ancilary data wrapper */
+ /* The ancillary data wrapper */
cmsg.hdr.cmsg_level = IPPROTO_SCTP;
cmsg.hdr.cmsg_type = SCTP_SNDRCV;
cmsg.hdr.cmsg_len = CMSG_LEN(sizeof(*sri));
@@ -11450,7 +12602,7 @@ static void packet_inet_command(ErlDrvData e, char* buf, ErlDrvSizeT len)
iov[0].iov_base = ptr; /* The real data */
mhdr.msg_iov = iov;
mhdr.msg_iovlen = 1;
- mhdr.msg_control = cmsg.ancd; /* For ancilary data */
+ mhdr.msg_control = cmsg.ancd; /* For ancillary data */
mhdr.msg_controllen = cmsg.hdr.cmsg_len;
VALGRIND_MAKE_MEM_DEFINED(mhdr.msg_control, mhdr.msg_controllen); /*suppress "uninitialised bytes"*/
mhdr.msg_flags = 0; /* Not used with "sendmsg" */
@@ -11534,10 +12686,12 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
char abuf[sizeof(inet_address)]; /* buffer address; enough??? */
int packet_count = udesc->read_packets;
int count = 0; /* number of packets delivered to owner */
-#ifdef HAVE_SCTP
+#ifndef __WIN32__
struct msghdr mhdr; /* Top-level msg structure */
struct iovec iov[1]; /* Data or Notification Event */
- char ancd[SCTP_ANC_BUFF_SIZE]; /* Ancillary Data */
+ char ancd[ANC_BUFF_SIZE]; /* Ancillary Data */
+#endif
+#ifdef HAVE_SCTP
int short_recv = 0;
#endif
@@ -11547,15 +12701,11 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
sys_memzero((char *) &other, sizeof(other));
/* udesc->i_buf is only kept between SCTP fragments */
- if (udesc->i_buf == NULL) {
- udesc->i_bufsz = desc->bufsz + len;
- if ((udesc->i_buf = alloc_buffer(udesc->i_bufsz)) == NULL)
- return packet_error(udesc, ENOMEM);
- /* pointer to message start */
- udesc->i_ptr = udesc->i_buf->orig_bytes + len;
- } else {
- ErlDrvBinary* tmp;
+#ifdef HAVE_SCTP
+ if (udesc->i_buf != NULL) {
+ ErlDrvBinary* tmp;
int bufsz;
+ ASSERT(IS_SCTP(desc));
bufsz = desc->bufsz + (udesc->i_ptr - udesc->i_buf->orig_bytes);
if ((tmp = realloc_buffer(udesc->i_buf, bufsz)) == NULL) {
release_buffer(udesc->i_buf);
@@ -11567,6 +12717,15 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
udesc->i_buf = tmp;
udesc->i_bufsz = bufsz;
}
+ } else
+#endif
+ {
+ ASSERT(udesc->i_buf == NULL);
+ udesc->i_bufsz = desc->bufsz + len;
+ if ((udesc->i_buf = alloc_buffer(udesc->i_bufsz)) == NULL)
+ return packet_error(udesc, ENOMEM);
+ /* pointer to message start */
+ udesc->i_ptr = udesc->i_buf->orig_bytes + len;
}
/* Note: On Windows NT, recvfrom() fails if the socket is connected. */
@@ -11581,7 +12740,7 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
mhdr.msg_iov = iov;
mhdr.msg_iovlen = 1;
mhdr.msg_control = ancd;
- mhdr.msg_controllen = SCTP_ANC_BUFF_SIZE;
+ mhdr.msg_controllen = ANC_BUFF_SIZE;
mhdr.msg_flags = 0; /* To be filled by "recvmsg" */
/* Do the actual SCTP receive: */
@@ -11596,6 +12755,24 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
other = desc->remote;
goto check_result;
}
+#ifndef __WIN32__
+ /* recvmsg() does not exist in the Winsock API */
+ if (desc->recv_cmsgflags) {
+ /* Use recvmsg() */
+ iov->iov_base = udesc->i_ptr;
+ iov->iov_len = desc->bufsz;
+ mhdr.msg_name = &other;
+ mhdr.msg_namelen = len;
+ mhdr.msg_iov = iov;
+ mhdr.msg_iovlen = 1;
+ mhdr.msg_control = ancd;
+ mhdr.msg_controllen = ANC_BUFF_SIZE;
+ mhdr.msg_flags = 0;
+ n = sock_recvmsg(desc->s, &mhdr, 0);
+ len = mhdr.msg_namelen;
+ goto check_result;
+ }
+#endif
n = sock_recvfrom(desc->s, udesc->i_ptr, desc->bufsz,
0, &other.sa, &len);
check_result:
@@ -11608,7 +12785,7 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
udesc->i_buf = NULL;
if (!desc->active) {
async_error(desc, err);
- driver_cancel_timer(desc->port);
+ driver_cancel_timer(desc->port);
sock_select(desc,FD_READ,0);
}
else {
@@ -11625,6 +12802,14 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
) {
sock_select(desc,FD_READ,1);
}
+#ifdef HAVE_SCTP
+ if (!short_recv) {
+#endif
+ release_buffer(udesc->i_buf);
+ udesc->i_buf = NULL;
+#ifdef HAVE_SCTP
+ }
+#endif
return count; /* strange, not ready */
}
@@ -11640,7 +12825,7 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
{
/* message received */
int code;
- void * extra = NULL;
+ void *mp;
char * ptr;
int nsz;
@@ -11671,21 +12856,25 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
udesc->i_ptr = NULL; /* not used from here */
}
}
+ mp = NULL;
#ifdef HAVE_SCTP
- if (IS_SCTP(desc)) extra = &mhdr;
+ if (IS_SCTP(desc)) mp = &mhdr;
+#endif
+#ifndef __WIN32__
+ if (desc->recv_cmsgflags) mp = &mhdr;
#endif
/* Actual parsing and return of the data received, occur here: */
code = packet_reply_binary_data(desc, len, udesc->i_buf,
(sizeof(other) - len),
nsz,
- extra);
+ mp);
free_buffer(udesc->i_buf);
udesc->i_buf = NULL;
if (code < 0)
return count;
count++;
if (!desc->active) {
- driver_cancel_timer(desc->port); /* possibly cancel */
+ driver_cancel_timer(desc->port);
sock_select(desc,FD_READ,0);
return count; /* passive mode (read one packet only) */
}
@@ -11704,77 +12893,6 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
return count;
}
-static void packet_inet_drv_output(ErlDrvData e, ErlDrvEvent event)
-{
- (void) packet_inet_output((udp_descriptor*)e, (HANDLE)event);
-}
-
-/* UDP/SCTP socket ready for output:
-** This is a Back-End for Non-Block SCTP Connect (INET_STATE_CONNECTING)
-*/
-static int packet_inet_output(udp_descriptor* udesc, HANDLE event)
-{
- inet_descriptor* desc = INETP(udesc);
- int ret = 0;
- ErlDrvPort ix = desc->port;
-
- DEBUGF(("packet_inet_output(%ld) {s=%d\r\n",
- (long)desc->port, desc->s));
-
- if (desc->state == INET_STATE_CONNECTING) {
- sock_select(desc, FD_CONNECT, 0);
-
- driver_cancel_timer(ix); /* posssibly cancel a timer */
-#ifndef __WIN32__
- /*
- * XXX This is strange. This *should* work on Windows NT too,
- * but doesn't. An bug in Winsock 2.0 for Windows NT?
- *
- * See "Unix Netwok Programming", W.R.Stevens, p 412 for a
- * discussion about Unix portability and non blocking connect.
- */
-
-#ifndef SO_ERROR
- {
- int sz = sizeof(desc->remote);
- int code = sock_peer(desc->s,
- (struct sockaddr*) &desc->remote, &sz);
-
- if (IS_SOCKET_ERROR(code)) {
- desc->state = INET_STATE_OPEN; /* restore state */
- ret = async_error(desc, sock_errno());
- goto done;
- }
- }
-#else
- {
- int error = 0; /* Has to be initiated, we check it */
- unsigned int sz = sizeof(error); /* even if we get -1 */
- int code = sock_getopt(desc->s, SOL_SOCKET, SO_ERROR,
- (void *)&error, &sz);
-
- if ((code < 0) || error) {
- desc->state = INET_STATE_OPEN; /* restore state */
- ret = async_error(desc, error);
- goto done;
- }
- }
-#endif /* SO_ERROR */
-#endif /* !__WIN32__ */
-
- desc->state = INET_STATE_CONNECTED;
- async_ok(desc);
- }
- else {
- sock_select(desc,FD_CONNECT,0);
-
- DEBUGF(("packet_inet_output(%ld): bad state: %04x\r\n",
- (long)desc->port, desc->state));
- }
- done:
- DEBUGF(("packet_inet_output(%ld) }\r\n", (long)desc->port));
- return ret;
-}
#endif
/*---------------------------------------------------------------------------*/
@@ -11835,55 +12953,69 @@ make_noninheritable_handle(SOCKET s)
* Multi-timers
*/
-static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port,
+static void fire_multi_timers(tcp_descriptor *desc, ErlDrvPort port,
ErlDrvData data)
{
ErlDrvTime next_timeout;
- if (!*first) {
+ MultiTimerData *curr = desc->mtd;
+ if (!curr) {
ASSERT(0);
return;
}
#ifdef DEBUG
{
ErlDrvTime chk = erl_drv_monotonic_time(ERL_DRV_MSEC);
- ASSERT(chk >= (*first)->when);
+ ASSERT(chk >= curr->when);
}
#endif
do {
- MultiTimerData *save = *first;
- *first = save->next;
+ MultiTimerData *save = curr;
+
(*(save->timeout_function))(data,save->caller);
- FREE(save);
- if (*first == NULL) {
+
+ curr = curr->next;
+
+ if (desc->mtd_cache == NULL)
+ desc->mtd_cache = save;
+ else
+ FREE(save);
+
+ if (curr == NULL) {
+ desc->mtd = NULL;
return;
}
- (*first)->prev = NULL;
- next_timeout = (*first)->when - erl_drv_monotonic_time(ERL_DRV_MSEC);
+ curr->prev = NULL;
+ next_timeout = curr->when - erl_drv_monotonic_time(ERL_DRV_MSEC);
} while (next_timeout <= 0);
+ desc->mtd = curr;
driver_set_timer(port, (unsigned long) next_timeout);
}
-static void clean_multi_timers(MultiTimerData **first, ErlDrvPort port)
+static void clean_multi_timers(tcp_descriptor *desc, ErlDrvPort port)
{
- MultiTimerData *p;
- if (*first) {
+ if (desc->mtd) {
driver_cancel_timer(port);
}
- while (*first) {
- p = *first;
- *first = p->next;
- FREE(p);
+ while (desc->mtd) {
+ MultiTimerData *p = desc->mtd;
+ desc->mtd = p->next;
+ FREE(p);
+ }
+ desc->mtd = NULL;
+ if (desc->mtd_cache) {
+ FREE(desc->mtd_cache);
+ desc->mtd_cache = NULL;
}
}
-static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTimerData *p)
+static void remove_multi_timer(tcp_descriptor *desc, ErlDrvPort port, MultiTimerData *p)
{
if (p->prev != NULL) {
p->prev->next = p->next;
} else {
driver_cancel_timer(port);
- *first = p->next;
- if (*first) {
- ErlDrvTime ntmo = (*first)->when - erl_drv_monotonic_time(ERL_DRV_MSEC);
+ desc->mtd = p->next;
+ if (desc->mtd) {
+ ErlDrvTime ntmo = desc->mtd->when - erl_drv_monotonic_time(ERL_DRV_MSEC);
if (ntmo < 0)
ntmo = 0;
driver_set_timer(port, (unsigned long) ntmo);
@@ -11892,36 +13024,67 @@ static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTim
if (p->next != NULL) {
p->next->prev = p->prev;
}
- FREE(p);
+ if (desc->mtd_cache == NULL)
+ desc->mtd_cache = p;
+ else
+ FREE(p);
+}
+
+/* Cancel a timer based on the timeout_fun */
+static void cancel_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
+ void (*timeout_fun)(ErlDrvData drv_data,
+ ErlDrvTermData caller))
+{
+ MultiTimerData *timer = desc->mtd;
+ while(timer && timer->timeout_function != timeout_fun) {
+ timer = timer->next;
+ }
+ if (timer) {
+ remove_multi_timer(desc, port, timer);
+ }
}
-static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port,
+static MultiTimerData *add_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
ErlDrvTermData caller, unsigned timeout,
void (*timeout_fun)(ErlDrvData drv_data,
ErlDrvTermData caller))
{
MultiTimerData *mtd, *p, *s;
- mtd = ALLOC(sizeof(MultiTimerData));
- mtd->when = erl_drv_monotonic_time(ERL_DRV_MSEC) + ((ErlDrvTime) timeout) + 1;
+
+ /* Use cached timer if available */
+ if (desc->mtd_cache != NULL) {
+ mtd = desc->mtd_cache;
+ desc->mtd_cache = NULL;
+ } else
+ mtd = ALLOC(sizeof(MultiTimerData));
+
+ if (timeout)
+ mtd->when = erl_drv_monotonic_time(ERL_DRV_MSEC) + ((ErlDrvTime) timeout);
+ else
+ mtd->when = INT64_MIN; /* Don't have to get the time for 0 msec timeouts */
+
mtd->timeout_function = timeout_fun;
mtd->caller = caller;
mtd->next = mtd->prev = NULL;
- for(p = *first,s = NULL; p != NULL; s = p, p = p->next) {
+
+ /* Find correct slot in timer linked list */
+ for(p = desc->mtd,s = NULL; p != NULL; s = p, p = p->next) {
if (p->when >= mtd->when) {
break;
}
}
+ /* Insert in linked list */
if (!p) {
if (!s) {
- *first = mtd;
+ desc->mtd = mtd;
} else {
s->next = mtd;
mtd->prev = s;
}
} else {
if (!s) {
- *first = mtd;
+ desc->mtd = mtd;
} else {
s->next = mtd;
mtd->prev = s;
@@ -11929,10 +13092,8 @@ static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port,
mtd->next = p;
p->prev = mtd;
}
+ /* Possibly set new timer */
if (!s) {
- if (mtd->next) {
- driver_cancel_timer(port);
- }
driver_set_timer(port,timeout);
}
return mtd;
diff --git a/erts/emulator/drivers/common/zlib_drv.c b/erts/emulator/drivers/common/zlib_drv.c
deleted file mode 100644
index 440ba956d8..0000000000
--- a/erts/emulator/drivers/common/zlib_drv.c
+++ /dev/null
@@ -1,736 +0,0 @@
-/*
- * %CopyrightBegin%
- *
- * Copyright Ericsson AB 2003-2016. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * %CopyrightEnd%
- */
-
-/*
- * ZLib interface for erlang
- *
- */
-#ifdef HAVE_CONFIG_H
-# include "config.h"
-#endif
-#include <stdio.h>
-#include <zlib.h>
-#include <errno.h>
-#include <string.h>
-
-#include "erl_driver.h"
-
-
-#define DEFLATE_INIT 1
-#define DEFLATE_INIT2 2
-#define DEFLATE_SETDICT 3
-#define DEFLATE_RESET 4
-#define DEFLATE_END 5
-#define DEFLATE_PARAMS 6
-#define DEFLATE 7
-
-#define INFLATE_INIT 8
-#define INFLATE_INIT2 9
-#define INFLATE_SETDICT 10
-#define INFLATE_SYNC 11
-#define INFLATE_RESET 12
-#define INFLATE_END 13
-#define INFLATE 14
-
-#define CRC32_0 15
-#define CRC32_1 16
-#define CRC32_2 17
-
-#define SET_BUFSZ 18
-#define GET_BUFSZ 19
-#define GET_QSIZE 20
-
-#define ADLER32_1 21
-#define ADLER32_2 22
-
-#define CRC32_COMBINE 23
-#define ADLER32_COMBINE 24
-
-#define INFLATE_CHUNK 25
-
-
-#define DEFAULT_BUFSZ 4000
-
-/* This flag is used in the same places, where zlib return codes
- * (Z_OK, Z_STREAM_END, Z_NEED_DICT) are. So, we need to set it to
- * relatively large value to avoid possible value clashes in future.
- * */
-#define INFLATE_HAS_MORE 100
-
-static int zlib_init(void);
-static ErlDrvData zlib_start(ErlDrvPort port, char* buf);
-static void zlib_stop(ErlDrvData e);
-static void zlib_flush(ErlDrvData e);
-static ErlDrvSSizeT zlib_ctl(ErlDrvData drv_data, unsigned int command, char *buf,
- ErlDrvSizeT len, char **rbuf, ErlDrvSizeT rlen);
-static void zlib_outputv(ErlDrvData drv_data, ErlIOVec *ev);
-
-ErlDrvEntry zlib_driver_entry = {
- zlib_init,
- zlib_start,
- zlib_stop,
- NULL, /* output */
- NULL, /* ready_input */
- NULL, /* ready_output */
- "zlib_drv",
- NULL, /* finish */
- NULL, /* handle */
- zlib_ctl,
- NULL, /* timeout */
- zlib_outputv,
- NULL, /* read_async */
- zlib_flush,
- NULL, /* call */
- NULL, /* event */
- ERL_DRV_EXTENDED_MARKER,
- ERL_DRV_EXTENDED_MAJOR_VERSION,
- ERL_DRV_EXTENDED_MINOR_VERSION,
- ERL_DRV_FLAG_USE_PORT_LOCKING,
- NULL, /* handle2 */
- NULL, /* process_exit */
-};
-
-typedef enum {
- ST_NONE = 0,
- ST_DEFLATE = 1,
- ST_INFLATE = 2
-} ZLibState;
-
-
-typedef struct {
- z_stream s;
- ZLibState state;
- ErlDrvBinary* bin;
- int binsz;
- int binsz_need;
- uLong crc;
- int inflate_eos_seen;
- int want_crc; /* 1 if crc is calculated on clear text */
- ErlDrvPort port; /* the associcated port */
-} ZLibData;
-
-static int zlib_inflate(ZLibData* d, int flush);
-static int zlib_deflate(ZLibData* d, int flush);
-
-#if defined(__WIN32__)
-static int i32(char* buf)
-#else
-static __inline__ int i32(char* buf)
-#endif
-{
- return (int) (
- (((int)((unsigned char*)buf)[0]) << 24) |
- (((int)((unsigned char*)buf)[1]) << 16) |
- (((int)((unsigned char*)buf)[2]) << 8) |
- (((int)((unsigned char*)buf)[3]) << 0));
-}
-
-static char* zlib_reason(int code, int* err)
-{
- switch(code) {
- case Z_OK:
- *err = 0;
- return "ok";
- case Z_STREAM_END:
- *err = 0;
- return "stream_end";
- case Z_ERRNO:
- *err = 1;
- return erl_errno_id(errno);
- case Z_STREAM_ERROR:
- *err = 1;
- return "stream_error";
- case Z_DATA_ERROR:
- *err = 1;
- return "data_error";
- case Z_MEM_ERROR:
- *err = 1;
- return "mem_error";
- case Z_BUF_ERROR:
- *err = 1;
- return "buf_error";
- case Z_VERSION_ERROR:
- *err = 1;
- return "version_error";
- default:
- *err = 1;
- return "unknown_error";
- }
-}
-
-
-static ErlDrvSSizeT zlib_return(int code, char** rbuf, ErlDrvSizeT rlen)
-{
- int msg_code = 0; /* 0=ok, 1=error */
- char* dst = *rbuf;
- char* src;
- ErlDrvSizeT len = 0;
-
- src = zlib_reason(code, &msg_code);
- *dst++ = msg_code;
- rlen--;
- len = 1;
-
- while((rlen > 0) && *src) {
- *dst++ = *src++;
- rlen--;
- len++;
- }
- return len;
-}
-
-static ErlDrvSSizeT zlib_value2(int msg_code, int value,
- char** rbuf, ErlDrvSizeT rlen)
-{
- char* dst = *rbuf;
-
- if (rlen < 5) {
- return -1;
- }
- *dst++ = msg_code;
- *dst++ = (value >> 24) & 0xff;
- *dst++ = (value >> 16) & 0xff;
- *dst++ = (value >> 8) & 0xff;
- *dst++ = value & 0xff;
- return 5;
-}
-
-static ErlDrvSSizeT zlib_value(int value, char** rbuf, ErlDrvSizeT rlen)
-{
- return zlib_value2(2, value, rbuf, rlen);
-}
-
-static int zlib_output_init(ZLibData* d)
-{
- if (d->bin != NULL)
- driver_free_binary(d->bin);
- if ((d->bin = driver_alloc_binary(d->binsz_need)) == NULL)
- return -1;
- d->binsz = d->binsz_need;
- d->s.next_out = (unsigned char*)d->bin->orig_bytes;
- d->s.avail_out = d->binsz;
- return 0;
-}
-
-/*
- * Send compressed or uncompressed data
- * and restart output procesing
- */
-static int zlib_output(ZLibData* d)
-{
- if (d->bin != NULL) {
- int len = d->binsz - d->s.avail_out;
- if (len > 0) {
- if (driver_output_binary(d->port, NULL, 0, d->bin, 0, len) < 0)
- return -1;
- }
- driver_free_binary(d->bin);
- d->bin = NULL;
- d->binsz = 0;
- }
- return zlib_output_init(d);
-}
-
-static int zlib_inflate(ZLibData* d, int flush)
-{
- int res = Z_OK;
-
- if ((d->bin == NULL) && (zlib_output_init(d) < 0)) {
- errno = ENOMEM;
- return Z_ERRNO;
- }
-
- while ((driver_sizeq(d->port) > 0) && (res != Z_STREAM_END)) {
- int vlen;
- SysIOVec* iov = driver_peekq(d->port, &vlen);
- int len;
- int possibly_more_output = 0;
-
- d->s.next_in = iov[0].iov_base;
- d->s.avail_in = iov[0].iov_len;
- while((possibly_more_output || (d->s.avail_in > 0)) && (res != Z_STREAM_END)) {
- res = inflate(&d->s, Z_NO_FLUSH);
- if (res == Z_NEED_DICT) {
- /* Essential to eat the header bytes that zlib has looked at */
- len = iov[0].iov_len - d->s.avail_in;
- driver_deq(d->port, len);
- return res;
- }
- if (res == Z_BUF_ERROR) {
- /* Was possible more output, but actually not */
- res = Z_OK;
- }
- else if (res < 0) {
- return res;
- }
- if (d->s.avail_out != 0) {
- possibly_more_output = 0;
- } else {
- if (d->want_crc)
- d->crc = crc32(d->crc, (unsigned char*)d->bin->orig_bytes,
- d->binsz - d->s.avail_out);
- zlib_output(d);
- possibly_more_output = 1;
- }
- }
- len = iov[0].iov_len - d->s.avail_in;
- driver_deq(d->port, len);
- }
-
- if (d->want_crc) {
- d->crc = crc32(d->crc, (unsigned char*) d->bin->orig_bytes,
- d->binsz - d->s.avail_out);
- }
- zlib_output(d);
- if (res == Z_STREAM_END) {
- d->inflate_eos_seen = 1;
- }
- return res;
-}
-
-static int zlib_inflate_chunk(ZLibData* d)
-{
- int res = Z_OK;
-
- if ((d->bin == NULL) && (zlib_output_init(d) < 0)) {
- errno = ENOMEM;
- return Z_ERRNO;
- }
-
- while ((driver_sizeq(d->port) > 0) && (d->s.avail_out > 0) &&
- (res != Z_STREAM_END)) {
- int vlen;
- SysIOVec* iov = driver_peekq(d->port, &vlen);
- int len;
-
- d->s.next_in = iov[0].iov_base;
- d->s.avail_in = iov[0].iov_len;
- while((d->s.avail_in > 0) && (d->s.avail_out > 0) && (res != Z_STREAM_END)) {
- res = inflate(&d->s, Z_NO_FLUSH);
- if (res == Z_NEED_DICT) {
- /* Essential to eat the header bytes that zlib has looked at */
- len = iov[0].iov_len - d->s.avail_in;
- driver_deq(d->port, len);
- return res;
- }
- if (res == Z_BUF_ERROR) {
- /* Was possible more output, but actually not */
- res = Z_OK;
- }
- else if (res < 0) {
- return res;
- }
- }
- len = iov[0].iov_len - d->s.avail_in;
- driver_deq(d->port, len);
- }
-
- /* We are here because all input was consumed or EOS reached or output
- * buffer is full */
- if (d->want_crc) {
- d->crc = crc32(d->crc, (unsigned char*) d->bin->orig_bytes,
- d->binsz - d->s.avail_out);
- }
- zlib_output(d);
- if ((res == Z_OK) && (d->s.avail_in > 0))
- res = INFLATE_HAS_MORE;
- else if (res == Z_STREAM_END) {
- d->inflate_eos_seen = 1;
- }
- return res;
-}
-
-static int zlib_deflate(ZLibData* d, int flush)
-{
- int res = Z_OK;
-
- if ((d->bin == NULL) && (zlib_output_init(d) < 0)) {
- errno = ENOMEM;
- return Z_ERRNO;
- }
-
- while ((driver_sizeq(d->port) > 0) && (res != Z_STREAM_END)) {
- int vlen;
- SysIOVec* iov = driver_peekq(d->port, &vlen);
- int len;
-
- d->s.next_in = iov[0].iov_base;
- d->s.avail_in = iov[0].iov_len;
-
- while((d->s.avail_in > 0) && (res != Z_STREAM_END)) {
- if ((res = deflate(&d->s, Z_NO_FLUSH)) < 0) {
- return res;
- }
- if (d->s.avail_out == 0) {
- zlib_output(d);
- }
- }
- len = iov[0].iov_len - d->s.avail_in;
- if (d->want_crc) {
- d->crc = crc32(d->crc, iov[0].iov_base, len);
- }
- driver_deq(d->port, len);
- }
-
- if (flush != Z_NO_FLUSH) {
- if ((res = deflate(&d->s, flush)) < 0) {
- return res;
- }
- if (flush == Z_FINISH) {
- while (d->s.avail_out < d->binsz) {
- zlib_output(d);
- if (res == Z_STREAM_END) {
- break;
- }
- if ((res = deflate(&d->s, flush)) < 0) {
- return res;
- }
- }
- } else {
- while (d->s.avail_out == 0) {
- zlib_output(d);
- if ((res = deflate(&d->s, flush)) < 0) {
- return res;
- }
- }
- if (d->s.avail_out < d->binsz) {
- zlib_output(d);
- }
- }
- }
- return res;
-}
-
-
-
-static void* zlib_alloc(void* data, unsigned int items, unsigned int size)
-{
- return (void*) driver_alloc(items*size);
-}
-
-static void zlib_free(void* data, void* addr)
-{
- driver_free(addr);
-}
-
-static int zlib_init()
-{
- return 0;
-}
-
-static ErlDrvData zlib_start(ErlDrvPort port, char* buf)
-{
- ZLibData* d;
-
- if ((d = (ZLibData*) driver_alloc(sizeof(ZLibData))) == NULL)
- return ERL_DRV_ERROR_GENERAL;
-
- memset(&d->s, 0, sizeof(z_stream));
-
- d->s.zalloc = zlib_alloc;
- d->s.zfree = zlib_free;
- d->s.opaque = d;
- d->s.data_type = Z_BINARY;
-
- d->port = port;
- d->state = ST_NONE;
- d->bin = NULL;
- d->binsz = 0;
- d->binsz_need = DEFAULT_BUFSZ;
- d->crc = crc32(0L, Z_NULL, 0);
- d->inflate_eos_seen = 0;
- d->want_crc = 0;
- return (ErlDrvData)d;
-}
-
-
-static void zlib_stop(ErlDrvData e)
-{
- ZLibData* d = (ZLibData*)e;
-
- if (d->state == ST_DEFLATE)
- deflateEnd(&d->s);
- else if (d->state == ST_INFLATE)
- inflateEnd(&d->s);
-
- if (d->bin != NULL)
- driver_free_binary(d->bin);
-
- driver_free(d);
-}
-
-static void zlib_flush(ErlDrvData drv_data)
-{
- ZLibData* d = (ZLibData*) drv_data;
-
- driver_deq(d->port, driver_sizeq(d->port));
-}
-
-static ErlDrvSSizeT zlib_ctl(ErlDrvData drv_data, unsigned int command, char *buf,
- ErlDrvSizeT len, char **rbuf, ErlDrvSizeT rlen)
-{
- ZLibData* d = (ZLibData*)drv_data;
- int res;
-
- switch(command) {
- case DEFLATE_INIT:
- if (len != 4) goto badarg;
- if (d->state != ST_NONE) goto badarg;
- res = deflateInit(&d->s, i32(buf));
- if (res == Z_OK) {
- d->state = ST_DEFLATE;
- d->want_crc = 0;
- d->crc = crc32(0L, Z_NULL, 0);
- }
- return zlib_return(res, rbuf, rlen);
-
- case DEFLATE_INIT2: {
- int wbits;
-
- if (len != 20) goto badarg;
- if (d->state != ST_NONE) goto badarg;
- wbits = i32(buf+8);
- res = deflateInit2(&d->s, i32(buf), i32(buf+4), wbits,
- i32(buf+12), i32(buf+16));
- if (res == Z_OK) {
- d->state = ST_DEFLATE;
- d->want_crc = (wbits < 0);
- d->crc = crc32(0L, Z_NULL, 0);
- }
- return zlib_return(res, rbuf, rlen);
- }
-
- case DEFLATE_SETDICT:
- if (d->state != ST_DEFLATE) goto badarg;
- res = deflateSetDictionary(&d->s, (unsigned char*)buf, len);
- if (res == Z_OK) {
- return zlib_value(d->s.adler, rbuf, rlen);
- } else {
- return zlib_return(res, rbuf, rlen);
- }
-
- case DEFLATE_RESET:
- if (len != 0) goto badarg;
- if (d->state != ST_DEFLATE) goto badarg;
- driver_deq(d->port, driver_sizeq(d->port));
- res = deflateReset(&d->s);
- return zlib_return(res, rbuf, rlen);
-
- case DEFLATE_END:
- if (len != 0) goto badarg;
- if (d->state != ST_DEFLATE) goto badarg;
- driver_deq(d->port, driver_sizeq(d->port));
- res = deflateEnd(&d->s);
- d->state = ST_NONE;
- return zlib_return(res, rbuf, rlen);
-
- case DEFLATE_PARAMS:
- if (len != 8) goto badarg;
- if (d->state != ST_DEFLATE) goto badarg;
- res = deflateParams(&d->s, i32(buf), i32(buf+4));
- return zlib_return(res, rbuf, rlen);
-
- case DEFLATE:
- if (d->state != ST_DEFLATE) goto badarg;
- if (len != 4) goto badarg;
- res = zlib_deflate(d, i32(buf));
- return zlib_return(res, rbuf, rlen);
-
- case INFLATE_INIT:
- if (len != 0) goto badarg;
- if (d->state != ST_NONE) goto badarg;
- res = inflateInit(&d->s);
- if (res == Z_OK) {
- d->state = ST_INFLATE;
- d->inflate_eos_seen = 0;
- d->want_crc = 0;
- d->crc = crc32(0L, Z_NULL, 0);
- }
- return zlib_return(res, rbuf, rlen);
-
- case INFLATE_INIT2: {
- int wbits;
-
- if (len != 4) goto badarg;
- if (d->state != ST_NONE) goto badarg;
- wbits = i32(buf);
- res = inflateInit2(&d->s, wbits);
- if (res == Z_OK) {
- d->state = ST_INFLATE;
- d->inflate_eos_seen = 0;
- d->want_crc = (wbits < 0);
- d->crc = crc32(0L, Z_NULL, 0);
- }
- return zlib_return(res, rbuf, rlen);
- }
-
- case INFLATE_SETDICT:
- if (d->state != ST_INFLATE) goto badarg;
- res = inflateSetDictionary(&d->s, (unsigned char*)buf, len);
- return zlib_return(res, rbuf, rlen);
-
- case INFLATE_SYNC:
- if (d->state != ST_INFLATE) goto badarg;
- if (len != 0) goto badarg;
- if (driver_sizeq(d->port) == 0) {
- res = Z_BUF_ERROR;
- } else {
- int vlen;
- SysIOVec* iov = driver_peekq(d->port, &vlen);
-
- d->s.next_in = iov[0].iov_base;
- d->s.avail_in = iov[0].iov_len;
- res = inflateSync(&d->s);
- }
- return zlib_return(res, rbuf, rlen);
-
- case INFLATE_RESET:
- if (d->state != ST_INFLATE) goto badarg;
- if (len != 0) goto badarg;
- driver_deq(d->port, driver_sizeq(d->port));
- res = inflateReset(&d->s);
- d->inflate_eos_seen = 0;
- return zlib_return(res, rbuf, rlen);
-
- case INFLATE_END:
- if (d->state != ST_INFLATE) goto badarg;
- if (len != 0) goto badarg;
- driver_deq(d->port, driver_sizeq(d->port));
- res = inflateEnd(&d->s);
- if (res == Z_OK && d->inflate_eos_seen == 0) {
- res = Z_DATA_ERROR;
- }
- d->state = ST_NONE;
- return zlib_return(res, rbuf, rlen);
-
- case INFLATE:
- if (d->state != ST_INFLATE) goto badarg;
- if (len != 4) goto badarg;
- res = zlib_inflate(d, i32(buf));
- if (res == Z_NEED_DICT) {
- return zlib_value2(3, d->s.adler, rbuf, rlen);
- } else {
- return zlib_return(res, rbuf, rlen);
- }
-
- case INFLATE_CHUNK:
- if (d->state != ST_INFLATE) goto badarg;
- if (len != 0) goto badarg;
- res = zlib_inflate_chunk(d);
- if (res == INFLATE_HAS_MORE) {
- return zlib_value2(4, 0, rbuf, rlen);
- } else if (res == Z_NEED_DICT) {
- return zlib_value2(3, d->s.adler, rbuf, rlen);
- } else {
- return zlib_return(res, rbuf, rlen);
- }
-
- case GET_QSIZE:
- return zlib_value(driver_sizeq(d->port), rbuf, rlen);
-
- case GET_BUFSZ:
- return zlib_value(d->binsz_need, rbuf, rlen);
-
- case SET_BUFSZ: {
- int need;
- if (len != 4) goto badarg;
- need = i32(buf);
- if ((need < 16) || (need > 0x00ffffff))
- goto badarg;
- if (d->binsz_need != need) {
- d->binsz_need = need;
- if (d->bin != NULL) {
- if (d->s.avail_out == d->binsz) {
- driver_free_binary(d->bin);
- d->bin = NULL;
- d->binsz = 0;
- }
- else
- zlib_output(d);
- }
- }
- return zlib_return(Z_OK, rbuf, rlen);
- }
-
- case CRC32_0:
- return zlib_value(d->crc, rbuf, rlen);
-
- case CRC32_1: {
- uLong crc = crc32(0L, Z_NULL, 0);
- crc = crc32(crc, (unsigned char*) buf, len);
- return zlib_value(crc, rbuf, rlen);
- }
-
- case CRC32_2: {
- uLong crc;
- if (len < 4) goto badarg;
- crc = (unsigned int) i32(buf);
- crc = crc32(crc, (unsigned char*) buf+4, len-4);
- return zlib_value(crc, rbuf, rlen);
- }
-
- case ADLER32_1: {
- uLong adler = adler32(0L, Z_NULL, 0);
- adler = adler32(adler, (unsigned char*) buf, len);
- return zlib_value(adler, rbuf, rlen);
- }
-
- case ADLER32_2: {
- uLong adler;
- if (len < 4) goto badarg;
- adler = (unsigned int) i32(buf);
- adler = adler32(adler, (unsigned char*) buf+4, len-4);
- return zlib_value(adler, rbuf, rlen);
- }
-
- case CRC32_COMBINE: {
- uLong crc, crc1, crc2, len2;
- if (len != 12) goto badarg;
- crc1 = (unsigned int) i32(buf);
- crc2 = (unsigned int) i32(buf+4);
- len2 = (unsigned int) i32(buf+8);
- crc = crc32_combine(crc1, crc2, len2);
- return zlib_value(crc, rbuf, rlen);
- }
-
- case ADLER32_COMBINE: {
- uLong adler, adler1, adler2, len2;
- if (len != 12) goto badarg;
- adler1 = (unsigned int) i32(buf);
- adler2 = (unsigned int) i32(buf+4);
- len2 = (unsigned int) i32(buf+8);
- adler = adler32_combine(adler1, adler2, len2);
- return zlib_value(adler, rbuf, rlen);
- }
- }
-
- badarg:
- errno = EINVAL;
- return zlib_return(Z_ERRNO, rbuf, rlen);
-}
-
-
-
-static void zlib_outputv(ErlDrvData drv_data, ErlIOVec *ev)
-{
- ZLibData* d = (ZLibData*) drv_data;
-
- driver_enqv(d->port, ev, 0);
-}