aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/drivers/ose
diff options
context:
space:
mode:
authorLukas Larsson <[email protected]>2013-03-14 15:42:19 +0100
committerLukas Larsson <[email protected]>2014-02-24 15:15:55 +0100
commit200fbe924466720bd2a8c5eb05b05d67b0a2414c (patch)
treee78056a1247d75978646b629cd0e01688e65ff58 /erts/emulator/drivers/ose
parentfdcdaca338849d7f63d4300e489318f6ee275d82 (diff)
downloadotp-200fbe924466720bd2a8c5eb05b05d67b0a2414c.tar.gz
otp-200fbe924466720bd2a8c5eb05b05d67b0a2414c.tar.bz2
otp-200fbe924466720bd2a8c5eb05b05d67b0a2414c.zip
Added support for ENEA OSE
This port has support for both non-smp and smp. It contains a new way to do io checking in which erts_poll_wait receives the payload of the polled entity. This has implications for all linked-in drivers.
Diffstat (limited to 'erts/emulator/drivers/ose')
-rw-r--r--erts/emulator/drivers/ose/ose_efile.c1090
1 files changed, 1090 insertions, 0 deletions
diff --git a/erts/emulator/drivers/ose/ose_efile.c b/erts/emulator/drivers/ose/ose_efile.c
new file mode 100644
index 0000000000..8cd34a7bb0
--- /dev/null
+++ b/erts/emulator/drivers/ose/ose_efile.c
@@ -0,0 +1,1090 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 1997-2012. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+/*
+ * Purpose: Provides file and directory operations for OSE.
+ */
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+#if defined(HAVE_POSIX_FALLOCATE) && !defined(__sun) && !defined(__sun__)
+#define _XOPEN_SOURCE 600
+#endif
+#if !defined(_GNU_SOURCE) && defined(HAVE_LINUX_FALLOC_H)
+#define _GNU_SOURCE
+#endif
+#include "sys.h"
+#include "erl_driver.h"
+#include "erl_efile.h"
+/*#include <utime.h>*/
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_SYS_UIO_H
+#include <sys/types.h>
+#include <sys/uio.h>
+#endif
+#if defined(HAVE_SENDFILE) && (defined(__linux__) || (defined(__sun) && defined(__SVR4)))
+#include <sys/sendfile.h>
+#endif
+
+#if defined(__APPLE__) && defined(__MACH__) && !defined(__DARWIN__)
+#define DARWIN 1
+#endif
+
+#if defined(DARWIN) || defined(HAVE_LINUX_FALLOC_H) || defined(HAVE_POSIX_FALLOCATE)
+#include <fcntl.h>
+#endif
+
+#ifdef HAVE_LINUX_FALLOC_H
+#include <linux/falloc.h>
+#endif
+
+#ifdef SUNOS4
+# define getcwd(buf, size) getwd(buf)
+#endif
+
+#ifdef __OSE__
+#include "sys/stat.h"
+#include "dirent.h"
+#endif
+
+/* Find a definition of MAXIOV, that is used in the code later. */
+#if defined IOV_MAX
+#define MAXIOV IOV_MAX
+#elif defined UIO_MAXIOV
+#define MAXIOV UIO_MAXIOV
+#else
+#define MAXIOV 16
+#endif
+
+
+/*
+ * Macros for testing file types.
+ */
+
+#define ISDIR(st) (((st).st_mode & S_IFMT) == S_IFDIR)
+#define ISREG(st) (((st).st_mode & S_IFMT) == S_IFREG)
+#define ISDEV(st) \
+ (((st).st_mode&S_IFMT) == S_IFCHR || ((st).st_mode&S_IFMT) == S_IFBLK)
+#define ISLNK(st) (((st).st_mode & S_IFLNK) == S_IFLNK)
+#ifdef NO_UMASK
+#define FILE_MODE 0644
+#define DIR_MODE 0755
+#else
+#define FILE_MODE 0666
+#define DIR_MODE 0777
+#endif
+
+#define IS_DOT_OR_DOTDOT(s) \
+ (s[0] == '.' && (s[1] == '\0' || (s[1] == '.' && s[2] == '\0')))
+
+static int check_error(int result, Efile_error* errInfo);
+
+static int
+check_error(int result, Efile_error *errInfo)
+{
+ if (result < 0) {
+ errInfo->posix_errno = errInfo->os_errno = errno;
+ return 0;
+ }
+ return 1;
+}
+
+int
+efile_mkdir(Efile_error* errInfo, /* Where to return error codes. */
+ char* name) /* Name of directory to create. */
+{
+#ifdef NO_MKDIR_MODE
+ return check_error(mkdir(name), errInfo);
+#else
+ return check_error(mkdir(name, DIR_MODE), errInfo);
+#endif
+}
+
+int
+efile_rmdir(Efile_error* errInfo, /* Where to return error codes. */
+ char* name) /* Name of directory to delete. */
+{
+ if (rmdir(name) == 0) {
+ return 1;
+ }
+ if (errno == ENOTEMPTY) {
+ errno = EEXIST;
+ }
+ if (errno == EEXIST) {
+ int saved_errno = errno;
+ struct stat file_stat;
+ struct stat cwd_stat;
+
+ /*
+ * The error code might be wrong if this is the current directory.
+ */
+
+ if (stat(name, &file_stat) == 0 && stat(".", &cwd_stat) == 0 &&
+ file_stat.st_ino == cwd_stat.st_ino &&
+ file_stat.st_dev == cwd_stat.st_dev) {
+ saved_errno = EINVAL;
+ }
+ errno = saved_errno;
+ }
+ return check_error(-1, errInfo);
+}
+
+int
+efile_delete_file(Efile_error* errInfo, /* Where to return error codes. */
+ char* name) /* Name of file to delete. */
+{
+ if (unlink(name) == 0) {
+ return 1;
+ }
+ if (errno == EISDIR) { /* Linux sets the wrong error code. */
+ errno = EPERM;
+ }
+ return check_error(-1, errInfo);
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * Changes the name of an existing file or directory, from src to dst.
+ * If src and dst refer to the same file or directory, does nothing
+ * and returns success. Otherwise if dst already exists, it will be
+ * deleted and replaced by src subject to the following conditions:
+ * If src is a directory, dst may be an empty directory.
+ * If src is a file, dst may be a file.
+ * In any other situation where dst already exists, the rename will
+ * fail.
+ *
+ * Results:
+ * If the directory was successfully created, returns 1.
+ * Otherwise the return value is 0 and errno is set to
+ * indicate the error. Some possible values for errno are:
+ *
+ * EACCES: src or dst parent directory can't be read and/or written.
+ * EEXIST: dst is a non-empty directory.
+ * EINVAL: src is a root directory or dst is a subdirectory of src.
+ * EISDIR: dst is a directory, but src is not.
+ * ENOENT: src doesn't exist, or src or dst is "".
+ * ENOTDIR: src is a directory, but dst is not.
+ * EXDEV: src and dst are on different filesystems.
+ *
+ * Side effects:
+ * The implementation of rename may allow cross-filesystem renames,
+ * but the caller should be prepared to emulate it with copy and
+ * delete if errno is EXDEV.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+int
+efile_rename(Efile_error* errInfo, /* Where to return error codes. */
+ char* src, /* Original name. */
+ char* dst) /* New name. */
+{
+ if (rename(src, dst) == 0) {
+ return 1;
+ }
+ if (errno == ENOTEMPTY) {
+ errno = EEXIST;
+ }
+#if defined (sparc)
+ /*
+ * SunOS 4.1.4 reports overwriting a non-empty directory with a
+ * directory as EINVAL instead of EEXIST (first rule out the correct
+ * EINVAL result code for moving a directory into itself). Must be
+ * conditionally compiled because realpath() is only defined on SunOS.
+ */
+
+ if (errno == EINVAL) {
+ char srcPath[MAXPATHLEN], dstPath[MAXPATHLEN];
+ DIR *dirPtr;
+ struct dirent *dirEntPtr;
+
+#ifdef PURIFY
+ memset(srcPath, '\0', sizeof(srcPath));
+ memset(dstPath, '\0', sizeof(dstPath));
+#endif
+
+ if ((realpath(src, srcPath) != NULL)
+ && (realpath(dst, dstPath) != NULL)
+ && (strncmp(srcPath, dstPath, strlen(srcPath)) != 0)) {
+ dirPtr = opendir(dst);
+ if (dirPtr != NULL) {
+ while ((dirEntPtr = readdir(dirPtr)) != NULL) {
+ if ((strcmp(dirEntPtr->d_name, ".") != 0) &&
+ (strcmp(dirEntPtr->d_name, "..") != 0)) {
+ errno = EEXIST;
+ closedir(dirPtr);
+ return check_error(-1, errInfo);
+ }
+ }
+ closedir(dirPtr);
+ }
+ }
+ errno = EINVAL;
+ }
+#endif /* sparc */
+
+ if (strcmp(src, "/") == 0) {
+ /*
+ * Alpha reports renaming / as EBUSY and Linux reports it as EACCES,
+ * instead of EINVAL.
+ */
+
+ errno = EINVAL;
+ }
+
+ /*
+ * DEC Alpha OSF1 V3.0 returns EACCES when attempting to move a
+ * file across filesystems and the parent directory of that file is
+ * not writable. Most other systems return EXDEV. Does nothing to
+ * correct this behavior.
+ */
+
+ return check_error(-1, errInfo);
+}
+
+int
+efile_chdir(Efile_error* errInfo, /* Where to return error codes. */
+ char* name) /* Name of directory to make current. */
+{
+ return check_error(chdir(name), errInfo);
+}
+
+
+int
+efile_getdcwd(Efile_error* errInfo, /* Where to return error codes. */
+ int drive, /* 0 - current, 1 - A, 2 - B etc. */
+ char* buffer, /* Where to return the current
+ directory. */
+ size_t size) /* Size of buffer. */
+{
+ if (drive == 0) {
+ if (getcwd(buffer, size) == NULL)
+ return check_error(-1, errInfo);
+
+#ifdef SIMSPARCSOLARIS
+ /* We get "host:" prepended to the dirname - remove!. */
+ {
+ int i = 0;
+ int j = 0;
+ while ((buffer[i] != ':') && (buffer[i] != '\0')) i++;
+ if (buffer[i] == ':') {
+ i++;
+ while ((buffer[j++] = buffer[i++]) != '\0');
+ }
+ }
+#endif
+ return 1;
+ }
+
+ /*
+ * Drives other than 0 is not supported on Unix.
+ */
+
+ errno = ENOTSUP;
+ return check_error(-1, errInfo);
+}
+
+int
+efile_readdir(Efile_error* errInfo, /* Where to return error codes. */
+ char* name, /* Name of directory to open. */
+ EFILE_DIR_HANDLE* p_dir_handle, /* Pointer to directory
+ handle of
+ open directory.*/
+ char* buffer, /* Pointer to buffer for
+ one filename. */
+ size_t *size) /* in-out Size of buffer, length
+ of name. */
+{
+ DIR *dp; /* Pointer to directory structure. */
+ struct dirent* dirp; /* Pointer to directory entry. */
+
+ /*
+ * If this is the first call, we must open the directory.
+ */
+
+ if (*p_dir_handle == NULL) {
+ dp = opendir(name);
+ if (dp == NULL)
+ return check_error(-1, errInfo);
+ *p_dir_handle = (EFILE_DIR_HANDLE) dp;
+ }
+
+ /*
+ * Retrieve the name of the next file using the directory handle.
+ */
+
+ dp = *((DIR **)((void *)p_dir_handle));
+ for (;;) {
+ dirp = readdir(dp);
+ if (dirp == NULL) {
+ closedir(dp);
+ return 0;
+ }
+ if (IS_DOT_OR_DOTDOT(dirp->d_name))
+ continue;
+ buffer[0] = '\0';
+ strncat(buffer, dirp->d_name, (*size)-1);
+ *size = strlen(dirp->d_name);
+ return 1;
+ }
+}
+
+int
+efile_openfile(Efile_error* errInfo, /* Where to return error codes. */
+ char* name, /* Name of directory to open. */
+ int flags, /* Flags to user for opening. */
+ int* pfd, /* Where to store the file
+ descriptor. */
+ Sint64 *pSize) /* Where to store the size of the
+ file. */
+{
+ struct stat statbuf;
+ int fd;
+ int mode; /* Open mode. */
+
+ if (stat(name, &statbuf) >= 0 && !ISREG(statbuf)) {
+ /*
+ * For UNIX only, here is some ugly code to allow
+ * /dev/null to be opened as a file.
+ *
+ * Assumption: The i-node number for /dev/null cannot be zero.
+ */
+ static ino_t dev_null_ino = 0;
+
+ if (dev_null_ino == 0) {
+ struct stat nullstatbuf;
+
+ if (stat("/dev/null", &nullstatbuf) >= 0) {
+ dev_null_ino = nullstatbuf.st_ino;
+ }
+ }
+ if (!(dev_null_ino && statbuf.st_ino == dev_null_ino)) {
+ errno = EISDIR;
+ return check_error(-1, errInfo);
+ }
+ }
+
+ switch (flags & (EFILE_MODE_READ|EFILE_MODE_WRITE)) {
+ case EFILE_MODE_READ:
+ mode = O_RDONLY;
+ break;
+ case EFILE_MODE_WRITE:
+ if (flags & EFILE_NO_TRUNCATE)
+ mode = O_WRONLY | O_CREAT;
+ else
+ mode = O_WRONLY | O_CREAT | O_TRUNC;
+ break;
+ case EFILE_MODE_READ_WRITE:
+ mode = O_RDWR | O_CREAT;
+ break;
+ default:
+ errno = EINVAL;
+ return check_error(-1, errInfo);
+ }
+
+
+ if (flags & EFILE_MODE_APPEND) {
+ mode &= ~O_TRUNC;
+ mode |= O_APPEND;
+ }
+
+ if (flags & EFILE_MODE_EXCL) {
+ mode |= O_EXCL;
+ }
+
+ fd = open(name, mode, FILE_MODE);
+
+ if (!check_error(fd, errInfo))
+ return 0;
+
+ *pfd = fd;
+ if (pSize) {
+ *pSize = statbuf.st_size;
+ }
+ return 1;
+}
+
+int
+efile_may_openfile(Efile_error* errInfo, char *name) {
+ struct stat statbuf; /* Information about the file */
+ int result;
+
+ result = stat(name, &statbuf);
+ if (!check_error(result, errInfo))
+ return 0;
+ if (!ISREG(statbuf)) {
+ errno = EISDIR;
+ return check_error(-1, errInfo);
+ }
+ return 1;
+}
+
+void
+efile_closefile(int fd)
+{
+ close(fd);
+}
+
+int
+efile_fdatasync(Efile_error *errInfo, /* Where to return error codes. */
+ int fd) /* File descriptor for file to sync data. */
+{
+#ifdef HAVE_FDATASYNC
+ return check_error(fdatasync(fd), errInfo);
+#else
+ return efile_fsync(errInfo, fd);
+#endif
+}
+
+int
+efile_fsync(Efile_error *errInfo, /* Where to return error codes. */
+ int fd) /* File descriptor for file to sync. */
+{
+#ifdef NO_FSYNC
+ undefined fsync /* XXX: Really? */
+#else
+#if defined(DARWIN) && defined(F_FULLFSYNC)
+ return check_error(fcntl(fd, F_FULLFSYNC), errInfo);
+#else
+ return check_error(fsync(fd), errInfo);
+#endif /* DARWIN */
+#endif /* NO_FSYNC */
+}
+
+int
+efile_fileinfo(Efile_error* errInfo, Efile_info* pInfo,
+ char* name, int info_for_link)
+{
+ struct stat statbuf; /* Information about the file */
+ int result;
+
+ if (info_for_link) {
+#ifndef __OSE__
+ result = lstat(name, &statbuf);
+#else
+ result = stat(name, &statbuf);
+#endif
+ } else {
+ result = stat(name, &statbuf);
+ }
+ if (!check_error(result, errInfo)) {
+ return 0;
+ }
+
+#if SIZEOF_OFF_T == 4
+ pInfo->size_high = 0;
+#else
+ pInfo->size_high = (Uint32)(statbuf.st_size >> 32);
+#endif
+ pInfo->size_low = (Uint32)statbuf.st_size;
+
+#ifdef NO_ACCESS
+ /* Just look at read/write access for owner. */
+
+ pInfo->access = ((statbuf.st_mode >> 6) & 07) >> 1;
+
+#else
+ pInfo->access = FA_NONE;
+ if (access(name, R_OK) == 0)
+ pInfo->access |= FA_READ;
+ if (access(name, W_OK) == 0)
+ pInfo->access |= FA_WRITE;
+
+#endif
+
+ if (ISDEV(statbuf))
+ pInfo->type = FT_DEVICE;
+ else if (ISDIR(statbuf))
+ pInfo->type = FT_DIRECTORY;
+ else if (ISREG(statbuf))
+ pInfo->type = FT_REGULAR;
+ else if (ISLNK(statbuf))
+ pInfo->type = FT_SYMLINK;
+ else
+ pInfo->type = FT_OTHER;
+
+ pInfo->accessTime = statbuf.st_atime;
+ pInfo->modifyTime = statbuf.st_mtime;
+ pInfo->cTime = statbuf.st_ctime;
+
+ pInfo->mode = statbuf.st_mode;
+ pInfo->links = statbuf.st_nlink;
+ pInfo->major_device = statbuf.st_dev;
+#ifndef __OSE__
+ pInfo->minor_device = statbuf.st_rdev;
+#endif
+ pInfo->inode = statbuf.st_ino;
+ pInfo->uid = statbuf.st_uid;
+ pInfo->gid = statbuf.st_gid;
+
+ return 1;
+}
+
+int
+efile_write_info(Efile_error *errInfo, Efile_info *pInfo, char *name)
+{
+#ifndef __OSE__
+ struct utimbuf tval;
+#endif
+
+ /*
+ * On some systems chown will always fail for a non-root user unless
+ * POSIX_CHOWN_RESTRICTED is not set. Others will succeed as long as
+ * you don't try to chown a file to someone besides youself.
+ */
+#ifndef __OSE__
+ if (chown(name, pInfo->uid, pInfo->gid) && errno != EPERM) {
+ return check_error(-1, errInfo);
+ }
+#endif
+
+ if (pInfo->mode != -1) {
+ mode_t newMode = pInfo->mode & (S_ISUID | S_ISGID |
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ if (chmod(name, newMode)) {
+ newMode &= ~(S_ISUID | S_ISGID);
+ if (chmod(name, newMode)) {
+ return check_error(-1, errInfo);
+ }
+ }
+ }
+
+#ifndef __OSE__
+ tval.actime = pInfo->accessTime;
+ tval.modtime = pInfo->modifyTime;
+
+ return check_error(utime(name, &tval), errInfo);
+#else
+ return 1;
+#endif
+}
+
+
+int
+efile_write(Efile_error* errInfo, /* Where to return error codes. */
+ int flags, /* Flags given when file was
+ opened. */
+ int fd, /* File descriptor to write to. */
+ char* buf, /* Buffer to write. */
+ size_t count) /* Number of bytes to write. */
+{
+ ssize_t written; /* Bytes written in last operation. */
+
+ while (count > 0) {
+ if ((written = write(fd, buf, count)) < 0) {
+ if (errno != EINTR)
+ return check_error(-1, errInfo);
+ else
+ written = 0;
+ }
+ ASSERT(written <= count);
+ buf += written;
+ count -= written;
+ }
+ return 1;
+}
+
+int
+efile_writev(Efile_error* errInfo, /* Where to return error codes */
+ int flags, /* Flags given when file was
+ * opened */
+ int fd, /* File descriptor to write to */
+ SysIOVec* iov, /* Vector of buffer structs.
+ * The structs may be changed i.e.
+ * due to incomplete writes */
+ int iovcnt) /* Number of structs in vector */
+{
+ int cnt = 0; /* Buffers so far written */
+
+ ASSERT(iovcnt >= 0);
+
+ while (cnt < iovcnt) {
+ if ((! iov[cnt].iov_base) || (iov[cnt].iov_len <= 0)) {
+ /* Empty buffer - skip */
+ cnt++;
+ } else { /* Non-empty buffer */
+ ssize_t w; /* Bytes written in this call */
+#ifdef HAVE_WRITEV
+ int b = iovcnt - cnt; /* Buffers to write */
+ /* Use as many buffers as MAXIOV allows */
+ if (b > MAXIOV)
+ b = MAXIOV;
+ if (b > 1) {
+ do {
+ w = writev(fd, &iov[cnt], b);
+ } while (w < 0 && errno == EINTR);
+ } else
+ /* Degenerated io vector - use regular write */
+#endif
+ {
+ do {
+ w = write(fd, iov[cnt].iov_base, iov[cnt].iov_len);
+ } while (w < 0 && errno == EINTR);
+ ASSERT(w <= iov[cnt].iov_len || w == -1);
+ }
+ if (w < 0) return check_error(-1, errInfo);
+ /* Move forward to next buffer to write */
+ for (; cnt < iovcnt && w > 0; cnt++) {
+ if (iov[cnt].iov_base && iov[cnt].iov_len > 0) {
+ if (w < iov[cnt].iov_len) {
+ /* Adjust the buffer for next write */
+ iov[cnt].iov_len -= w;
+ iov[cnt].iov_base += w;
+ w = 0;
+ break;
+ } else {
+ w -= iov[cnt].iov_len;
+ }
+ }
+ }
+ ASSERT(w == 0);
+ } /* else Non-empty buffer */
+ } /* while (cnt< iovcnt) */
+ return 1;
+}
+
+int
+efile_read(Efile_error* errInfo, /* Where to return error codes. */
+ int flags, /* Flags given when file was opened. */
+ int fd, /* File descriptor to read from. */
+ char* buf, /* Buffer to read into. */
+ size_t count, /* Number of bytes to read. */
+ size_t *pBytesRead) /* Where to return number of
+ bytes read. */
+{
+ ssize_t n;
+
+ for (;;) {
+ if ((n = read(fd, buf, count)) >= 0)
+ break;
+ else if (errno != EINTR)
+ return check_error(-1, errInfo);
+ }
+ *pBytesRead = (size_t) n;
+ return 1;
+}
+
+
+/* pread() and pwrite() */
+/* Some unix systems, notably Solaris has these syscalls */
+/* It is especially nice for i.e. the dets module to have support */
+/* for this, even if the underlying OS dosn't support it, it is */
+/* reasonably easy to work around by first calling seek, and then */
+/* calling read(). */
+/* This later strategy however changes the file pointer, which pread() */
+/* does not do. We choose to ignore this and say that the location */
+/* of the file pointer is undefined after a call to any of the p functions*/
+
+
+int
+efile_pread(Efile_error* errInfo, /* Where to return error codes. */
+ int fd, /* File descriptor to read from. */
+ Sint64 offset, /* Offset in bytes from BOF. */
+ char* buf, /* Buffer to read into. */
+ size_t count, /* Number of bytes to read. */
+ size_t *pBytesRead) /* Where to return
+ number of bytes read. */
+{
+#if defined(HAVE_PREAD) && defined(HAVE_PWRITE)
+ ssize_t n;
+ off_t off = (off_t) offset;
+ if (off != offset) {
+ errno = EINVAL;
+ return check_error(-1, errInfo);
+ }
+ for (;;) {
+ if ((n = pread(fd, buf, count, offset)) >= 0)
+ break;
+ else if (errno != EINTR)
+ return check_error(-1, errInfo);
+ }
+ *pBytesRead = (size_t) n;
+ return 1;
+#else
+ {
+ int res = efile_seek(errInfo, fd, offset, EFILE_SEEK_SET, NULL);
+ if (res) {
+ return efile_read(errInfo, 0, fd, buf, count, pBytesRead);
+ } else {
+ return res;
+ }
+ }
+#endif
+}
+
+
+
+int
+efile_pwrite(Efile_error* errInfo, /* Where to return error codes. */
+ int fd, /* File descriptor to write to. */
+ char* buf, /* Buffer to write. */
+ size_t count, /* Number of bytes to write. */
+ Sint64 offset) /* where to write it */
+{
+#if defined(HAVE_PREAD) && defined(HAVE_PWRITE)
+ ssize_t written; /* Bytes written in last operation. */
+ off_t off = (off_t) offset;
+ if (off != offset) {
+ errno = EINVAL;
+ return check_error(-1, errInfo);
+ }
+
+ while (count > 0) {
+ if ((written = pwrite(fd, buf, count, offset)) < 0) {
+ if (errno != EINTR)
+ return check_error(-1, errInfo);
+ else
+ written = 0;
+ }
+ ASSERT(written <= count);
+ buf += written;
+ count -= written;
+ offset += written;
+ }
+ return 1;
+#else /* For unix systems that don't support pread() and pwrite() */
+ {
+ int res = efile_seek(errInfo, fd, offset, EFILE_SEEK_SET, NULL);
+
+ if (res) {
+ return efile_write(errInfo, 0, fd, buf, count);
+ } else {
+ return res;
+ }
+ }
+#endif
+}
+
+
+int
+efile_seek(Efile_error* errInfo, /* Where to return error codes. */
+ int fd, /* File descriptor to do the seek on. */
+ Sint64 offset, /* Offset in bytes from the given
+ origin. */
+ int origin, /* Origin of seek (SEEK_SET, SEEK_CUR,
+ SEEK_END). */
+ Sint64 *new_location) /* Resulting new location in file. */
+{
+ off_t off, result;
+
+ switch (origin) {
+ case EFILE_SEEK_SET: origin = SEEK_SET; break;
+ case EFILE_SEEK_CUR: origin = SEEK_CUR; break;
+ case EFILE_SEEK_END: origin = SEEK_END; break;
+ default:
+ errno = EINVAL;
+ return check_error(-1, errInfo);
+ }
+ off = (off_t) offset;
+ if (off != offset) {
+ errno = EINVAL;
+ return check_error(-1, errInfo);
+ }
+
+ errno = 0;
+ result = lseek(fd, off, origin);
+
+ /*
+ * Note that the man page for lseek (on SunOs 5) says:
+ *
+ * "if fildes is a remote file descriptor and offset is
+ * negative, lseek() returns the file pointer even if it is
+ * negative."
+ */
+
+ if (result < 0 && errno == 0)
+ errno = EINVAL;
+ if (result < 0)
+ return check_error(-1, errInfo);
+ if (new_location) {
+ *new_location = result;
+ }
+ return 1;
+}
+
+
+int
+efile_truncate_file(Efile_error* errInfo, int *fd, int flags)
+{
+#ifndef NO_FTRUNCATE
+ off_t offset;
+
+ return check_error((offset = lseek(*fd, 0, 1)) >= 0 &&
+ ftruncate(*fd, offset) == 0 ? 1 : -1,
+ errInfo);
+#else
+ return 1;
+#endif
+}
+
+int
+efile_readlink(Efile_error* errInfo, char* name, char* buffer, size_t size)
+{
+#ifndef __OSE__
+ int len;
+ ASSERT(size > 0);
+ len = readlink(name, buffer, size-1);
+ if (len == -1) {
+ return check_error(-1, errInfo);
+ }
+ buffer[len] = '\0';
+ return 1;
+#else
+ errno = ENOTSUP;
+ return check_error(-1, errInfo);
+#endif
+}
+
+int
+efile_altname(Efile_error* errInfo, char* name, char* buffer, size_t size)
+{
+ errno = ENOTSUP;
+ return check_error(-1, errInfo);
+}
+
+int
+efile_link(Efile_error* errInfo, char* old, char* new)
+{
+#ifndef __OSE__
+ return check_error(link(old, new), errInfo);
+#else
+ errno = ENOTSUP;
+ return check_error(-1, errInfo);
+#endif
+}
+
+int
+efile_symlink(Efile_error* errInfo, char* old, char* new)
+{
+#ifndef __OSE__
+ return check_error(symlink(old, new), errInfo);
+#else
+ errno = ENOTSUP;
+ return check_error(-1, errInfo);
+#endif
+}
+
+int
+efile_fadvise(Efile_error* errInfo, int fd, Sint64 offset,
+ Sint64 length, int advise)
+{
+#ifdef HAVE_POSIX_FADVISE
+ return check_error(posix_fadvise(fd, offset, length, advise), errInfo);
+#else
+ return check_error(0, errInfo);
+#endif
+}
+
+#ifdef HAVE_SENDFILE
+/* For some reason the maximum size_t cannot be used as the max size
+ 3GB seems to work on all platforms */
+#define SENDFILE_CHUNK_SIZE ((1UL << 30) -1)
+
+/*
+ * sendfile: The implementation of the sendfile system call varies
+ * a lot on different *nix platforms so to make the api similar in all
+ * we have to emulate some things in linux and play with variables on
+ * bsd/darwin.
+ *
+ * All of the calls will split a command which tries to send more than
+ * SENDFILE_CHUNK_SIZE of data at once.
+ *
+ * On platforms where *nbytes of 0 does not mean the entire file, this is
+ * simulated.
+ *
+ * It could be possible to implement header/trailer in sendfile. Though
+ * you would have to emulate it in linux and on BSD/Darwin some complex
+ * calculations have to be made when using a non blocking socket to figure
+ * out how much of the header/file/trailer was sent in each command.
+ *
+ * The semantics of the API is this:
+ * Return value: 1 if all data was sent and the function does not need to
+ * be called again. 0 if an error occures OR if there is more data which
+ * has to be sent (EAGAIN or EINTR will be set appropriately)
+ *
+ * The amount of data written in a call is returned through nbytes.
+ *
+ */
+
+int
+efile_sendfile(Efile_error* errInfo, int in_fd, int out_fd,
+ off_t *offset, Uint64 *nbytes, struct t_sendfile_hdtl* hdtl)
+{
+ Uint64 written = 0;
+#if defined(__linux__)
+ ssize_t retval;
+ do {
+ /* check if *nbytes is 0 or greater than chunk size */
+ if (*nbytes == 0 || *nbytes > SENDFILE_CHUNK_SIZE)
+ retval = sendfile(out_fd, in_fd, offset, SENDFILE_CHUNK_SIZE);
+ else
+ retval = sendfile(out_fd, in_fd, offset, *nbytes);
+ if (retval > 0) {
+ written += retval;
+ *nbytes -= retval;
+ }
+ } while (retval == SENDFILE_CHUNK_SIZE);
+ if (written != 0) {
+ /* -1 is not returned by the linux API so we have to simulate it */
+ retval = -1;
+ errno = EAGAIN;
+ }
+#elif defined(__sun) && defined(__SVR4) && defined(HAVE_SENDFILEV)
+ ssize_t retval;
+ size_t len;
+ sendfilevec_t fdrec;
+ fdrec.sfv_fd = in_fd;
+ fdrec.sfv_flag = 0;
+ do {
+ fdrec.sfv_off = *offset;
+ len = 0;
+ /* check if *nbytes is 0 or greater than chunk size */
+ if (*nbytes == 0 || *nbytes > SENDFILE_CHUNK_SIZE)
+ fdrec.sfv_len = SENDFILE_CHUNK_SIZE;
+ else
+ fdrec.sfv_len = *nbytes;
+ retval = sendfilev(out_fd, &fdrec, 1, &len);
+
+ /* Sometimes sendfilev can return -1 and still send data.
+ When that happens we just pretend that no error happend. */
+ if (retval != -1 || errno == EAGAIN || errno == EINTR ||
+ len != 0) {
+ *offset += len;
+ *nbytes -= len;
+ written += len;
+ if (errno != EAGAIN && errno != EINTR && len != 0)
+ retval = len;
+ }
+ } while (len == SENDFILE_CHUNK_SIZE);
+#elif defined(DARWIN)
+ int retval;
+ off_t len;
+ do {
+ /* check if *nbytes is 0 or greater than chunk size */
+ if(*nbytes > SENDFILE_CHUNK_SIZE)
+ len = SENDFILE_CHUNK_SIZE;
+ else
+ len = *nbytes;
+ retval = sendfile(in_fd, out_fd, *offset, &len, NULL, 0);
+ if (retval != -1 || errno == EAGAIN || errno == EINTR) {
+ *offset += len;
+ *nbytes -= len;
+ written += len;
+ }
+ } while (len == SENDFILE_CHUNK_SIZE);
+#elif defined(__FreeBSD__) || defined(__DragonFly__)
+ off_t len;
+ int retval;
+ do {
+ if (*nbytes > SENDFILE_CHUNK_SIZE)
+ retval = sendfile(in_fd, out_fd, *offset, SENDFILE_CHUNK_SIZE,
+ NULL, &len, 0);
+ else
+ retval = sendfile(in_fd, out_fd, *offset, *nbytes, NULL, &len, 0);
+ if (retval != -1 || errno == EAGAIN || errno == EINTR) {
+ *offset += len;
+ *nbytes -= len;
+ written += len;
+ }
+ } while(len == SENDFILE_CHUNK_SIZE);
+#endif
+ *nbytes = written;
+ return check_error(retval, errInfo);
+}
+#endif /* HAVE_SENDFILE */
+
+#ifdef HAVE_POSIX_FALLOCATE
+static int
+call_posix_fallocate(int fd, Sint64 offset, Sint64 length)
+{
+ int ret;
+
+ /*
+ * On Linux and Solaris for example, posix_fallocate() returns
+ * a positive error number on error and it does not set errno.
+ * On FreeBSD however (9.0 at least), it returns -1 on error
+ * and it sets errno.
+ */
+ do {
+ ret = posix_fallocate(fd, (off_t) offset, (off_t) length);
+ if (ret > 0) {
+ errno = ret;
+ ret = -1;
+ }
+ } while (ret != 0 && errno == EINTR);
+
+ return ret;
+}
+#endif /* HAVE_POSIX_FALLOCATE */
+
+int
+efile_fallocate(Efile_error* errInfo, int fd, Sint64 offset, Sint64 length)
+{
+#if defined HAVE_FALLOCATE
+ /* Linux specific, more efficient than posix_fallocate. */
+ int ret;
+
+ do {
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, (off_t) offset, (off_t) length);
+ } while (ret != 0 && errno == EINTR);
+
+#if defined HAVE_POSIX_FALLOCATE
+ /* Fallback to posix_fallocate if available. */
+ if (ret != 0) {
+ ret = call_posix_fallocate(fd, offset, length);
+ }
+#endif
+
+ return check_error(ret, errInfo);
+#elif defined F_PREALLOCATE
+ /* Mac OS X specific, equivalent to posix_fallocate. */
+ int ret;
+ fstore_t fs;
+
+ memset(&fs, 0, sizeof(fs));
+ fs.fst_flags = F_ALLOCATECONTIG;
+ fs.fst_posmode = F_VOLPOSMODE;
+ fs.fst_offset = (off_t) offset;
+ fs.fst_length = (off_t) length;
+
+ ret = fcntl(fd, F_PREALLOCATE, &fs);
+
+ if (-1 == ret) {
+ fs.fst_flags = F_ALLOCATEALL;
+ ret = fcntl(fd, F_PREALLOCATE, &fs);
+
+#if defined HAVE_POSIX_FALLOCATE
+ /* Fallback to posix_fallocate if available. */
+ if (-1 == ret) {
+ ret = call_posix_fallocate(fd, offset, length);
+ }
+#endif
+ }
+
+ return check_error(ret, errInfo);
+#elif defined HAVE_POSIX_FALLOCATE
+ /* Other Unixes, use posix_fallocate if available. */
+ return check_error(call_posix_fallocate(fd, offset, length), errInfo);
+#else
+ errno = ENOTSUP;
+ return check_error(-1, errInfo);
+#endif
+}