From 6089f3c961a981b6bacb6c1590386bb67905ff23 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Wed, 26 Aug 2015 14:43:20 +0200 Subject: erts: Move sys drivers to another file --- erts/emulator/sys/unix/erl_unix_sys.h | 16 + erts/emulator/sys/unix/sys.c | 1914 +------------------------------ erts/emulator/sys/unix/sys_drivers.c | 1983 +++++++++++++++++++++++++++++++++ 3 files changed, 2003 insertions(+), 1910 deletions(-) create mode 100644 erts/emulator/sys/unix/sys_drivers.c (limited to 'erts/emulator/sys/unix') diff --git a/erts/emulator/sys/unix/erl_unix_sys.h b/erts/emulator/sys/unix/erl_unix_sys.h index 8d4e98bf3a..a11a44c259 100644 --- a/erts/emulator/sys/unix/erl_unix_sys.h +++ b/erts/emulator/sys/unix/erl_unix_sys.h @@ -470,4 +470,20 @@ extern jmp_buf erts_sys_sigsegv_jmp; } while(0) #endif +#ifdef USE_THREADS +# ifdef ENABLE_CHILD_WAITER_THREAD +# define CHLDWTHR ENABLE_CHILD_WAITER_THREAD +# else +# define CHLDWTHR 0 +# endif +# define FDBLOCK 1 +#else +# define CHLDWTHR 0 +# define FDBLOCK 0 +#endif + +#define ERTS_SYS_SUSPEND_SIGNAL SIGUSR2 + +int check_children(void); + #endif /* #ifndef _ERL_UNIX_SYS_H */ diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c index 1cb7f72a2b..503ef5c2f6 100644 --- a/erts/emulator/sys/unix/sys.c +++ b/erts/emulator/sys/unix/sys.c @@ -67,7 +67,7 @@ #include "erl_mseg.h" extern char **environ; -static erts_smp_rwmtx_t environ_rwmtx; +erts_smp_rwmtx_t environ_rwmtx; #define MAX_VSIZE 16 /* Max number of entries allowed in an I/O * vector sock_sendv(). @@ -76,91 +76,12 @@ static erts_smp_rwmtx_t environ_rwmtx; * Don't need global.h, but bif_table.h (included by bif.h), * won't compile otherwise */ -#include "global.h" +#include "global.h" #include "bif.h" -#include "erl_sys_driver.h" #include "erl_check_io.h" #include "erl_cpu_topology.h" -#ifndef DISABLE_VFORK -#define DISABLE_VFORK 0 -#endif - -#if defined IOV_MAX -#define MAXIOV IOV_MAX -#elif defined UIO_MAXIOV -#define MAXIOV UIO_MAXIOV -#else -#define MAXIOV 16 -#endif - -#ifdef USE_THREADS -# ifdef ENABLE_CHILD_WAITER_THREAD -# define CHLDWTHR ENABLE_CHILD_WAITER_THREAD -# else -# define CHLDWTHR 0 -# endif -# define FDBLOCK 1 -#else -# define CHLDWTHR 0 -# define FDBLOCK 0 -#endif -/* - * [OTP-3906] - * Solaris signal management gets confused when threads are used and a - * lot of child processes dies. The confusion results in that SIGCHLD - * signals aren't delivered to the emulator which in turn results in - * a lot of defunct processes in the system. - * - * The problem seems to appear when a signal is frequently - * blocked/unblocked at the same time as the signal is frequently - * propagated. The child waiter thread is a workaround for this problem. - * The SIGCHLD signal is always blocked (in all threads), and the child - * waiter thread fetches the signal by a call to sigwait(). See - * child_waiter(). - */ - -typedef struct ErtsSysReportExit_ ErtsSysReportExit; -struct ErtsSysReportExit_ { - ErtsSysReportExit *next; - Eterm port; - int pid; - int ifd; - int ofd; -#if CHLDWTHR && !defined(ERTS_SMP) - int status; -#endif -}; - -/* Used by the fd driver iff the fd could not be set to non-blocking */ -typedef struct ErtsSysBlocking_ { - ErlDrvPDL pdl; - int res; - int err; - unsigned int pkey; -} ErtsSysBlocking; - - -/* This data is shared by these drivers - initialized by spawn_init() */ -typedef struct driver_data { - ErlDrvPort port_num; - int ofd, packet_bytes; - ErtsSysReportExit *report_exit; - int pid; - int alive; - int status; - int terminating; - ErtsSysBlocking *blocking; -} ErtsSysDriverData; - -static ErtsSysDriverData *driver_data; /* indexed by fd */ - -static ErtsSysReportExit *report_exit_list; -#if CHLDWTHR && !defined(ERTS_SMP) -static ErtsSysReportExit *report_exit_transit_list; -#endif - extern int driver_interrupt(int, int); extern void do_break(void); @@ -172,32 +93,6 @@ extern void erts_sys_init_float(void); extern void erl_crash_dump(char* file, int line, char* fmt, ...); -#define DIR_SEPARATOR_CHAR '/' - -#if defined(__ANDROID__) -#define SHELL "/system/bin/sh" -#else -#define SHELL "/bin/sh" -#endif /* __ANDROID__ */ - - -#if defined(DEBUG) -#define ERL_BUILD_TYPE_MARKER ".debug" -#elif defined(PURIFY) -#define ERL_BUILD_TYPE_MARKER ".purify" -#elif defined(QUANTIFY) -#define ERL_BUILD_TYPE_MARKER ".quantify" -#elif defined(PURECOV) -#define ERL_BUILD_TYPE_MARKER ".purecov" -#elif defined(VALGRIND) -#define ERL_BUILD_TYPE_MARKER ".valgrind" -#else /* opt */ -#define ERL_BUILD_TYPE_MARKER -#endif - -#define CHILD_SETUP_PROG_NAME "erl_child_setup" ERL_BUILD_TYPE_MARKER -static char *child_setup_prog; - #ifdef DEBUG static int debug_log = 0; #endif @@ -220,61 +115,18 @@ static volatile int have_prepared_crash_dump; (have_prepared_crash_dump++) #endif -static erts_smp_atomic_t sys_misc_mem_sz; +erts_smp_atomic_t sys_misc_mem_sz; #if defined(ERTS_SMP) static void smp_sig_notify(char c); static int sig_notify_fds[2] = {-1, -1}; static int sig_suspend_fds[2] = {-1, -1}; -#define ERTS_SYS_SUSPEND_SIGNAL SIGUSR2 #endif jmp_buf erts_sys_sigsegv_jmp; -#if CHLDWTHR || defined(ERTS_SMP) -erts_mtx_t chld_stat_mtx; -#endif -#if CHLDWTHR -static erts_tid_t child_waiter_tid; -/* chld_stat_mtx is used to protect against concurrent accesses - of the driver_data fields pid, alive, and status. */ -erts_cnd_t chld_stat_cnd; -static long children_alive; -#define CHLD_STAT_LOCK erts_mtx_lock(&chld_stat_mtx) -#define CHLD_STAT_UNLOCK erts_mtx_unlock(&chld_stat_mtx) -#define CHLD_STAT_WAIT erts_cnd_wait(&chld_stat_cnd, &chld_stat_mtx) -#define CHLD_STAT_SIGNAL erts_cnd_signal(&chld_stat_cnd) -#elif defined(ERTS_SMP) /* ------------------------------------------------- */ -#define CHLD_STAT_LOCK erts_mtx_lock(&chld_stat_mtx) -#define CHLD_STAT_UNLOCK erts_mtx_unlock(&chld_stat_mtx) - -#else /* ------------------------------------------------------------------- */ -#define CHLD_STAT_LOCK -#define CHLD_STAT_UNLOCK -static volatile int children_died; -#endif - - -typedef struct ErtsSysFdData { - char pbuf[4]; /* hold partial packet bytes */ - int psz; /* size of pbuf */ - char *buf; - char *cpos; - int sz; - int remain; /* for input on fd */ -} ErtsSysFdData; - -static ErtsSysFdData *fd_data; /* indexed by fd */ - -/* static FUNCTION(int, write_fill, (int, char*, int)); unused? */ -static void note_child_death(int, int); - -#if CHLDWTHR -static void* child_waiter(void *); -#endif - static int crashdump_companion_cube_fd = -1; /********************* General functions ****************************/ @@ -559,8 +411,6 @@ erts_sys_pre_init(void) erts_thr_init(&eid); - report_exit_list = NULL; - #ifdef ERTS_ENABLE_LOCK_COUNT erts_lcnt_init(); #endif @@ -571,17 +421,6 @@ erts_sys_pre_init(void) #ifdef USE_THREADS -#if CHLDWTHR || defined(ERTS_SMP) - erts_mtx_init(&chld_stat_mtx, "child_status"); -#endif -#if CHLDWTHR -#ifndef ERTS_SMP - report_exit_transit_list = NULL; -#endif - erts_cnd_init(&chld_stat_cnd); - children_alive = 0; -#endif - #ifdef ERTS_SMP erts_smp_atomic32_init_nob(&erts_break_requested, 0); erts_smp_atomic32_init_nob(&erts_got_sigusr1, 0); @@ -591,9 +430,6 @@ erts_sys_pre_init(void) erts_got_sigusr1 = 0; have_prepared_crash_dump = 0; #endif -#if !CHLDWTHR && !defined(ERTS_SMP) - children_died = 0; -#endif #endif /* USE_THREADS */ @@ -630,39 +466,6 @@ erts_sys_pre_init(void) void erl_sys_init(void) { -#if !DISABLE_VFORK - { - int res; - char bindir[MAXPATHLEN]; - size_t bindirsz = sizeof(bindir); - Uint csp_path_sz; - - res = erts_sys_getenv_raw("BINDIR", bindir, &bindirsz); - if (res != 0) { - if (res < 0) - erl_exit(-1, - "Environment variable BINDIR is not set\n"); - if (res > 0) - erl_exit(-1, - "Value of environment variable BINDIR is too large\n"); - } - if (bindir[0] != DIR_SEPARATOR_CHAR) - erl_exit(-1, - "Environment variable BINDIR does not contain an" - " absolute path\n"); - csp_path_sz = (strlen(bindir) - + 1 /* DIR_SEPARATOR_CHAR */ - + sizeof(CHILD_SETUP_PROG_NAME) - + 1); - child_setup_prog = erts_alloc(ERTS_ALC_T_CS_PROG_PATH, csp_path_sz); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, csp_path_sz); - erts_snprintf(child_setup_prog, csp_path_sz, - "%s%c%s", - bindir, - DIR_SEPARATOR_CHAR, - CHILD_SETUP_PROG_NAME); - } -#endif #ifdef USE_SETLINEBUF setlinebuf(stdout); @@ -980,43 +783,6 @@ int sys_max_files(void) return(max_files); } -static void block_signals(void) -{ -#if !CHLDWTHR - sys_sigblock(SIGCHLD); -#endif -#ifndef ERTS_SMP - sys_sigblock(SIGINT); -#ifndef ETHR_UNUSABLE_SIGUSRX - sys_sigblock(SIGUSR1); -#endif /* #ifndef ETHR_UNUSABLE_SIGUSRX */ -#endif /* #ifndef ERTS_SMP */ - -#if defined(ERTS_SMP) && !defined(ETHR_UNUSABLE_SIGUSRX) - sys_sigblock(ERTS_SYS_SUSPEND_SIGNAL); -#endif - -} - -static void unblock_signals(void) -{ - /* Update erl_child_setup.c if changed */ -#if !CHLDWTHR - sys_sigrelease(SIGCHLD); -#endif -#ifndef ERTS_SMP - sys_sigrelease(SIGINT); -#ifndef ETHR_UNUSABLE_SIGUSRX - sys_sigrelease(SIGUSR1); -#endif /* #ifndef ETHR_UNUSABLE_SIGUSRX */ -#endif /* #ifndef ERTS_SMP */ - -#if defined(ERTS_SMP) && !defined(ETHR_UNUSABLE_SIGUSRX) - sys_sigrelease(ERTS_SYS_SUSPEND_SIGNAL); -#endif - -} - /************************** OS info *******************************/ /* Used by erlang:info/1. */ @@ -1104,1502 +870,6 @@ void fini_getenv_state(GETENV_STATE *state) erts_smp_rwmtx_runlock(&environ_rwmtx); } - -/************************** Port I/O *******************************/ - - - -/* I. Common stuff */ - -/* - * Decreasing the size of it below 16384 is not allowed. - */ - -/* II. The spawn/fd/vanilla drivers */ - -#define ERTS_SYS_READ_BUF_SZ (64*1024) - -/* Driver interfaces */ -static ErlDrvData spawn_start(ErlDrvPort, char*, SysDriverOpts*); -static ErlDrvData fd_start(ErlDrvPort, char*, SysDriverOpts*); -#if FDBLOCK -static void fd_async(void *); -static void fd_ready_async(ErlDrvData drv_data, ErlDrvThreadData thread_data); -#endif -static ErlDrvSSizeT fd_control(ErlDrvData, unsigned int, char *, ErlDrvSizeT, - char **, ErlDrvSizeT); -static ErlDrvData vanilla_start(ErlDrvPort, char*, SysDriverOpts*); -static int spawn_init(void); -static void fd_stop(ErlDrvData); -static void fd_flush(ErlDrvData); -static void stop(ErlDrvData); -static void ready_input(ErlDrvData, ErlDrvEvent); -static void ready_output(ErlDrvData, ErlDrvEvent); -static void output(ErlDrvData, char*, ErlDrvSizeT); -static void outputv(ErlDrvData, ErlIOVec*); -static void stop_select(ErlDrvEvent, void*); - -struct erl_drv_entry spawn_driver_entry = { - spawn_init, - spawn_start, - stop, - output, - ready_input, - ready_output, - "spawn", - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - ERL_DRV_EXTENDED_MARKER, - ERL_DRV_EXTENDED_MAJOR_VERSION, - ERL_DRV_EXTENDED_MINOR_VERSION, - ERL_DRV_FLAG_USE_PORT_LOCKING, - NULL, NULL, - stop_select -}; -struct erl_drv_entry fd_driver_entry = { - NULL, - fd_start, - fd_stop, - output, - ready_input, - ready_output, - "fd", - NULL, - NULL, - fd_control, - NULL, - outputv, -#if FDBLOCK - fd_ready_async, /* ready_async */ -#else - NULL, -#endif - fd_flush, /* flush */ - NULL, /* call */ - NULL, /* event */ - ERL_DRV_EXTENDED_MARKER, - ERL_DRV_EXTENDED_MAJOR_VERSION, - ERL_DRV_EXTENDED_MINOR_VERSION, - 0, /* ERL_DRV_FLAGs */ - NULL, /* handle2 */ - NULL, /* process_exit */ - stop_select -}; -struct erl_drv_entry vanilla_driver_entry = { - NULL, - vanilla_start, - stop, - output, - ready_input, - ready_output, - "vanilla", - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, /* flush */ - NULL, /* call */ - NULL, /* event */ - ERL_DRV_EXTENDED_MARKER, - ERL_DRV_EXTENDED_MAJOR_VERSION, - ERL_DRV_EXTENDED_MINOR_VERSION, - 0, /* ERL_DRV_FLAGs */ - NULL, /* handle2 */ - NULL, /* process_exit */ - stop_select -}; - -/* Handle SIGCHLD signals. */ -#if (defined(SIG_SIGSET) || defined(SIG_SIGNAL)) -static RETSIGTYPE onchld(void) -#else -static RETSIGTYPE onchld(int signum) -#endif -{ -#if CHLDWTHR - ASSERT(0); /* We should *never* catch a SIGCHLD signal */ -#elif defined(ERTS_SMP) - smp_sig_notify('C'); -#else - children_died = 1; - ERTS_CHK_IO_AS_INTR(); /* Make sure we don't sleep in poll */ -#endif -} - -static int set_blocking_data(ErtsSysDriverData *dd) { - - dd->blocking = erts_alloc(ERTS_ALC_T_SYS_BLOCKING, sizeof(ErtsSysBlocking)); - - erts_smp_atomic_add_nob(&sys_misc_mem_sz, sizeof(ErtsSysBlocking)); - - dd->blocking->pdl = driver_pdl_create(dd->port_num); - dd->blocking->res = 0; - dd->blocking->err = 0; - dd->blocking->pkey = driver_async_port_key(dd->port_num); - - return 1; -} - -static int set_driver_data(ErlDrvPort port_num, - int ifd, - int ofd, - int packet_bytes, - int read_write, - int exit_status, - int pid, - int is_blocking) -{ - Port *prt; - ErtsSysReportExit *report_exit; - - if (!exit_status) - report_exit = NULL; - else { - report_exit = erts_alloc(ERTS_ALC_T_PRT_REP_EXIT, - sizeof(ErtsSysReportExit)); - report_exit->next = report_exit_list; - report_exit->port = erts_drvport2id(port_num); - report_exit->pid = pid; - report_exit->ifd = read_write & DO_READ ? ifd : -1; - report_exit->ofd = read_write & DO_WRITE ? ofd : -1; -#if CHLDWTHR && !defined(ERTS_SMP) - report_exit->status = 0; -#endif - report_exit_list = report_exit; - } - - prt = erts_drvport2port(port_num); - if (prt != ERTS_INVALID_ERL_DRV_PORT) - prt->os_pid = pid; - - if (read_write & DO_READ) { - driver_data[ifd].packet_bytes = packet_bytes; - driver_data[ifd].port_num = port_num; - driver_data[ifd].report_exit = report_exit; - driver_data[ifd].pid = pid; - driver_data[ifd].alive = 1; - driver_data[ifd].status = 0; - driver_data[ifd].terminating = 0; - driver_data[ifd].blocking = NULL; - if (read_write & DO_WRITE) { - driver_data[ifd].ofd = ofd; - if (is_blocking && FDBLOCK) - if (!set_blocking_data(driver_data+ifd)) - return -1; - if (ifd != ofd) - driver_data[ofd] = driver_data[ifd]; /* structure copy */ - } else { /* DO_READ only */ - driver_data[ifd].ofd = -1; - } - (void) driver_select(port_num, ifd, (ERL_DRV_READ|ERL_DRV_USE), 1); - return(ifd); - } else { /* DO_WRITE only */ - driver_data[ofd].packet_bytes = packet_bytes; - driver_data[ofd].port_num = port_num; - driver_data[ofd].report_exit = report_exit; - driver_data[ofd].ofd = ofd; - driver_data[ofd].pid = pid; - driver_data[ofd].alive = 1; - driver_data[ofd].status = 0; - driver_data[ofd].terminating = 0; - driver_data[ofd].blocking = NULL; - if (is_blocking && FDBLOCK) - if (!set_blocking_data(driver_data+ofd)) - return -1; - return(ofd); - } -} - -static int spawn_init() -{ - int i; -#if CHLDWTHR - erts_thr_opts_t thr_opts = ERTS_THR_OPTS_DEFAULT_INITER; - - thr_opts.detached = 0; - thr_opts.suggested_stack_size = 0; /* Smallest possible */ - thr_opts.name = "child_waiter"; -#endif - - sys_signal(SIGPIPE, SIG_IGN); /* Ignore - we'll handle the write failure */ - driver_data = (ErtsSysDriverData *) - erts_alloc(ERTS_ALC_T_DRV_TAB, max_files * sizeof(ErtsSysDriverData)); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, - max_files * sizeof(ErtsSysDriverData)); - - for (i = 0; i < max_files; i++) - driver_data[i].pid = -1; - -#if CHLDWTHR - sys_sigblock(SIGCHLD); -#endif - - sys_signal(SIGCHLD, onchld); /* Reap children */ - -#if CHLDWTHR - erts_thr_create(&child_waiter_tid, child_waiter, NULL, &thr_opts); -#endif - - return 1; -} - -static void close_pipes(int ifd[2], int ofd[2], int read_write) -{ - if (read_write & DO_READ) { - (void) close(ifd[0]); - (void) close(ifd[1]); - } - if (read_write & DO_WRITE) { - (void) close(ofd[0]); - (void) close(ofd[1]); - } -} - -static void init_fd_data(int fd, ErlDrvPort port_num) -{ - fd_data[fd].buf = NULL; - fd_data[fd].cpos = NULL; - fd_data[fd].remain = 0; - fd_data[fd].sz = 0; - fd_data[fd].psz = 0; -} - -static char **build_unix_environment(char *block) -{ - int i; - int j; - int len; - char *cp; - char **cpp; - char** old_env; - - ERTS_SMP_LC_ASSERT(erts_smp_lc_rwmtx_is_rlocked(&environ_rwmtx)); - - cp = block; - len = 0; - while (*cp != '\0') { - cp += strlen(cp) + 1; - len++; - } - old_env = environ; - while (*old_env++ != NULL) { - len++; - } - - cpp = (char **) erts_alloc_fnf(ERTS_ALC_T_ENVIRONMENT, - sizeof(char *) * (len+1)); - if (cpp == NULL) { - return NULL; - } - - cp = block; - len = 0; - while (*cp != '\0') { - cpp[len] = cp; - cp += strlen(cp) + 1; - len++; - } - - i = len; - for (old_env = environ; *old_env; old_env++) { - char* old = *old_env; - - for (j = 0; j < len; j++) { - char *s, *t; - - s = cpp[j]; - t = old; - while (*s == *t && *s != '=') { - s++, t++; - } - if (*s == '=' && *t == '=') { - break; - } - } - - if (j == len) { /* New version not found */ - cpp[len++] = old; - } - } - - for (j = 0; j < i; ) { - size_t last = strlen(cpp[j])-1; - if (cpp[j][last] == '=' && strchr(cpp[j], '=') == cpp[j]+last) { - cpp[j] = cpp[--len]; - if (len < i) { - i--; - } else { - j++; - } - } - else { - j++; - } - } - - cpp[len] = NULL; - return cpp; -} - -/* - [arndt] In most Unix systems, including Solaris 2.5, 'fork' allocates memory - in swap space for the child of a 'fork', whereas 'vfork' does not do this. - The natural call to use here is therefore 'vfork'. Due to a bug in - 'vfork' in Solaris 2.5 (apparently fixed in 2.6), using 'vfork' - can be dangerous in what seems to be these circumstances: - If the child code under a vfork sets the signal action to SIG_DFL - (or SIG_IGN) - for any signal which was previously set to a signal handler, the - state of the parent is clobbered, so that the later arrival of - such a signal yields a sigsegv in the parent. If the signal was - not set to a signal handler, but ignored, all seems to work. - If you change the forking code below, beware of this. - */ - -static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* opts) -{ -#define CMD_LINE_PREFIX_STR "exec " -#define CMD_LINE_PREFIX_STR_SZ (sizeof(CMD_LINE_PREFIX_STR) - 1) - - int ifd[2], ofd[2], len, pid, i; - char **volatile new_environ; /* volatile since a vfork() then cannot - cause 'new_environ' to be clobbered - in the parent process. */ - int saved_errno; - long res; - char *cmd_line; -#ifndef QNX - int unbind; -#endif -#if !DISABLE_VFORK - int no_vfork; - size_t no_vfork_sz = sizeof(no_vfork); - - no_vfork = (erts_sys_getenv_raw("ERL_NO_VFORK", - (char *) &no_vfork, - &no_vfork_sz) >= 0); -#endif - - switch (opts->read_write) { - case DO_READ: - if (pipe(ifd) < 0) - return ERL_DRV_ERROR_ERRNO; - if (ifd[0] >= max_files) { - close_pipes(ifd, ofd, opts->read_write); - errno = EMFILE; - return ERL_DRV_ERROR_ERRNO; - } - ofd[1] = -1; /* keep purify happy */ - break; - case DO_WRITE: - if (pipe(ofd) < 0) return ERL_DRV_ERROR_ERRNO; - if (ofd[1] >= max_files) { - close_pipes(ifd, ofd, opts->read_write); - errno = EMFILE; - return ERL_DRV_ERROR_ERRNO; - } - ifd[0] = -1; /* keep purify happy */ - break; - case DO_READ|DO_WRITE: - if (pipe(ifd) < 0) return ERL_DRV_ERROR_ERRNO; - errno = EMFILE; /* default for next two conditions */ - if (ifd[0] >= max_files || pipe(ofd) < 0) { - close_pipes(ifd, ofd, DO_READ); - return ERL_DRV_ERROR_ERRNO; - } - if (ofd[1] >= max_files) { - close_pipes(ifd, ofd, opts->read_write); - errno = EMFILE; - return ERL_DRV_ERROR_ERRNO; - } - break; - default: - ASSERT(0); - return ERL_DRV_ERROR_GENERAL; - } - - if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) { - /* started with spawn_executable, not with spawn */ - len = strlen(name); - cmd_line = (char *) erts_alloc_fnf(ERTS_ALC_T_TMP, len + 1); - if (!cmd_line) { - close_pipes(ifd, ofd, opts->read_write); - errno = ENOMEM; - return ERL_DRV_ERROR_ERRNO; - } - memcpy((void *) cmd_line,(void *) name, len); - cmd_line[len] = '\0'; - if (access(cmd_line,X_OK) != 0) { - int save_errno = errno; - erts_free(ERTS_ALC_T_TMP, cmd_line); - errno = save_errno; - return ERL_DRV_ERROR_ERRNO; - } - } else { - /* make the string suitable for giving to "sh" */ - len = strlen(name); - cmd_line = (char *) erts_alloc_fnf(ERTS_ALC_T_TMP, - CMD_LINE_PREFIX_STR_SZ + len + 1); - if (!cmd_line) { - close_pipes(ifd, ofd, opts->read_write); - errno = ENOMEM; - return ERL_DRV_ERROR_ERRNO; - } - memcpy((void *) cmd_line, - (void *) CMD_LINE_PREFIX_STR, - CMD_LINE_PREFIX_STR_SZ); - memcpy((void *) (cmd_line + CMD_LINE_PREFIX_STR_SZ), (void *) name, len); - cmd_line[CMD_LINE_PREFIX_STR_SZ + len] = '\0'; - } - - erts_smp_rwmtx_rlock(&environ_rwmtx); - - if (opts->envir == NULL) { - new_environ = environ; - } else if ((new_environ = build_unix_environment(opts->envir)) == NULL) { - erts_smp_rwmtx_runlock(&environ_rwmtx); - erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); - errno = ENOMEM; - return ERL_DRV_ERROR_ERRNO; - } - -#ifndef QNX - /* Block child from SIGINT and SIGUSR1. Must be before fork() - to be safe. */ - block_signals(); - - CHLD_STAT_LOCK; - - unbind = erts_sched_bind_atfork_prepare(); - -#if !DISABLE_VFORK - /* See fork/vfork discussion before this function. */ - if (no_vfork) { -#endif - - DEBUGF(("Using fork\n")); - pid = fork(); - - if (pid == 0) { - /* The child! Setup child... */ - - if (erts_sched_bind_atfork_child(unbind) != 0) - goto child_error; - - /* OBSERVE! - * Keep child setup after vfork() (implemented below and in - * erl_child_setup.c) up to date if changes are made here. - */ - - if (opts->use_stdio) { - if (opts->read_write & DO_READ) { - /* stdout for process */ - if (dup2(ifd[1], 1) < 0) - goto child_error; - if(opts->redir_stderr) - /* stderr for process */ - if (dup2(ifd[1], 2) < 0) - goto child_error; - } - if (opts->read_write & DO_WRITE) - /* stdin for process */ - if (dup2(ofd[0], 0) < 0) - goto child_error; - } - else { /* XXX will fail if ofd[0] == 4 (unlikely..) */ - if (opts->read_write & DO_READ) - if (dup2(ifd[1], 4) < 0) - goto child_error; - if (opts->read_write & DO_WRITE) - if (dup2(ofd[0], 3) < 0) - goto child_error; - } - -#if defined(HAVE_CLOSEFROM) - closefrom(opts->use_stdio ? 3 : 5); -#else - for (i = opts->use_stdio ? 3 : 5; i < max_files; i++) - (void) close(i); -#endif - - if (opts->wd && chdir(opts->wd) < 0) - goto child_error; - -#if defined(USE_SETPGRP_NOARGS) /* SysV */ - (void) setpgrp(); -#elif defined(USE_SETPGRP) /* BSD */ - (void) setpgrp(0, getpid()); -#else /* POSIX */ - (void) setsid(); -#endif - - unblock_signals(); - - if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) { - if (opts->argv == NULL) { - execle(cmd_line,cmd_line,(char *) NULL, new_environ); - } else { - if (opts->argv[0] == erts_default_arg0) { - opts->argv[0] = cmd_line; - } - execve(cmd_line, opts->argv, new_environ); - if (opts->argv[0] == cmd_line) { - opts->argv[0] = erts_default_arg0; - } - } - } else { - execle(SHELL, "sh", "-c", cmd_line, (char *) NULL, new_environ); - } - child_error: - _exit(1); - } -#if !DISABLE_VFORK - } -#define ENOUGH_BYTES (44) - else { /* Use vfork() */ - char **cs_argv= erts_alloc(ERTS_ALC_T_TMP,(CS_ARGV_NO_OF_ARGS + 1)* - sizeof(char *)); - char fd_close_range[ENOUGH_BYTES]; /* 44 bytes are enough to */ - char dup2_op[CS_ARGV_NO_OF_DUP2_OPS][ENOUGH_BYTES]; /* hold any "%d:%d" string */ - /* on a 64-bit machine. */ - - /* Setup argv[] for the child setup program (implemented in - erl_child_setup.c) */ - i = 0; - if (opts->use_stdio) { - if (opts->read_write & DO_READ){ - /* stdout for process */ - erts_snprintf(&dup2_op[i++][0], ENOUGH_BYTES, "%d:%d", ifd[1], 1); - if(opts->redir_stderr) - /* stderr for process */ - erts_snprintf(&dup2_op[i++][0], ENOUGH_BYTES, "%d:%d", ifd[1], 2); - } - if (opts->read_write & DO_WRITE) - /* stdin for process */ - erts_snprintf(&dup2_op[i++][0], ENOUGH_BYTES, "%d:%d", ofd[0], 0); - } else { /* XXX will fail if ofd[0] == 4 (unlikely..) */ - if (opts->read_write & DO_READ) - erts_snprintf(&dup2_op[i++][0], ENOUGH_BYTES, "%d:%d", ifd[1], 4); - if (opts->read_write & DO_WRITE) - erts_snprintf(&dup2_op[i++][0], ENOUGH_BYTES, "%d:%d", ofd[0], 3); - } - for (; i < CS_ARGV_NO_OF_DUP2_OPS; i++) - strcpy(&dup2_op[i][0], "-"); - erts_snprintf(fd_close_range, ENOUGH_BYTES, "%d:%d", opts->use_stdio ? 3 : 5, max_files-1); - - cs_argv[CS_ARGV_PROGNAME_IX] = child_setup_prog; - cs_argv[CS_ARGV_WD_IX] = opts->wd ? opts->wd : "."; - cs_argv[CS_ARGV_UNBIND_IX] = erts_sched_bind_atvfork_child(unbind); - cs_argv[CS_ARGV_FD_CR_IX] = fd_close_range; - for (i = 0; i < CS_ARGV_NO_OF_DUP2_OPS; i++) - cs_argv[CS_ARGV_DUP2_OP_IX(i)] = &dup2_op[i][0]; - - if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) { - int num = 0; - int j = 0; - if (opts->argv != NULL) { - for(; opts->argv[num] != NULL; ++num) - ; - } - cs_argv = erts_realloc(ERTS_ALC_T_TMP,cs_argv, (CS_ARGV_NO_OF_ARGS + 1 + num + 1) * sizeof(char *)); - cs_argv[CS_ARGV_CMD_IX] = "-"; - cs_argv[CS_ARGV_NO_OF_ARGS] = cmd_line; - if (opts->argv != NULL) { - for (;opts->argv[j] != NULL; ++j) { - if (opts->argv[j] == erts_default_arg0) { - cs_argv[CS_ARGV_NO_OF_ARGS + 1 + j] = cmd_line; - } else { - cs_argv[CS_ARGV_NO_OF_ARGS + 1 + j] = opts->argv[j]; - } - } - } - cs_argv[CS_ARGV_NO_OF_ARGS + 1 + j] = NULL; - } else { - cs_argv[CS_ARGV_CMD_IX] = cmd_line; /* Command */ - cs_argv[CS_ARGV_NO_OF_ARGS] = NULL; - } - DEBUGF(("Using vfork\n")); - pid = vfork(); - - if (pid == 0) { - /* The child! */ - - /* Observe! - * OTP-4389: The child setup program (implemented in - * erl_child_setup.c) will perform the necessary setup of the - * child before it execs to the user program. This because - * vfork() only allow an *immediate* execve() or _exit() in the - * child. - */ - execve(child_setup_prog, cs_argv, new_environ); - _exit(1); - } - erts_free(ERTS_ALC_T_TMP,cs_argv); - } -#undef ENOUGH_BYTES -#endif - - erts_sched_bind_atfork_parent(unbind); - - if (pid == -1) { - saved_errno = errno; - CHLD_STAT_UNLOCK; - erts_smp_rwmtx_runlock(&environ_rwmtx); - erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); - unblock_signals(); - close_pipes(ifd, ofd, opts->read_write); - errno = saved_errno; - return ERL_DRV_ERROR_ERRNO; - } -#else /* QNX */ - if (opts->use_stdio) { - if (opts->read_write & DO_READ) - qnx_spawn_options.iov[1] = ifd[1]; /* stdout for process */ - if (opts->read_write & DO_WRITE) - qnx_spawn_options.iov[0] = ofd[0]; /* stdin for process */ - } - else { - if (opts->read_write & DO_READ) - qnx_spawn_options.iov[4] = ifd[1]; - if (opts->read_write & DO_WRITE) - qnx_spawn_options.iov[3] = ofd[0]; - } - /* Close fds on exec */ - for (i = 3; i < max_files; i++) - fcntl(i, F_SETFD, 1); - - qnx_spawn_options.flags = _SPAWN_SETSID; - if ((pid = spawnl(P_NOWAIT, SHELL, SHELL, "-c", cmd_line, - (char *) 0)) < 0) { - erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); - reset_qnx_spawn(); - erts_smp_rwmtx_runlock(&environ_rwmtx); - close_pipes(ifd, ofd, opts->read_write); - return ERL_DRV_ERROR_GENERAL; - } - reset_qnx_spawn(); -#endif /* QNX */ - - erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); - - if (new_environ != environ) - erts_free(ERTS_ALC_T_ENVIRONMENT, (void *) new_environ); - - if (opts->read_write & DO_READ) - (void) close(ifd[1]); - if (opts->read_write & DO_WRITE) - (void) close(ofd[0]); - - if (opts->read_write & DO_READ) { - SET_NONBLOCKING(ifd[0]); - init_fd_data(ifd[0], port_num); - } - if (opts->read_write & DO_WRITE) { - SET_NONBLOCKING(ofd[1]); - init_fd_data(ofd[1], port_num); - } - - res = set_driver_data(port_num, ifd[0], ofd[1], opts->packet_bytes, - opts->read_write, opts->exit_status, pid, 0); - /* Don't unblock SIGCHLD until now, since the call above must - first complete putting away the info about our new subprocess. */ - unblock_signals(); - -#if CHLDWTHR - ASSERT(children_alive >= 0); - - if (!(children_alive++)) - CHLD_STAT_SIGNAL; /* Wake up child waiter thread if no children - was alive before we fork()ed ... */ -#endif - /* Don't unlock chld_stat_mtx until now of the same reason as above */ - CHLD_STAT_UNLOCK; - - erts_smp_rwmtx_runlock(&environ_rwmtx); - - return (ErlDrvData)res; -#undef CMD_LINE_PREFIX_STR -#undef CMD_LINE_PREFIX_STR_SZ -} - -#ifdef QNX -static reset_qnx_spawn() -{ - int i; - - /* Reset qnx_spawn_options */ - qnx_spawn_options.flags = 0; - qnx_spawn_options.iov[0] = 0xff; - qnx_spawn_options.iov[1] = 0xff; - qnx_spawn_options.iov[2] = 0xff; - qnx_spawn_options.iov[3] = 0xff; -} -#endif - -#define FD_DEF_HEIGHT 24 -#define FD_DEF_WIDTH 80 -/* Control op */ -#define FD_CTRL_OP_GET_WINSIZE 100 - -static int fd_get_window_size(int fd, Uint32 *width, Uint32 *height) -{ -#ifdef TIOCGWINSZ - struct winsize ws; - if (ioctl(fd,TIOCGWINSZ,&ws) == 0) { - *width = (Uint32) ws.ws_col; - *height = (Uint32) ws.ws_row; - return 0; - } -#endif - return -1; -} - -static ErlDrvSSizeT fd_control(ErlDrvData drv_data, - unsigned int command, - char *buf, ErlDrvSizeT len, - char **rbuf, ErlDrvSizeT rlen) -{ - int fd = (int)(long)drv_data; - char resbuff[2*sizeof(Uint32)]; - switch (command) { - case FD_CTRL_OP_GET_WINSIZE: - { - Uint32 w,h; - if (fd_get_window_size(fd,&w,&h)) - return 0; - memcpy(resbuff,&w,sizeof(Uint32)); - memcpy(resbuff+sizeof(Uint32),&h,sizeof(Uint32)); - } - break; - default: - return 0; - } - if (rlen < 2*sizeof(Uint32)) { - *rbuf = driver_alloc(2*sizeof(Uint32)); - } - memcpy(*rbuf,resbuff,2*sizeof(Uint32)); - return 2*sizeof(Uint32); -} - -static ErlDrvData fd_start(ErlDrvPort port_num, char* name, - SysDriverOpts* opts) -{ - ErlDrvData res; - int non_blocking = 0; - - if (((opts->read_write & DO_READ) && opts->ifd >= max_files) || - ((opts->read_write & DO_WRITE) && opts->ofd >= max_files)) - return ERL_DRV_ERROR_GENERAL; - - /* - * Historical: - * - * "Note about nonblocking I/O. - * - * At least on Solaris, setting the write end of a TTY to nonblocking, - * will set the input end to nonblocking as well (and vice-versa). - * If erl is run in a pipeline like this: cat | erl - * the input end of the TTY will be the standard input of cat. - * And cat is not prepared to handle nonblocking I/O." - * - * Actually, the reason for this is not that the tty itself gets set - * in non-blocking mode, but that the "input end" (cat's stdin) and - * the "output end" (erlang's stdout) are typically the "same" file - * descriptor, dup()'ed from a single fd by one of this process' - * ancestors. - * - * The workaround for this problem used to be a rather bad kludge, - * interposing an extra process ("internal cat") between erlang's - * stdout and the original stdout, allowing erlang to set its stdout - * in non-blocking mode without affecting the stdin of the preceding - * process in the pipeline - and being a kludge, it caused all kinds - * of weird problems. - * - * So, this is the current logic: - * - * The only reason to set non-blocking mode on the output fd at all is - * if it's something that can cause a write() to block, of course, - * i.e. primarily if it points to a tty, socket, pipe, or fifo. - * - * If we don't set non-blocking mode when we "should" have, and output - * becomes blocked, the entire runtime system will be suspended - this - * is normally bad of course, and can happen fairly "easily" - e.g. user - * hits ^S on tty - but doesn't necessarily happen. - * - * If we do set non-blocking mode when we "shouldn't" have, the runtime - * system will end up seeing EOF on the input fd (due to the preceding - * process dying), which typically will cause the entire runtime system - * to terminate immediately (due to whatever erlang process is seeing - * the EOF taking it as a signal to halt the system). This is *very* bad. - * - * I.e. we should take a conservative approach, and only set non- - * blocking mode when we a) need to, and b) are reasonably certain - * that it won't be a problem. And as in the example above, the problem - * occurs when input fd and output fd point to different "things". - * - * However, determining that they are not just the same "type" of - * "thing", but actually the same instance of that type of thing, is - * unreasonably complex in many/most cases. - * - * Also, with pipes, sockets, and fifos it's far from obvious that the - * user *wants* non-blocking output: If you're running erlang inside - * some complex pipeline, you're probably not running a real-time system - * that must never stop, but rather *want* it to suspend if the output - * channel is "full". - * - * So, the bottom line: We will only set the output fd non-blocking if - * it points to a tty, and either a) the input fd also points to a tty, - * or b) we can make sure that setting the output fd non-blocking - * doesn't interfere with someone else's input, via a somewhat milder - * kludge than the above. - * - * Also keep in mind that while this code is almost exclusively run as - * a result of an erlang open_port({fd,0,1}, ...), that isn't the only - * case - it can be called with any old pre-existing file descriptors, - * the relations between which (if they're even two) we can only guess - * at - still, we try our best... - * - * Added note OTP 18: Some systems seem to use stdout/stderr to log data - * using unix pipes, so we cannot allow the system to block on a write. - * Therefore we use an async thread to write the data to fd's that could - * not be set to non-blocking. When no async threads are available we - * fall back on the old behaviour. - * - * Also the guarantee about what is delivered to the OS has changed. - * Pre 18 the fd driver did no flushing of data before terminating. - * Now it does. This is because we want to be able to guarantee that things - * such as escripts and friends really have outputted all data before - * terminating. This could potentially block the termination of the system - * for a very long time, but if the user wants to terminate fast she should - * use erlang:halt with flush=false. - */ - - if (opts->read_write & DO_READ) { - init_fd_data(opts->ifd, port_num); - } - if (opts->read_write & DO_WRITE) { - init_fd_data(opts->ofd, port_num); - - /* If we don't have a read end, all bets are off - no non-blocking. */ - if (opts->read_write & DO_READ) { - - if (isatty(opts->ofd)) { /* output fd is a tty:-) */ - - if (isatty(opts->ifd)) { /* input fd is also a tty */ - - /* To really do this "right", we should also check that - input and output fd point to the *same* tty - but - this seems like overkill; ttyname() isn't for free, - and this is a very common case - and it's hard to - imagine a scenario where setting non-blocking mode - here would cause problems - go ahead and do it. */ - - non_blocking = 1; - SET_NONBLOCKING(opts->ofd); - - } else { /* output fd is a tty, input fd isn't */ - - /* This is a "problem case", but also common (see the - example above) - i.e. it makes sense to try a bit - harder before giving up on non-blocking mode: Try to - re-open the tty that the output fd points to, and if - successful replace the original one with the "new" fd - obtained this way, and set *that* one in non-blocking - mode. (Yes, this is a kludge.) - - However, re-opening the tty may fail in a couple of - (unusual) cases: - - 1) The name of the tty (or an equivalent one, i.e. - same major/minor number) can't be found, because - it actually lives somewhere other than /dev (or - wherever ttyname() looks for it), and isn't - equivalent to any of those that do live in the - "standard" place - this should be *very* unusual. - - 2) Permissions on the tty don't allow us to open it - - it's perfectly possible to have an fd open to an - object whose permissions wouldn't allow us to open - it. This is not as unusual as it sounds, one case - is if the user has su'ed to someone else (not - root) - we have a read/write fd open to the tty - (because it has been inherited all the way down - here), but we have neither read nor write - permission for the tty. - - In these cases, we finally give up, and don't set the - output fd in non-blocking mode. */ - - char *tty; - int nfd; - - if ((tty = ttyname(opts->ofd)) != NULL && - (nfd = open(tty, O_WRONLY)) != -1) { - dup2(nfd, opts->ofd); - close(nfd); - non_blocking = 1; - SET_NONBLOCKING(opts->ofd); - } - } - } - } - } - CHLD_STAT_LOCK; - res = (ErlDrvData)(long)set_driver_data(port_num, opts->ifd, opts->ofd, - opts->packet_bytes, - opts->read_write, 0, -1, - !non_blocking); - CHLD_STAT_UNLOCK; - return res; -} - -static void clear_fd_data(int fd) -{ - if (fd_data[fd].sz > 0) { - erts_free(ERTS_ALC_T_FD_ENTRY_BUF, (void *) fd_data[fd].buf); - ASSERT(erts_smp_atomic_read_nob(&sys_misc_mem_sz) >= fd_data[fd].sz); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, -1*fd_data[fd].sz); - } - fd_data[fd].buf = NULL; - fd_data[fd].sz = 0; - fd_data[fd].remain = 0; - fd_data[fd].cpos = NULL; - fd_data[fd].psz = 0; -} - -static void nbio_stop_fd(ErlDrvPort prt, int fd) -{ - driver_select(prt,fd,DO_READ|DO_WRITE,0); - clear_fd_data(fd); - SET_BLOCKING(fd); -} - -static void fd_stop(ErlDrvData ev) /* Does not close the fds */ -{ - int ofd; - int fd = (int)(long)ev; - ErlDrvPort prt = driver_data[fd].port_num; - -#if FDBLOCK - if (driver_data[fd].blocking) { - erts_free(ERTS_ALC_T_SYS_BLOCKING,driver_data[fd].blocking); - driver_data[fd].blocking = NULL; - erts_smp_atomic_add_nob(&sys_misc_mem_sz, -1*sizeof(ErtsSysBlocking)); - } -#endif - - nbio_stop_fd(prt, fd); - ofd = driver_data[fd].ofd; - if (ofd != fd && ofd != -1) - nbio_stop_fd(prt, ofd); -} - -static void fd_flush(ErlDrvData fd) -{ - if (!driver_data[(int)(long)fd].terminating) - driver_data[(int)(long)fd].terminating = 1; -} - -static ErlDrvData vanilla_start(ErlDrvPort port_num, char* name, - SysDriverOpts* opts) -{ - int flags, fd; - ErlDrvData res; - - flags = (opts->read_write == DO_READ ? O_RDONLY : - opts->read_write == DO_WRITE ? O_WRONLY|O_CREAT|O_TRUNC : - O_RDWR|O_CREAT); - if ((fd = open(name, flags, 0666)) < 0) - return ERL_DRV_ERROR_GENERAL; - if (fd >= max_files) { - close(fd); - return ERL_DRV_ERROR_GENERAL; - } - SET_NONBLOCKING(fd); - init_fd_data(fd, port_num); - - CHLD_STAT_LOCK; - res = (ErlDrvData)(long)set_driver_data(port_num, fd, fd, - opts->packet_bytes, - opts->read_write, 0, -1, 0); - CHLD_STAT_UNLOCK; - return res; -} - -/* Note that driver_data[fd].ifd == fd if the port was opened for reading, */ -/* otherwise (i.e. write only) driver_data[fd].ofd = fd. */ - -static void stop(ErlDrvData fd) -{ - ErlDrvPort prt; - int ofd; - - prt = driver_data[(int)(long)fd].port_num; - nbio_stop_fd(prt, (int)(long)fd); - - ofd = driver_data[(int)(long)fd].ofd; - if (ofd != (int)(long)fd && (int)(long)ofd != -1) - nbio_stop_fd(prt, ofd); - else - ofd = -1; - - CHLD_STAT_LOCK; - - /* Mark as unused. */ - driver_data[(int)(long)fd].pid = -1; - - CHLD_STAT_UNLOCK; - - /* SMP note: Close has to be last thing done (open file descriptors work - as locks on driver_data[] entries) */ - driver_select(prt, (int)(long)fd, ERL_DRV_USE, 0); /* close(fd); */ - if (ofd >= 0) { - driver_select(prt, (int)(long)ofd, ERL_DRV_USE, 0); /* close(ofd); */ - } -} - -/* used by fd_driver */ -static void outputv(ErlDrvData e, ErlIOVec* ev) -{ - int fd = (int)(long)e; - ErlDrvPort ix = driver_data[fd].port_num; - int pb = driver_data[fd].packet_bytes; - int ofd = driver_data[fd].ofd; - ssize_t n; - ErlDrvSizeT sz; - char lb[4]; - char* lbp; - ErlDrvSizeT len = ev->size; - - /* (len > ((unsigned long)-1 >> (4-pb)*8)) */ - /* if (pb >= 0 && (len & (((ErlDrvSizeT)1 << (pb*8))) - 1) != len) {*/ - if (((pb == 2) && (len > 0xffff)) || (pb == 1 && len > 0xff)) { - driver_failure_posix(ix, EINVAL); - return; /* -1; */ - } - /* Handles 0 <= pb <= 4 only */ - put_int32((Uint32) len, lb); - lbp = lb + (4-pb); - - ev->iov[0].iov_base = lbp; - ev->iov[0].iov_len = pb; - ev->size += pb; - - if (driver_data[fd].blocking && FDBLOCK) - driver_pdl_lock(driver_data[fd].blocking->pdl); - - if ((sz = driver_sizeq(ix)) > 0) { - driver_enqv(ix, ev, 0); - - if (driver_data[fd].blocking && FDBLOCK) - driver_pdl_unlock(driver_data[fd].blocking->pdl); - - if (sz + ev->size >= (1 << 13)) - set_busy_port(ix, 1); - } - else if (!driver_data[fd].blocking || !FDBLOCK) { - /* We try to write directly if the fd in non-blocking */ - int vsize = ev->vsize > MAX_VSIZE ? MAX_VSIZE : ev->vsize; - - n = writev(ofd, (const void *) (ev->iov), vsize); - if (n == ev->size) - return; /* 0;*/ - if (n < 0) { - if ((errno != EINTR) && (errno != ERRNO_BLOCK)) { - driver_failure_posix(ix, errno); - return; /* -1;*/ - } - n = 0; - } - driver_enqv(ix, ev, n); /* n is the skip value */ - driver_select(ix, ofd, ERL_DRV_WRITE|ERL_DRV_USE, 1); - } -#if FDBLOCK - else { - if (ev->size != 0) { - driver_enqv(ix, ev, 0); - driver_pdl_unlock(driver_data[fd].blocking->pdl); - driver_async(ix, &driver_data[fd].blocking->pkey, - fd_async, driver_data+fd, NULL); - } else { - driver_pdl_unlock(driver_data[fd].blocking->pdl); - } - } -#endif - /* return 0;*/ -} - -/* Used by spawn_driver and vanilla driver */ -static void output(ErlDrvData e, char* buf, ErlDrvSizeT len) -{ - int fd = (int)(long)e; - ErlDrvPort ix = driver_data[fd].port_num; - int pb = driver_data[fd].packet_bytes; - int ofd = driver_data[fd].ofd; - ssize_t n; - ErlDrvSizeT sz; - char lb[4]; - char* lbp; - struct iovec iv[2]; - - /* (len > ((unsigned long)-1 >> (4-pb)*8)) */ - if (((pb == 2) && (len > 0xffff)) || (pb == 1 && len > 0xff)) { - driver_failure_posix(ix, EINVAL); - return; /* -1; */ - } - put_int32(len, lb); - lbp = lb + (4-pb); - - if ((sz = driver_sizeq(ix)) > 0) { - driver_enq(ix, lbp, pb); - driver_enq(ix, buf, len); - if (sz + len + pb >= (1 << 13)) - set_busy_port(ix, 1); - } - else { - iv[0].iov_base = lbp; - iv[0].iov_len = pb; /* should work for pb=0 */ - iv[1].iov_base = buf; - iv[1].iov_len = len; - n = writev(ofd, iv, 2); - if (n == pb+len) - return; /* 0; */ - if (n < 0) { - if ((errno != EINTR) && (errno != ERRNO_BLOCK)) { - driver_failure_posix(ix, errno); - return; /* -1; */ - } - n = 0; - } - if (n < pb) { - driver_enq(ix, lbp+n, pb-n); - driver_enq(ix, buf, len); - } - else { - n -= pb; - driver_enq(ix, buf+n, len-n); - } - driver_select(ix, ofd, ERL_DRV_WRITE|ERL_DRV_USE, 1); - } - return; /* 0; */ -} - -static int port_inp_failure(ErlDrvPort port_num, int ready_fd, int res) - /* Result: 0 (eof) or -1 (error) */ -{ - int err = errno; - - ASSERT(res <= 0); - (void) driver_select(port_num, ready_fd, ERL_DRV_READ|ERL_DRV_WRITE, 0); - clear_fd_data(ready_fd); - - if (driver_data[ready_fd].blocking && FDBLOCK) { - driver_pdl_lock(driver_data[ready_fd].blocking->pdl); - if (driver_sizeq(driver_data[ready_fd].port_num) > 0) { - driver_pdl_unlock(driver_data[ready_fd].blocking->pdl); - /* We have stuff in the output queue, so we just - set the state to terminating and wait for fd_async_ready - to terminate the port */ - if (res == 0) - driver_data[ready_fd].terminating = 2; - else - driver_data[ready_fd].terminating = -err; - return 0; - } - driver_pdl_unlock(driver_data[ready_fd].blocking->pdl); - } - - if (res == 0) { - if (driver_data[ready_fd].report_exit) { - CHLD_STAT_LOCK; - - if (driver_data[ready_fd].alive) { - /* - * We have eof and want to report exit status, but the process - * hasn't exited yet. When it does report_exit_status() will - * driver_select() this fd which will make sure that we get - * back here with driver_data[ready_fd].alive == 0 and - * driver_data[ready_fd].status set. - */ - CHLD_STAT_UNLOCK; - return 0; - } - else { - int status = driver_data[ready_fd].status; - CHLD_STAT_UNLOCK; - - /* We need not be prepared for stopped/continued processes. */ - if (WIFSIGNALED(status)) - status = 128 + WTERMSIG(status); - else - status = WEXITSTATUS(status); - - driver_report_exit(driver_data[ready_fd].port_num, status); - } - } - driver_failure_eof(port_num); - } else { - driver_failure_posix(port_num, err); - } - return 0; -} - -/* fd is the drv_data that is returned from the */ -/* initial start routine */ -/* ready_fd is the descriptor that is ready to read */ - -static void ready_input(ErlDrvData e, ErlDrvEvent ready_fd) -{ - int fd = (int)(long)e; - ErlDrvPort port_num; - int packet_bytes; - int res; - Uint h; - - port_num = driver_data[fd].port_num; - packet_bytes = driver_data[fd].packet_bytes; - - - if (packet_bytes == 0) { - byte *read_buf = (byte *) erts_alloc(ERTS_ALC_T_SYS_READ_BUF, - ERTS_SYS_READ_BUF_SZ); - res = read(ready_fd, read_buf, ERTS_SYS_READ_BUF_SZ); - if (res < 0) { - if ((errno != EINTR) && (errno != ERRNO_BLOCK)) - port_inp_failure(port_num, ready_fd, res); - } - else if (res == 0) - port_inp_failure(port_num, ready_fd, res); - else - driver_output(port_num, (char*) read_buf, res); - erts_free(ERTS_ALC_T_SYS_READ_BUF, (void *) read_buf); - } - else if (fd_data[ready_fd].remain > 0) { /* We try to read the remainder */ - /* space is allocated in buf */ - res = read(ready_fd, fd_data[ready_fd].cpos, - fd_data[ready_fd].remain); - if (res < 0) { - if ((errno != EINTR) && (errno != ERRNO_BLOCK)) - port_inp_failure(port_num, ready_fd, res); - } - else if (res == 0) { - port_inp_failure(port_num, ready_fd, res); - } - else if (res == fd_data[ready_fd].remain) { /* we're done */ - driver_output(port_num, fd_data[ready_fd].buf, - fd_data[ready_fd].sz); - clear_fd_data(ready_fd); - } - else { /* if (res < fd_data[ready_fd].remain) */ - fd_data[ready_fd].cpos += res; - fd_data[ready_fd].remain -= res; - } - } - else if (fd_data[ready_fd].remain == 0) { /* clean fd */ - byte *read_buf = (byte *) erts_alloc(ERTS_ALC_T_SYS_READ_BUF, - ERTS_SYS_READ_BUF_SZ); - /* We make one read attempt and see what happens */ - res = read(ready_fd, read_buf, ERTS_SYS_READ_BUF_SZ); - if (res < 0) { - if ((errno != EINTR) && (errno != ERRNO_BLOCK)) - port_inp_failure(port_num, ready_fd, res); - } - else if (res == 0) { /* eof */ - port_inp_failure(port_num, ready_fd, res); - } - else if (res < packet_bytes - fd_data[ready_fd].psz) { - memcpy(fd_data[ready_fd].pbuf+fd_data[ready_fd].psz, - read_buf, res); - fd_data[ready_fd].psz += res; - } - else { /* if (res >= packet_bytes) */ - unsigned char* cpos = read_buf; - int bytes_left = res; - - while (1) { - int psz = fd_data[ready_fd].psz; - char* pbp = fd_data[ready_fd].pbuf + psz; - - while(bytes_left && (psz < packet_bytes)) { - *pbp++ = *cpos++; - bytes_left--; - psz++; - } - - if (psz < packet_bytes) { - fd_data[ready_fd].psz = psz; - break; - } - fd_data[ready_fd].psz = 0; - - switch (packet_bytes) { - case 1: h = get_int8(fd_data[ready_fd].pbuf); break; - case 2: h = get_int16(fd_data[ready_fd].pbuf); break; - case 4: h = get_int32(fd_data[ready_fd].pbuf); break; - default: ASSERT(0); return; /* -1; */ - } - - if (h <= (bytes_left)) { - driver_output(port_num, (char*) cpos, h); - cpos += h; - bytes_left -= h; - continue; - } - else { /* The last message we got was split */ - char *buf = erts_alloc_fnf(ERTS_ALC_T_FD_ENTRY_BUF, h); - if (!buf) { - errno = ENOMEM; - port_inp_failure(port_num, ready_fd, -1); - } - else { - erts_smp_atomic_add_nob(&sys_misc_mem_sz, h); - sys_memcpy(buf, cpos, bytes_left); - fd_data[ready_fd].buf = buf; - fd_data[ready_fd].sz = h; - fd_data[ready_fd].remain = h - bytes_left; - fd_data[ready_fd].cpos = buf + bytes_left; - } - break; - } - } - } - erts_free(ERTS_ALC_T_SYS_READ_BUF, (void *) read_buf); - } -} - - -/* fd is the drv_data that is returned from the */ -/* initial start routine */ -/* ready_fd is the descriptor that is ready to read */ - -static void ready_output(ErlDrvData e, ErlDrvEvent ready_fd) -{ - int fd = (int)(long)e; - ErlDrvPort ix = driver_data[fd].port_num; - int n; - struct iovec* iv; - int vsize; - - - if ((iv = (struct iovec*) driver_peekq(ix, &vsize)) == NULL) { - driver_select(ix, ready_fd, ERL_DRV_WRITE, 0); - if (driver_data[fd].terminating) - driver_failure_atom(driver_data[fd].port_num,"normal"); - return; /* 0; */ - } - vsize = vsize > MAX_VSIZE ? MAX_VSIZE : vsize; - if ((n = writev(ready_fd, iv, vsize)) > 0) { - if (driver_deq(ix, n) == 0) - set_busy_port(ix, 0); - } - else if (n < 0) { - if (errno == ERRNO_BLOCK || errno == EINTR) - return; /* 0; */ - else { - int res = errno; - driver_select(ix, ready_fd, ERL_DRV_WRITE, 0); - driver_failure_posix(ix, res); - return; /* -1; */ - } - } - return; /* 0; */ -} - -static void stop_select(ErlDrvEvent fd, void* _) -{ - close((int)fd); -} - -#if FDBLOCK - -static void -fd_async(void *async_data) -{ - int res; - ErtsSysDriverData *dd = (ErtsSysDriverData*)async_data; - SysIOVec *iov0; - SysIOVec *iov; - int iovlen; - int err = 0; - /* much of this code is stolen from efile_drv:invoke_writev */ - driver_pdl_lock(dd->blocking->pdl); - iov0 = driver_peekq(dd->port_num, &iovlen); - iovlen = iovlen < MAXIOV ? iovlen : MAXIOV; - iov = erts_alloc_fnf(ERTS_ALC_T_SYS_WRITE_BUF, - sizeof(SysIOVec)*iovlen); - if (!iov) { - res = -1; - err = ENOMEM; - driver_pdl_unlock(dd->blocking->pdl); - } else { - memcpy(iov,iov0,iovlen*sizeof(SysIOVec)); - driver_pdl_unlock(dd->blocking->pdl); - - do { - res = writev(dd->ofd, iov, iovlen); - } while (res < 0 && errno == EINTR); - if (res < 0) - err = errno; - - erts_free(ERTS_ALC_T_SYS_WRITE_BUF, iov); - } - dd->blocking->res = res; - dd->blocking->err = err; -} - -void fd_ready_async(ErlDrvData drv_data, - ErlDrvThreadData thread_data) { - ErtsSysDriverData *dd = (ErtsSysDriverData *)thread_data; - ErlDrvPort port_num = dd->port_num; - - ASSERT(dd->blocking); - ASSERT(dd == (driver_data + (int)(long)drv_data)); - - if (dd->blocking->res > 0) { - driver_pdl_lock(dd->blocking->pdl); - if (driver_deq(port_num, dd->blocking->res) == 0) { - driver_pdl_unlock(dd->blocking->pdl); - set_busy_port(port_num, 0); - if (dd->terminating) { - /* The port is has been ordered to terminate - from either fd_flush or port_inp_failure */ - if (dd->terminating == 1) - driver_failure_atom(port_num, "normal"); - else if (dd->terminating == 2) - driver_failure_eof(port_num); - else if (dd->terminating < 0) - driver_failure_posix(port_num, -dd->terminating); - return; /* -1; */ - } - } else { - driver_pdl_unlock(dd->blocking->pdl); - /* still data left to write in queue */ - driver_async(port_num, &dd->blocking->pkey, fd_async, dd, NULL); - return /* 0; */; - } - } else if (dd->blocking->res < 0) { - if (dd->blocking->err == ERRNO_BLOCK) { - set_busy_port(port_num, 1); - /* still data left to write in queue */ - driver_async(port_num, &dd->blocking->pkey, fd_async, dd, NULL); - } else - driver_failure_posix(port_num, dd->blocking->err); - return; /* -1; */ - } - return; /* 0; */ -} - -#endif - void erts_do_break_handling(void) { struct termios temp_mode; @@ -2740,10 +1010,7 @@ erts_sys_unsetenv(char *key) void sys_init_io(void) { - fd_data = (ErtsSysFdData *) - erts_alloc(ERTS_ALC_T_FD_TAB, max_files * sizeof(ErtsSysFdData)); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, - max_files * sizeof(ErtsSysFdData)); + } #if (0) /* unused? */ @@ -2937,178 +1204,6 @@ erl_debug(char* fmt, ...) #endif /* DEBUG */ -static ERTS_INLINE void -report_exit_status(ErtsSysReportExit *rep, int status) -{ - Port *pp; -#ifdef ERTS_SMP - CHLD_STAT_UNLOCK; - pp = erts_thr_id2port_sflgs(rep->port, - ERTS_PORT_SFLGS_INVALID_DRIVER_LOOKUP); - CHLD_STAT_LOCK; -#else - pp = erts_id2port_sflgs(rep->port, - NULL, - 0, - ERTS_PORT_SFLGS_INVALID_DRIVER_LOOKUP); -#endif - if (pp) { - if (rep->ifd >= 0) { - driver_data[rep->ifd].alive = 0; - driver_data[rep->ifd].status = status; - (void) driver_select(ERTS_Port2ErlDrvPort(pp), - rep->ifd, - (ERL_DRV_READ|ERL_DRV_USE), - 1); - } - if (rep->ofd >= 0) { - driver_data[rep->ofd].alive = 0; - driver_data[rep->ofd].status = status; - (void) driver_select(ERTS_Port2ErlDrvPort(pp), - rep->ofd, - (ERL_DRV_WRITE|ERL_DRV_USE), - 1); - } -#ifdef ERTS_SMP - erts_thr_port_release(pp); -#else - erts_port_release(pp); -#endif - } - erts_free(ERTS_ALC_T_PRT_REP_EXIT, rep); -} - -#if !CHLDWTHR /* ---------------------------------------------------------- */ - -#define ERTS_REPORT_EXIT_STATUS report_exit_status - -static int check_children(void) -{ - int res = 0; - int pid; - int status; - -#ifndef ERTS_SMP - if (children_died) -#endif - { - sys_sigblock(SIGCHLD); - CHLD_STAT_LOCK; - while ((pid = waitpid(-1, &status, WNOHANG)) > 0) - note_child_death(pid, status); -#ifndef ERTS_SMP - children_died = 0; -#endif - CHLD_STAT_UNLOCK; - sys_sigrelease(SIGCHLD); - res = 1; - } - return res; -} - -#ifdef ERTS_SMP - -void -erts_check_children(void) -{ - (void) check_children(); -} - -#endif - -#elif CHLDWTHR && defined(ERTS_SMP) /* ------------------------------------- */ - -#define ERTS_REPORT_EXIT_STATUS report_exit_status - -#define check_children() (0) - - -#else /* CHLDWTHR && !defined(ERTS_SMP) ------------------------------------ */ - -#define ERTS_REPORT_EXIT_STATUS initiate_report_exit_status - -static ERTS_INLINE void -initiate_report_exit_status(ErtsSysReportExit *rep, int status) -{ - rep->next = report_exit_transit_list; - rep->status = status; - report_exit_transit_list = rep; - erts_sys_schedule_interrupt(1); -} - -static int check_children(void) -{ - int res; - ErtsSysReportExit *rep; - CHLD_STAT_LOCK; - rep = report_exit_transit_list; - res = rep != NULL; - while (rep) { - ErtsSysReportExit *curr_rep = rep; - rep = rep->next; - report_exit_status(curr_rep, curr_rep->status); - } - report_exit_transit_list = NULL; - CHLD_STAT_UNLOCK; - return res; -} - -#endif /* ------------------------------------------------------------------ */ - -static void note_child_death(int pid, int status) -{ - ErtsSysReportExit **repp = &report_exit_list; - ErtsSysReportExit *rep = report_exit_list; - - while (rep) { - if (pid == rep->pid) { - *repp = rep->next; - ERTS_REPORT_EXIT_STATUS(rep, status); - break; - } - repp = &rep->next; - rep = rep->next; - } -} - -#if CHLDWTHR - -static void * -child_waiter(void *unused) -{ - int pid; - int status; - -#ifdef ERTS_ENABLE_LOCK_CHECK - erts_lc_set_thread_name("child waiter"); -#endif - - while(1) { -#ifdef DEBUG - int waitpid_errno; -#endif - pid = waitpid(-1, &status, 0); -#ifdef DEBUG - waitpid_errno = errno; -#endif - CHLD_STAT_LOCK; - if (pid < 0) { - ASSERT(waitpid_errno == ECHILD); - } - else { - children_alive--; - ASSERT(children_alive >= 0); - note_child_death(pid, status); - } - while (!children_alive) - CHLD_STAT_WAIT; /* Wait for children to wait on... :) */ - CHLD_STAT_UNLOCK; - } - - return NULL; -} - -#endif /* * Called from schedule() when it runs out of runnable processes, @@ -3127,7 +1222,6 @@ erl_sys_schedule(int runnable) (void) check_children(); } - #ifdef ERTS_SMP static erts_smp_tid_t sig_dispatcher_tid; diff --git a/erts/emulator/sys/unix/sys_drivers.c b/erts/emulator/sys/unix/sys_drivers.c new file mode 100644 index 0000000000..e3f089fc0f --- /dev/null +++ b/erts/emulator/sys/unix/sys_drivers.c @@ -0,0 +1,1983 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 1996-2014. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef ISC32 +#define _POSIX_SOURCE +#define _XOPEN_SOURCE +#endif + +#include /* ! */ +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef ISC32 +#include +#endif + +#include +#ifdef HAVE_FCNTL_H +#include +#endif +#ifdef HAVE_SYS_IOCTL_H +#include +#endif + +#define NEED_CHILD_SETUP_DEFINES +#define WANT_NONBLOCKING /* must define this to pull in defs from sys.h */ +#include "sys.h" +#include "erl_thr_progress.h" + +#if defined(__APPLE__) && defined(__MACH__) && !defined(__DARWIN__) +#define __DARWIN__ 1 +#endif + +#ifdef USE_THREADS +#include "erl_threads.h" +#endif + +#include "erl_mseg.h" + +extern char **environ; +extern erts_smp_rwmtx_t environ_rwmtx; + +extern erts_smp_atomic_t sys_misc_mem_sz; + +#define MAX_VSIZE 16 /* Max number of entries allowed in an I/O + * vector sock_sendv(). + */ +/* + * Don't need global.h, but erl_cpu_topology.h won't compile otherwise + */ +#include "global.h" + +#include "erl_sys_driver.h" +#include "erl_check_io.h" +#include "erl_cpu_topology.h" + +#ifndef DISABLE_VFORK +#define DISABLE_VFORK 0 +#endif + +#if defined IOV_MAX +#define MAXIOV IOV_MAX +#elif defined UIO_MAXIOV +#define MAXIOV UIO_MAXIOV +#else +#define MAXIOV 16 +#endif + +/* + * [OTP-3906] + * Solaris signal management gets confused when threads are used and a + * lot of child processes dies. The confusion results in that SIGCHLD + * signals aren't delivered to the emulator which in turn results in + * a lot of defunct processes in the system. + * + * The problem seems to appear when a signal is frequently + * blocked/unblocked at the same time as the signal is frequently + * propagated. The child waiter thread is a workaround for this problem. + * The SIGCHLD signal is always blocked (in all threads), and the child + * waiter thread fetches the signal by a call to sigwait(). See + * child_waiter(). + */ + +typedef struct ErtsSysReportExit_ ErtsSysReportExit; +struct ErtsSysReportExit_ { + ErtsSysReportExit *next; + Eterm port; + int pid; + int ifd; + int ofd; +#if CHLDWTHR && !defined(ERTS_SMP) + int status; +#endif +}; + +/* Used by the fd driver iff the fd could not be set to non-blocking */ +typedef struct ErtsSysBlocking_ { + ErlDrvPDL pdl; + int res; + int err; + unsigned int pkey; +} ErtsSysBlocking; + + +/* This data is shared by these drivers - initialized by spawn_init() */ +typedef struct driver_data { + ErlDrvPort port_num; + int ofd, packet_bytes; + ErtsSysReportExit *report_exit; + int pid; + int alive; + int status; + int terminating; + ErtsSysBlocking *blocking; +} ErtsSysDriverData; + +static ErtsSysDriverData *driver_data; /* indexed by fd */ + +static ErtsSysReportExit *report_exit_list; +#if CHLDWTHR && !defined(ERTS_SMP) +static ErtsSysReportExit *report_exit_transit_list; +#endif + +#define DIR_SEPARATOR_CHAR '/' + +#if defined(__ANDROID__) +#define SHELL "/system/bin/sh" +#else +#define SHELL "/bin/sh" +#endif /* __ANDROID__ */ + + +#if defined(DEBUG) +#define ERL_BUILD_TYPE_MARKER ".debug" +#elif defined(PURIFY) +#define ERL_BUILD_TYPE_MARKER ".purify" +#elif defined(QUANTIFY) +#define ERL_BUILD_TYPE_MARKER ".quantify" +#elif defined(PURECOV) +#define ERL_BUILD_TYPE_MARKER ".purecov" +#elif defined(VALGRIND) +#define ERL_BUILD_TYPE_MARKER ".valgrind" +#else /* opt */ +#define ERL_BUILD_TYPE_MARKER +#endif + +#define CHILD_SETUP_PROG_NAME "erl_child_setup" ERL_BUILD_TYPE_MARKER +static char *child_setup_prog; + +#if CHLDWTHR || defined(ERTS_SMP) +erts_mtx_t chld_stat_mtx; +#endif +#if CHLDWTHR +static erts_tid_t child_waiter_tid; +/* chld_stat_mtx is used to protect against concurrent accesses + of the driver_data fields pid, alive, and status. */ +erts_cnd_t chld_stat_cnd; +static long children_alive; +#define CHLD_STAT_LOCK erts_mtx_lock(&chld_stat_mtx) +#define CHLD_STAT_UNLOCK erts_mtx_unlock(&chld_stat_mtx) +#define CHLD_STAT_WAIT erts_cnd_wait(&chld_stat_cnd, &chld_stat_mtx) +#define CHLD_STAT_SIGNAL erts_cnd_signal(&chld_stat_cnd) +#elif defined(ERTS_SMP) /* ------------------------------------------------- */ +#define CHLD_STAT_LOCK erts_mtx_lock(&chld_stat_mtx) +#define CHLD_STAT_UNLOCK erts_mtx_unlock(&chld_stat_mtx) + +#else /* ------------------------------------------------------------------- */ +#define CHLD_STAT_LOCK +#define CHLD_STAT_UNLOCK +static volatile int children_died; +#endif + +typedef struct ErtsSysFdData { + char pbuf[4]; /* hold partial packet bytes */ + int psz; /* size of pbuf */ + char *buf; + char *cpos; + int sz; + int remain; /* for input on fd */ +} ErtsSysFdData; + +static ErtsSysFdData *fd_data; /* indexed by fd */ + +/* static FUNCTION(int, write_fill, (int, char*, int)); unused? */ +static void note_child_death(int, int); + +#if CHLDWTHR +static void* child_waiter(void *); +#endif + +static void block_signals(void) +{ +#if !CHLDWTHR + sys_sigblock(SIGCHLD); +#endif +#ifndef ERTS_SMP + sys_sigblock(SIGINT); +#ifndef ETHR_UNUSABLE_SIGUSRX + sys_sigblock(SIGUSR1); +#endif /* #ifndef ETHR_UNUSABLE_SIGUSRX */ +#endif /* #ifndef ERTS_SMP */ + +#if defined(ERTS_SMP) && !defined(ETHR_UNUSABLE_SIGUSRX) + sys_sigblock(ERTS_SYS_SUSPEND_SIGNAL); +#endif + +} + +static void unblock_signals(void) +{ + /* Update erl_child_setup.c if changed */ +#if !CHLDWTHR + sys_sigrelease(SIGCHLD); +#endif +#ifndef ERTS_SMP + sys_sigrelease(SIGINT); +#ifndef ETHR_UNUSABLE_SIGUSRX + sys_sigrelease(SIGUSR1); +#endif /* #ifndef ETHR_UNUSABLE_SIGUSRX */ +#endif /* #ifndef ERTS_SMP */ + +#if defined(ERTS_SMP) && !defined(ETHR_UNUSABLE_SIGUSRX) + sys_sigrelease(ERTS_SYS_SUSPEND_SIGNAL); +#endif + +} + +/************************** Port I/O *******************************/ + + + +/* I. Common stuff */ + +/* + * Decreasing the size of it below 16384 is not allowed. + */ + +/* II. The spawn/fd/vanilla drivers */ + +#define ERTS_SYS_READ_BUF_SZ (64*1024) + +/* Driver interfaces */ +static ErlDrvData spawn_start(ErlDrvPort, char*, SysDriverOpts*); +static ErlDrvData fd_start(ErlDrvPort, char*, SysDriverOpts*); +#if FDBLOCK +static void fd_async(void *); +static void fd_ready_async(ErlDrvData drv_data, ErlDrvThreadData thread_data); +#endif +static ErlDrvSSizeT fd_control(ErlDrvData, unsigned int, char *, ErlDrvSizeT, + char **, ErlDrvSizeT); +static ErlDrvData vanilla_start(ErlDrvPort, char*, SysDriverOpts*); +static int spawn_init(void); +static void fd_stop(ErlDrvData); +static void fd_flush(ErlDrvData); +static void stop(ErlDrvData); +static void ready_input(ErlDrvData, ErlDrvEvent); +static void ready_output(ErlDrvData, ErlDrvEvent); +static void output(ErlDrvData, char*, ErlDrvSizeT); +static void outputv(ErlDrvData, ErlIOVec*); +static void stop_select(ErlDrvEvent, void*); + +struct erl_drv_entry spawn_driver_entry = { + spawn_init, + spawn_start, + stop, + output, + ready_input, + ready_output, + "spawn", + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + ERL_DRV_EXTENDED_MARKER, + ERL_DRV_EXTENDED_MAJOR_VERSION, + ERL_DRV_EXTENDED_MINOR_VERSION, + ERL_DRV_FLAG_USE_PORT_LOCKING, + NULL, NULL, + stop_select +}; +struct erl_drv_entry fd_driver_entry = { + NULL, + fd_start, + fd_stop, + output, + ready_input, + ready_output, + "fd", + NULL, + NULL, + fd_control, + NULL, + outputv, +#if FDBLOCK + fd_ready_async, /* ready_async */ +#else + NULL, +#endif + fd_flush, /* flush */ + NULL, /* call */ + NULL, /* event */ + ERL_DRV_EXTENDED_MARKER, + ERL_DRV_EXTENDED_MAJOR_VERSION, + ERL_DRV_EXTENDED_MINOR_VERSION, + 0, /* ERL_DRV_FLAGs */ + NULL, /* handle2 */ + NULL, /* process_exit */ + stop_select +}; +struct erl_drv_entry vanilla_driver_entry = { + NULL, + vanilla_start, + stop, + output, + ready_input, + ready_output, + "vanilla", + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, /* flush */ + NULL, /* call */ + NULL, /* event */ + ERL_DRV_EXTENDED_MARKER, + ERL_DRV_EXTENDED_MAJOR_VERSION, + ERL_DRV_EXTENDED_MINOR_VERSION, + 0, /* ERL_DRV_FLAGs */ + NULL, /* handle2 */ + NULL, /* process_exit */ + stop_select +}; + +/* Handle SIGCHLD signals. */ +#if (defined(SIG_SIGSET) || defined(SIG_SIGNAL)) +static RETSIGTYPE onchld(void) +#else +static RETSIGTYPE onchld(int signum) +#endif +{ +#if CHLDWTHR + ASSERT(0); /* We should *never* catch a SIGCHLD signal */ +#elif defined(ERTS_SMP) + smp_sig_notify('C'); +#else + children_died = 1; + ERTS_CHK_IO_AS_INTR(); /* Make sure we don't sleep in poll */ +#endif +} + +static int set_blocking_data(ErtsSysDriverData *dd) { + + dd->blocking = erts_alloc(ERTS_ALC_T_SYS_BLOCKING, sizeof(ErtsSysBlocking)); + + erts_smp_atomic_add_nob(&sys_misc_mem_sz, sizeof(ErtsSysBlocking)); + + dd->blocking->pdl = driver_pdl_create(dd->port_num); + dd->blocking->res = 0; + dd->blocking->err = 0; + dd->blocking->pkey = driver_async_port_key(dd->port_num); + + return 1; +} + +static int set_driver_data(ErlDrvPort port_num, + int ifd, + int ofd, + int packet_bytes, + int read_write, + int exit_status, + int pid, + int is_blocking) +{ + Port *prt; + ErtsSysReportExit *report_exit; + + if (!exit_status) + report_exit = NULL; + else { + report_exit = erts_alloc(ERTS_ALC_T_PRT_REP_EXIT, + sizeof(ErtsSysReportExit)); + report_exit->next = report_exit_list; + report_exit->port = erts_drvport2id(port_num); + report_exit->pid = pid; + report_exit->ifd = read_write & DO_READ ? ifd : -1; + report_exit->ofd = read_write & DO_WRITE ? ofd : -1; +#if CHLDWTHR && !defined(ERTS_SMP) + report_exit->status = 0; +#endif + report_exit_list = report_exit; + } + + prt = erts_drvport2port(port_num); + if (prt != ERTS_INVALID_ERL_DRV_PORT) + prt->os_pid = pid; + + if (read_write & DO_READ) { + driver_data[ifd].packet_bytes = packet_bytes; + driver_data[ifd].port_num = port_num; + driver_data[ifd].report_exit = report_exit; + driver_data[ifd].pid = pid; + driver_data[ifd].alive = 1; + driver_data[ifd].status = 0; + driver_data[ifd].terminating = 0; + driver_data[ifd].blocking = NULL; + if (read_write & DO_WRITE) { + driver_data[ifd].ofd = ofd; + if (is_blocking && FDBLOCK) + if (!set_blocking_data(driver_data+ifd)) + return -1; + if (ifd != ofd) + driver_data[ofd] = driver_data[ifd]; /* structure copy */ + } else { /* DO_READ only */ + driver_data[ifd].ofd = -1; + } + (void) driver_select(port_num, ifd, (ERL_DRV_READ|ERL_DRV_USE), 1); + return(ifd); + } else { /* DO_WRITE only */ + driver_data[ofd].packet_bytes = packet_bytes; + driver_data[ofd].port_num = port_num; + driver_data[ofd].report_exit = report_exit; + driver_data[ofd].ofd = ofd; + driver_data[ofd].pid = pid; + driver_data[ofd].alive = 1; + driver_data[ofd].status = 0; + driver_data[ofd].terminating = 0; + driver_data[ofd].blocking = NULL; + if (is_blocking && FDBLOCK) + if (!set_blocking_data(driver_data+ofd)) + return -1; + return(ofd); + } +} + +static int spawn_init() +{ + int i; +#if CHLDWTHR + erts_thr_opts_t thr_opts = ERTS_THR_OPTS_DEFAULT_INITER; +#endif + int res; + char bindir[MAXPATHLEN]; + size_t bindirsz = sizeof(bindir); + Uint csp_path_sz; + +#if CHLDWTHR + thr_opts.detached = 0; + thr_opts.suggested_stack_size = 0; /* Smallest possible */ + thr_opts.name = "child_waiter"; +#endif + +#if !DISABLE_VFORK + res = erts_sys_getenv_raw("BINDIR", bindir, &bindirsz); + if (res != 0) { + if (res < 0) + erl_exit(-1, + "Environment variable BINDIR is not set\n"); + if (res > 0) + erl_exit(-1, + "Value of environment variable BINDIR is too large\n"); + } + if (bindir[0] != DIR_SEPARATOR_CHAR) + erl_exit(-1, + "Environment variable BINDIR does not contain an" + " absolute path\n"); + csp_path_sz = (strlen(bindir) + + 1 /* DIR_SEPARATOR_CHAR */ + + sizeof(CHILD_SETUP_PROG_NAME) + + 1); + child_setup_prog = erts_alloc(ERTS_ALC_T_CS_PROG_PATH, csp_path_sz); + erts_smp_atomic_add_nob(&sys_misc_mem_sz, csp_path_sz); + erts_snprintf(child_setup_prog, csp_path_sz, + "%s%c%s", + bindir, + DIR_SEPARATOR_CHAR, + CHILD_SETUP_PROG_NAME); +#endif + + report_exit_list = NULL; + +#ifdef USE_THREADS + +#if CHLDWTHR || defined(ERTS_SMP) + erts_mtx_init(&chld_stat_mtx, "child_status"); +#endif +#if CHLDWTHR +#ifndef ERTS_SMP + report_exit_transit_list = NULL; +#endif + erts_cnd_init(&chld_stat_cnd); + children_alive = 0; +#endif + +#if !CHLDWTHR && !defined(ERTS_SMP) + children_died = 0; +#endif + +#endif /* USE_THREADS */ + + sys_signal(SIGPIPE, SIG_IGN); /* Ignore - we'll handle the write failure */ + driver_data = (ErtsSysDriverData *) + erts_alloc(ERTS_ALC_T_DRV_TAB, sys_max_files() * sizeof(ErtsSysDriverData)); + erts_smp_atomic_add_nob(&sys_misc_mem_sz, + sys_max_files() * sizeof(ErtsSysDriverData)); + + for (i = 0; i < sys_max_files(); i++) + driver_data[i].pid = -1; + + fd_data = (ErtsSysFdData *) + erts_alloc(ERTS_ALC_T_FD_TAB, sys_max_files() * sizeof(ErtsSysFdData)); + erts_smp_atomic_add_nob(&sys_misc_mem_sz, + sys_max_files() * sizeof(ErtsSysFdData)); + +#if CHLDWTHR + sys_sigblock(SIGCHLD); +#endif + + sys_signal(SIGCHLD, onchld); /* Reap children */ + +#if CHLDWTHR + erts_thr_create(&child_waiter_tid, child_waiter, NULL, &thr_opts); +#endif + + return 1; +} + +static void close_pipes(int ifd[2], int ofd[2], int read_write) +{ + if (read_write & DO_READ) { + (void) close(ifd[0]); + (void) close(ifd[1]); + } + if (read_write & DO_WRITE) { + (void) close(ofd[0]); + (void) close(ofd[1]); + } +} + +static void init_fd_data(int fd, ErlDrvPort port_num) +{ + fd_data[fd].buf = NULL; + fd_data[fd].cpos = NULL; + fd_data[fd].remain = 0; + fd_data[fd].sz = 0; + fd_data[fd].psz = 0; +} + +static char **build_unix_environment(char *block) +{ + int i; + int j; + int len; + char *cp; + char **cpp; + char** old_env; + + ERTS_SMP_LC_ASSERT(erts_smp_lc_rwmtx_is_rlocked(&environ_rwmtx)); + + cp = block; + len = 0; + while (*cp != '\0') { + cp += strlen(cp) + 1; + len++; + } + old_env = environ; + while (*old_env++ != NULL) { + len++; + } + + cpp = (char **) erts_alloc_fnf(ERTS_ALC_T_ENVIRONMENT, + sizeof(char *) * (len+1)); + if (cpp == NULL) { + return NULL; + } + + cp = block; + len = 0; + while (*cp != '\0') { + cpp[len] = cp; + cp += strlen(cp) + 1; + len++; + } + + i = len; + for (old_env = environ; *old_env; old_env++) { + char* old = *old_env; + + for (j = 0; j < len; j++) { + char *s, *t; + + s = cpp[j]; + t = old; + while (*s == *t && *s != '=') { + s++, t++; + } + if (*s == '=' && *t == '=') { + break; + } + } + + if (j == len) { /* New version not found */ + cpp[len++] = old; + } + } + + for (j = 0; j < i; ) { + size_t last = strlen(cpp[j])-1; + if (cpp[j][last] == '=' && strchr(cpp[j], '=') == cpp[j]+last) { + cpp[j] = cpp[--len]; + if (len < i) { + i--; + } else { + j++; + } + } + else { + j++; + } + } + + cpp[len] = NULL; + return cpp; +} + +/* + [arndt] In most Unix systems, including Solaris 2.5, 'fork' allocates memory + in swap space for the child of a 'fork', whereas 'vfork' does not do this. + The natural call to use here is therefore 'vfork'. Due to a bug in + 'vfork' in Solaris 2.5 (apparently fixed in 2.6), using 'vfork' + can be dangerous in what seems to be these circumstances: + If the child code under a vfork sets the signal action to SIG_DFL + (or SIG_IGN) + for any signal which was previously set to a signal handler, the + state of the parent is clobbered, so that the later arrival of + such a signal yields a sigsegv in the parent. If the signal was + not set to a signal handler, but ignored, all seems to work. + If you change the forking code below, beware of this. + */ + +static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* opts) +{ +#define CMD_LINE_PREFIX_STR "exec " +#define CMD_LINE_PREFIX_STR_SZ (sizeof(CMD_LINE_PREFIX_STR) - 1) + + int ifd[2], ofd[2], len, pid, i; + char **volatile new_environ; /* volatile since a vfork() then cannot + cause 'new_environ' to be clobbered + in the parent process. */ + int saved_errno; + long res; + char *cmd_line; +#ifndef QNX + int unbind; +#endif +#if !DISABLE_VFORK + int no_vfork; + size_t no_vfork_sz = sizeof(no_vfork); + + no_vfork = (erts_sys_getenv_raw("ERL_NO_VFORK", + (char *) &no_vfork, + &no_vfork_sz) >= 0); +#endif + + switch (opts->read_write) { + case DO_READ: + if (pipe(ifd) < 0) + return ERL_DRV_ERROR_ERRNO; + if (ifd[0] >= sys_max_files()) { + close_pipes(ifd, ofd, opts->read_write); + errno = EMFILE; + return ERL_DRV_ERROR_ERRNO; + } + ofd[1] = -1; /* keep purify happy */ + break; + case DO_WRITE: + if (pipe(ofd) < 0) return ERL_DRV_ERROR_ERRNO; + if (ofd[1] >= sys_max_files()) { + close_pipes(ifd, ofd, opts->read_write); + errno = EMFILE; + return ERL_DRV_ERROR_ERRNO; + } + ifd[0] = -1; /* keep purify happy */ + break; + case DO_READ|DO_WRITE: + if (pipe(ifd) < 0) return ERL_DRV_ERROR_ERRNO; + errno = EMFILE; /* default for next two conditions */ + if (ifd[0] >= sys_max_files() || pipe(ofd) < 0) { + close_pipes(ifd, ofd, DO_READ); + return ERL_DRV_ERROR_ERRNO; + } + if (ofd[1] >= sys_max_files()) { + close_pipes(ifd, ofd, opts->read_write); + errno = EMFILE; + return ERL_DRV_ERROR_ERRNO; + } + break; + default: + ASSERT(0); + return ERL_DRV_ERROR_GENERAL; + } + + if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) { + /* started with spawn_executable, not with spawn */ + len = strlen(name); + cmd_line = (char *) erts_alloc_fnf(ERTS_ALC_T_TMP, len + 1); + if (!cmd_line) { + close_pipes(ifd, ofd, opts->read_write); + errno = ENOMEM; + return ERL_DRV_ERROR_ERRNO; + } + memcpy((void *) cmd_line,(void *) name, len); + cmd_line[len] = '\0'; + if (access(cmd_line,X_OK) != 0) { + int save_errno = errno; + erts_free(ERTS_ALC_T_TMP, cmd_line); + errno = save_errno; + return ERL_DRV_ERROR_ERRNO; + } + } else { + /* make the string suitable for giving to "sh" */ + len = strlen(name); + cmd_line = (char *) erts_alloc_fnf(ERTS_ALC_T_TMP, + CMD_LINE_PREFIX_STR_SZ + len + 1); + if (!cmd_line) { + close_pipes(ifd, ofd, opts->read_write); + errno = ENOMEM; + return ERL_DRV_ERROR_ERRNO; + } + memcpy((void *) cmd_line, + (void *) CMD_LINE_PREFIX_STR, + CMD_LINE_PREFIX_STR_SZ); + memcpy((void *) (cmd_line + CMD_LINE_PREFIX_STR_SZ), (void *) name, len); + cmd_line[CMD_LINE_PREFIX_STR_SZ + len] = '\0'; + } + + erts_smp_rwmtx_rlock(&environ_rwmtx); + + if (opts->envir == NULL) { + new_environ = environ; + } else if ((new_environ = build_unix_environment(opts->envir)) == NULL) { + erts_smp_rwmtx_runlock(&environ_rwmtx); + erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); + errno = ENOMEM; + return ERL_DRV_ERROR_ERRNO; + } + +#ifndef QNX + /* Block child from SIGINT and SIGUSR1. Must be before fork() + to be safe. */ + block_signals(); + + CHLD_STAT_LOCK; + + unbind = erts_sched_bind_atfork_prepare(); + +#if !DISABLE_VFORK + /* See fork/vfork discussion before this function. */ + if (no_vfork) { +#endif + + DEBUGF(("Using fork\n")); + pid = fork(); + + if (pid == 0) { + /* The child! Setup child... */ + + if (erts_sched_bind_atfork_child(unbind) != 0) + goto child_error; + + /* OBSERVE! + * Keep child setup after vfork() (implemented below and in + * erl_child_setup.c) up to date if changes are made here. + */ + + if (opts->use_stdio) { + if (opts->read_write & DO_READ) { + /* stdout for process */ + if (dup2(ifd[1], 1) < 0) + goto child_error; + if(opts->redir_stderr) + /* stderr for process */ + if (dup2(ifd[1], 2) < 0) + goto child_error; + } + if (opts->read_write & DO_WRITE) + /* stdin for process */ + if (dup2(ofd[0], 0) < 0) + goto child_error; + } + else { /* XXX will fail if ofd[0] == 4 (unlikely..) */ + if (opts->read_write & DO_READ) + if (dup2(ifd[1], 4) < 0) + goto child_error; + if (opts->read_write & DO_WRITE) + if (dup2(ofd[0], 3) < 0) + goto child_error; + } + +#if defined(HAVE_CLOSEFROM) + closefrom(opts->use_stdio ? 3 : 5); +#else + for (i = opts->use_stdio ? 3 : 5; i < sys_max_files(); i++) + (void) close(i); +#endif + + if (opts->wd && chdir(opts->wd) < 0) + goto child_error; + +#if defined(USE_SETPGRP_NOARGS) /* SysV */ + (void) setpgrp(); +#elif defined(USE_SETPGRP) /* BSD */ + (void) setpgrp(0, getpid()); +#else /* POSIX */ + (void) setsid(); +#endif + + unblock_signals(); + + if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) { + if (opts->argv == NULL) { + execle(cmd_line,cmd_line,(char *) NULL, new_environ); + } else { + if (opts->argv[0] == erts_default_arg0) { + opts->argv[0] = cmd_line; + } + execve(cmd_line, opts->argv, new_environ); + if (opts->argv[0] == cmd_line) { + opts->argv[0] = erts_default_arg0; + } + } + } else { + execle(SHELL, "sh", "-c", cmd_line, (char *) NULL, new_environ); + } + child_error: + _exit(1); + } +#if !DISABLE_VFORK + } +#define ENOUGH_BYTES (44) + else { /* Use vfork() */ + char **cs_argv= erts_alloc(ERTS_ALC_T_TMP,(CS_ARGV_NO_OF_ARGS + 1)* + sizeof(char *)); + char fd_close_range[ENOUGH_BYTES]; /* 44 bytes are enough to */ + char dup2_op[CS_ARGV_NO_OF_DUP2_OPS][ENOUGH_BYTES]; /* hold any "%d:%d" string */ + /* on a 64-bit machine. */ + + /* Setup argv[] for the child setup program (implemented in + erl_child_setup.c) */ + i = 0; + if (opts->use_stdio) { + if (opts->read_write & DO_READ){ + /* stdout for process */ + erts_snprintf(&dup2_op[i++][0], ENOUGH_BYTES, "%d:%d", ifd[1], 1); + if(opts->redir_stderr) + /* stderr for process */ + erts_snprintf(&dup2_op[i++][0], ENOUGH_BYTES, "%d:%d", ifd[1], 2); + } + if (opts->read_write & DO_WRITE) + /* stdin for process */ + erts_snprintf(&dup2_op[i++][0], ENOUGH_BYTES, "%d:%d", ofd[0], 0); + } else { /* XXX will fail if ofd[0] == 4 (unlikely..) */ + if (opts->read_write & DO_READ) + erts_snprintf(&dup2_op[i++][0], ENOUGH_BYTES, "%d:%d", ifd[1], 4); + if (opts->read_write & DO_WRITE) + erts_snprintf(&dup2_op[i++][0], ENOUGH_BYTES, "%d:%d", ofd[0], 3); + } + for (; i < CS_ARGV_NO_OF_DUP2_OPS; i++) + strcpy(&dup2_op[i][0], "-"); + erts_snprintf(fd_close_range, ENOUGH_BYTES, "%d:%d", opts->use_stdio ? 3 : 5, sys_max_files()-1); + + cs_argv[CS_ARGV_PROGNAME_IX] = child_setup_prog; + cs_argv[CS_ARGV_WD_IX] = opts->wd ? opts->wd : "."; + cs_argv[CS_ARGV_UNBIND_IX] = erts_sched_bind_atvfork_child(unbind); + cs_argv[CS_ARGV_FD_CR_IX] = fd_close_range; + for (i = 0; i < CS_ARGV_NO_OF_DUP2_OPS; i++) + cs_argv[CS_ARGV_DUP2_OP_IX(i)] = &dup2_op[i][0]; + + if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) { + int num = 0; + int j = 0; + if (opts->argv != NULL) { + for(; opts->argv[num] != NULL; ++num) + ; + } + cs_argv = erts_realloc(ERTS_ALC_T_TMP,cs_argv, (CS_ARGV_NO_OF_ARGS + 1 + num + 1) * sizeof(char *)); + cs_argv[CS_ARGV_CMD_IX] = "-"; + cs_argv[CS_ARGV_NO_OF_ARGS] = cmd_line; + if (opts->argv != NULL) { + for (;opts->argv[j] != NULL; ++j) { + if (opts->argv[j] == erts_default_arg0) { + cs_argv[CS_ARGV_NO_OF_ARGS + 1 + j] = cmd_line; + } else { + cs_argv[CS_ARGV_NO_OF_ARGS + 1 + j] = opts->argv[j]; + } + } + } + cs_argv[CS_ARGV_NO_OF_ARGS + 1 + j] = NULL; + } else { + cs_argv[CS_ARGV_CMD_IX] = cmd_line; /* Command */ + cs_argv[CS_ARGV_NO_OF_ARGS] = NULL; + } + DEBUGF(("Using vfork\n")); + pid = vfork(); + + if (pid == 0) { + /* The child! */ + + /* Observe! + * OTP-4389: The child setup program (implemented in + * erl_child_setup.c) will perform the necessary setup of the + * child before it execs to the user program. This because + * vfork() only allow an *immediate* execve() or _exit() in the + * child. + */ + execve(child_setup_prog, cs_argv, new_environ); + _exit(1); + } + erts_free(ERTS_ALC_T_TMP,cs_argv); + } +#undef ENOUGH_BYTES +#endif + + erts_sched_bind_atfork_parent(unbind); + + if (pid == -1) { + saved_errno = errno; + CHLD_STAT_UNLOCK; + erts_smp_rwmtx_runlock(&environ_rwmtx); + erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); + unblock_signals(); + close_pipes(ifd, ofd, opts->read_write); + errno = saved_errno; + return ERL_DRV_ERROR_ERRNO; + } +#else /* QNX */ + if (opts->use_stdio) { + if (opts->read_write & DO_READ) + qnx_spawn_options.iov[1] = ifd[1]; /* stdout for process */ + if (opts->read_write & DO_WRITE) + qnx_spawn_options.iov[0] = ofd[0]; /* stdin for process */ + } + else { + if (opts->read_write & DO_READ) + qnx_spawn_options.iov[4] = ifd[1]; + if (opts->read_write & DO_WRITE) + qnx_spawn_options.iov[3] = ofd[0]; + } + /* Close fds on exec */ + for (i = 3; i < sys_max_files(); i++) + fcntl(i, F_SETFD, 1); + + qnx_spawn_options.flags = _SPAWN_SETSID; + if ((pid = spawnl(P_NOWAIT, SHELL, SHELL, "-c", cmd_line, + (char *) 0)) < 0) { + erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); + reset_qnx_spawn(); + erts_smp_rwmtx_runlock(&environ_rwmtx); + close_pipes(ifd, ofd, opts->read_write); + return ERL_DRV_ERROR_GENERAL; + } + reset_qnx_spawn(); +#endif /* QNX */ + + erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); + + if (new_environ != environ) + erts_free(ERTS_ALC_T_ENVIRONMENT, (void *) new_environ); + + if (opts->read_write & DO_READ) + (void) close(ifd[1]); + if (opts->read_write & DO_WRITE) + (void) close(ofd[0]); + + if (opts->read_write & DO_READ) { + SET_NONBLOCKING(ifd[0]); + init_fd_data(ifd[0], port_num); + } + if (opts->read_write & DO_WRITE) { + SET_NONBLOCKING(ofd[1]); + init_fd_data(ofd[1], port_num); + } + + res = set_driver_data(port_num, ifd[0], ofd[1], opts->packet_bytes, + opts->read_write, opts->exit_status, pid, 0); + /* Don't unblock SIGCHLD until now, since the call above must + first complete putting away the info about our new subprocess. */ + unblock_signals(); + +#if CHLDWTHR + ASSERT(children_alive >= 0); + + if (!(children_alive++)) + CHLD_STAT_SIGNAL; /* Wake up child waiter thread if no children + was alive before we fork()ed ... */ +#endif + /* Don't unlock chld_stat_mtx until now of the same reason as above */ + CHLD_STAT_UNLOCK; + + erts_smp_rwmtx_runlock(&environ_rwmtx); + + return (ErlDrvData)res; +#undef CMD_LINE_PREFIX_STR +#undef CMD_LINE_PREFIX_STR_SZ +} + +#ifdef QNX +static reset_qnx_spawn() +{ + int i; + + /* Reset qnx_spawn_options */ + qnx_spawn_options.flags = 0; + qnx_spawn_options.iov[0] = 0xff; + qnx_spawn_options.iov[1] = 0xff; + qnx_spawn_options.iov[2] = 0xff; + qnx_spawn_options.iov[3] = 0xff; +} +#endif + +#define FD_DEF_HEIGHT 24 +#define FD_DEF_WIDTH 80 +/* Control op */ +#define FD_CTRL_OP_GET_WINSIZE 100 + +static int fd_get_window_size(int fd, Uint32 *width, Uint32 *height) +{ +#ifdef TIOCGWINSZ + struct winsize ws; + if (ioctl(fd,TIOCGWINSZ,&ws) == 0) { + *width = (Uint32) ws.ws_col; + *height = (Uint32) ws.ws_row; + return 0; + } +#endif + return -1; +} + +static ErlDrvSSizeT fd_control(ErlDrvData drv_data, + unsigned int command, + char *buf, ErlDrvSizeT len, + char **rbuf, ErlDrvSizeT rlen) +{ + int fd = (int)(long)drv_data; + char resbuff[2*sizeof(Uint32)]; + switch (command) { + case FD_CTRL_OP_GET_WINSIZE: + { + Uint32 w,h; + if (fd_get_window_size(fd,&w,&h)) + return 0; + memcpy(resbuff,&w,sizeof(Uint32)); + memcpy(resbuff+sizeof(Uint32),&h,sizeof(Uint32)); + } + break; + default: + return 0; + } + if (rlen < 2*sizeof(Uint32)) { + *rbuf = driver_alloc(2*sizeof(Uint32)); + } + memcpy(*rbuf,resbuff,2*sizeof(Uint32)); + return 2*sizeof(Uint32); +} + +static ErlDrvData fd_start(ErlDrvPort port_num, char* name, + SysDriverOpts* opts) +{ + ErlDrvData res; + int non_blocking = 0; + + if (((opts->read_write & DO_READ) && opts->ifd >= sys_max_files()) || + ((opts->read_write & DO_WRITE) && opts->ofd >= sys_max_files())) + return ERL_DRV_ERROR_GENERAL; + + /* + * Historical: + * + * "Note about nonblocking I/O. + * + * At least on Solaris, setting the write end of a TTY to nonblocking, + * will set the input end to nonblocking as well (and vice-versa). + * If erl is run in a pipeline like this: cat | erl + * the input end of the TTY will be the standard input of cat. + * And cat is not prepared to handle nonblocking I/O." + * + * Actually, the reason for this is not that the tty itself gets set + * in non-blocking mode, but that the "input end" (cat's stdin) and + * the "output end" (erlang's stdout) are typically the "same" file + * descriptor, dup()'ed from a single fd by one of this process' + * ancestors. + * + * The workaround for this problem used to be a rather bad kludge, + * interposing an extra process ("internal cat") between erlang's + * stdout and the original stdout, allowing erlang to set its stdout + * in non-blocking mode without affecting the stdin of the preceding + * process in the pipeline - and being a kludge, it caused all kinds + * of weird problems. + * + * So, this is the current logic: + * + * The only reason to set non-blocking mode on the output fd at all is + * if it's something that can cause a write() to block, of course, + * i.e. primarily if it points to a tty, socket, pipe, or fifo. + * + * If we don't set non-blocking mode when we "should" have, and output + * becomes blocked, the entire runtime system will be suspended - this + * is normally bad of course, and can happen fairly "easily" - e.g. user + * hits ^S on tty - but doesn't necessarily happen. + * + * If we do set non-blocking mode when we "shouldn't" have, the runtime + * system will end up seeing EOF on the input fd (due to the preceding + * process dying), which typically will cause the entire runtime system + * to terminate immediately (due to whatever erlang process is seeing + * the EOF taking it as a signal to halt the system). This is *very* bad. + * + * I.e. we should take a conservative approach, and only set non- + * blocking mode when we a) need to, and b) are reasonably certain + * that it won't be a problem. And as in the example above, the problem + * occurs when input fd and output fd point to different "things". + * + * However, determining that they are not just the same "type" of + * "thing", but actually the same instance of that type of thing, is + * unreasonably complex in many/most cases. + * + * Also, with pipes, sockets, and fifos it's far from obvious that the + * user *wants* non-blocking output: If you're running erlang inside + * some complex pipeline, you're probably not running a real-time system + * that must never stop, but rather *want* it to suspend if the output + * channel is "full". + * + * So, the bottom line: We will only set the output fd non-blocking if + * it points to a tty, and either a) the input fd also points to a tty, + * or b) we can make sure that setting the output fd non-blocking + * doesn't interfere with someone else's input, via a somewhat milder + * kludge than the above. + * + * Also keep in mind that while this code is almost exclusively run as + * a result of an erlang open_port({fd,0,1}, ...), that isn't the only + * case - it can be called with any old pre-existing file descriptors, + * the relations between which (if they're even two) we can only guess + * at - still, we try our best... + * + * Added note OTP 18: Some systems seem to use stdout/stderr to log data + * using unix pipes, so we cannot allow the system to block on a write. + * Therefore we use an async thread to write the data to fd's that could + * not be set to non-blocking. When no async threads are available we + * fall back on the old behaviour. + * + * Also the guarantee about what is delivered to the OS has changed. + * Pre 18 the fd driver did no flushing of data before terminating. + * Now it does. This is because we want to be able to guarantee that things + * such as escripts and friends really have outputted all data before + * terminating. This could potentially block the termination of the system + * for a very long time, but if the user wants to terminate fast she should + * use erlang:halt with flush=false. + */ + + if (opts->read_write & DO_READ) { + init_fd_data(opts->ifd, port_num); + } + if (opts->read_write & DO_WRITE) { + init_fd_data(opts->ofd, port_num); + + /* If we don't have a read end, all bets are off - no non-blocking. */ + if (opts->read_write & DO_READ) { + + if (isatty(opts->ofd)) { /* output fd is a tty:-) */ + + if (isatty(opts->ifd)) { /* input fd is also a tty */ + + /* To really do this "right", we should also check that + input and output fd point to the *same* tty - but + this seems like overkill; ttyname() isn't for free, + and this is a very common case - and it's hard to + imagine a scenario where setting non-blocking mode + here would cause problems - go ahead and do it. */ + + non_blocking = 1; + SET_NONBLOCKING(opts->ofd); + + } else { /* output fd is a tty, input fd isn't */ + + /* This is a "problem case", but also common (see the + example above) - i.e. it makes sense to try a bit + harder before giving up on non-blocking mode: Try to + re-open the tty that the output fd points to, and if + successful replace the original one with the "new" fd + obtained this way, and set *that* one in non-blocking + mode. (Yes, this is a kludge.) + + However, re-opening the tty may fail in a couple of + (unusual) cases: + + 1) The name of the tty (or an equivalent one, i.e. + same major/minor number) can't be found, because + it actually lives somewhere other than /dev (or + wherever ttyname() looks for it), and isn't + equivalent to any of those that do live in the + "standard" place - this should be *very* unusual. + + 2) Permissions on the tty don't allow us to open it - + it's perfectly possible to have an fd open to an + object whose permissions wouldn't allow us to open + it. This is not as unusual as it sounds, one case + is if the user has su'ed to someone else (not + root) - we have a read/write fd open to the tty + (because it has been inherited all the way down + here), but we have neither read nor write + permission for the tty. + + In these cases, we finally give up, and don't set the + output fd in non-blocking mode. */ + + char *tty; + int nfd; + + if ((tty = ttyname(opts->ofd)) != NULL && + (nfd = open(tty, O_WRONLY)) != -1) { + dup2(nfd, opts->ofd); + close(nfd); + non_blocking = 1; + SET_NONBLOCKING(opts->ofd); + } + } + } + } + } + CHLD_STAT_LOCK; + res = (ErlDrvData)(long)set_driver_data(port_num, opts->ifd, opts->ofd, + opts->packet_bytes, + opts->read_write, 0, -1, + !non_blocking); + CHLD_STAT_UNLOCK; + return res; +} + +static void clear_fd_data(int fd) +{ + if (fd_data[fd].sz > 0) { + erts_free(ERTS_ALC_T_FD_ENTRY_BUF, (void *) fd_data[fd].buf); + ASSERT(erts_smp_atomic_read_nob(&sys_misc_mem_sz) >= fd_data[fd].sz); + erts_smp_atomic_add_nob(&sys_misc_mem_sz, -1*fd_data[fd].sz); + } + fd_data[fd].buf = NULL; + fd_data[fd].sz = 0; + fd_data[fd].remain = 0; + fd_data[fd].cpos = NULL; + fd_data[fd].psz = 0; +} + +static void nbio_stop_fd(ErlDrvPort prt, int fd) +{ + driver_select(prt,fd,DO_READ|DO_WRITE,0); + clear_fd_data(fd); + SET_BLOCKING(fd); +} + +static void fd_stop(ErlDrvData ev) /* Does not close the fds */ +{ + int ofd; + int fd = (int)(long)ev; + ErlDrvPort prt = driver_data[fd].port_num; + +#if FDBLOCK + if (driver_data[fd].blocking) { + erts_free(ERTS_ALC_T_SYS_BLOCKING,driver_data[fd].blocking); + driver_data[fd].blocking = NULL; + erts_smp_atomic_add_nob(&sys_misc_mem_sz, -1*sizeof(ErtsSysBlocking)); + } +#endif + + nbio_stop_fd(prt, fd); + ofd = driver_data[fd].ofd; + if (ofd != fd && ofd != -1) + nbio_stop_fd(prt, ofd); +} + +static void fd_flush(ErlDrvData fd) +{ + if (!driver_data[(int)(long)fd].terminating) + driver_data[(int)(long)fd].terminating = 1; +} + +static ErlDrvData vanilla_start(ErlDrvPort port_num, char* name, + SysDriverOpts* opts) +{ + int flags, fd; + ErlDrvData res; + + flags = (opts->read_write == DO_READ ? O_RDONLY : + opts->read_write == DO_WRITE ? O_WRONLY|O_CREAT|O_TRUNC : + O_RDWR|O_CREAT); + if ((fd = open(name, flags, 0666)) < 0) + return ERL_DRV_ERROR_GENERAL; + if (fd >= sys_max_files()) { + close(fd); + return ERL_DRV_ERROR_GENERAL; + } + SET_NONBLOCKING(fd); + init_fd_data(fd, port_num); + + CHLD_STAT_LOCK; + res = (ErlDrvData)(long)set_driver_data(port_num, fd, fd, + opts->packet_bytes, + opts->read_write, 0, -1, 0); + CHLD_STAT_UNLOCK; + return res; +} + +/* Note that driver_data[fd].ifd == fd if the port was opened for reading, */ +/* otherwise (i.e. write only) driver_data[fd].ofd = fd. */ + +static void stop(ErlDrvData fd) +{ + ErlDrvPort prt; + int ofd; + + prt = driver_data[(int)(long)fd].port_num; + nbio_stop_fd(prt, (int)(long)fd); + + ofd = driver_data[(int)(long)fd].ofd; + if (ofd != (int)(long)fd && (int)(long)ofd != -1) + nbio_stop_fd(prt, ofd); + else + ofd = -1; + + CHLD_STAT_LOCK; + + /* Mark as unused. */ + driver_data[(int)(long)fd].pid = -1; + + CHLD_STAT_UNLOCK; + + /* SMP note: Close has to be last thing done (open file descriptors work + as locks on driver_data[] entries) */ + driver_select(prt, (int)(long)fd, ERL_DRV_USE, 0); /* close(fd); */ + if (ofd >= 0) { + driver_select(prt, (int)(long)ofd, ERL_DRV_USE, 0); /* close(ofd); */ + } +} + +/* used by fd_driver */ +static void outputv(ErlDrvData e, ErlIOVec* ev) +{ + int fd = (int)(long)e; + ErlDrvPort ix = driver_data[fd].port_num; + int pb = driver_data[fd].packet_bytes; + int ofd = driver_data[fd].ofd; + ssize_t n; + ErlDrvSizeT sz; + char lb[4]; + char* lbp; + ErlDrvSizeT len = ev->size; + + /* (len > ((unsigned long)-1 >> (4-pb)*8)) */ + /* if (pb >= 0 && (len & (((ErlDrvSizeT)1 << (pb*8))) - 1) != len) {*/ + if (((pb == 2) && (len > 0xffff)) || (pb == 1 && len > 0xff)) { + driver_failure_posix(ix, EINVAL); + return; /* -1; */ + } + /* Handles 0 <= pb <= 4 only */ + put_int32((Uint32) len, lb); + lbp = lb + (4-pb); + + ev->iov[0].iov_base = lbp; + ev->iov[0].iov_len = pb; + ev->size += pb; + + if (driver_data[fd].blocking && FDBLOCK) + driver_pdl_lock(driver_data[fd].blocking->pdl); + + if ((sz = driver_sizeq(ix)) > 0) { + driver_enqv(ix, ev, 0); + + if (driver_data[fd].blocking && FDBLOCK) + driver_pdl_unlock(driver_data[fd].blocking->pdl); + + if (sz + ev->size >= (1 << 13)) + set_busy_port(ix, 1); + } + else if (!driver_data[fd].blocking || !FDBLOCK) { + /* We try to write directly if the fd in non-blocking */ + int vsize = ev->vsize > MAX_VSIZE ? MAX_VSIZE : ev->vsize; + + n = writev(ofd, (const void *) (ev->iov), vsize); + if (n == ev->size) + return; /* 0;*/ + if (n < 0) { + if ((errno != EINTR) && (errno != ERRNO_BLOCK)) { + driver_failure_posix(ix, errno); + return; /* -1;*/ + } + n = 0; + } + driver_enqv(ix, ev, n); /* n is the skip value */ + driver_select(ix, ofd, ERL_DRV_WRITE|ERL_DRV_USE, 1); + } +#if FDBLOCK + else { + if (ev->size != 0) { + driver_enqv(ix, ev, 0); + driver_pdl_unlock(driver_data[fd].blocking->pdl); + driver_async(ix, &driver_data[fd].blocking->pkey, + fd_async, driver_data+fd, NULL); + } else { + driver_pdl_unlock(driver_data[fd].blocking->pdl); + } + } +#endif + /* return 0;*/ +} + +/* Used by spawn_driver and vanilla driver */ +static void output(ErlDrvData e, char* buf, ErlDrvSizeT len) +{ + int fd = (int)(long)e; + ErlDrvPort ix = driver_data[fd].port_num; + int pb = driver_data[fd].packet_bytes; + int ofd = driver_data[fd].ofd; + ssize_t n; + ErlDrvSizeT sz; + char lb[4]; + char* lbp; + struct iovec iv[2]; + + /* (len > ((unsigned long)-1 >> (4-pb)*8)) */ + if (((pb == 2) && (len > 0xffff)) || (pb == 1 && len > 0xff)) { + driver_failure_posix(ix, EINVAL); + return; /* -1; */ + } + put_int32(len, lb); + lbp = lb + (4-pb); + + if ((sz = driver_sizeq(ix)) > 0) { + driver_enq(ix, lbp, pb); + driver_enq(ix, buf, len); + if (sz + len + pb >= (1 << 13)) + set_busy_port(ix, 1); + } + else { + iv[0].iov_base = lbp; + iv[0].iov_len = pb; /* should work for pb=0 */ + iv[1].iov_base = buf; + iv[1].iov_len = len; + n = writev(ofd, iv, 2); + if (n == pb+len) + return; /* 0; */ + if (n < 0) { + if ((errno != EINTR) && (errno != ERRNO_BLOCK)) { + driver_failure_posix(ix, errno); + return; /* -1; */ + } + n = 0; + } + if (n < pb) { + driver_enq(ix, lbp+n, pb-n); + driver_enq(ix, buf, len); + } + else { + n -= pb; + driver_enq(ix, buf+n, len-n); + } + driver_select(ix, ofd, ERL_DRV_WRITE|ERL_DRV_USE, 1); + } + return; /* 0; */ +} + +static int port_inp_failure(ErlDrvPort port_num, int ready_fd, int res) + /* Result: 0 (eof) or -1 (error) */ +{ + int err = errno; + + ASSERT(res <= 0); + (void) driver_select(port_num, ready_fd, ERL_DRV_READ|ERL_DRV_WRITE, 0); + clear_fd_data(ready_fd); + + if (driver_data[ready_fd].blocking && FDBLOCK) { + driver_pdl_lock(driver_data[ready_fd].blocking->pdl); + if (driver_sizeq(driver_data[ready_fd].port_num) > 0) { + driver_pdl_unlock(driver_data[ready_fd].blocking->pdl); + /* We have stuff in the output queue, so we just + set the state to terminating and wait for fd_async_ready + to terminate the port */ + if (res == 0) + driver_data[ready_fd].terminating = 2; + else + driver_data[ready_fd].terminating = -err; + return 0; + } + driver_pdl_unlock(driver_data[ready_fd].blocking->pdl); + } + + if (res == 0) { + if (driver_data[ready_fd].report_exit) { + CHLD_STAT_LOCK; + + if (driver_data[ready_fd].alive) { + /* + * We have eof and want to report exit status, but the process + * hasn't exited yet. When it does report_exit_status() will + * driver_select() this fd which will make sure that we get + * back here with driver_data[ready_fd].alive == 0 and + * driver_data[ready_fd].status set. + */ + CHLD_STAT_UNLOCK; + return 0; + } + else { + int status = driver_data[ready_fd].status; + CHLD_STAT_UNLOCK; + + /* We need not be prepared for stopped/continued processes. */ + if (WIFSIGNALED(status)) + status = 128 + WTERMSIG(status); + else + status = WEXITSTATUS(status); + + driver_report_exit(driver_data[ready_fd].port_num, status); + } + } + driver_failure_eof(port_num); + } else { + driver_failure_posix(port_num, err); + } + return 0; +} + +/* fd is the drv_data that is returned from the */ +/* initial start routine */ +/* ready_fd is the descriptor that is ready to read */ + +static void ready_input(ErlDrvData e, ErlDrvEvent ready_fd) +{ + int fd = (int)(long)e; + ErlDrvPort port_num; + int packet_bytes; + int res; + Uint h; + + port_num = driver_data[fd].port_num; + packet_bytes = driver_data[fd].packet_bytes; + + + if (packet_bytes == 0) { + byte *read_buf = (byte *) erts_alloc(ERTS_ALC_T_SYS_READ_BUF, + ERTS_SYS_READ_BUF_SZ); + res = read(ready_fd, read_buf, ERTS_SYS_READ_BUF_SZ); + if (res < 0) { + if ((errno != EINTR) && (errno != ERRNO_BLOCK)) + port_inp_failure(port_num, ready_fd, res); + } + else if (res == 0) + port_inp_failure(port_num, ready_fd, res); + else + driver_output(port_num, (char*) read_buf, res); + erts_free(ERTS_ALC_T_SYS_READ_BUF, (void *) read_buf); + } + else if (fd_data[ready_fd].remain > 0) { /* We try to read the remainder */ + /* space is allocated in buf */ + res = read(ready_fd, fd_data[ready_fd].cpos, + fd_data[ready_fd].remain); + if (res < 0) { + if ((errno != EINTR) && (errno != ERRNO_BLOCK)) + port_inp_failure(port_num, ready_fd, res); + } + else if (res == 0) { + port_inp_failure(port_num, ready_fd, res); + } + else if (res == fd_data[ready_fd].remain) { /* we're done */ + driver_output(port_num, fd_data[ready_fd].buf, + fd_data[ready_fd].sz); + clear_fd_data(ready_fd); + } + else { /* if (res < fd_data[ready_fd].remain) */ + fd_data[ready_fd].cpos += res; + fd_data[ready_fd].remain -= res; + } + } + else if (fd_data[ready_fd].remain == 0) { /* clean fd */ + byte *read_buf = (byte *) erts_alloc(ERTS_ALC_T_SYS_READ_BUF, + ERTS_SYS_READ_BUF_SZ); + /* We make one read attempt and see what happens */ + res = read(ready_fd, read_buf, ERTS_SYS_READ_BUF_SZ); + if (res < 0) { + if ((errno != EINTR) && (errno != ERRNO_BLOCK)) + port_inp_failure(port_num, ready_fd, res); + } + else if (res == 0) { /* eof */ + port_inp_failure(port_num, ready_fd, res); + } + else if (res < packet_bytes - fd_data[ready_fd].psz) { + memcpy(fd_data[ready_fd].pbuf+fd_data[ready_fd].psz, + read_buf, res); + fd_data[ready_fd].psz += res; + } + else { /* if (res >= packet_bytes) */ + unsigned char* cpos = read_buf; + int bytes_left = res; + + while (1) { + int psz = fd_data[ready_fd].psz; + char* pbp = fd_data[ready_fd].pbuf + psz; + + while(bytes_left && (psz < packet_bytes)) { + *pbp++ = *cpos++; + bytes_left--; + psz++; + } + + if (psz < packet_bytes) { + fd_data[ready_fd].psz = psz; + break; + } + fd_data[ready_fd].psz = 0; + + switch (packet_bytes) { + case 1: h = get_int8(fd_data[ready_fd].pbuf); break; + case 2: h = get_int16(fd_data[ready_fd].pbuf); break; + case 4: h = get_int32(fd_data[ready_fd].pbuf); break; + default: ASSERT(0); return; /* -1; */ + } + + if (h <= (bytes_left)) { + driver_output(port_num, (char*) cpos, h); + cpos += h; + bytes_left -= h; + continue; + } + else { /* The last message we got was split */ + char *buf = erts_alloc_fnf(ERTS_ALC_T_FD_ENTRY_BUF, h); + if (!buf) { + errno = ENOMEM; + port_inp_failure(port_num, ready_fd, -1); + } + else { + erts_smp_atomic_add_nob(&sys_misc_mem_sz, h); + sys_memcpy(buf, cpos, bytes_left); + fd_data[ready_fd].buf = buf; + fd_data[ready_fd].sz = h; + fd_data[ready_fd].remain = h - bytes_left; + fd_data[ready_fd].cpos = buf + bytes_left; + } + break; + } + } + } + erts_free(ERTS_ALC_T_SYS_READ_BUF, (void *) read_buf); + } +} + + +/* fd is the drv_data that is returned from the */ +/* initial start routine */ +/* ready_fd is the descriptor that is ready to read */ + +static void ready_output(ErlDrvData e, ErlDrvEvent ready_fd) +{ + int fd = (int)(long)e; + ErlDrvPort ix = driver_data[fd].port_num; + int n; + struct iovec* iv; + int vsize; + + + if ((iv = (struct iovec*) driver_peekq(ix, &vsize)) == NULL) { + driver_select(ix, ready_fd, ERL_DRV_WRITE, 0); + if (driver_data[fd].terminating) + driver_failure_atom(driver_data[fd].port_num,"normal"); + return; /* 0; */ + } + vsize = vsize > MAX_VSIZE ? MAX_VSIZE : vsize; + if ((n = writev(ready_fd, iv, vsize)) > 0) { + if (driver_deq(ix, n) == 0) + set_busy_port(ix, 0); + } + else if (n < 0) { + if (errno == ERRNO_BLOCK || errno == EINTR) + return; /* 0; */ + else { + int res = errno; + driver_select(ix, ready_fd, ERL_DRV_WRITE, 0); + driver_failure_posix(ix, res); + return; /* -1; */ + } + } + return; /* 0; */ +} + +static void stop_select(ErlDrvEvent fd, void* _) +{ + close((int)fd); +} + +#if FDBLOCK + +static void +fd_async(void *async_data) +{ + int res; + ErtsSysDriverData *dd = (ErtsSysDriverData*)async_data; + SysIOVec *iov0; + SysIOVec *iov; + int iovlen; + int err = 0; + /* much of this code is stolen from efile_drv:invoke_writev */ + driver_pdl_lock(dd->blocking->pdl); + iov0 = driver_peekq(dd->port_num, &iovlen); + iovlen = iovlen < MAXIOV ? iovlen : MAXIOV; + iov = erts_alloc_fnf(ERTS_ALC_T_SYS_WRITE_BUF, + sizeof(SysIOVec)*iovlen); + if (!iov) { + res = -1; + err = ENOMEM; + driver_pdl_unlock(dd->blocking->pdl); + } else { + memcpy(iov,iov0,iovlen*sizeof(SysIOVec)); + driver_pdl_unlock(dd->blocking->pdl); + + do { + res = writev(dd->ofd, iov, iovlen); + } while (res < 0 && errno == EINTR); + if (res < 0) + err = errno; + err = errno; + + erts_free(ERTS_ALC_T_SYS_WRITE_BUF, iov); + } + dd->blocking->res = res; + dd->blocking->err = err; +} + +void fd_ready_async(ErlDrvData drv_data, + ErlDrvThreadData thread_data) { + ErtsSysDriverData *dd = (ErtsSysDriverData *)thread_data; + ErlDrvPort port_num = dd->port_num; + + ASSERT(dd->blocking); + ASSERT(dd == (driver_data + (int)(long)drv_data)); + + if (dd->blocking->res > 0) { + driver_pdl_lock(dd->blocking->pdl); + if (driver_deq(port_num, dd->blocking->res) == 0) { + driver_pdl_unlock(dd->blocking->pdl); + set_busy_port(port_num, 0); + if (dd->terminating) { + /* The port is has been ordered to terminate + from either fd_flush or port_inp_failure */ + if (dd->terminating == 1) + driver_failure_atom(port_num, "normal"); + else if (dd->terminating == 2) + driver_failure_eof(port_num); + else if (dd->terminating < 0) + driver_failure_posix(port_num, -dd->terminating); + return; /* -1; */ + } + } else { + driver_pdl_unlock(dd->blocking->pdl); + /* still data left to write in queue */ + driver_async(port_num, &dd->blocking->pkey, fd_async, dd, NULL); + return /* 0; */; + } + } else if (dd->blocking->res < 0) { + if (dd->blocking->err == ERRNO_BLOCK) { + set_busy_port(port_num, 1); + /* still data left to write in queue */ + driver_async(port_num, &dd->blocking->pkey, fd_async, dd, NULL); + } else + driver_failure_posix(port_num, dd->blocking->err); + return; /* -1; */ + } + return; /* 0; */ +} + +#endif + +static ERTS_INLINE void +report_exit_status(ErtsSysReportExit *rep, int status) +{ + Port *pp; +#ifdef ERTS_SMP + CHLD_STAT_UNLOCK; + pp = erts_thr_id2port_sflgs(rep->port, + ERTS_PORT_SFLGS_INVALID_DRIVER_LOOKUP); + CHLD_STAT_LOCK; +#else + pp = erts_id2port_sflgs(rep->port, + NULL, + 0, + ERTS_PORT_SFLGS_INVALID_DRIVER_LOOKUP); +#endif + if (pp) { + if (rep->ifd >= 0) { + driver_data[rep->ifd].alive = 0; + driver_data[rep->ifd].status = status; + (void) driver_select(ERTS_Port2ErlDrvPort(pp), + rep->ifd, + (ERL_DRV_READ|ERL_DRV_USE), + 1); + } + if (rep->ofd >= 0) { + driver_data[rep->ofd].alive = 0; + driver_data[rep->ofd].status = status; + (void) driver_select(ERTS_Port2ErlDrvPort(pp), + rep->ofd, + (ERL_DRV_WRITE|ERL_DRV_USE), + 1); + } +#ifdef ERTS_SMP + erts_thr_port_release(pp); +#else + erts_port_release(pp); +#endif + } + erts_free(ERTS_ALC_T_PRT_REP_EXIT, rep); +} + +#if !CHLDWTHR /* ---------------------------------------------------------- */ + +#define ERTS_REPORT_EXIT_STATUS report_exit_status + +int check_children(void) +{ + int res = 0; + int pid; + int status; + +#ifndef ERTS_SMP + if (children_died) +#endif + { + sys_sigblock(SIGCHLD); + CHLD_STAT_LOCK; + while ((pid = waitpid(-1, &status, WNOHANG)) > 0) + note_child_death(pid, status); +#ifndef ERTS_SMP + children_died = 0; +#endif + CHLD_STAT_UNLOCK; + sys_sigrelease(SIGCHLD); + res = 1; + } + return res; +} + +#ifdef ERTS_SMP + +void +erts_check_children(void) +{ + (void) check_children(); +} + +#endif + +#elif CHLDWTHR && defined(ERTS_SMP) /* ------------------------------------- */ + +#define ERTS_REPORT_EXIT_STATUS report_exit_status + +int check_children(void) +{ + return 0; +} + +#else /* CHLDWTHR && !defined(ERTS_SMP) ------------------------------------ */ + +#define ERTS_REPORT_EXIT_STATUS initiate_report_exit_status + +static ERTS_INLINE void +initiate_report_exit_status(ErtsSysReportExit *rep, int status) +{ + rep->next = report_exit_transit_list; + rep->status = status; + report_exit_transit_list = rep; + erts_sys_schedule_interrupt(1); +} + +int check_children(void) +{ + int res; + ErtsSysReportExit *rep; + CHLD_STAT_LOCK; + rep = report_exit_transit_list; + res = rep != NULL; + while (rep) { + ErtsSysReportExit *curr_rep = rep; + rep = rep->next; + report_exit_status(curr_rep, curr_rep->status); + } + report_exit_transit_list = NULL; + CHLD_STAT_UNLOCK; + return res; +} + +#endif /* ------------------------------------------------------------------ */ + +static void note_child_death(int pid, int status) +{ + ErtsSysReportExit **repp = &report_exit_list; + ErtsSysReportExit *rep = report_exit_list; + + while (rep) { + if (pid == rep->pid) { + *repp = rep->next; + ERTS_REPORT_EXIT_STATUS(rep, status); + break; + } + repp = &rep->next; + rep = rep->next; + } +} + +#if CHLDWTHR + +static void * +child_waiter(void *unused) +{ + int pid; + int status; + +#ifdef ERTS_ENABLE_LOCK_CHECK + erts_lc_set_thread_name("child waiter"); +#endif + + while(1) { +#ifdef DEBUG + int waitpid_errno; +#endif + pid = waitpid(-1, &status, 0); +#ifdef DEBUG + waitpid_errno = errno; +#endif + CHLD_STAT_LOCK; + if (pid < 0) { + ASSERT(waitpid_errno == ECHILD); + } + else { + children_alive--; + ASSERT(children_alive >= 0); + note_child_death(pid, status); + } + while (!children_alive) + CHLD_STAT_WAIT; /* Wait for children to wait on... :) */ + CHLD_STAT_UNLOCK; + } + + return NULL; +} + +#endif -- cgit v1.2.3