diff options
author | Lukas Larsson <[email protected]> | 2017-10-02 10:40:25 +0200 |
---|---|---|
committer | Lukas Larsson <[email protected]> | 2017-10-02 10:40:25 +0200 |
commit | 9033a41375f3a31a18eb0cba3ea0dc84efbc0aa0 (patch) | |
tree | 8731693cd5c15ffd8f064a3f304aec49fdcebca8 | |
parent | f75fa8129fcd18fc56407778960252e6c10a3a37 (diff) | |
parent | 0c6df1f92be6337efe4d7e3d1964063f9acf770f (diff) | |
download | otp-9033a41375f3a31a18eb0cba3ea0dc84efbc0aa0.tar.gz otp-9033a41375f3a31a18eb0cba3ea0dc84efbc0aa0.tar.bz2 otp-9033a41375f3a31a18eb0cba3ea0dc84efbc0aa0.zip |
Merge branch 'lukas/erts/poll-thread/OTP-14346'
* lukas/erts/poll-thread/OTP-14346: (25 commits)
erts: Trigger ready events when erts_io_control fails
erts: enif_select steal test
kernel: Rewrite gen_udp_SUITE:read_packet tc
erts: disable kernel-poll on OS X vsn < 16
erts: Fix msacc testcase with new poll-thread
erts: Add testcases to test IOp and IOt options
erts: get_internal_state(check_io_debug) now prints to error_logger
erts: Remove eager check io
erts: Move all I/O polling to a seperate thread
erts: Fix smp_select testcase to use ERL_DRV_USE
erts: Fix msacc unmanaged state counter
erts: Optimize port_task quick allocator
erts: Add ERTS_THR_PREF_QUICK_ALLOC_IMPL
erts: Update suspend of scheduler to handle multiple pollsets
erts: Add multiple poll sets
erts: Some code cleanup for gdb to work better
erts: temp_alloc can no longer be disabled
erts: Refactor check_io to use one static struct
erts: Replace check_io spinlock with lock-less list insertion
erts: Add number of enif_select's to check_io_debug
...
56 files changed, 3940 insertions, 5514 deletions
diff --git a/erts/configure.in b/erts/configure.in index 2cb446b470..9dec562f33 100644 --- a/erts/configure.in +++ b/erts/configure.in @@ -3463,62 +3463,24 @@ case $poll_works-$host_os in esac # -# If kqueue() found, check that it can be selected or polled on... +# If kqueue() found # if test $have_kernel_poll = kqueue; then - if test $poll_works = yes; then - kqueue_with=poll - else - kqueue_with=select - fi - AC_MSG_CHECKING([whether kqueue() fd can be ${kqueue_with}()ed on]) - AC_TRY_RUN([ -#include <stdlib.h> -#include <sys/types.h> -#include <sys/event.h> -#include <sys/time.h> -#ifdef ERTS_USE_POLL -#include <poll.h> -#else -#include <unistd.h> -#endif -int main(void) { - int kq = kqueue(); - if (kq < 0) return 2; - { -#ifdef ERTS_USE_POLL - struct pollfd pfds = {kq, POLLIN, 0}; - if (poll(&pfds, 1, 0) < 0) return 1; -#else - struct timeval tv = {0, 0}; - fd_set set; FD_ZERO(&set); FD_SET(kq, &set); - if (select(kq+1, &set, NULL, NULL, &tv) < 0) return 1; -#endif - } - return 0; -} - ], - ok_kqueue=yes, - ok_kqueue=no, - [ - case X$erl_xcomp_kqueue in - X) ok_kqueue=cross;; - Xyes|Xno) ok_kqueue=$erl_xcomp_kqueue;; - *) AC_MSG_ERROR([Bad erl_xcomp_kqueue value: $erl_xcomp_kqueue]);; - esac - ]) - AC_MSG_RESULT($ok_kqueue); - case $ok_kqueue in - yes) - ;; - cross) - have_kernel_poll=no - AC_MSG_WARN([result no guessed because of cross compilation]);; - *) - have_kernel_poll=no;; - esac +## Some OS X kernel version seems to have bugs in them with regards to kqueue +## Disable kernel poll on those versions + AC_MSG_CHECKING([whether host os has known kqueue bugs]) + case $host_os in + # Any OS X version < 16 has known problems with using kqueue + # so we don't use it there. See erl_poll.c for details. + darwin[[0-9]].*|darwin1[[0-5]].*) + AC_MSG_RESULT([yes, disabling kernel poll]) + have_kernel_poll=no + ;; + *) + AC_MSG_RESULT([no]) + ;; + esac fi - # # If epoll() found, check that it is level triggered. # @@ -3545,6 +3507,7 @@ fi # AC_MSG_CHECKING(whether kernel poll support should be enabled) ERTS_ENABLE_KERNEL_POLL=no +ERTS_BUILD_FALLBACK_POLL=no case $enable_kernel_poll-$have_kernel_poll in no-*) AC_MSG_RESULT(no; disabled by user);; @@ -3555,11 +3518,16 @@ case $enable_kernel_poll-$have_kernel_poll in *) case $have_kernel_poll in epoll) - AC_DEFINE(HAVE_SYS_EPOLL_H, 1, [Define if you have the <sys/epoll.h> header file.]);; + AC_DEFINE(HAVE_SYS_EPOLL_H, 1, [Define if you have the <sys/epoll.h> header file.]) + ERTS_BUILD_FALLBACK_POLL=yes + ;; /dev/poll) - AC_DEFINE(HAVE_SYS_DEVPOLL_H, 1, [Define if you have <sys/devpoll.h> header file.]);; + AC_DEFINE(HAVE_SYS_DEVPOLL_H, 1, [Define if you have <sys/devpoll.h> header file.]) + ;; kqueue) - AC_DEFINE(HAVE_SYS_EVENT_H, 1, [Define if you have <sys/event.h> header file.]);; + AC_DEFINE(HAVE_SYS_EVENT_H, 1, [Define if you have <sys/event.h> header file.]) + ERTS_BUILD_FALLBACK_POLL=yes + ;; *) AC_MSG_ERROR(configure.in need to be updated);; esac @@ -3567,7 +3535,7 @@ case $enable_kernel_poll-$have_kernel_poll in AC_DEFINE(ERTS_ENABLE_KERNEL_POLL, 1, [Define if you have kernel poll and want to use it]) AC_MSG_RESULT([yes; $have_kernel_poll]);; esac -AC_SUBST(ERTS_ENABLE_KERNEL_POLL) +AC_SUBST(ERTS_BUILD_FALLBACK_POLL) AC_MSG_CHECKING([whether putenv() stores a copy of the key-value pair]) AC_TRY_RUN([ diff --git a/erts/doc/src/driver_entry.xml b/erts/doc/src/driver_entry.xml index 2421e0a8d9..e8c7e26457 100644 --- a/erts/doc/src/driver_entry.xml +++ b/erts/doc/src/driver_entry.xml @@ -196,10 +196,7 @@ typedef struct erl_drv_entry { char **rbuf, ErlDrvSizeT rlen, unsigned int *flags); /* Works mostly like 'control', a synchronous call into the driver */ - void (*event)(ErlDrvData drv_data, ErlDrvEvent event, - ErlDrvEventData event_data); - /* Called when an event selected by - driver_event() has occurred */ + void* unused_event_callback; int extended_marker; /* ERL_DRV_EXTENDED_MARKER */ int major_version; /* ERL_DRV_EXTENDED_MAJOR_VERSION */ int minor_version; /* ERL_DRV_EXTENDED_MINOR_VERSION */ diff --git a/erts/doc/src/erl.xml b/erts/doc/src/erl.xml index 71fe08d4e6..dc58cdeb13 100644 --- a/erts/doc/src/erl.xml +++ b/erts/doc/src/erl.xml @@ -762,13 +762,36 @@ <seealso marker="erlang#process_flag_message_queue_data"> <c>process_flag(message_queue_data, MQD)</c></seealso>.</p> </item> - <tag><c><![CDATA[+K true | false]]></c></tag> + <tag><marker id="+IOp"/><c>+IOp PollSets</c></tag> <item> - <p>Enables or disables the kernel poll functionality if supported by - the emulator. Defaults to <c><![CDATA[false]]></c> (disabled). - If the emulator does not support kernel poll, and flag - <c><![CDATA[+K]]></c> is passed to the emulator, a warning is - issued at startup.</p> + <p>Sets the number of IO pollsets to use when polling for I/O. + This option is only used on platforms that support concurrent + updates of a pollset, otherwise the same number of pollsets + are used as IO poll threads. + The default is 1. + </p> + </item> + <tag><marker id="+IOt"/><c>+IOt PollThreads</c></tag> + <item> + <p>Sets the number of IO poll threads to use when polling for I/O. + The maximum number of poll threads allowed is 1024. The default is 1. + </p> + </item> + <tag><marker id="+IOPp"/><c>+IOPp PollSetsPercentage</c></tag> + <item> + <p>Similar to <seealso marker="#+IOp"><c>+IOp</c></seealso> but uses + percentages to set the number of IO pollsets to create, based on the + number of poll threads configured. If both <c>+IOPp</c> and <c>+IOp</c> + are used, <c>+IOPp</c> is ignored. + </p> + </item> + <tag><marker id="+IOPt"/><c>+IOPt PollThreadsPercentage</c></tag> + <item> + <p>Similar to <seealso marker="#+IOt"><c>+IOt</c></seealso> but uses + percentages to set the number of IO poll threads to create, based on + the number of schedulers configures. If both <c>+IOPt</c> and + <c>+IOt</c> are used, <c>+IOPt</c> is ignored. + </p> </item> <tag><c><![CDATA[+l]]></c></tag> <item> @@ -1273,25 +1296,6 @@ <seealso marker="erlang#system_info_cpu_topology"> <c>erlang:system_info(cpu_topology)</c></seealso>.</p> </item> - <tag><marker id="+secio"/><c>+secio true|false</c></tag> - <item> - <p>Enables or disables eager check I/O scheduling. Defaults - to <c>true</c>. The default was changed from <c>false</c> - as from ERTS 7.0. The behavior before this - flag was introduced corresponds to <c>+secio false</c>.</p> - <p>The flag effects when schedulers will check for I/O - operations possible to execute, and when such I/O operations - will execute. As the parameter name implies, - schedulers are more eager to check for I/O when - <c>true</c> is passed. This, however, also implies that - execution of outstanding I/O operation is not - prioritized to the same extent as when <c>false</c> is - passed.</p> - <p><seealso marker="erlang#system_info_eager_check_io"> - <c>erlang:system_info(eager_check_io)</c></seealso> - returns the value of this parameter used when starting - the virtual machine.</p> - </item> <tag><marker id="+sfwi"/><c>+sfwi Interval</c></tag> <item> <p>Sets scheduler-forced wakeup interval. All run queues are diff --git a/erts/doc/src/erl_driver.xml b/erts/doc/src/erl_driver.xml index 5705100ab2..ca9d458e1e 100644 --- a/erts/doc/src/erl_driver.xml +++ b/erts/doc/src/erl_driver.xml @@ -157,7 +157,7 @@ </note> <p>Most functions in this API are <em>not</em> thread-safe, that is, - they <em>cannot</em> be called from any thread. Functions + they <em>cannot</em> be called from arbitrary threads. Functions that are not documented as thread-safe can only be called from driver callbacks or function calls descending from a driver callback call. Notice that driver callbacks can be called from diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml index ca8ba044e7..06f568c832 100644 --- a/erts/doc/src/erlang.xml +++ b/erts/doc/src/erlang.xml @@ -8042,15 +8042,6 @@ ok The return value will always be <c>false</c>, as the <c>elib_malloc</c> allocator has been removed.</p> </item> - <tag><marker id="system_info_eager_check_io"/> - <c>eager_check_io</c></tag> - <item> - <p>Returns the value of command-line flag - <seealso marker="erl#+secio"><c>+secio</c></seealso> in - <c>erl(1)</c>, which is either <c>true</c> or <c>false</c>. - For information about the different values, see the - documentation of the command-line flag.</p> - </item> <tag><c>ets_limit</c></tag> <item> <p>Returns the maximum number of ETS tables allowed. This diff --git a/erts/doc/src/erts_alloc.xml b/erts/doc/src/erts_alloc.xml index d3f725ef99..580780e73b 100644 --- a/erts/doc/src/erts_alloc.xml +++ b/erts/doc/src/erts_alloc.xml @@ -87,9 +87,9 @@ the number of system calls made.</item> </taglist> - <p><c>sys_alloc</c> and <c>literal_alloc</c> are always enabled and - cannot be disabled. <c>exec_alloc</c> is only available if it is needed - and cannot be disabled. <c>mseg_alloc</c> is always enabled if it is + <p><c>sys_alloc</c>, <c>literal_alloc</c> and <c>temp_alloc</c> are always + enabled and cannot be disabled. <c>exec_alloc</c> is only available if it + is needed and cannot be disabled. <c>mseg_alloc</c> is always enabled if it is available and an allocator that uses it is enabled. All other allocators can be <seealso marker="#M_e">enabled or disabled</seealso>. By default all allocators are enabled. diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in index 02a9a9a93b..915cad4e18 100644 --- a/erts/emulator/Makefile.in +++ b/erts/emulator/Makefile.in @@ -251,7 +251,7 @@ endif HIPEBEAMLDFLAGS=$($(ARCH)BEAMLDFLAGS) endif -ERTS_ENABLE_KERNEL_POLL=@ERTS_ENABLE_KERNEL_POLL@ +ERTS_BUILD_FALLBACK_POLL=@ERTS_BUILD_FALLBACK_POLL@ # # @@ -928,17 +928,15 @@ $(STATIC_NIF_LIBS) $(STATIC_DRIVER_LIBS): echo "=== Leaving lib after making static libs" endif -ifeq ($(ERTS_ENABLE_KERNEL_POLL),yes) +ifeq ($(ERTS_BUILD_FALLBACK_POLL),yes) OS_OBJS += $(OBJDIR)/erl_poll.kp.o \ - $(OBJDIR)/erl_check_io.kp.o \ - $(OBJDIR)/erl_poll.nkp.o \ - $(OBJDIR)/erl_check_io.nkp.o + $(OBJDIR)/erl_poll.nkp.o else -OS_OBJS += $(OBJDIR)/erl_poll.o \ - $(OBJDIR)/erl_check_io.o +OS_OBJS += $(OBJDIR)/erl_poll.o endif -OS_OBJS += $(OBJDIR)/erl_mseg.o \ +OS_OBJS += $(OBJDIR)/erl_check_io.o \ + $(OBJDIR)/erl_mseg.o \ $(OBJDIR)/erl_mmap.o \ $(OBJDIR)/erl_$(ERLANG_OSTYPE)_sys_ddll.o \ $(OBJDIR)/erl_mtrace_sys_wrap.o \ @@ -1112,7 +1110,7 @@ else SED_PREFIX= endif -ifeq ($(ERTS_ENABLE_KERNEL_POLL),yes) +ifeq ($(ERTS_BUILD_FALLBACK_POLL),yes) SED_SUFFIX=;$(SED_REPL_POLL);$(SED_REPL_CHK_IO) else SED_SUFFIX= diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index 845cef24c7..88285d8be6 100644 --- a/erts/emulator/beam/erl_alloc.c +++ b/erts/emulator/beam/erl_alloc.c @@ -385,6 +385,7 @@ set_default_temp_alloc_opts(struct au_init *ip) SET_DEFAULT_ALLOC_OPTS(ip); ip->enable = AU_ALLOC_DEFAULT_ENABLE(1); ip->thr_spec = 1; + ip->disable_allowed = 0; ip->carrier_migration_allowed = 0; ip->atype = AFIT; ip->init.util.name_prefix = "temp_"; @@ -644,8 +645,6 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop) = ERTS_MONITOR_SH_SIZE * sizeof(Uint); fix_type_sizes[ERTS_ALC_FIX_TYPE_IX(ERTS_ALC_T_NLINK_SH)] = ERTS_LINK_SH_SIZE * sizeof(Uint); - fix_type_sizes[ERTS_ALC_FIX_TYPE_IX(ERTS_ALC_T_DRV_EV_D_STATE)] - = sizeof(ErtsDrvEventDataState); fix_type_sizes[ERTS_ALC_FIX_TYPE_IX(ERTS_ALC_T_DRV_SEL_D_STATE)] = sizeof(ErtsDrvSelectDataState); fix_type_sizes[ERTS_ALC_FIX_TYPE_IX(ERTS_ALC_T_NIF_SEL_D_STATE)] @@ -1494,8 +1493,7 @@ handle_args(int *argc, char **argv, erts_alc_hndl_args_init_t *init) &init->ll_alloc, &init->driver_alloc, &init->fix_alloc, - &init->sl_alloc, - &init->temp_alloc + &init->sl_alloc /* test_alloc not affected by +Mea??? or +Mu??? */ }; int aui_sz = (int) sizeof(aui)/sizeof(aui[0]); diff --git a/erts/emulator/beam/erl_alloc.h b/erts/emulator/beam/erl_alloc.h index c661d0b226..117f96a4ad 100644 --- a/erts/emulator/beam/erl_alloc.h +++ b/erts/emulator/beam/erl_alloc.h @@ -402,6 +402,32 @@ NAME##_free(TYPE *p) \ erts_free(ALCT, (void *) p); \ } +#define ERTS_THR_PREF_AUX(NAME, TYPE, PASZ) \ +ERTS_THR_PREF_PRE_ALLOC_IMPL(NAME##_pre, TYPE, PASZ) + +#define ERTS_THR_PREF_QUICK_ALLOC_IMPL(NAME, TYPE, PASZ, ALCT) \ +ERTS_THR_PREF_AUX(NAME, TYPE, PASZ) \ +static void \ +init_##NAME##_alloc(int nthreads) \ +{ \ + init_##NAME##_pre_alloc(nthreads); \ +} \ +static ERTS_INLINE TYPE * \ +NAME##_alloc(void) \ +{ \ + TYPE *res = NAME##_pre_alloc(); \ + if (!res) \ + res = erts_alloc(ALCT, sizeof(TYPE)); \ + return res; \ +} \ +static ERTS_INLINE void \ +NAME##_free(TYPE *p) \ +{ \ + if (!NAME##_pre_free(p)) \ + erts_free(ALCT, (void *) p); \ +} + + #ifdef DEBUG #define ERTS_PRE_ALLOC_SIZE(SZ) ((SZ) < 1000 ? (SZ)/10 + 10 : 100) #define ERTS_PRE_ALLOC_CLOBBER(P, T) memset((void *) (P), 0xfd, sizeof(T)) @@ -482,7 +508,8 @@ init_##NAME##_alloc(void) \ { \ sspa_data_##NAME##__ = \ erts_sspa_create(sizeof(union erts_sspa_##NAME##__), \ - ERTS_PRE_ALLOC_SIZE((PASZ))); \ + ERTS_PRE_ALLOC_SIZE((PASZ)), \ + 0, NULL); \ } \ \ static TYPE * \ @@ -504,6 +531,57 @@ NAME##_free(TYPE *p) \ (char *) p); \ } + +#define ERTS_THR_PREF_PRE_ALLOC_IMPL(NAME, TYPE, PASZ) \ +union erts_sspa_##NAME##__ { \ + erts_sspa_blk_t next; \ + TYPE type; \ +}; \ + \ +static erts_sspa_data_t *sspa_data_##NAME##__; \ + \ +static void \ +init_##NAME##_alloc(int nthreads) \ +{ \ + sspa_data_##NAME##__ = \ + erts_sspa_create(sizeof(union erts_sspa_##NAME##__), \ + ERTS_PRE_ALLOC_SIZE((PASZ)), \ + nthreads, \ + #NAME); \ +} \ + \ +void \ +erts_##NAME##_alloc_init_thread(void) \ +{ \ + int id = erts_atomic_inc_read_nob(&sspa_data_##NAME##__->id_generator);\ + if (id > sspa_data_##NAME##__->nthreads) { \ + erts_exit(ERTS_ABORT_EXIT, \ + "%s:%d:%s(): Too many threads for '" #NAME "'\n", \ + __FILE__, __LINE__, __func__); \ + } \ + erts_tsd_set(sspa_data_##NAME##__->tsd_key, (void*)(SWord)id); \ +} \ + \ +static TYPE * \ +NAME##_alloc(void) \ +{ \ + int id = (int)(SWord)erts_tsd_get(sspa_data_##NAME##__->tsd_key); \ + if (id == 0) \ + return NULL; \ + return (TYPE *) erts_sspa_alloc(sspa_data_##NAME##__, \ + id-1); \ +} \ + \ +static int \ +NAME##_free(TYPE *p) \ +{ \ + int id = (int)(SWord)erts_tsd_get(sspa_data_##NAME##__->tsd_key); \ + return erts_sspa_free(sspa_data_##NAME##__, \ + id - 1, \ + (char *) p); \ +} + + #ifdef DEBUG #define ERTS_ALC_DBG_BLK_SZ(PTR) (*(((UWord *) (PTR)) - 2)) #endif /* #ifdef DEBUG */ diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types index 11884299e2..bf5487d31e 100644 --- a/erts/emulator/beam/erl_alloc.types +++ b/erts/emulator/beam/erl_alloc.types @@ -351,11 +351,9 @@ type SYS_CHECK_REQ SHORT_LIVED SYSTEM system_check_request type TEMP_TERM TEMPORARY SYSTEM temp_term type DRV_TAB LONG_LIVED SYSTEM drv_tab type DRV_EV_STATE LONG_LIVED SYSTEM driver_event_state -type DRV_EV_D_STATE FIXED_SIZE SYSTEM driver_event_data_state type DRV_SEL_D_STATE FIXED_SIZE SYSTEM driver_select_data_state type NIF_SEL_D_STATE FIXED_SIZE SYSTEM enif_select_data_state type FD_LIST SHORT_LIVED SYSTEM fd_list -type ACTIVE_FD_ARR SHORT_LIVED SYSTEM active_fd_array type POLLSET LONG_LIVED SYSTEM pollset type POLLSET_UPDREQ SHORT_LIVED SYSTEM pollset_update_req type POLL_FDS LONG_LIVED SYSTEM poll_fds diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c index 36939d6acc..17c936f041 100644 --- a/erts/emulator/beam/erl_bif_info.c +++ b/erts/emulator/beam/erl_bif_info.c @@ -46,6 +46,7 @@ #include "erl_thr_progress.h" #include "erl_bif_unique.h" #include "erl_map.h" +#include "erl_check_io.h" #define ERTS_PTAB_WANT_DEBUG_FUNCS__ #include "erl_ptab.h" #include "erl_time.h" @@ -97,9 +98,6 @@ static char erts_system_version[] = ("Erlang/OTP " ERLANG_OTP_RELEASE #ifdef HIPE " [hipe]" #endif -#ifdef ERTS_ENABLE_KERNEL_POLL - " [kernel-poll:%s]" -#endif #ifdef ET_DEBUG #if ET_DEBUG " [type-assertions]" @@ -371,9 +369,6 @@ erts_print_system_version(fmtfn_t to, void *arg, Process *c_p) , total, online , dirty_cpu, dirty_cpu_onln, dirty_io , erts_async_max_threads -#ifdef ERTS_ENABLE_KERNEL_POLL - , erts_use_kernel_poll ? "true" : "false" -#endif ); } @@ -2695,7 +2690,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) BIF_RET(make_small(CONTEXT_REDS)); } else if (ERTS_IS_ATOM_STR("kernel_poll", BIF_ARG_1)) { #ifdef ERTS_ENABLE_KERNEL_POLL - BIF_RET(erts_use_kernel_poll ? am_true : am_false); + BIF_RET(am_true); #else BIF_RET(am_false); #endif @@ -2848,7 +2843,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) BIF_RET(am_disabled); } else if (ERTS_IS_ATOM_STR("eager_check_io",BIF_ARG_1)) { - BIF_RET(erts_eager_check_io ? am_true : am_false); + BIF_RET(am_true); } else if (ERTS_IS_ATOM_STR("literal_test",BIF_ARG_1)) { #ifdef ERTS_HAVE_IS_IN_LITERAL_RANGE @@ -3580,8 +3575,8 @@ BIF_RETTYPE erts_debug_get_internal_state_1(BIF_ALIST_1) (Uint) ciodi.no_used_fds), erts_bld_uint(hpp, szp, (Uint) ciodi.no_driver_select_structs), - erts_bld_uint(hpp, szp, - (Uint) ciodi.no_driver_event_structs)); + erts_bld_uint(hpp, szp, + (Uint) ciodi.no_enif_select_structs)); if (hpp) break; hp = HAlloc(BIF_P, sz); diff --git a/erts/emulator/beam/erl_driver.h b/erts/emulator/beam/erl_driver.h index 5ad616fec3..d5379a40d5 100644 --- a/erts/emulator/beam/erl_driver.h +++ b/erts/emulator/beam/erl_driver.h @@ -148,14 +148,6 @@ typedef struct _erl_drv_event* ErlDrvEvent; /* An event to be selected on. */ typedef struct _erl_drv_port* ErlDrvPort; /* A port descriptor. */ typedef struct _erl_drv_port* ErlDrvThreadData; /* Thread data. */ -#if !defined(__WIN32__) && !defined(_WIN32) && !defined(_WIN32_) && !defined(USE_SELECT) -struct erl_drv_event_data { - short events; - short revents; -}; -#endif -typedef struct erl_drv_event_data *ErlDrvEventData; /* Event data */ - typedef struct { unsigned long megasecs; unsigned long secs; @@ -270,10 +262,7 @@ typedef struct erl_drv_entry { unsigned int *flags); /* Works mostly like 'control', a synchronous call into the driver. */ - void (*event)(ErlDrvData drv_data, ErlDrvEvent event, - ErlDrvEventData event_data); - /* Called when an event selected by - driver_event() has occurred */ + void (*unused_event_callback)(void); int extended_marker; /* ERL_DRV_EXTENDED_MARKER */ int major_version; /* ERL_DRV_EXTENDED_MAJOR_VERSION */ int minor_version; /* ERL_DRV_EXTENDED_MINOR_VERSION */ @@ -340,8 +329,6 @@ EXTERN void erl_drv_busy_msgq_limits(ErlDrvPort port, ErlDrvSizeT *high); EXTERN int driver_select(ErlDrvPort port, ErlDrvEvent event, int mode, int on); -EXTERN int driver_event(ErlDrvPort port, ErlDrvEvent event, - ErlDrvEventData event_data); EXTERN int driver_output(ErlDrvPort port, char *buf, ErlDrvSizeT len); EXTERN int driver_output2(ErlDrvPort port, char *hbuf, ErlDrvSizeT hlen, diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 8c14c86219..6cef9bd0e3 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -49,6 +49,7 @@ #include "erl_bif_unique.h" #define ERTS_WANT_TIMER_WHEEL_API #include "erl_time.h" +#include "erl_check_io.h" #ifdef HIPE #include "hipe_mode_switch.h" /* for hipe_mode_switch_init() */ @@ -76,8 +77,10 @@ const char etp_otp_release[] = ERLANG_OTP_RELEASE; const char etp_compile_date[] = ERLANG_COMPILE_DATE; const char etp_arch[] = ERLANG_ARCHITECTURE; #ifdef ERTS_ENABLE_KERNEL_POLL +const int erts_use_kernel_poll = 1; const int etp_kernel_poll_support = 1; #else +const int erts_use_kernel_poll = 0; const int etp_kernel_poll_support = 0; #endif #if defined(ARCH_64) @@ -203,16 +206,16 @@ int erts_no_line_info = 0; /* -L: Don't load line information */ */ ErtsModifiedTimings erts_modified_timings[] = { - /* 0 */ {make_small(0), CONTEXT_REDS, INPUT_REDUCTIONS}, - /* 1 */ {make_small(0), (3*CONTEXT_REDS)/4, 2*INPUT_REDUCTIONS}, - /* 2 */ {make_small(0), CONTEXT_REDS/2, INPUT_REDUCTIONS/2}, - /* 3 */ {make_small(0), (7*CONTEXT_REDS)/8, 3*INPUT_REDUCTIONS}, - /* 4 */ {make_small(0), CONTEXT_REDS/3, 3*INPUT_REDUCTIONS}, - /* 5 */ {make_small(0), (10*CONTEXT_REDS)/11, INPUT_REDUCTIONS/2}, - /* 6 */ {make_small(1), CONTEXT_REDS/4, 2*INPUT_REDUCTIONS}, - /* 7 */ {make_small(1), (5*CONTEXT_REDS)/7, INPUT_REDUCTIONS/3}, - /* 8 */ {make_small(10), CONTEXT_REDS/5, 3*INPUT_REDUCTIONS}, - /* 9 */ {make_small(10), (6*CONTEXT_REDS)/7, INPUT_REDUCTIONS/4} + /* 0 */ {make_small(0), CONTEXT_REDS}, + /* 1 */ {make_small(0), (3*CONTEXT_REDS)/4}, + /* 2 */ {make_small(0), CONTEXT_REDS/2}, + /* 3 */ {make_small(0), (7*CONTEXT_REDS)/8}, + /* 4 */ {make_small(0), CONTEXT_REDS/3}, + /* 5 */ {make_small(0), (10*CONTEXT_REDS)/11}, + /* 6 */ {make_small(1), CONTEXT_REDS/4}, + /* 7 */ {make_small(1), (5*CONTEXT_REDS)/7}, + /* 8 */ {make_small(10), CONTEXT_REDS/5}, + /* 9 */ {make_small(10), (6*CONTEXT_REDS)/7} }; #define ERTS_MODIFIED_TIMING_LEVELS \ @@ -310,8 +313,9 @@ erl_init(int ncpu, erts_init_sys_common_misc(); erts_init_process(ncpu, proc_tab_sz, legacy_proc_tab); erts_init_scheduling(no_schedulers, - no_schedulers_online - , no_dirty_cpu_schedulers, + no_schedulers_online, + erts_no_poll_threads, + no_dirty_cpu_schedulers, no_dirty_cpu_schedulers_online, no_dirty_io_schedulers ); @@ -558,9 +562,19 @@ void erts_usage(void) erts_fprintf(stderr, "-hmqd val set default message queue data flag for processes,\n"); erts_fprintf(stderr, " valid values are: off_heap | on_heap\n"); + erts_fprintf(stderr, "-IOp number set number of pollsets to be used to poll for I/O,\n"); + erts_fprintf(stderr, " This value has to be equal or smaller than the\n"); + erts_fprintf(stderr, " number of poll threads. If the current platform\n"); + erts_fprintf(stderr, " does not support concurrent update of pollsets\n"); + erts_fprintf(stderr, " this value is ignored.\n"); + erts_fprintf(stderr, "-IOt number set number of threads to be used to poll for I/O\n"); + erts_fprintf(stderr, "-IOPp number set number of pollsets as a percentage of the\n"); + erts_fprintf(stderr, " number of poll threads."); + erts_fprintf(stderr, "-IOPt number set number of threads to be used to poll for I/O\n"); + erts_fprintf(stderr, " as a percentage of the number of schedulers."); + /* erts_fprintf(stderr, "-i module set the boot module (default init)\n"); */ - erts_fprintf(stderr, "-K boolean enable or disable kernel poll\n"); erts_fprintf(stderr, "-n[s|a|d] Control behavior of signals to ports\n"); erts_fprintf(stderr, " Note that this flag is deprecated!\n"); erts_fprintf(stderr, "-M<X> <Y> memory allocator switches,\n"); @@ -727,7 +741,6 @@ early_init(int *argc, char **argv) /* char envbuf[21]; /* enough for any 64-bit integer */ size_t envbufsz; - erts_save_emu_args(*argc, argv); erts_sched_compact_load = 1; @@ -839,6 +852,7 @@ early_init(int *argc, char **argv) /* } break; } + case 'S' : if (argv[i][2] == 'P') { int ptot, ponln; @@ -1100,6 +1114,9 @@ early_init(int *argc, char **argv) /* erts_alloc_init(argc, argv, &alloc_opts); /* Handles (and removes) -M flags. */ /* Require allocators */ + + erts_init_check_io(argc, argv); + /* * Thread progress management: * @@ -1107,13 +1124,14 @@ early_init(int *argc, char **argv) /* * ** Scheduler threads (see erl_process.c) * ** Aux thread (see erl_process.c) * ** Sys message dispatcher thread (see erl_trace.c) + * ** IO Poll threads (see erl_check_io.c) * * * Unmanaged threads that need to register: * ** Async threads (see erl_async.c) * ** Dirty scheduler threads */ erts_thr_progress_init(no_schedulers, - no_schedulers+2, + no_schedulers+2+erts_no_poll_threads, erts_async_max_threads + erts_no_dirty_cpu_schedulers + erts_no_dirty_io_schedulers @@ -1561,16 +1579,6 @@ erl_start(int argc, char **argv) have_break_handler = 0; break; - case 'K': - /* If kernel poll support is present, - erl_sys_args() will remove the K parameter - and value */ - get_arg(argv[i]+2, argv[i+1], &i); - erts_fprintf(stderr, - "kernel-poll not supported; \"K\" parameter ignored\n", - arg); - break; - case 'n': arg = get_arg(argv[i]+2, argv[i+1], &i); switch (arg[0]) { @@ -1740,22 +1748,9 @@ erl_start(int argc, char **argv) erts_usage(); } } - else if (has_prefix("ecio", sub_param)) { - arg = get_arg(sub_param+4, argv[i+1], &i); -#ifndef __OSE__ - if (sys_strcmp("true", arg) == 0) - erts_eager_check_io = 1; - else -#endif - if (sys_strcmp("false", arg) == 0) - erts_eager_check_io = 0; - else { - erts_fprintf(stderr, - "bad schedule eager check I/O value '%s'\n", - arg); - erts_usage(); - } - } + else if (has_prefix("ecio", sub_param)) { + /* ignore argument, eager check io no longer used */ + } else if (has_prefix("pp", sub_param)) { arg = get_arg(sub_param+2, argv[i+1], &i); if (sys_strcmp(arg, "true") == 0) diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c index f81c90818f..4cdef0200f 100644 --- a/erts/emulator/beam/erl_lock_check.c +++ b/erts/emulator/beam/erl_lock_check.c @@ -113,7 +113,6 @@ static erts_lc_lock_order_t erts_lock_order[] = { { "drv_ev_state_grow", NULL, }, { "drv_ev_state", "address" }, { "safe_hash", "address" }, - { "pollset_rm_list", NULL }, { "removed_fd_pre_alloc_lock", "address" }, { "state_prealloc", NULL }, { "schdlr_sspnd", NULL }, diff --git a/erts/emulator/beam/erl_lock_count.c b/erts/emulator/beam/erl_lock_count.c index d2e8f47d59..1ae6076b12 100644 --- a/erts/emulator/beam/erl_lock_count.c +++ b/erts/emulator/beam/erl_lock_count.c @@ -554,16 +554,6 @@ erts_lock_flags_t erts_lcnt_get_category_mask() { return lcnt_category_mask__; } -#ifdef ERTS_ENABLE_KERNEL_POLL -/* erl_poll/erl_check_io only exports one of these variants at a time, and we - * may need to use either one depending on emulator startup flags. */ -void erts_lcnt_update_pollset_locks_nkp(int); -void erts_lcnt_update_pollset_locks_kp(int); - -void erts_lcnt_update_cio_locks_nkp(int); -void erts_lcnt_update_cio_locks_kp(int); -#endif - void erts_lcnt_set_category_mask(erts_lock_flags_t mask) { erts_lock_flags_t changed_categories; @@ -590,19 +580,7 @@ void erts_lcnt_set_category_mask(erts_lock_flags_t mask) { } if(changed_categories & ERTS_LOCK_FLAGS_CATEGORY_IO) { -#ifdef ERTS_ENABLE_KERNEL_POLL - if(erts_use_kernel_poll) { - erts_lcnt_update_pollset_locks_kp(mask & ERTS_LOCK_FLAGS_CATEGORY_IO); - erts_lcnt_update_cio_locks_kp(mask & ERTS_LOCK_FLAGS_CATEGORY_IO); - } else { - erts_lcnt_update_pollset_locks_nkp(mask & ERTS_LOCK_FLAGS_CATEGORY_IO); - erts_lcnt_update_cio_locks_nkp(mask & ERTS_LOCK_FLAGS_CATEGORY_IO); - } -#else - erts_lcnt_update_pollset_locks(mask & ERTS_LOCK_FLAGS_CATEGORY_IO); erts_lcnt_update_cio_locks(mask & ERTS_LOCK_FLAGS_CATEGORY_IO); -#endif - erts_lcnt_update_driver_locks(mask & ERTS_LOCK_FLAGS_CATEGORY_IO); erts_lcnt_update_port_locks(mask & ERTS_LOCK_FLAGS_CATEGORY_IO); } diff --git a/erts/emulator/beam/erl_msacc.h b/erts/emulator/beam/erl_msacc.h index 8349a7e297..2d4637f800 100644 --- a/erts/emulator/beam/erl_msacc.h +++ b/erts/emulator/beam/erl_msacc.h @@ -318,8 +318,8 @@ ERTS_GLB_INLINE void erts_msacc_set_state_um__(ErtsMsAcc *msacc, Uint new_state, int increment) { if (ERTS_UNLIKELY(msacc->unmanaged)) { erts_mtx_lock(&msacc->mtx); - msacc->state = new_state; if (ERTS_LIKELY(!msacc->perf_counter)) { + msacc->state = new_state; erts_mtx_unlock(&msacc->mtx); return; } diff --git a/erts/emulator/beam/erl_port_task.c b/erts/emulator/beam/erl_port_task.c index 14977dfa17..a588477320 100644 --- a/erts/emulator/beam/erl_port_task.c +++ b/erts/emulator/beam/erl_port_task.c @@ -36,6 +36,7 @@ #include "erl_check_io.h" #include "dtrace-wrapper.h" #include "lttng-wrapper.h" +#include "erl_check_io.h" #include <stdarg.h> /* @@ -90,8 +91,6 @@ static void chk_task_queues(Port *pp, ErtsPortTask *execq, int processing_busy_q erts_atomic_read_nob(&(PP)->run_queue))); \ } while (0) -erts_atomic_t erts_port_task_outstanding_io_tasks; - #define ERTS_PT_STATE_SCHEDULED 0 #define ERTS_PT_STATE_ABORTED 1 #define ERTS_PT_STATE_EXECUTING 2 @@ -99,7 +98,6 @@ erts_atomic_t erts_port_task_outstanding_io_tasks; typedef union { struct { /* I/O tasks */ ErlDrvEvent event; - ErlDrvEventData event_data; } io; struct { ErtsProc2PortSigCallback callback; @@ -149,10 +147,10 @@ static void begin_port_cleanup(Port *pp, ErtsPortTask **execq, int *processing_busy_q_p); -ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(port_task, - ErtsPortTask, - 1000, - ERTS_ALC_T_PORT_TASK) +ERTS_THR_PREF_QUICK_ALLOC_IMPL(port_task, + ErtsPortTask, + 1000, + ERTS_ALC_T_PORT_TASK) ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(busy_caller_table, ErtsPortTaskBusyCallerTable, @@ -585,8 +583,9 @@ reset_executed_io_task_handle(ErtsPortTask *ptp) { if (ptp->u.alive.handle) { ASSERT(ptp == handle2task(ptp->u.alive.handle)); - erts_io_notify_port_task_executed(ptp->u.alive.handle); - reset_port_task_handle(ptp->u.alive.handle); + /* The port task handle is reset inside task_executed */ + erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle, + reset_port_task_handle); } } @@ -1308,21 +1307,7 @@ erts_port_task_abort(ErtsPortTaskHandle *pthp) if (old_state != ERTS_PT_STATE_SCHEDULED) res = - 1; /* Task already aborted, executing, or executed */ else { - reset_port_task_handle(pthp); - - switch (ptp->type) { - case ERTS_PORT_TASK_INPUT: - case ERTS_PORT_TASK_OUTPUT: - case ERTS_PORT_TASK_EVENT: - ASSERT(erts_atomic_read_nob( - &erts_port_task_outstanding_io_tasks) > 0); - erts_atomic_dec_relb(&erts_port_task_outstanding_io_tasks); - break; - default: - break; - } - res = 0; } } @@ -1459,16 +1444,6 @@ erts_port_task_schedule(Eterm id, va_start(argp, type); ptp->u.alive.td.io.event = va_arg(argp, ErlDrvEvent); va_end(argp); - erts_atomic_inc_relb(&erts_port_task_outstanding_io_tasks); - break; - } - case ERTS_PORT_TASK_EVENT: { - va_list argp; - va_start(argp, type); - ptp->u.alive.td.io.event = va_arg(argp, ErlDrvEvent); - ptp->u.alive.td.io.event_data = va_arg(argp, ErlDrvEventData); - va_end(argp); - erts_atomic_inc_relb(&erts_port_task_outstanding_io_tasks); break; } case ERTS_PORT_TASK_PROC_SIG: { @@ -1644,13 +1619,12 @@ erts_port_task_free_port(Port *pp) * scheduling of processes. Run-queue lock should be held by caller. */ -int +void erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) { Port *pp; ErtsPortTask *execq; int processing_busy_q; - int res = 0; int vreds = 0; int reds = 0; erts_aint_t io_tasks_executed = 0; @@ -1665,7 +1639,6 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) pp = pop_port(runq); if (!pp) { - res = 0; goto done; } @@ -1768,17 +1741,6 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) reset_executed_io_task_handle(ptp); io_tasks_executed++; break; - case ERTS_PORT_TASK_EVENT: - reds = ERTS_PORT_REDS_EVENT; - ASSERT((state & ERTS_PORT_SFLGS_DEAD) == 0); - DTRACE_DRIVER(driver_event, pp); - LTTNG_DRIVER(driver_event, pp); - (*pp->drv_ptr->event)((ErlDrvData) pp->drv_data, - ptp->u.alive.td.io.event, - ptp->u.alive.td.io.event_data); - reset_executed_io_task_handle(ptp); - io_tasks_executed++; - break; case ERTS_PORT_TASK_PROC_SIG: { ErtsProc2PortSigData *sigdp = &ptp->u.alive.td.psig.data; reset_handle(ptp); @@ -1843,14 +1805,6 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) erts_unblock_fpe(fpe_was_unmasked); ERTS_MSACC_POP_STATE_M(); - - if (io_tasks_executed) { - ASSERT(erts_atomic_read_nob(&erts_port_task_outstanding_io_tasks) - >= io_tasks_executed); - erts_atomic_add_relb(&erts_port_task_outstanding_io_tasks, - -1*io_tasks_executed); - } - ASSERT(runq == (ErtsRunQueue *) erts_atomic_read_nob(&pp->run_queue)); active = finalize_exec(pp, &execq, processing_busy_q); @@ -1891,15 +1845,11 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) } done: - res = (erts_atomic_read_nob(&erts_port_task_outstanding_io_tasks) - != (erts_aint_t) 0); runq->scheduler->reductions += reds; ERTS_LC_ASSERT(erts_lc_runq_is_locked(runq)); ERTS_PORT_REDUCTIONS_EXECUTED(esdp, runq, reds); - - return res; } static void @@ -2031,13 +1981,6 @@ begin_port_cleanup(Port *pp, ErtsPortTask **execqp, int *processing_busy_q_p) DO_WRITE, 1); break; - case ERTS_PORT_TASK_EVENT: - erts_stale_drv_select(pp->common.id, - ERTS_Port2ErlDrvPort(pp), - ptp->u.alive.td.io.event, - 0, - 1); - break; case ERTS_PORT_TASK_DIST_CMD: break; case ERTS_PORT_TASK_PROC_SIG: { @@ -2151,8 +2094,7 @@ erts_dequeue_port(ErtsRunQueue *rq) void erts_port_task_init(void) { - erts_atomic_init_nob(&erts_port_task_outstanding_io_tasks, - (erts_aint_t) 0); - init_port_task_alloc(); + init_port_task_alloc(erts_no_schedulers + erts_no_poll_threads + + 1); /* aux_thread */ init_busy_caller_table_alloc(); } diff --git a/erts/emulator/beam/erl_port_task.h b/erts/emulator/beam/erl_port_task.h index 561f4ca936..ae78a7d8a3 100644 --- a/erts/emulator/beam/erl_port_task.h +++ b/erts/emulator/beam/erl_port_task.h @@ -56,17 +56,11 @@ typedef erts_atomic_t ErtsPortTaskHandle; typedef enum { ERTS_PORT_TASK_INPUT, ERTS_PORT_TASK_OUTPUT, - ERTS_PORT_TASK_EVENT, ERTS_PORT_TASK_TIMEOUT, ERTS_PORT_TASK_DIST_CMD, ERTS_PORT_TASK_PROC_SIG } ErtsPortTaskType; -#ifdef ERTS_INCLUDE_SCHEDULER_INTERNALS -/* NOTE: Do not access any of the exported variables directly */ -extern erts_atomic_t erts_port_task_outstanding_io_tasks; -#endif - #define ERTS_PTS_FLG_IN_RUNQ (((erts_aint32_t) 1) << 0) #define ERTS_PTS_FLG_EXEC (((erts_aint32_t) 1) << 1) #define ERTS_PTS_FLG_HAVE_TASKS (((erts_aint32_t) 1) << 2) @@ -140,10 +134,6 @@ ERTS_GLB_INLINE void erts_port_task_sched_unlock(ErtsPortTaskSched *ptsp); ERTS_GLB_INLINE int erts_port_task_sched_lock_is_locked(ErtsPortTaskSched *ptsp); ERTS_GLB_INLINE void erts_port_task_sched_enter_exiting_state(ErtsPortTaskSched *ptsp); -#ifdef ERTS_INCLUDE_SCHEDULER_INTERNALS -ERTS_GLB_INLINE int erts_port_task_have_outstanding_io_tasks(void); -#endif - #if ERTS_GLB_INLINE_INCL_FUNC_DEF ERTS_GLB_INLINE void @@ -221,24 +211,16 @@ erts_port_task_sched_enter_exiting_state(ErtsPortTaskSched *ptsp) erts_atomic32_read_bor_nob(&ptsp->flags, ERTS_PTS_FLG_EXITING); } -#ifdef ERTS_INCLUDE_SCHEDULER_INTERNALS - -ERTS_GLB_INLINE int -erts_port_task_have_outstanding_io_tasks(void) -{ - return (erts_atomic_read_acqb(&erts_port_task_outstanding_io_tasks) - != 0); -} - -#endif /* ERTS_INCLUDE_SCHEDULER_INTERNALS */ - #endif #ifdef ERTS_INCLUDE_SCHEDULER_INTERNALS -int erts_port_task_execute(ErtsRunQueue *, Port **); +void erts_port_task_execute(ErtsRunQueue *, Port **); void erts_port_task_init(void); #endif +/* generated for 'port_task' quick allocator */ +void erts_port_task_pre_alloc_init_thread(void); + void erts_port_task_tmp_handle_detach(ErtsPortTaskHandle *); int erts_port_task_abort(ErtsPortTaskHandle *); diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index f3f99f1835..61fdf86a56 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -24,6 +24,8 @@ # include "config.h" #endif +#define ERTS_WANT_BREAK_HANDLING + #include <stddef.h> /* offsetof() */ #include "sys.h" #include "erl_vm.h" @@ -49,6 +51,8 @@ #define ERTS_WANT_TIMER_WHEEL_API #include "erl_time.h" #include "erl_nfunc_sched.h" +#include "erl_check_io.h" +#include "erl_poll.h" #define ERTS_CHECK_TIME_REDS CONTEXT_REDS #define ERTS_DELAYED_WAKEUP_INFINITY (~(Uint64) 0) @@ -170,7 +174,6 @@ extern BeamInstr beam_exit[]; extern BeamInstr beam_continue_exit[]; int ERTS_WRITE_UNLIKELY(erts_default_spo_flags) = SPO_ON_HEAP_MSGQ; -int ERTS_WRITE_UNLIKELY(erts_eager_check_io) = 1; int ERTS_WRITE_UNLIKELY(erts_sched_compact_load); int ERTS_WRITE_UNLIKELY(erts_sched_balance_util) = 0; Uint ERTS_WRITE_UNLIKELY(erts_no_schedulers); @@ -198,7 +201,7 @@ int erts_dio_sched_thread_suggested_stack_size = -1; ErtsLcPSDLocks erts_psd_required_locks[ERTS_PSD_SIZE]; #endif -static struct { +static struct ErtsSchedBusyWait_ { int aux_work; int tse; int sys_schedule; @@ -207,25 +210,26 @@ static struct { int erts_disable_proc_not_running_opt; static ErtsAuxWorkData *aux_thread_aux_work_data; +static ErtsAuxWorkData *poll_thread_aux_work_data; #define ERTS_SCHDLR_SSPND_CHNG_NMSB (((erts_aint32_t) 1) << 0) #define ERTS_SCHDLR_SSPND_CHNG_MSB (((erts_aint32_t) 1) << 1) #define ERTS_SCHDLR_SSPND_CHNG_ONLN (((erts_aint32_t) 1) << 2) #define ERTS_SCHDLR_SSPND_CHNG_DCPU_ONLN (((erts_aint32_t) 1) << 3) -typedef struct { +typedef struct ErtsMultiSchedulingBlock_ { int ongoing; ErtsProcList *blckrs; ErtsProcList *chngq; } ErtsMultiSchedulingBlock; -typedef struct { +typedef struct ErtsSchedTypeCounters_ { Uint32 normal; Uint32 dirty_cpu; Uint32 dirty_io; } ErtsSchedTypeCounters; -static struct { +static struct ErtsSchedSuspend_ { erts_mtx_t mtx; ErtsSchedTypeCounters online; ErtsSchedTypeCounters curr_online; @@ -364,9 +368,6 @@ erts_sched_stat_t erts_sched_stat; static erts_tsd_key_t ERTS_WRITE_UNLIKELY(sched_data_key); -static erts_atomic32_t function_calls; - -static erts_atomic32_t doing_sys_schedule; static erts_atomic32_t no_empty_run_queues; long erts_runq_supervision_interval = 0; static ethr_event runq_supervision_event; @@ -434,6 +435,7 @@ typedef union { static ErtsAlignedSchedulerSleepInfo *aligned_sched_sleep_info; static ErtsAlignedSchedulerSleepInfo *aligned_dirty_cpu_sched_sleep_info; static ErtsAlignedSchedulerSleepInfo *aligned_dirty_io_sched_sleep_info; +static ErtsAlignedSchedulerSleepInfo *aligned_poll_thread_sleep_info; static Uint last_reductions; static Uint last_exact_reductions; @@ -501,9 +503,13 @@ ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(proclist, 200, ERTS_ALC_T_PROC_LIST) +#define ERTS_POLL_THREAD_SLEEP_INFO_IX(IX) \ + (ASSERT(0 <= ((int) (IX)) \ + && ((int) (IX)) < ((int) erts_no_poll_threads)), \ + &aligned_poll_thread_sleep_info[(IX)].ssi) #define ERTS_SCHED_SLEEP_INFO_IX(IX) \ - (ASSERT(-1 <= ((int) (IX)) \ - && ((int) (IX)) < ((int) erts_no_schedulers)), \ + (ASSERT(((int)-1) <= ((int) (IX)) \ + && ((int) (IX)) < ((int) erts_no_schedulers)), \ &aligned_sched_sleep_info[(IX)].ssi) #define ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(IX) \ (ASSERT(0 <= ((int) (IX)) \ @@ -1135,13 +1141,11 @@ dirty_sched_wall_time_change(ErtsSchedulerData *esdp, int working) mod++; erts_atomic32_set_nob(&esdp->sched_wall_time.u.mod, mod); -#if 0 if (!working) { - ERTS_MSACC_SET_STATE_M_X(ERTS_MSACC_STATE_BUSY_WAIT); + ERTS_MSACC_SET_STATE_X(ERTS_MSACC_STATE_BUSY_WAIT); } else { - ERTS_MSACC_SET_STATE_M_X(ERTS_MSACC_STATE_OTHER); + ERTS_MSACC_SET_STATE_X(ERTS_MSACC_STATE_OTHER); } -#endif } @@ -1559,18 +1563,19 @@ erts_psd_set_init(Process *p, int ix, void *data) void -erts_sched_finish_poke(ErtsSchedulerSleepInfo *ssi, erts_aint32_t flags) +erts_sched_finish_poke(ErtsSchedulerSleepInfo *ssi, + erts_aint32_t flags) { switch (flags & ERTS_SSI_FLGS_SLEEP_TYPE) { case ERTS_SSI_FLG_POLL_SLEEPING: - erts_sys_schedule_interrupt(1); + erts_check_io_interrupt(ssi->psi, 1); break; case ERTS_SSI_FLG_POLL_SLEEPING|ERTS_SSI_FLG_TSE_SLEEPING: /* * Thread progress blocking while poll sleeping; need * to signal on both... */ - erts_sys_schedule_interrupt(1); + erts_check_io_interrupt(ssi->psi, 1); /* fall through */ case ERTS_SSI_FLG_TSE_SLEEPING: erts_tse_set(ssi->event); @@ -2830,36 +2835,6 @@ erts_set_aux_work_timeout(int ix, erts_aint32_t type, int enable) return old; } - - -static ERTS_INLINE void -sched_waiting_sys(Uint no, ErtsRunQueue *rq) -{ - ERTS_LC_ASSERT(erts_lc_runq_is_locked(rq)); - ASSERT(rq->waiting >= 0); - (void) ERTS_RUNQ_FLGS_SET(rq, (ERTS_RUNQ_FLG_OUT_OF_WORK - | ERTS_RUNQ_FLG_HALFTIME_OUT_OF_WORK)); - rq->waiting++; - rq->waiting *= -1; - rq->woken = 0; - if (erts_system_profile_flags.scheduler) - profile_scheduler(make_small(no), am_inactive); -} - -static ERTS_INLINE void -sched_active_sys(Uint no, ErtsRunQueue *rq) -{ - ERTS_LC_ASSERT(erts_lc_runq_is_locked(rq)); - - ASSERT(!ERTS_RUNQ_IX_IS_DIRTY(rq->ix)); - - ASSERT(rq->waiting < 0); - rq->waiting *= -1; - rq->waiting--; - if (erts_system_profile_flags.scheduler) - profile_scheduler(make_small(no), am_active); -} - Uint erts_active_schedulers(void) { @@ -2870,49 +2845,6 @@ erts_active_schedulers(void) return as; } - -static ERTS_INLINE void -clear_sys_scheduling(void) -{ - erts_atomic32_set_mb(&doing_sys_schedule, 0); -} - -static ERTS_INLINE int -try_set_sys_scheduling(void) -{ - return 0 == erts_atomic32_cmpxchg_acqb(&doing_sys_schedule, 1, 0); -} - - -static ERTS_INLINE int -prepare_for_sys_schedule(int non_blocking) -{ - if (non_blocking && erts_eager_check_io) { - return try_set_sys_scheduling(); - } - else { - while (!erts_port_task_have_outstanding_io_tasks() - && try_set_sys_scheduling()) { - if (!erts_port_task_have_outstanding_io_tasks()) - return 1; - clear_sys_scheduling(); - } - return 0; - } -} - - -static ERTS_INLINE void -sched_change_waiting_sys_to_waiting(Uint no, ErtsRunQueue *rq) -{ - ERTS_LC_ASSERT(erts_lc_runq_is_locked(rq)); - - ASSERT(!ERTS_RUNQ_IX_IS_DIRTY(rq->ix)); - - ASSERT(rq->waiting < 0); - rq->waiting *= -1; -} - static ERTS_INLINE void sched_waiting(Uint no, ErtsRunQueue *rq) { @@ -3084,7 +3016,7 @@ sched_set_sleeptype(ErtsSchedulerSleepInfo *ssi, erts_aint32_t sleep_type) erts_tse_reset(ssi->event); else { ASSERT(sleep_type == ERTS_SSI_FLG_POLL_SLEEPING); - erts_sys_schedule_interrupt(0); + erts_check_io_interrupt(ssi->psi, 0); } while (1) { @@ -3152,6 +3084,12 @@ thr_prgr_fin_wait(void *vssi) static void init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, char *dawwp); +void +erts_aux_thread_poke() +{ + erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(-1)); +} + static void * aux_thread(void *unused) { @@ -3160,6 +3098,7 @@ aux_thread(void *unused) erts_aint32_t aux_work; ErtsThrPrgrCallbacks callbacks; int thr_prgr_active = 1; + ERTS_MSACC_DECLARE_CACHE(); #ifdef ERTS_ENABLE_LOCK_CHECK { @@ -3168,6 +3107,7 @@ aux_thread(void *unused) } #endif + erts_port_task_pre_alloc_init_thread(); ssi->event = erts_tse_fetch(); erts_msacc_init_thread("aux", 1, 1); @@ -3182,9 +3122,14 @@ aux_thread(void *unused) init_aux_work_data(awdp, NULL, NULL); awdp->ssi = ssi; +#if ERTS_POLL_USE_FALLBACK + ssi->psi = erts_create_pollset_thread(-1); +#endif sched_prep_spin_wait(ssi); + ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_OTHER); + while (1) { erts_aint32_t flgs; @@ -3193,30 +3138,54 @@ aux_thread(void *unused) if (!thr_prgr_active) erts_thr_progress_active(NULL, thr_prgr_active = 1); aux_work = handle_aux_work(awdp, aux_work, 1); + ERTS_MSACC_UPDATE_CACHE(); if (aux_work && erts_thr_progress_update(NULL)) erts_thr_progress_leader_update(NULL); } if (!aux_work) { + +#ifdef ERTS_BREAK_REQUESTED + if (ERTS_BREAK_REQUESTED) + erts_do_break_handling(); +#endif + if (thr_prgr_active) erts_thr_progress_active(NULL, thr_prgr_active = 0); - erts_thr_progress_prepare_wait(NULL); + +#if ERTS_POLL_USE_FALLBACK flgs = sched_spin_wait(ssi, 0); if (flgs & ERTS_SSI_FLG_SLEEPING) { ASSERT(flgs & ERTS_SSI_FLG_WAITING); + flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING); + if (flgs & ERTS_SSI_FLG_SLEEPING) { + ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING); + ASSERT(flgs & ERTS_SSI_FLG_WAITING); + erts_check_io(ssi->psi); + } + } +#else + erts_thr_progress_prepare_wait(NULL); + + flgs = sched_spin_wait(ssi, 0); + + if (flgs & ERTS_SSI_FLG_SLEEPING) { flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_TSE_SLEEPING); if (flgs & ERTS_SSI_FLG_SLEEPING) { - int res; + int res; ASSERT(flgs & ERTS_SSI_FLG_TSE_SLEEPING); ASSERT(flgs & ERTS_SSI_FLG_WAITING); - do { - res = erts_tse_wait(ssi->event); - } while (res == EINTR); + ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP); + do { + res = erts_tse_wait(ssi->event); + } while (res == EINTR); + ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_OTHER); } - } - erts_thr_progress_finalize_wait(NULL); + } + erts_thr_progress_finalize_wait(NULL); +#endif } flgs = sched_prep_spin_wait(ssi); @@ -3224,8 +3193,79 @@ aux_thread(void *unused) return NULL; } -static void suspend_scheduler(ErtsSchedulerData *esdp); +static void * +poll_thread(void *arg) +{ + int id = (int)(UWord)arg; + ErtsAuxWorkData *awdp = poll_thread_aux_work_data+id; + ErtsSchedulerSleepInfo *ssi = ERTS_POLL_THREAD_SLEEP_INFO_IX(id); + erts_aint32_t aux_work; + ErtsThrPrgrCallbacks callbacks; + int thr_prgr_active = 1; + struct erts_poll_thread *psi = erts_create_pollset_thread(id); + ERTS_MSACC_DECLARE_CACHE(); + +#ifdef ERTS_ENABLE_LOCK_CHECK + { + char buf[] = "poll_thread"; + erts_lc_set_thread_name(buf); + } +#endif + + erts_port_task_pre_alloc_init_thread(); + ssi->event = erts_tse_fetch(); + + erts_msacc_init_thread("poll", id, 0); + + callbacks.arg = (void *) ssi; + callbacks.wakeup = thr_prgr_wakeup; + callbacks.prepare_wait = thr_prgr_prep_wait; + callbacks.wait = thr_prgr_wait; + callbacks.finalize_wait = thr_prgr_fin_wait; + + erts_thr_progress_register_managed_thread(NULL, &callbacks, 0); + init_aux_work_data(awdp, NULL, NULL); + awdp->ssi = ssi; + ssi->psi = psi; + + sched_prep_spin_wait(ssi); + + ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_OTHER); + while (1) { + erts_aint32_t flgs; + + aux_work = erts_atomic32_read_acqb(&ssi->aux_work); + if (aux_work) { + if (!thr_prgr_active) + erts_thr_progress_active(NULL, thr_prgr_active = 1); + aux_work = handle_aux_work(awdp, aux_work, 1); + ERTS_MSACC_UPDATE_CACHE(); + if (aux_work && erts_thr_progress_update(NULL)) + erts_thr_progress_leader_update(NULL); + } + + if (!aux_work) { + if (thr_prgr_active) + erts_thr_progress_active(NULL, thr_prgr_active = 0); + + flgs = sched_spin_wait(ssi, 0); + + if (flgs & ERTS_SSI_FLG_SLEEPING) { + ASSERT(flgs & ERTS_SSI_FLG_WAITING); + flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING); + if (flgs & ERTS_SSI_FLG_SLEEPING) { + ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING); + ASSERT(flgs & ERTS_SSI_FLG_WAITING); + erts_check_io(psi); + } + } + } + + flgs = sched_prep_spin_wait(ssi); + } + return NULL; +} static void scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) @@ -3260,318 +3300,136 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) dirty_active(esdp, -1); } - /* - * If all schedulers are waiting, one of them *should* - * be waiting in erl_sys_schedule() - */ - - if (ERTS_SCHEDULER_IS_DIRTY(esdp) || !prepare_for_sys_schedule(0)) { - - sched_waiting(esdp->no, rq); - - erts_runq_unlock(rq); - - spincount = sched_busy_wait.tse; + sched_waiting(esdp->no, rq); - tse_wait: - - if (ERTS_SCHEDULER_IS_DIRTY(esdp)) - dirty_sched_wall_time_change(esdp, working = 0); - else if (thr_prgr_active != working) - sched_wall_time_change(esdp, working = thr_prgr_active); - - while (1) { - ErtsMonotonicTime current_time = 0; + erts_runq_unlock(rq); - aux_work = erts_atomic32_read_acqb(&ssi->aux_work); - if (aux_work && !ERTS_SCHEDULER_IS_DIRTY(esdp)) { - if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); - sched_wall_time_change(esdp, 1); - } - aux_work = handle_aux_work(&esdp->aux_work_data, aux_work, 1); - ERTS_MSACC_UPDATE_CACHE(); - if (aux_work && erts_thr_progress_update(esdp)) - erts_thr_progress_leader_update(esdp); - } + spincount = sched_busy_wait.tse; - if (aux_work) { - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { - flgs = erts_atomic32_read_acqb(&ssi->flags); - current_time = erts_get_monotonic_time(esdp); - if (current_time >= erts_next_timeout_time(esdp->next_tmo_ref)) { - if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); - sched_wall_time_change(esdp, 1); - } - erts_bump_timers(esdp->timer_wheel, current_time); - } - } - } - else { - ErtsMonotonicTime timeout_time; - int do_timeout = 0; - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { - timeout_time = erts_check_next_timeout_time(esdp); - current_time = erts_get_monotonic_time(esdp); - do_timeout = (current_time >= timeout_time); - } else { - current_time = 0; - timeout_time = ERTS_MONOTONIC_TIME_MAX; - } - if (do_timeout) { - if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); - sched_wall_time_change(esdp, 1); - } - } - else { - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { - if (thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 0); - sched_wall_time_change(esdp, 0); - } - erts_thr_progress_prepare_wait(esdp); - } + if (ERTS_SCHEDULER_IS_DIRTY(esdp)) + dirty_sched_wall_time_change(esdp, working = 0); + else if (thr_prgr_active != working) + sched_wall_time_change(esdp, working = thr_prgr_active); - flgs = sched_spin_wait(ssi, spincount); - if (flgs & ERTS_SSI_FLG_SLEEPING) { - ASSERT(flgs & ERTS_SSI_FLG_WAITING); - flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_TSE_SLEEPING); - if (flgs & ERTS_SSI_FLG_SLEEPING) { - int res; - ASSERT(flgs & ERTS_SSI_FLG_TSE_SLEEPING); - ASSERT(flgs & ERTS_SSI_FLG_WAITING); - current_time = ERTS_SCHEDULER_IS_DIRTY(esdp) ? 0 : - erts_get_monotonic_time(esdp); - do { - Sint64 timeout; - if (current_time >= timeout_time) - break; - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { - timeout = ERTS_MONOTONIC_TO_NSEC(timeout_time - - current_time - - 1) + 1; - } else - timeout = -1; - ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP); - res = erts_tse_twait(ssi->event, timeout); - ERTS_MSACC_POP_STATE_M(); - current_time = ERTS_SCHEDULER_IS_DIRTY(esdp) ? 0 : - erts_get_monotonic_time(esdp); - } while (res == EINTR); - } - } - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) - erts_thr_progress_finalize_wait(esdp); - } - if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && current_time >= timeout_time) - erts_bump_timers(esdp->timer_wheel, current_time); - } + while (1) { + ErtsMonotonicTime current_time = 0; - if (!(flgs & ERTS_SSI_FLG_WAITING)) { - ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING)); - break; - } + aux_work = erts_atomic32_read_acqb(&ssi->aux_work); + if (aux_work && !ERTS_SCHEDULER_IS_DIRTY(esdp)) { + if (!thr_prgr_active) { + erts_thr_progress_active(esdp, thr_prgr_active = 1); + sched_wall_time_change(esdp, 1); + } + aux_work = handle_aux_work(&esdp->aux_work_data, aux_work, 1); + ERTS_MSACC_UPDATE_CACHE(); + if (aux_work && erts_thr_progress_update(esdp)) + erts_thr_progress_leader_update(esdp); + } - flgs = sched_prep_cont_spin_wait(ssi); - spincount = sched_busy_wait.aux_work; + if (aux_work) { + if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { + flgs = erts_atomic32_read_acqb(&ssi->flags); + current_time = erts_get_monotonic_time(esdp); + if (current_time >= erts_next_timeout_time(esdp->next_tmo_ref)) { + if (!thr_prgr_active) { + erts_thr_progress_active(esdp, thr_prgr_active = 1); + sched_wall_time_change(esdp, 1); + } + erts_bump_timers(esdp->timer_wheel, current_time); + } + } + } + else { + ErtsMonotonicTime timeout_time; + int do_timeout = 0; + if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { + timeout_time = erts_check_next_timeout_time(esdp); + current_time = erts_get_monotonic_time(esdp); + do_timeout = (current_time >= timeout_time); + } else { + current_time = 0; + timeout_time = ERTS_MONOTONIC_TIME_MAX; + } + if (do_timeout) { + if (!thr_prgr_active) { + erts_thr_progress_active(esdp, thr_prgr_active = 1); + sched_wall_time_change(esdp, 1); + } + } + else { + if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { + if (thr_prgr_active) { + erts_thr_progress_active(esdp, thr_prgr_active = 0); + sched_wall_time_change(esdp, 0); + } + erts_thr_progress_prepare_wait(esdp); + } - if (!(flgs & ERTS_SSI_FLG_WAITING)) { - ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING)); - break; - } + flgs = sched_spin_wait(ssi, spincount); + if (flgs & ERTS_SSI_FLG_SLEEPING) { + ASSERT(flgs & ERTS_SSI_FLG_WAITING); + flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_TSE_SLEEPING); + if (flgs & ERTS_SSI_FLG_SLEEPING) { + int res; + ASSERT(flgs & ERTS_SSI_FLG_TSE_SLEEPING); + ASSERT(flgs & ERTS_SSI_FLG_WAITING); + current_time = ERTS_SCHEDULER_IS_DIRTY(esdp) ? 0 : + erts_get_monotonic_time(esdp); + do { + Sint64 timeout; + if (current_time >= timeout_time) + break; + if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { + timeout = ERTS_MONOTONIC_TO_NSEC(timeout_time + - current_time + - 1) + 1; + } else + timeout = -1; + ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP); + res = erts_tse_twait(ssi->event, timeout); + ERTS_MSACC_POP_STATE_M(); + current_time = ERTS_SCHEDULER_IS_DIRTY(esdp) ? 0 : + erts_get_monotonic_time(esdp); + } while (res == EINTR); + } + } + if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) + erts_thr_progress_finalize_wait(esdp); + } + if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && current_time >= timeout_time) + erts_bump_timers(esdp->timer_wheel, current_time); + } - } + if (!(flgs & ERTS_SSI_FLG_WAITING)) { + ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING)); + break; + } - if (flgs & ~(ERTS_SSI_FLG_SUSPENDED|ERTS_SSI_FLG_MSB_EXEC)) - erts_atomic32_read_band_nob(&ssi->flags, - (ERTS_SSI_FLG_SUSPENDED - | ERTS_SSI_FLG_MSB_EXEC)); + flgs = sched_prep_cont_spin_wait(ssi); + spincount = sched_busy_wait.aux_work; - if (ERTS_SCHEDULER_IS_DIRTY(esdp)) - dirty_sched_wall_time_change(esdp, working = 1); - else if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); - sched_wall_time_change(esdp, 1); + if (!(flgs & ERTS_SSI_FLG_WAITING)) { + ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING)); + break; } - erts_runq_lock(rq); - sched_active(esdp->no, rq); - } - else - { - - erts_atomic32_set_relb(&function_calls, 0); - *fcalls = 0; - - ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp)); - - sched_waiting_sys(esdp->no, rq); - - erts_runq_unlock(rq); - - ASSERT(working); - sched_wall_time_change(esdp, working = 0); - - spincount = sched_busy_wait.sys_schedule; - if (spincount == 0) - goto sys_aux_work; - - while (spincount-- > 0) { - ErtsMonotonicTime current_time; - - sys_poll_aux_work: - - if (working) - sched_wall_time_change(esdp, working = 0); - - ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_CHECK_IO); - - ASSERT(!erts_port_task_have_outstanding_io_tasks()); - LTTNG2(scheduler_poll, esdp->no, 1); - erl_sys_schedule(1); /* Might give us something to do */ - - ERTS_MSACC_POP_STATE_M(); - - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { - current_time = erts_get_monotonic_time(esdp); - if (current_time >= erts_next_timeout_time(esdp->next_tmo_ref)) - erts_bump_timers(esdp->timer_wheel, current_time); - } - - sys_aux_work: - - aux_work = erts_atomic32_read_acqb(&ssi->aux_work); - if (aux_work && !ERTS_SCHEDULER_IS_DIRTY(esdp)) { - if (!working) - sched_wall_time_change(esdp, working = 1); - if (!thr_prgr_active) - erts_thr_progress_active(esdp, thr_prgr_active = 1); - aux_work = handle_aux_work(&esdp->aux_work_data, aux_work, 1); - ERTS_MSACC_UPDATE_CACHE(); - if (aux_work && erts_thr_progress_update(esdp)) - erts_thr_progress_leader_update(esdp); - } - - flgs = erts_atomic32_read_acqb(&ssi->flags); - if (!(flgs & ERTS_SSI_FLG_WAITING)) { - ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING)); - goto sys_woken; - } - - /* - * If we got new I/O tasks we aren't allowed to - * call erl_sys_schedule() until it is handled. - */ - if (erts_port_task_have_outstanding_io_tasks()) { - clear_sys_scheduling(); - /* - * Got to check that we still got I/O tasks; otherwise - * we have to continue checking for I/O... - */ - if (!prepare_for_sys_schedule(0)) { - spincount *= ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT; - goto tse_wait; - } - } - } - - erts_runq_lock(rq); - - /* - * If we got new I/O tasks we aren't allowed to - * sleep in erl_sys_schedule(). - */ - if (erts_port_task_have_outstanding_io_tasks()) { - clear_sys_scheduling(); - - /* - * Got to check that we still got I/O tasks; otherwise - * we have to wait in erl_sys_schedule() after all... - */ - if (!prepare_for_sys_schedule(0)) { - /* - * Not allowed to wait in erl_sys_schedule; - * do tse wait instead... - */ - sched_change_waiting_sys_to_waiting(esdp->no, rq); - erts_runq_unlock(rq); - spincount = 0; - goto tse_wait; - } - } - if (aux_work) { - erts_runq_unlock(rq); - goto sys_poll_aux_work; - } - flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING); - if (!(flgs & ERTS_SSI_FLG_SLEEPING)) { - if (!(flgs & ERTS_SSI_FLG_WAITING)) { - ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING)); - goto sys_locked_woken; - } - erts_runq_unlock(rq); - flgs = sched_prep_cont_spin_wait(ssi); - if (!(flgs & ERTS_SSI_FLG_WAITING)) { - ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING)); - goto sys_woken; - } - ASSERT(!erts_port_task_have_outstanding_io_tasks()); - goto sys_poll_aux_work; - } - - ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING); - ASSERT(flgs & ERTS_SSI_FLG_WAITING); - - erts_runq_unlock(rq); - - if (working) - sched_wall_time_change(esdp, working = 0); - - if (thr_prgr_active) - erts_thr_progress_active(esdp, thr_prgr_active = 0); - - ASSERT(!erts_port_task_have_outstanding_io_tasks()); - - ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_CHECK_IO); - LTTNG2(scheduler_poll, esdp->no, 0); - erl_sys_schedule(0); + if (flgs & ~(ERTS_SSI_FLG_SUSPENDED|ERTS_SSI_FLG_MSB_EXEC)) + erts_atomic32_read_band_nob(&ssi->flags, + (ERTS_SSI_FLG_SUSPENDED + | ERTS_SSI_FLG_MSB_EXEC)); - ERTS_MSACC_POP_STATE_M(); - - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { - ErtsMonotonicTime current_time = erts_get_monotonic_time(esdp); - if (current_time >= erts_next_timeout_time(esdp->next_tmo_ref)) - erts_bump_timers(esdp->timer_wheel, current_time); - } - - flgs = sched_prep_cont_spin_wait(ssi); - if (flgs & ERTS_SSI_FLG_WAITING) - goto sys_aux_work; - - sys_woken: - if (!thr_prgr_active) - erts_thr_progress_active(esdp, thr_prgr_active = 1); - erts_runq_lock(rq); - sys_locked_woken: - if (!thr_prgr_active) { - erts_runq_unlock(rq); - erts_thr_progress_active(esdp, thr_prgr_active = 1); - erts_runq_lock(rq); - } - clear_sys_scheduling(); - if (flgs & ~(ERTS_SSI_FLG_SUSPENDED|ERTS_SSI_FLG_MSB_EXEC)) - erts_atomic32_read_band_nob(&ssi->flags, - (ERTS_SSI_FLG_SUSPENDED - | ERTS_SSI_FLG_MSB_EXEC)); - if (!working) - sched_wall_time_change(esdp, working = 1); - sched_active_sys(esdp->no, rq); + if (ERTS_SCHEDULER_IS_DIRTY(esdp)) + dirty_sched_wall_time_change(esdp, working = 1); + else if (!thr_prgr_active) { + erts_thr_progress_active(esdp, thr_prgr_active = 1); + sched_wall_time_change(esdp, 1); } + erts_runq_lock(rq); + sched_active(esdp->no, rq); + if (ERTS_SCHEDULER_IS_DIRTY(esdp)) dirty_active(esdp, 1); @@ -5872,6 +5730,7 @@ init_scheduler_data(ErtsSchedulerData* esdp, int num, shadow_proc->static_flags = ERTS_STC_FLG_SHADOW_PROC; } + ssi->esdp = esdp; esdp->ssi = ssi; esdp->current_process = NULL; esdp->current_port = NULL; @@ -5902,8 +5761,8 @@ init_scheduler_data(ErtsSchedulerData* esdp, int num, } void -erts_init_scheduling(int no_schedulers, int no_schedulers_online - , int no_dirty_cpu_schedulers, int no_dirty_cpu_schedulers_online, +erts_init_scheduling(int no_schedulers, int no_schedulers_online, int no_poll_threads, + int no_dirty_cpu_schedulers, int no_dirty_cpu_schedulers_online, int no_dirty_io_schedulers ) { @@ -5930,6 +5789,7 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online ASSERT(no_dirty_cpu_schedulers >= 1); ASSERT(no_dirty_cpu_schedulers_online <= no_schedulers_online); ASSERT(no_dirty_cpu_schedulers_online >= 1); + ASSERT(erts_no_poll_threads == no_poll_threads); /* Create and initialize run queues */ @@ -6027,7 +5887,7 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online erts_no_total_schedulers += no_dirty_io_schedulers; /* Create and initialize scheduler sleep info */ - no_ssi = n+1; + no_ssi = n + 1 /* aux thread */; aligned_sched_sleep_info = erts_alloc_permanent_cache_aligned( ERTS_ALC_T_SCHDLR_SLP_INFO, @@ -6038,12 +5898,13 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online ssi->next = NULL; ssi->prev = NULL; #endif + ssi->esdp = NULL; erts_atomic32_init_nob(&ssi->flags, 0); ssi->event = NULL; /* initialized in sched_thread_func */ erts_atomic32_init_nob(&ssi->aux_work, 0); } - aligned_sched_sleep_info++; + aligned_sched_sleep_info += 1 /* aux thread */; aligned_dirty_cpu_sched_sleep_info = erts_alloc_permanent_cache_aligned( @@ -6066,6 +5927,18 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online erts_atomic32_init_nob(&ssi->aux_work, 0); } + aligned_poll_thread_sleep_info = + erts_alloc_permanent_cache_aligned( + ERTS_ALC_T_SCHDLR_SLP_INFO, + no_poll_threads*sizeof(ErtsAlignedSchedulerSleepInfo)); + for (ix = 0; ix < no_poll_threads; ix++) { + ErtsSchedulerSleepInfo *ssi = &aligned_poll_thread_sleep_info[ix].ssi; + ssi->esdp = NULL; + erts_atomic32_init_nob(&ssi->flags, 0); + ssi->event = NULL; /* initialized in poll_thread */ + erts_atomic32_init_nob(&ssi->aux_work, 0); + } + /* Create and initialize scheduler specific data */ daww_sz = ERTS_ALC_CACHE_LINE_ALIGN_SIZE((sizeof(ErtsDelayedAuxWorkWakeupJob) @@ -6122,11 +5995,14 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online erts_atomic32_init_nob(&debug_wait_completed_count, 0); /* debug only */ debug_wait_completed_flags = 0; - aux_thread_aux_work_data = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_SCHDLR_DATA, sizeof(ErtsAuxWorkData)); + poll_thread_aux_work_data = + erts_alloc_permanent_cache_aligned(ERTS_ALC_T_SCHDLR_DATA, + no_poll_threads * sizeof(ErtsAuxWorkData)); + init_no_runqs(no_schedulers_online, no_schedulers_online); balance_info.last_active_runqs = no_schedulers; erts_mtx_init(&balance_info.update_mtx, "migration_info_update", NIL, @@ -6205,13 +6081,9 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online erts_atomic32_set_nob(&schdlr_sspnd.changing, set_schdlr_sspnd_change_flags); - erts_atomic32_init_nob(&doing_sys_schedule, 0); - init_misc_aux_work(); - erts_atomic32_init_nob(&function_calls, 0); - /* init port tasks */ erts_port_task_init(); @@ -7107,17 +6979,19 @@ sched_spin_suspended(ErtsSchedulerSleepInfo *ssi, int spincount) } static erts_aint32_t -sched_set_suspended_sleeptype(ErtsSchedulerSleepInfo *ssi) +sched_set_suspended_sleeptype(ErtsSchedulerSleepInfo *ssi, + erts_aint32_t sleep_type) { erts_aint32_t oflgs; - erts_aint32_t nflgs = (ERTS_SSI_FLG_SLEEPING - | ERTS_SSI_FLG_TSE_SLEEPING - | ERTS_SSI_FLG_WAITING - | ERTS_SSI_FLG_SUSPENDED); + erts_aint32_t nflgs = ((ERTS_SSI_FLG_SLEEPING + | ERTS_SSI_FLG_WAITING + | ERTS_SSI_FLG_SUSPENDED) + | sleep_type); erts_aint32_t xflgs = (ERTS_SSI_FLG_SLEEPING | ERTS_SSI_FLG_WAITING | ERTS_SSI_FLG_SUSPENDED); + ASSERT(sleep_type == ERTS_SSI_FLG_TSE_SLEEPING); erts_tse_reset(ssi->event); while (1) { @@ -7370,17 +7244,6 @@ msb_scheduler_type_switch(ErtsSchedType sched_type, if (exec_type != ERTS_SCHED_NORMAL) schdlr_sspnd.last_msb_dirty_type = exec_type; else { - erts_aint32_t calls; - /* - * Going back to normal scheduler after - * dirty execution; make sure it will check - * for I/O... - */ - if (ERTS_USE_MODIFIED_TIMING()) - calls = ERTS_MODIFIED_TIMING_INPUT_REDS + 1; - else - calls = INPUT_REDUCTIONS + 1; - erts_atomic32_set_nob(&function_calls, calls); if ((nrml_prio == ERTS_MSB_NONE_PRIO_BIT) & ((dcpu_prio != ERTS_MSB_NONE_PRIO_BIT) @@ -7454,6 +7317,34 @@ msb_scheduler_type_switch(ErtsSchedType sched_type, } +static ERTS_INLINE void +suspend_normal_scheduler_sleep(ErtsSchedulerData *esdp) +{ + ErtsSchedulerSleepInfo *ssi = esdp->ssi; + erts_aint32_t flgs = sched_spin_suspended(ssi, + ERTS_SCHED_SUSPEND_SLEEP_SPINCOUNT); + if (flgs == (ERTS_SSI_FLG_SLEEPING + | ERTS_SSI_FLG_WAITING + | ERTS_SSI_FLG_SUSPENDED)) { + flgs = sched_set_suspended_sleeptype(ssi, ERTS_SSI_FLG_TSE_SLEEPING); + if (flgs == (ERTS_SSI_FLG_SLEEPING + | ERTS_SSI_FLG_TSE_SLEEPING + | ERTS_SSI_FLG_WAITING + | ERTS_SSI_FLG_SUSPENDED)) { + int res; + + do { + res = erts_tse_wait(ssi->event); + } while (res == EINTR); + } + } +} + +static ERTS_INLINE void +suspend_dirty_scheduler_sleep(ErtsSchedulerData *esdp) +{ + suspend_normal_scheduler_sleep(esdp); +} static void suspend_scheduler(ErtsSchedulerData *esdp) @@ -7642,14 +7533,15 @@ suspend_scheduler(ErtsSchedulerData *esdp) schdlr_sspnd_resume_procs(sched_type, &resume); while (1) { - ErtsMonotonicTime current_time; - erts_aint32_t flgs; - if (sched_type != ERTS_SCHED_NORMAL) - aux_work = 0; - else { + suspend_dirty_scheduler_sleep(esdp); + else + { + ErtsMonotonicTime current_time, timeout_time; int evacuate = no == 1 ? 0 : !ERTS_EMPTY_RUNQ(esdp->run_queue); + ASSERT(sched_type == ERTS_SCHED_NORMAL); + aux_work = erts_atomic32_read_acqb(&ssi->aux_work); if (aux_work|evacuate) { @@ -7671,95 +7563,34 @@ suspend_scheduler(ErtsSchedulerData *esdp) } } - } - if (aux_work) { - ASSERT(sched_type == ERTS_SCHED_NORMAL); - current_time = erts_get_monotonic_time(esdp); - if (current_time >= erts_next_timeout_time(esdp->next_tmo_ref)) { - if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); - sched_wall_time_change(esdp, 1); - } - erts_bump_timers(esdp->timer_wheel, current_time); - } - } - else { - ErtsMonotonicTime timeout_time; - int do_timeout; + if (aux_work) + timeout_time = erts_next_timeout_time(esdp->next_tmo_ref); + else + timeout_time = erts_check_next_timeout_time(esdp); - if (sched_type == ERTS_SCHED_NORMAL) { - timeout_time = erts_check_next_timeout_time(esdp); - current_time = erts_get_monotonic_time(esdp); - do_timeout = (current_time >= timeout_time); - } - else { - timeout_time = ERTS_MONOTONIC_TIME_MAX; - current_time = 0; - do_timeout = 0; - } + current_time = erts_get_monotonic_time(esdp); - if (do_timeout) { - ASSERT(sched_type == ERTS_SCHED_NORMAL); - if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); - sched_wall_time_change(esdp, 1); - } - } - else { - if (sched_type == ERTS_SCHED_NORMAL) { - if (thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 0); - sched_wall_time_change(esdp, 0); - } - erts_thr_progress_prepare_wait(esdp); - } - flgs = sched_spin_suspended(ssi, - ERTS_SCHED_SUSPEND_SLEEP_SPINCOUNT); - if (flgs == (ERTS_SSI_FLG_SLEEPING - | ERTS_SSI_FLG_WAITING - | ERTS_SSI_FLG_SUSPENDED)) { - flgs = sched_set_suspended_sleeptype(ssi); - if (flgs == (ERTS_SSI_FLG_SLEEPING - | ERTS_SSI_FLG_TSE_SLEEPING - | ERTS_SSI_FLG_WAITING - | ERTS_SSI_FLG_SUSPENDED)) { - int res; - - if (sched_type == ERTS_SCHED_NORMAL) - current_time = erts_get_monotonic_time(esdp); - else - current_time = 0; - - do { - Sint64 timeout; - if (current_time >= timeout_time) - break; - if (sched_type != ERTS_SCHED_NORMAL) - timeout = -1; - else - timeout = ERTS_MONOTONIC_TO_NSEC(timeout_time - - current_time - - 1) + 1; - res = erts_tse_twait(ssi->event, timeout); - - if (sched_type == ERTS_SCHED_NORMAL) - current_time = erts_get_monotonic_time(esdp); - else - current_time = 0; - - } while (res == EINTR); - } - } - if (sched_type == ERTS_SCHED_NORMAL) - erts_thr_progress_finalize_wait(esdp); - } + if (!aux_work && current_time < timeout_time) { + /* go to sleep... */ + if (thr_prgr_active) { + erts_thr_progress_active(esdp, thr_prgr_active = 0); + sched_wall_time_change(esdp, 0); + } + erts_thr_progress_prepare_wait(NULL); + suspend_normal_scheduler_sleep(esdp); + erts_thr_progress_finalize_wait(NULL); + current_time = erts_get_monotonic_time(esdp); + } - if (current_time >= timeout_time) { - ASSERT(sched_type == ERTS_SCHED_NORMAL); - erts_bump_timers(esdp->timer_wheel, current_time); - } - } + if (current_time >= timeout_time) { + if (!thr_prgr_active) { + erts_thr_progress_active(esdp, thr_prgr_active = 1); + sched_wall_time_change(esdp, 1); + } + erts_bump_timers(esdp->timer_wheel, current_time); + } + } flgs = sched_prep_spin_suspended(ssi, (ERTS_SSI_FLG_WAITING | ERTS_SSI_FLG_SUSPENDED)); @@ -8425,6 +8256,7 @@ sched_thread_func(void *vesdp) Uint no = esdp->no; erts_tse_t *tse; + erts_port_task_pre_alloc_init_thread(); erts_sched_init_time_sup(esdp); if (no == 1) @@ -8578,17 +8410,17 @@ sched_dirty_io_thread_func(void *vesdp) return NULL; } -static ethr_tid aux_tid; - void erts_start_schedulers(void) { + ethr_tid tid; int res = 0; Uint actual; Uint wanted = erts_no_schedulers; Uint wanted_no_schedulers = erts_no_schedulers; char name[16]; ethr_thr_opts opts = ETHR_THR_OPTS_DEFAULT_INITER; + int ix; opts.detached = 1; @@ -8635,7 +8467,6 @@ erts_start_schedulers(void) erts_no_schedulers = actual; { - int ix; for (ix = 0; ix < erts_no_dirty_cpu_schedulers; ix++) { ErtsSchedulerData *esdp = ERTS_DIRTY_CPU_SCHEDULER_IX(ix); erts_snprintf(opts.name, 16, "%d_dirty_cpu_scheduler", ix + 1); @@ -8658,10 +8489,18 @@ erts_start_schedulers(void) erts_snprintf(opts.name, 16, "aux"); - res = ethr_thr_create(&aux_tid, aux_thread, NULL, &opts); + res = ethr_thr_create(&tid, aux_thread, NULL, &opts); if (res != 0) erts_exit(ERTS_ERROR_EXIT, "Failed to create aux thread, error = %d\n", res); + for (ix = 0; ix < erts_no_poll_threads; ix++) { + erts_snprintf(opts.name, 16, "%d_poller", ix); + + res = ethr_thr_create(&tid, poll_thread, (void*)(UWord)ix, &opts); + if (res != 0) + erts_exit(ERTS_ERROR_EXIT, "Failed to create poll thread\n"); + } + if (actual < 1) erts_exit(ERTS_ERROR_EXIT, "Failed to create any scheduler-threads: %s (%d)\n", @@ -9728,7 +9567,6 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls) ErtsRunQueue *rq; int context_reds; int fcalls; - int input_reductions; int actual_reds; int reds; Uint32 flags; @@ -9748,11 +9586,9 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls) if (ERTS_USE_MODIFIED_TIMING()) { context_reds = ERTS_MODIFIED_TIMING_CONTEXT_REDS; - input_reductions = ERTS_MODIFIED_TIMING_INPUT_REDS; } else { context_reds = CONTEXT_REDS; - input_reductions = INPUT_REDUCTIONS; } ERTS_LC_ASSERT(ERTS_SCHEDULER_IS_DIRTY(erts_get_scheduler_data()) @@ -9772,7 +9608,6 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls) } rq = erts_get_runq_current(esdp); ASSERT(esdp); - fcalls = (int) erts_atomic32_read_acqb(&function_calls); actual_reds = reds = 0; erts_runq_lock(rq); } else { @@ -9798,7 +9633,6 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls) reds = ERTS_PROC_MIN_CONTEXT_SWITCH_REDS_COST; esdp->virtual_reds = 0; - fcalls = (int) erts_atomic32_add_read_acqb(&function_calls, reds); ASSERT(esdp && esdp == erts_get_scheduler_data()); rq = erts_get_runq_current(esdp); @@ -9888,16 +9722,6 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls) ERTS_CHK_NO_PROC_LOCKS; - if (is_normal_sched) { - if (esdp->check_time_reds >= ERTS_CHECK_TIME_REDS) - (void) erts_get_monotonic_time(esdp); - - if (esdp->last_monotonic_time >= erts_next_timeout_time(esdp->next_tmo_ref)) { - erts_runq_unlock(rq); - erts_bump_timers(esdp->timer_wheel, esdp->last_monotonic_time); - erts_runq_lock(rq); - } - } } ERTS_LC_ASSERT(!is_normal_sched || !erts_thr_progress_is_blocking()); @@ -9908,6 +9732,16 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls) ErtsMigrationPath *mp; if (is_normal_sched) { + + if (esdp->check_time_reds >= ERTS_CHECK_TIME_REDS) + (void) erts_get_monotonic_time(esdp); + + if (esdp->last_monotonic_time >= erts_next_timeout_time(esdp->next_tmo_ref)) { + erts_runq_unlock(rq); + erts_bump_timers(esdp->timer_wheel, esdp->last_monotonic_time); + erts_runq_lock(rq); + } + if (rq->check_balance_reds <= 0) check_balance(rq); @@ -10037,38 +9871,6 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls) goto check_activities_to_run; } - else if (is_normal_sched - && (fcalls > input_reductions - && prepare_for_sys_schedule(!0))) { - ErtsMonotonicTime current_time; - /* - * Schedule system-level activities. - */ - - ERTS_MSACC_PUSH_STATE_CACHED_M(); - - erts_atomic32_set_relb(&function_calls, 0); - fcalls = 0; - -#if 0 /* Not needed since we wont wait in sys schedule */ - erts_sys_schedule_interrupt(0); -#endif - erts_runq_unlock(rq); - - ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_CHECK_IO); - LTTNG2(scheduler_poll, esdp->no, 1); - - erl_sys_schedule(1); - ERTS_MSACC_POP_STATE_M(); - - current_time = erts_get_monotonic_time(esdp); - if (current_time >= erts_next_timeout_time(esdp->next_tmo_ref)) - erts_bump_timers(esdp->timer_wheel, current_time); - - erts_runq_lock(rq); - clear_sys_scheduling(); - goto continue_check_activities_to_run; - } if (flags & ERTS_RUNQ_FLG_MISC_OP) exec_misc_ops(rq); @@ -10082,29 +9884,9 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls) flags = ERTS_RUNQ_FLGS_GET_NOB(rq); if (flags & PORT_BIT) { - int have_outstanding_io; - have_outstanding_io = erts_port_task_execute(rq, &esdp->current_port); - if ((!erts_eager_check_io - && have_outstanding_io - && fcalls > 2*input_reductions) - || (flags & ERTS_RUNQ_FLG_HALTING)) { - /* - * If we have performed more than 2*INPUT_REDUCTIONS since - * last call to erl_sys_schedule() and we still haven't - * handled all I/O tasks we stop running processes and - * focus completely on ports. - * - * One could argue that this is a strange behavior. The - * reason for doing it this way is that it is similar - * to the behavior before port tasks were introduced. - * We don't want to change the behavior too much, at - * least not at the time of writing. This behavior - * might change in the future. - * - * /rickard - */ - goto check_activities_to_run; - } + erts_port_task_execute(rq, &esdp->current_port); + if (flags & ERTS_RUNQ_FLG_HALTING) + goto check_activities_to_run; } /* diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h index e63da2d9db..66d7848f89 100644 --- a/erts/emulator/beam/erl_process.h +++ b/erts/emulator/beam/erl_process.h @@ -105,7 +105,6 @@ struct saved_calls { }; extern Export exp_send, exp_receive, exp_timeout; -extern int erts_eager_check_io; extern int erts_sched_compact_load; extern int erts_sched_balance_util; extern Uint erts_no_schedulers; @@ -364,10 +363,12 @@ typedef struct { } ErtsSchedulerSleepList; struct ErtsSchedulerSleepInfo_ { + struct ErtsSchedulerData_ *esdp; ErtsSchedulerSleepInfo *next; ErtsSchedulerSleepInfo *prev; erts_atomic32_t flags; erts_tse_t *event; + struct erts_poll_thread *psi; erts_atomic32_t aux_work; }; @@ -1531,9 +1532,7 @@ extern int erts_system_profile_ts_type; void erts_pre_init_process(void); void erts_late_init_process(void); void erts_early_init_scheduling(int); -void erts_init_scheduling(int, int - , int, int, int - ); +void erts_init_scheduling(int, int, int, int, int, int); void erts_execute_dirty_system_task(Process *c_p); int erts_set_gc_state(Process *c_p, int enable); Eterm erts_sched_wall_time_request(Process *c_p, int set, int enable, @@ -2470,6 +2469,7 @@ void erts_notify_inc_runq(ErtsRunQueue *runq); void erts_sched_finish_poke(ErtsSchedulerSleepInfo *, erts_aint32_t); ERTS_GLB_INLINE void erts_sched_poke(ErtsSchedulerSleepInfo *ssi); +void erts_aux_thread_poke(void); #if ERTS_GLB_INLINE_INCL_FUNC_DEF diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.c b/erts/emulator/beam/erl_sched_spec_pre_alloc.c index 6cb7ccab8d..ab204303d7 100644 --- a/erts/emulator/beam/erl_sched_spec_pre_alloc.c +++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.c @@ -37,7 +37,7 @@ #include "erl_thr_progress.h" erts_sspa_data_t * -erts_sspa_create(size_t blk_sz, int pa_size) +erts_sspa_create(size_t blk_sz, int pa_size, int nthreads, const char* name) { erts_sspa_data_t *data; size_t tot_size; @@ -48,22 +48,30 @@ erts_sspa_create(size_t blk_sz, int pa_size) int no_blocks = pa_size; int no_blocks_per_chunk; - if (erts_no_schedulers == 1) + if (!name) { /* schedulers only variant */ + ASSERT(!nthreads); + nthreads = erts_no_schedulers; + } + else { + ASSERT(nthreads > 0); + } + + if (nthreads == 1) no_blocks_per_chunk = no_blocks; else { int extra = (no_blocks - 1)/4 + 1; if (extra == 0) extra = 1; no_blocks_per_chunk = no_blocks; - no_blocks_per_chunk += extra*erts_no_schedulers; - no_blocks_per_chunk /= erts_no_schedulers; + no_blocks_per_chunk += extra * nthreads; + no_blocks_per_chunk /= nthreads; } - no_blocks = no_blocks_per_chunk * erts_no_schedulers; + no_blocks = no_blocks_per_chunk * nthreads; chunk_mem_size = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(erts_sspa_chunk_header_t)); chunk_mem_size += blk_sz * no_blocks_per_chunk; chunk_mem_size = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(chunk_mem_size); tot_size = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(erts_sspa_data_t)); - tot_size += chunk_mem_size*erts_no_schedulers; + tot_size += chunk_mem_size * nthreads; p = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_PRE_ALLOC_DATA, tot_size); data = (erts_sspa_data_t *) p; @@ -72,10 +80,16 @@ erts_sspa_create(size_t blk_sz, int pa_size) data->chunks_mem_size = chunk_mem_size; data->start = chunk_start; - data->end = chunk_start + chunk_mem_size*erts_no_schedulers; + data->end = chunk_start + chunk_mem_size * nthreads; + data->nthreads = nthreads; + + if (name) { /* thread variant */ + erts_tsd_key_create(&data->tsd_key, (char*)name); + erts_atomic_init_nob(&data->id_generator, 0); + } /* Initialize all chunks */ - for (cix = 0; cix < erts_no_schedulers; cix++) { + for (cix = 0; cix < nthreads; cix++) { erts_sspa_chunk_t *chnk = erts_sspa_cix2chunk(data, cix); erts_sspa_chunk_header_t *chdr = &chnk->aligned.header; erts_sspa_blk_t *blk; diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.h b/erts/emulator/beam/erl_sched_spec_pre_alloc.h index 1307e65962..d232db0e69 100644 --- a/erts/emulator/beam/erl_sched_spec_pre_alloc.h +++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.h @@ -59,6 +59,11 @@ typedef struct { char *start; char *end; int chunks_mem_size; + int nthreads; + + /* Used only by thread variant: */ + erts_tsd_key_t tsd_key; + erts_atomic_t id_generator; } erts_sspa_data_t; typedef union erts_sspa_blk_t_ erts_sspa_blk_t; @@ -140,7 +145,9 @@ check_local_list(erts_sspa_chunk_header_t *chdr) #endif erts_sspa_data_t *erts_sspa_create(size_t blk_sz, - int pa_size); + int pa_size, + int nthreads, + const char* name); void erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr, erts_sspa_blk_t *blk, int cinit); @@ -158,7 +165,7 @@ ERTS_GLB_INLINE int erts_sspa_free(erts_sspa_data_t *data, int cix, char *blk); ERTS_GLB_INLINE erts_sspa_chunk_t * erts_sspa_cix2chunk(erts_sspa_data_t *data, int cix) { - ASSERT(0 <= cix && cix < erts_no_schedulers); + ASSERT(0 <= cix && cix < data->nthreads); return (erts_sspa_chunk_t *) (data->start + cix*data->chunks_mem_size); } @@ -171,7 +178,7 @@ erts_sspa_ptr2cix(erts_sspa_data_t *data, void *ptr) return -1; diff = ((char *) ptr) - data->start; cix = (int) diff / data->chunks_mem_size; - ASSERT(0 <= cix && cix < erts_no_schedulers); + ASSERT(0 <= cix && cix < data->nthreads); return cix; } diff --git a/erts/emulator/beam/erl_threads.h b/erts/emulator/beam/erl_threads.h index e306df818d..aedceb6fc2 100644 --- a/erts/emulator/beam/erl_threads.h +++ b/erts/emulator/beam/erl_threads.h @@ -454,7 +454,6 @@ ERTS_GLB_INLINE void erts_rwmtx_runlock(erts_rwmtx_t *rwmtx); ERTS_GLB_INLINE void erts_rwmtx_rwunlock(erts_rwmtx_t *rwmtx); ERTS_GLB_INLINE int erts_lc_rwmtx_is_rlocked(erts_rwmtx_t *mtx); ERTS_GLB_INLINE int erts_lc_rwmtx_is_rwlocked(erts_rwmtx_t *mtx); - ERTS_GLB_INLINE void erts_spinlock_init(erts_spinlock_t *lock, char *name, Eterm extra, diff --git a/erts/emulator/beam/erl_trace.c b/erts/emulator/beam/erl_trace.c index a07e3642f6..4b996d8fc2 100644 --- a/erts/emulator/beam/erl_trace.c +++ b/erts/emulator/beam/erl_trace.c @@ -1449,7 +1449,6 @@ monitor_long_schedule_port(Port *pp, ErtsPortTaskType type, Uint time) case ERTS_PORT_TASK_TIMEOUT: op = am_timeout; break; case ERTS_PORT_TASK_INPUT: op = am_input; break; case ERTS_PORT_TASK_OUTPUT: op = am_output; break; - case ERTS_PORT_TASK_EVENT: op = am_event; break; case ERTS_PORT_TASK_DIST_CMD: op = am_dist_cmd; break; default: op = am_undefined; break; } diff --git a/erts/emulator/beam/erl_vm.h b/erts/emulator/beam/erl_vm.h index 076767c7cd..295130f60c 100644 --- a/erts/emulator/beam/erl_vm.h +++ b/erts/emulator/beam/erl_vm.h @@ -46,8 +46,6 @@ */ #define ERTS_X_REGS_ALLOCATED (MAX_REG+3) -#define INPUT_REDUCTIONS (2 * CONTEXT_REDS) - #define H_DEFAULT_SIZE 233 /* default (heap + stack) min size */ #define VH_DEFAULT_SIZE 32768 /* default virtual (bin) heap min size (words) */ #define H_DEFAULT_MAX_SIZE 0 /* default max heap size is off */ diff --git a/erts/emulator/beam/erlang_lttng.h b/erts/emulator/beam/erlang_lttng.h index 4e869671f7..feb05f4f4c 100644 --- a/erts/emulator/beam/erlang_lttng.h +++ b/erts/emulator/beam/erlang_lttng.h @@ -159,21 +159,6 @@ TRACEPOINT_EVENT( TRACEPOINT_EVENT( org_erlang_otp, - driver_event, - TP_ARGS( - char*, pid, - char*, port, - char*, driver - ), - TP_FIELDS( - ctf_string(pid, pid) - ctf_string(port, port) - ctf_string(driver, driver) - ) -) - -TRACEPOINT_EVENT( - org_erlang_otp, driver_timeout, TP_ARGS( char*, pid, diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index 2b0ad0b98a..09aeba00fa 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -222,8 +222,6 @@ struct erts_driver_t_ { char *buf, ErlDrvSizeT len, char **rbuf, ErlDrvSizeT rlen, /* Might be NULL */ unsigned int *flags); - void (*event)(ErlDrvData drv_data, ErlDrvEvent event, - ErlDrvEventData event_data); void (*ready_input)(ErlDrvData drv_data, ErlDrvEvent event); void (*ready_output)(ErlDrvData drv_data, ErlDrvEvent event); void (*timeout)(ErlDrvData drv_data); @@ -1103,7 +1101,6 @@ void erts_save_stacktrace(Process* p, struct StackTrace* s, int depth); typedef struct { Eterm delay_time; int context_reds; - int input_reds; } ErtsModifiedTimings; extern Export *erts_delay_trap; diff --git a/erts/emulator/beam/io.c b/erts/emulator/beam/io.c index bc1b9b6ef4..85013af3ad 100644 --- a/erts/emulator/beam/io.c +++ b/erts/emulator/beam/io.c @@ -5390,24 +5390,17 @@ erts_stale_drv_select(Eterm port, switch (mode) { case ERL_DRV_READ | ERL_DRV_WRITE: type = "Input/Output"; - goto deselect; case ERL_DRV_WRITE: type = "Output"; - goto deselect; case ERL_DRV_READ: type = "Input"; - deselect: - if (deselect) { - driver_select(drv_port, hndl, - mode | ERL_DRV_USE_NO_CALLBACK, - 0); - } - break; default: - type = "Event"; - if (deselect) - driver_event(drv_port, hndl, NULL); - break; + type = ""; + } + if (deselect) { + driver_select(drv_port, hndl, + mode | ERL_DRV_USE_NO_CALLBACK, + 0); } dsbufp = erts_create_logger_dsbuf(); @@ -7511,14 +7504,6 @@ no_output_callback(ErlDrvData drv_data, char *buf, ErlDrvSizeT len) } static void -no_event_callback(ErlDrvData drv_data, ErlDrvEvent event, ErlDrvEventData event_data) -{ - Port *prt = get_current_port(); - report_missing_drv_callback(prt, "Event", "event()"); - driver_event(ERTS_Port2ErlDrvPort(prt), event, NULL); -} - -static void no_ready_input_callback(ErlDrvData drv_data, ErlDrvEvent event) { Port *prt = get_current_port(); @@ -7585,7 +7570,6 @@ init_driver(erts_driver_t *drv, ErlDrvEntry *de, DE_Handle *handle) drv->outputv = de->outputv; drv->control = de->control; drv->call = de->call; - drv->event = de->event ? de->event : no_event_callback; drv->ready_input = de->ready_input ? de->ready_input : no_ready_input_callback; drv->ready_output = de->ready_output ? de->ready_output : no_ready_output_callback; drv->timeout = de->timeout ? de->timeout : no_timeout_callback; diff --git a/erts/emulator/beam/safe_hash.c b/erts/emulator/beam/safe_hash.c index ac9ebd4714..73306030ae 100644 --- a/erts/emulator/beam/safe_hash.c +++ b/erts/emulator/beam/safe_hash.c @@ -260,16 +260,17 @@ void* safe_hash_erase(SafeHash* h, void* tmpl) } /* -** Call 'func(obj,func_arg2)' for all objects in table. NOT SAFE!!! +** Call 'func(obj,func_arg2,func_arg3)' for all objects in table. NOT SAFE!!! */ -void safe_hash_for_each(SafeHash* h, void (*func)(void *, void *), void *func_arg2) +void safe_hash_for_each(SafeHash* h, void (*func)(void *, void *, void *), + void *func_arg2, void *func_arg3) { int i; for (i = 0; i <= h->size_mask; i++) { SafeHashBucket* b = h->tab[i]; while (b != NULL) { - (*func)((void *) b, func_arg2); + (*func)((void *) b, func_arg2, func_arg3); b = b->next; } } diff --git a/erts/emulator/beam/safe_hash.h b/erts/emulator/beam/safe_hash.h index 259c58cff9..af97b4cb4d 100644 --- a/erts/emulator/beam/safe_hash.h +++ b/erts/emulator/beam/safe_hash.h @@ -95,7 +95,7 @@ void* safe_hash_get(SafeHash*, void*); void* safe_hash_put(SafeHash*, void*); void* safe_hash_erase(SafeHash*, void*); -void safe_hash_for_each(SafeHash*, void (*func)(void *, void *), void *); +void safe_hash_for_each(SafeHash*, void (*func)(void *, void *, void *), void *, void *); #ifdef ERTS_ENABLE_LOCK_COUNT void erts_lcnt_enable_hash_lock_count(SafeHash*, erts_lock_flags_t, int); diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h index 68ef0a23f3..9106091ca6 100644 --- a/erts/emulator/beam/sys.h +++ b/erts/emulator/beam/sys.h @@ -575,8 +575,6 @@ __decl_noreturn void __noreturn erts_exit(int n, char*, ...); erts_exit(ERTS_ABORT_EXIT, "%s:%d:%s(): Internal error: %s\n", \ __FILE__, __LINE__, __func__, What) -Eterm erts_check_io_info(void *p); - UWord erts_sys_get_page_size(void); /* Size of misc memory allocated from system dependent code */ @@ -739,8 +737,6 @@ extern char *erts_sys_ddll_error(int code); /* * System interfaces for startup. */ -void erts_sys_schedule_interrupt(int set); -void erts_sys_schedule_interrupt_timed(int, ErtsMonotonicTime); void erts_sys_main_thread(void); extern int erts_sys_prepare_crash_dump(int secs); @@ -794,7 +790,7 @@ void fini_getenv_state(GETENV_STATE *); typedef struct { int no_used_fds; int no_driver_select_structs; - int no_driver_event_structs; + int no_enif_select_structs; } ErtsCheckIoDebugInfo; int erts_check_io_debug(ErtsCheckIoDebugInfo *ip); @@ -996,10 +992,6 @@ erts_refc_read(erts_refc_t *refcp, erts_aint_t min_val) #endif /* #if ERTS_GLB_INLINE_INCL_FUNC_DEF */ -#ifdef ERTS_ENABLE_KERNEL_POLL -extern int erts_use_kernel_poll; -#endif - #define sys_memcpy(s1,s2,n) memcpy(s1,s2,n) #define sys_memmove(s1,s2,n) memmove(s1,s2,n) #define sys_memcmp(s1,s2,n) memcmp(s1,s2,n) diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c index 834b77eb58..f93d4c1557 100644 --- a/erts/emulator/sys/common/erl_check_io.c +++ b/erts/emulator/sys/common/erl_check_io.c @@ -29,7 +29,6 @@ #endif #define ERL_CHECK_IO_C__ -#define ERTS_WANT_BREAK_HANDLING #ifndef WANT_NONBLOCKING # define WANT_NONBLOCKING #endif @@ -44,71 +43,79 @@ #define ERTS_WANT_TIMER_WHEEL_API #include "erl_time.h" -#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS -# include "safe_hash.h" -# define DRV_EV_STATE_HTAB_SIZE 1024 +#if 0 +#define DEBUG_PRINT(FMT, ...) erts_printf(FMT "\r\n", ##__VA_ARGS__) +#define DEBUG_PRINT_FD(FMT, STATE, ...) \ + DEBUG_PRINT("%d: " FMT " (ev=%s, ac=%s, flg=%d)", \ + (STATE) ? (STATE)->fd : (ErtsSysFdType)-1, ##__VA_ARGS__, \ + ev2str((STATE) ? (STATE)->events : ERTS_POLL_EV_NONE), \ + ev2str((STATE) ? (STATE)->active_events : ERTS_POLL_EV_NONE), \ + (STATE) ? (STATE)->flags : ERTS_EV_FLAG_CLEAR) +#define DEBUG_PRINT_MODE +#else +#define DEBUG_PRINT(...) #endif -typedef char EventStateType; -#define ERTS_EV_TYPE_NONE ((EventStateType) 0) -#define ERTS_EV_TYPE_DRV_SEL ((EventStateType) 1) /* driver_select */ -#define ERTS_EV_TYPE_DRV_EV ((EventStateType) 2) /* driver_event */ -#define ERTS_EV_TYPE_STOP_USE ((EventStateType) 3) /* pending stop_select */ -#define ERTS_EV_TYPE_NIF ((EventStateType) 4) /* enif_select */ -#define ERTS_EV_TYPE_STOP_NIF ((EventStateType) 5) /* pending nif stop */ - -typedef char EventStateFlags; -#define ERTS_EV_FLAG_USED ((EventStateFlags) 1) /* ERL_DRV_USE has been turned on */ -#define ERTS_EV_FLAG_DEFER_IN_EV ((EventStateFlags) 2) -#define ERTS_EV_FLAG_DEFER_OUT_EV ((EventStateFlags) 4) - -#ifdef DEBUG -# define ERTS_ACTIVE_FD_INC 2 -#else -# define ERTS_ACTIVE_FD_INC 128 +#ifndef DEBUG_PRINT_FD +#define DEBUG_PRINT_FD(...) #endif -#define ERTS_CHECK_IO_POLL_RES_LEN 512 +#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS +# include "safe_hash.h" +# define DRV_EV_STATE_HTAB_SIZE 1024 +#endif -#if defined(ERTS_KERNEL_POLL_VERSION) -# define ERTS_CIO_EXPORT(FUNC) FUNC ## _kp -#elif defined(ERTS_NO_KERNEL_POLL_VERSION) -# define ERTS_CIO_EXPORT(FUNC) FUNC ## _nkp +typedef enum { + ERTS_EV_TYPE_NONE = 0, + ERTS_EV_TYPE_DRV_SEL = 1, /* driver_select */ + ERTS_EV_TYPE_STOP_USE = 2, /* pending stop_select */ + ERTS_EV_TYPE_NIF = 3, /* enif_select */ + ERTS_EV_TYPE_STOP_NIF = 4 /* pending nif stop */ +} EventStateType; + +typedef enum { + ERTS_EV_FLAG_CLEAR = 0, + ERTS_EV_FLAG_USED = 1, /* ERL_DRV_USE has been turned on */ +#ifdef ERTS_ENABLE_KERNEL_POLL + ERTS_EV_FLAG_FALLBACK = 2, /* Set when kernel poll rejected fd + and it was put in the nkp version */ #else -# define ERTS_CIO_EXPORT(FUNC) FUNC + ERTS_EV_FLAG_FALLBACK = ERTS_EV_FLAG_CLEAR, #endif -#define ERTS_CIO_HAVE_DRV_EVENT \ - (ERTS_POLL_USE_POLL && !ERTS_POLL_USE_KERNEL_POLL) + /* Combinations */ + ERTS_EV_FLAG_USED_FALLBACK = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_FALLBACK +} EventStateFlags; -#define ERTS_CIO_POLL_CTL ERTS_POLL_EXPORT(erts_poll_control) -#define ERTS_CIO_POLL_CTLV ERTS_POLL_EXPORT(erts_poll_controlv) -#define ERTS_CIO_POLL_WAIT ERTS_POLL_EXPORT(erts_poll_wait) -#define ERTS_CIO_POLL_INTR ERTS_POLL_EXPORT(erts_poll_interrupt) -#define ERTS_CIO_POLL_INTR_TMD ERTS_POLL_EXPORT(erts_poll_interrupt_timed) -#define ERTS_CIO_NEW_POLLSET ERTS_POLL_EXPORT(erts_poll_create_pollset) -#define ERTS_CIO_FREE_POLLSET ERTS_POLL_EXPORT(erts_poll_destroy_pollset) -#define ERTS_CIO_POLL_MAX_FDS ERTS_POLL_EXPORT(erts_poll_max_fds) -#define ERTS_CIO_POLL_INIT ERTS_POLL_EXPORT(erts_poll_init) -#define ERTS_CIO_POLL_INFO ERTS_POLL_EXPORT(erts_poll_info) +#define flag2str(flags) \ + ((flags) == ERTS_EV_FLAG_CLEAR ? "CLEAR" : \ + ((flags) == ERTS_EV_FLAG_USED ? "USED" : \ + ((flags) == ERTS_EV_FLAG_FALLBACK ? "FLBK" : \ + ((flags) == ERTS_EV_FLAG_USED_FALLBACK ? "USED|FLBK" : "ERROR")))) -#define GET_FD(fd) fd +/* How many events that can be handled at once by one erts_poll_wait call */ +#define ERTS_CHECK_IO_POLL_RES_LEN 512 -static struct pollset_info +/* Each I/O Poll Thread has one ErtsPollThread each. The ps field + can point to either a private ErtsPollSet or a shared one. + At the moment only kqueue and epoll pollsets can be + shared across threads. +*/ +typedef struct erts_poll_thread { - ErtsPollSet ps; - erts_atomic_t in_poll_wait; /* set while doing poll */ - struct { - int six; /* start index */ - int eix; /* end index */ - erts_atomic32_t no; - int size; - ErtsSysFdType *array; - } active_fd; - struct removed_fd* removed_list; /* list of deselected fd's*/ - erts_spinlock_t removed_list_lock; -}pollset; -#define NUM_OF_POLLSETS 1 + ErtsPollSet *ps; + ErtsPollResFd *pollres; + int pollres_len; +} ErtsPollThread; + +/* pollsetv contains pointers to the ErtsPollSets that are in use. + * Which pollset to use is determined by hashing the fd. + */ +static ErtsPollSet **pollsetv; +#if ERTS_POLL_USE_FALLBACK +static ErtsPollSet *flbk_pollset; +#endif +static ErtsPollThread *psiv; typedef struct { #ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS @@ -117,92 +124,151 @@ typedef struct { ErtsSysFdType fd; struct { ErtsDrvSelectDataState *select; /* ERTS_EV_TYPE_DRV_SEL */ -#if ERTS_CIO_HAVE_DRV_EVENT - ErtsDrvEventDataState *event; /* ERTS_EV_TYPE_DRV_EV */ -#endif ErtsNifSelectDataState *nif; /* ERTS_EV_TYPE_NIF */ union { erts_driver_t* drv_ptr; /* ERTS_EV_TYPE_STOP_USE */ ErtsResource* resource; /* ERTS_EV_TYPE_STOP_NIF */ - }stop; + } stop; } driver; - ErtsPollEvents events; - unsigned short remove_cnt; /* number of removed_fd's referring to this fd */ + ErtsPollEvents events; /* The events that have been selected upon */ + ErtsPollEvents active_events; /* The events currently active in the pollset */ EventStateType type; EventStateFlags flags; } ErtsDrvEventState; -struct removed_fd { - struct removed_fd *next; +struct drv_ev_state_shared { + + union { + erts_mtx_t lck; + byte _cache_line_alignment[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(erts_mtx_t))]; + } locks[ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT]; + #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - ErtsSysFdType fd; + int max_fds; + erts_atomic_t len; + ErtsDrvEventState *v; + erts_mtx_t grow_lock; /* prevent lock-hogging of racing growers */ #else - ErtsDrvEventState* state; - #ifdef DEBUG - ErtsSysFdType fd; - #endif + SafeHash tab; + int num_prealloc; + ErtsDrvEventState *prealloc_first; + erts_spinlock_t prealloc_lock; #endif - }; -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS -static int max_fds = -1; -#endif -#define DRV_EV_STATE_LOCK_CNT 16 -static union { - erts_mtx_t lck; - byte _cache_line_alignment[64]; -}drv_ev_state_locks[DRV_EV_STATE_LOCK_CNT]; +int ERTS_WRITE_UNLIKELY(erts_no_pollsets) = 1; +int ERTS_WRITE_UNLIKELY(erts_no_poll_threads) = 1; +struct drv_ev_state_shared drv_ev_state; -static ERTS_INLINE erts_mtx_t* fd_mtx(ErtsSysFdType fd) -{ +static ERTS_INLINE int fd_hash(ErtsSysFdType fd) { int hash = (int)fd; # ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS hash ^= (hash >> 9); # endif - return &drv_ev_state_locks[hash % DRV_EV_STATE_LOCK_CNT].lck; + return hash; +} + +static ERTS_INLINE erts_mtx_t* fd_mtx(ErtsSysFdType fd) +{ + return &drv_ev_state.locks[fd_hash(fd) % ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT].lck; } #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS -static erts_atomic_t drv_ev_state_len; -static ErtsDrvEventState *drv_ev_state; -static erts_mtx_t drv_ev_state_grow_lock; /* prevent lock-hogging of racing growers */ +static ERTS_INLINE ErtsDrvEventState *get_drv_ev_state(ErtsSysFdType fd) +{ + return &drv_ev_state.v[(int) fd]; +} -#else -static SafeHash drv_ev_state_tab; -static int num_state_prealloc; -static ErtsDrvEventState *state_prealloc_first; -erts_spinlock_t state_prealloc_lock; +#define new_drv_ev_state(State, fd) (State) +#define erase_drv_ev_state(State) + +static ERTS_INLINE int grow_drv_ev_state(ErtsSysFdType fd) { + int i; + int old_len; + int new_len; + + if ((unsigned)fd >= (unsigned)erts_atomic_read_nob(&drv_ev_state.len)) { + + if (fd < 0 || fd >= drv_ev_state.max_fds) + return 0; + + erts_mtx_lock(&drv_ev_state.grow_lock); + old_len = erts_atomic_read_nob(&drv_ev_state.len); + if (fd >= old_len) { + new_len = erts_poll_new_table_len(old_len, fd + 1); + if (new_len > drv_ev_state.max_fds) + new_len = drv_ev_state.max_fds; + + for (i=0; i<ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT; i++) { /* lock all fd's */ + erts_mtx_lock(&drv_ev_state.locks[i].lck); + } + drv_ev_state.v = (drv_ev_state.v + ? erts_realloc(ERTS_ALC_T_DRV_EV_STATE, + drv_ev_state.v, + sizeof(ErtsDrvEventState)*new_len) + : erts_alloc(ERTS_ALC_T_DRV_EV_STATE, + sizeof(ErtsDrvEventState)*new_len)); + ERTS_CT_ASSERT(ERTS_EV_TYPE_NONE == 0); + sys_memzero(drv_ev_state.v+old_len, + sizeof(ErtsDrvEventState) * (new_len - old_len)); + for (i = old_len; i < new_len; i++) { + drv_ev_state.v[i].fd = (ErtsSysFdType) i; + } + erts_atomic_set_nob(&drv_ev_state.len, new_len); + for (i=0; i<ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT; i++) { + erts_mtx_unlock(&drv_ev_state.locks[i].lck); + } + } + /*else already grown by racing thread */ + + erts_mtx_unlock(&drv_ev_state.grow_lock); + } + return 1; +} -static ERTS_INLINE ErtsDrvEventState *hash_get_drv_ev_state(ErtsSysFdType fd) +static int drv_ev_state_len(void) +{ + return erts_atomic_read_nob(&drv_ev_state.len); +} + +#else /* !ERTS_SYS_CONTINOUS_FD_NUMBERS */ + +static ERTS_INLINE ErtsDrvEventState *get_drv_ev_state(ErtsSysFdType fd) { ErtsDrvEventState tmpl; tmpl.fd = fd; - return (ErtsDrvEventState *) safe_hash_get(&drv_ev_state_tab, (void *) &tmpl); + return (ErtsDrvEventState *) safe_hash_get(&drv_ev_state.tab, (void *) &tmpl); } -static ERTS_INLINE ErtsDrvEventState* hash_new_drv_ev_state(ErtsSysFdType fd) +static ERTS_INLINE ErtsDrvEventState* new_drv_ev_state(ErtsDrvEventState *state, + ErtsSysFdType fd) { ErtsDrvEventState tmpl; + + if (state) + return state; + tmpl.fd = fd; tmpl.driver.select = NULL; -#if ERTS_CIO_HAVE_DRV_EVENT - tmpl.driver.event = NULL; -#endif tmpl.driver.nif = NULL; tmpl.driver.stop.drv_ptr = NULL; tmpl.events = 0; - tmpl.remove_cnt = 0; + tmpl.active_events = 0; tmpl.type = ERTS_EV_TYPE_NONE; tmpl.flags = 0; - return (ErtsDrvEventState *) safe_hash_put(&drv_ev_state_tab, (void *) &tmpl); + + return (ErtsDrvEventState *) safe_hash_put(&drv_ev_state.tab, (void *) &tmpl); +} + +static ERTS_INLINE void erase_drv_ev_state(ErtsDrvEventState *state) +{ + safe_hash_erase(&drv_ev_state.tab, (void *) state); } -static ERTS_INLINE void hash_erase_drv_ev_state(ErtsDrvEventState *state) +static int drv_ev_state_len(void) { - ASSERT(state->remove_cnt == 0); - safe_hash_erase(&drv_ev_state_tab, (void *) state); + return erts_atomic_read_nob(&drv_ev_state.tab.nitems); } #endif /* !ERTS_SYS_CONTINOUS_FD_NUMBERS */ @@ -222,50 +288,47 @@ static void print_nif_select_op(erts_dsprintf_buf_t*, ErtsSysFdType, static void drv_select_large_fd_error(ErlDrvPort, ErtsSysFdType, int, int); static void nif_select_large_fd_error(ErtsSysFdType, int, ErtsResource*,Eterm ref); #endif -#if ERTS_CIO_HAVE_DRV_EVENT -static void drv_event_steal(ErlDrvPort ix, ErtsDrvEventState *state, - ErlDrvEventData event_data); -static void print_drv_event_op(erts_dsprintf_buf_t *dsbufp, - ErlDrvPort, ErtsSysFdType, ErlDrvEventData); -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS -static void event_large_fd_error(ErlDrvPort, ErtsSysFdType, ErlDrvEventData); -#endif -#endif static void steal_pending_stop_use(erts_dsprintf_buf_t*, ErlDrvPort, ErtsDrvEventState*, int mode, int on); static void steal_pending_stop_nif(erts_dsprintf_buf_t *dsbufp, ErtsResource*, ErtsDrvEventState *state, int mode, int on); - -ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(removed_fd, struct removed_fd, 64, ERTS_ALC_T_FD_LIST) +static ERTS_INLINE void +check_fd_cleanup(ErtsDrvEventState *state, + ErtsDrvSelectDataState **free_select, + ErtsNifSelectDataState **free_nif); +static ERTS_INLINE void iready(Eterm id, ErtsDrvEventState *state); +static ERTS_INLINE void oready(Eterm id, ErtsDrvEventState *state); +#ifdef DEBUG_PRINT_MODE +static char *drvmode2str(int mode); +static char *nifmode2str(enum ErlNifSelectFlags mode); +#endif static ERTS_INLINE void -init_iotask(ErtsIoTask *io_task) +init_iotask(ErtsIoTask *io_task, ErtsSysFdType fd) { erts_port_task_handle_init(&io_task->task); - erts_atomic_init_nob(&io_task->executed_time, ~((erts_aint_t) 0)); + io_task->fd = fd; } static ERTS_INLINE int -is_iotask_active(ErtsIoTask *io_task, erts_aint_t current_cio_time) -{ +is_iotask_active(ErtsIoTask *io_task) +{ if (erts_port_task_is_scheduled(&io_task->task)) return 1; - if (erts_atomic_read_nob(&io_task->executed_time) == current_cio_time) - return 1; return 0; } static ERTS_INLINE ErtsDrvSelectDataState * -alloc_drv_select_data(void) +alloc_drv_select_data(ErtsSysFdType fd) { ErtsDrvSelectDataState *dsp = erts_alloc(ERTS_ALC_T_DRV_SEL_D_STATE, sizeof(ErtsDrvSelectDataState)); dsp->inport = NIL; dsp->outport = NIL; - init_iotask(&dsp->iniotask); - init_iotask(&dsp->outiotask); + init_iotask(&dsp->iniotask, fd); + init_iotask(&dsp->outiotask, fd); return dsp; } @@ -276,8 +339,6 @@ alloc_nif_select_data(void) sizeof(ErtsNifSelectDataState)); dsp->in.pid = NIL; dsp->out.pid = NIL; - dsp->in.ddeselect_cnt = 0; - dsp->out.ddeselect_cnt = 0; return dsp; } @@ -295,201 +356,136 @@ free_nif_select_data(ErtsNifSelectDataState *dsp) erts_free(ERTS_ALC_T_NIF_SEL_D_STATE, dsp); } -#if ERTS_CIO_HAVE_DRV_EVENT - -static ERTS_INLINE ErtsDrvEventDataState * -alloc_drv_event_data(void) +static ERTS_INLINE int +get_pollset_id(ErtsSysFdType fd) { - ErtsDrvEventDataState *dep = erts_alloc(ERTS_ALC_T_DRV_EV_D_STATE, - sizeof(ErtsDrvEventDataState)); - dep->port = NIL; - dep->data = NULL; - dep->removed_events = 0; -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - dep->deferred_events = 0; -#endif - init_iotask(&dep->iotask); - return dep; + return fd_hash(fd) % erts_no_pollsets; } -static ERTS_INLINE void -free_drv_event_data(ErtsDrvEventDataState *dep) +static ERTS_INLINE ErtsPollSet * +get_pollset(ErtsSysFdType fd) { - ASSERT(!erts_port_task_is_scheduled(&dep->iotask.task)); - erts_free(ERTS_ALC_T_DRV_EV_D_STATE, dep); + return pollsetv[get_pollset_id(fd)]; } -#endif /* ERTS_CIO_HAVE_DRV_EVENT */ +#if ERTS_POLL_USE_FALLBACK +static ERTS_INLINE ErtsPollSet * +get_fallback(void) +{ + return flbk_pollset; +} +#endif -static ERTS_INLINE void -remember_removed(ErtsDrvEventState *state, struct pollset_info* psi) +/* + * Place a fd within a pollset. This will automatically use + * the fallback ps if needed. + */ +static ERTS_INLINE ErtsPollEvents +erts_io_control_wakeup(ErtsDrvEventState *state, ErtsPollOp op, + ErtsPollEvents pe, int *wake_poller) { - struct removed_fd *fdlp; + ErtsSysFdType fd = state->fd; + ErtsPollEvents res = 0; + EventStateFlags flags = state->flags; + ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd))); - if (erts_atomic_read_nob(&psi->in_poll_wait)) { - state->remove_cnt++; - ASSERT(state->remove_cnt > 0); - fdlp = removed_fd_alloc(); - #if defined(ERTS_SYS_CONTINOUS_FD_NUMBERS) || defined(DEBUG) - fdlp->fd = state->fd; - #endif - #ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS - fdlp->state = state; - #endif - erts_spin_lock(&psi->removed_list_lock); - fdlp->next = psi->removed_list; - psi->removed_list = fdlp; - erts_spin_unlock(&psi->removed_list_lock); + + if (!(flags & ERTS_EV_FLAG_FALLBACK)) { + res = erts_poll_control(get_pollset(fd), fd, op, pe, wake_poller); + +#if ERTS_POLL_USE_FALLBACK + if (op == ERTS_POLL_OP_ADD && res == ERTS_POLL_EV_NVAL) { + /* When an add fails with NVAL, the poll/kevent operation could not + put that fd in the pollset, so we instead put it into a fallback pollset */ + state->flags |= ERTS_EV_FLAG_FALLBACK; + res = erts_poll_control_flbk(get_fallback(), fd, op, pe, wake_poller); + } + } else { + ASSERT(op != ERTS_POLL_OP_ADD); + res = erts_poll_control_flbk(get_fallback(), fd, op, pe, wake_poller); +#endif } -} + return res; +} -static ERTS_INLINE int -is_removed(ErtsDrvEventState *state) +static ERTS_INLINE ErtsPollEvents +erts_io_control(ErtsDrvEventState *state, ErtsPollOp op, ErtsPollEvents pe) { - /* Note that there is a possible race here, where an fd is removed - (increasing remove_cnt) and then added again just before erts_poll_wait - is called by erts_check_io. Any polled event on the re-added fd will then - be falsely ignored. But that does not matter, as the event will trigger - again next time erl_check_io is called. */ - return state->remove_cnt > 0; + int wake_poller = 0; + return erts_io_control_wakeup(state, op, pe, &wake_poller); } -static void -forget_removed(struct pollset_info* psi) +/* ToDo: Was inline in erl_check_io.h but now need struct erts_poll_thread */ +void +erts_io_notify_port_task_executed(ErtsPortTaskType type, + ErtsPortTaskHandle *pthp, + void (*reset_handle)(ErtsPortTaskHandle *)) { - struct removed_fd* fdlp; - struct removed_fd* tofree; + ErtsIoTask *itp = ErtsContainerStruct(pthp, ErtsIoTask, task); + ErtsSysFdType fd = itp->fd; + erts_mtx_t *mtx = fd_mtx(fd); + int active_events; + ErtsDrvEventState *state; + ErtsDrvSelectDataState *free_select = NULL; + ErtsNifSelectDataState *free_nif = NULL; - /* Fast track: if (atomic_ptr(removed_list)==NULL) return; */ + erts_mtx_lock(mtx); + state = get_drv_ev_state(fd); - erts_spin_lock(&psi->removed_list_lock); - fdlp = psi->removed_list; - psi->removed_list = NULL; - erts_spin_unlock(&psi->removed_list_lock); + active_events = state->active_events; - while (fdlp) { - ErtsResource* resource = NULL; - erts_driver_t* drv_ptr = NULL; - erts_mtx_t* mtx; - ErtsSysFdType fd; - ErtsDrvEventState *state; + switch (type) { + case ERTS_PORT_TASK_INPUT: -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - fd = fdlp->fd; - mtx = fd_mtx(fd); - erts_mtx_lock(mtx); - state = &drv_ev_state[(int) fd]; -#else - state = fdlp->state; - fd = state->fd; - ASSERT(fd == fdlp->fd); - mtx = fd_mtx(fd); - erts_mtx_lock(mtx); -#endif - ASSERT(state->remove_cnt > 0); - if (--state->remove_cnt == 0) { - switch (state->type) { - case ERTS_EV_TYPE_STOP_NIF: - /* Now we can call stop */ - resource = state->driver.stop.resource; - state->driver.stop.resource = NULL; - ASSERT(resource); - state->type = ERTS_EV_TYPE_NONE; - state->flags &= ~ERTS_EV_FLAG_USED; - goto case_ERTS_EV_TYPE_NONE; - - case ERTS_EV_TYPE_STOP_USE: - /* Now we can call stop_select */ - drv_ptr = state->driver.stop.drv_ptr; - ASSERT(drv_ptr); - state->type = ERTS_EV_TYPE_NONE; - state->flags &= ~ERTS_EV_FLAG_USED; - state->driver.stop.drv_ptr = NULL; - /* Fall through */ - case ERTS_EV_TYPE_NONE: - case_ERTS_EV_TYPE_NONE: -#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS - hash_erase_drv_ev_state(state); -#endif - break; - case ERTS_EV_TYPE_DRV_SEL: - case ERTS_EV_TYPE_DRV_EV: - break; - default: - ASSERT(0); - } - } - erts_mtx_unlock(mtx); - if (drv_ptr) { - int was_unmasked = erts_block_fpe(); - DTRACE1(driver_stop_select, drv_ptr->name); - LTTNG1(driver_stop_select, drv_ptr->name); - (*drv_ptr->stop_select) ((ErlDrvEvent) fd, NULL); - erts_unblock_fpe(was_unmasked); - if (drv_ptr->handle) { - erts_ddll_dereference_driver(drv_ptr->handle); - } - } - if (resource) { - erts_resource_stop(resource, (ErlNifEvent)fd, 0); - enif_release_resource(resource->data); - } + DEBUG_PRINT_FD("executed ready_input", state); + + ASSERT(!(state->active_events & ERTS_POLL_EV_IN)); + if (state->events & ERTS_POLL_EV_IN) + active_events |= ERTS_POLL_EV_IN; + break; + case ERTS_PORT_TASK_OUTPUT: - tofree = fdlp; - fdlp = fdlp->next; - removed_fd_free(tofree); + DEBUG_PRINT_FD("executed ready_output", state); + + ASSERT(!(state->active_events & ERTS_POLL_EV_OUT)); + if (state->events & ERTS_POLL_EV_OUT) + active_events |= ERTS_POLL_EV_OUT; + break; + default: + erts_exit(ERTS_ABORT_EXIT, "Invalid IO port task type"); + break; } -} -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS -static void -grow_drv_ev_state(int min_ix) -{ - int i; - int old_len; - int new_len; + reset_handle(pthp); - erts_mtx_lock(&drv_ev_state_grow_lock); - old_len = erts_atomic_read_nob(&drv_ev_state_len); - if (min_ix >= old_len) { - new_len = erts_poll_new_table_len(old_len, min_ix + 1); - if (new_len > max_fds) - new_len = max_fds; + if (active_events) { + /* This is not needed if active_events has not changed */ + if (state->active_events != active_events) { + ErtsPollEvents new_events; + state->active_events = active_events; + new_events = erts_io_control(state, ERTS_POLL_OP_MOD, active_events); - for (i=0; i<DRV_EV_STATE_LOCK_CNT; i++) { /* lock all fd's */ - erts_mtx_lock(&drv_ev_state_locks[i].lck); - } - drv_ev_state = (drv_ev_state - ? erts_realloc(ERTS_ALC_T_DRV_EV_STATE, - drv_ev_state, - sizeof(ErtsDrvEventState)*new_len) - : erts_alloc(ERTS_ALC_T_DRV_EV_STATE, - sizeof(ErtsDrvEventState)*new_len)); - for (i = old_len; i < new_len; i++) { - drv_ev_state[i].fd = (ErtsSysFdType) i; - drv_ev_state[i].driver.select = NULL; -#if ERTS_CIO_HAVE_DRV_EVENT - drv_ev_state[i].driver.event = NULL; -#endif - drv_ev_state[i].driver.stop.drv_ptr = NULL; - drv_ev_state[i].driver.nif = NULL; - drv_ev_state[i].events = 0; - drv_ev_state[i].remove_cnt = 0; - drv_ev_state[i].type = ERTS_EV_TYPE_NONE; - drv_ev_state[i].flags = 0; - } - erts_atomic_set_nob(&drv_ev_state_len, new_len); - for (i=0; i<DRV_EV_STATE_LOCK_CNT; i++) { - erts_mtx_unlock(&drv_ev_state_locks[i].lck); - } + /* We were unable to re-insert the fd into the pollset, signal the callback. */ + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + if (active_events & ERTS_POLL_EV_IN) + iready(state->driver.select->inport, state); + if (active_events & ERTS_POLL_EV_OUT) + oready(state->driver.select->outport, state); + state->active_events = 0; + } + } + } else { + check_fd_cleanup(state, &free_select, &free_nif); } - /*else already grown by racing thread */ - erts_mtx_unlock(&drv_ev_state_grow_lock); -} -#endif /* ERTS_SYS_CONTINOUS_FD_NUMBERS */ + erts_mtx_unlock(mtx); + if (free_select) + free_drv_select_data(free_select); + if (free_nif) + free_nif_select_data(free_nif); +} static ERTS_INLINE void abort_task(Eterm id, ErtsPortTaskHandle *pthp, EventStateType type) @@ -506,13 +502,6 @@ abort_tasks(ErtsDrvEventState *state, int mode) switch (mode) { case 0: check_type: switch (state->type) { -#if ERTS_CIO_HAVE_DRV_EVENT - case ERTS_EV_TYPE_DRV_EV: - abort_task(state->driver.event->port, - &state->driver.event->iotask.task, - ERTS_EV_TYPE_DRV_EV); - return; -#endif case ERTS_EV_TYPE_NIF: case ERTS_EV_TYPE_NONE: return; @@ -542,16 +531,14 @@ abort_tasks(ErtsDrvEventState *state, int mode) static void deselect(ErtsDrvEventState *state, int mode) { - int do_wake = 0; ErtsPollEvents rm_events; ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd))); - ASSERT(state->events); abort_tasks(state, mode); - if (!mode) + if (!mode) { rm_events = state->events; - else { + } else { rm_events = 0; ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL); if (mode & ERL_DRV_READ) { @@ -564,67 +551,63 @@ deselect(ErtsDrvEventState *state, int mode) } } - state->events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, rm_events, 0, &do_wake); + state->events &= ~rm_events; + state->active_events &= ~rm_events; if (!(state->events)) { + erts_io_control(state, ERTS_POLL_OP_DEL, 0); switch (state->type) { case ERTS_EV_TYPE_NIF: state->driver.nif->in.pid = NIL; state->driver.nif->out.pid = NIL; - state->driver.nif->in.ddeselect_cnt = 0; - state->driver.nif->out.ddeselect_cnt = 0; - enif_release_resource(state->driver.stop.resource); + enif_release_resource(state->driver.stop.resource->data); state->driver.stop.resource = NULL; break; case ERTS_EV_TYPE_DRV_SEL: state->driver.select->inport = NIL; state->driver.select->outport = NIL; break; -#if ERTS_CIO_HAVE_DRV_EVENT - case ERTS_EV_TYPE_DRV_EV: - state->driver.event->port = NIL; - state->driver.event->data = NULL; - state->driver.event->removed_events = (ErtsPollEvents) 0; - break; -#endif case ERTS_EV_TYPE_NONE: break; default: ASSERT(0); break; } - state->type = ERTS_EV_TYPE_NONE; - state->flags &= ~ERTS_EV_FLAG_USED; - remember_removed(state, &pollset); + state->flags = 0; + } else { + ErtsPollEvents new_events = + erts_io_control(state, ERTS_POLL_OP_MOD, state->active_events); + + /* We were unable to re-insert the fd into the pollset, signal the callback. */ + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + if (state->active_events & ERTS_POLL_EV_IN) + iready(state->driver.select->inport, state); + if (state->active_events & ERTS_POLL_EV_OUT) + oready(state->driver.select->outport, state); + state->active_events = 0; + } } } #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS -# define IS_FD_UNKNOWN(state) ((state)->type == ERTS_EV_TYPE_NONE && (state)->remove_cnt == 0) +# define IS_FD_UNKNOWN(state) ((state)->type == ERTS_EV_TYPE_NONE) #else # define IS_FD_UNKNOWN(state) ((state) == NULL) #endif static ERTS_INLINE void check_fd_cleanup(ErtsDrvEventState *state, -#if ERTS_CIO_HAVE_DRV_EVENT - ErtsDrvEventDataState **free_event, -#endif ErtsDrvSelectDataState **free_select, ErtsNifSelectDataState **free_nif) { - erts_aint_t current_cio_time; - ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd))); - - current_cio_time = erts_atomic_read_acqb(&erts_check_io_time); *free_select = NULL; if (state->driver.select && (state->type != ERTS_EV_TYPE_DRV_SEL) - && !is_iotask_active(&state->driver.select->iniotask, current_cio_time) - && !is_iotask_active(&state->driver.select->outiotask, current_cio_time)) { - + && !is_iotask_active(&state->driver.select->iniotask) + && !is_iotask_active(&state->driver.select->outiotask)) { + *free_select = state->driver.select; state->driver.select = NULL; } @@ -635,382 +618,85 @@ check_fd_cleanup(ErtsDrvEventState *state, state->driver.nif = NULL; } -#if ERTS_CIO_HAVE_DRV_EVENT - *free_event = NULL; - if (state->driver.event - && (state->type != ERTS_EV_TYPE_DRV_EV) - && !is_iotask_active(&state->driver.event->iotask, current_cio_time)) { - - *free_event = state->driver.event; - state->driver.event = NULL; - } -#endif - -#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS if (((state->type != ERTS_EV_TYPE_NONE) - | state->remove_cnt -#if ERTS_CIO_HAVE_DRV_EVENT - | (state->driver.event != NULL) -#endif + | (state->driver.nif != NULL) | (state->driver.select != NULL)) == 0) { - hash_erase_drv_ev_state(state); - + erase_drv_ev_state(state); } -#endif } -static ERTS_INLINE int -check_cleanup_active_fd(ErtsSysFdType fd, -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - ErtsPollControlEntry *pce, - int *pce_ix, -#endif - erts_aint_t current_cio_time, - int may_sleep) -{ - ErtsDrvEventState *state; - int active = 0; - erts_mtx_t *mtx = fd_mtx(fd); - void *free_select = NULL; - void *free_nif = NULL; -#if ERTS_CIO_HAVE_DRV_EVENT - void *free_event = NULL; -#endif -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - ErtsPollEvents evon = 0, evoff = 0; -#endif - - erts_mtx_lock(mtx); - -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - state = &drv_ev_state[(int) fd]; +#ifdef __WIN32__ +# define MUST_DEFER(MAY_SLEEP) 1 #else - state = hash_get_drv_ev_state(fd); /* may be NULL! */ - if (state) -#endif - { - if (state->driver.select) { -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - if (is_iotask_active(&state->driver.select->iniotask, current_cio_time)) { - active = 1; - if ((state->events & ERTS_POLL_EV_IN) - && !(state->flags & ERTS_EV_FLAG_DEFER_IN_EV)) { - evoff |= ERTS_POLL_EV_IN; - state->flags |= ERTS_EV_FLAG_DEFER_IN_EV; - } - } - else if (state->flags & ERTS_EV_FLAG_DEFER_IN_EV) { - if (state->events & ERTS_POLL_EV_IN) - evon |= ERTS_POLL_EV_IN; - state->flags &= ~ERTS_EV_FLAG_DEFER_IN_EV; - } - if (is_iotask_active(&state->driver.select->outiotask, current_cio_time)) { - active = 1; - if ((state->events & ERTS_POLL_EV_OUT) - && !(state->flags & ERTS_EV_FLAG_DEFER_OUT_EV)) { - evoff |= ERTS_POLL_EV_OUT; - state->flags |= ERTS_EV_FLAG_DEFER_OUT_EV; - } - } - else if (state->flags & ERTS_EV_FLAG_DEFER_OUT_EV) { - if (state->events & ERTS_POLL_EV_OUT) - evon |= ERTS_POLL_EV_OUT; - state->flags &= ~ERTS_EV_FLAG_DEFER_OUT_EV; - } - if (active) - (void) 0; - else -#else - if (is_iotask_active(&state->driver.select->iniotask, current_cio_time) - || is_iotask_active(&state->driver.select->outiotask, current_cio_time)) - active = 1; - else -#endif - if (state->type != ERTS_EV_TYPE_DRV_SEL) { - free_select = state->driver.select; - state->driver.select = NULL; - } - } - - if (state->driver.nif) { - ErtsPollEvents rm_events = 0; - if (state->driver.nif->in.ddeselect_cnt) { - ASSERT(state->type == ERTS_EV_TYPE_NIF); - ASSERT(state->events & ERTS_POLL_EV_IN); - ASSERT(is_nil(state->driver.nif->in.pid)); - if (may_sleep || state->driver.nif->in.ddeselect_cnt == 1) { - rm_events = ERTS_POLL_EV_IN; - state->driver.nif->in.ddeselect_cnt = 0; - } - } - if (state->driver.nif->out.ddeselect_cnt) { - ASSERT(state->type == ERTS_EV_TYPE_NIF); - ASSERT(state->events & ERTS_POLL_EV_OUT); - ASSERT(is_nil(state->driver.nif->out.pid)); - if (may_sleep || state->driver.nif->out.ddeselect_cnt == 1) { - rm_events |= ERTS_POLL_EV_OUT; - state->driver.nif->out.ddeselect_cnt = 0; - } - } - if (rm_events) { - int do_wake = 0; - state->events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, - rm_events, 0, &do_wake); - } - if (state->events) - active = 1; - else if (state->type != ERTS_EV_TYPE_NIF) { - free_nif = state->driver.nif; - state->driver.nif = NULL; - } - } - -#if ERTS_CIO_HAVE_DRV_EVENT - if (state->driver.event) { - if (is_iotask_active(&state->driver.event->iotask, current_cio_time)) { -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - ErtsPollEvents evs = state->events & ~state->driver.event->deferred_events; - if (evs) { - evoff |= evs; - state->driver.event->deferred_events |= evs; - } -#endif - active = 1; - } - else if (state->type != ERTS_EV_TYPE_DRV_EV) { - free_event = state->driver.event; - state->driver.event = NULL; - } -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - else { - ErtsPollEvents evs = state->events & state->driver.event->deferred_events; - if (evs) { - evon |= evs; - state->driver.event->deferred_events = 0; - } - } -#endif - - } -#endif - -#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (((state->type != ERTS_EV_TYPE_NONE) | state->remove_cnt | active) == 0) - hash_erase_drv_ev_state(state); -#endif - - } - - erts_mtx_unlock(mtx); - - if (free_select) - free_drv_select_data(free_select); - if (free_nif) - free_nif_select_data(free_nif); -#if ERTS_CIO_HAVE_DRV_EVENT - if (free_event) - free_drv_event_data(free_event); +# define MUST_DEFER(MAY_SLEEP) (MAY_SLEEP) #endif -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - if (evoff) { - ErtsPollControlEntry *pcep = &pce[(*pce_ix)++]; - pcep->fd = fd; - pcep->events = evoff; - pcep->on = 0; - } - if (evon) { - ErtsPollControlEntry *pcep = &pce[(*pce_ix)++]; - pcep->fd = fd; - pcep->events = evon; - pcep->on = 1; - } -#endif - - return active; -} - -static void -check_cleanup_active_fds(erts_aint_t current_cio_time, int may_sleep) -{ - int six = pollset.active_fd.six; - int eix = pollset.active_fd.eix; - erts_aint32_t no = erts_atomic32_read_dirty(&pollset.active_fd.no); - int size = pollset.active_fd.size; - int ix = six; -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - /* every fd might add two entries */ - Uint pce_sz = 2*sizeof(ErtsPollControlEntry)*no; - ErtsPollControlEntry *pctrl_entries = (pce_sz - ? erts_alloc(ERTS_ALC_T_TMP, pce_sz) - : NULL); - int pctrl_ix = 0; -#endif - - while (ix != eix) { - ErtsSysFdType fd = pollset.active_fd.array[ix]; - int nix = ix + 1; - if (nix >= size) - nix = 0; - ASSERT(fd != ERTS_SYS_FD_INVALID); - if (!check_cleanup_active_fd(fd, -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - pctrl_entries, - &pctrl_ix, -#endif - current_cio_time, - may_sleep)) { - no--; - if (ix == six) { -#ifdef DEBUG - pollset.active_fd.array[ix] = ERTS_SYS_FD_INVALID; -#endif - six = nix; - } - else { - pollset.active_fd.array[ix] = pollset.active_fd.array[six]; -#ifdef DEBUG - pollset.active_fd.array[six] = ERTS_SYS_FD_INVALID; -#endif - six++; - if (six >= size) - six = 0; - } - } - ix = nix; - } - -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - ASSERT(pctrl_ix <= pce_sz/sizeof(ErtsPollControlEntry)); - if (pctrl_ix) - ERTS_CIO_POLL_CTLV(pollset.ps, pctrl_entries, pctrl_ix); - if (pctrl_entries) - erts_free(ERTS_ALC_T_TMP, pctrl_entries); -#endif - - pollset.active_fd.six = six; - pollset.active_fd.eix = eix; - erts_atomic32_set_relb(&pollset.active_fd.no, no); -} - -static void grow_active_fds(void) -{ - ASSERT(pollset.active_fd.six == pollset.active_fd.eix); - pollset.active_fd.six = 0; - pollset.active_fd.eix = pollset.active_fd.size; - pollset.active_fd.size += ERTS_ACTIVE_FD_INC; - pollset.active_fd.array = erts_realloc(ERTS_ALC_T_ACTIVE_FD_ARR, - pollset.active_fd.array, - pollset.active_fd.size*sizeof(ErtsSysFdType)); -#ifdef DEBUG - { - int i; - for (i = pollset.active_fd.eix + 1; i < pollset.active_fd.size; i++) - pollset.active_fd.array[i] = ERTS_SYS_FD_INVALID; - } -#endif -} - -static ERTS_INLINE void -add_active_fd(ErtsSysFdType fd) -{ - int eix = pollset.active_fd.eix; - int size = pollset.active_fd.size; - - pollset.active_fd.array[eix] = fd; - - erts_atomic32_set_relb(&pollset.active_fd.no, - (erts_atomic32_read_dirty(&pollset.active_fd.no) - + 1)); - - eix++; - if (eix >= size) - eix = 0; - pollset.active_fd.eix = eix; - - if (pollset.active_fd.six == eix) { - grow_active_fds(); - } -} - int -ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix, - ErlDrvEvent e, - int mode, - int on) +driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on) { void (*stop_select_fn)(ErlDrvEvent, void*) = NULL; Port *prt = erts_drvport2port(ix); Eterm id = erts_drvport2id(ix); ErtsSysFdType fd = (ErtsSysFdType) e; ErtsPollEvents ctl_events = (ErtsPollEvents) 0; - ErtsPollEvents new_events, old_events; + ErtsPollEvents old_events; + ErtsPollEvents new_events; + ErtsPollOp ctl_op = ERTS_POLL_OP_MOD; ErtsDrvEventState *state; - int wake_poller; + int wake_poller = 0; int ret; -#if ERTS_CIO_HAVE_DRV_EVENT - ErtsDrvEventDataState *free_event = NULL; -#endif ErtsDrvSelectDataState *free_select = NULL; ErtsNifSelectDataState *free_nif = NULL; #ifdef USE_VM_PROBES DTRACE_CHARBUF(name, 64); #endif + ERTS_MSACC_PUSH_AND_SET_STATE(ERTS_MSACC_STATE_CHECK_IO); - if (prt == ERTS_INVALID_ERL_DRV_PORT) + if (prt == ERTS_INVALID_ERL_DRV_PORT) { + ERTS_MSACC_POP_STATE(); return -1; + } ERTS_LC_ASSERT(erts_lc_is_port_locked(prt)); #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - if ((unsigned)fd >= (unsigned)erts_atomic_read_nob(&drv_ev_state_len)) { - if (fd < 0) { - return -1; - } - if (fd >= max_fds) { - drv_select_large_fd_error(ix, fd, mode, on); - return -1; - } - grow_drv_ev_state(fd); + if (!grow_drv_ev_state(fd)) { + if (fd > 0) drv_select_large_fd_error(ix, fd, mode, on); + ERTS_MSACC_POP_STATE(); + return -1; } #endif erts_mtx_lock(fd_mtx(fd)); -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - state = &drv_ev_state[(int) fd]; -#else - state = hash_get_drv_ev_state(fd); /* may be NULL! */ -#endif + state = get_drv_ev_state(fd); /* may be NULL! */ - if (!on && (mode&ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) { - if (IS_FD_UNKNOWN(state)) { - /* fast track to stop_select callback */ - stop_select_fn = prt->drv_ptr->stop_select; -#ifdef USE_VM_PROBES - strncpy(name, prt->drv_ptr->name, - sizeof(DTRACE_CHARBUF_NAME(name))-1); - name[sizeof(name)-1] = '\0'; -#endif - ret = 0; - goto done_unknown; - } - mode |= (ERL_DRV_READ | ERL_DRV_WRITE); - wake_poller = 1; /* to eject fd from pollset (if needed) */ - } - else wake_poller = 0; + DEBUG_PRINT_FD("driver_select(%T, %p, %s, %d)", + state, id, fd, drvmode2str(mode), on); -#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (state == NULL) { - state = hash_new_drv_ev_state(fd); + if (!on) { + if (IS_FD_UNKNOWN(state)) { + if ((mode&ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) { + /* fast track to stop_select callback */ + stop_select_fn = prt->drv_ptr->stop_select; + #ifdef USE_VM_PROBES + strncpy(name, prt->drv_ptr->name, + sizeof(DTRACE_CHARBUF_NAME(name))-1); + name[sizeof(name)-1] = '\0'; + #endif + } + ret = 0; + goto done_unknown; + } + else if ((mode&ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) { + mode |= (ERL_DRV_READ | ERL_DRV_WRITE); + } } -#endif + + state = new_drv_ev_state(state, fd); switch (state->type) { -#if ERTS_CIO_HAVE_DRV_EVENT - case ERTS_EV_TYPE_DRV_EV: -#endif case ERTS_EV_TYPE_NIF: drv_select_steal(ix, state, mode, on); break; @@ -1032,7 +718,9 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix, ASSERT(state->type == ERTS_EV_TYPE_NONE); break; - }} + } + default: break; + } if (mode & ERL_DRV_READ) { if (state->type == ERTS_EV_TYPE_DRV_SEL) { @@ -1040,7 +728,7 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix, if (owner != id && is_not_nil(owner)) drv_select_steal(ix, state, mode, on); } - ctl_events |= ERTS_POLL_EV_IN; + ctl_events = ERTS_POLL_EV_IN; } if (mode & ERL_DRV_WRITE) { if (state->type == ERTS_EV_TYPE_DRV_SEL) { @@ -1049,100 +737,105 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix, drv_select_steal(ix, state, mode, on); } ctl_events |= ERTS_POLL_EV_OUT; - } + } + ASSERT((state->type == ERTS_EV_TYPE_DRV_SEL) || (state->type == ERTS_EV_TYPE_NONE && !state->events)); - if (!on && !(state->flags & ERTS_EV_FLAG_USED) - && state->events && !(state->events & ~ctl_events)) { - /* Old driver removing all events. At least wake poller. - It will not make close() 100% safe but it will prevent - actions delayed by poll timeout. */ - wake_poller = 1; - } + old_events = state->events; - new_events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, ctl_events, on, &wake_poller); + if (on) { + ctl_events &= ~old_events; + state->events |= ctl_events; + if (ctl_events & ERTS_POLL_EV_IN && (!state->driver.select || !is_iotask_active(&state->driver.select->iniotask))) + state->active_events |= ERTS_POLL_EV_IN; + if (ctl_events & ERTS_POLL_EV_OUT && (!state->driver.select || !is_iotask_active(&state->driver.select->outiotask))) + state->active_events |= ERTS_POLL_EV_OUT; + if (old_events == 0 && !(state->flags & ERTS_EV_FLAG_USED)) { + ctl_op = ERTS_POLL_OP_ADD; + } + } + else { + ctl_events &= old_events; + state->events &= ~ctl_events; + state->active_events &= ~ctl_events; - if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { - if (state->type == ERTS_EV_TYPE_DRV_SEL && !state->events) { - state->type = ERTS_EV_TYPE_NONE; - state->flags &= ~ERTS_EV_FLAG_USED; - state->driver.select->inport = NIL; - state->driver.select->outport = NIL; - } - ret = -1; - goto done; + if (!state->events) { + if (!(state->flags & ERTS_EV_FLAG_USED) || mode & ERL_DRV_USE) + ctl_op = ERTS_POLL_OP_DEL; + } } - old_events = state->events; + if (ctl_events || ctl_op == ERTS_POLL_OP_DEL) { - ASSERT(on - ? (new_events == (state->events | ctl_events)) - : (new_events == (state->events & ~ctl_events))); + new_events = erts_io_control_wakeup(state, ctl_op, + state->active_events, + &wake_poller); - ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL - || state->type == ERTS_EV_TYPE_NONE); + ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL || state->type == ERTS_EV_TYPE_NONE); + } - state->events = new_events; - if (ctl_events) { - if (on) { + if (on) { + if (ctl_events) { if (!state->driver.select) - state->driver.select = alloc_drv_select_data(); + state->driver.select = alloc_drv_select_data(state->fd); if (state->type == ERTS_EV_TYPE_NONE) state->type = ERTS_EV_TYPE_DRV_SEL; ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL); - if (ctl_events & ERTS_POLL_EV_IN) + if (ctl_events & ERTS_POLL_EV_IN) { state->driver.select->inport = id; - if (ctl_events & ERTS_POLL_EV_OUT) + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) + iready(id, state); + } + if (ctl_events & ERTS_POLL_EV_OUT) { state->driver.select->outport = id; - if (mode & ERL_DRV_USE) { + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) + oready(id, state); + } + if (mode & ERL_DRV_USE) state->flags |= ERTS_EV_FLAG_USED; - } - } - else { /* off */ - if (state->type == ERTS_EV_TYPE_DRV_SEL) { - if (ctl_events & ERTS_POLL_EV_IN) { - abort_tasks(state, ERL_DRV_READ); - state->driver.select->inport = NIL; - } - if (ctl_events & ERTS_POLL_EV_OUT) { - abort_tasks(state, ERL_DRV_WRITE); - state->driver.select->outport = NIL; - } - if (new_events == 0) { - if (old_events != 0) { - remember_removed(state, &pollset); - } - if ((mode & ERL_DRV_USE) || !(state->flags & ERTS_EV_FLAG_USED)) { - state->type = ERTS_EV_TYPE_NONE; - state->flags &= ~ERTS_EV_FLAG_USED; - } - /*else keep it, as fd will probably be selected upon again */ - } - } - if ((mode & ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) { - erts_driver_t* drv_ptr = prt->drv_ptr; - ASSERT(new_events==0); - if (state->remove_cnt == 0 || !wake_poller) { - /* Safe to close fd now as it is not in pollset - or there was no need to eject fd (kernel poll) */ - stop_select_fn = drv_ptr->stop_select; + } + } + else { /* off */ + if (state->type == ERTS_EV_TYPE_DRV_SEL) { + if (ctl_events & ERTS_POLL_EV_IN) { + abort_tasks(state, ERL_DRV_READ); + state->driver.select->inport = NIL; + } + if (ctl_events & ERTS_POLL_EV_OUT) { + abort_tasks(state, ERL_DRV_WRITE); + state->driver.select->outport = NIL; + } + if (state->events == 0) { + if ((mode & ERL_DRV_USE) || !(state->flags & ERTS_EV_FLAG_USED)) { + state->type = ERTS_EV_TYPE_NONE; + state->flags = 0; + } + /*else keep it, as fd will probably be selected upon again */ + } + } + if ((mode & ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) { + erts_driver_t* drv_ptr = prt->drv_ptr; + ASSERT(state->events==0); + if (!wake_poller) { + /* Safe to close fd now as it is not in pollset + or there was no need to eject fd (kernel poll) */ + stop_select_fn = drv_ptr->stop_select; #ifdef USE_VM_PROBES - strncpy(name, prt->drv_ptr->name, sizeof(name)-1); - name[sizeof(name)-1] = '\0'; + strncpy(name, prt->drv_ptr->name, sizeof(name)-1); + name[sizeof(name)-1] = '\0'; #endif - } - else { - /* Not safe to close fd, postpone stop_select callback. */ - state->type = ERTS_EV_TYPE_STOP_USE; - state->driver.stop.drv_ptr = drv_ptr; - if (drv_ptr->handle) { - erts_ddll_reference_referenced_driver(drv_ptr->handle); - } - } - } - } + } + else { + /* Not safe to close fd, postpone stop_select callback. */ + state->type = ERTS_EV_TYPE_STOP_USE; + state->driver.stop.drv_ptr = drv_ptr; + if (drv_ptr->handle) { + erts_ddll_reference_referenced_driver(drv_ptr->handle); + } + } + } } ret = 0; @@ -1150,9 +843,6 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix, done: check_fd_cleanup(state, -#if ERTS_CIO_HAVE_DRV_EVENT - &free_event, -#endif &free_select, &free_nif); @@ -1170,61 +860,47 @@ done_unknown: if (free_nif) free_nif_select_data(free_nif); -#if ERTS_CIO_HAVE_DRV_EVENT - if (free_event) - free_drv_event_data(free_event); -#endif + ERTS_MSACC_POP_STATE(); + return ret; } int -ERTS_CIO_EXPORT(enif_select)(ErlNifEnv* env, - ErlNifEvent e, - enum ErlNifSelectFlags mode, - void* obj, - const ErlNifPid* pid, - Eterm ref) +enif_select(ErlNifEnv* env, + ErlNifEvent e, + enum ErlNifSelectFlags mode, + void* obj, + const ErlNifPid* pid, + Eterm ref) { int on; ErtsResource* resource = DATA_TO_RESOURCE(obj); ErtsSysFdType fd = (ErtsSysFdType) e; ErtsPollEvents ctl_events = (ErtsPollEvents) 0; - ErtsPollEvents new_events, old_events; + ErtsPollEvents old_events; + ErtsPollOp ctl_op = ERTS_POLL_OP_MOD; ErtsDrvEventState *state; - int wake_poller; - int ret; + int ret, wake_poller = 0; enum { NO_STOP=0, CALL_STOP, CALL_STOP_AND_RELEASE } call_stop = NO_STOP; -#if ERTS_CIO_HAVE_DRV_EVENT - ErtsDrvEventDataState *free_event = NULL; -#endif ErtsDrvSelectDataState *free_select = NULL; ErtsNifSelectDataState *free_nif = NULL; -#ifdef USE_VM_PROBES - DTRACE_CHARBUF(name, 64); -#endif ASSERT(!(resource->monitors && resource->monitors->is_dying)); #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - if ((unsigned)fd >= (unsigned)erts_atomic_read_nob(&drv_ev_state_len)) { - if (fd < 0) { - return INT_MIN | ERL_NIF_SELECT_INVALID_EVENT; - } - if (fd >= max_fds) { - nif_select_large_fd_error(fd, mode, resource, ref); - return INT_MIN | ERL_NIF_SELECT_INVALID_EVENT; - } - grow_drv_ev_state(fd); + if (!grow_drv_ev_state(fd)) { + if (fd > 0) nif_select_large_fd_error(fd, mode, resource, ref); + return INT_MIN | ERL_NIF_SELECT_INVALID_EVENT; } #endif erts_mtx_lock(fd_mtx(fd)); -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - state = &drv_ev_state[(int) fd]; -#else - state = hash_get_drv_ev_state(fd); /* may be NULL! */ -#endif + state = get_drv_ev_state(fd); /* may be NULL! */ + + DEBUG_PRINT_FD("enif_select(%T, %d, %s, %p, %T, %T)", + state, env->proc->common.id, fd, nifmode2str(mode), resource, + pid ? pid->pid : THE_NON_VALUE, ref); if (mode & ERL_NIF_SELECT_STOP) { ASSERT(resource->type->stop); @@ -1236,13 +912,12 @@ ERTS_CIO_EXPORT(enif_select)(ErlNifEnv* env, } on = 0; mode = ERL_DRV_READ | ERL_DRV_WRITE | ERL_DRV_USE; - wake_poller = 1; /* to eject fd from pollset (if needed) */ ctl_events = ERTS_POLL_EV_IN | ERTS_POLL_EV_OUT; + ctl_op = ERTS_POLL_OP_DEL; } else { on = 1; ASSERT(mode); - wake_poller = 0; if (mode & ERL_DRV_READ) { ctl_events |= ERTS_POLL_EV_IN; } @@ -1251,11 +926,7 @@ ERTS_CIO_EXPORT(enif_select)(ErlNifEnv* env, } } -#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (state == NULL) { - state = hash_new_drv_ev_state(fd); - } -#endif + state = new_drv_ev_state(state,fd); switch (state->type) { case ERTS_EV_TYPE_NIF: @@ -1266,9 +937,6 @@ ERTS_CIO_EXPORT(enif_select)(ErlNifEnv* env, if (state->driver.stop.resource != resource) nif_select_steal(state, ERL_DRV_READ | ERL_DRV_WRITE, resource, ref); break; -#if ERTS_CIO_HAVE_DRV_EVENT - case ERTS_EV_TYPE_DRV_EV: -#endif case ERTS_EV_TYPE_DRV_SEL: nif_select_steal(state, mode, resource, ref); break; @@ -1289,37 +957,52 @@ ERTS_CIO_EXPORT(enif_select)(ErlNifEnv* env, } ASSERT(state->type == ERTS_EV_TYPE_NONE); break; - }} + } + default: break; + } ASSERT((state->type == ERTS_EV_TYPE_NIF) || (state->type == ERTS_EV_TYPE_NONE && !state->events)); - new_events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, ctl_events, on, &wake_poller); + old_events = state->events; - if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { - if (state->type == ERTS_EV_TYPE_NIF && !state->events) { - state->type = ERTS_EV_TYPE_NONE; - state->flags &= ~ERTS_EV_FLAG_USED; - state->driver.nif->in.pid = NIL; - state->driver.nif->out.pid = NIL; - state->driver.nif->in.ddeselect_cnt = 0; - state->driver.nif->out.ddeselect_cnt = 0; - state->driver.stop.resource = NULL; - } - ret = INT_MIN | ERL_NIF_SELECT_FAILED; - goto done; + if (on) { + ctl_events &= ~old_events; + state->events |= ctl_events; + state->active_events |= ctl_events; + if (state->type == ERTS_EV_TYPE_NONE) + ctl_op = ERTS_POLL_OP_ADD; + } + else { + ctl_events &= old_events; + state->events &= ~ctl_events; + state->active_events &= ~ctl_events; } - old_events = state->events; + if (ctl_events || ctl_op == ERTS_POLL_OP_DEL) { + ErtsPollEvents new_events; - ASSERT(on - ? (new_events == (state->events | ctl_events)) - : (new_events == (state->events & ~ctl_events))); + new_events = erts_io_control_wakeup(state, ctl_op, + state->active_events, + &wake_poller); + + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + if (state->type == ERTS_EV_TYPE_NIF && !old_events) { + state->type = ERTS_EV_TYPE_NONE; + state->flags = 0; + state->driver.nif->in.pid = NIL; + state->driver.nif->out.pid = NIL; + state->driver.stop.resource = NULL; + } + ret = INT_MIN | ERL_NIF_SELECT_FAILED; + goto done; + } + ASSERT(new_events == state->events); + } ASSERT(state->type == ERTS_EV_TYPE_NIF || state->type == ERTS_EV_TYPE_NONE); - state->events = new_events; if (on) { const Eterm recipient = pid ? pid->pid : env->proc->common.id; Uint32* refn; @@ -1332,7 +1015,7 @@ ERTS_CIO_EXPORT(enif_select)(ErlNifEnv* env, } ASSERT(state->type == ERTS_EV_TYPE_NIF); ASSERT(state->driver.stop.resource == resource); - if (ctl_events & ERTS_POLL_EV_IN) { + if (mode & ERL_DRV_READ) { state->driver.nif->in.pid = recipient; if (is_immed(ref)) { state->driver.nif->in.immed = ref; @@ -1340,13 +1023,11 @@ ERTS_CIO_EXPORT(enif_select)(ErlNifEnv* env, ASSERT(is_internal_ref(ref)); refn = internal_ref_numbers(ref); state->driver.nif->in.immed = THE_NON_VALUE; - state->driver.nif->in.refn[0] = refn[0]; - state->driver.nif->in.refn[1] = refn[1]; - state->driver.nif->in.refn[2] = refn[2]; + sys_memcpy(state->driver.nif->in.refn, refn, + sizeof(state->driver.nif->in.refn)); } - state->driver.nif->in.ddeselect_cnt = 0; } - if (ctl_events & ERTS_POLL_EV_OUT) { + if (mode & ERL_DRV_WRITE) { state->driver.nif->out.pid = recipient; if (is_immed(ref)) { state->driver.nif->out.immed = ref; @@ -1354,11 +1035,9 @@ ERTS_CIO_EXPORT(enif_select)(ErlNifEnv* env, ASSERT(is_internal_ref(ref)); refn = internal_ref_numbers(ref); state->driver.nif->out.immed = THE_NON_VALUE; - state->driver.nif->out.refn[0] = refn[0]; - state->driver.nif->out.refn[1] = refn[1]; - state->driver.nif->out.refn[2] = refn[2]; + sys_memcpy(state->driver.nif->out.refn, refn, + sizeof(state->driver.nif->out.refn)); } - state->driver.nif->out.ddeselect_cnt = 0; } ret = 0; } @@ -1366,14 +1045,9 @@ ERTS_CIO_EXPORT(enif_select)(ErlNifEnv* env, if (state->type == ERTS_EV_TYPE_NIF) { state->driver.nif->in.pid = NIL; state->driver.nif->out.pid = NIL; - state->driver.nif->in.ddeselect_cnt = 0; - state->driver.nif->out.ddeselect_cnt = 0; - if (old_events != 0) { - remember_removed(state, &pollset); - } } - ASSERT(new_events==0); - if (state->remove_cnt == 0 || !wake_poller) { + ASSERT(state->events==0); + if (!wake_poller) { /* * Safe to close fd now as it is not in pollset * or there was no need to eject fd (kernel poll) @@ -1405,9 +1079,6 @@ ERTS_CIO_EXPORT(enif_select)(ErlNifEnv* env, done: check_fd_cleanup(state, -#if ERTS_CIO_HAVE_DRV_EVENT - &free_event, -#endif &free_select, &free_nif); @@ -1424,160 +1095,7 @@ done_unknown: if (free_nif) free_nif_select_data(free_nif); -#if ERTS_CIO_HAVE_DRV_EVENT - if (free_event) - free_drv_event_data(free_event); -#endif - return ret; -} - - -int -ERTS_CIO_EXPORT(driver_event)(ErlDrvPort ix, - ErlDrvEvent e, - ErlDrvEventData event_data) -{ -#if !ERTS_CIO_HAVE_DRV_EVENT - return -1; -#else - ErtsSysFdType fd = (ErtsSysFdType) e; - ErtsPollEvents events; - ErtsPollEvents add_events; - ErtsPollEvents remove_events; - Eterm id = erts_drvport2id(ix); - ErtsDrvEventState *state; - int do_wake = 0; - int ret; -#if ERTS_CIO_HAVE_DRV_EVENT - ErtsDrvEventDataState *free_event; -#endif - ErtsDrvSelectDataState *free_select; - ErtsNifSelectDataState *free_nif; - Port *prt = erts_drvport2port(ix); - - if (prt == ERTS_INVALID_ERL_DRV_PORT) - return -1; - - ERTS_LC_ASSERT(erts_lc_is_port_locked(prt)); - -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - if ((unsigned)fd >= (unsigned)erts_atomic_read_nob(&drv_ev_state_len)) { - if (fd < 0) - return -1; - if (fd >= max_fds) { - event_large_fd_error(ix, fd, event_data); - return -1; - } - grow_drv_ev_state(fd); - } -#endif - - erts_mtx_lock(fd_mtx(fd)); - -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - state = &drv_ev_state[(int) fd]; -#else - /* Could use hash_new directly, but want to keep the normal case fast */ - state = hash_get_drv_ev_state(fd); - if (state == NULL) { - state = hash_new_drv_ev_state(fd); - } -#endif - - switch (state->type) { - case ERTS_EV_TYPE_DRV_EV: - if (state->driver.event->port == id) break; - /*fall through*/ - case ERTS_EV_TYPE_DRV_SEL: - drv_event_steal(ix, state, event_data); - break; - case ERTS_EV_TYPE_STOP_USE: { - erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - print_drv_event_op(dsbufp, ix, fd, event_data); - steal_pending_stop_use(dsbufp, ix, state, 0, 1); - break; - } - } - - ASSERT(state->type == ERTS_EV_TYPE_DRV_EV - || state->type == ERTS_EV_TYPE_NONE); - - events = state->events; - - if (!event_data) { - remove_events = events; - add_events = 0; - } - else { - remove_events = ~event_data->events & events; - add_events = ~events & event_data->events; - } - - if (add_events) { - events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, add_events, 1, &do_wake); - if (events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { - ret = -1; - goto done; - } - } - if (remove_events) { - events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, remove_events, 0, &do_wake); - if (events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { - ret = -1; - goto done; - } - } - if (event_data && event_data->events != 0) { - if (state->type == ERTS_EV_TYPE_DRV_EV) { - state->driver.event->removed_events &= ~add_events; - state->driver.event->removed_events |= remove_events; - } - else { - if (!state->driver.event) - state->driver.event = alloc_drv_event_data(); - state->driver.event->port = id; - state->driver.event->removed_events = (ErtsPollEvents) 0; - state->type = ERTS_EV_TYPE_DRV_EV; - } - state->driver.event->data = event_data; - } - else { - if (state->type == ERTS_EV_TYPE_DRV_EV) { - abort_tasks(state, 0); - state->driver.event->port = NIL; - state->driver.event->data = NULL; - state->driver.event->removed_events = (ErtsPollEvents) 0; - } - state->type = ERTS_EV_TYPE_NONE; - remember_removed(state, &pollset); - } - state->events = events; - ASSERT(event_data ? events == event_data->events : events == 0); - - ret = 0; - -done: - - check_fd_cleanup(state, -#if ERTS_CIO_HAVE_DRV_EVENT - &free_event, -#endif - &free_select, - &free_nif); - - erts_mtx_unlock(fd_mtx(fd)); - - if (free_select) - free_drv_select_data(free_select); - if (free_nif) - free_nif_select_data(free_nif); -#if ERTS_CIO_HAVE_DRV_EVENT - if (free_event) - free_drv_event_data(free_event); -#endif - return ret; -#endif } static ERTS_INLINE int @@ -1611,11 +1129,6 @@ need2steal(ErtsDrvEventState *state, int mode) do_steal = 1; break; -#if ERTS_CIO_HAVE_DRV_EVENT - case ERTS_EV_TYPE_DRV_EV: - do_steal |= chk_stale(state->driver.event->port, state, 0); - break; -#endif case ERTS_EV_TYPE_STOP_USE: case ERTS_EV_TYPE_STOP_NIF: ASSERT(0); @@ -1649,7 +1162,7 @@ print_driver_name(erts_dsprintf_buf_t *dsbufp, Eterm id) static void steal(erts_dsprintf_buf_t *dsbufp, ErtsDrvEventState *state, int mode) { - erts_dsprintf(dsbufp, "stealing control of fd=%d from ", (int) GET_FD(state->fd)); + erts_dsprintf(dsbufp, "stealing control of fd=%d from ", (int) state->fd); switch (state->type) { case ERTS_EV_TYPE_DRV_SEL: { int deselect_mode = 0; @@ -1673,7 +1186,7 @@ steal(erts_dsprintf_buf_t *dsbufp, ErtsDrvEventState *state, int mode) if (deselect_mode) deselect(state, deselect_mode); else { - erts_dsprintf(dsbufp, "no one", (int) GET_FD(state->fd)); + erts_dsprintf(dsbufp, "no one", (int) state->fd); ASSERT(0); } erts_dsprintf(dsbufp, "\n"); @@ -1698,30 +1211,13 @@ steal(erts_dsprintf_buf_t *dsbufp, ErtsDrvEventState *state, int mode) erts_dsprintf(dsbufp, "\n"); break; } -#if ERTS_CIO_HAVE_DRV_EVENT - case ERTS_EV_TYPE_DRV_EV: { - Eterm eid = state->driver.event->port; - if (is_nil(eid)) { - erts_dsprintf(dsbufp, "no one", (int) state->fd); - ASSERT(0); - } - else { - erts_dsprintf(dsbufp, "event driver "); - print_driver_name(dsbufp, eid); - erts_dsprintf(dsbufp, "%T ", eid); - } - erts_dsprintf(dsbufp, "\n"); - deselect(state, 0); - break; - } -#endif case ERTS_EV_TYPE_STOP_USE: case ERTS_EV_TYPE_STOP_NIF: { ASSERT(0); break; } default: - erts_dsprintf(dsbufp, "no one\n", (int) GET_FD(state->fd)); + erts_dsprintf(dsbufp, "no one\n", (int) state->fd); ASSERT(0); } } @@ -1735,7 +1231,7 @@ print_drv_select_op(erts_dsprintf_buf_t *dsbufp, "driver_select(%p, %d,%s%s%s%s, %d) " "by ", ix, - (int) GET_FD(fd), + (int) fd, mode & ERL_DRV_READ ? " ERL_DRV_READ" : "", mode & ERL_DRV_WRITE ? " ERL_DRV_WRITE" : "", mode & ERL_DRV_USE ? " ERL_DRV_USE" : "", @@ -1752,7 +1248,7 @@ print_nif_select_op(erts_dsprintf_buf_t *dsbufp, { erts_dsprintf(dsbufp, "enif_select(_, %d,%s%s%s, %T:%T, %T) ", - (int) GET_FD(fd), + (int) fd, mode & ERL_NIF_SELECT_READ ? " READ" : "", mode & ERL_NIF_SELECT_WRITE ? " WRITE" : "", mode & ERL_NIF_SELECT_STOP ? " STOP" : "", @@ -1791,7 +1287,7 @@ large_fd_error_common(erts_dsprintf_buf_t *dsbufp, ErtsSysFdType fd) { erts_dsprintf(dsbufp, "fd=%d is larger than the largest allowed fd=%d\n", - (int) fd, max_fds - 1); + (int) fd, drv_ev_state.max_fds - 1); } static void @@ -1847,7 +1343,7 @@ steal_pending_stop_use(erts_dsprintf_buf_t *dsbufp, ErlDrvPort ix, erts_ddll_dereference_driver(state->driver.stop.drv_ptr->handle); } state->type = ERTS_EV_TYPE_NONE; - state->flags &= ~ERTS_EV_FLAG_USED; + state->flags = 0; state->driver.stop.drv_ptr = NULL; } else { @@ -1884,9 +1380,9 @@ steal_pending_stop_nif(erts_dsprintf_buf_t *dsbufp, ErtsResource* resource, erts_dsprintf(dsbufp, "called before stop was called for NIF resource %T:%T\n", rt->module, rt->name); - enif_release_resource(state->driver.stop.resource); + enif_release_resource(state->driver.stop.resource->data); state->type = ERTS_EV_TYPE_NONE; - state->flags &= ~ERTS_EV_FLAG_USED; + state->flags = 0; state->driver.stop.resource = NULL; } else { @@ -1896,59 +1392,9 @@ steal_pending_stop_nif(erts_dsprintf_buf_t *dsbufp, ErtsResource* resource, } - -#if ERTS_CIO_HAVE_DRV_EVENT - -static void -print_drv_event_op(erts_dsprintf_buf_t *dsbufp, - ErlDrvPort ix, ErtsSysFdType fd, ErlDrvEventData event_data) -{ - Port *pp = erts_drvport2port(ix); - erts_dsprintf(dsbufp, "driver_event(%p, %d, ", ix, (int) fd); - if (!event_data) - erts_dsprintf(dsbufp, "NULL"); - else - erts_dsprintf(dsbufp, "{0x%x, 0x%x}", - (unsigned int) event_data->events, - (unsigned int) event_data->revents); - erts_dsprintf(dsbufp, ") by "); - if (pp != ERTS_INVALID_ERL_DRV_PORT) - print_driver_name(dsbufp, pp->common.id); - erts_dsprintf(dsbufp, "driver %T ", pp != ERTS_INVALID_ERL_DRV_PORT ? pp->common.id : NIL); -} - -static void -drv_event_steal(ErlDrvPort ix, ErtsDrvEventState *state, ErlDrvEventData event_data) -{ - if (need2steal(state, ERL_DRV_READ|ERL_DRV_WRITE)) { - erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - print_drv_event_op(dsbufp, ix, state->fd, event_data); - steal(dsbufp, state, ERL_DRV_READ|ERL_DRV_WRITE); - erts_send_error_to_logger_nogl(dsbufp); - } - else if (state->type == ERTS_EV_TYPE_DRV_SEL) { - ASSERT(state->flags & ERTS_EV_FLAG_USED); - deselect(state, 0); - } -} - -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS -static void -event_large_fd_error(ErlDrvPort ix, ErtsSysFdType fd, ErlDrvEventData event_data) -{ - erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - print_drv_event_op(dsbufp, ix, fd, event_data); - erts_dsprintf(dsbufp, "failed: "); - large_fd_error_common(dsbufp, fd); - erts_send_error_to_logger_nogl(dsbufp); -} -#endif -#endif - static ERTS_INLINE int io_task_schedule_allowed(ErtsDrvEventState *state, - ErtsPortTaskType type, - erts_aint_t current_cio_time) + ErtsPortTaskType type) { ErtsIoTask *io_task; @@ -1956,71 +1402,53 @@ io_task_schedule_allowed(ErtsDrvEventState *state, case ERTS_PORT_TASK_INPUT: if (!state->driver.select) return 0; -#if ERTS_CIO_HAVE_DRV_EVENT - if (state->driver.event) - return 0; -#endif io_task = &state->driver.select->iniotask; break; case ERTS_PORT_TASK_OUTPUT: if (!state->driver.select) return 0; -#if ERTS_CIO_HAVE_DRV_EVENT - if (state->driver.event) - return 0; -#endif io_task = &state->driver.select->outiotask; break; -#if ERTS_CIO_HAVE_DRV_EVENT - case ERTS_PORT_TASK_EVENT: - if (!state->driver.event) - return 0; - if (state->driver.select) - return 0; - io_task = &state->driver.event->iotask; - break; -#endif default: ERTS_INTERNAL_ERROR("Invalid I/O-task type"); return 0; } - return !is_iotask_active(io_task, current_cio_time); + return !is_iotask_active(io_task); } static ERTS_INLINE void -iready(Eterm id, ErtsDrvEventState *state, erts_aint_t current_cio_time) +iready(Eterm id, ErtsDrvEventState *state) { if (io_task_schedule_allowed(state, - ERTS_PORT_TASK_INPUT, - current_cio_time)) { + ERTS_PORT_TASK_INPUT)) { ErtsIoTask *iotask = &state->driver.select->iniotask; - erts_atomic_set_nob(&iotask->executed_time, current_cio_time); if (erts_port_task_schedule(id, &iotask->task, ERTS_PORT_TASK_INPUT, (ErlDrvEvent) state->fd) != 0) { stale_drv_select(id, state, ERL_DRV_READ); - } - add_active_fd(state->fd); + } else { + DEBUG_PRINT_FD("schedule ready_input(%T, %d)", + state, id, state->fd); + } } } static ERTS_INLINE void -oready(Eterm id, ErtsDrvEventState *state, erts_aint_t current_cio_time) +oready(Eterm id, ErtsDrvEventState *state) { if (io_task_schedule_allowed(state, - ERTS_PORT_TASK_OUTPUT, - current_cio_time)) { + ERTS_PORT_TASK_OUTPUT)) { ErtsIoTask *iotask = &state->driver.select->outiotask; - erts_atomic_set_nob(&iotask->executed_time, current_cio_time); if (erts_port_task_schedule(id, &iotask->task, ERTS_PORT_TASK_OUTPUT, (ErlDrvEvent) state->fd) != 0) { stale_drv_select(id, state, ERL_DRV_WRITE); - } - add_active_fd(state->fd); + } else { + DEBUG_PRINT_FD("schedule ready_output(%T, %d)", state, id, state->fd); + } } } @@ -2072,131 +1500,59 @@ send_event_tuple(struct erts_nif_select_event* e, ErtsResource* resource, erts_proc_unlock(rp, rp_locks); } - -#if ERTS_CIO_HAVE_DRV_EVENT -static ERTS_INLINE void -eready(Eterm id, ErtsDrvEventState *state, ErlDrvEventData event_data, - erts_aint_t current_cio_time) -{ - if (io_task_schedule_allowed(state, - ERTS_PORT_TASK_EVENT, - current_cio_time)) { - ErtsIoTask *iotask = &state->driver.event->iotask; - erts_atomic_set_nob(&iotask->executed_time, current_cio_time); - if (erts_port_task_schedule(id, - &iotask->task, - ERTS_PORT_TASK_EVENT, - (ErlDrvEvent) state->fd, - event_data) != 0) { - stale_drv_select(id, state, 0); - } - add_active_fd(state->fd); - } -} -#endif - static void bad_fd_in_pollset(ErtsDrvEventState *, Eterm inport, Eterm outport); - void -ERTS_CIO_EXPORT(erts_check_io_interrupt)(int set) +erts_check_io_interrupt(ErtsPollThread *psi, int set) { - ERTS_CIO_POLL_INTR(pollset.ps, set); + if (psi) { +#if ERTS_POLL_USE_FALLBACK + if (psi->ps == get_fallback()) { + erts_poll_interrupt_flbk(psi->ps, set); + return; + } +#endif + erts_poll_interrupt(psi->ps, set); + } } -void -ERTS_CIO_EXPORT(erts_check_io_interrupt_timed)(int set, - ErtsMonotonicTime timeout_time) -{ - ERTS_CIO_POLL_INTR_TMD(pollset.ps, set, timeout_time); +ErtsPollThread * +erts_create_pollset_thread(int id) { + return psiv+id; } -#if !ERTS_CIO_DEFER_ACTIVE_EVENTS -/* - * Number of ignored events, for a lingering fd added by enif_select(), - * until we deselect fd-event from pollset. - */ -# define ERTS_NIF_DELAYED_DESELECT 20 -#else -/* Disable delayed deselect as pollset cannot handle active events */ -# define ERTS_NIF_DELAYED_DESELECT 1 -#endif - void -ERTS_CIO_EXPORT(erts_check_io)(int do_wait) +erts_check_io(ErtsPollThread *psi) { - ErtsPollResFd *pollres; int pollres_len; - ErtsMonotonicTime timeout_time; int poll_ret, i; - erts_aint_t current_cio_time; - ErtsSchedulerData *esdp = erts_get_scheduler_data(); - - ASSERT(esdp); + ERTS_MSACC_PUSH_AND_SET_STATE(ERTS_MSACC_STATE_CHECK_IO); restart: -#ifdef ERTS_BREAK_REQUESTED - if (ERTS_BREAK_REQUESTED) - erts_do_break_handling(); -#endif - -#ifdef ERTS_SIGNAL_STATE /* ifndef ERTS_SMP */ - if (ERTS_SIGNAL_STATE) { - erts_handle_signal_state(); - } -#endif - - /* Figure out timeout value */ - timeout_time = (do_wait - ? erts_check_next_timeout_time(esdp) - : ERTS_POLL_NO_TIMEOUT /* poll only */); - - /* - * No need for an atomic inc op when incrementing - * erts_check_io_time, since only one thread can - * check io at a time. - */ - current_cio_time = erts_atomic_read_dirty(&erts_check_io_time); - current_cio_time++; - erts_atomic_set_relb(&erts_check_io_time, current_cio_time); - - check_cleanup_active_fds(current_cio_time, - timeout_time != ERTS_POLL_NO_TIMEOUT); - #ifdef ERTS_ENABLE_LOCK_CHECK erts_lc_check_exact(NULL, 0); /* No locks should be locked */ #endif - pollres_len = erts_atomic32_read_dirty(&pollset.active_fd.no) + ERTS_CHECK_IO_POLL_RES_LEN; + pollres_len = psi->pollres_len; - pollres = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollResFd)*pollres_len); +#if ERTS_POLL_USE_FALLBACK + if (psi->ps == get_fallback()) { - erts_atomic_set_nob(&pollset.in_poll_wait, 1); + poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len); - poll_ret = ERTS_CIO_POLL_WAIT(pollset.ps, pollres, &pollres_len, timeout_time); + } else +#endif + { + poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len); + } #ifdef ERTS_ENABLE_LOCK_CHECK erts_lc_check_exact(NULL, 0); /* No locks should be locked */ #endif -#ifdef ERTS_BREAK_REQUESTED - if (ERTS_BREAK_REQUESTED) - erts_do_break_handling(); -#endif - - -#ifdef ERTS_SIGNAL_STATE /* ifndef ERTS_SMP */ - if (ERTS_SIGNAL_STATE) { - erts_handle_signal_state(); - } -#endif - - if (poll_ret != 0) { - erts_atomic_set_nob(&pollset.in_poll_wait, 0); - forget_removed(&pollset); - erts_free(ERTS_ALC_T_TMP, pollres); + if (poll_ret == EAGAIN) { goto restart; } @@ -2212,65 +1568,86 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait) erl_errno_id(poll_ret), poll_ret); erts_send_error_to_logger_nogl(dsbufp); } + ERTS_MSACC_POP_STATE(); return; } for (i = 0; i < pollres_len; i++) { - ErtsSysFdType fd = (ErtsSysFdType) pollres[i].fd; + erts_driver_t* drv_ptr = NULL; + ErtsResource* resource = NULL; + ErtsDrvSelectDataState *free_select = NULL; + ErtsNifSelectDataState *free_nif = NULL; + ErtsSysFdType fd = (ErtsSysFdType) ERTS_POLL_RES_GET_FD(&psi->pollres[i]); ErtsDrvEventState *state; + ErtsPollEvents revents; erts_mtx_lock(fd_mtx(fd)); -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - state = &drv_ev_state[ (int) fd]; -#else - state = hash_get_drv_ev_state(fd); + state = get_drv_ev_state(fd); + if (!state) { - goto next_pollres; + erts_mtx_unlock(fd_mtx(fd)); + continue; } -#endif - /* Skip this fd if it was removed from pollset */ - if (is_removed(state)) { - goto next_pollres; - } + revents = ERTS_POLL_RES_GET_EVTS(&psi->pollres[i]); + + DEBUG_PRINT_FD("triggered %s", state, ev2str(revents)); + + if (revents & ERTS_POLL_EV_ERR) { + /* + * Handle error events by triggering all in/out events + * that has been selected on. + * We *do not* want to call a callback that corresponds + * to an event not selected. + */ + revents = state->active_events; + state->active_events = 0; + } else { + + /* Disregard any events that are not active at the moment, + for instance this could happen if the driver/nif does + select/deselect in rapid succession. */ + revents &= state->active_events | ERTS_POLL_EV_NVAL; + state->active_events &= ~revents; + + /* Reactivate the poll op if there are still active events */ + if (state->active_events) { + ErtsPollEvents new_events; + DEBUG_PRINT_FD("re-enable %s", state, ev2str(state->active_events)); + + new_events = erts_io_control(state, ERTS_POLL_OP_MOD, state->active_events); + + /* Unable to re-enable the fd, signal all callbacks */ + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + revents |= state->active_events; + state->active_events = 0; + } + } + } switch (state->type) { case ERTS_EV_TYPE_DRV_SEL: { /* Requested via driver_select()... */ - ErtsPollEvents revents = pollres[i].events; - - if (revents & ERTS_POLL_EV_ERR) { - /* - * Handle error events by triggering all in/out events - * that the driver has selected. - * We *do not* want to call a callback that corresponds - * to an event not selected. - */ - revents = state->events; - } - else { - revents &= (state->events | ERTS_POLL_EV_NVAL); - } if (revents & (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) { if (revents & ERTS_POLL_EV_OUT) { - oready(state->driver.select->outport, state, current_cio_time); + oready(state->driver.select->outport, state); } /* Someone might have deselected input since revents was read (true also on the non-smp emulator since oready() may have been called); therefore, update revents... */ - revents &= state->events; + revents &= state->events; if (revents & ERTS_POLL_EV_IN) { - iready(state->driver.select->inport, state, current_cio_time); + iready(state->driver.select->inport, state); } } else if (revents & ERTS_POLL_EV_NVAL) { bad_fd_in_pollset(state, state->driver.select->inport, state->driver.select->outport); - add_active_fd(state->fd); + check_fd_cleanup(state, &free_select, &free_nif); } break; } @@ -2279,108 +1656,116 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait) struct erts_nif_select_event in = {NIL}; struct erts_nif_select_event out = {NIL}; ErtsResource* resource = NULL; - ErtsPollEvents revents = pollres[i].events; - - if (revents & ERTS_POLL_EV_ERR) { - /* - * Handle error events by triggering all in/out events - * that the NIF has selected. - * We *do not* want to send a message that corresponds - * to an event not selected. - */ - revents = state->events; - } - else { - revents &= (state->events | ERTS_POLL_EV_NVAL); - } if (revents & (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) { if (revents & ERTS_POLL_EV_OUT) { if (is_not_nil(state->driver.nif->out.pid)) { out = state->driver.nif->out; resource = state->driver.stop.resource; - state->driver.nif->out.ddeselect_cnt = ERTS_NIF_DELAYED_DESELECT; state->driver.nif->out.pid = NIL; - add_active_fd(state->fd); - } - else { - ASSERT(state->driver.nif->out.ddeselect_cnt >= 2); - state->driver.nif->out.ddeselect_cnt--; } } if (revents & ERTS_POLL_EV_IN) { if (is_not_nil(state->driver.nif->in.pid)) { in = state->driver.nif->in; resource = state->driver.stop.resource; - state->driver.nif->in.ddeselect_cnt = ERTS_NIF_DELAYED_DESELECT; state->driver.nif->in.pid = NIL; - add_active_fd(state->fd); - } - else { - ASSERT(state->driver.nif->in.ddeselect_cnt >= 2); - state->driver.nif->in.ddeselect_cnt--; } } + state->events &= ~revents; } else if (revents & ERTS_POLL_EV_NVAL) { bad_fd_in_pollset(state, NIL, NIL); - add_active_fd(state->fd); + check_fd_cleanup(state, &free_select, &free_nif); } erts_mtx_unlock(fd_mtx(fd)); + if (is_not_nil(in.pid)) { send_event_tuple(&in, resource, am_ready_input); } if (is_not_nil(out.pid)) { send_event_tuple(&out, resource, am_ready_output); } - goto next_pollres_unlocked; + continue; } -#if ERTS_CIO_HAVE_DRV_EVENT - case ERTS_EV_TYPE_DRV_EV: { /* Requested via driver_event()... */ - ErlDrvEventData event_data; - ErtsPollEvents revents; - ASSERT(state->driver.event); - ASSERT(state->driver.event->data); - event_data = state->driver.event->data; - revents = pollres[i].events; - revents &= ~state->driver.event->removed_events; - - if (revents) { - event_data->events = state->events; - event_data->revents = revents; - eready(state->driver.event->port, state, event_data, current_cio_time); - } - break; - } -#endif + case ERTS_EV_TYPE_STOP_NIF: { + resource = state->driver.stop.resource; + state->type = ERTS_EV_TYPE_NONE; + goto case_ERTS_EV_TYPE_NONE; + } + case ERTS_EV_TYPE_STOP_USE: { +#if ERTS_POLL_USE_FALLBACK + ASSERT(psi->ps == get_fallback()); +#endif + drv_ptr = state->driver.stop.drv_ptr; + state->type = ERTS_EV_TYPE_NONE; + /* fallthrough */ case ERTS_EV_TYPE_NONE: /* Deselected ... */ + case_ERTS_EV_TYPE_NONE: + ASSERT(!state->events && !state->active_events && !state->flags); + check_fd_cleanup(state, &free_select, &free_nif); break; + } default: { /* Error */ erts_dsprintf_buf_t *dsbufp; dsbufp = erts_create_logger_dsbuf(); erts_dsprintf(dsbufp, "Invalid event request type for fd in erts_poll()! " - "fd=%d, event request type=%sd\n", (int) state->fd, + "fd=%d, event request type=%d\n", (int) state->fd, (int) state->type); ASSERT(0); deselect(state, 0); - add_active_fd(state->fd); break; } } - next_pollres:; erts_mtx_unlock(fd_mtx(fd)); - next_pollres_unlocked:; - } - erts_atomic_set_nob(&pollset.in_poll_wait, 0); - erts_free(ERTS_ALC_T_TMP, pollres); - forget_removed(&pollset); + if (drv_ptr) { + int was_unmasked = erts_block_fpe(); + DTRACE1(driver_stop_select, drv_ptr->name); + LTTNG1(driver_stop_select, drv_ptr->name); + (*drv_ptr->stop_select)((ErlDrvEvent) fd, NULL); + erts_unblock_fpe(was_unmasked); + if (drv_ptr->handle) { + erts_ddll_dereference_driver(drv_ptr->handle); + } + } + if (resource) { + erts_resource_stop(resource, (ErlNifEvent)fd, 1); + enif_release_resource(resource->data); + } + if (free_select) + free_drv_select_data(free_select); + if (free_nif) + free_nif_select_data(free_nif); + } + + /* The entire pollres array was filled with events, + * grow it for the next call. We do this for two reasons: + * 1. Pulling out more events in on go will increase throughput + * 2. If the polling implementation is not fair, this will make + * sure that we get all fds that we can. i.e. if 12 fds are + * constantly active, but we only have a pollres_len of 10, + * two of the fds may never be triggered depending on what the + * kernel decides to do. + **/ + if (pollres_len == psi->pollres_len) { + int ev_state_len = drv_ev_state_len(); + erts_free(ERTS_ALC_T_POLLSET, psi->pollres); + psi->pollres_len *= 2; + /* Never grow it larger than the current drv_ev_state.len size */ + if (psi->pollres_len > ev_state_len) + psi->pollres_len = ev_state_len; + psi->pollres = erts_alloc(ERTS_ALC_T_POLLSET, + sizeof(ErtsPollResFd) * psi->pollres_len); + } + + ERTS_MSACC_POP_STATE(); } static void @@ -2474,16 +1859,16 @@ static int drv_ev_state_cmp(void *des1, void *des2) static void *drv_ev_state_alloc(void *des_tmpl) { ErtsDrvEventState *evstate; - erts_spin_lock(&state_prealloc_lock); - if (state_prealloc_first == NULL) { - erts_spin_unlock(&state_prealloc_lock); + erts_spin_lock(&drv_ev_state.prealloc_lock); + if (drv_ev_state.prealloc_first == NULL) { + erts_spin_unlock(&drv_ev_state.prealloc_lock); evstate = (ErtsDrvEventState *) erts_alloc(ERTS_ALC_T_DRV_EV_STATE, sizeof(ErtsDrvEventState)); } else { - evstate = state_prealloc_first; - state_prealloc_first = (ErtsDrvEventState *) evstate->hb.next; - --num_state_prealloc; - erts_spin_unlock(&state_prealloc_lock); + evstate = drv_ev_state.prealloc_first; + drv_ev_state.prealloc_first = (ErtsDrvEventState *) evstate->hb.next; + --drv_ev_state.num_prealloc; + erts_spin_unlock(&drv_ev_state.prealloc_lock); } /* XXX: Already valid data if prealloced, could ignore template! */ *evstate = *((ErtsDrvEventState *) des_tmpl); @@ -2493,58 +1878,200 @@ static void *drv_ev_state_alloc(void *des_tmpl) static void drv_ev_state_free(void *des) { - erts_spin_lock(&state_prealloc_lock); - ((ErtsDrvEventState *) des)->hb.next = &state_prealloc_first->hb; - state_prealloc_first = (ErtsDrvEventState *) des; - ++num_state_prealloc; - erts_spin_unlock(&state_prealloc_lock); + erts_spin_lock(&drv_ev_state.prealloc_lock); + ((ErtsDrvEventState *) des)->hb.next = &drv_ev_state.prealloc_first->hb; + drv_ev_state.prealloc_first = (ErtsDrvEventState *) des; + ++drv_ev_state.num_prealloc; + erts_spin_unlock(&drv_ev_state.prealloc_lock); } #endif +#define ERTS_MAX_NO_OF_POLL_THREADS ERTS_MAX_NO_OF_SCHEDULERS + +static char * +get_arg(char* rest, char** argv, int* ip) +{ + int i = *ip; + if (*rest == '\0') { + if (argv[i+1] == NULL) { + erts_fprintf(stderr, "too few arguments\n"); + erts_usage(); + } + argv[i++] = NULL; + rest = argv[i]; + } + argv[i] = NULL; + *ip = i; + return rest; +} + +static void +parse_args(int *argc, char **argv, int concurrent_waiters) +{ + int i = 0, j; + int no_pollsets = 0, no_poll_threads = 0, + no_pollsets_percentage = 0, + no_poll_threads_percentage = 0; + ASSERT(argc && argv); + while (i < *argc) { + if(argv[i][0] == '-') { + switch (argv[i][1]) { + case 'I': { + if (strncmp(argv[i]+2, "Ot", 2) == 0) { + char *arg = get_arg(argv[i]+4, argv, &i); + if (sscanf(arg, "%d", &no_poll_threads) != 1 || + no_poll_threads < 1 || + ERTS_MAX_NO_OF_POLL_THREADS < no_poll_threads) { + erts_fprintf(stderr,"bad I/O poll threads number: %s\n", arg); + erts_usage(); + } + } else if (strncmp(argv[i]+2, "Op", 3) == 0) { + char *arg = get_arg(argv[i]+4, argv, &i); + if (sscanf(arg, "%d", &no_pollsets) != 1 || + no_pollsets < 1) { + erts_fprintf(stderr,"bad I/O pollset number: %s\n", arg); + erts_usage(); + } + } else if (strncmp(argv[i]+2, "OPt", 4) == 0) { + char *arg = get_arg(argv[i]+5, argv, &i); + if (sscanf(arg, "%d", &no_poll_threads_percentage) != 1 || + no_poll_threads_percentage < 0 || + no_poll_threads_percentage > 100) { + erts_fprintf(stderr,"bad I/O poll thread percentage number: %s\n", arg); + erts_usage(); + } + } else if (strncmp(argv[i]+2, "OPp", 4) == 0) { + char *arg = get_arg(argv[i]+5, argv, &i); + if (sscanf(arg, "%d", &no_pollsets_percentage) != 1 || + no_pollsets_percentage < 0 || + no_pollsets_percentage > 100) { + erts_fprintf(stderr,"bad I/O pollset percentage number: %s\n", arg); + erts_usage(); + } + } else { + break; + } + break; + } + case 'K': + (void)get_arg(argv[i]+2, argv, &i); + break; + case '-': + goto args_parsed; + default: + break; + } + } + i++; + } + +args_parsed: + + if (!concurrent_waiters) { + no_pollsets = no_poll_threads; + no_pollsets_percentage = 100; + } + + if (no_poll_threads == 0) { + if (no_poll_threads_percentage == 0) + no_poll_threads = 1; /* This is the default */ + else { + no_poll_threads = erts_no_schedulers * no_poll_threads_percentage / 100; + if (no_poll_threads < 1) + no_poll_threads = 1; + } + } + + if (no_pollsets == 0) { + if (no_pollsets_percentage == 0) + no_pollsets = 1; /* This is the default */ + else { + no_pollsets = no_poll_threads * no_pollsets_percentage / 100; + if (no_pollsets < 1) + no_pollsets = 1; + } + } + + if (no_poll_threads < no_pollsets) { + erts_fprintf(stderr, + "number of IO poll threads has to be greater or equal to " + "the number of \nIO pollsets. Current values are set to: \n" + " -IOt %d -IOp %d\n", + no_poll_threads, no_pollsets); + erts_usage(); + } + + /* Handled arguments have been marked with NULL. Slide arguments + not handled towards the beginning of argv. */ + for (i = 0, j = 0; i < *argc; i++) { + if (argv[i]) + argv[j++] = argv[i]; + } + *argc = j; + + erts_no_pollsets = no_pollsets; + erts_no_poll_threads = no_poll_threads; +} + void -ERTS_CIO_EXPORT(erts_init_check_io)(void) +erts_init_check_io(int *argc, char **argv) { + int j, concurrent_waiters, no_poll_threads; ERTS_CT_ASSERT((INT_MIN & (ERL_NIF_SELECT_STOP_CALLED | ERL_NIF_SELECT_STOP_SCHEDULED | ERL_NIF_SELECT_INVALID_EVENT | ERL_NIF_SELECT_FAILED)) == 0); - erts_atomic_init_nob(&erts_check_io_time, 0); - erts_atomic_init_nob(&pollset.in_poll_wait, 0); - - ERTS_CIO_POLL_INIT(); - pollset.ps = ERTS_CIO_NEW_POLLSET(); - - pollset.active_fd.six = 0; - pollset.active_fd.eix = 0; - erts_atomic32_init_nob(&pollset.active_fd.no, 0); - pollset.active_fd.size = ERTS_ACTIVE_FD_INC; - pollset.active_fd.array = erts_alloc(ERTS_ALC_T_ACTIVE_FD_ARR, - sizeof(ErtsSysFdType)*ERTS_ACTIVE_FD_INC); -#ifdef DEBUG - { - int i; - for (i = 0; i < ERTS_ACTIVE_FD_INC; i++) - pollset.active_fd.array[i] = ERTS_SYS_FD_INVALID; - } + + + erts_poll_init(&concurrent_waiters); +#if ERTS_POLL_USE_FALLBACK + erts_poll_init_flbk(NULL); #endif + parse_args(argc, argv, concurrent_waiters); - init_removed_fd_alloc(); - pollset.removed_list = NULL; - erts_spinlock_init(&pollset.removed_list_lock, "pollset_rm_list", NIL, - ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO); - { - int i; - for (i=0; i<DRV_EV_STATE_LOCK_CNT; i++) { - erts_mtx_init(&drv_ev_state_locks[i].lck, "drv_ev_state", make_small(i), - ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO); - } + /* Create the actual pollsets */ + pollsetv = erts_alloc(ERTS_ALC_T_POLLSET,sizeof(ErtsPollSet *) * erts_no_pollsets); + + for (j=0; j < erts_no_pollsets; j++) + pollsetv[j] = erts_poll_create_pollset(j); + +#if ERTS_POLL_USE_FALLBACK + flbk_pollset = erts_poll_create_pollset_flbk(-1); +#endif + + no_poll_threads = erts_no_poll_threads; +#if ERTS_POLL_USE_FALLBACK + no_poll_threads++; +#endif + + psiv = erts_alloc(ERTS_ALC_T_POLLSET, sizeof(ErtsPollThread) * no_poll_threads); + +#if ERTS_POLL_USE_FALLBACK + psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN; + psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET, + sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN); + psiv[0].ps = get_fallback(); + psiv++; +#endif + + for (j = 0; j < erts_no_poll_threads; j++) { + psiv[j].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN; + psiv[j].pollres = erts_alloc(ERTS_ALC_T_POLLSET, + sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN); + psiv[j].ps = pollsetv[j % erts_no_pollsets]; } + + for (j=0; j < ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT; j++) { + erts_mtx_init(&drv_ev_state.locks[j].lck, "drv_ev_state", make_small(j), + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO); + } + #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - max_fds = ERTS_CIO_POLL_MAX_FDS(); - erts_atomic_init_nob(&drv_ev_state_len, 0); - drv_ev_state = NULL; - erts_mtx_init(&drv_ev_state_grow_lock, "drv_ev_state_grow", NIL, + drv_ev_state.max_fds = erts_poll_max_fds(); + erts_atomic_init_nob(&drv_ev_state.len, 0); + drv_ev_state.v = NULL; + erts_mtx_init(&drv_ev_state.grow_lock, "drv_ev_state_grow", NIL, ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO); #else { @@ -2553,153 +2080,182 @@ ERTS_CIO_EXPORT(erts_init_check_io)(void) hf.cmp = &drv_ev_state_cmp; hf.alloc = &drv_ev_state_alloc; hf.free = &drv_ev_state_free; - num_state_prealloc = 0; - state_prealloc_first = NULL; - erts_spinlock_init(&state_prealloc_lock,"state_prealloc", NIL, - ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO); - - safe_hash_init(ERTS_ALC_T_DRV_EV_STATE, &drv_ev_state_tab, "drv_ev_state_tab", + drv_ev_state.num_prealloc = 0; + drv_ev_state.prealloc_first = NULL; + erts_spinlock_init(&drv_ev_state.prealloc_lock, "state_prealloc", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO); + safe_hash_init(ERTS_ALC_T_DRV_EV_STATE, &drv_ev_state.tab, "drv_ev_state_tab", ERTS_LOCK_FLAGS_CATEGORY_IO, DRV_EV_STATE_HTAB_SIZE, hf); } #endif } int -ERTS_CIO_EXPORT(erts_check_io_max_files)(void) +erts_check_io_max_files(void) { #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - return max_fds; + return drv_ev_state.max_fds; #else - return ERTS_POLL_EXPORT(erts_poll_max_fds)(); + return erts_poll_max_fds(); #endif } Uint -ERTS_CIO_EXPORT(erts_check_io_size)(void) +erts_check_io_size(void) { - Uint res; + Uint res = 0; ErtsPollInfo pi; - ERTS_CIO_POLL_INFO(pollset.ps, &pi); - res = pi.memory_size; + int i; + +#if ERTS_POLL_USE_FALLBACK + erts_poll_info(get_fallback(), &pi); + res += pi.memory_size; +#endif + + for (i = 0; i < erts_no_pollsets; i++) { + erts_poll_info(pollsetv[i], &pi); + res += pi.memory_size; + } #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - res += sizeof(ErtsDrvEventState) * erts_atomic_read_nob(&drv_ev_state_len); + res += sizeof(ErtsDrvEventState) * erts_atomic_read_nob(&drv_ev_state.len); #else - res += safe_hash_table_sz(&drv_ev_state_tab); + res += safe_hash_table_sz(&drv_ev_state.tab); { SafeHashInfo hi; - safe_hash_get_info(&hi, &drv_ev_state_tab); + safe_hash_get_info(&hi, &drv_ev_state.tab); res += hi.objs * sizeof(ErtsDrvEventState); } - erts_spin_lock(&state_prealloc_lock); - res += num_state_prealloc * sizeof(ErtsDrvEventState); - erts_spin_unlock(&state_prealloc_lock); + erts_spin_lock(&drv_ev_state.prealloc_lock); + res += drv_ev_state.num_prealloc * sizeof(ErtsDrvEventState); + erts_spin_unlock(&drv_ev_state.prealloc_lock); #endif return res; } Eterm -ERTS_CIO_EXPORT(erts_check_io_info)(void *proc) +erts_check_io_info(void *proc) { Process *p = (Process *) proc; - Eterm tags[16], values[16], res; - Uint sz, *szp, *hp, **hpp, memory_size; - Sint i; - ErtsPollInfo pi; - erts_aint_t cio_time = erts_atomic_read_acqb(&erts_check_io_time); - int active_fds = (int) erts_atomic32_read_acqb(&pollset.active_fd.no); + Eterm tags[16], values[16], res, list = NIL; + Uint sz, *szp, *hp, **hpp; + ErtsPollInfo *piv; + Sint i, j = 0, len; + int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK; + ERTS_CT_ASSERT(ERTS_POLL_USE_FALLBACK == 0 || ERTS_POLL_USE_FALLBACK == 1); - while (1) { - erts_aint_t post_cio_time; - int post_active_fds; + piv = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollInfo) * no_pollsets); - ERTS_CIO_POLL_INFO(pollset.ps, &pi); +#if ERTS_POLL_USE_FALLBACK + erts_poll_info_flbk(get_fallback(), &piv[0]); + piv[0].poll_threads = 1; + piv[0].active_fds = 0; + piv++; +#endif - post_cio_time = erts_atomic_read_mb(&erts_check_io_time); - post_active_fds = (int) erts_atomic32_read_acqb(&pollset.active_fd.no); - if (cio_time == post_cio_time && active_fds == post_active_fds) - break; - cio_time = post_cio_time; - active_fds = post_active_fds; + for (j = 0; j < erts_no_pollsets; j++) { + erts_poll_info(pollsetv[j], &piv[j]); + piv[j].active_fds = 0; + piv[j].poll_threads = erts_no_poll_threads / erts_no_pollsets; + if (erts_no_poll_threads % erts_no_pollsets > j) + piv[j].poll_threads++; } - memory_size = pi.memory_size; #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - memory_size += sizeof(ErtsDrvEventState) * erts_atomic_read_nob(&drv_ev_state_len); + i = 0; + erts_mtx_lock(&drv_ev_state.grow_lock); + len = erts_atomic_read_nob(&drv_ev_state.len); + for (i = 0; i < ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT; i++) { + erts_mtx_lock(&drv_ev_state.locks[i].lck); + for (j = i; j < len; j+=ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT) { + ErtsDrvEventState *state = get_drv_ev_state(j); + int pollsetid = get_pollset_id(j); + ASSERT(fd_mtx(j) == &drv_ev_state.locks[i].lck); + if (state->flags & ERTS_EV_FLAG_FALLBACK) + pollsetid = -1; + if (state->driver.select + && (state->type == ERTS_EV_TYPE_DRV_SEL) + && (is_iotask_active(&state->driver.select->iniotask) + || is_iotask_active(&state->driver.select->outiotask))) + piv[pollsetid].active_fds++; + } + erts_mtx_unlock(&drv_ev_state.locks[i].lck); + } + erts_mtx_unlock(&drv_ev_state.grow_lock); + + piv[0].memory_size += sizeof(ErtsDrvEventState) * erts_atomic_read_nob(&drv_ev_state.len); #else - memory_size += safe_hash_table_sz(&drv_ev_state_tab); + piv[0].memory_size += safe_hash_table_sz(&drv_ev_state.tab); { - SafeHashInfo hi; - safe_hash_get_info(&hi, &drv_ev_state_tab); - memory_size += hi.objs * sizeof(ErtsDrvEventState); + SafeHashInfo hi; + safe_hash_get_info(&hi, &drv_ev_state.tab); + piv[0].memory_size += hi.objs * sizeof(ErtsDrvEventState); } - erts_spin_lock(&state_prealloc_lock); - memory_size += num_state_prealloc * sizeof(ErtsDrvEventState); - erts_spin_unlock(&state_prealloc_lock); + erts_spin_lock(&drv_ev_state.prealloc_lock); + piv[0].memory_size += drv_ev_state.num_prealloc * sizeof(ErtsDrvEventState); + erts_spin_unlock(&drv_ev_state.prealloc_lock); #endif hpp = NULL; szp = &sz; sz = 0; - bld_it: - i = 0; + piv -= ERTS_POLL_USE_FALLBACK; - tags[i] = erts_bld_atom(hpp, szp, "name"); - values[i++] = erts_bld_atom(hpp, szp, "erts_poll"); + bld_it: - tags[i] = erts_bld_atom(hpp, szp, "primary"); - values[i++] = erts_bld_atom(hpp, szp, pi.primary); + for (j = no_pollsets-1; j >= 0; j--) { + i = 0; - tags[i] = erts_bld_atom(hpp, szp, "fallback"); - values[i++] = erts_bld_atom(hpp, szp, pi.fallback ? pi.fallback : "false"); + tags[i] = erts_bld_atom(hpp, szp, "name"); + values[i++] = erts_bld_atom(hpp, szp, "erts_poll"); - tags[i] = erts_bld_atom(hpp, szp, "kernel_poll"); - values[i++] = erts_bld_atom(hpp, szp, - pi.kernel_poll ? pi.kernel_poll : "false"); + tags[i] = erts_bld_atom(hpp, szp, "primary"); + values[i++] = erts_bld_atom(hpp, szp, piv[j].primary); - tags[i] = erts_bld_atom(hpp, szp, "memory_size"); - values[i++] = erts_bld_uint(hpp, szp, memory_size); + tags[i] = erts_bld_atom(hpp, szp, "kernel_poll"); + values[i++] = erts_bld_atom(hpp, szp, + piv[j].kernel_poll ? piv[j].kernel_poll : "false"); - tags[i] = erts_bld_atom(hpp, szp, "total_poll_set_size"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.poll_set_size); + tags[i] = erts_bld_atom(hpp, szp, "memory_size"); + values[i++] = erts_bld_uint(hpp, szp, piv[j].memory_size); - if (pi.fallback) { - tags[i] = erts_bld_atom(hpp, szp, "fallback_poll_set_size"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.fallback_poll_set_size); - } + tags[i] = erts_bld_atom(hpp, szp, "total_poll_set_size"); + values[i++] = erts_bld_uint(hpp, szp, piv[j].poll_set_size); - tags[i] = erts_bld_atom(hpp, szp, "lazy_updates"); - values[i++] = pi.lazy_updates ? am_true : am_false; + tags[i] = erts_bld_atom(hpp, szp, "lazy_updates"); + values[i++] = piv[j].lazy_updates ? am_true : am_false; - if (pi.lazy_updates) { - tags[i] = erts_bld_atom(hpp, szp, "pending_updates"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.pending_updates); - } + tags[i] = erts_bld_atom(hpp, szp, "pending_updates"); + values[i++] = erts_bld_uint(hpp, szp, piv[j].pending_updates); - tags[i] = erts_bld_atom(hpp, szp, "batch_updates"); - values[i++] = pi.batch_updates ? am_true : am_false; + tags[i] = erts_bld_atom(hpp, szp, "batch_updates"); + values[i++] = piv[j].batch_updates ? am_true : am_false; - tags[i] = erts_bld_atom(hpp, szp, "concurrent_updates"); - values[i++] = pi.concurrent_updates ? am_true : am_false; + tags[i] = erts_bld_atom(hpp, szp, "concurrent_updates"); + values[i++] = piv[j].concurrent_updates ? am_true : am_false; - tags[i] = erts_bld_atom(hpp, szp, "max_fds"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.max_fds); + tags[i] = erts_bld_atom(hpp, szp, "fallback"); + values[i++] = piv[j].is_fallback ? am_true : am_false; - tags[i] = erts_bld_atom(hpp, szp, "active_fds"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) active_fds); + tags[i] = erts_bld_atom(hpp, szp, "max_fds"); + values[i++] = erts_bld_uint(hpp, szp, piv[j].max_fds); -#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS - tags[i] = erts_bld_atom(hpp, szp, "no_avoided_wakeups"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.no_avoided_wakeups); + tags[i] = erts_bld_atom(hpp, szp, "active_fds"); + values[i++] = erts_bld_uint(hpp, szp, piv[j].active_fds); - tags[i] = erts_bld_atom(hpp, szp, "no_avoided_interrupts"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.no_avoided_interrupts); + tags[i] = erts_bld_atom(hpp, szp, "poll_threads"); + values[i++] = erts_bld_uint(hpp, szp, piv[j].poll_threads); - tags[i] = erts_bld_atom(hpp, szp, "no_interrupt_timed"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.no_interrupt_timed); -#endif + res = erts_bld_2tup_list(hpp, szp, i, tags, values); - res = erts_bld_2tup_list(hpp, szp, i, tags, values); + if (!hpp) { + *szp += 2; + } + else { + list = CONS(*hpp, res, list); + *hpp += 2; + } + } if (!hpp) { hp = HAlloc(p, sz); @@ -2708,386 +2264,442 @@ ERTS_CIO_EXPORT(erts_check_io_info)(void *proc) goto bld_it; } - return res; + erts_free(ERTS_ALC_T_TMP, piv); + + return list; } static ERTS_INLINE ErtsPollEvents -print_events(ErtsPollEvents ev) +print_events(erts_dsprintf_buf_t *dsbufp, ErtsPollEvents ev) { int first = 1; + if(ev == ERTS_POLL_EV_NONE) { + erts_dsprintf(dsbufp, "N/A"); + return 0; + } if(ev & ERTS_POLL_EV_IN) { ev &= ~ERTS_POLL_EV_IN; - erts_printf("%s%s", first ? "" : "|", "IN"); + erts_dsprintf(dsbufp, "%s%s", first ? "" : "|", "IN"); first = 0; } if(ev & ERTS_POLL_EV_OUT) { ev &= ~ERTS_POLL_EV_OUT; - erts_printf("%s%s", first ? "" : "|", "OUT"); + erts_dsprintf(dsbufp, "%s%s", first ? "" : "|", "OUT"); first = 0; } /* The following should not appear... */ if(ev & ERTS_POLL_EV_NVAL) { - erts_printf("%s%s", first ? "" : "|", "NVAL"); + erts_dsprintf(dsbufp, "%s%s", first ? "" : "|", "NVAL"); first = 0; } if(ev & ERTS_POLL_EV_ERR) { - erts_printf("%s%s", first ? "" : "|", "ERR"); + erts_dsprintf(dsbufp, "%s%s", first ? "" : "|", "ERR"); first = 0; } if (ev) - erts_printf("%s0x%b32x", first ? "" : "|", (Uint32) ev); + erts_dsprintf(dsbufp, "%s0x%b32x", first ? "" : "|", (Uint32) ev); return ev; } +static ERTS_INLINE void +print_flags(erts_dsprintf_buf_t *dsbufp, EventStateFlags f) +{ + const char* delim = ""; + if(f & ERTS_EV_FLAG_USED) { + erts_dsprintf(dsbufp, "%s","USED"); + delim = "|"; + } + if(f & ERTS_EV_FLAG_FALLBACK) { + erts_dsprintf(dsbufp, "%s%s", delim, "FLBK"); + delim = "|"; + } +} + +#ifdef DEBUG_PRINT_MODE + +static ERTS_INLINE char * +drvmode2str(int mode) { + switch (mode) { + case ERL_DRV_READ|ERL_DRV_USE: return "READ|USE"; + case ERL_DRV_WRITE|ERL_DRV_USE: return "WRITE|USE"; + case ERL_DRV_READ|ERL_DRV_WRITE|ERL_DRV_USE: return "READ|WRITE|USE"; + case ERL_DRV_USE: return "USE"; + case ERL_DRV_READ: return "READ"; + case ERL_DRV_WRITE: return "WRITE"; + case ERL_DRV_READ|ERL_DRV_WRITE: return "READ|WRITE"; + default: return "UNKNOWN"; + } +} + +static ERTS_INLINE char * +nifmode2str(enum ErlNifSelectFlags mode) { + switch (mode) { + case ERL_NIF_SELECT_READ: return "READ"; + case ERL_NIF_SELECT_WRITE: return "WRITE"; + case ERL_NIF_SELECT_STOP: return "STOP"; + default: return "UNKNOWN"; + } +} + +#endif + typedef struct { int used_fds; int num_errors; int no_driver_select_structs; - int no_driver_event_structs; + int no_enif_select_structs; #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS int internal_fds; ErtsPollEvents *epep; #endif } IterDebugCounters; -static void doit_erts_check_io_debug(void *vstate, void *vcounters) +static int erts_debug_print_checkio_state(erts_dsprintf_buf_t *dsbufp, + ErtsDrvEventState *state, + ErtsPollEvents ep_events, + int internal) { - ErtsDrvEventState *state = (ErtsDrvEventState *) vstate; - IterDebugCounters *counters = (IterDebugCounters *) vcounters; - ErtsPollEvents cio_events = state->events; - ErtsSysFdType fd = state->fd; -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - int internal = 0; - ErtsPollEvents ep_events = counters->epep[(int) fd]; -#endif - int err = 0; - #if defined(HAVE_FSTAT) && !defined(NO_FSTAT_ON_SYS_FD_TYPE) struct stat stat_buf; #endif - - if (state->driver.select) - counters->no_driver_select_structs++; -#if ERTS_CIO_HAVE_DRV_EVENT - if (state->driver.event) - counters->no_driver_event_structs++; -#endif - + ErtsSysFdType fd = state->fd; + ErtsPollEvents cio_events = state->events; + int err = 0; #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (state->events || ep_events) { - if (ep_events & ERTS_POLL_EV_NVAL) { - ep_events &= ~ERTS_POLL_EV_NVAL; - internal = 1; - counters->internal_fds++; - } - else - counters->used_fds++; -#else - if (state->events) { - counters->used_fds++; + ErtsPollEvents aio_events = state->active_events; #endif - - erts_printf("fd=%d ", (int) fd); - + erts_dsprintf(dsbufp, "pollset=%d fd=%d ", + state->flags & ERTS_EV_FLAG_FALLBACK ? -1 : get_pollset_id(fd), (int) fd); + #if defined(HAVE_FSTAT) && !defined(NO_FSTAT_ON_SYS_FD_TYPE) - if (fstat((int) fd, &stat_buf) < 0) - erts_printf("type=unknown "); - else { - erts_printf("type="); + if (fstat((int) fd, &stat_buf) < 0) + erts_dsprintf(dsbufp, "type=unknown "); + else { + erts_dsprintf(dsbufp, "type="); #ifdef S_ISSOCK - if (S_ISSOCK(stat_buf.st_mode)) - erts_printf("sock "); - else + if (S_ISSOCK(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "sock "); + else #endif #ifdef S_ISFIFO if (S_ISFIFO(stat_buf.st_mode)) - erts_printf("fifo "); + erts_dsprintf(dsbufp, "fifo "); else #endif #ifdef S_ISCHR - if (S_ISCHR(stat_buf.st_mode)) - erts_printf("chr "); - else + if (S_ISCHR(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "chr "); + else #endif #ifdef S_ISDIR - if (S_ISDIR(stat_buf.st_mode)) - erts_printf("dir "); - else + if (S_ISDIR(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "dir "); + else #endif #ifdef S_ISBLK - if (S_ISBLK(stat_buf.st_mode)) - erts_printf("blk "); - else + if (S_ISBLK(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "blk "); + else #endif #ifdef S_ISREG - if (S_ISREG(stat_buf.st_mode)) - erts_printf("reg "); - else + if (S_ISREG(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "reg "); + else #endif #ifdef S_ISLNK - if (S_ISLNK(stat_buf.st_mode)) - erts_printf("lnk "); - else + if (S_ISLNK(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "lnk "); + else #endif #ifdef S_ISDOOR - if (S_ISDOOR(stat_buf.st_mode)) - erts_printf("door "); - else + if (S_ISDOOR(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "door "); + else #endif #ifdef S_ISWHT - if (S_ISWHT(stat_buf.st_mode)) - erts_printf("wht "); - else + if (S_ISWHT(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "wht "); + else #endif #ifdef S_ISXATTR - if (S_ISXATTR(stat_buf.st_mode)) - erts_printf("xattr "); - else + if (S_ISXATTR(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "xattr "); + else #endif - erts_printf("unknown "); - } + erts_dsprintf(dsbufp, "unknown "); + } #else - erts_printf("type=unknown "); + erts_dsprintf(dsbufp, "type=unknown "); #endif - if (state->type == ERTS_EV_TYPE_DRV_SEL) { - erts_printf("driver_select "); - -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (internal) { - erts_printf("internal "); - err = 1; - } - - if (cio_events == ep_events) { - erts_printf("ev="); - if (print_events(cio_events) != 0) - err = 1; - } - else { - err = 1; - erts_printf("cio_ev="); - print_events(cio_events); - erts_printf(" ep_ev="); - print_events(ep_events); - } -#else - if (print_events(cio_events) != 0) - err = 1; -#endif - erts_printf(" "); - if (cio_events & ERTS_POLL_EV_IN) { - Eterm id = state->driver.select->inport; - if (is_nil(id)) { - erts_printf("inport=none inname=none indrv=none "); - err = 1; - } - else { - ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT); - erts_printf(" inport=%T inname=%s indrv=%s ", - id, - pnp->name ? pnp->name : "unknown", - (pnp->driver_name - ? pnp->driver_name - : "unknown")); - erts_free_port_names(pnp); - } - } - if (cio_events & ERTS_POLL_EV_OUT) { - Eterm id = state->driver.select->outport; - if (is_nil(id)) { - erts_printf("outport=none outname=none outdrv=none "); - err = 1; - } - else { - ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT); - erts_printf(" outport=%T outname=%s outdrv=%s ", - id, - pnp->name ? pnp->name : "unknown", - (pnp->driver_name - ? pnp->driver_name - : "unknown")); - erts_free_port_names(pnp); - } - } - } - else if (state->type == ERTS_EV_TYPE_NIF) { - ErtsResource* r; - erts_printf("enif_select "); + if (state->type == ERTS_EV_TYPE_DRV_SEL) { + erts_dsprintf(dsbufp, "driver_select "); #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (internal) { - erts_printf("internal "); - err = 1; - } - + if (internal) { + erts_dsprintf(dsbufp, "internal "); + err = 1; + } + if (aio_events == cio_events) { if (cio_events == ep_events) { - erts_printf("ev="); - if (print_events(cio_events) != 0) + erts_dsprintf(dsbufp, "ev="); + if (print_events(dsbufp, cio_events) != 0) err = 1; } else { + ErtsPollEvents ev = cio_events; + if (ev != ep_events && ep_events != ERTS_POLL_EV_NONE) + err = 1; + erts_dsprintf(dsbufp, "cio_ev="); + print_events(dsbufp, cio_events); + erts_dsprintf(dsbufp, " ep_ev="); + print_events(dsbufp, ep_events); + } + } else { + erts_dsprintf(dsbufp, "cio_ev="); + print_events(dsbufp, cio_events); + erts_dsprintf(dsbufp, " aio_ev="); + print_events(dsbufp, aio_events); + if ((aio_events != ep_events && ep_events != ERTS_POLL_EV_NONE) || + (aio_events != 0 && ep_events == ERTS_POLL_EV_NONE)) { + erts_dsprintf(dsbufp, " ep_ev="); + print_events(dsbufp, ep_events); err = 1; - erts_printf("cio_ev="); - print_events(cio_events); - erts_printf(" ep_ev="); - print_events(ep_events); } + } #else - if (print_events(cio_events) != 0) + if (print_events(dsbufp, cio_events) != 0) + err = 1; +#endif + erts_dsprintf(dsbufp, " "); + if (cio_events & ERTS_POLL_EV_IN) { + Eterm id = state->driver.select->inport; + if (is_nil(id)) { + erts_dsprintf(dsbufp, "inport=none inname=none indrv=none "); err = 1; -#endif - erts_printf(" inpid=%T dd_cnt=%b32d", state->driver.nif->in.pid, - state->driver.nif->in.ddeselect_cnt); - erts_printf(" outpid=%T dd_cnt=%b32d", state->driver.nif->out.pid, - state->driver.nif->out.ddeselect_cnt); - r = state->driver.stop.resource; - erts_printf(" resource=%p(%T:%T)", r, r->type->module, r->type->name); + } + else { + ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT); + erts_dsprintf(dsbufp, " inport=%T inname=%s indrv=%s ", + id, + pnp->name ? pnp->name : "unknown", + (pnp->driver_name + ? pnp->driver_name + : "unknown")); + erts_free_port_names(pnp); + } } -#if ERTS_CIO_HAVE_DRV_EVENT - else if (state->type == ERTS_EV_TYPE_DRV_EV) { - Eterm id; - erts_printf("driver_event "); + if (cio_events & ERTS_POLL_EV_OUT) { + Eterm id = state->driver.select->outport; + if (is_nil(id)) { + erts_dsprintf(dsbufp, "outport=none outname=none outdrv=none "); + err = 1; + } + else { + ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT); + erts_dsprintf(dsbufp, " outport=%T outname=%s outdrv=%s ", + id, + pnp->name ? pnp->name : "unknown", + (pnp->driver_name + ? pnp->driver_name + : "unknown")); + erts_free_port_names(pnp); + } + } + } + else if (state->type == ERTS_EV_TYPE_NIF) { + ErtsResource* r; + erts_dsprintf(dsbufp, "enif_select "); + #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (internal) { - erts_printf("internal "); - err = 1; - } - if (cio_events == ep_events) { - erts_printf("ev=0x%b32x", (Uint32) cio_events); - } - else { - err = 1; - erts_printf("cio_ev=0x%b32x", (Uint32) cio_events); - erts_printf(" ep_ev=0x%b32x", (Uint32) ep_events); - } + if (internal) { + erts_dsprintf(dsbufp, "internal "); + err = 1; + } + + if (cio_events == ep_events) { + erts_dsprintf(dsbufp, "ev="); + if (print_events(dsbufp, cio_events) != 0) + err = 1; + } + else { + err = 1; + erts_dsprintf(dsbufp, "cio_ev="); + print_events(dsbufp, cio_events); + erts_dsprintf(dsbufp, " ep_ev="); + print_events(dsbufp, ep_events); + } #else - erts_printf("ev=0x%b32x", (Uint32) cio_events); + if (print_events(dsbufp, cio_events) != 0) + err = 1; #endif - id = state->driver.event->port; - if (is_nil(id)) { - erts_printf(" port=none name=none drv=none "); - err = 1; - } - else { - ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT); - erts_printf(" port=%T name=%s drv=%s ", - id, - pnp->name ? pnp->name : "unknown", - (pnp->driver_name - ? pnp->driver_name - : "unknown")); - erts_free_port_names(pnp); - } - } + erts_dsprintf(dsbufp, " inpid=%T", state->driver.nif->in.pid); + erts_dsprintf(dsbufp, " outpid=%T", state->driver.nif->out.pid); + r = state->driver.stop.resource; + erts_dsprintf(dsbufp, " resource=%p(%T:%T)", r, r->type->module, r->type->name); + } +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + else if (internal) { + erts_dsprintf(dsbufp, "internal "); + if (cio_events) { + err = 1; + erts_dsprintf(dsbufp, "cio_ev="); + print_events(dsbufp, cio_events); + } + if (ep_events) { + erts_dsprintf(dsbufp, "ep_ev="); + print_events(dsbufp, ep_events); + } + } #endif + else { + err = 1; + erts_dsprintf(dsbufp, "control_type=%d ", (int)state->type); #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - else if (internal) { - erts_printf("internal "); - if (cio_events) { - err = 1; - erts_printf("cio_ev="); - print_events(cio_events); - } - if (ep_events) { - erts_printf("ep_ev="); - print_events(ep_events); - } - } + if (cio_events == ep_events) { + erts_dsprintf(dsbufp, "ev="); + print_events(dsbufp, cio_events); + } + else { + erts_dsprintf(dsbufp, "cio_ev="); print_events(dsbufp, cio_events); + erts_dsprintf(dsbufp, " ep_ev="); print_events(dsbufp, ep_events); + } +#else + erts_dsprintf(dsbufp, "ev=0x%b32x", (Uint32) cio_events); #endif - else { - err = 1; - erts_printf("control_type=%d ", (int)state->type); + } + + erts_dsprintf(dsbufp, " flags="); print_flags(dsbufp, state->flags); + if (err) { + erts_dsprintf(dsbufp, " ERROR"); + } + erts_dsprintf(dsbufp, "\r\n"); + return err; +} + +static void doit_erts_check_io_debug(void *vstate, void *vcounters, + erts_dsprintf_buf_t *dsbufp) +{ + ErtsDrvEventState *state = (ErtsDrvEventState *) vstate; + IterDebugCounters *counters = (IterDebugCounters *) vcounters; + int internal = 0; #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (cio_events == ep_events) { - erts_printf("ev="); - print_events(cio_events); - } - else { - erts_printf("cio_ev="); print_events(cio_events); - erts_printf(" ep_ev="); print_events(ep_events); - } + ErtsSysFdType fd = state->fd; + ErtsPollEvents ep_events = counters->epep[(int) fd]; #else - erts_printf("ev=0x%b32x", (Uint32) cio_events); + ErtsPollEvents ep_events = ERTS_POLL_EV_NONE; #endif + + if (state->driver.select) { + counters->no_driver_select_structs++; + ASSERT(state->events || (ep_events != 0 && ep_events != ERTS_POLL_EV_NONE)); + } + if (state->driver.nif) { + counters->no_enif_select_structs++; + ASSERT(state->events || (ep_events != 0 && ep_events != ERTS_POLL_EV_NONE)); + } + +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + if (state->events || (ep_events != 0 && ep_events != ERTS_POLL_EV_NONE)) { + if (ep_events & ERTS_POLL_EV_NVAL) { + ep_events &= ~ERTS_POLL_EV_NVAL; + internal = 1; + counters->internal_fds++; } - - if (err) { + else + counters->used_fds++; +#else + if (state->events) { + counters->used_fds++; +#endif + if (erts_debug_print_checkio_state(dsbufp, state, ep_events, internal)) { counters->num_errors++; - erts_printf(" ERROR"); } - erts_printf("\n"); } } - + +/* ciodpi can be NULL when called from etp-commands */ int -ERTS_CIO_EXPORT(erts_check_io_debug)(ErtsCheckIoDebugInfo *ciodip) +erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip) { + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - int fd, len; + int fd, len, i; #endif - IterDebugCounters counters; + IterDebugCounters counters = {0}; #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS ErtsDrvEventState null_des; null_des.driver.select = NULL; -#if ERTS_CIO_HAVE_DRV_EVENT - null_des.driver.event = NULL; -#endif + null_des.driver.nif = NULL; null_des.driver.stop.drv_ptr = NULL; null_des.events = 0; - null_des.remove_cnt = 0; null_des.type = ERTS_EV_TYPE_NONE; + null_des.flags = 0; + + counters.epep = erts_alloc(ERTS_ALC_T_TMP, + sizeof(ErtsPollEvents)*drv_ev_state.max_fds); #endif - erts_printf("--- fds in pollset --------------------------------------\n"); #if defined(ERTS_ENABLE_LOCK_CHECK) erts_lc_check_exact(NULL, 0); /* No locks should be locked */ #endif - erts_thr_progress_block(); /* stop the world to avoid messy locking */ + if (ciodip) + erts_thr_progress_block(); /* stop the world to avoid messy locking */ #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - counters.epep = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollEvents)*max_fds); - ERTS_POLL_EXPORT(erts_poll_get_selected_events)(pollset.ps, counters.epep, max_fds); - counters.internal_fds = 0; -#endif - counters.used_fds = 0; - counters.num_errors = 0; - counters.no_driver_select_structs = 0; - counters.no_driver_event_structs = 0; + len = erts_atomic_read_nob(&drv_ev_state.len); -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - len = erts_atomic_read_nob(&drv_ev_state_len); +#if ERTS_POLL_USE_FALLBACK + erts_dsprintf(dsbufp, "--- fds in flbk pollset ---------------------------------\n"); + erts_poll_get_selected_events_flbk(get_fallback(), counters.epep, + drv_ev_state.max_fds); for (fd = 0; fd < len; fd++) { - doit_erts_check_io_debug((void *) &drv_ev_state[fd], (void *) &counters); + if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK) + doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp); + } +#endif + erts_dsprintf(dsbufp, "--- fds in pollset --------------------------------------\n"); + + for (i = 0; i < erts_no_pollsets; i++) { + erts_poll_get_selected_events(pollsetv[i], + counters.epep, + drv_ev_state.max_fds); + for (fd = 0; fd < len; fd++) { + if (!(drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK) + && get_pollset_id(fd) == i) + doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp); + } } - for ( ; fd < max_fds; fd++) { - null_des.fd = fd; - doit_erts_check_io_debug((void *) &null_des, (void *) &counters); + for (fd = len ; fd < drv_ev_state.max_fds; fd++) { + null_des.fd = fd; + doit_erts_check_io_debug(&null_des, &counters, dsbufp); } #else - safe_hash_for_each(&drv_ev_state_tab, &doit_erts_check_io_debug, (void *) &counters); + safe_hash_for_each(&drv_ev_state.tab, &doit_erts_check_io_debug, + &counters, dsbufp); #endif - erts_thr_progress_unblock(); + if (ciodip) + erts_thr_progress_unblock(); - ciodip->no_used_fds = counters.used_fds; - ciodip->no_driver_select_structs = counters.no_driver_select_structs; - ciodip->no_driver_event_structs = counters.no_driver_event_structs; + if (ciodip) { + ciodip->no_used_fds = counters.used_fds; + ciodip->no_driver_select_structs = counters.no_driver_select_structs; + ciodip->no_enif_select_structs = counters.no_enif_select_structs; + } - erts_printf("\n"); - erts_printf("used fds=%d\n", counters.used_fds); - erts_printf("Number of driver_select() structures=%d\n", counters.no_driver_select_structs); -#if ERTS_CIO_HAVE_DRV_EVENT - erts_printf("Number of driver_event() structures=%d\n", counters.no_driver_event_structs); -#endif + erts_dsprintf(dsbufp, "\n"); + erts_dsprintf(dsbufp, "used fds=%d\n", counters.used_fds); + erts_dsprintf(dsbufp, "Number of driver_select() structures=%d\n", counters.no_driver_select_structs); + erts_dsprintf(dsbufp, "Number of enif_select() structures=%d\n", counters.no_enif_select_structs); #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - erts_printf("internal fds=%d\n", counters.internal_fds); + erts_dsprintf(dsbufp, "internal fds=%d\n", counters.internal_fds); #endif - erts_printf("---------------------------------------------------------\n"); - fflush(stdout); + erts_dsprintf(dsbufp, "---------------------------------------------------------\n"); + erts_send_error_to_logger_nogl(dsbufp); #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS erts_free(ERTS_ALC_T_TMP, (void *) counters.epep); #endif @@ -3096,11 +2708,19 @@ ERTS_CIO_EXPORT(erts_check_io_debug)(ErtsCheckIoDebugInfo *ciodip) } #ifdef ERTS_ENABLE_LOCK_COUNT -void ERTS_CIO_EXPORT(erts_lcnt_update_cio_locks)(int enable) { +void erts_lcnt_update_cio_locks(int enable) { + int i; #ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS - erts_lcnt_enable_hash_lock_count(&drv_ev_state_tab, ERTS_LOCK_FLAGS_CATEGORY_IO, enable); + erts_lcnt_enable_hash_lock_count(&drv_ev_state.tab, ERTS_LOCK_FLAGS_CATEGORY_IO, enable); #else (void)enable; #endif + +#if ERTS_POLL_USE_FALLBACK + erts_lcnt_enable_pollset_lock_count_flbk(get_fallback(), enable); +#endif + + for (i = 0; i < erts_no_pollsets; i++) + erts_lcnt_enable_pollset_lock_count(pollsetv[i], enable); } #endif /* ERTS_ENABLE_LOCK_COUNT */ diff --git a/erts/emulator/sys/common/erl_check_io.h b/erts/emulator/sys/common/erl_check_io.h index 777942a473..443ef1264c 100644 --- a/erts/emulator/sys/common/erl_check_io.h +++ b/erts/emulator/sys/common/erl_check_io.h @@ -18,10 +18,11 @@ * %CopyrightEnd% */ -/* - * Description: Check I/O +/** + * @description Check I/O, a cross platform IO polling framework for ERTS * - * Author: Rickard Green + * @author Rickard Green + * @author Lukas Larsson */ #ifndef ERL_CHECK_IO_H__ @@ -30,72 +31,79 @@ #include "sys.h" #include "erl_sys_driver.h" -#ifdef ERTS_ENABLE_KERNEL_POLL - -int driver_select_kp(ErlDrvPort, ErlDrvEvent, int, int); -int driver_select_nkp(ErlDrvPort, ErlDrvEvent, int, int); -int enif_select_kp(ErlNifEnv*, ErlNifEvent, enum ErlNifSelectFlags, void*, const ErlNifPid*, Eterm); -int enif_select_nkp(ErlNifEnv*, ErlNifEvent, enum ErlNifSelectFlags, void*, const ErlNifPid*, Eterm); -int driver_event_kp(ErlDrvPort, ErlDrvEvent, ErlDrvEventData); -int driver_event_nkp(ErlDrvPort, ErlDrvEvent, ErlDrvEventData); -Uint erts_check_io_size_kp(void); -Uint erts_check_io_size_nkp(void); -Eterm erts_check_io_info_kp(void *); -Eterm erts_check_io_info_nkp(void *); -int erts_check_io_max_files_kp(void); -int erts_check_io_max_files_nkp(void); -void erts_check_io_interrupt_kp(int); -void erts_check_io_interrupt_nkp(int); -void erts_check_io_interrupt_timed_kp(int, ErtsMonotonicTime); -void erts_check_io_interrupt_timed_nkp(int, ErtsMonotonicTime); -void erts_check_io_kp(int); -void erts_check_io_nkp(int); -void erts_init_check_io_kp(void); -void erts_init_check_io_nkp(void); -int erts_check_io_debug_kp(ErtsCheckIoDebugInfo *); -int erts_check_io_debug_nkp(ErtsCheckIoDebugInfo *); - -#ifdef ERTS_ENABLE_LOCK_COUNT -void erts_lcnt_update_cio_locks_kp(int enable); -void erts_lcnt_update_cio_locks_nkp(int enable); -#endif - -#else /* !ERTS_ENABLE_KERNEL_POLL */ +/** @brief a structure that is used by each polling thread */ +struct erts_poll_thread; +/** + * Get the memory size of the check io framework + */ Uint erts_check_io_size(void); -Eterm erts_check_io_info(void *); +/** + * Returns an Eterm with information about all the pollsets active at the + * moment. + * + * @param proc the Process* to allocate the result on. It is passed as + * void * because of header include problems. + */ +Eterm erts_check_io_info(void *proc); +/** + * Should be called when a port IO task has been executed in order to re-enable + * or clear the information about the fd. + * + * @param type The type of event that has been completed. + * @param handle The port task handle of the event. + * @param reset A function pointer to be called when the port task handle + * should be reset. + */ +void erts_io_notify_port_task_executed(ErtsPortTaskType type, + ErtsPortTaskHandle *handle, + void (*reset)(ErtsPortTaskHandle *)); +/** + * Returns the maximum number of fds that the check io framework can handle. + */ int erts_check_io_max_files(void); -void erts_check_io_interrupt(int); -void erts_check_io_interrupt_timed(int, ErtsMonotonicTime); -void erts_check_io(int); -void erts_init_check_io(void); - +/** + * Called by any thread that should check for new IO events. This function will + * not return unless erts_check_io_interrupt(pt, 1) is called by another thread. + * + * @param pt the poll thread structure to use. + */ +void erts_check_io(struct erts_poll_thread *pt); +/** + * Initialize the check io framework. This function will parse the arguments + * and delete any entries that it is interested in. + * + * @param argc the number of arguments + * @param argv an array with the arguments + */ +void erts_init_check_io(int *argc, char **argv); +/** + * Interrupt the poll thread so that it can execute other code. + * + * Should be called with set = 0 by the waiting thread before calling + * erts_check_io. + * + * @param pt the poll thread to wake + * @param set whether to set or clear the interrupt flag + */ +void erts_check_io_interrupt(struct erts_poll_thread *pt, int set); +/** + * Create a new poll thread structure that is associated with the number no. + * It is the callers responsibility that no is unique. + */ +struct erts_poll_thread* erts_create_pollset_thread(int no); #ifdef ERTS_ENABLE_LOCK_COUNT +/** + * Toggle lock counting on all check io locks + */ void erts_lcnt_update_cio_locks(int enable); #endif -#endif - -extern erts_atomic_t erts_check_io_time; - typedef struct { ErtsPortTaskHandle task; - erts_atomic_t executed_time; + ErtsSysFdType fd; } ErtsIoTask; -ERTS_GLB_INLINE void erts_io_notify_port_task_executed(ErtsPortTaskHandle *pthp); - -#if ERTS_GLB_INLINE_INCL_FUNC_DEF - -ERTS_GLB_INLINE void -erts_io_notify_port_task_executed(ErtsPortTaskHandle *pthp) -{ - ErtsIoTask *itp = (ErtsIoTask *) (((char *) pthp) - offsetof(ErtsIoTask, task)); - erts_aint_t ci_time = erts_atomic_read_acqb(&erts_check_io_time); - erts_atomic_set_relb(&itp->executed_time, ci_time); -} - -#endif #endif /* ERL_CHECK_IO_H__ */ @@ -103,6 +111,16 @@ erts_io_notify_port_task_executed(ErtsPortTaskHandle *pthp) #define ERL_CHECK_IO_INTERNAL__ #endif +#define ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT 128 + +/* Controls how many pollsets to allocate. Fd's are hashed into + each pollset based on the FD. When doing non-concurrent updates + there will be one pollset per thread. +*/ +extern int erts_no_pollsets; +extern int erts_no_poll_threads; + + #ifndef ERL_CHECK_IO_INTERNAL__ #define ERL_CHECK_IO_INTERNAL__ #include "erl_poll.h" @@ -115,24 +133,8 @@ erts_io_notify_port_task_executed(ErtsPortTaskHandle *pthp) */ # define ERTS_CIO_DEFER_ACTIVE_EVENTS 1 #else -# define ERTS_CIO_DEFER_ACTIVE_EVENTS 0 -#endif - -/* - * ErtsDrvEventDataState is used by driver_event() which is almost never - * used. We allocate ErtsDrvEventDataState separate since we dont wan't - * the size of ErtsDrvEventState to increase due to driver_event() - * information. - */ -typedef struct { - Eterm port; - ErlDrvEventData data; - ErtsPollEvents removed_events; -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - ErtsPollEvents deferred_events; +# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1 #endif - ErtsIoTask iotask; -} ErtsDrvEventDataState; typedef struct { Eterm inport; diff --git a/erts/emulator/sys/common/erl_poll.c b/erts/emulator/sys/common/erl_poll.c index 7d26839b0f..30a595c17a 100644 --- a/erts/emulator/sys/common/erl_poll.c +++ b/erts/emulator/sys/common/erl_poll.c @@ -18,9 +18,8 @@ * %CopyrightEnd% */ -/* - * Description: Poll interface suitable for ERTS with or without - * SMP support. +/** + * @description Poll interface suitable for ERTS * * The interface is currently implemented using: * - select @@ -29,12 +28,36 @@ * - epoll with poll or select as fallback * - kqueue with poll or select as fallback * - * Some time in the future it will also be - * implemented using Solaris ports. * + * @author Rickard Green + * @author Lukas Larsson + * + * There are two major different implementations off IO polling in this + * file. The concurrent and non-concurrent implementations. + * When available epoll/kqueue are used to implement the concurrent + * versions. poll, select and dev/poll use non-concurrent updates. + * + * Concurrent version: + * In the concurrent version erts_poll_control directly modifies + * the kernel pollset without waking the thread that is waiting + * on events. Also the ErtsPollResFd type is directly mapped to + * the native event type, so no extra copying is needed. Note that + * as no locking at all is done, fds can be triggered that have been + * removed from the pollset. The check_io layer has to deal with this. + * + * Non-concurrent version: + * In the non-concurrent version, the pollset has an internal representation + * of the pollset that is updated by erts_poll_control. When an fd is updated, + * its number is placed in the update request queue and then the waiting thread + * is woken in order to see the change. The internal data in the pollset is + * protected by a mutex that has to be taken by both the modifying and waiting + * thread at different times. * + * The non-concurrent pollset cannot have fd's closed in it while a thread is + * waiting on that fd. In order to fix this, when an ERTS_POLL_OP_DEL command + * is issued, the fd is marked as closing and the waiting thread is woken. The + * fd is then returned in the waiting threads results as ERTS_POLL_EV_NONE. * - * Author: Rickard Green */ #ifdef HAVE_CONFIG_H @@ -62,6 +85,8 @@ # ifdef SYS_SELECT_H # include <sys/select.h> # endif +#elif defined(_DARWIN_UNLIMITED_SELECT) +# undef _DARWIN_UNLIMITED_SELECT #endif #ifdef NO_SYSCONF # if ERTS_POLL_USE_SELECT @@ -83,20 +108,35 @@ #error "Missing implementation of erts_poll()" #endif -#if defined(ERTS_KERNEL_POLL_VERSION) && !ERTS_POLL_USE_KERNEL_POLL -#error "Missing kernel poll implementation of erts_poll()" -#endif +#if 0 +#define ERTS_POLL_DEBUG_PRINT 1 + +#define DEBUG_PRINT(FMT, PS, ...) \ + do { \ + int myerrno = errno; \ + erts_printf("%d: " FMT "\r\n", (PS)->id, ##__VA_ARGS__); \ + errno = myerrno; \ + } while(0) -#if defined(ERTS_NO_KERNEL_POLL_VERSION) && ERTS_POLL_USE_KERNEL_POLL -#error "Kernel poll used when it shouldn't be used" +/* Define to print info about modifications done to each fd */ +#define DEBUG_PRINT_FD(FMT, PS, FD, ...) DEBUG_PRINT("%d: " FMT, PS, FD, ##__VA_ARGS__) +/* Define to print entry and exit from erts_poll_wait (can be very spammy) */ +//#define DEBUG_PRINT_WAIT(FMT, PS, ...) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__) + +#else +#define ERTS_POLL_DEBUG_PRINT 0 +#define DEBUG_PRINT(...) #endif -#if 0 -#define ERTS_POLL_DEBUG_PRINT +#ifndef DEBUG_PRINT_FD +#define DEBUG_PRINT_FD(...) +#endif +#ifndef DEBUG_PRINT_WAIT +#define DEBUG_PRINT_WAIT(...) #endif -#ifdef _DARWIN_UNLIMITED_SELECT +#if defined(_DARWIN_UNLIMITED_SELECT) && ERTS_POLL_USE_SELECT typedef struct { size_t sz; fd_set* ptr; @@ -142,25 +182,13 @@ int ERTS_SELECT(int nfds, ERTS_fd_set *readfds, ERTS_fd_set *writefds, # define ERTS_SELECT select #endif -#define ERTS_POLL_USE_BATCH_UPDATE_POLLSET (ERTS_POLL_USE_DEVPOLL \ - || ERTS_POLL_USE_KQUEUE) +#define ERTS_POLL_IS_FALLBACK (ERTS_POLL_USE_POLL || ERTS_POLL_USE_SELECT) && ERTS_ENABLE_KERNEL_POLL -#define ERTS_POLL_USE_CONCURRENT_UPDATE ERTS_POLL_USE_EPOLL +#define ERTS_POLL_USE_CONCURRENT_UPDATE (ERTS_POLL_USE_EPOLL || ERTS_POLL_USE_KQUEUE) -#define ERTS_POLL_COALESCE_KP_RES (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL) - -#define ERTS_POLLSET_LOCK(PS) \ - erts_mtx_lock(&(PS)->mtx) -#define ERTS_POLLSET_UNLOCK(PS) \ - erts_mtx_unlock(&(PS)->mtx) - -#define ERTS_POLLSET_SET_POLLED_CHK(PS) \ - ((int) erts_atomic32_xchg_nob(&(PS)->polled, (erts_aint32_t) 1)) -#define ERTS_POLLSET_UNSET_POLLED(PS) \ - erts_atomic32_set_nob(&(PS)->polled, (erts_aint32_t) 0) -#define ERTS_POLLSET_IS_POLLED(PS) \ - ((int) erts_atomic32_read_nob(&(PS)->polled)) +#define ERTS_POLL_USE_WAKEUP_PIPE (!ERTS_POLL_USE_CONCURRENT_UPDATE) +#if !ERTS_POLL_USE_CONCURRENT_UPDATE #define ERTS_POLLSET_SET_HAVE_UPDATE_REQUESTS(PS) \ erts_atomic32_set_nob(&(PS)->have_update_requests, (erts_aint32_t) 1) @@ -169,17 +197,28 @@ int ERTS_SELECT(int nfds, ERTS_fd_set *readfds, ERTS_fd_set *writefds, #define ERTS_POLLSET_HAVE_UPDATE_REQUESTS(PS) \ ((int) erts_atomic32_read_nob(&(PS)->have_update_requests)) -#if ERTS_POLL_USE_FALLBACK -# if ERTS_POLL_USE_POLL -# define ERTS_POLL_NEED_FALLBACK(PS) ((PS)->no_poll_fds > 1) -# elif ERTS_POLL_USE_SELECT -# define ERTS_POLL_NEED_FALLBACK(PS) ((PS)->no_select_fds > 1) -# endif +#define ERTS_POLLSET_LOCK(PS) \ + erts_mtx_lock(&(PS)->mtx) +#define ERTS_POLLSET_UNLOCK(PS) \ + erts_mtx_unlock(&(PS)->mtx) + +#else + +#define ERTS_POLLSET_SET_HAVE_UPDATE_REQUESTS(PS) +#define ERTS_POLLSET_UNSET_HAVE_UPDATE_REQUESTS(PS) +#define ERTS_POLLSET_HAVE_UPDATE_REQUESTS(PS) 0 + +#define ERTS_POLLSET_LOCK(PS) +#define ERTS_POLLSET_UNLOCK(PS) + #endif + /* * --- Data types ------------------------------------------------------------ */ +#if !ERTS_POLL_USE_CONCURRENT_UPDATE + #define ERTS_POLLSET_UPDATE_REQ_BLOCK_SIZE 128 typedef struct ErtsPollSetUpdateRequestsBlock_ ErtsPollSetUpdateRequestsBlock; @@ -189,237 +228,200 @@ struct ErtsPollSetUpdateRequestsBlock_ { int fds[ERTS_POLLSET_UPDATE_REQ_BLOCK_SIZE]; }; - - # define ERTS_POLL_FD_FLG_INURQ (((unsigned short) 1) << 0) -#if ERTS_POLL_USE_FALLBACK -# define ERTS_POLL_FD_FLG_INFLBCK (((unsigned short) 1) << 1) -# define ERTS_POLL_FD_FLG_USEFLBCK (((unsigned short) 1) << 2) -#endif -# define ERTS_POLL_FD_FLG_RST (((unsigned short) 1) << 3) +# define ERTS_POLL_FD_FLG_RST (((unsigned short) 1) << 1) + typedef struct { #if ERTS_POLL_USE_POLL int pix; #endif + ErtsPollEvents used_events; ErtsPollEvents events; -#if ERTS_POLL_COALESCE_KP_RES - unsigned short res_ev_ix; -#endif unsigned short flags; } ErtsFdStatus; - -#if ERTS_POLL_COALESCE_KP_RES -/* res_ev_ix max value */ -#define ERTS_POLL_MAX_RES ((1 << sizeof(unsigned short)*8) - 1) #endif -#if ERTS_POLL_USE_KQUEUE - -#define ERTS_POLL_KQ_OP_HANDLED 1 -#define ERTS_POLL_KQ_OP_DEL_R 2 -#define ERTS_POLL_KQ_OP_DEL_W 3 -#define ERTS_POLL_KQ_OP_ADD_R 4 -#define ERTS_POLL_KQ_OP_ADD_W 5 -#define ERTS_POLL_KQ_OP_ADD2_R 6 -#define ERTS_POLL_KQ_OP_ADD2_W 7 - -#endif - -struct ErtsPollSet_ { - ErtsPollSet next; +/* + * This struct is not really exported, but it's nice to + * get unique names in debugger for kp/nkp + */ +struct ERTS_POLL_EXPORT(erts_pollset) { + int id; int internal_fd_limit; - ErtsFdStatus *fds_status; erts_atomic_t no_of_user_fds; - int fds_status_len; + #if ERTS_POLL_USE_KERNEL_POLL int kp_fd; - int res_events_len; -#if ERTS_POLL_USE_EPOLL - struct epoll_event *res_events; -#elif ERTS_POLL_USE_KQUEUE - struct kevent *res_events; -#elif ERTS_POLL_USE_DEVPOLL - struct pollfd *res_events; -#endif #endif /* ERTS_POLL_USE_KERNEL_POLL */ + #if ERTS_POLL_USE_POLL int next_poll_fds_ix; int no_poll_fds; int poll_fds_len; - struct pollfd*poll_fds; + struct pollfd *poll_fds; #elif ERTS_POLL_USE_SELECT int next_sel_fd; int max_fd; -#if ERTS_POLL_USE_FALLBACK - int no_select_fds; -#endif ERTS_fd_set input_fds; ERTS_fd_set res_input_fds; ERTS_fd_set output_fds; ERTS_fd_set res_output_fds; +#elif ERTS_POLL_USE_DEVPOLL + struct pollfd *poll_fds; + int poll_fds_ix; #endif + +#if !ERTS_POLL_USE_CONCURRENT_UPDATE + ErtsFdStatus *fds_status; + int fds_status_len; ErtsPollSetUpdateRequestsBlock update_requests; ErtsPollSetUpdateRequestsBlock *curr_upd_req_block; erts_atomic32_t have_update_requests; - erts_atomic32_t polled; erts_mtx_t mtx; - int wake_fds[2]; -#if ERTS_POLL_USE_TIMERFD - int timer_fd; -#endif -#if ERTS_POLL_USE_FALLBACK - int fallback_used; -#endif erts_atomic32_t wakeup_state; - erts_atomic64_t timeout_time; -#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS - erts_atomic_t no_avoided_wakeups; - erts_atomic_t no_avoided_interrupts; - erts_atomic_t no_interrupt_timed; +#endif + +#if ERTS_POLL_USE_WAKEUP_PIPE + int wake_fds[2]; #endif }; void erts_silence_warn_unused_result(long unused); static void fatal_error(char *format, ...); -static void fatal_error_async_signal_safe(char *error_str); static int max_fds = -1; -static ErtsPollSet pollsets; -static erts_mtx_t pollsets_lock; #if ERTS_POLL_USE_POLL +#if !ERTS_POLL_IS_FALLBACK +static ERTS_INLINE short ev2pollev(ErtsPollEvents ev) +{ + return ERTS_POLL_EV_E2N(ev); +} + +static ERTS_INLINE ErtsPollEvents pollev2ev(short ev) +{ + return ERTS_POLL_EV_N2E(ev); +} + +#else /* ERTS_POLL_IS_FALLBACK */ + static ERTS_INLINE short ev2pollev(ErtsPollEvents ev) { -#if !ERTS_POLL_USE_FALLBACK || ERTS_POLL_USE_KQUEUE - return ERTS_POLL_EV_E2N(ev); -#else /* Note, we only map events we are interested in */ short res_ev = (short) 0; if (ev & ERTS_POLL_EV_IN) - res_ev |= ERTS_POLL_EV_NKP_IN; + res_ev |= ERTS_POLL_EV_NKP_IN; if (ev & ERTS_POLL_EV_OUT) - res_ev |= ERTS_POLL_EV_NKP_OUT; + res_ev |= ERTS_POLL_EV_NKP_OUT; return res_ev; -#endif } static ERTS_INLINE ErtsPollEvents pollev2ev(short ev) { -#if !ERTS_POLL_USE_FALLBACK || ERTS_POLL_USE_KQUEUE - return ERTS_POLL_EV_N2E(ev); -#else /* Note, we only map events we are interested in */ ErtsPollEvents res_ev = (ErtsPollEvents) 0; if (ev & ERTS_POLL_EV_NKP_IN) - res_ev |= ERTS_POLL_EV_IN; + res_ev |= ERTS_POLL_EV_IN; if (ev & ERTS_POLL_EV_NKP_OUT) - res_ev |= ERTS_POLL_EV_OUT; + res_ev |= ERTS_POLL_EV_OUT; if (ev & ERTS_POLL_EV_NKP_ERR) - res_ev |= ERTS_POLL_EV_ERR; + res_ev |= ERTS_POLL_EV_ERR; if (ev & ERTS_POLL_EV_NKP_NVAL) - res_ev |= ERTS_POLL_EV_NVAL; - return res_ev; -#endif + res_ev |= ERTS_POLL_EV_NVAL; + return res_ev; } -#endif +#endif /* !ERTS_POLL_IS_FALLBACK */ + +#endif /* ERTS_POLL_USE_POLL */ + #ifdef HARD_DEBUG static void check_poll_result(ErtsPollResFd pr[], int len); -#if ERTS_POLL_USE_DEVPOLL -static void check_poll_status(ErtsPollSet ps); -#endif /* ERTS_POLL_USE_DEVPOLL */ #endif /* HARD_DEBUG */ -#ifdef ERTS_POLL_DEBUG_PRINT +#if ERTS_POLL_USE_DEVPOLL && defined(DEBUG) +static void check_poll_status(ErtsPollSet *ps); +#endif /* ERTS_POLL_USE_DEVPOLL && DEBUG */ static void print_misc_debug_info(void); +#if ERTS_POLL_USE_EPOLL +uint32_t epoll_events(int kp_fd, int fd); #endif -static ERTS_INLINE void -init_timeout_time(ErtsPollSet ps) -{ - erts_atomic64_init_nob(&ps->timeout_time, - (erts_aint64_t) ERTS_MONOTONIC_TIME_MAX); -} - -static ERTS_INLINE void -set_timeout_time(ErtsPollSet ps, ErtsMonotonicTime time) -{ - erts_atomic64_set_relb(&ps->timeout_time, - (erts_aint64_t) time); -} - -static ERTS_INLINE ErtsMonotonicTime -get_timeout_time(ErtsPollSet ps) -{ - return (ErtsMonotonicTime) erts_atomic64_read_acqb(&ps->timeout_time); -} #define ERTS_POLL_NOT_WOKEN 0 #define ERTS_POLL_WOKEN -1 #define ERTS_POLL_WOKEN_INTR 1 +#if !ERTS_POLL_USE_CONCURRENT_UPDATE static ERTS_INLINE void -reset_wakeup_state(ErtsPollSet ps) +reset_wakeup_state(ErtsPollSet *ps) { erts_atomic32_set_mb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN); } +#endif static ERTS_INLINE int -is_woken(ErtsPollSet ps) +is_woken(ErtsPollSet *ps) { +#if !ERTS_POLL_USE_CONCURRENT_UPDATE return erts_atomic32_read_acqb(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN; +#else + return 0; +#endif } static ERTS_INLINE int -is_interrupted_reset(ErtsPollSet ps) +is_interrupted_reset(ErtsPollSet *ps) { +#if !ERTS_POLL_USE_CONCURRENT_UPDATE return (erts_atomic32_xchg_acqb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN) == ERTS_POLL_WOKEN_INTR); +#else + return 0; +#endif } static ERTS_INLINE void -woke_up(ErtsPollSet ps) +woke_up(ErtsPollSet *ps) { +#if !ERTS_POLL_USE_CONCURRENT_UPDATE erts_aint32_t wakeup_state = erts_atomic32_read_acqb(&ps->wakeup_state); if (wakeup_state == ERTS_POLL_NOT_WOKEN) (void) erts_atomic32_cmpxchg_nob(&ps->wakeup_state, ERTS_POLL_WOKEN, ERTS_POLL_NOT_WOKEN); ASSERT(erts_atomic32_read_nob(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN); +#endif } /* * --- Wakeup pipe ----------------------------------------------------------- */ +#if ERTS_POLL_USE_WAKEUP_PIPE static ERTS_INLINE void -wake_poller(ErtsPollSet ps, int interrupted, int async_signal_safe) +wake_poller(ErtsPollSet *ps, int interrupted) { +#if !ERTS_POLL_USE_CONCURRENT_UPDATE int wake; - if (async_signal_safe) - wake = 1; - else { - erts_aint32_t wakeup_state; - if (!interrupted) - wakeup_state = erts_atomic32_cmpxchg_relb(&ps->wakeup_state, - ERTS_POLL_WOKEN, - ERTS_POLL_NOT_WOKEN); - else - wakeup_state = erts_atomic32_xchg_relb(&ps->wakeup_state, - ERTS_POLL_WOKEN_INTR); - wake = wakeup_state == ERTS_POLL_NOT_WOKEN; - } - /* - * NOTE: This function might be called from signal handlers in the - * non-smp case; therefore, it has to be async-signal safe in - * the non-smp case. - */ - if (wake) { + erts_aint32_t wakeup_state; + if (!interrupted) + wakeup_state = erts_atomic32_cmpxchg_relb(&ps->wakeup_state, + ERTS_POLL_WOKEN, + ERTS_POLL_NOT_WOKEN); + else + wakeup_state = erts_atomic32_xchg_relb(&ps->wakeup_state, + ERTS_POLL_WOKEN_INTR); + wake = wakeup_state == ERTS_POLL_NOT_WOKEN; + + if (wake) +#endif + { ssize_t res; if (ps->wake_fds[1] < 0) return; /* Not initialized yet */ @@ -428,29 +430,27 @@ wake_poller(ErtsPollSet ps, int interrupted, int async_signal_safe) res = write(ps->wake_fds[1], "!", 1); } while (res < 0 && errno == EINTR); if (res <= 0 && errno != ERRNO_BLOCK) { - if (async_signal_safe) - fatal_error_async_signal_safe(__FILE__ - ":XXX:wake_poller(): " - "Failed to write on wakeup pipe\n"); - else - fatal_error("%s:%d:wake_poller(): " - "Failed to write to wakeup pipe fd=%d: " - "%s (%d)\n", - __FILE__, __LINE__, - ps->wake_fds[1], - erl_errno_id(errno), errno); + fatal_error("%s:%d:wake_poller(): " + "Failed to write to wakeup pipe fd=%d: " + "%s (%d)\n", + __FILE__, __LINE__, + ps->wake_fds[1], + erl_errno_id(errno), errno); } } } static ERTS_INLINE void -cleanup_wakeup_pipe(ErtsPollSet ps) +cleanup_wakeup_pipe(ErtsPollSet *ps) { + int intr = 0; int fd = ps->wake_fds[0]; int res; do { char buf[32]; res = read(fd, buf, sizeof(buf)); + if (res > 0) + intr = 1; } while (res > 0 || (res < 0 && errno == EINTR)); if (res < 0 && errno != ERRNO_BLOCK) { fatal_error("%s:%d:cleanup_wakeup_pipe(): " @@ -460,10 +460,14 @@ cleanup_wakeup_pipe(ErtsPollSet ps) fd, erl_errno_id(errno), errno); } +#if !ERTS_POLL_USE_CONCURRENT_UPDATE + if (intr) + erts_atomic32_set_nob(&ps->wakeup_state, ERTS_POLL_WOKEN_INTR); +#endif } static void -create_wakeup_pipe(ErtsPollSet ps) +create_wakeup_pipe(ErtsPollSet *ps) { int do_wake = 0; int wake_fds[2]; @@ -480,20 +484,13 @@ create_wakeup_pipe(ErtsPollSet ps) SET_NONBLOCKING(wake_fds[0]); SET_NONBLOCKING(wake_fds[1]); -#ifdef ERTS_POLL_DEBUG_PRINT - erts_printf("wakeup fds = {%d, %d}\n", wake_fds[0], wake_fds[1]); -#endif + DEBUG_PRINT("wakeup fds = {%d, %d}", ps, wake_fds[0], wake_fds[1]); ERTS_POLL_EXPORT(erts_poll_control)(ps, wake_fds[0], + ERTS_POLL_OP_ADD, ERTS_POLL_EV_IN, - 1, &do_wake); -#if ERTS_POLL_USE_FALLBACK - /* We depend on the wakeup pipe being handled by kernel poll */ - if (ps->fds_status[wake_fds[0]].flags & ERTS_POLL_FD_FLG_INFLBCK) - fatal_error("%s:%d:create_wakeup_pipe(): Internal error\n", - __FILE__, __LINE__); -#endif + &do_wake); if (ps->internal_fd_limit <= wake_fds[1]) ps->internal_fd_limit = wake_fds[1] + 1; if (ps->internal_fd_limit <= wake_fds[0]) @@ -502,82 +499,16 @@ create_wakeup_pipe(ErtsPollSet ps) ps->wake_fds[1] = wake_fds[1]; } - -/* - * --- timer fd ----------------------------------------------------------- - */ - -#if ERTS_POLL_USE_TIMERFD - -/* We use the timerfd when using epoll_wait to get high accuracy - timeouts, i.e. we want to sleep with < ms accuracy. */ - -static void -create_timerfd(ErtsPollSet ps) -{ - int do_wake = 0; - int timer_fd; - timer_fd = timerfd_create(CLOCK_MONOTONIC,0); - ERTS_POLL_EXPORT(erts_poll_control)(ps, - timer_fd, - ERTS_POLL_EV_IN, - 1, &do_wake); -#if ERTS_POLL_USE_FALLBACK - /* We depend on the wakeup pipe being handled by kernel poll */ - if (ps->fds_status[timer_fd].flags & ERTS_POLL_FD_FLG_INFLBCK) - fatal_error("%s:%d:create_wakeup_pipe(): Internal error\n", - __FILE__, __LINE__); #endif - if (ps->internal_fd_limit <= timer_fd) - ps->internal_fd_limit = timer_fd + 1; - ps->timer_fd = timer_fd; -} - -static ERTS_INLINE void -timerfd_set(ErtsPollSet ps, struct itimerspec *its) -{ -#ifdef DEBUG - struct itimerspec old_its; - int res; - res = timerfd_settime(ps->timer_fd, 0, its, &old_its); - ASSERT(res == 0); - ASSERT(old_its.it_interval.tv_sec == 0 && - old_its.it_interval.tv_nsec == 0 && - old_its.it_value.tv_sec == 0 && - old_its.it_value.tv_nsec == 0); - -#else - timerfd_settime(ps->timer_fd, 0, its, NULL); -#endif -} - -static ERTS_INLINE int -timerfd_clear(ErtsPollSet ps, int res, int max_res) { - - struct itimerspec its; - /* we always have to clear the timer */ - its.it_interval.tv_sec = 0; - its.it_interval.tv_nsec = 0; - its.it_value.tv_sec = 0; - its.it_value.tv_nsec = 0; - timerfd_settime(ps->timer_fd, 0, &its, NULL); - - /* only timeout fd triggered */ - if (res == 1 && ps->res_events[0].data.fd == ps->timer_fd) - return 0; - - return res; -} - -#endif /* ERTS_POLL_USE_TIMERFD */ - /* * --- Poll set update requests ---------------------------------------------- */ +#if !ERTS_POLL_USE_CONCURRENT_UPDATE + static ERTS_INLINE void -enqueue_update_request(ErtsPollSet ps, int fd) +enqueue_update_request(ErtsPollSet *ps, int fd) { ErtsPollSetUpdateRequestsBlock *urqbp; @@ -592,13 +523,11 @@ enqueue_update_request(ErtsPollSet ps, int fd) urqbp = ps->curr_upd_req_block; if (urqbp->len == ERTS_POLLSET_UPDATE_REQ_BLOCK_SIZE) { - ASSERT(!urqbp->next); urqbp = erts_alloc(ERTS_ALC_T_POLLSET_UPDREQ, sizeof(ErtsPollSetUpdateRequestsBlock)); - ps->curr_upd_req_block->next = urqbp; - ps->curr_upd_req_block = urqbp; - urqbp->next = NULL; + urqbp->next = ps->curr_upd_req_block; urqbp->len = 0; + ps->curr_upd_req_block = urqbp; } ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_INURQ; @@ -606,28 +535,29 @@ enqueue_update_request(ErtsPollSet ps, int fd) } static ERTS_INLINE void -free_update_requests_block(ErtsPollSet ps, +free_update_requests_block(ErtsPollSet *ps, ErtsPollSetUpdateRequestsBlock *urqbp) { if (urqbp != &ps->update_requests) erts_free(ERTS_ALC_T_POLLSET_UPDREQ, (void *) urqbp); else { - urqbp->next = NULL; urqbp->len = 0; } } +#endif /* !ERTS_POLL_USE_CONCURRENT_UPDATE */ /* * --- Growing poll set structures ------------------------------------------- */ -#ifndef ERTS_KERNEL_POLL_VERSION /* only one shared implementation */ +#if !ERTS_NO_KERNEL_POLL_VERSION || !ERTS_ENABLE_KERNEL_POLL +/* only one shared implementation */ #define ERTS_FD_TABLE_MIN_LENGTH 1024 #define ERTS_FD_TABLE_EXP_THRESHOLD (2048*1024) -int erts_poll_new_table_len (int old_len, int need_len) +int erts_poll_new_table_len(int old_len, int need_len) { int new_len; @@ -637,7 +567,7 @@ int erts_poll_new_table_len (int old_len, int need_len) } else { new_len = old_len; - do { + do { if (new_len < ERTS_FD_TABLE_EXP_THRESHOLD) new_len *= 2; else @@ -650,30 +580,9 @@ int erts_poll_new_table_len (int old_len, int need_len) } #endif -#if ERTS_POLL_USE_KERNEL_POLL -static void -grow_res_events(ErtsPollSet ps, int new_len) -{ - size_t new_size = sizeof( -#if ERTS_POLL_USE_EPOLL - struct epoll_event -#elif ERTS_POLL_USE_DEVPOLL - struct pollfd -#elif ERTS_POLL_USE_KQUEUE - struct kevent -#endif - ) * erts_poll_new_table_len(ps->res_events_len, new_len); - /* We do not need to save previously stored data */ - if (ps->res_events) - erts_free(ERTS_ALC_T_POLL_RES_EVS, ps->res_events); - ps->res_events = erts_alloc(ERTS_ALC_T_POLL_RES_EVS, new_size); - ps->res_events_len = new_len; -} -#endif /* ERTS_POLL_USE_KERNEL_POLL */ - #if ERTS_POLL_USE_POLL static void -grow_poll_fds(ErtsPollSet ps, int min_ix) +grow_poll_fds(ErtsPollSet *ps, int min_ix) { int i; int new_len = erts_poll_new_table_len(ps->poll_fds_len, min_ix + 1); @@ -721,8 +630,9 @@ ensure_select_fds(int fd, ERTS_fd_set* in, ERTS_fd_set* out) # define ensure_select_fds(fd, in, out) do {} while(0) #endif /* _DARWIN_UNLIMITED_SELECT */ +#if !ERTS_POLL_USE_CONCURRENT_UPDATE static void -grow_fds_status(ErtsPollSet ps, int min_fd) +grow_fds_status(ErtsPollSet *ps, int min_fd) { int i; int new_len = erts_poll_new_table_len(ps->fds_status_len, min_fd + 1); @@ -741,428 +651,26 @@ grow_fds_status(ErtsPollSet ps, int min_fd) #endif ps->fds_status[i].used_events = (ErtsPollEvents) 0; ps->fds_status[i].events = (ErtsPollEvents) 0; -#if ERTS_POLL_COALESCE_KP_RES - ps->fds_status[i].res_ev_ix = (unsigned short) ERTS_POLL_MAX_RES; -#endif ps->fds_status[i].flags = (unsigned short) 0; } ps->fds_status_len = new_len; } +#endif /* * --- Selecting fd to poll on ----------------------------------------------- */ -#if ERTS_POLL_USE_FALLBACK -static int update_fallback_pollset(ErtsPollSet ps, int fd); -#endif - -static ERTS_INLINE int -need_update(ErtsPollSet ps, int fd) -{ -#if ERTS_POLL_USE_KERNEL_POLL - int reset; -#endif - - ASSERT(fd < ps->fds_status_len); - -#if ERTS_POLL_USE_KERNEL_POLL - reset = (int) (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST); - if (reset && !ps->fds_status[fd].used_events) { - ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST; - reset = 0; - } -#else - ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST; -#endif - - if (ps->fds_status[fd].used_events != ps->fds_status[fd].events) - return 1; - -#if ERTS_POLL_USE_KERNEL_POLL - return reset; -#else - return 0; -#endif -} - -#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET - -#if ERTS_POLL_USE_KQUEUE -#define ERTS_POLL_MIN_BATCH_BUF_SIZE 128 -#else -#define ERTS_POLL_MIN_BATCH_BUF_SIZE 64 -#endif - -typedef struct { - int len; - int size; -#if ERTS_POLL_USE_DEVPOLL - struct pollfd *buf; -#elif ERTS_POLL_USE_KQUEUE - struct kevent *buf; - struct kevent *ebuf; -#endif -} ErtsPollBatchBuf; - - -static ERTS_INLINE void -setup_batch_buf(ErtsPollSet ps, ErtsPollBatchBuf *bbp) -{ - bbp->len = 0; -#if ERTS_POLL_USE_DEVPOLL - bbp->size = ps->res_events_len; - bbp->buf = ps->res_events; -#elif ERTS_POLL_USE_KQUEUE - bbp->size = ps->res_events_len/2; - bbp->buf = ps->res_events; - bbp->ebuf = bbp->buf + bbp->size; -#endif -} - - -#if ERTS_POLL_USE_DEVPOLL - -static void -write_batch_buf(ErtsPollSet ps, ErtsPollBatchBuf *bbp) -{ - ssize_t wres; - char *buf = (char *) bbp->buf; - size_t buf_size = sizeof(struct pollfd)*bbp->len; - - while (1) { - wres = write(ps->kp_fd, (void *) buf, buf_size); - if (wres < 0) { - if (errno == EINTR) - continue; - fatal_error("%s:%d:write_batch_buf(): " - "Failed to write to /dev/poll: " - "%s (%d)\n", - __FILE__, __LINE__, - erl_errno_id(errno), errno); - } - buf_size -= wres; - if (buf_size <= 0) - break; - buf += wres; - } - - if (buf_size < 0) { - fatal_error("%s:%d:write_devpoll_buf(): Internal error\n", - __FILE__, __LINE__); - } - bbp->len = 0; -} - -#elif ERTS_POLL_USE_KQUEUE - -static void -write_batch_buf(ErtsPollSet ps, ErtsPollBatchBuf *bbp) -{ - int res; - int len = bbp->len; - struct kevent *buf = bbp->buf; - struct timespec ts = {0, 0}; - - do { - res = kevent(ps->kp_fd, buf, len, NULL, 0, &ts); - } while (res < 0 && errno == EINTR); - if (res < 0) { - int i; - struct kevent *ebuf = bbp->ebuf; - do { - res = kevent(ps->kp_fd, buf, len, ebuf, len, &ts); - } while (res < 0 && errno == EINTR); - if (res < 0) { - fatal_error("%s:%d: kevent() failed: %s (%d)\n", - __FILE__, __LINE__, erl_errno_id(errno), errno); - } - for (i = 0; i < res; i++) { - if (ebuf[i].flags & EV_ERROR) { - short filter; - int fd = (int) ebuf[i].ident; - - switch ((int) (long) ebuf[i].udata) { - - /* - * Since we use a lazy update approach EV_DELETE will - * frequently fail. This since kqueue automatically - * removes a file descriptor that is closed from the - * poll set. - */ - case ERTS_POLL_KQ_OP_DEL_R: - case ERTS_POLL_KQ_OP_DEL_W: - case ERTS_POLL_KQ_OP_HANDLED: - break; - - /* - * According to the kqueue man page EVFILT_READ support - * does not imply EVFILT_WRITE support; therefore, - * if an EV_ADD fail, we may have to remove other - * events on this fd in the kqueue pollset before - * adding fd to the fallback pollset. - */ - case ERTS_POLL_KQ_OP_ADD_W: - if (ps->fds_status[fd].used_events & ERTS_POLL_EV_IN) { - filter = EVFILT_READ; - goto rm_add_fb; - } - goto add_fb; - case ERTS_POLL_KQ_OP_ADD_R: - if (ps->fds_status[fd].used_events & ERTS_POLL_EV_OUT) { - filter = EVFILT_WRITE; - goto rm_add_fb; - } - goto add_fb; - case ERTS_POLL_KQ_OP_ADD2_W: - case ERTS_POLL_KQ_OP_ADD2_R: { - int j; - for (j = i+1; j < res; j++) { - if (fd == (int) ebuf[j].ident) { - ebuf[j].udata = (void *) ERTS_POLL_KQ_OP_HANDLED; - if (!(ebuf[j].flags & EV_ERROR)) { - switch ((int) (long) ebuf[j].udata) { - case ERTS_POLL_KQ_OP_ADD2_W: - filter = EVFILT_WRITE; - goto rm_add_fb; - case ERTS_POLL_KQ_OP_ADD2_R: - filter = EVFILT_READ; - goto rm_add_fb; - default: - fatal_error("%s:%d:write_batch_buf(): " - "Internal error", - __FILE__, __LINE__); - break; - } - } - goto add_fb; - } - } - /* The other add succeded... */ - filter = ((((int) (long) ebuf[i].udata) - == ERTS_POLL_KQ_OP_ADD2_W) - ? EVFILT_READ - : EVFILT_WRITE); - rm_add_fb: - { - struct kevent kev; - struct timespec ts = {0, 0}; - EV_SET(&kev, fd, filter, EV_DELETE, 0, 0, 0); - (void) kevent(ps->kp_fd, &kev, 1, NULL, 0, &ts); - } - - add_fb: - ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_USEFLBCK; - ASSERT(ps->fds_status[fd].used_events); - ps->fds_status[fd].used_events = 0; - erts_atomic_dec_nob(&ps->no_of_user_fds); - update_fallback_pollset(ps, fd); - ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK); - break; - } - default: - fatal_error("%s:%d:write_batch_buf(): Internal error", - __FILE__, __LINE__); - break; - } - } - } - } - bbp->len = 0; -} - -#endif /* ERTS_POLL_USE_KQUEUE */ - -static ERTS_INLINE void -batch_update_pollset(ErtsPollSet ps, int fd, ErtsPollBatchBuf *bbp) -{ - int buf_len; -#if ERTS_POLL_USE_DEVPOLL - short events; - struct pollfd *buf; -#elif ERTS_POLL_USE_KQUEUE - struct kevent *buf; -#endif - -#ifdef ERTS_POLL_DEBUG_PRINT - erts_printf("Doing lazy update on fd=%d\n", fd); -#endif - - if (!need_update(ps, fd)) - return; - - /* Make sure we have room for at least maximum no of entries - per fd */ - if (bbp->size - bbp->len < 2) - write_batch_buf(ps, bbp); - - buf_len = bbp->len; - buf = bbp->buf; - - ASSERT(fd < ps->fds_status_len); - -#if ERTS_POLL_USE_DEVPOLL - events = ERTS_POLL_EV_E2N(ps->fds_status[fd].events); - if (!events) { - buf[buf_len].events = POLLREMOVE; - erts_atomic_dec_nob(&ps->no_of_user_fds); - } - else if (!ps->fds_status[fd].used_events) { - buf[buf_len].events = events; - erts_atomic_inc_nob(&ps->no_of_user_fds); - } - else { - if ((ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST) - || (ps->fds_status[fd].used_events & ~events)) { - /* Reset or removed events... */ - buf[buf_len].fd = fd; - buf[buf_len].events = POLLREMOVE; - buf[buf_len++].revents = 0; - } - buf[buf_len].events = events; - } - buf[buf_len].fd = fd; - buf[buf_len++].revents = 0; - -#elif ERTS_POLL_USE_KQUEUE - - if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK) { - if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_USEFLBCK) - update_fallback_pollset(ps, fd); - else { /* Remove from fallback and try kqueue */ - ErtsPollEvents events = ps->fds_status[fd].events; - ps->fds_status[fd].events = (ErtsPollEvents) 0; - update_fallback_pollset(ps, fd); - ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK)); - if (events) { - ps->fds_status[fd].events = events; - goto try_kqueue; - } - } - } - else { - ErtsPollEvents events, used_events; - int mod_w, mod_r; - try_kqueue: - events = ERTS_POLL_EV_E2N(ps->fds_status[fd].events); - used_events = ERTS_POLL_EV_E2N(ps->fds_status[fd].used_events); - if (!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST)) { - if (!used_events && - (events & ERTS_POLL_EV_IN) && (events & ERTS_POLL_EV_OUT)) - goto do_add_rw; - mod_r = ((events & ERTS_POLL_EV_IN) - != (used_events & ERTS_POLL_EV_IN)); - mod_w = ((events & ERTS_POLL_EV_OUT) - != (used_events & ERTS_POLL_EV_OUT)); - goto do_mod; - } - else { /* Reset */ - if ((events & ERTS_POLL_EV_IN) && (events & ERTS_POLL_EV_OUT)) { - do_add_rw: - EV_SET(&buf[buf_len], fd, EVFILT_READ, EV_ADD, - 0, 0, (void *) ERTS_POLL_KQ_OP_ADD2_R); - buf_len++; - EV_SET(&buf[buf_len], fd, EVFILT_WRITE, EV_ADD, - 0, 0, (void *) ERTS_POLL_KQ_OP_ADD2_W); - buf_len++; - - } - else { - mod_r = 1; - mod_w = 1; - do_mod: - if (mod_r) { - if (events & ERTS_POLL_EV_IN) { - EV_SET(&buf[buf_len], fd, EVFILT_READ, EV_ADD, - 0, 0, (void *) ERTS_POLL_KQ_OP_ADD_R); - buf_len++; - } - else if (used_events & ERTS_POLL_EV_IN) { - EV_SET(&buf[buf_len], fd, EVFILT_READ, EV_DELETE, - 0, 0, (void *) ERTS_POLL_KQ_OP_DEL_R); - buf_len++; - } - } - if (mod_w) { - if (events & ERTS_POLL_EV_OUT) { - EV_SET(&buf[buf_len], fd, EVFILT_WRITE, EV_ADD, - 0, 0, (void *) ERTS_POLL_KQ_OP_ADD_W); - buf_len++; - } - else if (used_events & ERTS_POLL_EV_OUT) { - EV_SET(&buf[buf_len], fd, EVFILT_WRITE, EV_DELETE, - 0, 0, (void *) ERTS_POLL_KQ_OP_DEL_W); - buf_len++; - } - } - } - } - if (used_events) { - if (!events) { - erts_atomic_dec_nob(&ps->no_of_user_fds); - } - } - else { - if (events) - erts_atomic_inc_nob(&ps->no_of_user_fds); - } - ASSERT((events & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) == 0); - ASSERT((used_events & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) == 0); - } - -#endif - - ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST; - ps->fds_status[fd].used_events = ps->fds_status[fd].events; - - bbp->len = buf_len; -} - -#else /* !ERTS_POLL_USE_BATCH_UPDATE_POLLSET */ - #if ERTS_POLL_USE_EPOLL static int -#if ERTS_POLL_USE_CONCURRENT_UPDATE -conc_update_pollset(ErtsPollSet ps, int fd, int *update_fallback) -#else -update_pollset(ErtsPollSet ps, int fd) -#endif +update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events) { int res; - int op; + int epoll_op = EPOLL_CTL_MOD; struct epoll_event epe_templ; struct epoll_event epe; - ASSERT(fd < ps->fds_status_len); - - if (!need_update(ps, fd)) - return 0; - -#ifdef ERTS_POLL_DEBUG_PRINT - erts_printf("Doing update on fd=%d\n", fd); -#endif - if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK) { -#if ERTS_POLL_USE_CONCURRENT_UPDATE - if (!*update_fallback) { - *update_fallback = 1; - return 0; - } -#endif - if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_USEFLBCK) { - return update_fallback_pollset(ps, fd); - } - else { /* Remove from fallback and try epoll */ - ErtsPollEvents events = ps->fds_status[fd].events; - ps->fds_status[fd].events = (ErtsPollEvents) 0; - res = update_fallback_pollset(ps, fd); - ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK)); - if (!events) - return res; - ps->fds_status[fd].events = events; - } - } - - epe_templ.events = ERTS_POLL_EV_E2N(ps->fds_status[fd].events); + epe_templ.events = ERTS_POLL_EV_E2N(events) | EPOLLONESHOT; epe_templ.data.fd = fd; #ifdef VALGRIND @@ -1170,30 +678,24 @@ update_pollset(ErtsPollSet ps, int fd) memset((void *) &epe.data, 0, sizeof(epoll_data_t)); #endif - if (epe_templ.events && ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST) { - do { - /* We init 'epe' every time since epoll_ctl() may modify it - (not declared const and not documented as const). */ - epe.events = epe_templ.events; - epe.data.fd = epe_templ.data.fd; - res = epoll_ctl(ps->kp_fd, EPOLL_CTL_DEL, fd, &epe); - } while (res != 0 && errno == EINTR); - erts_atomic_dec_nob(&ps->no_of_user_fds); - ps->fds_status[fd].used_events = 0; - } - - if (!epe_templ.events) { + switch (op) { + case ERTS_POLL_OP_DEL: /* A note on EPOLL_CTL_DEL: linux kernel versions before 2.6.9 need a non-NULL event pointer even though it is ignored... */ - op = EPOLL_CTL_DEL; + epoll_op = EPOLL_CTL_DEL; + epe_templ.events = 0; erts_atomic_dec_nob(&ps->no_of_user_fds); - } - else if (!ps->fds_status[fd].used_events) { - op = EPOLL_CTL_ADD; + break; + case ERTS_POLL_OP_ADD: + epoll_op = EPOLL_CTL_ADD; erts_atomic_inc_nob(&ps->no_of_user_fds); - } - else { - op = EPOLL_CTL_MOD; + break; + case ERTS_POLL_OP_MOD: + epoll_op = EPOLL_CTL_MOD; + break; + default: + ASSERT(0); + break; } do { @@ -1201,33 +703,32 @@ update_pollset(ErtsPollSet ps, int fd) (not declared const and not documented as const). */ epe.events = epe_templ.events; epe.data.fd = epe_templ.data.fd; - res = epoll_ctl(ps->kp_fd, op, fd, &epe); + res = epoll_ctl(ps->kp_fd, epoll_op, fd, &epe); } while (res != 0 && errno == EINTR); -#if defined(ERTS_POLL_DEBUG_PRINT) && 1 +#if ERTS_POLL_DEBUG_PRINT { int saved_errno = errno; - erts_printf("%s = epoll_ctl(%d, %s, %d, {Ox%x, %d})\n", - res == 0 ? "0" : erl_errno_id(errno), - ps->kp_fd, - (op == EPOLL_CTL_ADD - ? "EPOLL_CTL_ADD" - : (op == EPOLL_CTL_MOD - ? "EPOLL_CTL_MOD" - : (op == EPOLL_CTL_DEL - ? "EPOLL_CTL_DEL" - : "UNKNOWN"))), - fd, - epe_templ.events, - fd); + DEBUG_PRINT_FD("%s = epoll_ctl(%d, %s, %d, {0x%x, %d})", + ps, fd, + res == 0 ? "0" : erl_errno_id(errno), + ps->kp_fd, + (epoll_op == EPOLL_CTL_ADD + ? "EPOLL_CTL_ADD" + : (epoll_op == EPOLL_CTL_MOD + ? "EPOLL_CTL_MOD" + : (epoll_op == EPOLL_CTL_DEL + ? "EPOLL_CTL_DEL" + : "UNKNOWN"))), + fd, + epe_templ.events, + fd); errno = saved_errno; } #endif - if (res == 0) - ps->fds_status[fd].used_events = ps->fds_status[fd].events; - else { + if (res != 0) { switch (op) { - case EPOLL_CTL_MOD: + case ERTS_POLL_OP_MOD: epe.events = 0; do { /* We init 'epe' every time since epoll_ctl() may modify it @@ -1236,29 +737,18 @@ update_pollset(ErtsPollSet ps, int fd) epe.data.fd = fd; res = epoll_ctl(ps->kp_fd, EPOLL_CTL_DEL, fd, &epe); } while (res != 0 && errno == EINTR); - ps->fds_status[fd].used_events = 0; /* Fall through ... */ - case EPOLL_CTL_ADD: { - ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_USEFLBCK; + case ERTS_POLL_OP_ADD: { erts_atomic_dec_nob(&ps->no_of_user_fds); -#if ERTS_POLL_USE_CONCURRENT_UPDATE - if (!*update_fallback) { - *update_fallback = 1; - return 0; - } -#endif - ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK)); - res = update_fallback_pollset(ps, fd); - ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK); + res = ERTS_POLL_EV_NVAL; break; } - case EPOLL_CTL_DEL: { + case ERTS_POLL_OP_DEL: { /* * Since we use a lazy update approach EPOLL_CTL_DEL will * frequently fail. This since epoll automatically removes * a filedescriptor that is closed from the poll set. */ - ps->fds_status[fd].used_events = 0; res = 0; break; } @@ -1267,67 +757,277 @@ update_pollset(ErtsPollSet ps, int fd) __FILE__, __LINE__); break; } + } else { + res = events; } - ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST; return res; } -#if ERTS_POLL_USE_CONCURRENT_UPDATE +#endif /* ERTS_POLL_USE_EPOLL */ + +#if ERTS_POLL_USE_KQUEUE + +/* Some versions of the EV_SET macro used kevp multiple times, + so we define out own version that make sure that it is safe + to do kevp++ in the argument list. */ +#define ERTS_EV_SET(kevp, a, b, c, f) do { \ + struct kevent *kevp_ = kevp; \ + EV_SET(kevp_, a, b, c, 0, 0, f); \ + } while(0) + static int -update_pollset(ErtsPollSet ps, int fd) +update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events) { - int update_fallback = 1; - return conc_update_pollset(ps, fd, &update_fallback); -} -#endif - -#endif /* ERTS_POLL_USE_EPOLL */ + int res = 0, len = 0; + struct kevent evts[2]; + struct timespec ts = {0, 0}; -#endif /* ERTS_POLL_USE_BATCH_UPDATE_POLLSET */ +#ifdef EV_DISPATCH + /* If we have EV_DISPATCH we use it. The kevent descriptions for both + read and write are added on OP_ADD and removed on OP_DEL. And then + after than only EV_ENABLE|EV_DISPATCH are used. + + It could be possible to not modify the pollset when disabling and/or + deleting events, but that may cause the poll threads to be awoken + a lot more than they should so we take the cost here instead of + in the poll thread. + + Note: We need to have EV_DISPATCH both when the event is enabled and + disabled, as otherwise the event may be triggered twice on each re-arm. + Not sure if this is intended or not (can't find anything about it in the + man page), but it seems to be the way it works... + */ + + if (op == ERTS_POLL_OP_DEL) { + erts_atomic_dec_nob(&ps->no_of_user_fds); + /* We could probably skip this delete, do we want to? */ + ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, EV_DELETE, (void *) 0); + ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, EV_DELETE, (void *) 0); + } else if (op == ERTS_POLL_OP_ADD) { + uint32_t flags; + erts_atomic_inc_nob(&ps->no_of_user_fds); + + flags = EV_ADD|EV_DISPATCH; + flags |= ((events & ERTS_POLL_EV_IN) ? 0 : EV_DISABLE); + ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN); + + flags = EV_ADD|EV_DISPATCH; + flags |= ((events & ERTS_POLL_EV_OUT) ? 0 : EV_DISABLE); + ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT); + } else { + uint32_t flags; + ASSERT(op == ERTS_POLL_OP_MOD); -#if ERTS_POLL_USE_POLL || ERTS_POLL_USE_SELECT || ERTS_POLL_USE_FALLBACK + flags = EV_DISPATCH; + flags |= (events & ERTS_POLL_EV_IN) ? EV_ENABLE : EV_DISABLE; + ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN); -#if ERTS_POLL_USE_FALLBACK -static int update_fallback_pollset(ErtsPollSet ps, int fd) + flags = EV_DISPATCH; + flags |= (events & ERTS_POLL_EV_OUT) ? EV_ENABLE : EV_DISABLE; + ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT); + } #else -static int update_pollset(ErtsPollSet ps, int fd) + uint32_t flags = EV_ADD|EV_ONESHOT; + + if (op == ERTS_POLL_OP_DEL) { + erts_atomic_dec_nob(&ps->no_of_user_fds); + /* We don't do anything when a delete is issued. The fds will be removed + when they are triggered, or when they are closed. */ + events = 0; + } else if (op == ERTS_POLL_OP_ADD) { + erts_atomic_inc_nob(&ps->no_of_user_fds); + } + + if (events & ERTS_POLL_EV_IN) { + ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN); + } + if (events & ERTS_POLL_EV_OUT) { + ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT); + } + #endif -{ -#ifdef ERTS_POLL_DEBUG_PRINT -#if ERTS_POLL_USE_FALLBACK - erts_printf("Doing fallback update on fd=%d\n", fd); + if (len) + do { + res = kevent(ps->kp_fd, evts, len, NULL, 0, &ts); + } while (res < 0 && errno == EINTR); +#if ERTS_POLL_DEBUG_PRINT + { + int saved_errno = errno, i; + char keventb[255], *keventbp = keventb; + if (res < 0) + keventbp += sprintf(keventbp,"%s = ",erl_errno_id(saved_errno)); + else + keventbp += sprintf(keventbp,"%d = ",res); + keventbp += sprintf(keventbp, "kevent(%d, {",ps->kp_fd); + for (i = 0; i < len; i++) { + const char *flags = "UNKNOWN"; + if (evts[i].flags == EV_DELETE) flags = "EV_DELETE"; + if (evts[i].flags == (EV_ADD|EV_ONESHOT)) flags = "EV_ADD|EV_ONESHOT"; +#ifdef EV_DISPATCH + if (evts[i].flags == (EV_ADD|EV_DISPATCH)) flags = "EV_ADD|EV_DISPATCH"; + if (evts[i].flags == (EV_ADD|EV_DISABLE)) flags = "EV_ADD|EV_DISABLE"; + if (evts[i].flags == (EV_ENABLE|EV_DISPATCH)) flags = "EV_ENABLE|EV_DISPATCH"; + if (evts[i].flags == EV_DISABLE) flags = "EV_DISABLE"; + if (evts[i].flags == (EV_DISABLE|EV_DISPATCH)) flags = "EV_DISABLE|EV_DISABLE"; +#endif + + keventbp += sprintf(keventbp, "%s{%lu, %s, %s}",i > 0 ? ", " : "", + evts[i].ident, + (evts[i].filter == EVFILT_READ + ? "EVFILT_READ" + : (evts[i].filter == EVFILT_WRITE + ? "EVFILT_WRITE" + : "UNKNOWN")), flags); + } + keventbp += sprintf(keventbp, "}, %d)", len); + DEBUG_PRINT_FD("%s", ps, fd, keventb); + errno = saved_errno; + } +#endif + if (res < 0) { + if (op != ERTS_POLL_OP_DEL) { +#ifdef EV_RECEIPT + struct kevent receipt_evts[2]; + len = 0; + ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, EV_DELETE|EV_RECEIPT, (void *) 0); + ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, EV_DELETE|EV_RECEIPT, (void *) 0); + do { + res = kevent(ps->kp_fd, evts, len, receipt_evts, 2, &ts); + } while (res < 0 && errno == EINTR); #else - erts_printf("Doing update on fd=%d\n", fd); + ERTS_EV_SET(&evts[0], fd, EVFILT_WRITE, EV_DELETE, (void *) 0); + do { + res = kevent(ps->kp_fd, evts, 1, NULL, 0, &ts); + } while (res < 0 && errno == EINTR); + ERTS_EV_SET(&evts[0], fd, EVFILT_READ, EV_DELETE, (void *) 0); + do { + res = kevent(ps->kp_fd, evts, 1, NULL, 0, &ts); + } while (res < 0 && errno == EINTR); +#endif + if (op == ERTS_POLL_OP_ADD) + erts_atomic_dec_nob(&ps->no_of_user_fds); + events = ERTS_POLL_EV_NVAL; + } else + events = 0; + } + return events; +} + +#endif /* ERTS_POLL_USE_KQUEUE */ + +#if !ERTS_POLL_USE_CONCURRENT_UPDATE + +static ERTS_INLINE void +init_batch_update(ErtsPollSet *ps, int len) +{ +#if ERTS_POLL_USE_DEVPOLL + ASSERT(ps->poll_fds == NULL); + ps->poll_fds = erts_alloc(ERTS_ALC_T_TMP, sizeof(struct pollfd) * len); + ps->poll_fds_ix = 0; #endif +} + +static ERTS_INLINE void +write_batch_update(ErtsPollSet *ps) +{ +#if ERTS_POLL_USE_DEVPOLL + ssize_t wres; + char *buf = (char *) ps->poll_fds; + size_t buf_size = sizeof(struct pollfd)*ps->poll_fds_ix; + + while (1) { + wres = write(ps->kp_fd, (void *) buf, buf_size); + if (wres < 0) { + if (errno == EINTR) + continue; + fatal_error("%s:%d:write_batch_buf(): " + "Failed to write to /dev/poll: " + "%s (%d)\n", + __FILE__, __LINE__, + erl_errno_id(errno), errno); + } +#if ERTS_POLL_DEBUG_PRINT + { + int saved_errno = errno, i; + char devpollb[2048], *devpollbp = devpollb; + devpollbp += sprintf(devpollbp, "%d = devpoll(%d, {", wres, ps->kp_fd); + for (i = 0; i < wres / sizeof(struct pollfd); i++) { + if (devpollbp == devpollb) + devpollbp += sprintf(devpollbp, "%d = devpoll(%d, {", wres, ps->kp_fd); + devpollbp += sprintf(devpollbp, "%s{fd = %d, events = %s}", + i > 0 ? ", " : "", + ps->poll_fds[i].fd, + ev2str(ps->poll_fds[i].events)); + if (devpollbp - devpollb > 512) { + devpollbp += sprintf(devpollbp, "}, %d)", ps->poll_fds_ix); + DEBUG_PRINT("%s", ps, devpollb); + devpollbp = devpollb; + } + } + devpollbp += sprintf(devpollbp, "}, %d)", ps->poll_fds_ix); + DEBUG_PRINT("%s", ps, devpollb); + errno = saved_errno; + } #endif - ASSERT(fd < ps->fds_status_len); -#if ERTS_POLL_USE_FALLBACK - ASSERT(ps->fds_status[fd].used_events - ? (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK) - : (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_USEFLBCK)); + buf_size -= wres; + if (buf_size <= 0) + break; + buf += wres; + } + + if (buf_size < 0) { + fatal_error("%s:%d:write_devpoll_buf(): Internal error\n", + __FILE__, __LINE__); + } + erts_free(ERTS_ALC_T_TMP, ps->poll_fds); + ps->poll_fds = NULL; #endif +} - if (!need_update(ps, fd)) - return 0; +static ERTS_INLINE int +need_update(ErtsPollSet *ps, int fd, int *resetp) +{ + int reset; + ASSERT(fd < ps->fds_status_len); -#if ERTS_POLL_USE_FALLBACK + reset = (int) (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST); ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST; -#endif + + *resetp = reset; + + if (reset || ps->fds_status[fd].used_events != ps->fds_status[fd].events) + return 1; + + return 0; +} + +static int update_pollset(ErtsPollSet *ps, ErtsPollResFd pr[], int fd) +{ + int res = 0, reset = 0; + ErtsPollEvents events = ps->fds_status[fd].events; + ASSERT(fd < ps->fds_status_len); + + if (!need_update(ps, fd, &reset)) + return res; #if ERTS_POLL_USE_POLL /* --- poll -------------------------------- */ - if (!ps->fds_status[fd].events) { + if (!events) { int pix = ps->fds_status[fd].pix; int last_pix; + + if (reset) { + /* When a fd has been reset, we tell the caller of erts_poll_wait + this by setting the fd as ERTS_POLL_EV_NONE */ + ERTS_POLL_RES_SET_FD(&pr[res], fd); + ERTS_POLL_RES_SET_EVTS(&pr[res], ERTS_POLL_EV_NONE); + DEBUG_PRINT_FD("trig %s (poll)", ps, fd, ev2str(ERTS_POLL_EV_NONE)); + res++; + } + if (pix < 0) { -#if ERTS_POLL_USE_FALLBACK - ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK)); -#endif - return -1; + return res; } -#if ERTS_POLL_USE_FALLBACK - ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK); -#endif erts_atomic_dec_nob(&ps->no_of_user_fds); last_pix = --ps->no_poll_fds; if (pix != last_pix) { @@ -1344,126 +1044,151 @@ static int update_pollset(ErtsPollSet ps, int fd) /* Clear this fd status */ ps->fds_status[fd].pix = -1; ps->fds_status[fd].used_events = (ErtsPollEvents) 0; -#if ERTS_POLL_USE_FALLBACK - ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_INFLBCK; -#endif + } else { int pix = ps->fds_status[fd].pix; if (pix < 0) { -#if ERTS_POLL_USE_FALLBACK - ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK) - || fd == ps->kp_fd); -#endif erts_atomic_inc_nob(&ps->no_of_user_fds); ps->fds_status[fd].pix = pix = ps->no_poll_fds++; if (pix >= ps->poll_fds_len) grow_poll_fds(ps, pix); ps->poll_fds[pix].fd = fd; ps->fds_status[fd].pix = pix; -#if ERTS_POLL_USE_FALLBACK - ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_INFLBCK; -#endif } -#if ERTS_POLL_USE_FALLBACK - ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK); -#endif - /* Events to be used in next poll */ - ps->poll_fds[pix].events = ev2pollev(ps->fds_status[fd].events); + ps->poll_fds[pix].events = ev2pollev(events); if (ps->poll_fds[pix].revents) { /* Remove result events that we should not poll for anymore */ ps->poll_fds[pix].revents &= ev2pollev(~(~ps->fds_status[fd].used_events - & ps->fds_status[fd].events)); + & events)); } /* Save events to be used in next poll */ - ps->fds_status[fd].used_events = ps->fds_status[fd].events; + ps->fds_status[fd].used_events = events; } - return 0; + return res; #elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */ - { - ErtsPollEvents events = ps->fds_status[fd].events; + if (!events) { + + if (reset) { + /* When a fd has been reset, we tell the caller of erts_poll_wait + this by setting the fd as ERTS_POLL_EV_NONE */ + ERTS_POLL_RES_SET_FD(&pr[res], fd); + ERTS_POLL_RES_SET_EVTS(&pr[res], ERTS_POLL_EV_NONE); + DEBUG_PRINT_FD("trig %s (select)", ps, fd, ev2str(ERTS_POLL_EV_NONE)); + res++; + } + + ERTS_FD_CLR(fd, &ps->input_fds); + ERTS_FD_CLR(fd, &ps->output_fds); + + if (ps->fds_status[fd].used_events) { + erts_atomic_dec_nob(&ps->no_of_user_fds); + ps->fds_status[fd].used_events = (ErtsPollEvents) 0; + } + + if (fd == ps->max_fd) { + int max = ps->max_fd; + for (max = ps->max_fd; max >= 0; max--) + if (ps->fds_status[max].used_events) + break; + ps->max_fd = max; + } + + } else { + ensure_select_fds(fd, &ps->input_fds, &ps->output_fds); - if ((ERTS_POLL_EV_IN & events) - != (ERTS_POLL_EV_IN & ps->fds_status[fd].used_events)) { - if (ERTS_POLL_EV_IN & events) { - ERTS_FD_SET(fd, &ps->input_fds); - } - else { - ERTS_FD_CLR(fd, &ps->input_fds); - } - } - if ((ERTS_POLL_EV_OUT & events) - != (ERTS_POLL_EV_OUT & ps->fds_status[fd].used_events)) { - if (ERTS_POLL_EV_OUT & events) { - ERTS_FD_SET(fd, &ps->output_fds); - } - else { - ERTS_FD_CLR(fd, &ps->output_fds); - } - } - if (!ps->fds_status[fd].used_events) { - ASSERT(events); - erts_atomic_inc_nob(&ps->no_of_user_fds); -#if ERTS_POLL_USE_FALLBACK - ps->no_select_fds++; - ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_INFLBCK; -#endif - } - else if (!events) { - ASSERT(ps->fds_status[fd].used_events); - erts_atomic_dec_nob(&ps->no_of_user_fds); - ps->fds_status[fd].events = events; -#if ERTS_POLL_USE_FALLBACK - ps->no_select_fds--; - ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_INFLBCK; -#endif - } + if (!ps->fds_status[fd].used_events) + erts_atomic_inc_nob(&ps->no_of_user_fds); + + if (events & ERTS_POLL_EV_IN) + ERTS_FD_SET(fd, &ps->input_fds); + else + ERTS_FD_CLR(fd, &ps->input_fds); + + if (events & ERTS_POLL_EV_OUT) + ERTS_FD_SET(fd, &ps->output_fds); + else + ERTS_FD_CLR(fd, &ps->output_fds); ps->fds_status[fd].used_events = events; - if (events && fd > ps->max_fd) - ps->max_fd = fd; - else if (!events && fd == ps->max_fd) { - int max = ps->max_fd; - for (max = ps->max_fd; max >= 0; max--) - if (ps->fds_status[max].used_events) - break; - ps->max_fd = max; - } + if (fd > ps->max_fd) + ps->max_fd = fd; } - return 0; -#endif -} -#endif /* ERTS_POLL_USE_POLL || ERTS_POLL_USE_SELECT || ERTS_POLL_USE_FALLBACK */ + return res; +#elif ERTS_POLL_USE_DEVPOLL + + if (!events) { + if (reset) { + /* When a fd has been reset, we tell the caller of erts_poll_wait + this by setting the fd as ERTS_POLL_EV_NONE */ + ERTS_POLL_RES_SET_FD(&pr[res], fd); + ERTS_POLL_RES_SET_EVTS(&pr[res], ERTS_POLL_EV_NONE); + DEBUG_PRINT_FD("trig %s (devpoll)", ps, fd, ev2str(ERTS_POLL_EV_NONE)); + res++; + } -static void -handle_update_requests(ErtsPollSet ps) -{ - ErtsPollSetUpdateRequestsBlock *urqbp = &ps->update_requests; -#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET - ErtsPollBatchBuf bb; - setup_batch_buf(ps, &bb); + ps->poll_fds[ps->poll_fds_ix].fd = fd; + ps->poll_fds[ps->poll_fds_ix].revents = 0; + ps->poll_fds[ps->poll_fds_ix++].events = POLLREMOVE; + + if (ps->fds_status[fd].used_events) { + erts_atomic_dec_nob(&ps->no_of_user_fds); + ps->fds_status[fd].used_events = 0; + } + + } else { + if (!ps->fds_status[fd].used_events) { + erts_atomic_inc_nob(&ps->no_of_user_fds); + } + ps->poll_fds[ps->poll_fds_ix].fd = fd; + ps->poll_fds[ps->poll_fds_ix].revents = 0; + ps->poll_fds[ps->poll_fds_ix++].events = ERTS_POLL_EV_E2N(events); + ps->fds_status[fd].used_events = ps->fds_status[fd].events; + } + + return res; #endif +} + +static int +handle_update_requests(ErtsPollSet *ps, ErtsPollResFd pr[], int no_fds) +{ + int res = 0; + ErtsPollSetUpdateRequestsBlock *urqbp = ps->curr_upd_req_block; while (urqbp) { ErtsPollSetUpdateRequestsBlock *free_urqbp = urqbp; int i; int len = urqbp->len; + + init_batch_update(ps, len); + for (i = 0; i < len; i++) { int fd = urqbp->fds[i]; ASSERT(fd < ps->fds_status_len); - ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_INURQ; -#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET - batch_update_pollset(ps, fd, &bb); -#else - update_pollset(ps, fd); -#endif + ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INURQ); + + /* We have run out of PollResFd slots to put results in, + so we yield here and return later for more. */ + if (res == no_fds && pr != NULL) { + memmove(urqbp->fds, urqbp->fds+i, sizeof(int) * (len - i)); + urqbp->len -= i; + ps->curr_upd_req_block = urqbp; + write_batch_update(ps); + return res; + } + + if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INURQ) { + ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_INURQ; + res += update_pollset(ps, pr + res, fd); + } } free_urqbp = urqbp; @@ -1471,12 +1196,9 @@ handle_update_requests(ErtsPollSet ps) free_update_requests_block(ps, free_urqbp); - } + write_batch_update(ps); -#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET - if (bb.len) - write_batch_buf(ps, &bb); -#endif + } ps->curr_upd_req_block = &ps->update_requests; @@ -1485,16 +1207,19 @@ handle_update_requests(ErtsPollSet ps) #endif ERTS_POLLSET_UNSET_HAVE_UPDATE_REQUESTS(ps); + return res; } +#endif /* !ERTS_POLL_USE_CONCURRENT_UPDATE */ static ERTS_INLINE ErtsPollEvents -poll_control(ErtsPollSet ps, int fd, ErtsPollEvents events, int on, int *do_wake) +poll_control(ErtsPollSet *ps, int fd, ErtsPollOp op, + ErtsPollEvents events, int *do_wake) { ErtsPollEvents new_events; if (fd < ps->internal_fd_limit || fd >= max_fds) { - if (fd < 0) { + if (fd < 0 || fd >= max_fds) { new_events = ERTS_POLL_EV_ERR; goto done; } @@ -1504,115 +1229,55 @@ poll_control(ErtsPollSet ps, int fd, ErtsPollEvents events, int on, int *do_wake goto done; } #endif +#if ERTS_POLL_USE_WAKEUP_PIPE if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1]) { new_events = ERTS_POLL_EV_NVAL; goto done; } -#if ERTS_POLL_USE_TIMERFD - if (fd == ps->timer_fd) { - new_events = ERTS_POLL_EV_NVAL; - goto done; - } #endif } +#if ERTS_POLL_USE_CONCURRENT_UPDATE + + new_events = update_pollset(ps, fd, op, events); + +#else /* !ERTS_POLL_USE_CONCURRENT_UPDATE */ if (fd >= ps->fds_status_len) grow_fds_status(ps, fd); ASSERT(fd < ps->fds_status_len); - new_events = ps->fds_status[fd].events; - - if (events == 0) { - *do_wake = 0; - goto done; - } - - if (on) - new_events |= events; - else - new_events &= ~events; - - if (new_events == (ErtsPollEvents) 0) { - ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_RST; -#if ERTS_POLL_USE_FALLBACK - ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_USEFLBCK; -#endif - } - - ps->fds_status[fd].events = new_events; - - if (new_events == ps->fds_status[fd].used_events - && !(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST) - ) { - *do_wake = 0; - goto done; - } - - -#if ERTS_POLL_USE_CONCURRENT_UPDATE - if (ERTS_POLLSET_IS_POLLED(ps)) { - int update_fallback = 0; - conc_update_pollset(ps, fd, &update_fallback); - if (!update_fallback) { - *do_wake = 0; /* no need to wake kernel poller */ - goto done; - } + if (op == ERTS_POLL_OP_DEL) { + ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_RST; + ps->fds_status[fd].events = 0; + *do_wake = 1; + } else if (op == ERTS_POLL_OP_ADD) { + ASSERT(ps->fds_status[fd].events == 0); + ps->fds_status[fd].events = events; + *do_wake = 1; + } else { + ASSERT(op == ERTS_POLL_OP_MOD); + ps->fds_status[fd].events = events; + *do_wake = 1; } -#endif + new_events = ps->fds_status[fd].events; enqueue_update_request(ps, fd); - - /* - * If new events have been added, we need to wake up the - * polling thread, but if events have been removed we don't. - */ - if ((new_events && (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST)) - || (~ps->fds_status[fd].used_events & new_events)) - *do_wake = 1; +#endif /* !ERTS_POLL_USE_CONCURRENT_UPDATE */ done: -#ifdef ERTS_POLL_DEBUG_PRINT - erts_printf("0x%x = poll_control(ps, %d, 0x%x, %s) do_wake=%d\n", - (int) new_events, fd, (int) events, (on ? "on" : "off"), *do_wake); -#endif + DEBUG_PRINT_FD("%s = %s(%p, %d, %s, %s) do_wake=%d", + ps, fd, ev2str(new_events), __FUNCTION__, ps, + fd, op2str(op), ev2str(events), *do_wake); return new_events; } -void -ERTS_POLL_EXPORT(erts_poll_controlv)(ErtsPollSet ps, - ErtsPollControlEntry pcev[], - int len) -{ - int i; - int do_wake; - int final_do_wake = 0; - - ERTS_POLLSET_LOCK(ps); - - for (i = 0; i < len; i++) { - do_wake = 0; - pcev[i].events = poll_control(ps, - pcev[i].fd, - pcev[i].events, - pcev[i].on, - &do_wake); - final_do_wake |= do_wake; - } - - ERTS_POLLSET_UNLOCK(ps); - - if (final_do_wake) - wake_poller(ps, 0, 0); - -} - ErtsPollEvents -ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet ps, +ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps, ErtsSysFdType fd, + ErtsPollOp op, ErtsPollEvents events, - int on, int* do_wake) /* In: Wake up polling thread */ /* Out: Poller is woken */ { @@ -1620,13 +1285,15 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet ps, ERTS_POLLSET_LOCK(ps); - res = poll_control(ps, fd, events, on, do_wake); + res = poll_control(ps, fd, op, events, do_wake); ERTS_POLLSET_UNLOCK(ps); +#if !ERTS_POLL_USE_CONCURRENT_UPDATE if (*do_wake) { - wake_poller(ps, 0, 0); + wake_poller(ps, 0); } +#endif return res; } @@ -1638,180 +1305,64 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet ps, #if ERTS_POLL_USE_KERNEL_POLL static ERTS_INLINE int -save_kp_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, int chk_fds_res) +ERTS_POLL_EXPORT(save_result)(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, int chk_fds_res, int ebadf) { - int res = 0; - int i; - int n = chk_fds_res < max_res ? chk_fds_res : max_res; +#if !ERTS_POLL_USE_CONCURRENT_UPDATE || ERTS_POLL_DEBUG_PRINT || ERTS_POLL_USE_WAKEUP_PIPE + int n = chk_fds_res < max_res ? chk_fds_res : max_res, i; + int res = n; +#if ERTS_POLL_USE_WAKEUP_PIPE int wake_fd = ps->wake_fds[0]; -#if ERTS_POLL_USE_TIMERFD - int timer_fd = ps->timer_fd; #endif for (i = 0; i < n; i++) { - -#if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */ - - if (ps->res_events[i].events) { - int fd = ps->res_events[i].data.fd; - int ix; - ErtsPollEvents revents; - if (fd == wake_fd) { - cleanup_wakeup_pipe(ps); - continue; - } -#if ERTS_POLL_USE_TIMERFD - if (fd == timer_fd) { - continue; - } -#endif - ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK)); - /* epoll_wait() can repeat the same fd in result array... */ - ix = (int) ps->fds_status[fd].res_ev_ix; - ASSERT(ix >= 0); - if (ix >= res || pr[ix].fd != fd) { - ix = res; - pr[ix].fd = fd; - pr[ix].events = (ErtsPollEvents) 0; - } - - revents = ERTS_POLL_EV_N2E(ps->res_events[i].events); - pr[ix].events |= revents; - if (revents) { - if (res == ix) { - ps->fds_status[fd].res_ev_ix = (unsigned short) ix; - res++; - } - } - } - -#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */ - - struct kevent *ev; - int fd; - int ix; - - ev = &ps->res_events[i]; - fd = (int) ev->ident; - ASSERT(fd < ps->fds_status_len); - ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK)); - ix = (int) ps->fds_status[fd].res_ev_ix; - - ASSERT(ix >= 0); - if (ix >= res || pr[ix].fd != fd) { - ix = res; - pr[ix].fd = (int) ev->ident; - pr[ix].events = (ErtsPollEvents) 0; - } - - if (ev->filter == EVFILT_READ) { - if (fd == wake_fd) { - cleanup_wakeup_pipe(ps); - continue; - } - pr[ix].events |= ERTS_POLL_EV_IN; - } - else if (ev->filter == EVFILT_WRITE) - pr[ix].events |= ERTS_POLL_EV_OUT; - if (ev->flags & (EV_ERROR|EV_EOF)) { - if ((ev->flags & EV_ERROR) && (((int) ev->data) == EBADF)) - pr[ix].events |= ERTS_POLL_EV_NVAL; - else - pr[ix].events |= ERTS_POLL_EV_ERR; - } - if (pr[ix].events) { - if (res == ix) { - ps->fds_status[fd].res_ev_ix = (unsigned short) ix; - res++; - } - } - -#elif ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */ - - if (ps->res_events[i].revents) { - int fd = ps->res_events[i].fd; - ErtsPollEvents revents; - if (fd == wake_fd) { - cleanup_wakeup_pipe(ps); - continue; - } -#if ERTS_POLL_USE_TIMERFD - if (fd == timer_fd) { - continue; - } -#endif - revents = ERTS_POLL_EV_N2E(ps->res_events[i].events); - pr[res].fd = fd; - pr[res].events = revents; - res++; - } - + int fd = ERTS_POLL_RES_GET_FD(&pr[i]); +#ifdef DEBUG_PRINT_MODE + ErtsPollEvents evts = ERTS_POLL_RES_GET_EVTS(pr+i); #endif - } - - return res; -} - -#endif /* ERTS_POLL_USE_KERNEL_POLL */ - -#if ERTS_POLL_USE_FALLBACK - -static int -get_kp_results(ErtsPollSet ps, ErtsPollResFd pr[], int max_res) -{ - int res; + DEBUG_PRINT_FD("trig %s (%s)", ps, fd, + ev2str(evts), #if ERTS_POLL_USE_KQUEUE - struct timespec ts = {0, 0}; + "kqueue" +#elif ERTS_POLL_USE_EPOLL + "epoll" +#else + "/dev/poll" #endif + ); - if (max_res > ps->res_events_len) - grow_res_events(ps, max_res); - - do { -#if ERTS_POLL_USE_EPOLL - res = epoll_wait(ps->kp_fd, ps->res_events, max_res, 0); -#elif ERTS_POLL_USE_KQUEUE - res = kevent(ps->kp_fd, NULL, 0, ps->res_events, max_res, &ts); +#if ERTS_POLL_USE_WAKEUP_PIPE + if (fd == wake_fd) { + cleanup_wakeup_pipe(ps); + ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE); + if (n == 1) + return 0; + } #endif - } while (res < 0 && errno == EINTR); - - if (res < 0) { - fatal_error("%s:%d: %s() failed: %s (%d)\n", - __FILE__, __LINE__, -#if ERTS_POLL_USE_EPOLL - "epoll_wait", -#elif ERTS_POLL_USE_KQUEUE - "kevent", +#if !ERTS_POLL_USE_CONCURRENT_UPDATE + else { + /* Reset the events to emulate ONESHOT semantics */ + ps->fds_status[fd].events = 0; + enqueue_update_request(ps, fd); + } #endif - erl_errno_id(errno), errno); } - return save_kp_result(ps, pr, max_res, res); + return res; +#else + ASSERT(chk_fds_res <= max_res); + return chk_fds_res; +#endif } -#endif /* ERTS_POLL_USE_FALLBACK */ - - +#else /* !ERTS_POLL_USE_KERNEL_POLL */ static ERTS_INLINE int -save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, - int chk_fds_res, int ebadf) +ERTS_POLL_EXPORT(save_result)(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, int chk_fds_res, int ebadf) { -#if ERTS_POLL_USE_DEVPOLL - return save_kp_result(ps, pr, max_res, chk_fds_res); -#elif ERTS_POLL_USE_FALLBACK - if (!ps->fallback_used) - return save_kp_result(ps, pr, max_res, chk_fds_res); - else -#endif /* ERTS_POLL_USE_FALLBACK */ - { - #if ERTS_POLL_USE_POLL /* --- poll -------------------------------- */ int res = 0; -#if !ERTS_POLL_USE_FALLBACK int wake_fd = ps->wake_fds[0]; -#endif int i, first_ix, end_ix; /* @@ -1828,23 +1379,30 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, if (ps->poll_fds[i].revents != (short) 0) { int fd = ps->poll_fds[i].fd; ErtsPollEvents revents; -#if ERTS_POLL_USE_FALLBACK - if (fd == ps->kp_fd) { - res += get_kp_results(ps, &pr[res], max_res-res); - i++; - continue; - } -#else if (fd == wake_fd) { cleanup_wakeup_pipe(ps); i++; continue; } -#endif revents = pollev2ev(ps->poll_fds[i].revents); - pr[res].fd = fd; - pr[res].events = revents; + ERTS_POLL_RES_SET_FD(&pr[res], fd); + ERTS_POLL_RES_SET_EVTS(&pr[res], revents); + + /* If an fd returns as error, we may want to check the + update_requests queue to see if it has been reset + before delivering the result?!?! This should allow + the user to do driver_dselect + close without waiting + for stop_select... */ + + DEBUG_PRINT_FD("trig %s (poll)", ps, ERTS_POLL_RES_GET_FD(&pr[res]), + ev2str(ERTS_POLL_RES_GET_EVTS(&pr[res]))); + res++; + + /* Clear the events for this fd in order to mimic + how epoll ONESHOT works */ + ps->fds_status[fd].events = 0; + enqueue_update_request(ps, fd); } i++; } @@ -1860,9 +1418,7 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, #elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */ int res = 0; -#if !ERTS_POLL_USE_FALLBACK int wake_fd = ps->wake_fds[0]; -#endif int fd, first_fd, end_fd; /* @@ -1875,29 +1431,23 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, if (!ebadf) { while (1) { while (fd < end_fd && res < max_res) { - - pr[res].events = (ErtsPollEvents) 0; + ErtsPollEvents events = 0; if (ERTS_FD_ISSET(fd, &ps->res_input_fds)) { -#if ERTS_POLL_USE_FALLBACK - if (fd == ps->kp_fd) { - res += get_kp_results(ps, &pr[res], max_res-res); - fd++; - continue; - } -#else if (fd == wake_fd) { cleanup_wakeup_pipe(ps); fd++; continue; } -#endif - pr[res].events |= ERTS_POLL_EV_IN; + events |= ERTS_POLL_EV_IN; } if (ERTS_FD_ISSET(fd, &ps->res_output_fds)) - pr[res].events |= ERTS_POLL_EV_OUT; - if (pr[res].events) { - pr[res].fd = fd; + events |= ERTS_POLL_EV_OUT; + if (events) { + ERTS_POLL_RES_SET_FD(&pr[res], fd); + ERTS_POLL_RES_SET_EVTS(&pr[res], events); res++; + ps->fds_status[fd].events = 0; + enqueue_update_request(ps, fd); } fd++; } @@ -1930,7 +1480,7 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, if (ps->fds_status[fd].events & ERTS_POLL_EV_OUT) { oset = &ps->res_output_fds; ERTS_FD_ZERO(oset); - ERTS_FD_SET(fd, oset); + ERTS_FD_SET(fd, oset); } do { /* Initiate 'tv' each time; @@ -1939,49 +1489,31 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, sres = ERTS_SELECT(ps->max_fd+1, iset, oset, NULL, &tv); } while (sres < 0 && errno == EINTR); if (sres < 0) { -#if ERTS_POLL_USE_FALLBACK - if (fd == ps->kp_fd) { - res += get_kp_results(ps, - &pr[res], - max_res-res); - fd++; - continue; - } -#else if (fd == wake_fd) { cleanup_wakeup_pipe(ps); fd++; continue; } -#endif - pr[res].fd = fd; - pr[res].events = ERTS_POLL_EV_NVAL; + ERTS_POLL_RES_SET_FD(&pr[res], fd); + ERTS_POLL_RES_SET_EVTS(&pr[res], ERTS_POLL_EV_NVAL); res++; } else if (sres > 0) { - pr[res].fd = fd; + ErtsPollEvents events = 0; + ERTS_POLL_RES_SET_FD(&pr[res], fd); if (iset && ERTS_FD_ISSET(fd, iset)) { -#if ERTS_POLL_USE_FALLBACK - if (fd == ps->kp_fd) { - res += get_kp_results(ps, - &pr[res], - max_res-res); - fd++; - continue; - } -#else if (fd == wake_fd) { cleanup_wakeup_pipe(ps); fd++; continue; } -#endif - pr[res].events |= ERTS_POLL_EV_IN; + events |= ERTS_POLL_EV_IN; } if (oset && ERTS_FD_ISSET(fd, oset)) { - pr[res].events |= ERTS_POLL_EV_OUT; + events |= ERTS_POLL_EV_OUT; } - ASSERT(pr[res].events); + ASSERT(events); + ERTS_POLL_RES_SET_EVTS(&pr[res], events); res++; } } @@ -1996,353 +1528,145 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, } ps->next_sel_fd = fd; return res; -#endif - } +#endif /* ERTS_POLL_USE_SELECT */ } -static ERTS_INLINE ErtsMonotonicTime -get_timeout(ErtsPollSet ps, - int resolution, - ErtsMonotonicTime timeout_time) -{ - ErtsMonotonicTime timeout, save_timeout_time; - - if (timeout_time == ERTS_POLL_NO_TIMEOUT) { - save_timeout_time = ERTS_MONOTONIC_TIME_MIN; - timeout = 0; - } - else { - ErtsMonotonicTime diff_time, current_time; - current_time = erts_get_monotonic_time(NULL); - diff_time = timeout_time - current_time; - if (diff_time <= 0) { - save_timeout_time = ERTS_MONOTONIC_TIME_MIN; - timeout = 0; - } - else { - save_timeout_time = current_time; - switch (resolution) { - case 1000: - /* Round up to nearest even milli second */ - timeout = ERTS_MONOTONIC_TO_MSEC(diff_time - 1) + 1; - if (timeout > (ErtsMonotonicTime) INT_MAX) - timeout = (ErtsMonotonicTime) INT_MAX; - save_timeout_time += ERTS_MSEC_TO_MONOTONIC(timeout); - timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000); - break; - case 1000000: - /* Round up to nearest even micro second */ - timeout = ERTS_MONOTONIC_TO_USEC(diff_time - 1) + 1; - save_timeout_time += ERTS_USEC_TO_MONOTONIC(timeout); - timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000*1000); - break; - case 1000000000: - /* Round up to nearest even nano second */ - timeout = ERTS_MONOTONIC_TO_NSEC(diff_time - 1) + 1; - save_timeout_time += ERTS_NSEC_TO_MONOTONIC(timeout); - timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000*1000*1000); - break; - default: - ERTS_INTERNAL_ERROR("Invalid resolution"); - timeout = 0; - save_timeout_time = 0; - break; - } - } - } - set_timeout_time(ps, save_timeout_time); - return timeout; -} - -#if ERTS_POLL_USE_SELECT - -static ERTS_INLINE int -get_timeout_timeval(ErtsPollSet ps, - SysTimeval *tvp, - ErtsMonotonicTime timeout_time) -{ - ErtsMonotonicTime timeout = get_timeout(ps, - 1000*1000, - timeout_time); - - if (!timeout) { - tvp->tv_sec = 0; - tvp->tv_usec = 0; - - return 0; - } - else { - ErtsMonotonicTime sec = timeout/(1000*1000); - tvp->tv_sec = sec; - tvp->tv_usec = timeout - sec*(1000*1000); - - ASSERT(tvp->tv_sec >= 0); - ASSERT(tvp->tv_usec >= 0); - ASSERT(tvp->tv_usec < 1000*1000); - - return !0; - } - -} - -#endif - -#if ERTS_POLL_USE_KQUEUE || (ERTS_POLL_USE_POLL && defined(HAVE_PPOLL)) || ERTS_POLL_USE_TIMERFD - -static ERTS_INLINE int -get_timeout_timespec(ErtsPollSet ps, - struct timespec *tsp, - ErtsMonotonicTime timeout_time) -{ - ErtsMonotonicTime timeout = get_timeout(ps, - 1000*1000*1000, - timeout_time); - - if (!timeout) { - tsp->tv_sec = 0; - tsp->tv_nsec = 0; - return 0; - } - else { - ErtsMonotonicTime sec = timeout/(1000*1000*1000); - tsp->tv_sec = sec; - tsp->tv_nsec = timeout - sec*(1000*1000*1000); - - ASSERT(tsp->tv_sec >= 0); - ASSERT(tsp->tv_nsec >= 0); - ASSERT(tsp->tv_nsec < 1000*1000*1000); - - return !0; - } -} - -#endif - -#if ERTS_POLL_USE_TIMERFD - -static ERTS_INLINE int -get_timeout_itimerspec(ErtsPollSet ps, - struct itimerspec *itsp, - ErtsMonotonicTime timeout_time) -{ - - itsp->it_interval.tv_sec = 0; - itsp->it_interval.tv_nsec = 0; - - return get_timeout_timespec(ps, &itsp->it_value, timeout_time); -} - -#endif +#endif /* !ERTS_POLL_USE_KERNEL_POLL */ static ERTS_INLINE int -check_fd_events(ErtsPollSet ps, ErtsMonotonicTime timeout_time, int max_res) +check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res) { int res; - ERTS_MSACC_PUSH_STATE_M(); - if (erts_atomic_read_nob(&ps->no_of_user_fds) == 0 - && timeout_time == ERTS_POLL_NO_TIMEOUT) { - /* Nothing to poll and zero timeout; done... */ - return 0; - } - else { - int timeout; -#if ERTS_POLL_USE_FALLBACK - if (!(ps->fallback_used = ERTS_POLL_NEED_FALLBACK(ps))) { - -#if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */ - if (max_res > ps->res_events_len) - grow_res_events(ps, max_res); -#if ERTS_POLL_USE_TIMERFD - { - struct itimerspec its; - timeout = get_timeout_itimerspec(ps, &its, timeout_time); - if (timeout) { - erts_thr_progress_prepare_wait(NULL); - ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP); - timerfd_set(ps, &its); - res = epoll_wait(ps->kp_fd, ps->res_events, max_res, -1); - res = timerfd_clear(ps, res, max_res); - } else { - res = epoll_wait(ps->kp_fd, ps->res_events, max_res, 0); - } - } -#else /* !ERTS_POLL_USE_TIMERFD */ - timeout = (int) get_timeout(ps, 1000, timeout_time); - if (timeout) { - erts_thr_progress_prepare_wait(NULL); - ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP); - } - res = epoll_wait(ps->kp_fd, ps->res_events, max_res, timeout); -#endif /* !ERTS_POLL_USE_TIMERFD */ -#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */ - struct timespec ts; - if (max_res > ps->res_events_len) - grow_res_events(ps, max_res); - timeout = get_timeout_timespec(ps, &ts, timeout_time); - if (timeout) { - erts_thr_progress_prepare_wait(NULL); - ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP); - } - res = kevent(ps->kp_fd, NULL, 0, ps->res_events, max_res, &ts); -#endif /* ----------------------------------------- */ - } - else /* use fallback (i.e. poll() or select()) */ -#endif /* ERTS_POLL_USE_FALLBACK */ - { + int timeout = do_wait ? -1 : 0; + DEBUG_PRINT_WAIT("Entering check_fd_events(), do_wait=%d", ps, do_wait); + { +#if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */ + res = epoll_wait(ps->kp_fd, pr, max_res, timeout); + +#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */ + struct timespec ts = {0, 0}; + struct timespec *tsp = timeout ? NULL : &ts; + res = kevent(ps->kp_fd, NULL, 0, pr, max_res, tsp); +#elif ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */ + /* + * The ioctl() will fail with EINVAL on Solaris 10 if dp_nfds + * is set too high. dp_nfds should not be set greater than + * the maximum number of file descriptors in the poll set. + */ + struct dvpoll poll_res; + int nfds = (int) erts_atomic_read_nob(&ps->no_of_user_fds) + 1 /* wakeup pipe */; + poll_res.dp_nfds = nfds < max_res ? nfds : max_res; + poll_res.dp_fds = pr; + poll_res.dp_timeout = timeout; + res = ioctl(ps->kp_fd, DP_POLL, &poll_res); -#if ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */ - /* - * The ioctl() will fail with EINVAL on Solaris 10 if dp_nfds - * is set too high. dp_nfds should not be set greater than - * the maximum number of file descriptors in the poll set. - */ - struct dvpoll poll_res; - int nfds = (int) erts_atomic_read_nob(&ps->no_of_user_fds); - nfds++; /* Wakeup pipe */ - timeout = (int) get_timeout(ps, 1000, timeout_time); - poll_res.dp_nfds = nfds < max_res ? nfds : max_res; - if (poll_res.dp_nfds > ps->res_events_len) - grow_res_events(ps, poll_res.dp_nfds); - poll_res.dp_fds = ps->res_events; - if (timeout) { - erts_thr_progress_prepare_wait(NULL); - ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP); - } - poll_res.dp_timeout = timeout; - res = ioctl(ps->kp_fd, DP_POLL, &poll_res); -#elif ERTS_POLL_USE_POLL && defined(HAVE_PPOLL) /* --- ppoll ---------------- */ - struct timespec ts; - timeout = get_timeout_timespec(ps, &ts, timeout_time); - if (timeout) { - erts_thr_progress_prepare_wait(NULL); - ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP); - } - res = ppoll(ps->poll_fds, ps->no_poll_fds, &ts, NULL); #elif ERTS_POLL_USE_POLL /* --- poll --------------------------------- */ - timeout = (int) get_timeout(ps, 1000, timeout_time); - if (timeout) { - erts_thr_progress_prepare_wait(NULL); - ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP); - } - res = poll(ps->poll_fds, ps->no_poll_fds, timeout); + res = poll(ps->poll_fds, ps->no_poll_fds, timeout); + #elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */ - SysTimeval to; - timeout = get_timeout_timeval(ps, &to, timeout_time); + SysTimeval tv = {0, 0}; + SysTimeval *tvp = timeout ? NULL : &tv; - ERTS_FD_COPY(&ps->input_fds, &ps->res_input_fds); - ERTS_FD_COPY(&ps->output_fds, &ps->res_output_fds); + ERTS_FD_COPY(&ps->input_fds, &ps->res_input_fds); + ERTS_FD_COPY(&ps->output_fds, &ps->res_output_fds); - if (timeout) { - erts_thr_progress_prepare_wait(NULL); - ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP); - } - res = ERTS_SELECT(ps->max_fd + 1, - &ps->res_input_fds, - &ps->res_output_fds, - NULL, - &to); - if (timeout) { - erts_thr_progress_finalize_wait(NULL); - ERTS_MSACC_POP_STATE_M(); - } - if (res < 0 - && errno == EBADF - && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) { - /* - * This may have happened because another thread deselected - * a fd in our poll set and then closed it, i.e. the driver - * behaved correctly. We wan't to avoid looking for a bad - * fd, that may even not exist anymore. Therefore, handle - * update requests and try again. - * - * We don't know how much of the timeout is left; therfore, - * we use a zero timeout. If no error occur and no events - * have triggered, we fake an EAGAIN error and let the caller - * restart us. - */ - to.tv_sec = 0; - to.tv_usec = 0; - ERTS_POLLSET_LOCK(ps); - handle_update_requests(ps); - ERTS_POLLSET_UNLOCK(ps); - res = ERTS_SELECT(ps->max_fd + 1, - &ps->res_input_fds, - &ps->res_output_fds, - NULL, - &to); - if (res == 0) { - errno = EAGAIN; - res = -1; - } - } - return res; + res = ERTS_SELECT(ps->max_fd + 1, + &ps->res_input_fds, + &ps->res_output_fds, + NULL, + tvp); #endif /* ----------------------------------------- */ - } - if (timeout) { - erts_thr_progress_finalize_wait(NULL); - ERTS_MSACC_POP_STATE_M(); - } - return res; } + DEBUG_PRINT_WAIT("Leaving check_fd_events(), res=%d", ps, res); + return res; } int -ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet ps, +ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps, ErtsPollResFd pr[], - int *len, - ErtsMonotonicTime timeout_time) + int *len) { - ErtsMonotonicTime to; - int res, no_fds; + int res, no_fds, used_fds = 0; int ebadf = 0; + int do_wait; int ps_locked = 0; + ERTS_MSACC_DECLARE_CACHE(); no_fds = *len; -#ifdef ERTS_POLL_MAX_RES - if (no_fds >= ERTS_POLL_MAX_RES) - no_fds = ERTS_POLL_MAX_RES; -#endif - *len = 0; + ASSERT(no_fds > 0); -#ifdef ERTS_POLL_DEBUG_PRINT - erts_printf("Entering erts_poll_wait(), timeout_time=%bps\n", - timeout_time); -#endif +#if !ERTS_POLL_USE_CONCURRENT_UPDATE + if (ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) { + ERTS_POLLSET_LOCK(ps); + used_fds = handle_update_requests(ps, pr, no_fds); + ERTS_POLLSET_UNLOCK(ps); - if (ERTS_POLLSET_SET_POLLED_CHK(ps)) { - res = EINVAL; /* Another thread is in erts_poll_wait() - on this pollset... */ - goto done; + if (used_fds == no_fds) { + *len = used_fds; + return 0; + } } +#endif - to = (is_woken(ps) - ? ERTS_POLL_NO_TIMEOUT /* Use zero timeout */ - : timeout_time); + do_wait = !is_woken(ps) && used_fds == 0; - if (ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) { - ERTS_POLLSET_LOCK(ps); - handle_update_requests(ps); - ERTS_POLLSET_UNLOCK(ps); + DEBUG_PRINT_WAIT("Entering %s(), do_wait=%d", ps, __FUNCTION__, do_wait); + + if (do_wait) { + erts_thr_progress_prepare_wait(NULL); + ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP); } while (1) { - res = check_fd_events(ps, to, no_fds); + res = check_fd_events(ps, pr + used_fds, do_wait, no_fds - used_fds); + +#if !ERTS_POLL_USE_CONCURRENT_UPDATE + if (res < 0 + && errno == EBADF + && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) { + /* + * This may have happened because another thread deselected + * a fd in our poll set and then closed it, i.e. the driver + * behaved correctly. We wan't to avoid looking for a bad + * fd, that may even not exist anymore. Therefore, handle + * update requests and try again. This behaviour should only + * happen when using SELECT as the polling mechanism. + */ + ERTS_POLLSET_LOCK(ps); + used_fds += handle_update_requests(ps, pr + used_fds, no_fds - used_fds); + if (used_fds == no_fds) { + *len = used_fds; + ERTS_POLLSET_UNLOCK(ps); + return 0; + } + res = check_fd_events(ps, pr + used_fds, 0, no_fds - used_fds); + /* Keep the lock over the non-blocking poll in order to not + get any nasty races happening. */ + ERTS_POLLSET_UNLOCK(ps); + if (res == 0) { + errno = EAGAIN; + res = -1; + } + } +#endif + if (res != 0) break; - if (to == ERTS_POLL_NO_TIMEOUT) - break; - if (erts_get_monotonic_time(NULL) >= timeout_time) - break; + if (!do_wait) + break; + } + + if (do_wait) { + erts_thr_progress_finalize_wait(NULL); + ERTS_MSACC_UPDATE_CACHE(); + ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_CHECK_IO); } woke_up(ps); - if (res == 0) { - res = ETIMEDOUT; - } - else if (res < 0) { + if (res < 0) { #if ERTS_POLL_USE_SELECT if (errno == EBADF) { ebadf = 1; @@ -2359,26 +1683,21 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet ps, ps_locked = 1; ERTS_POLLSET_LOCK(ps); - no_fds = save_poll_result(ps, pr, no_fds, res, ebadf); + used_fds += ERTS_POLL_EXPORT(save_result)(ps, pr + used_fds, no_fds - used_fds, res, ebadf); #ifdef HARD_DEBUG - check_poll_result(pr, no_fds); + check_poll_result(pr, used_fds); #endif - res = (no_fds == 0 ? (is_interrupted_reset(ps) ? EINTR : EAGAIN) : 0); - *len = no_fds; + res = (used_fds == 0 ? (is_interrupted_reset(ps) ? EINTR : EAGAIN) : 0); + *len = used_fds; } if (ps_locked) ERTS_POLLSET_UNLOCK(ps); - ERTS_POLLSET_UNSET_POLLED(ps); - done: - set_timeout_time(ps, ERTS_MONOTONIC_TIME_MAX); -#ifdef ERTS_POLL_DEBUG_PRINT - erts_printf("Leaving %s = erts_poll_wait()\n", - res == 0 ? "0" : erl_errno_id(res)); -#endif + DEBUG_PRINT_WAIT("Leaving %s = %s(len = %d)", ps, + res == 0 ? "0" : erl_errno_id(res), __FUNCTION__, *len); return res; } @@ -2388,40 +1707,14 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet ps, */ void -ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet ps, int set) +ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet *ps, int set) { +#if !ERTS_POLL_USE_CONCURRENT_UPDATE if (!set) reset_wakeup_state(ps); else - wake_poller(ps, 1, 0); -} - - -/* - * erts_poll_interrupt_timed(): - * If 'set' != 0, interrupt thread blocked in erts_poll_wait() if it - * is not guaranteed that it will timeout before 'msec' milli seconds. - */ -void -ERTS_POLL_EXPORT(erts_poll_interrupt_timed)(ErtsPollSet ps, - int set, - ErtsMonotonicTime timeout_time) -{ - if (!set) - reset_wakeup_state(ps); - else { - ErtsMonotonicTime max_wait_time = get_timeout_time(ps); - if (max_wait_time > timeout_time) - wake_poller(ps, 1, 0); -#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS - else { - if (ERTS_POLLSET_IS_POLLED(ps)) - erts_atomic_inc_nob(&ps->no_avoided_wakeups); - erts_atomic_inc_nob(&ps->no_avoided_interrupts); - } - erts_atomic_inc_nob(&ps->no_interrupt_timed); + wake_poller(ps, 1); #endif - } } int @@ -2434,14 +1727,19 @@ ERTS_POLL_EXPORT(erts_poll_max_fds)(void) */ void -ERTS_POLL_EXPORT(erts_poll_init)(void) +ERTS_POLL_EXPORT(erts_poll_init)(int *concurrent_updates) { - erts_mtx_init(&pollsets_lock, "pollsets_lock", NIL, - ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO); - pollsets = NULL; errno = 0; + if (concurrent_updates) { +#if ERTS_POLL_USE_CONCURRENT_UPDATE + *concurrent_updates = 1; +#else + *concurrent_updates = 0; +#endif + } + #if !defined(NO_SYSCONF) max_fds = sysconf(_SC_OPEN_MAX); #elif ERTS_POLL_USE_SELECT @@ -2460,37 +1758,28 @@ ERTS_POLL_EXPORT(erts_poll_init)(void) fatal_error("erts_poll_init(): Failed to get max number of files: %s\n", erl_errno_id(errno)); -#ifdef ERTS_POLL_DEBUG_PRINT print_misc_debug_info(); -#endif } -ErtsPollSet -ERTS_POLL_EXPORT(erts_poll_create_pollset)(void) +ErtsPollSet * +ERTS_POLL_EXPORT(erts_poll_create_pollset)(int id) { #if ERTS_POLL_USE_KERNEL_POLL int kp_fd; #endif - ErtsPollSet ps = erts_alloc(ERTS_ALC_T_POLLSET, - sizeof(struct ErtsPollSet_)); + ErtsPollSet *ps = erts_alloc(ERTS_ALC_T_POLLSET, + sizeof(struct ERTS_POLL_EXPORT(erts_pollset))); + ps->id = id; ps->internal_fd_limit = 0; - ps->fds_status = NULL; - ps->fds_status_len = 0; erts_atomic_init_nob(&ps->no_of_user_fds, 0); #if ERTS_POLL_USE_KERNEL_POLL ps->kp_fd = -1; #if ERTS_POLL_USE_EPOLL kp_fd = epoll_create(256); - ps->res_events_len = 0; - ps->res_events = NULL; #elif ERTS_POLL_USE_DEVPOLL kp_fd = open("/dev/poll", O_RDWR); - ps->res_events_len = 0; - ps->res_events = NULL; #elif ERTS_POLL_USE_KQUEUE kp_fd = kqueue(); - ps->res_events_len = 0; - ps->res_events = NULL; #endif if (kp_fd < 0) fatal_error("erts_poll_create_pollset(): Failed to " @@ -2504,10 +1793,6 @@ ERTS_POLL_EXPORT(erts_poll_create_pollset)(void) ": %s (%d)\n", erl_errno_id(errno), errno); #endif /* ERTS_POLL_USE_KERNEL_POLL */ -#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET - /* res_events is also used as write buffer */ - grow_res_events(ps, ERTS_POLL_MIN_BATCH_BUF_SIZE); -#endif #if ERTS_POLL_USE_POLL ps->next_poll_fds_ix = 0; ps->no_poll_fds = 0; @@ -2516,9 +1801,6 @@ ERTS_POLL_EXPORT(erts_poll_create_pollset)(void) #elif ERTS_POLL_USE_SELECT ps->next_sel_fd = 0; ps->max_fd = -1; -#if ERTS_POLL_USE_FALLBACK - ps->no_select_fds = 0; -#endif #ifdef _DARWIN_UNLIMITED_SELECT ps->input_fds.sz = 0; ps->input_fds.ptr = NULL; @@ -2535,140 +1817,85 @@ ERTS_POLL_EXPORT(erts_poll_create_pollset)(void) ERTS_FD_ZERO(&ps->res_output_fds); #endif #endif +#if !ERTS_POLL_USE_CONCURRENT_UPDATE + ps->fds_status = NULL; + ps->fds_status_len = 0; ps->update_requests.next = NULL; ps->update_requests.len = 0; ps->curr_upd_req_block = &ps->update_requests; erts_atomic32_init_nob(&ps->have_update_requests, 0); - erts_atomic32_init_nob(&ps->polled, 0); erts_mtx_init(&ps->mtx, "pollset", NIL, ERTS_LOCK_FLAGS_CATEGORY_IO); - erts_atomic32_init_nob(&ps->wakeup_state, (erts_aint32_t) 0); - create_wakeup_pipe(ps); -#if ERTS_POLL_USE_TIMERFD - create_timerfd(ps); -#endif -#if ERTS_POLL_USE_FALLBACK - if (kp_fd >= ps->fds_status_len) - grow_fds_status(ps, kp_fd); - /* Force kernel poll fd into fallback (poll/select) set */ - ps->fds_status[kp_fd].flags - |= ERTS_POLL_FD_FLG_INFLBCK|ERTS_POLL_FD_FLG_USEFLBCK; - { - int do_wake = 0; - ERTS_POLL_EXPORT(erts_poll_control)(ps, kp_fd, ERTS_POLL_EV_IN, 1, - &do_wake); - } #endif #if ERTS_POLL_USE_KERNEL_POLL if (ps->internal_fd_limit <= kp_fd) ps->internal_fd_limit = kp_fd + 1; ps->kp_fd = kp_fd; #endif - init_timeout_time(ps); -#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS - erts_atomic_init_nob(&ps->no_avoided_wakeups, 0); - erts_atomic_init_nob(&ps->no_avoided_interrupts, 0); - erts_atomic_init_nob(&ps->no_interrupt_timed, 0); +#if !ERTS_POLL_USE_CONCURRENT_UPDATE + erts_atomic32_init_nob(&ps->wakeup_state, (erts_aint32_t) 0); + create_wakeup_pipe(ps); + handle_update_requests(ps, NULL, 0); + cleanup_wakeup_pipe(ps); #endif - handle_update_requests(ps); -#if ERTS_POLL_USE_FALLBACK - ps->fallback_used = 0; +#if ERTS_POLL_USE_KERNEL_POLL && (defined(__DARWIN__) || defined(__APPLE__) && defined(__MACH__)) + { + /* + * Using kqueue on OS X is a mess of brokenness... + * + * On OS X version older than 15.6 (i.e. OS X El Capitan released in July 2015), + * a thread waiting in kevent is not woken if an event is inserted into the kqueue + * by another thread and the event becomes ready. However if a new call to kevent + * is done by the waiting thread, the new event is found. + * + * So on effected OS X versions we could trigger the wakeup pipe so that + * the waiters will be woken and re-issue the kevent. However... + * + * On OS X version older then 16 (i.e. OS X Sierra released in September 2016), + * running the emulator driver_SUITE smp_select testcase consistently causes a + * kernel panic. I don't know why or what events that trigger it. But it seems + * like updates of the pollset while another thread is sleeping in it Creates + * some kind of race that triggers the kernel panic. + * + * So to deal with this, the erts configure check what OS X version is run + * and only enabled kernel poll on OS X 16 or newer. In addition, if someone + * attempts to compile Erlang on OS X 16 and then run it on OS X 15, we do the + * run-time check below to disallow this. + */ + int major, minor, build; + os_version(&major,&minor,&build); + if (major < 16) { + erts_fprintf(stderr,"BROKEN KQUEUE!\n" + "Erlang has been compiled with kernel-poll support,\n" + "but this OS X version is known to have kernel bugs\n" + "when using kernel-poll. You have two options:\n" + " 1) update to a newer OS X version (OS X Sierra or newer)\n" + " 2) recompile erlang without kernel-poll support\n"); + erts_exit(1, ""); + } + } #endif erts_atomic_set_nob(&ps->no_of_user_fds, 0); /* Don't count wakeup pipe and fallback fd */ - erts_mtx_lock(&pollsets_lock); - ps->next = pollsets; - pollsets = ps; - erts_mtx_unlock(&pollsets_lock); - return ps; } -void -ERTS_POLL_EXPORT(erts_poll_destroy_pollset)(ErtsPollSet ps) -{ - - if (ps->fds_status) - erts_free(ERTS_ALC_T_FD_STATUS, (void *) ps->fds_status); - -#if ERTS_POLL_USE_EPOLL - if (ps->kp_fd >= 0) - close(ps->kp_fd); - if (ps->res_events) - erts_free(ERTS_ALC_T_POLL_RES_EVS, (void *) ps->res_events); -#elif ERTS_POLL_USE_DEVPOLL - if (ps->kp_fd >= 0) - close(ps->kp_fd); - if (ps->res_events) - erts_free(ERTS_ALC_T_POLL_RES_EVS, (void *) ps->res_events); -#elif ERTS_POLL_USE_POLL - if (ps->poll_fds) - erts_free(ERTS_ALC_T_POLL_FDS, (void *) ps->poll_fds); -#elif ERTS_POLL_USE_SELECT -#ifdef _DARWIN_UNLIMITED_SELECT - if (ps->input_fds.ptr) - erts_free(ERTS_ALC_T_SELECT_FDS, (void *) ps->input_fds.ptr); - if (ps->res_input_fds.ptr) - erts_free(ERTS_ALC_T_SELECT_FDS, (void *) ps->res_input_fds.ptr); - if (ps->output_fds.ptr) - erts_free(ERTS_ALC_T_SELECT_FDS, (void *) ps->output_fds.ptr); - if (ps->res_output_fds.ptr) - erts_free(ERTS_ALC_T_SELECT_FDS, (void *) ps->res_output_fds.ptr); -#endif -#endif - { - ErtsPollSetUpdateRequestsBlock *urqbp = ps->update_requests.next; - while (urqbp) { - ErtsPollSetUpdateRequestsBlock *free_urqbp = urqbp; - urqbp = urqbp->next; - free_update_requests_block(ps, free_urqbp); - } - } - erts_mtx_destroy(&ps->mtx); - if (ps->wake_fds[0] >= 0) - close(ps->wake_fds[0]); - if (ps->wake_fds[1] >= 0) - close(ps->wake_fds[1]); -#if ERTS_POLL_USE_TIMERFD - if (ps->timer_fd >= 0) - close(ps->timer_fd); -#endif - - erts_mtx_lock(&pollsets_lock); - if (ps == pollsets) - pollsets = pollsets->next; - else { - ErtsPollSet prev_ps; - for (prev_ps = pollsets; ps != prev_ps->next; prev_ps = prev_ps->next) - ; - ASSERT(ps == prev_ps->next); - prev_ps->next = ps->next; - } - erts_mtx_unlock(&pollsets_lock); - - erts_free(ERTS_ALC_T_POLLSET, (void *) ps); -} - /* * --- Info ------------------------------------------------------------------ */ void -ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip) +ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet *ps, ErtsPollInfo *pip) { +#if !ERTS_POLL_USE_CONCURRENT_UPDATE int pending_updates; +#endif Uint size = 0; ERTS_POLLSET_LOCK(ps); - size += sizeof(struct ErtsPollSet_); + size += sizeof(struct ERTS_POLL_EXPORT(erts_pollset)); +#if !ERTS_POLL_USE_CONCURRENT_UPDATE size += ps->fds_status_len*sizeof(ErtsFdStatus); - -#if ERTS_POLL_USE_EPOLL - size += ps->res_events_len*sizeof(struct epoll_event); -#elif ERTS_POLL_USE_DEVPOLL - size += ps->res_events_len*sizeof(struct pollfd); -#elif ERTS_POLL_USE_KQUEUE - size += ps->res_events_len*sizeof(struct kevent); #endif #if ERTS_POLL_USE_POLL @@ -2680,6 +1907,7 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip) #endif #endif +#if !ERTS_POLL_USE_CONCURRENT_UPDATE { ErtsPollSetUpdateRequestsBlock *urqbp = ps->update_requests.next; pending_updates = ps->update_requests.len; @@ -2689,8 +1917,9 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip) urqbp = urqbp->next; } } +#endif - pip->primary = + pip->primary = #if ERTS_POLL_USE_KQUEUE "kqueue" #elif ERTS_POLL_USE_EPOLL @@ -2704,17 +1933,7 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip) #endif ; - pip->fallback = -#if !ERTS_POLL_USE_FALLBACK - NULL -#elif ERTS_POLL_USE_POLL - "poll" -#elif ERTS_POLL_USE_SELECT - "select" -#endif - ; - - pip->kernel_poll = + pip->kernel_poll = #if !ERTS_POLL_USE_KERNEL_POLL NULL #elif ERTS_POLL_USE_KQUEUE @@ -2729,40 +1948,21 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip) pip->memory_size = size; pip->poll_set_size = (int) erts_atomic_read_nob(&ps->no_of_user_fds); +#if !ERTS_POLL_USE_CONCURRENT_UPDATE pip->poll_set_size++; /* Wakeup pipe */ -#if ERTS_POLL_USE_TIMERFD - pip->poll_set_size++; /* timerfd */ -#endif - - pip->fallback_poll_set_size = -#if !ERTS_POLL_USE_FALLBACK - 0 -#elif ERTS_POLL_USE_POLL - ps->no_poll_fds -#elif ERTS_POLL_USE_SELECT - ps->no_select_fds -#endif - ; - -#if ERTS_POLL_USE_FALLBACK - /* If only kp_fd is in fallback poll set we don't use fallback... */ - if (pip->fallback_poll_set_size == 1) - pip->fallback_poll_set_size = 0; - else - pip->poll_set_size++; /* kp_fd */ #endif pip->lazy_updates = +#if !ERTS_POLL_USE_CONCURRENT_UPDATE 1 +#else + 0 +#endif ; pip->pending_updates = +#if !ERTS_POLL_USE_CONCURRENT_UPDATE pending_updates - ; - - pip->batch_updates = -#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET - 1 #else 0 #endif @@ -2776,13 +1976,23 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip) #endif ; - pip->max_fds = max_fds; + pip->is_fallback = +#if ERTS_POLL_IS_FALLBACK + 1 +#else + 0 +#endif + ; -#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS - pip->no_avoided_wakeups = erts_atomic_read_nob(&ps->no_avoided_wakeups); - pip->no_avoided_interrupts = erts_atomic_read_nob(&ps->no_avoided_interrupts); - pip->no_interrupt_timed = erts_atomic_read_nob(&ps->no_interrupt_timed); + pip->batch_updates = +#if ERTS_POLL_USE_DEVPOLL + 1 +#else + 0 #endif + ; + + pip->max_fds = max_fds; ERTS_POLLSET_UNLOCK(ps); @@ -2818,35 +2028,60 @@ fatal_error(char *format, ...) abort(); } -static void -fatal_error_async_signal_safe(char *error_str) +/* + * --- Debug ----------------------------------------------------------------- + */ + +#if ERTS_POLL_USE_EPOLL +uint32_t epoll_events(int kp_fd, int fd) { - if (ERTS_SOMEONE_IS_CRASH_DUMPING || ERTS_GOT_SIGUSR1) { - /* See comment above in fatal_error() */ - return; + /* For epoll we read the information about what is selected upon from the proc fs.*/ + char fname[30]; + FILE *f; + unsigned int pos, flags, mnt_id; + int line = 0; + sprintf(fname,"/proc/%d/fdinfo/%d",getpid(), kp_fd); + f = fopen(fname,"r"); + if (!f) { + fprintf(stderr,"failed to open file %s, errno = %d\n", fname, errno); + ASSERT(0); + return 0; } - if (error_str) { - int len = 0; - while (error_str[len]) - len++; - if (len) { - /* async signal safe */ - erts_silence_warn_unused_result(write(2, error_str, len)); - } + if (fscanf(f,"pos:\t%x\nflags:\t%x", &pos, &flags) != 2) { + fprintf(stderr,"failed to parse file %s, errno = %d\n", fname, errno); + ASSERT(0); + return 0; } - abort(); + if (fscanf(f,"\nmnt_id:\t%x\n", &mnt_id)); + line += 3; + while (!feof(f)) { + /* tfd: 10 events: 40000019 data: 180000000a */ + int ev_fd; + uint32_t events; + uint64_t data; + if (fscanf(f,"tfd:%d events:%x data:%lx\n", &ev_fd, &events, &data) != 3) { + fprintf(stderr,"failed to parse file %s on line %d, errno = %d\n", fname, + line, + errno); + return 0; + } + if (fd == ev_fd) { + fclose(f); + return events; + } + } + fclose(f); + return 0; } - -/* - * --- Debug ----------------------------------------------------------------- - */ +#endif void -ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet ps, +ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet *ps, ErtsPollEvents ev[], int len) { int fd; +#if !ERTS_POLL_USE_CONCURRENT_UPDATE ERTS_POLLSET_LOCK(ps); for (fd = 0; fd < len; fd++) { if (fd >= ps->fds_status_len) @@ -2855,9 +2090,6 @@ ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet ps, ev[fd] = ps->fds_status[fd].events; if ( fd == ps->wake_fds[0] || fd == ps->wake_fds[1] || -#if ERTS_POLL_USE_TIMERFD - fd == ps->timer_fd || -#endif #if ERTS_POLL_USE_KERNEL_POLL fd == ps->kp_fd || #endif @@ -2866,7 +2098,50 @@ ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet ps, } } ERTS_POLLSET_UNLOCK(ps); +#elif ERTS_POLL_USE_EPOLL + /* For epoll we read the information about what is selected upon from the proc fs.*/ + char fname[30]; + FILE *f; + unsigned int pos, flags, mnt_id; + int line = 0; + sprintf(fname,"/proc/%d/fdinfo/%d",getpid(), ps->kp_fd); + for (fd = 0; fd < len; fd++) + ev[fd] = ERTS_POLL_EV_NONE; + f = fopen(fname,"r"); + if (!f) { + fprintf(stderr,"failed to open file %s, errno = %d\n", fname, errno); + return; + } + if (fscanf(f,"pos:\t%x\nflags:\t%x", &pos, &flags) != 2) { + fprintf(stderr,"failed to parse file %s, errno = %d\n", fname, errno); + ASSERT(0); + return; + } + if (fscanf(f,"\nmnt_id:\t%x\n", &mnt_id)); + line += 3; + while (!feof(f)) { + /* tfd: 10 events: 40000019 data: 180000000a */ + int fd; + uint32_t events; + uint64_t data; + if (fscanf(f,"tfd:%d events:%x data:%lx\n", &fd, &events, &data) != 3) { + fprintf(stderr,"failed to parse file %s on line %d, errno = %d\n", + fname, line, errno); + ASSERT(0); + return; + } + data &= 0xFFFFFFFF; + ASSERT(fd == data); + /* Events are the events that are being monitored, which of course include + error and hup events, but we are only interested in IN/OUT events */ + ev[fd] = (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT) & ERTS_POLL_EV_N2E(events); + line++; + } +#else + for (fd = 0; fd < len; fd++) + ev[fd] = ERTS_POLL_EV_NONE; +#endif } #ifdef HARD_DEBUG @@ -2886,10 +2161,10 @@ check_poll_result(ErtsPollResFd pr[], int len) } -#if ERTS_POLL_USE_DEVPOLL +#if ERTS_POLL_USE_DEVPOLL && defined(DEBUG) static void -check_poll_status(ErtsPollSet ps) +check_poll_status(ErtsPollSet *ps) { int i; for (i = 0; i < ps->fds_status_len; i++) { @@ -2921,30 +2196,24 @@ check_poll_status(ErtsPollSet ps) #endif /* ERTS_POLL_USE_DEVPOLL */ #endif /* HARD_DEBUG */ -#ifdef ERTS_POLL_DEBUG_PRINT static void print_misc_debug_info(void) { - erts_printf("erts_poll using: %s lazy_updates:%s batch_updates:%s\n", +#if ERTS_POLL_DEBUG_PRINT + erts_printf("erts_poll using: %s lazy_updates:%s\n", #if ERTS_POLL_USE_KQUEUE "kqueue" #elif ERTS_POLL_USE_EPOLL "epoll" #elif ERTS_POLL_USE_DEVPOLL "/dev/poll" -#endif -#if ERTS_POLL_USE_FALLBACK - "-" -#endif -#if ERTS_POLL_USE_POLL +#elif ERTS_POLL_USE_POLL "poll" #elif ERTS_POLL_USE_SELECT "select" #endif , - "true" - , -#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET +#if !ERTS_POLL_USE_CONCURRENT_UPDATE "true" #else "false" @@ -2963,29 +2232,20 @@ print_misc_debug_info(void) #ifdef FD_SETSIZE erts_printf("FD_SETSIZE=%d\n", FD_SETSIZE); #endif -} - #endif +} #ifdef ERTS_ENABLE_LOCK_COUNT -static void erts_lcnt_enable_pollset_lock_count(ErtsPollSet pollset, int enable) { +void ERTS_POLL_EXPORT(erts_lcnt_enable_pollset_lock_count)(ErtsPollSet *pollset, int enable) +{ +#if !ERTS_POLL_USE_CONCURRENT_UPDATE if(enable) { erts_lcnt_install_new_lock_info(&pollset->mtx.lcnt, "pollset_rm", NIL, ERTS_LOCK_TYPE_MUTEX | ERTS_LOCK_FLAGS_CATEGORY_IO); } else { erts_lcnt_uninstall(&pollset->mtx.lcnt); } -} - -void ERTS_POLL_EXPORT(erts_lcnt_update_pollset_locks)(int enable) { - ErtsPollSet iterator; - - erts_mtx_lock(&pollsets_lock); - - for(iterator = pollsets; iterator != NULL; iterator = iterator->next) { - erts_lcnt_enable_pollset_lock_count(iterator, enable); - } - - erts_mtx_unlock(&pollsets_lock); +#endif + return; } #endif diff --git a/erts/emulator/sys/common/erl_poll.h b/erts/emulator/sys/common/erl_poll.h index a57dc51e5b..e9a667cac1 100644 --- a/erts/emulator/sys/common/erl_poll.h +++ b/erts/emulator/sys/common/erl_poll.h @@ -18,11 +18,31 @@ * %CopyrightEnd% */ -/* - * Description: Poll interface suitable for ERTS with or without - * SMP support. +/** + * @description: Poll interface suitable for ERTS with SMP support. + * + * @author: Rickard Green + * @author: Lukas Larsson + * + * This header file exports macros and functions that are used to + * react to I/O polling events from file descriptors or wait-able + * objects. The API exported is the following: * - * Author: Rickard Green + * defines: + * ERTS_POLL_EV_NONE - No events have been set. This is not the same as 0. + * ERTS_POLL_EV_IN - Represent an IN event + * ERTS_POLL_EV_OUT - Represent an OUT event + * ERTS_POLL_EV_ERR - Represent an error event + * ERTS_POLL_EV_NVAL - Represent an invalid event + * + * macro functions: + * ErtsSysFdType ERTS_POLL_RES_GET_FD(ErtsPollResFd *evt); + * void ERTS_POLL_RES_SET_FD(ErtsPollResFd *evt, ErtsSysFdType fd); + * ErtsPollEvents ERTS_POLL_RES_GET_EVTS(ErtsPollResFd *evt) + * void ERTS_POLL_RES_SET_EVTS(ErtsPollResFd *evt, ErtsPollEvents fd); + * + * functions: + * See erl_poll_api.h */ #ifndef ERL_POLL_H__ @@ -32,33 +52,27 @@ #define ERTS_POLL_NO_TIMEOUT ERTS_MONOTONIC_TIME_MIN -#if 0 -#define ERTS_POLL_COUNT_AVOIDED_WAKEUPS -#endif - #ifdef ERTS_ENABLE_KERNEL_POLL -# if defined(ERTS_KERNEL_POLL_VERSION) -# define ERTS_POLL_EXPORT(FUNC) FUNC ## _kp +# undef ERTS_ENABLE_KERNEL_POLL +# define ERTS_ENABLE_KERNEL_POLL 1 +# if defined(ERTS_NO_KERNEL_POLL_VERSION) +# define ERTS_POLL_EXPORT(FUNC) FUNC ## _flbk +# undef ERTS_NO_KERNEL_POLL_VERSION +# define ERTS_NO_KERNEL_POLL_VERSION 1 +# define ERTS_KERNEL_POLL_VERSION 0 # else -# define ERTS_POLL_EXPORT(FUNC) FUNC ## _nkp -# undef ERTS_POLL_DISABLE_KERNEL_POLL -# define ERTS_POLL_DISABLE_KERNEL_POLL +# undef ERTS_KERNEL_POLL_VERSION +# define ERTS_KERNEL_POLL_VERSION 1 +# define ERTS_NO_KERNEL_POLL_VERSION 0 +# define ERTS_POLL_EXPORT(FUNC) FUNC # endif #else # define ERTS_POLL_EXPORT(FUNC) FUNC -# undef ERTS_POLL_DISABLE_KERNEL_POLL -# define ERTS_POLL_DISABLE_KERNEL_POLL -#endif - -#ifdef ERTS_POLL_DISABLE_KERNEL_POLL -# undef HAVE_SYS_EPOLL_H -# undef HAVE_SYS_EVENT_H -# undef HAVE_SYS_DEVPOLL_H +# define ERTS_ENABLE_KERNEL_POLL 0 +# define ERTS_NO_KERNEL_POLL_VERSION 1 +# define ERTS_KERNEL_POLL_VERSION 0 #endif -#undef ERTS_POLL_USE_KERNEL_POLL -#define ERTS_POLL_USE_KERNEL_POLL 0 - #undef ERTS_POLL_USE_KQUEUE #define ERTS_POLL_USE_KQUEUE 0 #undef ERTS_POLL_USE_EPOLL @@ -70,68 +84,96 @@ #undef ERTS_POLL_USE_SELECT #define ERTS_POLL_USE_SELECT 0 -#if defined(HAVE_SYS_EVENT_H) -# undef ERTS_POLL_USE_KQUEUE -# define ERTS_POLL_USE_KQUEUE 1 -# undef ERTS_POLL_USE_KERNEL_POLL -# define ERTS_POLL_USE_KERNEL_POLL 1 -#elif defined(HAVE_SYS_EPOLL_H) -# undef ERTS_POLL_USE_EPOLL -# define ERTS_POLL_USE_EPOLL 1 -# undef ERTS_POLL_USE_KERNEL_POLL -# define ERTS_POLL_USE_KERNEL_POLL 1 -#elif defined(HAVE_SYS_DEVPOLL_H) -# undef ERTS_POLL_USE_DEVPOLL -# define ERTS_POLL_USE_DEVPOLL 1 -# undef ERTS_POLL_USE_KERNEL_POLL -# define ERTS_POLL_USE_KERNEL_POLL 1 +/* Defines which structure that erts_poll_wait should use to wait with + and how events should be represented */ +#define ERTS_POLL_USE_EPOLL_EVS 0 +#define ERTS_POLL_USE_KQUEUE_EVS 0 +#define ERTS_POLL_USE_DEVPOLL_EVS 0 +#define ERTS_POLL_USE_POLL_EVS 0 +#define ERTS_POLL_USE_SELECT_EVS 0 + +#define ERTS_POLL_USE_KERNEL_POLL ERTS_KERNEL_POLL_VERSION + +#if ERTS_ENABLE_KERNEL_POLL +# if defined(HAVE_SYS_EVENT_H) +# undef ERTS_POLL_USE_KQUEUE_EVS +# define ERTS_POLL_USE_KQUEUE_EVS 1 +# undef ERTS_POLL_USE_KQUEUE +# define ERTS_POLL_USE_KQUEUE ERTS_KERNEL_POLL_VERSION +# elif defined(HAVE_SYS_EPOLL_H) +# undef ERTS_POLL_USE_EPOLL_EVS +# define ERTS_POLL_USE_EPOLL_EVS 1 +# undef ERTS_POLL_USE_EPOLL +# define ERTS_POLL_USE_EPOLL ERTS_KERNEL_POLL_VERSION +# elif defined(HAVE_SYS_DEVPOLL_H) +# undef ERTS_POLL_USE_DEVPOLL_EVS +# define ERTS_POLL_USE_DEVPOLL_EVS 1 +# undef ERTS_POLL_USE_DEVPOLL +# define ERTS_POLL_USE_DEVPOLL ERTS_KERNEL_POLL_VERSION +# else +# error "Missing kernel poll implementation of erts_poll()" +# endif #endif -#define ERTS_POLL_USE_FALLBACK (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL) - -#if !ERTS_POLL_USE_KERNEL_POLL || ERTS_POLL_USE_FALLBACK +#if ERTS_NO_KERNEL_POLL_VERSION # if defined(ERTS_USE_POLL) +# undef ERTS_POLL_USE_POLL_EVS +# define ERTS_POLL_USE_POLL_EVS 1 # undef ERTS_POLL_USE_POLL # define ERTS_POLL_USE_POLL 1 # elif !defined(__WIN32__) +# undef ERTS_POLL_USE_SELECT_EVS +# define ERTS_POLL_USE_SELECT_EVS 1 # undef ERTS_POLL_USE_SELECT # define ERTS_POLL_USE_SELECT 1 # endif #endif -#define ERTS_POLL_USE_TIMERFD 0 +#define ERTS_POLL_USE_FALLBACK (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL) typedef Uint32 ErtsPollEvents; -#undef ERTS_POLL_EV_E2N + +typedef enum { + ERTS_POLL_OP_ADD = 0, /* Add the FD to the pollset */ + ERTS_POLL_OP_MOD = 1, /* Modify the FD in the pollset */ + ERTS_POLL_OP_DEL = 2 /* Delete the FD from the pollset */ +} ErtsPollOp; + +#define op2str(op) (op == ERTS_POLL_OP_ADD ? "add" : \ + (op == ERTS_POLL_OP_MOD ? "mod" : "del")) #if defined(__WIN32__) /* --- win32 --------------------------------------- */ -#define ERTS_POLL_EV_IN 1 -#define ERTS_POLL_EV_OUT 2 -#define ERTS_POLL_EV_ERR 4 -#define ERTS_POLL_EV_NVAL 8 +#define ERTS_POLL_EV_IN 1 +#define ERTS_POLL_EV_OUT 2 +#define ERTS_POLL_EV_ERR 4 +#define ERTS_POLL_EV_NVAL 8 -#elif ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */ +#define ERTS_POLL_EV_E2N(EV) (EV) +#define ERTS_POLL_EV_N2E(EV) (EV) -#include <sys/epoll.h> +#elif ERTS_POLL_USE_EPOLL_EVS /* --- epoll ------------------------------- */ -#ifdef HAVE_SYS_TIMERFD_H -#include <sys/timerfd.h> -#undef ERTS_POLL_USE_TIMERFD -#define ERTS_POLL_USE_TIMERFD 1 -#endif +#include <sys/epoll.h> #define ERTS_POLL_EV_E2N(EV) \ ((uint32_t) (EV)) #define ERTS_POLL_EV_N2E(EV) \ - ((ErtsPollEvents) (EV)) + ((ErtsPollEvents) (EV) & ~EPOLLONESHOT) #define ERTS_POLL_EV_IN ERTS_POLL_EV_N2E(EPOLLIN) #define ERTS_POLL_EV_OUT ERTS_POLL_EV_N2E(EPOLLOUT) #define ERTS_POLL_EV_NVAL ERTS_POLL_EV_N2E(EPOLLET) #define ERTS_POLL_EV_ERR ERTS_POLL_EV_N2E(EPOLLERR|EPOLLHUP) -#elif ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */ +typedef struct epoll_event ErtsPollResFd; + +#define ERTS_POLL_RES_GET_FD(evt) ((ErtsSysFdType)((evt)->data.fd)) +#define ERTS_POLL_RES_SET_FD(evt, ident) (evt)->data.fd = ident +#define ERTS_POLL_RES_GET_EVTS(evt) ERTS_POLL_EV_N2E((evt)->events) +#define ERTS_POLL_RES_SET_EVTS(evt, evts) (evt)->events = ERTS_POLL_EV_E2N(evts) + +#elif ERTS_POLL_USE_DEVPOLL_EVS /* --- devpoll ----------------------------- */ #include <sys/devpoll.h> @@ -145,12 +187,37 @@ typedef Uint32 ErtsPollEvents; #define ERTS_POLL_EV_NVAL ERTS_POLL_EV_N2E(POLLNVAL) #define ERTS_POLL_EV_ERR ERTS_POLL_EV_N2E(POLLERR|POLLHUP) -#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */ +typedef struct pollfd ErtsPollResFd; + +#define ERTS_POLL_RES_GET_FD(evt) ((ErtsSysFdType)((evt)->fd)) +#define ERTS_POLL_RES_SET_FD(evt, ident) (evt)->fd = ident +#define ERTS_POLL_RES_GET_EVTS(evt) ERTS_POLL_EV_N2E((evt)->revents) +#define ERTS_POLL_RES_SET_EVTS(evt, evts) (evt)->revents = ERTS_POLL_EV_E2N(evts) + +#elif ERTS_POLL_USE_KQUEUE_EVS /* --- kqueue ------------------------------ */ /* Kqueue use fallback defines (poll() or select()) */ + +#include <sys/event.h> + +#ifdef ERTS_USE_POLL +# undef ERTS_POLL_USE_POLL_EVS +# define ERTS_POLL_USE_POLL_EVS 1 +#elif !defined(__WIN32__) +# undef ERTS_POLL_USE_SELECT_EVS +# define ERTS_POLL_USE_SELECT_EVS 1 #endif -#if ERTS_POLL_USE_POLL /* --- poll -------------------------------- */ +typedef struct kevent ErtsPollResFd; + +#define ERTS_POLL_RES_GET_FD(evt) ((ErtsSysFdType)((evt)->ident)) +#define ERTS_POLL_RES_SET_FD(evt, fd) (evt)->ident = fd +#define ERTS_POLL_RES_GET_EVTS(evt) ERTS_POLL_EV_N2E((ErtsPollEvents)(evt)->udata) +#define ERTS_POLL_RES_SET_EVTS(evt, evts) (evt)->udata = (void*)(UWord)(ERTS_POLL_EV_E2N(evts)) + +#endif +#if ERTS_POLL_USE_POLL_EVS + /* --- poll -------------------------------- */ #include <poll.h> #define ERTS_POLL_EV_NKP_E2N(EV) \ @@ -169,7 +236,7 @@ typedef Uint32 ErtsPollEvents; #define ERTS_POLL_EV_NKP_NVAL ERTS_POLL_EV_N2E(POLLNVAL) #define ERTS_POLL_EV_NKP_ERR ERTS_POLL_EV_N2E(POLLERR|POLLHUP) -#elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */ +#elif ERTS_POLL_USE_SELECT_EVS /* --- select ------------------------------ */ #define ERTS_POLL_EV_NKP_E2N(EV) (EV) #define ERTS_POLL_EV_NKP_N2E(EV) (EV) @@ -195,70 +262,65 @@ typedef Uint32 ErtsPollEvents; #endif -typedef struct ErtsPollSet_ *ErtsPollSet; +#if !ERTS_ENABLE_KERNEL_POLL -typedef struct { - ErtsSysFdType fd; - ErtsPollEvents events; - int on; -} ErtsPollControlEntry; - -typedef struct { +typedef struct _ErtsPollResFd { ErtsSysFdType fd; ErtsPollEvents events; } ErtsPollResFd; +#define ERTS_POLL_RES_GET_FD(evt) (evt)->fd +#define ERTS_POLL_RES_SET_FD(evt, ident) (evt)->fd = (ident) +#define ERTS_POLL_RES_GET_EVTS(evt) ERTS_POLL_EV_N2E((evt)->events) +#define ERTS_POLL_RES_SET_EVTS(evt, evts) (evt)->events = ERTS_POLL_EV_E2N(evts) + +#endif + +#define ERTS_POLL_EV_NONE (UINT_MAX & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT|ERTS_POLL_EV_NVAL|ERTS_POLL_EV_ERR)) + +#define ev2str(ev) \ + (((ev) == 0 || (ev) == ERTS_POLL_EV_NONE) ? "NONE" : \ + ((ev) == ERTS_POLL_EV_IN ? "IN" : \ + ((ev) == ERTS_POLL_EV_OUT ? "OUT" : \ + ((ev) == (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT) ? "IN|OUT" : \ + ((ev) & ERTS_POLL_EV_ERR ? "ERR" : \ + ((ev) & ERTS_POLL_EV_NVAL ? "NVAL" : "OTHER")))))) + + +typedef struct ERTS_POLL_EXPORT(erts_pollset) ErtsPollSet; + typedef struct { char *primary; - char *fallback; char *kernel_poll; Uint memory_size; - int poll_set_size; - int fallback_poll_set_size; + Uint poll_set_size; int lazy_updates; - int pending_updates; + Uint pending_updates; int batch_updates; int concurrent_updates; - int max_fds; -#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS - long no_avoided_wakeups; - long no_avoided_interrupts; - long no_interrupt_timed; -#endif + int is_fallback; + Uint max_fds; + Uint active_fds; + Uint poll_threads; } ErtsPollInfo; -void ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet, - int); -void ERTS_POLL_EXPORT(erts_poll_interrupt_timed)(ErtsPollSet, - int, - ErtsMonotonicTime); -ErtsPollEvents ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet, - ErtsSysFdType, - ErtsPollEvents, - int on, - int* wake_poller - ); -void ERTS_POLL_EXPORT(erts_poll_controlv)(ErtsPollSet, - ErtsPollControlEntry [], - int on); -int ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet, - ErtsPollResFd [], - int *, - ErtsMonotonicTime); -int ERTS_POLL_EXPORT(erts_poll_max_fds)(void); -void ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet, - ErtsPollInfo *); -ErtsPollSet ERTS_POLL_EXPORT(erts_poll_create_pollset)(void); -void ERTS_POLL_EXPORT(erts_poll_destroy_pollset)(ErtsPollSet); -void ERTS_POLL_EXPORT(erts_poll_init)(void); -void ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet, - ErtsPollEvents [], - int); +#if defined(ERTS_POLL_USE_FALLBACK) && ERTS_KERNEL_POLL_VERSION +# undef ERTS_POLL_EXPORT +# define ERTS_POLL_EXPORT(FUNC) FUNC ## _flbk +# include "erl_poll_api.h" +# undef ERTS_POLL_EXPORT +# define ERTS_POLL_EXPORT(FUNC) FUNC +#elif !defined(ERTS_POLL_USE_FALLBACK) +# define ERTS_POLL_USE_FALLBACK 0 +#endif -int erts_poll_new_table_len(int old_len, int need_len); +#include "erl_poll_api.h" -#ifdef ERTS_ENABLE_LOCK_COUNT -void ERTS_POLL_EXPORT(erts_lcnt_update_pollset_locks)(int enable); -#endif +/** + * Get the next size of the array that holds the file descriptors. + * This function is used in order for the check io array and the + * pollset array to be of the same size. + */ +int erts_poll_new_table_len(int old_len, int need_len); #endif /* #ifndef ERL_POLL_H__ */ diff --git a/erts/emulator/sys/common/erl_poll_api.h b/erts/emulator/sys/common/erl_poll_api.h new file mode 100644 index 0000000000..04beb37d1c --- /dev/null +++ b/erts/emulator/sys/common/erl_poll_api.h @@ -0,0 +1,122 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2006-2016. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + */ +/** + * @description: Poll interface functions + * @author Lukas Larsson + * + * The functions in the header are used to interact with the poll + * implementation. Iff the kernel-poll implementation needs a fallback + * pollset, then all functions are exported twice. Once with a _flbk + * suffix and once without any suffix. If no fallback is needed, then + * only the non-suffix version is exported. + */ + +/** + * Initialize the poll implementation. Has to be called before any other function. + * @param[out] concurrent_waiters if not NULL, set to 1 if more then one thread + * is allowed to wait in the pollsets at the same time. + */ +void ERTS_POLL_EXPORT(erts_poll_init)(int *concurrent_waiters); +/** + * @brief Create a new pollset. + * @param id The unique debug id of this pollset. + */ +ErtsPollSet *ERTS_POLL_EXPORT(erts_poll_create_pollset)(int id); + +/** + * Modify the contents of a pollset. This function can be called while one + * (or possibly more) thread is waiting in the pollset. + * + * @param ps the pollset to modify + * @param fd the file descriptor to modify + * @param op the type of operation to do. Normal usage is ADD,MOD...MOD,DEL. + * @param evts the events that we are changing interest to. Ignored if op is DEL. + * @param[in] wake_poller if set to 1 any thread waiting in the pollset will be woken. + * This parameter is ignored if the pollset supports concurrent waiters. + * @param[out] wake_poller set to 1 if the waiting thread was woken. + * @return The events set, or ERTS_POLL_EV_NVAL if it was not possible to add the + * fd to the pollset. + */ +ErtsPollEvents ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps, + ErtsSysFdType fd, + ErtsPollOp op, + ErtsPollEvents evts, + int *wake_poller); + +/** + * Wait for events to be ready in the pollset. If the erts_poll_init call + * set concurrent_waiters to 1, then multiple threads are allowed to call + * this function at the same time. + * + * When an event has been triggered on a fd, that event is disabled. To + * re-enable it the implementation has to call erts_poll_control again. + * + * @param ps the pollset to wait for events in + * @param res an array of fd results that the ready fds are put in. + * @param[in] length the length of the res array + * @param[out] length the number of ready events returned in res + * @return 0 on success, else the ERRNO of the error that happened. + */ +int ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps, + ErtsPollResFd res[], + int *length); +/** + * Interrupt the thread waiting in the pollset. This function should be called + * with set = 0 before any thread calls erts_poll_wait in order to clear any + * interrupts that have happened while the thread was awake. + * + * This function has no effect on pollsets that support concurrent waiters. + * + * @param ps the pollset to wake + * @param set if 1, interrupt the pollset, if 0 clear the interrupt flag. + */ +void ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet *ps, int set); + +/* Debug functions */ + +/** + * Get the maximum number of fds supported by the pollset + */ +int ERTS_POLL_EXPORT(erts_poll_max_fds)(void); +/** + * Get information about the given pollset + */ +void ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet *ps, + ErtsPollInfo *info); +/** + * Get information about which events are currently selected. + * + * The unix fd is used to index into the array, so naturally this function does + * not work on windows. If the pollset cannot figure out what the selected + * events for a given fd is, it is set to ERTS_POLL_EV_NONE. + * + * @param ps the pollset to get events from + * @param evts an array of which events are selected on. + */ +void ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet *ps, + ErtsPollEvents evts[], + int length); + +#ifdef ERTS_ENABLE_LOCK_COUNT +/** + * Enable lock counting of any locks within the pollset. + */ +void ERTS_POLL_EXPORT(erts_lcnt_enable_pollset_lock_count)(ErtsPollSet *, int enable); +#endif diff --git a/erts/emulator/sys/common/erl_sys_common_misc.c b/erts/emulator/sys/common/erl_sys_common_misc.c index 09237c81ce..420138ff0a 100644 --- a/erts/emulator/sys/common/erl_sys_common_misc.c +++ b/erts/emulator/sys/common/erl_sys_common_misc.c @@ -45,14 +45,6 @@ #endif #endif -/* - * erts_check_io_time is used by the erl_check_io implementation. The - * global erts_check_io_time variable is declared here since there - * (often) exist two versions of erl_check_io (kernel-poll and - * non-kernel-poll), and we dont want two versions of this variable. - */ -erts_atomic_t erts_check_io_time; - /* Written once and only once */ static int filename_encoding = ERL_FILENAME_UNKNOWN; diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c index d05028cabc..6315135151 100644 --- a/erts/emulator/sys/unix/sys.c +++ b/erts/emulator/sys/unix/sys.c @@ -131,121 +131,6 @@ static int replace_intr = 0; /* assume yes initially, ttsl_init will clear it */ int using_oldshell = 1; -#ifdef ERTS_ENABLE_KERNEL_POLL - -int erts_use_kernel_poll = 0; - -struct { - int (*select)(ErlDrvPort, ErlDrvEvent, int, int); - int (*enif_select)(ErlNifEnv*, ErlNifEvent, enum ErlNifSelectFlags, void*, const ErlNifPid*, Eterm); - int (*event)(ErlDrvPort, ErlDrvEvent, ErlDrvEventData); - void (*check_io_as_interrupt)(void); - void (*check_io_interrupt)(int); - void (*check_io_interrupt_tmd)(int, ErtsMonotonicTime); - void (*check_io)(int); - Uint (*size)(void); - Eterm (*info)(void *); - int (*check_io_debug)(ErtsCheckIoDebugInfo *); -} io_func = {0}; - - -int -driver_select(ErlDrvPort port, ErlDrvEvent event, int mode, int on) -{ - return (*io_func.select)(port, event, mode, on); -} - -int -driver_event(ErlDrvPort port, ErlDrvEvent event, ErlDrvEventData event_data) -{ - return (*io_func.event)(port, event, event_data); -} - -int enif_select(ErlNifEnv* env, ErlNifEvent event, - enum ErlNifSelectFlags flags, void* obj, const ErlNifPid* pid, Eterm ref) -{ - return (*io_func.enif_select)(env, event, flags, obj, pid, ref); -} - - -Eterm erts_check_io_info(void *p) -{ - return (*io_func.info)(p); -} - -int -erts_check_io_debug(ErtsCheckIoDebugInfo *ip) -{ - return (*io_func.check_io_debug)(ip); -} - - -static void -init_check_io(void) -{ - if (erts_use_kernel_poll) { - io_func.select = driver_select_kp; - io_func.enif_select = enif_select_kp; - io_func.event = driver_event_kp; - io_func.check_io_interrupt = erts_check_io_interrupt_kp; - io_func.check_io_interrupt_tmd = erts_check_io_interrupt_timed_kp; - io_func.check_io = erts_check_io_kp; - io_func.size = erts_check_io_size_kp; - io_func.info = erts_check_io_info_kp; - io_func.check_io_debug = erts_check_io_debug_kp; - erts_init_check_io_kp(); - max_files = erts_check_io_max_files_kp(); - } - else { - io_func.select = driver_select_nkp; - io_func.enif_select = enif_select_nkp; - io_func.event = driver_event_nkp; - io_func.check_io_interrupt = erts_check_io_interrupt_nkp; - io_func.check_io_interrupt_tmd = erts_check_io_interrupt_timed_nkp; - io_func.check_io = erts_check_io_nkp; - io_func.size = erts_check_io_size_nkp; - io_func.info = erts_check_io_info_nkp; - io_func.check_io_debug = erts_check_io_debug_nkp; - erts_init_check_io_nkp(); - max_files = erts_check_io_max_files_nkp(); - } -} - -#define ERTS_CHK_IO_AS_INTR() (*io_func.check_io_interrupt)(1) -#define ERTS_CHK_IO_INTR (*io_func.check_io_interrupt) -#define ERTS_CHK_IO_INTR_TMD (*io_func.check_io_interrupt_tmd) -#define ERTS_CHK_IO (*io_func.check_io) -#define ERTS_CHK_IO_SZ (*io_func.size) - -#else /* !ERTS_ENABLE_KERNEL_POLL */ - -static void -init_check_io(void) -{ - erts_init_check_io(); - max_files = erts_check_io_max_files(); -} - -#define ERTS_CHK_IO_AS_INTR() erts_check_io_interrupt(1) -#define ERTS_CHK_IO_INTR erts_check_io_interrupt -#define ERTS_CHK_IO_INTR_TMD erts_check_io_interrupt_timed -#define ERTS_CHK_IO erts_check_io -#define ERTS_CHK_IO_SZ erts_check_io_size - -#endif - -void -erts_sys_schedule_interrupt(int set) -{ - ERTS_CHK_IO_INTR(set); -} - -void -erts_sys_schedule_interrupt_timed(int set, ErtsMonotonicTime timeout_time) -{ - ERTS_CHK_IO_INTR_TMD(set, timeout_time); -} - UWord erts_sys_get_page_size(void) { @@ -261,7 +146,7 @@ erts_sys_get_page_size(void) Uint erts_sys_misc_mem_sz(void) { - Uint res = ERTS_CHK_IO_SZ(); + Uint res = erts_check_io_size(); res += erts_atomic_read_mb(&sys_misc_mem_sz); return res; } @@ -618,14 +503,12 @@ break_requested(void) * just set a flag - checked for and handled by * scheduler threads erts_check_io() (not signal handler). */ -#ifdef DEBUG - fprintf(stderr,"break!\n"); -#endif if (ERTS_BREAK_REQUESTED) erts_exit(ERTS_INTR_EXIT, ""); ERTS_SET_BREAK_REQUESTED; - ERTS_CHK_IO_AS_INTR(); /* Make sure we don't sleep in poll */ + /* Wake aux thread to get handle break */ + erts_aux_thread_poke(); } static RETSIGTYPE request_break(int signum) @@ -813,7 +696,7 @@ erts_sys_unix_later_init(void) int sys_max_files(void) { - return(max_files); + return max_files; } /************************** OS info *******************************/ @@ -1190,20 +1073,6 @@ erl_debug(char* fmt, ...) #endif /* DEBUG */ -/* - * Called from schedule() when it runs out of runnable processes, - * or when Erlang code has performed INPUT_REDUCTIONS reduction - * steps. runnable == 0 iff there are no runnable Erlang processes. - */ -void -erl_sys_schedule(int runnable) -{ - ERTS_CHK_IO(!runnable); - ERTS_LC_ASSERT(!erts_thr_progress_is_blocking()); -} - - - static erts_tid_t sig_dispatcher_tid; static void @@ -1392,99 +1261,18 @@ erts_sys_main_thread(void) } -#ifdef ERTS_ENABLE_KERNEL_POLL /* get_value() is currently only used when - kernel-poll is enabled */ - -/* Get arg marks argument as handled by - putting NULL in argv */ -static char * -get_value(char* rest, char** argv, int* ip) -{ - char *param = argv[*ip]+1; - argv[*ip] = NULL; - if (*rest == '\0') { - char *next = argv[*ip + 1]; - if (next[0] == '-' - && next[1] == '-' - && next[2] == '\0') { - erts_fprintf(stderr, "bad \"%s\" value: \n", param); - erts_usage(); - } - (*ip)++; - argv[*ip] = NULL; - return next; - } - return rest; -} - -#endif /* ERTS_ENABLE_KERNEL_POLL */ - void erl_sys_args(int* argc, char** argv) { - int i, j; erts_rwmtx_init(&environ_rwmtx, "environ", NIL, ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC); - i = 1; - ASSERT(argc && argv); - while (i < *argc) { - if(argv[i][0] == '-') { - switch (argv[i][1]) { -#ifdef ERTS_ENABLE_KERNEL_POLL - case 'K': { - char *arg = get_value(argv[i] + 2, argv, &i); - if (strcmp("true", arg) == 0) { - erts_use_kernel_poll = 1; - } - else if (strcmp("false", arg) == 0) { - erts_use_kernel_poll = 0; - } - else { - erts_fprintf(stderr, "bad \"K\" value: %s\n", arg); - erts_usage(); - } - break; - } -#endif - case '-': - goto done_parsing; - default: - break; - } - } - i++; - } - - done_parsing: - -#ifdef ERTS_ENABLE_KERNEL_POLL - if (erts_use_kernel_poll) { - char no_kp[10]; - size_t no_kp_sz = sizeof(no_kp); - int res = erts_sys_getenv_raw("ERL_NO_KERNEL_POLL", no_kp, &no_kp_sz); - if (res > 0 - || (res == 0 - && sys_strcmp("false", no_kp) != 0 - && sys_strcmp("FALSE", no_kp) != 0)) { - erts_use_kernel_poll = 0; - } - } -#endif - - init_check_io(); + max_files = erts_check_io_max_files(); init_smp_sig_notify(); init_smp_sig_suspend(); - /* Handled arguments have been marked with NULL. Slide arguments - not handled towards the beginning of argv. */ - for (i = 0, j = 0; i < *argc; i++) { - if (argv[i]) - argv[j++] = argv[i]; - } - *argc = j; } diff --git a/erts/emulator/sys/unix/sys_drivers.c b/erts/emulator/sys/unix/sys_drivers.c index 7c9a532fed..0228e1af54 100644 --- a/erts/emulator/sys/unix/sys_drivers.c +++ b/erts/emulator/sys/unix/sys_drivers.c @@ -1723,8 +1723,6 @@ static ErlDrvData forker_start(ErlDrvPort port_num, char* name, SET_NONBLOCKING(forker_fd); - driver_select(port_num, forker_fd, ERL_DRV_READ|ERL_DRV_USE, 1); - return (ErlDrvData)port_num; } @@ -1821,10 +1819,19 @@ static void forker_ready_output(ErlDrvData e, ErlDrvEvent fd) static ErlDrvSSizeT forker_control(ErlDrvData e, unsigned int cmd, char *buf, ErlDrvSizeT len, char **rbuf, ErlDrvSizeT rlen) { + static int first_call = 1; ErtsSysForkerProto *proto = (ErtsSysForkerProto *)buf; ErlDrvPort port_num = (ErlDrvPort)e; int res; + if (first_call) { + /* + * Do driver_select here when schedulers and their pollsets have started. + */ + driver_select(port_num, forker_fd, ERL_DRV_READ|ERL_DRV_USE, 1); + first_call = 0; + } + driver_enq(port_num, buf, len); if (driver_sizeq(port_num) > sizeof(*proto)) { return 0; diff --git a/erts/emulator/sys/win32/erl_poll.c b/erts/emulator/sys/win32/erl_poll.c index 0bd43bb4fb..fd4c745c3b 100644 --- a/erts/emulator/sys/win32/erl_poll.c +++ b/erts/emulator/sys/win32/erl_poll.c @@ -34,6 +34,7 @@ */ /*#define HARDDEBUG */ +/*#define HARDTRACE */ #ifdef HARDDEBUG #ifdef HARDTRACE #define HARDTRACEF(X) my_debug_printf##X @@ -50,7 +51,7 @@ static void my_debug_printf(char *fmt, ...) va_start(args, fmt); erts_vsnprintf(buffer,1024,fmt,args); va_end(args); - erts_fprintf(stderr,"%s\r\n",buffer); + erts_printf("%s\r\n",buffer); } #else #define HARDTRACEF(X) @@ -274,20 +275,17 @@ typedef struct _Waiter { /* * The structure for a pollset. There can currently be only one... */ -struct ErtsPollSet_ { +struct erts_pollset { Waiter** waiter; int allocated_waiters; /* Size ow waiter array */ int num_waiters; /* Number of waiter threads. */ - int restore_events; /* Tells us to restore waiters events - next time around */ HANDLE event_io_ready; /* To be used when waiting for io */ /* These are used to wait for workers to enter standby */ volatile int standby_wait_counter; /* Number of threads to wait for */ CRITICAL_SECTION standby_crit; /* CS to guard the counter */ - HANDLE standby_wait_event; /* Event signalled when counte == 0 */ + HANDLE standby_wait_event; /* Event signalled when counter == 0 */ erts_atomic32_t wakeup_state; erts_mtx_t mtx; - erts_atomic64_t timeout_time; }; @@ -352,39 +350,19 @@ do { \ wait_standby(PS); \ } while(0) -static ERTS_INLINE void -init_timeout_time(ErtsPollSet ps) -{ - erts_atomic64_init_nob(&ps->timeout_time, - (erts_aint64_t) ERTS_MONOTONIC_TIME_MAX); -} - -static ERTS_INLINE void -set_timeout_time(ErtsPollSet ps, ErtsMonotonicTime time) -{ - erts_atomic64_set_relb(&ps->timeout_time, - (erts_aint64_t) time); -} - -static ERTS_INLINE ErtsMonotonicTime -get_timeout_time(ErtsPollSet ps) -{ - return (ErtsMonotonicTime) erts_atomic64_read_acqb(&ps->timeout_time); -} - #define ERTS_POLL_NOT_WOKEN ((erts_aint32_t) 0) #define ERTS_POLL_WOKEN_IO_READY ((erts_aint32_t) 1) #define ERTS_POLL_WOKEN_INTR ((erts_aint32_t) 2) #define ERTS_POLL_WOKEN_TIMEDOUT ((erts_aint32_t) 3) static ERTS_INLINE int -is_io_ready(ErtsPollSet ps) +is_io_ready(ErtsPollSet *ps) { return erts_atomic32_read_nob(&ps->wakeup_state) == ERTS_POLL_WOKEN_IO_READY; } static ERTS_INLINE void -woke_up(ErtsPollSet ps) +woke_up(ErtsPollSet *ps) { if (erts_atomic32_read_nob(&ps->wakeup_state) == ERTS_POLL_NOT_WOKEN) erts_atomic32_cmpxchg_nob(&ps->wakeup_state, @@ -407,7 +385,7 @@ woke_up(ErtsPollSet ps) } static ERTS_INLINE int -wakeup_cause(ErtsPollSet ps) +wakeup_cause(ErtsPollSet *ps) { int res; erts_aint32_t wakeup_state = erts_atomic32_read_acqb(&ps->wakeup_state); @@ -430,46 +408,8 @@ wakeup_cause(ErtsPollSet ps) return res; } -static ERTS_INLINE DWORD -poll_wait_timeout(ErtsPollSet ps, ErtsMonotonicTime timeout_time) -{ - ErtsMonotonicTime current_time, diff_time, timeout; - - if (timeout_time == ERTS_POLL_NO_TIMEOUT) { - no_timeout: - set_timeout_time(ps, ERTS_MONOTONIC_TIME_MIN); - woke_up(ps); - return (DWORD) 0; - } - - current_time = erts_get_monotonic_time(NULL); - diff_time = timeout_time - current_time; - if (diff_time <= 0) - goto no_timeout; - - /* Round up to nearest milli second */ - timeout = (ERTS_MONOTONIC_TO_MSEC(diff_time - 1) + 1); - if (timeout > INT_MAX) - timeout = INT_MAX; /* Also prevents DWORD overflow */ - - set_timeout_time(ps, current_time + ERTS_MSEC_TO_MONOTONIC(timeout)); - - ResetEvent(ps->event_io_ready); - /* - * Since we don't know the internals of ResetEvent() we issue - * a memory barrier as a safety precaution ensuring that - * the load of wakeup_state wont be reordered with stores made - * by ResetEvent(). - */ - ERTS_THR_MEMORY_BARRIER; - if (erts_atomic32_read_nob(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN) - return (DWORD) 0; - - return (DWORD) timeout; -} - static ERTS_INLINE void -wake_poller(ErtsPollSet ps, int io_ready) +wake_poller(ErtsPollSet *ps, int io_ready) { erts_aint32_t wakeup_state; if (io_ready) { @@ -504,13 +444,13 @@ wake_poller(ErtsPollSet ps, int io_ready) } static ERTS_INLINE void -reset_io_ready(ErtsPollSet ps) +reset_io_ready(ErtsPollSet *ps) { erts_atomic32_set_nob(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN); } static ERTS_INLINE void -restore_io_ready(ErtsPollSet ps) +restore_io_ready(ErtsPollSet *ps) { erts_atomic32_set_nob(&ps->wakeup_state, ERTS_POLL_WOKEN_IO_READY); } @@ -520,13 +460,13 @@ restore_io_ready(ErtsPollSet ps) * notifying a poller thread about I/O ready. */ static ERTS_INLINE void -notify_io_ready(ErtsPollSet ps) +notify_io_ready(ErtsPollSet *ps) { wake_poller(ps, 1); } static ERTS_INLINE void -reset_interrupt(ErtsPollSet ps) +reset_interrupt(ErtsPollSet *ps) { /* We need to keep io-ready if set */ erts_aint32_t wakeup_state = erts_atomic32_read_nob(&ps->wakeup_state); @@ -543,12 +483,12 @@ reset_interrupt(ErtsPollSet ps) } static ERTS_INLINE void -set_interrupt(ErtsPollSet ps) +set_interrupt(ErtsPollSet *ps) { wake_poller(ps, 0); } -static void setup_standby_wait(ErtsPollSet ps, int num_threads) +static void setup_standby_wait(ErtsPollSet *ps, int num_threads) { EnterCriticalSection(&(ps->standby_crit)); ps->standby_wait_counter = num_threads; @@ -556,7 +496,7 @@ static void setup_standby_wait(ErtsPollSet ps, int num_threads) LeaveCriticalSection(&(ps->standby_crit)); } -static void signal_standby(ErtsPollSet ps) +static void signal_standby(ErtsPollSet *ps) { EnterCriticalSection(&(ps->standby_crit)); --(ps->standby_wait_counter); @@ -570,7 +510,7 @@ static void signal_standby(ErtsPollSet ps) LeaveCriticalSection(&(ps->standby_crit)); } -static void wait_standby(ErtsPollSet ps) +static void wait_standby(ErtsPollSet *ps) { WaitForSingleObject(ps->standby_wait_event,INFINITE); } @@ -638,7 +578,7 @@ static void consistency_check(Waiter* w) #endif -static void new_waiter(ErtsPollSet ps) +static void new_waiter(ErtsPollSet *ps) { register Waiter* w; DWORD tid; /* Id for thread. */ @@ -732,7 +672,7 @@ static void *break_waiter(void *param) static void *threaded_waiter(void *param) { register Waiter* w = (Waiter *) param; - ErtsPollSet ps = (ErtsPollSet) w->xdata; + ErtsPollSet *ps = (ErtsPollSet*) w->xdata; #ifdef HARD_POLL_DEBUG2 HANDLE oold_fired[64]; int num_oold_fired; @@ -835,9 +775,9 @@ event_happened: ASSERT(i >= WAIT_OBJECT_0+1); i -= WAIT_OBJECT_0; ASSERT(i >= 1); - w->active_events--; HARDDEBUGF(("i = %d, a,h,t = %d,%d,%d",i, w->active_events, w->highwater, w->total_events)); + w->active_events--; #ifdef HARD_POLL_DEBUG2 fired[num_fired++] = w->events[i]; #endif @@ -867,7 +807,7 @@ event_happened: * The actual adding and removing from pollset utilities */ -static int set_driver_select(ErtsPollSet ps, HANDLE event, ErtsPollEvents mode) +static int set_driver_select(ErtsPollSet *ps, HANDLE event, ErtsPollEvents mode) { int i; int best_waiter = -1; /* The waiter with lowest number of events. */ @@ -957,13 +897,13 @@ static int set_driver_select(ErtsPollSet ps, HANDLE event, ErtsPollEvents mode) #endif erts_mtx_unlock(&w->mtx); START_WAITER(ps,w); - HARDDEBUGF(("add select %d %d %d %d",best_waiter, + HARDDEBUGF(("%d: add select %d %d %d %d", event, best_waiter, w->active_events,w->highwater,w->total_events)); return mode; } -static int cancel_driver_select(ErtsPollSet ps, HANDLE event) +static int cancel_driver_select(ErtsPollSet *ps, HANDLE event) { int i; @@ -1018,7 +958,7 @@ static int cancel_driver_select(ErtsPollSet ps, HANDLE event) * Interface functions */ -void erts_poll_interrupt(ErtsPollSet ps, int set /* bool */) +void erts_poll_interrupt(ErtsPollSet *ps, int set /* bool */) { HARDTRACEF(("In erts_poll_interrupt(%d)",set)); if (!set) @@ -1028,35 +968,23 @@ void erts_poll_interrupt(ErtsPollSet ps, int set /* bool */) HARDTRACEF(("Out erts_poll_interrupt(%d)",set)); } -void erts_poll_interrupt_timed(ErtsPollSet ps, - int set /* bool */, - ErtsMonotonicTime timeout_time) -{ - HARDTRACEF(("In erts_poll_interrupt_timed(%d,%ld)",set,timeout_time)); - if (!set) - reset_interrupt(ps); - else if (get_timeout_time(ps) > timeout_time) - set_interrupt(ps); - HARDTRACEF(("Out erts_poll_interrupt_timed")); -} - /* * Windows is special, there is actually only one event type, and * the only difference between ERTS_POLL_EV_IN and ERTS_POLL_EV_OUT * is which driver callback will eventually be called. */ -static ErtsPollEvents do_poll_control(ErtsPollSet ps, - ErtsSysFdType fd, - ErtsPollEvents pe, - int on /* bool */) +static ErtsPollEvents do_poll_control(ErtsPollSet *ps, + ErtsSysFdType fd, + ErtsPollOp op, + ErtsPollEvents pe) { HANDLE event = (HANDLE) fd; ErtsPollEvents mode; ErtsPollEvents result; ASSERT(event != INVALID_HANDLE_VALUE); - if (on) { + if (op != ERTS_POLL_OP_DEL) { if (pe & ERTS_POLL_EV_IN || !(pe & ERTS_POLL_EV_OUT )) { mode = ERTS_POLL_EV_IN; } else { @@ -1069,51 +997,30 @@ static ErtsPollEvents do_poll_control(ErtsPollSet ps, return result; } -ErtsPollEvents erts_poll_control(ErtsPollSet ps, +ErtsPollEvents erts_poll_control(ErtsPollSet *ps, ErtsSysFdType fd, + ErtsPollOp op, ErtsPollEvents pe, - int on, int* do_wake) /* In: Wake up polling thread */ /* Out: Poller is woken */ { ErtsPollEvents result; - HARDTRACEF(("In erts_poll_control(0x%08X, %u, %d)",(unsigned long) fd, (unsigned) pe, on)); + HARDTRACEF(("In erts_poll_control(0x%08X, %s, %s)", + (unsigned long) fd, op2str(op), ev2str(pe))); ERTS_POLLSET_LOCK(ps); - result=do_poll_control(ps,fd,pe,on); + result=do_poll_control(ps, fd, op, pe); ERTS_POLLSET_UNLOCK(ps); *do_wake = 0; /* Never any need to wake polling threads on windows */ HARDTRACEF(("Out erts_poll_control -> %u",(unsigned) result)); return result; } -void erts_poll_controlv(ErtsPollSet ps, - ErtsPollControlEntry pcev[], - int len) -{ - int i; - int hshur = 0; - int do_wake = 0; - - HARDTRACEF(("In erts_poll_controlv(%d)",len)); - ERTS_POLLSET_LOCK(ps); - - for (i = 0; i < len; i++) { - pcev[i].events = do_poll_control(ps, - pcev[i].fd, - pcev[i].events, - pcev[i].on); - } - ERTS_POLLSET_UNLOCK(ps); - HARDTRACEF(("Out erts_poll_controlv")); -} - -int erts_poll_wait(ErtsPollSet ps, +int erts_poll_wait(ErtsPollSet *ps, ErtsPollResFd pr[], - int *len, - ErtsMonotonicTime timeout_time) + int *len) { int no_fds; - DWORD timeout; + DWORD timeout = INFINITE; EventData* ev; int res = 0; int num = 0; @@ -1124,42 +1031,6 @@ int erts_poll_wait(ErtsPollSet ps, HARDTRACEF(("In erts_poll_wait")); ERTS_POLLSET_LOCK(ps); - if (!is_io_ready(ps) && ps->restore_events) { - HARDDEBUGF(("Restore events: %d",ps->num_waiters)); - ps->restore_events = 0; - for (i = 0; i < ps->num_waiters; ++i) { - Waiter* w = ps->waiter[i]; - erts_mtx_lock(&w->mtx); - HARDDEBUGF(("Maybe reset %d %d %d %d",i, - w->active_events,w->highwater,w->total_events)); - if (w->active_events < w->total_events) { - erts_mtx_unlock(&w->mtx); - STOP_WAITER(ps,w); - HARDDEBUGF(("Need reset %d %d %d %d",i, - w->active_events,w->highwater,w->total_events)); - erts_mtx_lock(&w->mtx); - /* Need reset, just check that it doesn't have got more to tell */ - if (w->highwater != w->active_events) { - HARDDEBUGF(("Oups!")); - /* Oups, got signalled before we took the lock, can't reset */ - if(!is_io_ready(ps)) { - erts_exit(ERTS_ERROR_EXIT,"Internal error: " - "Inconsistent io structures in erl_poll.\n"); - } - START_WAITER(ps,w); - erts_mtx_unlock(&w->mtx); - ps->restore_events = 1; - continue; - } - w->active_events = w->highwater = w->total_events; - START_WAITER(ps,w); - erts_mtx_unlock(&w->mtx); - } else { - erts_mtx_unlock(&w->mtx); - } - } - } - no_fds = *len; #ifdef ERTS_POLL_MAX_RES @@ -1167,22 +1038,29 @@ int erts_poll_wait(ErtsPollSet ps, no_fds = ERTS_POLL_MAX_RES; #endif - timeout = poll_wait_timeout(ps, timeout_time); - - /*HARDDEBUGF(("timeout = %ld",(long) timeout));*/ + ResetEvent(ps->event_io_ready); + /* + * Since we don't know the internals of ResetEvent() we issue + * a memory barrier as a safety precaution ensuring that + * the load of wakeup_state wont be reordered with stores made + * by ResetEvent(). + */ + ERTS_THR_MEMORY_BARRIER; + if (erts_atomic32_read_nob(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN) + timeout = (DWORD) 0; - if (timeout > 0 && !erts_atomic32_read_nob(&break_waiter_state)) { + if (!erts_atomic32_read_nob(&break_waiter_state)) { HANDLE harr[2] = {ps->event_io_ready, break_happened_event}; int num_h = 2; - ERTS_MSACC_PUSH_STATE_M(); + ERTS_MSACC_PUSH_STATE(); HARDDEBUGF(("Start waiting %d [%d]",num_h, (int) timeout)); ERTS_POLLSET_UNLOCK(ps); erts_thr_progress_prepare_wait(NULL); - ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP); + ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP); WaitForMultipleObjects(num_h, harr, FALSE, timeout); erts_thr_progress_finalize_wait(NULL); - ERTS_MSACC_POP_STATE_M(); + ERTS_MSACC_POP_STATE(); ERTS_POLLSET_LOCK(ps); HARDDEBUGF(("Stop waiting %d [%d]",num_h, (int) timeout)); woke_up(ps); @@ -1215,7 +1093,7 @@ int erts_poll_wait(ErtsPollSet ps, reset_io_ready(ps); - n = ps->num_waiters; + n = ps->num_waiters; for (i = 0; i < n; i++) { Waiter* w = ps->waiter[i]; @@ -1241,11 +1119,10 @@ int erts_poll_wait(ErtsPollSet ps, HARDDEBUGF(("To many FD's to report!")); goto done; } - HARDDEBUGF(("SET! Restore events")); - ps->restore_events = 1; HARDDEBUGF(("Report %d,%d",i,j)); - pr[num].fd = (ErtsSysFdType) w->events[j]; - pr[num].events = w->evdata[j]->mode; + ERTS_POLL_RES_SET_FD(&pr[num], w->events[j]); + ERTS_POLL_RES_SET_EVTS(&pr[num], w->evdata[j]->mode); + remove_event_from_set(w, j); #ifdef HARD_POLL_DEBUG poll_debug_reported(w->events[j],w->highwater | (j << 16)); poll_debug_reported(w->events[j],first | (last << 16)); @@ -1253,13 +1130,14 @@ int erts_poll_wait(ErtsPollSet ps, ++num; } + w->total_events = w->highwater = w->active_events; + #ifdef DEBUG consistency_check(w); #endif erts_mtx_unlock(&w->mtx); } done: - set_timeout_time(ps, ERTS_MONOTONIC_TIME_MAX); *len = num; ERTS_POLLSET_UNLOCK(ps); HARDTRACEF(("Out erts_poll_wait")); @@ -1274,7 +1152,7 @@ int erts_poll_max_fds(void) return res; } -void erts_poll_info(ErtsPollSet ps, +void erts_poll_info(ErtsPollSet *ps, ErtsPollInfo *pip) { Uint size = 0; @@ -1284,7 +1162,7 @@ void erts_poll_info(ErtsPollSet ps, HARDTRACEF(("In erts_poll_info")); ERTS_POLLSET_LOCK(ps); - size += sizeof(struct ErtsPollSet_); + size += sizeof(struct erts_pollset); size += sizeof(Waiter *) * ps->allocated_waiters; for (i = 0; i < ps->num_waiters; ++i) { Waiter *w = ps->waiter[i]; @@ -1299,16 +1177,12 @@ void erts_poll_info(ErtsPollSet ps, pip->primary = "WaitForMultipleObjects"; - pip->fallback = NULL; - pip->kernel_poll = NULL; pip->memory_size = size; pip->poll_set_size = num_events; - pip->fallback_poll_set_size = 0; - pip->lazy_updates = 0; pip->pending_updates = 0; @@ -1316,6 +1190,8 @@ void erts_poll_info(ErtsPollSet ps, pip->batch_updates = 0; pip->concurrent_updates = 0; + + pip->is_fallback = 0; ERTS_POLLSET_UNLOCK(ps); pip->max_fds = erts_poll_max_fds(); @@ -1323,10 +1199,10 @@ void erts_poll_info(ErtsPollSet ps, } -ErtsPollSet erts_poll_create_pollset(void) +ErtsPollSet *erts_poll_create_pollset(int no) { - ErtsPollSet ps = SEL_ALLOC(ERTS_ALC_T_POLLSET, - sizeof(struct ErtsPollSet_)); + ErtsPollSet *ps = SEL_ALLOC(ERTS_ALC_T_POLLSET, + sizeof(struct erts_pollset)); HARDTRACEF(("In erts_poll_create_pollset")); ps->num_waiters = 0; @@ -1337,17 +1213,15 @@ ErtsPollSet erts_poll_create_pollset(void) ps->standby_wait_counter = 0; ps->event_io_ready = CreateManualEvent(FALSE); ps->standby_wait_event = CreateManualEvent(FALSE); - ps->restore_events = 0; erts_atomic32_init_nob(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN); erts_mtx_init(&ps->mtx, "pollset", NIL, ERTS_LOCK_FLAGS_CATEGORY_IO); - init_timeout_time(ps); HARDTRACEF(("Out erts_poll_create_pollset")); return ps; } -void erts_poll_destroy_pollset(ErtsPollSet ps) +void erts_poll_destroy_pollset(ErtsPollSet *ps) { int i; HARDTRACEF(("In erts_poll_destroy_pollset")); @@ -1378,14 +1252,16 @@ void erts_poll_destroy_pollset(ErtsPollSet ps) /* * Actually mostly initializes the friend module sys_interrupt... */ -void erts_poll_init(void) +void erts_poll_init(int *concurrent_updates) { - erts_tid_t thread; #ifdef HARD_POLL_DEBUG poll_debug_init(); #endif + if (concurrent_updates) + *concurrent_updates = 0; + HARDTRACEF(("In erts_poll_init")); erts_sys_break_event = CreateManualEvent(FALSE); @@ -1394,21 +1270,26 @@ void erts_poll_init(void) break_happened_event = CreateManualEvent(FALSE); erts_atomic32_init_nob(&break_waiter_state, 0); + HARDTRACEF(("Out erts_poll_init")); +} + +void erts_poll_late_init(void) +{ + erts_tid_t thread; erts_thr_create(&thread, &break_waiter, NULL, NULL); ERTS_UNSET_BREAK_REQUESTED; - HARDTRACEF(("Out erts_poll_init")); } /* * Non windows friendly interface, not used when fd's are not continous */ -void erts_poll_get_selected_events(ErtsPollSet ps, +void erts_poll_get_selected_events(ErtsPollSet *ps, ErtsPollEvents ev[], int len) { int i; HARDTRACEF(("In erts_poll_get_selected_events")); for (i = 0; i < len; ++i) - ev[i] = 0; + ev[i] = ERTS_POLL_EV_NONE; HARDTRACEF(("Out erts_poll_get_selected_events")); } diff --git a/erts/emulator/sys/win32/erl_win_dyn_driver.h b/erts/emulator/sys/win32/erl_win_dyn_driver.h index 6f28d513c2..0d1a6d4c87 100644 --- a/erts/emulator/sys/win32/erl_win_dyn_driver.h +++ b/erts/emulator/sys/win32/erl_win_dyn_driver.h @@ -40,7 +40,6 @@ WDD_TYPEDEF(int, driver_exit, (ErlDrvPort, int)); WDD_TYPEDEF(int, driver_failure_eof, (ErlDrvPort)); WDD_TYPEDEF(void, erl_drv_busy_msgq_limits, (ErlDrvPort, ErlDrvSizeT *, ErlDrvSizeT *)); WDD_TYPEDEF(int, driver_select, (ErlDrvPort, ErlDrvEvent, int, int)); -WDD_TYPEDEF(int, driver_event, (ErlDrvPort, ErlDrvEvent,ErlDrvEventData)); WDD_TYPEDEF(int, driver_output, (ErlDrvPort, char *, ErlDrvSizeT)); WDD_TYPEDEF(int, driver_output2, (ErlDrvPort, char *, ErlDrvSizeT ,char *, ErlDrvSizeT)); WDD_TYPEDEF(int, driver_output_binary, (ErlDrvPort, char *, ErlDrvSizeT, ErlDrvBinary*, ErlDrvSizeT, ErlDrvSizeT)); @@ -162,7 +161,7 @@ typedef struct { WDD_FTYPE(driver_failure_eof) *driver_failure_eof; WDD_FTYPE(erl_drv_busy_msgq_limits) *erl_drv_busy_msgq_limits; WDD_FTYPE(driver_select) *driver_select; - WDD_FTYPE(driver_event) *driver_event; + void *REMOVED_driver_event; WDD_FTYPE(driver_output) *driver_output; WDD_FTYPE(driver_output2) *driver_output2; WDD_FTYPE(driver_output_binary) *driver_output_binary; @@ -276,7 +275,6 @@ extern TWinDynDriverCallbacks WinDynDriverCallbacks; #define driver_failure_eof (WinDynDriverCallbacks.driver_failure_eof) #define erl_drv_busy_msgq_limits (WinDynDriverCallbacks.erl_drv_busy_msgq_limits) #define driver_select (WinDynDriverCallbacks.driver_select) -#define driver_event (WinDynDriverCallbacks.driver_event) #define driver_output (WinDynDriverCallbacks.driver_output) #define driver_output2 (WinDynDriverCallbacks.driver_output2) #define driver_output_binary (WinDynDriverCallbacks.driver_output_binary) @@ -414,7 +412,7 @@ do { \ ((W).driver_failure_eof) = driver_failure_eof; \ ((W).erl_drv_busy_msgq_limits) = erl_drv_busy_msgq_limits;\ ((W).driver_select) = driver_select; \ -((W).driver_event) = driver_event; \ +((W).REMOVED_driver_event) = NULL; \ ((W).driver_output) = driver_output; \ ((W).driver_output2) = driver_output2; \ ((W).driver_output_binary) = driver_output_binary; \ diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c index b23dbecbac..0598a12351 100644 --- a/erts/emulator/sys/win32/sys.c +++ b/erts/emulator/sys/win32/sys.c @@ -3235,40 +3235,16 @@ void erl_sys_init(void) SetStdHandle(STD_ERROR_HANDLE, GetStdHandle(STD_OUTPUT_HANDLE)); } erts_sys_init_float(); - erts_init_check_io(); /* Suppress windows error message popups */ SetErrorMode(SetErrorMode(0) | SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX); } +void erts_poll_late_init(void); void erl_sys_late_init(void) { /* do nothing */ + erts_poll_late_init(); } - -void -erts_sys_schedule_interrupt(int set) -{ - erts_check_io_interrupt(set); -} - -void -erts_sys_schedule_interrupt_timed(int set, ErtsMonotonicTime timeout_time) -{ - erts_check_io_interrupt_timed(set, timeout_time); -} - -/* - * Called from schedule() when it runs out of runnable processes, - * or when Erlang code has performed INPUT_REDUCTIONS reduction - * steps. runnable == 0 iff there are no runnable Erlang processes. - */ -void -erl_sys_schedule(int runnable) -{ - erts_check_io(!runnable); - ERTS_LC_ASSERT(!erts_thr_progress_is_blocking()); -} - diff --git a/erts/emulator/test/driver_SUITE.erl b/erts/emulator/test/driver_SUITE.erl index 5fca191679..33d0b708cf 100644 --- a/erts/emulator/test/driver_SUITE.erl +++ b/erts/emulator/test/driver_SUITE.erl @@ -43,9 +43,9 @@ outputv_errors/1, driver_unloaded/1, io_ready_exit/1, + use_fallback_pollset/0, use_fallback_pollset/1, bad_fd_in_pollset/1, - driver_event/1, fd_change/1, steal_control/1, otp_6602/1, @@ -58,11 +58,9 @@ ioq_exit_ready_output/1, ioq_exit_timeout/1, ioq_exit_ready_async/1, - ioq_exit_event/1, ioq_exit_ready_input_async/1, ioq_exit_ready_output_async/1, ioq_exit_timeout_async/1, - ioq_exit_event_async/1, zero_extended_marker_garb_drv/1, invalid_extended_marker_drv/1, larger_major_vsn_drv/1, @@ -86,6 +84,8 @@ -export([bin_prefix/2]). +-export([get_check_io_total/1]). % for z_SUITE.erl + -include_lib("common_test/include/ct.hrl"). @@ -119,17 +119,26 @@ -define(heap_binary_size, 64). init_per_testcase(Case, Config) when is_atom(Case), is_list(Config) -> - case catch erts_debug:get_internal_state(available_internal_state) of - true -> ok; - _ -> erts_debug:set_internal_state(available_internal_state, true) - end, + CIOD = rpc(Config, + fun() -> + case catch erts_debug:get_internal_state(available_internal_state) of + true -> ok; + _ -> erts_debug:set_internal_state(available_internal_state, true) + end, + erts_debug:get_internal_state(check_io_debug) + end), erlang:display({init_per_testcase, Case}), - 0 = element(1, erts_debug:get_internal_state(check_io_debug)), + 0 = element(1, CIOD), [{testcase, Case}|Config]. -end_per_testcase(Case, _Config) -> +end_per_testcase(Case, Config) -> erlang:display({end_per_testcase, Case}), - 0 = element(1, erts_debug:get_internal_state(check_io_debug)), + CIOD = rpc(Config, + fun() -> + get_stable_check_io_info(), + erts_debug:get_internal_state(check_io_debug) + end), + 0 = element(1, CIOD), ok. suite() -> @@ -137,10 +146,13 @@ suite() -> {timetrap, {minutes, 1}}]. all() -> %% Keep a_test first and z_test last... - [a_test, outputv_errors, outputv_echo, queue_echo, {group, timer}, - driver_unloaded, io_ready_exit, use_fallback_pollset, - bad_fd_in_pollset, driver_event, fd_change, - steal_control, otp_6602, driver_system_info_base_ver, + [a_test, outputv_errors, outputv_echo, queue_echo, + {group, timer}, + driver_unloaded, io_ready_exit, otp_6602, + {group, polling}, + {group, poll_thread}, + {group, poll_set}, + driver_system_info_base_ver, driver_system_info_prev_ver, driver_system_info_current_ver, driver_monitor, {group, ioq_exit}, zero_extended_marker_garb_drv, @@ -148,7 +160,6 @@ all() -> %% Keep a_test first and z_test last... larger_minor_vsn_drv, smaller_major_vsn_drv, smaller_minor_vsn_drv, peek_non_existing_queue, otp_6879, caller, many_events, missing_callbacks, - smp_select, driver_select_use, thread_mseg_alloc_cache_clean, otp_9302, thr_free_drv, @@ -161,11 +172,18 @@ groups() -> [{timer, [], [timer_measure, timer_cancel, timer_delay, timer_change]}, + {poll_thread, [], [{group, polling}]}, + {poll_set, [], [{group, polling}]}, + {polling, [], + [a_test, use_fallback_pollset, + bad_fd_in_pollset, fd_change, + steal_control, smp_select, + driver_select_use, z_test]}, {ioq_exit, [], [ioq_exit_ready_input, ioq_exit_ready_output, - ioq_exit_timeout, ioq_exit_ready_async, ioq_exit_event, + ioq_exit_timeout, ioq_exit_ready_async, ioq_exit_ready_input_async, ioq_exit_ready_output_async, - ioq_exit_timeout_async, ioq_exit_event_async]}]. + ioq_exit_timeout_async]}]. init_per_suite(Config) -> Config. @@ -173,10 +191,28 @@ init_per_suite(Config) -> end_per_suite(_Config) -> catch erts_debug:set_internal_state(available_internal_state, false). +init_per_group(poll_thread, Config) -> + [{node_args, "+IOt 2"} | Config]; +init_per_group(poll_set, Config) -> + [{node_args, "+IOt 2 +IOp 2"} | Config]; +init_per_group(polling, Config) -> + case proplists:get_value(node_args, Config) of + undefined -> + Config; + Args -> + {ok, Node} = start_node(polling, Args), + [{node, Node} | Config] + end; init_per_group(_GroupName, Config) -> Config. end_per_group(_GroupName, Config) -> + case proplists:get_value(node, Config) of + undefined -> + ok; + Node -> + stop_node(Node) + end, Config. %% Test sending bad types to port with an outputv-capable driver. @@ -778,21 +814,23 @@ io_ready_exit(Config) when is_list(Config) -> -define(CHKIO_STOP, 0). -define(CHKIO_USE_FALLBACK_POLLSET, 1). -define(CHKIO_BAD_FD_IN_POLLSET, 2). --define(CHKIO_DRIVER_EVENT, 3). -define(CHKIO_FD_CHANGE, 4). -define(CHKIO_STEAL, 5). -define(CHKIO_STEAL_AUX, 6). -define(CHKIO_SMP_SELECT, 7). -define(CHKIO_DRV_USE, 8). +use_fallback_pollset() -> + [{timetrap, {minutes, 2}}]. + use_fallback_pollset(Config) when is_list(Config) -> + rpc(Config, fun() -> use_fallback_pollset_t(Config) end). + +use_fallback_pollset_t(Config) when is_list(Config) -> FlbkFun = fun () -> - ChkIoDuring = erlang:system_info(check_io), - case lists:keysearch(fallback_poll_set_size, - 1, - ChkIoDuring) of - {value, - {fallback_poll_set_size, N}} when N > 0 -> + {Flbk, _} = get_fallback(erlang:system_info(check_io)), + case lists:keysearch(total_poll_set_size, 1, Flbk) of + {value, {total_poll_set_size, N}} when N > 0 -> ok; Error -> ct:fail({failed_to_use_fallback, Error}) @@ -814,6 +852,7 @@ use_fallback_pollset(Config) when is_list(Config) -> Skip -> {fun () -> ok end, Skip, ok} end, + io:format("Node = ~p~n",[node()]), case chkio_test_fini(chkio_test(Handel, ?CHKIO_USE_FALLBACK_POLLSET, fun () -> @@ -825,27 +864,31 @@ use_fallback_pollset(Config) when is_list(Config) -> end. bad_fd_in_pollset(Config) when is_list(Config) -> - chkio_test_fini(chkio_test(chkio_test_init(Config), - ?CHKIO_BAD_FD_IN_POLLSET, - fun () -> sleep(1000) end)). - -driver_event(Config) when is_list(Config) -> - chkio_test_fini(chkio_test(chkio_test_init(Config), - ?CHKIO_DRIVER_EVENT, - fun () -> sleep(1000) end)). + rpc(Config, + fun() -> + chkio_test_fini(chkio_test(chkio_test_init(Config), + ?CHKIO_BAD_FD_IN_POLLSET, + fun () -> sleep(1000) end)) + end). fd_change(Config) when is_list(Config) -> - chkio_test_fini(chkio_test(chkio_test_init(Config), - ?CHKIO_FD_CHANGE, - fun () -> sleep(1000) end)). + rpc(Config, + fun() -> + chkio_test_fini(chkio_test(chkio_test_init(Config), + ?CHKIO_FD_CHANGE, + fun () -> sleep(1000) end)) + end). steal_control(Config) when is_list(Config) -> - chkio_test_fini(case chkio_test_init(Config) of - {erts_poll_info, _} = Hndl -> - steal_control_test(Hndl); - Skip -> - Skip - end). + rpc(Config, + fun() -> + chkio_test_fini(case chkio_test_init(Config) of + {erts_poll_info, _} = Hndl -> + steal_control_test(Hndl); + Skip -> + Skip + end) + end). steal_control_test(Hndl = {erts_poll_info, Before}) -> Port = open_chkio_port(), @@ -887,7 +930,7 @@ chkio_test_init(Config) when is_list(Config) -> ChkIo = get_stable_check_io_info(), case catch lists:keysearch(name, 1, ChkIo) of {value, {name, erts_poll}} -> - io:format("Before test: ~p~n", [ChkIo]), + ct:log("Before test: ~p~n", [ChkIo]), Path = proplists:get_value(data_dir, Config), erl_ddll:start(), ok = load_driver(Path, 'chkio_drv'), @@ -948,8 +991,9 @@ chkio_test({erts_poll_info, Before}, "ok" -> chk_chkio_port(Port), Fun(), - During = erlang:system_info(check_io), + During = get_check_io_total(erlang:system_info(check_io)), erlang:display(During), + 0 = element(1, erts_debug:get_internal_state(check_io_debug)), io:format("During test: ~p~n", [During]), chk_chkio_port(Port), @@ -1001,22 +1045,83 @@ verify_chkio_state(Before, After) -> ok. get_stable_check_io_info() -> - ChkIo = erlang:system_info(check_io), - PendUpdNo = case lists:keysearch(pending_updates, 1, ChkIo) of - {value, {pending_updates, PendNo}} -> - PendNo; - false -> - 0 - end, - {value, {active_fds, ActFds}} = lists:keysearch(active_fds, 1, ChkIo), + get_stable_check_io_info(10). +get_stable_check_io_info(0) -> + get_check_io_total(erlang:system_info(check_io)); +get_stable_check_io_info(N) -> + ChkIo = get_check_io_total(erlang:system_info(check_io)), + PendUpdNo = proplists:get_value(pending_updates, ChkIo, 0), + ActFds = proplists:get_value(active_fds, ChkIo), case {PendUpdNo, ActFds} of {0, 0} -> ChkIo; _ -> - receive after 10 -> ok end, - get_stable_check_io_info() + receive after 100 -> ok end, + get_stable_check_io_info(N-1) end. +%% Merge return from erlang:system_info(check_io) +%% as if it was one big pollset. +get_check_io_total(ChkIo) -> + ct:log("ChkIo = ~p~n",[ChkIo]), + {Fallback, Rest} = get_fallback(ChkIo), + add_fallback_infos(Fallback, + lists:foldl(fun(Pollset, Acc) -> + lists:zipwith(fun(A, B) -> + add_pollset_infos(A,B) + end, + Pollset, Acc) + end, + hd(Rest), tl(Rest))). + +add_pollset_infos({Tag, A}=TA , {Tag, B}=TB) -> + case tag_type(Tag) of + sum -> + {Tag, A + B}; + const -> + case A of + B -> TA; + _ -> + ct:fail("Unexpected diff in pollsets ~p != ~p", + [TA,TB]) + end + end. + +get_fallback([MaybeFallback | ChkIo] = AllChkIo) -> + case proplists:get_value(fallback, MaybeFallback) of + true -> + {MaybeFallback, ChkIo}; + false -> + {undefined, AllChkIo} + end. + +add_fallback_infos(undefined, Acc) -> + Acc; +add_fallback_infos(Flbk, Acc) -> + lists:zipwith(fun({Tag, A}=TA, {Tag, B}=TB) -> + case tag_type(Tag) of + sum -> {Tag, A + B}; + const when Tag =:= fallback -> TA; + const -> TB + end + end, + Flbk, Acc). + +tag_type(name) -> const; +tag_type(primary) -> const; +tag_type(fallback) -> const; +tag_type(kernel_poll) -> const; +tag_type(memory_size) -> sum; +tag_type(total_poll_set_size) -> sum; +tag_type(lazy_updates) -> const; +tag_type(pending_updates) -> sum; +tag_type(batch_updates) -> const; +tag_type(concurrent_updates) -> const; +tag_type(max_fds) -> const; +tag_type(active_fds) -> sum; +tag_type(poll_threads) -> sum. + + %% Missed port lock when stealing control of fd from a %% driver that didn't use the same lock. The lock checker %% used to trigger on this and dump core. @@ -1336,11 +1441,9 @@ driver_monitor(Config) when is_list(Config) -> -define(IOQ_EXIT_READY_OUTPUT, 2). -define(IOQ_EXIT_TIMEOUT, 3). -define(IOQ_EXIT_READY_ASYNC, 4). --define(IOQ_EXIT_EVENT, 5). -define(IOQ_EXIT_READY_INPUT_ASYNC, 6). -define(IOQ_EXIT_READY_OUTPUT_ASYNC, 7). -define(IOQ_EXIT_TIMEOUT_ASYNC, 8). --define(IOQ_EXIT_EVENT_ASYNC, 9). ioq_exit_test(Config, TestNo) -> Drv = ioq_exit_drv, @@ -1393,9 +1496,6 @@ ioq_exit_timeout(Config) when is_list(Config) -> ioq_exit_ready_async(Config) when is_list(Config) -> ioq_exit_test(Config, ?IOQ_EXIT_READY_ASYNC). -ioq_exit_event(Config) when is_list(Config) -> - ioq_exit_test(Config, ?IOQ_EXIT_EVENT). - ioq_exit_ready_input_async(Config) when is_list(Config) -> ioq_exit_test(Config, ?IOQ_EXIT_READY_INPUT_ASYNC). @@ -1405,9 +1505,6 @@ ioq_exit_ready_output_async(Config) when is_list(Config) -> ioq_exit_timeout_async(Config) when is_list(Config) -> ioq_exit_test(Config, ?IOQ_EXIT_TIMEOUT_ASYNC). -ioq_exit_event_async(Config) when is_list(Config) -> - ioq_exit_test(Config, ?IOQ_EXIT_EVENT_ASYNC). - vsn_mismatch_test(Config, LoadResult) -> Path = proplists:get_value(data_dir, Config), @@ -1641,7 +1738,7 @@ missing_callbacks(Config) when is_list(Config) -> smp_select(Config) when is_list(Config) -> case os:type() of {win32,_} -> {skipped, "Test not implemented for this OS"}; - _ -> smp_select0(Config) + _ -> rpc(Config, fun() -> smp_select0(Config) end) end. smp_select0(Config) -> @@ -1697,7 +1794,7 @@ smp_select_wait(Pids, TimeoutMsg) -> driver_select_use(Config) when is_list(Config) -> case os:type() of {win32,_} -> {skipped, "Test not implemented for this OS"}; - _ -> driver_select_use0(Config) + _ -> rpc(Config, fun() -> driver_select_use0(Config) end) end. driver_select_use0(Config) -> @@ -2264,10 +2361,10 @@ count_proc_sched(Ps, PNs) -> end. a_test(Config) when is_list(Config) -> - check_io_debug(). + rpc(Config, fun check_io_debug/0). z_test(Config) when is_list(Config) -> - check_io_debug(). + rpc(Config, fun check_io_debug/0). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Utilities @@ -2275,8 +2372,8 @@ z_test(Config) when is_list(Config) -> check_io_debug() -> get_stable_check_io_info(), - {NoErrorFds, NoUsedFds, NoDrvSelStructs, NoDrvEvStructs} = CheckIoDebug - = erts_debug:get_internal_state(check_io_debug), + {NoErrorFds, NoUsedFds, NoDrvSelStructs, NoEnifSelStructs} + = CheckIoDebug = erts_debug:get_internal_state(check_io_debug), HasGetHost = has_gethost(), ct:log("check_io_debug: ~p~n" "HasGetHost: ~p",[CheckIoDebug, HasGetHost]), @@ -2289,7 +2386,7 @@ check_io_debug() -> %% one extra used fd that is not selected on ok end, - 0 = NoDrvEvStructs, + 0 = NoEnifSelStructs, ok. has_gethost() -> @@ -2459,15 +2556,19 @@ sleep(Ms) when is_integer(Ms), Ms >= 0 -> start_node(Config) when is_list(Config) -> + start_node(proplists:get_value(testcase, Config)); +start_node(Name) -> + start_node(Name, ""). +start_node(NodeName, Args) -> Pa = filename:dirname(code:which(?MODULE)), Name = list_to_atom(atom_to_list(?MODULE) ++ "-" - ++ atom_to_list(proplists:get_value(testcase, Config)) + ++ atom_to_list(NodeName) ++ "-" ++ integer_to_list(erlang:system_time(second)) ++ "-" ++ integer_to_list(erlang:unique_integer([positive]))), - test_server:start_node(Name, slave, [{args, "-pa "++Pa}]). + test_server:start_node(Name, slave, [{args, Args ++ " -pa "++Pa}]). stop_node(Node) -> test_server:stop_node(Node). @@ -2500,3 +2601,35 @@ driver_alloc_size() -> Sz0+Sz end, 0, CS) end. + +rpc(Config, Fun) -> + case proplists:get_value(node, Config) of + undefined -> + Fun(); + Node -> + Self = self(), + Ref = make_ref(), + Pid = spawn(Node, + fun() -> + Result + = try Fun() of + Res -> Res + catch E:R -> + {'EXIT',E,R,erlang:get_stacktrace()} + end, + Self ! {Ref, Result} + end), + MRef = monitor(process, Pid), + receive + {'DOWN', MRef, _Type, _Object, Info} -> + erlang:error({died, Pid, Info}); + {Ref, {'EXIT',E,R,ST}} -> + erlang:demonitor(MRef, [flush]), + erlang:raise(E,R,ST); + {Ref, Ret} -> + erlang:demonitor(MRef, [flush]), + Ret; + Other -> + ct:fail(Other) + end + end. diff --git a/erts/emulator/test/driver_SUITE_data/chkio_drv.c b/erts/emulator/test/driver_SUITE_data/chkio_drv.c index 8e5e81665c..d548c4b1dc 100644 --- a/erts/emulator/test/driver_SUITE_data/chkio_drv.c +++ b/erts/emulator/test/driver_SUITE_data/chkio_drv.c @@ -42,7 +42,6 @@ #define CHKIO_STOP 0 #define CHKIO_USE_FALLBACK_POLLSET 1 #define CHKIO_BAD_FD_IN_POLLSET 2 -#define CHKIO_DRIVER_EVENT 3 #define CHKIO_FD_CHANGE 4 #define CHKIO_STEAL 5 #define CHKIO_STEAL_AUX 6 @@ -67,15 +66,6 @@ typedef struct { } ChkioFallbackData; typedef struct { - int in_fd; - struct erl_drv_event_data in_data; - int in_ok; - int out_fd; - struct erl_drv_event_data out_data; - int out_ok; -} ChkioDriverEvent; - -typedef struct { int fds[2]; int same_fd; } ChkioFdChange; @@ -86,14 +76,10 @@ typedef struct { typedef struct { int driver_select_fds[2]; - int driver_event_fds[2]; - struct erl_drv_event_data event_data[2]; } ChkioSteal; typedef struct { int driver_select_fds[2]; - int driver_event_fds[2]; - struct erl_drv_event_data event_data[2]; } ChkioStealAux; @@ -141,7 +127,6 @@ static ErlDrvData chkio_drv_start(ErlDrvPort, char *); static void chkio_drv_stop(ErlDrvData); static void chkio_drv_ready_input(ErlDrvData, ErlDrvEvent); static void chkio_drv_ready_output(ErlDrvData, ErlDrvEvent); -static void chkio_drv_ready_event(ErlDrvData, ErlDrvEvent, ErlDrvEventData); static ErlDrvSSizeT chkio_drv_control(ErlDrvData, unsigned int, char *, ErlDrvSizeT, char **, ErlDrvSizeT); static void chkio_drv_timeout(ErlDrvData); @@ -164,7 +149,7 @@ static ErlDrvEntry chkio_drv_entry = { NULL, /* ready_async */ NULL, /* flush */ NULL, /* call */ - chkio_drv_ready_event, + NULL, /* unused_event_callback */ ERL_DRV_EXTENDED_MARKER, ERL_DRV_EXTENDED_MAJOR_VERSION, @@ -243,25 +228,6 @@ stop_use_fallback_pollset(ChkioDrvData *cddp) } static void -stop_driver_event(ChkioDrvData *cddp) -{ - if (cddp->test_data) { - ChkioDriverEvent *cdep = cddp->test_data; - cddp->test_data = NULL; - - if (cdep->in_fd >= 0) { - driver_event(cddp->port, (ErlDrvEvent) (ErlDrvSInt) cdep->in_fd, NULL); - close(cdep->in_fd); - } - if (cdep->out_fd >= 0) { - driver_event(cddp->port, (ErlDrvEvent) (ErlDrvSInt) cdep->out_fd, NULL); - close(cdep->out_fd); - } - driver_free(cdep); - } -} - -static void stop_fd_change(ChkioDrvData *cddp) { if (cddp->test_data) { @@ -305,14 +271,6 @@ stop_steal(ChkioDrvData *cddp) (ErlDrvEvent) (ErlDrvSInt) csp->driver_select_fds[1], DO_WRITE, 0); - if (csp->driver_event_fds[0] >= 0) - driver_event(cddp->port, - (ErlDrvEvent) (ErlDrvSInt) csp->driver_event_fds[0], - NULL); - if (csp->driver_event_fds[1] >= 0) - driver_event(cddp->port, - (ErlDrvEvent) (ErlDrvSInt) csp->driver_event_fds[1], - NULL); driver_free(csp); } } @@ -327,10 +285,6 @@ stop_steal_aux(ChkioDrvData *cddp) close(csap->driver_select_fds[0]); if (csap->driver_select_fds[1] >= 0) close(csap->driver_select_fds[1]); - if (csap->driver_event_fds[0] >= 0) - close(csap->driver_event_fds[0]); - if (csap->driver_event_fds[1] >= 0) - close(csap->driver_event_fds[1]); driver_free(csap); } } @@ -354,10 +308,13 @@ static void free_smp_select(ChkioSmpSelect* pip, ErlDrvPort port) abort(); } case Selected: - driver_select(port, (ErlDrvEvent)(ErlDrvSInt)pip->read_fd, DO_READ, 0); - /*fall through*/ case Opened: - close(pip->read_fd); + TRACEF(("%T: Close pipe [%d->%d]\n", driver_mk_port(port), pip->write_fd, + pip->read_fd)); + if (pip->wasSelected) + driver_select(port, (ErlDrvEvent)(ErlDrvSInt)pip->read_fd, DO_READ|ERL_DRV_USE, 0); + else + close(pip->read_fd); close(pip->write_fd); pip->state = Closed; break; @@ -445,9 +402,6 @@ chkio_drv_stop(ErlDrvData drv_data) { case CHKIO_BAD_FD_IN_POLLSET: stop_bad_fd_in_pollset(cddp); break; - case CHKIO_DRIVER_EVENT: - stop_driver_event(cddp); - break; case CHKIO_FD_CHANGE: stop_fd_change(cddp); break; @@ -557,6 +511,9 @@ chkio_drv_ready_input(ErlDrvData drv_data, ErlDrvEvent event) driver_failure_atom(cddp->port, "input_fd_not_found"); break; } + case CHKIO_FD_CHANGE: + /* This may be triggered when an fd is closed while being selected on. */ + break; case CHKIO_STEAL: break; case CHKIO_STEAL_AUX: @@ -621,55 +578,6 @@ chkio_drv_ready_input(ErlDrvData drv_data, ErlDrvEvent event) } static void -chkio_drv_ready_event(ErlDrvData drv_data, - ErlDrvEvent event, - ErlDrvEventData event_data) -{ -#ifdef UNIX - ChkioDrvData *cddp = (ChkioDrvData *) drv_data; - switch (cddp->test) { - case CHKIO_DRIVER_EVENT: { -#ifdef HAVE_POLL_H - ChkioDriverEvent *cdep = cddp->test_data; - int fd = (int) (ErlDrvSInt) event; - if (fd == cdep->in_fd) { - if (event_data->events == POLLIN - && event_data->revents == POLLIN) { - cdep->in_ok++; - } - else { - driver_failure_atom(cddp->port, "invalid_input_fd_events"); - } - break; - } - if (fd == cdep->out_fd) { - if (event_data->events == POLLOUT - && event_data->revents == POLLOUT) { - cdep->out_ok++; - } - else { - driver_failure_atom(cddp->port, "invalid_output_fd_events"); - } - break; - } -#endif - } - case CHKIO_STEAL: -#ifdef HAVE_POLL_H - break; -#endif - case CHKIO_STEAL_AUX: -#ifdef HAVE_POLL_H - break; -#endif - default: - driver_failure_atom(cddp->port, "unexpected_ready_event"); - break; - } -#endif /* UNIX */ -} - -static void chkio_drv_timeout(ErlDrvData drv_data) { #ifdef UNIX @@ -779,25 +687,6 @@ chkio_drv_control(ErlDrvData drv_data, res_len = -1; stop_bad_fd_in_pollset(cddp); break; - case CHKIO_DRIVER_EVENT: { - ChkioDriverEvent *cdep = cddp->test_data; - if (!cdep->in_ok || !cdep->out_ok) { - if (!cdep->in_ok) - driver_failure_atom(cddp->port, "got_no_input_events"); - if (!cdep->out_ok) - driver_failure_atom(cddp->port, "got_no_output_events"); - } - else { - char *c = driver_alloc(sizeof(char)*2*30); - if (!c) - driver_failure_posix(cddp->port, ENOMEM); - *rbuf = c; - res_len = sprintf(c, "in=%d\nout=%d\n", - cdep->in_ok, cdep->out_ok); - } - stop_driver_event(cddp); - break; - } case CHKIO_FD_CHANGE: { ChkioFdChange *cfcp = cddp->test_data; if (!cfcp->same_fd) @@ -937,69 +826,6 @@ chkio_drv_control(ErlDrvData drv_data, res_len = -1; break; } - case CHKIO_DRIVER_EVENT: { -#ifndef HAVE_POLL_H - res_str = "skip: Need the poll.h header for this test, but it doesn't exist"; - res_len = -1; -#else /* HAVE_POLL_H */ - int in_fd = open("/dev/zero", O_RDONLY); - int out_fd = open("/dev/null", O_WRONLY); - - if (in_fd < 0 || out_fd < 0) { - if (in_fd >= 0) - close(in_fd); - if (out_fd >= 0) - close(out_fd); - driver_failure_posix(cddp->port, errno); - } - else { - ChkioDriverEvent *cdep = driver_alloc(sizeof(ChkioDriverEvent)); - if (!cdep) - driver_failure_posix(cddp->port, ENOMEM); - else { - int res; - cddp->test_data = cdep; - - cdep->in_fd = in_fd; - cdep->in_data.events = POLLIN; - cdep->in_data.revents = 0; - cdep->in_ok = 0; - - res = driver_event(cddp->port, - (ErlDrvEvent) (ErlDrvSInt) in_fd, - &cdep->in_data); - if (res < 0) { - res_str = "skip: driver_event() not supported"; - res_len = -1; - close(in_fd); - close(out_fd); - cdep->in_fd = -1; - cdep->out_fd = -1; - } - else { - res_str = "ok"; - res_len = -1; - - cdep->out_fd = out_fd; - cdep->out_data.events = POLLOUT; - cdep->out_data.revents = 0; - cdep->out_ok = 0; - - res = driver_event(cddp->port, - (ErlDrvEvent) (ErlDrvSInt) out_fd, - &cdep->out_data); - if (res < 0) { - close(out_fd); - cdep->out_fd = -1; - driver_failure_atom(cddp->port, "driver_event_failed"); - } - } - - } - } -#endif /* HAVE_POLL_H */ - break; - } case CHKIO_FD_CHANGE: { ChkioFdChange *cfcp = driver_alloc(sizeof(ChkioFdChange)); if (!cfcp) @@ -1028,58 +854,19 @@ chkio_drv_control(ErlDrvData drv_data, res_len = -1; } else { - int driver_event_fds[2]; int driver_select_fds[2]; cddp->test_data = csp; memcpy(c, buf, len); c[len] = '\0'; if (sscanf(c, - "fds:%d:%d:%d:%d", + "fds:%d:%d", &driver_select_fds[0], - &driver_select_fds[1], - &driver_event_fds[0], - &driver_event_fds[1]) != 4) - driver_failure_atom(cddp->port, "bad_input"); + &driver_select_fds[1]) != 2) + driver_failure_atom(cddp->port, "bad_input"); else { int res = 0; - if (driver_event_fds[0] < 0) { /* Have no working driver_event() ... */ - csp->driver_select_fds[0] = driver_select_fds[0]; /* In */ - csp->driver_select_fds[1] = driver_select_fds[1]; /* Out */ - csp->driver_event_fds[0] = -1; - csp->driver_event_fds[1] = -1; - } - else { /* Have working driver_event() ... */ -#ifndef HAVE_POLL_H - driver_failure_atom(cddp->port, "unexpected_result"); - res = -1; -#else - csp->driver_select_fds[0] = driver_select_fds[0]; /* In */ - csp->driver_event_fds[1] = driver_select_fds[1]; /* Out */ - csp->driver_event_fds[0] = driver_event_fds[0]; /* In */ - csp->driver_select_fds[1] = driver_event_fds[1]; /* Out */ - - /* Steal with driver_event() */ - - csp->event_data[0].events = POLLIN; - csp->event_data[0].revents = 0; - res = driver_event(cddp->port, - (ErlDrvEvent) (ErlDrvSInt) csp->driver_event_fds[0], - &csp->event_data[0]); - if (res < 0) - driver_failure_atom(cddp->port, - "driver_event_failed_to_steal"); - if (res >= 0) { - csp->event_data[1].events = POLLOUT; - csp->event_data[1].revents = 0; - res = driver_event(cddp->port, - (ErlDrvEvent) (ErlDrvSInt) csp->driver_event_fds[1], - &csp->event_data[1]); - if (res < 0) - driver_failure_atom(cddp->port, - "driver_event_failed_to_steal"); - } -#endif - } + csp->driver_select_fds[0] = driver_select_fds[0]; /* In */ + csp->driver_select_fds[1] = driver_select_fds[1]; /* Out */ /* Steal with driver_select() */ if (res >= 0) { @@ -1109,37 +896,17 @@ chkio_drv_control(ErlDrvData drv_data, break; } case CHKIO_STEAL_AUX: { - int read_fds[2]; - int write_fds[2]; + int read_fd; + int write_fd; - read_fds[0] = open("/dev/zero", O_RDONLY); - write_fds[0] = open("/dev/null", O_WRONLY); - -#ifdef HAVE_POLL_H - read_fds[1] = open("/dev/zero", O_RDONLY); - write_fds[1] = open("/dev/null", O_WRONLY); -#else - read_fds[1] = -1; - write_fds[1] = -1; -#endif + read_fd = open("/dev/zero", O_RDONLY); + write_fd = open("/dev/null", O_WRONLY); - if (read_fds[0] < 0 - || write_fds[0] < 0 -#ifdef HAVE_POLL_H - || read_fds[1] < 0 - || write_fds[1] < 0 -#endif - ) { - if (read_fds[0] < 0) - close(read_fds[0]); - if (write_fds[0] < 0) - close(write_fds[0]); -#ifdef HAVE_POLL_H - if (read_fds[1] < 0) - close(read_fds[1]); - if (write_fds[1] < 0) - close(write_fds[1]); -#endif + if (read_fd < 0 || write_fd < 0) { + if (read_fd < 0) + close(read_fd); + if (write_fd < 0) + close(write_fd); driver_failure_posix(cddp->port, errno); } else { @@ -1153,11 +920,8 @@ chkio_drv_control(ErlDrvData drv_data, int res; cddp->test_data = csap; - csap->driver_select_fds[0] = read_fds[0]; - csap->driver_select_fds[1] = write_fds[0]; - - csap->driver_event_fds[0] = read_fds[1]; - csap->driver_event_fds[1] = write_fds[1]; + csap->driver_select_fds[0] = read_fd; + csap->driver_select_fds[1] = write_fd; res = driver_select(cddp->port, (ErlDrvEvent) (ErlDrvSInt) csap->driver_select_fds[0], @@ -1173,32 +937,6 @@ chkio_drv_control(ErlDrvData drv_data, if (res < 0) driver_failure_atom(cddp->port, "driver_select_failed"); } -#ifdef HAVE_POLL_H - if (res >= 0) { - csap->event_data[0].events = POLLIN; - csap->event_data[0].revents = 0; - res = driver_event(cddp->port, - (ErlDrvEvent) (ErlDrvSInt) csap->driver_event_fds[0], - &csap->event_data[0]); - if (res < 0) { - close(csap->driver_event_fds[0]); - csap->driver_event_fds[0] = -1; - close(csap->driver_event_fds[1]); - csap->driver_event_fds[1] = -1; - res = 0; - } - else { - csap->event_data[1].events = POLLOUT; - csap->event_data[1].revents = 0; - res = driver_event(cddp->port, - (ErlDrvEvent) (ErlDrvSInt) csap->driver_event_fds[1], - &csap->event_data[1]); - if (res < 0) - driver_failure_atom(cddp->port, - "driver_event_failed"); - } - } -#endif if (res < 0) { res_str = "error"; res_len = -1; @@ -1213,11 +951,9 @@ chkio_drv_control(ErlDrvData drv_data, else { *rbuf = c; res_len = sprintf(c, - "fds:%d:%d:%d:%d", + "fds:%d:%d", csap->driver_select_fds[0], - csap->driver_select_fds[1], - csap->driver_event_fds[0], - csap->driver_event_fds[1]); + csap->driver_select_fds[1]); } } } @@ -1257,7 +993,7 @@ chkio_drv_control(ErlDrvData drv_data, } TRACEF(("%T: Created pipe [%d->%d]\n", cddp->id, fds[1], fds[0])); pip->read_fd = fds[0]; - pip->write_fd = fds[1]; + pip->write_fd = fds[1]; pip->state = Opened; pip->wasSelected = 0; pip->next_write = pip->next_read = rand_r(&pip->rand_state) % 1024; @@ -1267,7 +1003,8 @@ chkio_drv_control(ErlDrvData drv_data, }/*fall through*/ case Opened: { if (op & 1) { - TRACEF(("%T: Write %d to opened pipe [%d->%d]\n", cddp->id, pip->next_write, pip->write_fd, pip->read_fd)); + TRACEF(("%T: Write %d to opened pipe [%d->%d]\n", cddp->id, + pip->next_write, pip->write_fd, pip->read_fd)); if (write(pip->write_fd, &pip->next_write, sizeof(int)) != sizeof(int)) { fprintf(stderr, "Failed to write to pipe fd=%d, errno=%d\n", pip->write_fd, errno); abort(); @@ -1276,8 +1013,12 @@ chkio_drv_control(ErlDrvData drv_data, } op >>= 1; if (pip->wasSelected && (op & 1)) { - TRACEF(("%T: Close pipe [%d->%d]\n", cddp->id, pip->write_fd, pip->read_fd)); - if (close(pip->read_fd) || close(pip->write_fd)) { + TRACEF(("%T: Close pipe [%d->%d]\n", cddp->id, pip->write_fd, + pip->read_fd)); + drv_use_singleton.fd_stop_select = -2; /* disable stop_select asserts */ + if (driver_select(cddp->port, (ErlDrvEvent)(ErlDrvSInt)pip->read_fd, + DO_READ|ERL_DRV_USE, 0) + || close(pip->write_fd)) { fprintf(stderr, "Failed to close pipe, errno=%d\n", errno); abort(); } @@ -1285,8 +1026,10 @@ chkio_drv_control(ErlDrvData drv_data, break; } else { - TRACEF(("%T: Select on pipe [%d->%d]\n", cddp->id, pip->write_fd, pip->read_fd)); - if (driver_select(cddp->port, (ErlDrvEvent)(ErlDrvSInt)pip->read_fd, DO_READ, 1)) { + TRACEF(("%T: Select on pipe [%d->%d]\n", cddp->id, + pip->write_fd, pip->read_fd)); + if (driver_select(cddp->port, (ErlDrvEvent)(ErlDrvSInt)pip->read_fd, + DO_READ|ERL_DRV_USE, 1)) { fprintf(stderr, "driver_select failed for fd=%d\n", pip->read_fd); abort(); } @@ -1294,13 +1037,13 @@ chkio_drv_control(ErlDrvData drv_data, pip->wasSelected = 1; op >>= 1; if (pip->next_write != pip->next_read) { /* pipe not empty */ - if (op & 1) { + if (op & 1) { pip->state = Waiting; /* Wait for reader */ break; } op >>= 1; } - } + } }/*fall through*/ case Selected: if (op & 1) { @@ -1329,7 +1072,7 @@ chkio_drv_control(ErlDrvData drv_data, fprintf(stderr, "Failed to write to pipe fd=%d, errno=%d\n", pip->write_fd, errno); abort(); } - pip->next_write++; + pip->next_write++; } break; case Waiting: @@ -1583,7 +1326,12 @@ static void chkio_drv_stop_select(ErlDrvEvent e, void* null) if (!(drv_use_singleton.fd_stop_select < 0)) { assert_print("fd_stop_select<0", __LINE__); abort(); } - drv_use_singleton.fd_stop_select = (int)(long)e; + /* fd_stop_select counting is disabled if this is set to -2 */ + if (drv_use_singleton.fd_stop_select == -2) { + TRACEF(("closing %d\n", (int)(long)e)); + close((int)(long)e); + } else + drv_use_singleton.fd_stop_select = (int)(long)e; /* Can't call chkio_drv_use directly here. That could even be recursive. * Next timeout will detect it instead. */ diff --git a/erts/emulator/test/driver_SUITE_data/ioq_exit_drv.c b/erts/emulator/test/driver_SUITE_data/ioq_exit_drv.c index d87c2bec93..fa58e9d5ec 100644 --- a/erts/emulator/test/driver_SUITE_data/ioq_exit_drv.c +++ b/erts/emulator/test/driver_SUITE_data/ioq_exit_drv.c @@ -25,8 +25,7 @@ * - ready_input(), * - ready_output(), * - timeout(), - * - driver_async() -> read_async(), and - * - event() + * - driver_async() -> read_async() */ #ifndef UNIX @@ -65,11 +64,9 @@ typedef enum { IOQ_EXIT_READY_OUTPUT = 2, IOQ_EXIT_TIMEOUT = 3, IOQ_EXIT_READY_ASYNC = 4, - IOQ_EXIT_EVENT = 5, IOQ_EXIT_READY_INPUT_ASYNC = 6, IOQ_EXIT_READY_OUTPUT_ASYNC = 7, IOQ_EXIT_TIMEOUT_ASYNC = 8, - IOQ_EXIT_EVENT_ASYNC = 9 } IOQExitTest; typedef struct { @@ -80,9 +77,6 @@ typedef struct { int outstanding_async_task; long async_task; ErlDrvPDL pdl; -#ifdef HAVE_POLL_H - struct erl_drv_event_data event_data; -#endif } IOQExitDrvData; #define EV2FD(EV) ((int) ((long) (EV))) @@ -97,8 +91,6 @@ static ErlDrvSSizeT control(ErlDrvData, unsigned int, static void timeout(ErlDrvData drv_data); static void ready_async(ErlDrvData drv_data, ErlDrvThreadData thread_data); static void flush(ErlDrvData drv_data); -static void event(ErlDrvData drv_data, ErlDrvEvent event, - ErlDrvEventData event_data); static void async_invoke(void*); static void do_driver_async(IOQExitDrvData *); @@ -118,7 +110,7 @@ static ErlDrvEntry ioq_exit_drv_entry = { ready_async, flush, NULL /* call */, - event, + NULL /* unused_event_callback*/, ERL_DRV_EXTENDED_MARKER, ERL_DRV_EXTENDED_MAJOR_VERSION, ERL_DRV_EXTENDED_MINOR_VERSION, @@ -149,10 +141,6 @@ start(ErlDrvPort port, char *command) ddp->outstanding_async_task = 0; ddp->async_task = -1; ddp->pdl = driver_pdl_create(port); -#ifdef HAVE_POLL_H - ddp->event_data.events = (short) 0; - ddp->event_data.revents = (short) 0; -#endif return (ErlDrvData) ddp; } @@ -192,27 +180,6 @@ static ErlDrvSSizeT control(ErlDrvData drv_data, #else goto done; #endif - case IOQ_EXIT_EVENT: - case IOQ_EXIT_EVENT_ASYNC: -#ifdef UNIX -#ifdef HAVE_POLL_H - ddp->ofd = open("/dev/null", O_WRONLY); - if (ddp->ofd < 0) { - driver_failure_posix(ddp->port, errno); - return 0; - } - else if (driver_event(ddp->port, FD2EV(ddp->ofd), NULL) != 0) { - res_str = "skip: driver_event() not supported"; - goto done; - } -#else - res_str = "skip: No poll.h found which is needed for this test"; - goto done; -#endif - break; -#else /* UNIX */ - goto done; -#endif case IOQ_EXIT_TIMEOUT: case IOQ_EXIT_TIMEOUT_ASYNC: break; @@ -266,13 +233,6 @@ static void stop(ErlDrvData drv_data) close(ddp->ofd); } break; - case IOQ_EXIT_EVENT: - case IOQ_EXIT_EVENT_ASYNC: - if (ddp->ofd >= 0) { - driver_event(ddp->port, FD2EV(ddp->ofd), NULL); - close(ddp->ofd); - } - break; #endif case IOQ_EXIT_TIMEOUT: case IOQ_EXIT_TIMEOUT_ASYNC: @@ -302,13 +262,6 @@ static void flush(ErlDrvData drv_data) case IOQ_EXIT_READY_OUTPUT_ASYNC: driver_select(ddp->port, FD2EV(ddp->ofd), DO_WRITE, 1); break; - case IOQ_EXIT_EVENT: - case IOQ_EXIT_EVENT_ASYNC: -#ifdef HAVE_POLL_H - ddp->event_data.events |= POLLOUT; - driver_event(ddp->port, FD2EV(ddp->ofd), &ddp->event_data); -#endif - break; #endif case IOQ_EXIT_TIMEOUT: case IOQ_EXIT_TIMEOUT_ASYNC: @@ -395,30 +348,6 @@ static void ready_async(ErlDrvData drv_data, ErlDrvThreadData thread_data) } } -static void event(ErlDrvData drv_data, - ErlDrvEvent event, - ErlDrvEventData event_data) -{ - IOQExitDrvData *ddp = (IOQExitDrvData *) drv_data; - - PRINTF(("event(%p, %d, %p) called\r\n", drv_data, EV2FD(event), event_data)); - -#if defined(UNIX) && defined(HAVE_POLL_H) - if (ddp->ofd == EV2FD(event)) { - driver_event(ddp->port, FD2EV(ddp->ofd), NULL); - close(ddp->ofd); - ddp->ofd = -1; - if (ddp->test == IOQ_EXIT_EVENT_ASYNC) - do_driver_async(ddp); - else { - driver_pdl_lock(ddp->pdl); - driver_deq(ddp->port, 1); - driver_pdl_unlock(ddp->pdl); - } - } -#endif -} - static void async_invoke(void *arg) { PRINTF(("async_invoke(%p) called\r\n", arg)); diff --git a/erts/emulator/test/driver_SUITE_data/missing_callback_drv.c b/erts/emulator/test/driver_SUITE_data/missing_callback_drv.c index e7480d2e00..14838f0377 100644 --- a/erts/emulator/test/driver_SUITE_data/missing_callback_drv.c +++ b/erts/emulator/test/driver_SUITE_data/missing_callback_drv.c @@ -41,10 +41,6 @@ typedef struct { int ofd; int ifd; - int efd; -#ifdef HAVE_POLL_H - struct erl_drv_event_data edata; -#endif } mcd_data_t; static ErlDrvData start(ErlDrvPort port, char *command); @@ -90,7 +86,6 @@ start(ErlDrvPort port, char *command) mcd->ofd = -1; mcd->ifd = -1; - mcd->efd = -1; #ifdef UNIX @@ -105,15 +100,6 @@ start(ErlDrvPort port, char *command) goto error; if (driver_select(port, (ErlDrvEvent) (long) mcd->ifd, DO_READ, 1) != 0) goto error; - -#ifdef HAVE_POLL_H - mcd->efd = open("/dev/null", O_WRONLY); - if (mcd->efd < 0) - goto error; - mcd->edata.events = POLLOUT; - mcd->edata.revents = 0; - driver_event(port, (ErlDrvEvent) (long) mcd->efd, &mcd->edata); -#endif #endif driver_set_timer(port, 0); @@ -135,10 +121,6 @@ stop(ErlDrvData data) close(mcd->ofd); if (mcd->ifd >= 0) close(mcd->ifd); -#ifdef HAVE_POLL_H - if (mcd->efd >= 0) - close(mcd->efd); -#endif #endif driver_free(mcd); } diff --git a/erts/emulator/test/lttng_SUITE.erl b/erts/emulator/test/lttng_SUITE.erl index a012fa1da2..19c3844c40 100644 --- a/erts/emulator/test/lttng_SUITE.erl +++ b/erts/emulator/test/lttng_SUITE.erl @@ -81,7 +81,6 @@ end_per_testcase(Case, _Config) -> %% Not tested yet %% org_erlang_otp:driver_process_exit -%% org_erlang_otp:driver_event %% tracepoints %% @@ -100,7 +99,6 @@ end_per_testcase(Case, _Config) -> %% org_erlang_otp:driver_flush %% org_erlang_otp:driver_stop_select %% org_erlang_otp:driver_timeout -%% org_erlang_otp:driver_event %% org_erlang_otp:driver_ready_output %% org_erlang_otp:driver_ready_input %% org_erlang_otp:driver_output @@ -431,7 +429,6 @@ txt() -> "%% org_erlang_otp:driver_flush\n" "%% org_erlang_otp:driver_stop_select\n" "%% org_erlang_otp:driver_timeout\n" - "%% org_erlang_otp:driver_event\n" "%% org_erlang_otp:driver_ready_output\n" "%% org_erlang_otp:driver_ready_input\n" "%% org_erlang_otp:driver_output\n" diff --git a/erts/emulator/test/nif_SUITE.erl b/erts/emulator/test/nif_SUITE.erl index 223bd7d586..bec2291867 100644 --- a/erts/emulator/test/nif_SUITE.erl +++ b/erts/emulator/test/nif_SUITE.erl @@ -25,13 +25,14 @@ %%-define(CHECK(Exp,Got), Exp = Got). -include_lib("common_test/include/ct.hrl"). +-include_lib("stdlib/include/assert.hrl"). -export([all/0, suite/0, groups/0, init_per_group/2, end_per_group/2, init_per_testcase/2, end_per_testcase/2, basic/1, reload_error/1, upgrade/1, heap_frag/1, t_on_load/1, - select/1, + select/1, select_steal/1, monitor_process_a/1, monitor_process_b/1, monitor_process_c/1, @@ -42,9 +43,9 @@ types/1, many_args/1, binaries/1, get_string/1, get_atom/1, maps/1, api_macros/1, - from_array/1, iolist_as_binary/1, resource/1, resource_binary/1, + from_array/1, iolist_as_binary/1, resource/1, resource_binary/1, resource_takeover/1, - threading/1, send/1, send2/1, send3/1, send_threaded/1, neg/1, + threading/1, send/1, send2/1, send3/1, send_threaded/1, neg/1, is_checks/1, get_length/1, make_atom/1, make_string/1, reverse_list_test/1, otp_9828/1, @@ -79,7 +80,7 @@ all() -> [{group, G} || G <- api_groups()] ++ [reload_error, heap_frag, types, many_args, - select, + select, select_steal, {group, monitor}, monitor_frenzy, hipe, @@ -144,7 +145,8 @@ init_per_testcase(nif_whereis_threaded, Config) -> true -> Config; false -> {skip, "No thread support"} end; -init_per_testcase(select, Config) -> +init_per_testcase(Select, Config) when Select =:= select; + Select =:= select_steal -> case os:type() of {win32,_} -> {skip, "Test not yet implemented for windows"}; @@ -152,6 +154,9 @@ init_per_testcase(select, Config) -> Config end; init_per_testcase(_Case, Config) -> + %% Clear any resource dtor data before test starts in case another tc + %% left it in a bad state + catch last_resource_dtor_call(), Config. end_per_testcase(t_on_load, _Config) -> @@ -590,7 +595,71 @@ select_3(_Config) -> {_,_,2} = last_resource_dtor_call(), ok. -check_stop_ret(?ERL_NIF_SELECT_STOP_CALLED) -> ok; +%% @doc The stealing child process for the select_steal test. Duplicates given +%% W/RFds and runs select on them to steal +select_steal_child_process(Parent, RFd) -> + %% Duplicate the resource with the same FD + {R2Fd, _R2Ptr} = dupe_resource_nif(RFd), + Ref2 = make_ref(), + + %% Try to select from the child pid (steal from parent) + ?assertEqual(0, select_nif(R2Fd, ?ERL_NIF_SELECT_READ, R2Fd, null, Ref2)), + ?assertEqual([], flush(0)), + ?assertEqual(eagain, read_nif(R2Fd, 1)), + + %% Check that now events arrive to this temporary process + Parent ! {self(), stage1}, % signal parent to send the <<"stolen1">> + + %% Receive <<"stolen1">> via enif_select + ?assertEqual(0, select_nif(R2Fd, ?ERL_NIF_SELECT_READ, R2Fd, null, Ref2)), + ?assertMatch([{select, R2Fd, Ref2, ready_input}], flush()), + ?assertEqual(<<"stolen1">>, read_nif(R2Fd, 7)), + + clear_select_nif(R2Fd), + + % do not do this here - stop_selecting(R2Fd, R2Rsrc, Ref2), + Parent ! {self(), done}. + +%% @doc Similar to select/1 test, make a double ended pipe. Then try to steal +%% the socket, see what happens. +select_steal(Config) when is_list(Config) -> + ensure_lib_loaded(Config), + + Ref = make_ref(), + {{RFd, RPtr}, {WFd, WPtr}} = pipe_nif(), + + %% Bind the socket to current pid in enif_select + ?assertEqual(0, select_nif(RFd, ?ERL_NIF_SELECT_READ, RFd, null, Ref)), + ?assertEqual([], flush(0)), + + %% Spawn a process and do some stealing + Parent = self(), + Pid = spawn_link(fun() -> select_steal_child_process(Parent, RFd) end), + + %% Signal from the child to send the first message + {Pid, stage1} = receive_any(), + ?assertEqual(ok, write_nif(WFd, <<"stolen1">>)), + + ?assertMatch([{Pid, done}], flush(1)), % synchronize with the child + + %% Try to select from the parent pid (steal back) + ?assertEqual(0, select_nif(RFd, ?ERL_NIF_SELECT_READ, RFd, Pid, Ref)), + + %% Ensure that no data is hanging and close. + %% Rfd is stolen at this point. + check_stop_ret(select_nif(WFd, ?ERL_NIF_SELECT_STOP, WFd, null, Ref)), + ?assertMatch([{fd_resource_stop, WPtr, _}], flush()), + {1, {WPtr, 1}} = last_fd_stop_call(), + + check_stop_ret(select_nif(RFd, ?ERL_NIF_SELECT_STOP, RFd, null, Ref)), + ?assertMatch([{fd_resource_stop, RPtr, _}], flush()), + {1, {RPtr, 1}} = last_fd_stop_call(), + + ?assert(is_closed_nif(WFd)), + + ok. + +check_stop_ret(?ERL_NIF_SELECT_STOP_CALLED) -> ok; check_stop_ret(?ERL_NIF_SELECT_STOP_SCHEDULED) -> ok. write_full(W, C) -> @@ -3193,10 +3262,12 @@ binary_to_term_nif(_, _, _) -> ?nif_stub. port_command_nif(_, _) -> ?nif_stub. format_term_nif(_,_) -> ?nif_stub. select_nif(_,_,_,_,_) -> ?nif_stub. +dupe_resource_nif(_) -> ?nif_stub. pipe_nif() -> ?nif_stub. write_nif(_,_) -> ?nif_stub. read_nif(_,_) -> ?nif_stub. is_closed_nif(_) -> ?nif_stub. +clear_select_nif(_) -> ?nif_stub. last_fd_stop_call() -> ?nif_stub. alloc_monitor_resource_nif() -> ?nif_stub. monitor_process_nif(_,_,_,_) -> ?nif_stub. diff --git a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c index b47d013bd2..79560a38aa 100644 --- a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c +++ b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c @@ -2448,6 +2448,13 @@ static int get_fd(ErlNifEnv* env, ERL_NIF_TERM term, struct fd_resource** rsrc) return 1; } +/* Returns: badarg + * Or an enif_select result, which is a combination of bits: + * ERL_NIF_SELECT_STOP_CALLED = 1 + * ERL_NIF_SELECT_STOP_SCHEDULED = 2 + * ERL_NIF_SELECT_INVALID_EVENT = 4 + * ERL_NIF_SELECT_FAILED = 8 + */ static ERL_NIF_TERM select_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { struct fd_resource* fdr; @@ -2479,6 +2486,9 @@ static ERL_NIF_TERM select_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv } #ifndef __WIN32__ +/* + * Create a read-write pipe with two fds (to read and to write) + */ static ERL_NIF_TERM pipe_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { struct fd_resource* read_rsrc; @@ -2514,6 +2524,30 @@ static ERL_NIF_TERM pipe_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[] enif_make_tuple2(env, write_fd, make_pointer(env, write_rsrc))); } +/* + * Create (dupe) of a resource with the same fd, to test stealing + */ +static ERL_NIF_TERM dupe_resource_nif(ErlNifEnv* env, int argc, + const ERL_NIF_TERM argv[]) { + struct fd_resource* orig_rsrc; + + if (!get_fd(env, argv[0], &orig_rsrc)) { + return enif_make_badarg(env); + } else { + struct fd_resource* new_rsrc; + ERL_NIF_TERM new_fd; + + new_rsrc = enif_alloc_resource(fd_resource_type, + sizeof(struct fd_resource)); + new_rsrc->fd = orig_rsrc->fd; + new_rsrc->was_selected = 0; + new_fd = enif_make_resource(env, new_rsrc); + enif_release_resource(new_rsrc); + + return enif_make_tuple2(env, new_fd, make_pointer(env, new_rsrc)); + } +} + static ERL_NIF_TERM write_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { struct fd_resource* fdr; @@ -2589,6 +2623,20 @@ static ERL_NIF_TERM is_closed_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM a return fdr->fd < 0 ? atom_true : atom_false; } + +static ERL_NIF_TERM clear_select_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + struct fd_resource* fdr = NULL; + + if (!get_fd(env, argv[0], &fdr)) + return enif_make_badarg(env); + + fdr->fd = -1; + fdr->was_selected = 0; + + return atom_ok; +} + #endif /* !__WIN32__ */ @@ -3476,8 +3524,10 @@ static ErlNifFunc nif_funcs[] = #ifndef __WIN32__ {"pipe_nif", 0, pipe_nif}, {"write_nif", 2, write_nif}, + {"dupe_resource_nif", 1, dupe_resource_nif}, {"read_nif", 2, read_nif}, {"is_closed_nif", 1, is_closed_nif}, + {"clear_select_nif", 1, clear_select_nif}, #endif {"last_fd_stop_call", 0, last_fd_stop_call}, {"alloc_monitor_resource_nif", 0, alloc_monitor_resource_nif}, diff --git a/erts/emulator/test/scheduler_SUITE.erl b/erts/emulator/test/scheduler_SUITE.erl index 12e26671c2..7afb82a1b8 100644 --- a/erts/emulator/test/scheduler_SUITE.erl +++ b/erts/emulator/test/scheduler_SUITE.erl @@ -57,6 +57,7 @@ scheduler_suspend_basic/1, scheduler_suspend/1, dirty_scheduler_threads/1, + poll_threads/1, reader_groups/1]). suite() -> @@ -72,6 +73,7 @@ all() -> {group, scheduler_bind}, scheduler_threads, scheduler_suspend_basic, scheduler_suspend, dirty_scheduler_threads, + poll_threads, reader_groups]. groups() -> @@ -1446,6 +1448,79 @@ sst5_loop(N) -> erlang:system_flag(multi_scheduling, unblock_normal), sst5_loop(N-1). +poll_threads(Config) when is_list(Config) -> + {Conc, PollType, KP} = get_ioconfig(Config), + {Sched, SchedOnln, _} = get_sstate(Config, ""), + + [1, 1] = get_ionum(Config,"+IOt 2 +IOp 2"), + [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 5"), + + [1, 1] = get_ionum(Config, "+S 2 +IOPt 100 +IOPp 100"), + + if + Conc -> + [5] = get_ionum(Config,"+IOt 5 +IOp 1"), + [3, 2] = get_ionum(Config,"+IOt 5 +IOp 2"), + [2, 2, 2, 2, 2] = get_ionum(Config,"+IOt 10 +IOPp 50"), + + [2] = get_ionum(Config, "+S 2 +IOPt 100"), + [4] = get_ionum(Config, "+S 4 +IOPt 100"), + [4] = get_ionum(Config, "+S 4:2 +IOPt 100"), + [4, 4] = get_ionum(Config, "+S 8 +IOPt 100 +IOPp 25"), + + fail = get_ionum(Config, "+IOt 1 +IOp 2"), + + ok; + not Conc -> + [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 1"), + [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 2"), + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 10 +IOPp 50"), + + [1, 1] = get_ionum(Config, "+S 2 +IOPt 100"), + [1, 1, 1, 1] = get_ionum(Config, "+S 4 +IOPt 100"), + [1, 1, 1, 1] = get_ionum(Config, "+S 4:2 +IOPt 100"), + [1, 1, 1, 1, 1, 1, 1, 1] = get_ionum(Config, "+S 8 +IOPt 100 +IOPp 25"), + + [1] = get_ionum(Config, "+IOt 1 +IOp 2"), + + ok + end, + + fail = get_ionum(Config, "+IOt 1 +IOPp 101"), + fail = get_ionum(Config, "+IOt 0"), + fail = get_ionum(Config, "+IOPt 101"), + + ok. + +get_ioconfig(Config) -> + [PS | _] = get_iostate(Config, ""), + {proplists:get_value(concurrent_updates, PS), + proplists:get_value(primary, PS), + proplists:get_value(kernel_poll, PS)}. + +get_ionum(Config, Cmd) -> + case get_iostate(Config, Cmd) of + fail -> fail; + PSs -> + lists:reverse( + lists:sort( + [proplists:get_value(poll_threads, PS) || PS <- PSs])) + end. + +get_iostate(Config, Cmd)-> + case start_node(Config, Cmd) of + {ok, Node} -> + [IOStates] = mcall(Node,[fun () -> + erlang:system_info(check_io) + end]), + IO = [IOState || IOState <- IOStates, + proplists:get_value(fallback, IOState) == false], + stop_node(Node), + IO; + {error,timeout} -> + fail + end. + reader_groups(Config) when is_list(Config) -> %% White box testing. These results are correct, but other results %% could be too... @@ -1770,18 +1845,24 @@ mcall(Node, Funs) -> Parent = self(), Refs = lists:map(fun (Fun) -> Ref = make_ref(), - spawn_link(Node, - fun () -> - Res = Fun(), - unlink(Parent), - Parent ! {Ref, Res} - end), - Ref + Pid = spawn(Node, + fun () -> + Res = Fun(), + unlink(Parent), + Parent ! {Ref, Res} + end), + MRef = erlang:monitor(process, Pid), + {Ref, MRef} end, Funs), - lists:map(fun (Ref) -> + lists:map(fun ({Ref, MRef}) -> receive {Ref, Res} -> - Res + receive + {'DOWN',MRef,_,_,_} -> + Res + end; + {'DOWN',MRef,_,_,Reason} -> + Reason end end, Refs). diff --git a/erts/emulator/test/statistics_SUITE.erl b/erts/emulator/test/statistics_SUITE.erl index 7a396d273c..029a6de897 100644 --- a/erts/emulator/test/statistics_SUITE.erl +++ b/erts/emulator/test/statistics_SUITE.erl @@ -674,6 +674,16 @@ msacc_test(TmpFile) -> ets:insert(Tid, {1, hello}), ets:delete(Tid), + %% Check some IO + {ok, L} = gen_tcp:listen(0, [{active, true},{reuseaddr,true}]), + {ok, Port} = inet:port(L), + Pid = spawn(fun() -> + {ok, S} = gen_tcp:accept(L), + (fun F() -> receive M -> F() end end)() + end), + {ok, C} = gen_tcp:connect("localhost", Port, []), + [begin gen_tcp:send(C,"hello"),timer:sleep(1) end || _ <- lists:seq(1,100)], + %% Collect some garbage [erlang:garbage_collect() || _ <- lists:seq(1,100)], diff --git a/erts/emulator/test/z_SUITE.erl b/erts/emulator/test/z_SUITE.erl index feea7432a9..ac3df8bfbf 100644 --- a/erts/emulator/test/z_SUITE.erl +++ b/erts/emulator/test/z_SUITE.erl @@ -330,7 +330,7 @@ display_check_io(ChkIo) -> ok. get_check_io_info() -> - ChkIo = erlang:system_info(check_io), + ChkIo = driver_SUITE:get_check_io_total(erlang:system_info(check_io)), PendUpdNo = case lists:keysearch(pending_updates, 1, ChkIo) of {value, {pending_updates, PendNo}} -> PendNo; diff --git a/erts/etc/common/erlexec.c b/erts/etc/common/erlexec.c index 6b194e25da..d61a3cbf95 100644 --- a/erts/etc/common/erlexec.c +++ b/erts/etc/common/erlexec.c @@ -810,6 +810,28 @@ int main(int argc, char **argv) add_Eargs(argv[i+1]); i++; break; + case 'I': + if (argv[i][2] == 'O' && (argv[i][3] == 't' || argv[i][3] == 'p')) { + if (argv[i][4] != '\0') + goto the_default; + argv[i][0] = '-'; + add_Eargs(argv[i]); + add_Eargs(argv[i+1]); + i++; + break; + } + if (argv[i][2] == 'O' && argv[i][3] == 'P' && + (argv[i][4] == 't' || argv[i][4] == 'p')) { + if (argv[i][5] != '\0') + goto the_default; + argv[i][0] = '-'; + add_Eargs(argv[i]); + add_Eargs(argv[i+1]); + i++; + break; + } + usage(argv[i]); + break; case 'S': if (argv[i][2] == 'P') { if (argv[i][3] != '\0') diff --git a/erts/etc/unix/etp-commands.in b/erts/etc/unix/etp-commands.in index 8f70f879d5..95e065b156 100644 --- a/erts/etc/unix/etp-commands.in +++ b/erts/etc/unix/etp-commands.in @@ -2316,40 +2316,46 @@ end define etp-port-sched-flags-int # Args: int # - if ($arg0 & 0x1) + if ($arg0 & (1 << 0)) printf " in-run-queue" end - if ($arg0 & 0x2) + if ($arg0 & (1 << 1)) printf " executing" end - if ($arg0 & 0x4) + if ($arg0 & (1 << 2)) printf " have-tasks" end - if ($arg0 & 0x8) + if ($arg0 & (1 << 3)) printf " exited" end - if ($arg0 & 0x10) + if ($arg0 & (1 << 4)) printf " busy-port" end - if ($arg0 & 0x20) + if ($arg0 & (1 << 5)) printf " busy-port-q" end - if ($arg0 & 0x40) + if ($arg0 & (1 << 6)) printf " chk-unset-busy-port-q" end - if ($arg0 & 0x80) + if ($arg0 & (1 << 7)) printf " have-busy-tasks" end - if ($arg0 & 0x100) + if ($arg0 & (1 << 8)) printf " have-nosuspend-tasks" end - if ($arg0 & 0x200) + if ($arg0 & (1 << 9)) printf " parallelism" end - if ($arg0 & 0x400) + if ($arg0 & (1 << 10)) printf " force-sched" end - if ($arg0 & 0xfffff800) + if ($arg0 & (1 << 11)) + printf " exiting" + end + if ($arg0 & (1 << 12)) + printf " exec-imm" + end + if ($arg0 & 0xffffc000) printf " GARBAGE" end printf "\n" @@ -2850,6 +2856,14 @@ define etp-run-queue-info-internal printf " Pointer: (ErtsRunQueue *) %p\n", $runq end +define etp-fds + if $_exitsignal == -1 + call erts_check_io_debug(0) + else + printf "Not yet implemented for core files" + end +end + define etp-disasm-1 set $code_ptr = ((BeamInstr*)$arg0) set $addr = *$code_ptr @@ -4314,6 +4328,10 @@ document etp-init %--------------------------------------------------------------------------- end +define hook-run + set $_exitsignal = -1 +end + etp-init help etp-init etp-show diff --git a/lib/kernel/test/gen_udp_SUITE.erl b/lib/kernel/test/gen_udp_SUITE.erl index 8364fe7cdc..96e495505a 100644 --- a/lib/kernel/test/gen_udp_SUITE.erl +++ b/lib/kernel/test/gen_udp_SUITE.erl @@ -288,46 +288,56 @@ bad_address(Config) when is_list(Config) -> %% %% Starts a slave node that on command sends a bunch of messages %% to our UDP port. The receiving process just receives and -%% ignores the incoming messages, but counts them. -%% A tracing process traces the receiving process for -%% 'receive' and scheduling events. From the trace, -%% message contents is verified; and, how many messages -%% are received per in/out scheduling, which should be -%% the same as the read_packets parameter. -%% +%% ignores the incoming messages. +%% A tracing process traces the receiving port for +%% 'send' and scheduling events. From the trace, +%% how many messages are received per in/out scheduling, +%% which should never be more than the read_packet parameter. %% OTP-6249 UDP option for number of packet reads. read_packets(Config) when is_list(Config) -> N1 = 5, - N2 = 7, + N2 = 1, + Msgs = 30000, {ok,R} = gen_udp:open(0, [{read_packets,N1}]), {ok,RP} = inet:port(R), {ok,Node} = start_node(gen_udp_SUITE_read_packets), Die = make_ref(), - Loop = erlang:spawn_link(fun () -> infinite_loop(Die) end), %% - Msgs1 = [erlang:integer_to_list(M) || M <- lists:seq(1, N1*3)], - [V1|_] = read_packets_test(R, RP, Msgs1, Node), + {V1, Trace1} = read_packets_test(R, RP, Msgs, Node), {ok,[{read_packets,N1}]} = inet:getopts(R, [read_packets]), %% ok = inet:setopts(R, [{read_packets,N2}]), - Msgs2 = [erlang:integer_to_list(M) || M <- lists:seq(1, N2*3)], - [V2|_] = read_packets_test(R, RP, Msgs2, Node), + {V2, Trace2} = read_packets_test(R, RP, Msgs, Node), {ok,[{read_packets,N2}]} = inet:getopts(R, [read_packets]), %% stop_node(Node), - Mref = erlang:monitor(process, Loop), - Loop ! Die, - receive - {'DOWN',Mref,_,_, normal} -> - case {V1,V2} of - {N1,N2} -> - ok; - _ when V1 =/= N1, V2 =/= N2 -> - ok - end + ct:log("N1=~p, V1=~p vs N2=~p, V2=~p",[N1,V1,N2,V2]), + + dump_terms(Config, "trace1.terms", Trace2), + dump_terms(Config, "trace2.terms", Trace2), + + %% Because of the inherit racy-ness of the feature it is + %% hard to test that it behaves correctly. + %% Right now (OTP 21) a port task takes 5% of the + %% allotted port task reductions to execute, so + %% the max number of executions a port is allowed to + %% do before being re-scheduled is N * 20 + + if + V1 > (N1 * 20) -> + ct:fail("Got ~p msgs, max was ~p", [V1, N1]); + V2 > (N2 * 20) -> + ct:fail("Got ~p msgs, max was ~p", [V2, N2]); + true -> + ok end. +dump_terms(Config, Name, Terms) -> + FName = filename:join(proplists:get_value(priv_dir, Config),Name), + file:write_file(FName, term_to_binary(Terms)), + ct:log("Logged terms to ~s",[FName]). + infinite_loop(Die) -> receive Die -> @@ -338,7 +348,6 @@ infinite_loop(Die) -> end. read_packets_test(R, RP, Msgs, Node) -> - Len = length(Msgs), Receiver = self(), Tracer = spawn_link( @@ -363,24 +372,24 @@ read_packets_test(R, RP, Msgs, Node) -> [link,{priority,high}]), receive {Sender,{port,SP}} -> - erlang:trace(self(), true, - [running,'receive',{tracer,Tracer}]), + erlang:trace(R, true, + [running_ports,'send',{tracer,Tracer}]), erlang:yield(), Sender ! {Receiver,go}, - read_packets_recv(Len), - erlang:trace(self(), false, [all]), + read_packets_recv(Msgs), + erlang:trace(R, false, [all]), Tracer ! {Receiver,get_trace}, receive {Tracer,{trace,Trace}} -> - read_packets_verify(R, SP, Msgs, Trace) + {read_packets_verify(R, SP, Trace), Trace} end end. -read_packets_send(S, RP, [Msg|Msgs]) -> - ok = gen_udp:send(S, localhost, RP, Msg), - read_packets_send(S, RP, Msgs); -read_packets_send(_S, _RP, []) -> - ok. +read_packets_send(_S, _RP, 0) -> + ok; +read_packets_send(S, RP, Msgs) -> + ok = gen_udp:send(S, localhost, RP, "UDP FLOOOOOOD"), + read_packets_send(S, RP, Msgs - 1). read_packets_recv(0) -> ok; @@ -392,23 +401,24 @@ read_packets_recv(N) -> timeout end. -read_packets_verify(R, SP, Msg, Trace) -> - lists:reverse( - lists:sort(read_packets_verify(R, SP, Msg, Trace, 0))). - -read_packets_verify(R, SP, Msgs, [{trace,Self,OutIn,_}|Trace], M) - when Self =:= self(), OutIn =:= out; - Self =:= self(), OutIn =:= in -> - push(M, read_packets_verify(R, SP, Msgs, Trace, 0)); -read_packets_verify(R, SP, [Msg|Msgs], - [{trace,Self,'receive',{udp,R,{127,0,0,1},SP,Msg}} - |Trace], M) +read_packets_verify(R, SP, Trace) -> + [Max | _] = Pkts = lists:reverse(lists:sort(read_packets_verify(R, SP, Trace, 0))), + ct:pal("~p",[lists:sublist(Pkts,10)]), + Max. + +read_packets_verify(R, SP, [{trace,R,OutIn,_}|Trace], M) + when OutIn =:= out; OutIn =:= in -> + push(M, read_packets_verify(R, SP, Trace, 0)); +read_packets_verify(R, SP, [{trace, R,'receive',timeout}|Trace], M) -> + push(M, read_packets_verify(R, SP, Trace, 0)); +read_packets_verify(R, SP, + [{trace,R,'send',{udp,R,{127,0,0,1},SP,_Msg}, Self} | Trace], M) when Self =:= self() -> - read_packets_verify(R, SP, Msgs, Trace, M+1); -read_packets_verify(_R, _SP, [], [], M) -> + read_packets_verify(R, SP, Trace, M+1); +read_packets_verify(_R, _SP, [], M) -> push(M, []); -read_packets_verify(_R, _SP, Msgs, Trace, M) -> - ct:fail({read_packets_verify,mismatch,Msgs,Trace,M}). +read_packets_verify(_R, _SP, Trace, M) -> + ct:fail({read_packets_verify,mismatch,Trace,M}). push(0, Vs) -> Vs; diff --git a/system/doc/tutorial/port_driver.c b/system/doc/tutorial/port_driver.c index 37de67310f..8b441733ed 100644 --- a/system/doc/tutorial/port_driver.c +++ b/system/doc/tutorial/port_driver.c @@ -51,8 +51,7 @@ ErlDrvEntry example_driver_entry = { queue */ NULL, /* F_PTR call, much like control, sync call to driver */ - NULL, /* F_PTR event, called when an event selected - by driver_event() occurs. */ + NULL, /* unused */ ERL_DRV_EXTENDED_MARKER, /* int extended marker, Should always be set to indicate driver versioning */ ERL_DRV_EXTENDED_MAJOR_VERSION, /* int major_version, should always be |