aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator
diff options
context:
space:
mode:
Diffstat (limited to 'erts/emulator')
-rw-r--r--erts/emulator/Makefile.in35
-rw-r--r--erts/emulator/beam/atom.c2
-rw-r--r--erts/emulator/beam/atom.names4
-rw-r--r--erts/emulator/beam/beam_bif_load.c59
-rw-r--r--erts/emulator/beam/beam_emu.c1
-rw-r--r--erts/emulator/beam/bif.c4
-rw-r--r--erts/emulator/beam/bif.tab27
-rw-r--r--erts/emulator/beam/big.c19
-rw-r--r--erts/emulator/beam/copy.c10
-rw-r--r--erts/emulator/beam/erl_alloc.c3
-rw-r--r--erts/emulator/beam/erl_alloc.types6
-rw-r--r--erts/emulator/beam/erl_alloc_util.c4
-rw-r--r--erts/emulator/beam/erl_async.c2
-rw-r--r--erts/emulator/beam/erl_bif_atomics.c256
-rw-r--r--erts/emulator/beam/erl_bif_counters.c255
-rw-r--r--erts/emulator/beam/erl_bif_lists.c838
-rw-r--r--erts/emulator/beam/erl_bif_persistent.c983
-rw-r--r--erts/emulator/beam/erl_bif_unique.h6
-rw-r--r--erts/emulator/beam/erl_db_tree.c12
-rw-r--r--erts/emulator/beam/erl_dirty_bif.tab2
-rw-r--r--erts/emulator/beam/erl_gc.c33
-rw-r--r--erts/emulator/beam/erl_hl_timer.c26
-rw-r--r--erts/emulator/beam/erl_init.c5
-rw-r--r--erts/emulator/beam/erl_lock_check.c2
-rw-r--r--erts/emulator/beam/erl_monitor_link.h15
-rw-r--r--erts/emulator/beam/erl_node_tables.c21
-rw-r--r--erts/emulator/beam/erl_port.h2
-rw-r--r--erts/emulator/beam/erl_port_task.c81
-rw-r--r--erts/emulator/beam/erl_port_task.h21
-rw-r--r--erts/emulator/beam/erl_process.c200
-rw-r--r--erts/emulator/beam/erl_process.h4
-rw-r--r--erts/emulator/beam/erl_process_dump.c40
-rw-r--r--erts/emulator/beam/erl_sched_spec_pre_alloc.h9
-rw-r--r--erts/emulator/beam/erl_thr_progress.c34
-rw-r--r--erts/emulator/beam/erl_thr_progress.h29
-rw-r--r--erts/emulator/beam/erl_trace.c15
-rw-r--r--erts/emulator/beam/erl_utils.h61
-rw-r--r--erts/emulator/beam/external.c3
-rw-r--r--erts/emulator/beam/global.h11
-rw-r--r--erts/emulator/beam/sys.h3
-rw-r--r--erts/emulator/beam/utils.c64
-rw-r--r--erts/emulator/drivers/common/inet_drv.c423
-rw-r--r--erts/emulator/nifs/common/prim_file_nif.c153
-rw-r--r--erts/emulator/nifs/common/prim_file_nif.h7
-rw-r--r--erts/emulator/nifs/unix/unix_prim_file.c7
-rw-r--r--erts/emulator/nifs/win32/win_prim_file.c164
-rw-r--r--erts/emulator/sys/common/erl_check_io.c318
-rw-r--r--erts/emulator/sys/common/erl_check_io.h21
-rw-r--r--erts/emulator/sys/common/erl_poll.c511
-rw-r--r--erts/emulator/sys/common/erl_poll.h14
-rw-r--r--erts/emulator/sys/common/erl_poll_api.h6
-rw-r--r--erts/emulator/sys/unix/sys_uds.c5
-rw-r--r--erts/emulator/sys/win32/erl_poll.c10
-rw-r--r--erts/emulator/test/Makefile3
-rw-r--r--erts/emulator/test/atomics_SUITE.erl150
-rw-r--r--erts/emulator/test/big_SUITE.erl57
-rw-r--r--erts/emulator/test/code_SUITE.erl37
-rw-r--r--erts/emulator/test/counters_SUITE.erl234
-rw-r--r--erts/emulator/test/driver_SUITE.erl17
-rw-r--r--erts/emulator/test/driver_SUITE_data/chkio_drv.c49
-rw-r--r--erts/emulator/test/persistent_term_SUITE.erl614
-rw-r--r--erts/emulator/test/scheduler_SUITE.erl27
-rw-r--r--erts/emulator/test/signal_SUITE.erl2
63 files changed, 5190 insertions, 846 deletions
diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in
index 054692819e..57a9d45887 100644
--- a/erts/emulator/Makefile.in
+++ b/erts/emulator/Makefile.in
@@ -633,21 +633,24 @@ GENERATE += $(TTF_DIR)/driver_tab.c
# This list must be consistent with PRE_LOADED_MODULES in
# erts/preloaded/src/Makefile.
-PRELOAD_BEAM = $(ERL_TOP)/erts/preloaded/ebin/otp_ring0.beam \
- $(ERL_TOP)/erts/preloaded/ebin/erts_code_purger.beam \
- $(ERL_TOP)/erts/preloaded/ebin/init.beam \
- $(ERL_TOP)/erts/preloaded/ebin/prim_buffer.beam \
- $(ERL_TOP)/erts/preloaded/ebin/prim_eval.beam \
- $(ERL_TOP)/erts/preloaded/ebin/prim_inet.beam \
- $(ERL_TOP)/erts/preloaded/ebin/prim_file.beam \
- $(ERL_TOP)/erts/preloaded/ebin/zlib.beam \
- $(ERL_TOP)/erts/preloaded/ebin/prim_zip.beam \
- $(ERL_TOP)/erts/preloaded/ebin/erl_prim_loader.beam \
- $(ERL_TOP)/erts/preloaded/ebin/erlang.beam \
- $(ERL_TOP)/erts/preloaded/ebin/erts_internal.beam \
- $(ERL_TOP)/erts/preloaded/ebin/erl_tracer.beam \
- $(ERL_TOP)/erts/preloaded/ebin/erts_literal_area_collector.beam \
- $(ERL_TOP)/erts/preloaded/ebin/erts_dirty_process_signal_handler.beam
+PRELOAD_BEAM = $(ERL_TOP)/erts/preloaded/ebin/otp_ring0.beam \
+ $(ERL_TOP)/erts/preloaded/ebin/erts_code_purger.beam \
+ $(ERL_TOP)/erts/preloaded/ebin/init.beam \
+ $(ERL_TOP)/erts/preloaded/ebin/prim_buffer.beam \
+ $(ERL_TOP)/erts/preloaded/ebin/prim_eval.beam \
+ $(ERL_TOP)/erts/preloaded/ebin/prim_inet.beam \
+ $(ERL_TOP)/erts/preloaded/ebin/prim_file.beam \
+ $(ERL_TOP)/erts/preloaded/ebin/zlib.beam \
+ $(ERL_TOP)/erts/preloaded/ebin/prim_zip.beam \
+ $(ERL_TOP)/erts/preloaded/ebin/erl_prim_loader.beam \
+ $(ERL_TOP)/erts/preloaded/ebin/erlang.beam \
+ $(ERL_TOP)/erts/preloaded/ebin/erts_internal.beam \
+ $(ERL_TOP)/erts/preloaded/ebin/erl_tracer.beam \
+ $(ERL_TOP)/erts/preloaded/ebin/erts_literal_area_collector.beam \
+ $(ERL_TOP)/erts/preloaded/ebin/erts_dirty_process_signal_handler.beam \
+ $(ERL_TOP)/erts/preloaded/ebin/atomics.beam \
+ $(ERL_TOP)/erts/preloaded/ebin/counters.beam \
+ $(ERL_TOP)/erts/preloaded/ebin/persistent_term.beam
ifeq ($(TARGET),win32)
# On windows the preloaded objects are in a resource object.
@@ -839,6 +842,8 @@ RUN_OBJS += \
$(OBJDIR)/erl_bif_ddll.o $(OBJDIR)/erl_bif_guard.o \
$(OBJDIR)/erl_bif_info.o $(OBJDIR)/erl_bif_op.o \
$(OBJDIR)/erl_bif_os.o $(OBJDIR)/erl_bif_lists.o \
+ $(OBJDIR)/erl_bif_persistent.o \
+ $(OBJDIR)/erl_bif_atomics.o $(OBJDIR)/erl_bif_counters.o \
$(OBJDIR)/erl_bif_trace.o $(OBJDIR)/erl_bif_unique.o \
$(OBJDIR)/erl_bif_wrap.o $(OBJDIR)/erl_nfunc_sched.o \
$(OBJDIR)/erl_guard_bifs.o $(OBJDIR)/erl_dirty_bif_wrap.o \
diff --git a/erts/emulator/beam/atom.c b/erts/emulator/beam/atom.c
index 5381611fab..59b51fd15e 100644
--- a/erts/emulator/beam/atom.c
+++ b/erts/emulator/beam/atom.c
@@ -174,7 +174,7 @@ atom_alloc(Atom* tmpl)
/*
* Precompute ordinal value of first 3 bytes + 7 bits.
- * This is used by utils.c:erts_cmp_atoms().
+ * This is used by erl_utils.h:erts_cmp_atoms().
* We cannot use the full 32 bits of the first 4 bytes,
* since we use the sign of the difference between two
* ordinal values to represent their relative order.
diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names
index fdce4f7b95..291bc95604 100644
--- a/erts/emulator/beam/atom.names
+++ b/erts/emulator/beam/atom.names
@@ -182,6 +182,7 @@ atom control
atom copy
atom copy_literals
atom counters
+atom count
atom cpu
atom cpu_timestamp
atom cr
@@ -287,6 +288,7 @@ atom gc_minor_end
atom gc_minor_start
atom Ge='>='
atom generational
+atom get_all_trap
atom get_seq_token
atom get_tcw
atom gather_gc_info_result
@@ -325,6 +327,7 @@ atom index
atom infinity
atom info
atom info_msg
+atom info_trap
atom init
atom initial_call
atom input
@@ -393,6 +396,7 @@ atom microsecond
atom microstate_accounting
atom milli_seconds
atom millisecond
+atom min
atom min_heap_size
atom min_bin_vheap_size
atom minor
diff --git a/erts/emulator/beam/beam_bif_load.c b/erts/emulator/beam/beam_bif_load.c
index d221e6aea6..bb1b2e5b27 100644
--- a/erts/emulator/beam/beam_bif_load.c
+++ b/erts/emulator/beam/beam_bif_load.c
@@ -1752,29 +1752,7 @@ BIF_RETTYPE erts_internal_purge_module_2(BIF_ALIST_2)
finalize_purge_operation(BIF_P, ret == am_true);
if (literals) {
- ErtsLiteralAreaRef *ref;
- ErtsMessage *mp;
- ref = erts_alloc(ERTS_ALC_T_LITERAL_REF,
- sizeof(ErtsLiteralAreaRef));
- ref->literal_area = literals;
- ref->next = NULL;
- erts_mtx_lock(&release_literal_areas.mtx);
- if (release_literal_areas.last) {
- release_literal_areas.last->next = ref;
- release_literal_areas.last = ref;
- }
- else {
- release_literal_areas.first = ref;
- release_literal_areas.last = ref;
- }
- erts_mtx_unlock(&release_literal_areas.mtx);
- mp = erts_alloc_message(0, NULL);
- ERL_MESSAGE_TOKEN(mp) = am_undefined;
- erts_queue_proc_message(BIF_P,
- erts_literal_area_collector,
- 0,
- mp,
- am_copy_literals);
+ erts_queue_release_literals(BIF_P, literals);
}
return ret;
@@ -1786,6 +1764,41 @@ BIF_RETTYPE erts_internal_purge_module_2(BIF_ALIST_2)
}
}
+void
+erts_queue_release_literals(Process* c_p, ErtsLiteralArea* literals)
+{
+ ErtsLiteralAreaRef *ref;
+ ErtsMessage *mp;
+ ref = erts_alloc(ERTS_ALC_T_LITERAL_REF,
+ sizeof(ErtsLiteralAreaRef));
+ ref->literal_area = literals;
+ ref->next = NULL;
+ erts_mtx_lock(&release_literal_areas.mtx);
+ if (release_literal_areas.last) {
+ release_literal_areas.last->next = ref;
+ release_literal_areas.last = ref;
+ } else {
+ release_literal_areas.first = ref;
+ release_literal_areas.last = ref;
+ }
+ erts_mtx_unlock(&release_literal_areas.mtx);
+ mp = erts_alloc_message(0, NULL);
+ ERL_MESSAGE_TOKEN(mp) = am_undefined;
+ if (c_p == NULL) {
+ erts_queue_message(erts_literal_area_collector,
+ 0,
+ mp,
+ am_copy_literals,
+ am_system);
+ } else {
+ erts_queue_proc_message(c_p,
+ erts_literal_area_collector,
+ 0,
+ mp,
+ am_copy_literals);
+ }
+}
+
/*
* Move code from current to old and null all export entries for the module
*/
diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c
index ab5920a67e..e909a0b4da 100644
--- a/erts/emulator/beam/beam_emu.c
+++ b/erts/emulator/beam/beam_emu.c
@@ -579,6 +579,7 @@ init_emulator(void)
* the instructions' C labels to the loader.
* The second call starts execution of BEAM code. This call never returns.
*/
+ERTS_NO_RETPOLINE
void process_main(Eterm * x_reg_array, FloatDef* f_reg_array)
{
static int init_done = 0;
diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c
index 79ed5a0c18..04498332b0 100644
--- a/erts/emulator/beam/bif.c
+++ b/erts/emulator/beam/bif.c
@@ -3622,6 +3622,10 @@ erts_internal_garbage_collect_1(BIF_ALIST_1)
default: BIF_ERROR(BIF_P, BADARG);
}
erts_garbage_collect(BIF_P, 0, NULL, 0);
+ if (ERTS_PROC_IS_EXITING(BIF_P)) {
+ /* The max heap size limit was reached. */
+ return THE_NON_VALUE;
+ }
return am_true;
}
diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab
index 7548924178..d4ba90a61a 100644
--- a/erts/emulator/beam/bif.tab
+++ b/erts/emulator/beam/bif.tab
@@ -40,6 +40,7 @@
# Note: Guards BIFs usually require special support in the compiler.
#
+
gcbif erlang:abs/1
bif erlang:adler32/1
bif erlang:adler32/2
@@ -698,3 +699,29 @@ ubif erlang:map_get/2
ubif erlang:is_map_key/2
bif ets:internal_delete_all/2
bif ets:internal_select_delete/2
+
+#
+# New in 21.2
+#
+
+bif persistent_term:put/2
+bif persistent_term:get/1
+bif persistent_term:get/0
+bif persistent_term:erase/1
+bif persistent_term:info/0
+bif erts_internal:erase_persistent_terms/0
+
+bif erts_internal:atomics_new/2
+bif atomics:get/2
+bif atomics:put/3
+bif atomics:add/3
+bif atomics:add_get/3
+bif atomics:exchange/3
+bif atomics:compare_exchange/4
+bif atomics:info/1
+
+bif erts_internal:counters_new/1
+bif erts_internal:counters_get/2
+bif erts_internal:counters_add/3
+bif erts_internal:counters_put/3
+bif erts_internal:counters_info/1
diff --git a/erts/emulator/beam/big.c b/erts/emulator/beam/big.c
index 84338769e0..7e0be2a2a7 100644
--- a/erts/emulator/beam/big.c
+++ b/erts/emulator/beam/big.c
@@ -668,27 +668,25 @@ static dsize_t I_mul(ErtsDigit* x, dsize_t xl, ErtsDigit* y, dsize_t yl, ErtsDig
static dsize_t I_sqr(ErtsDigit* x, dsize_t xl, ErtsDigit* r)
{
- ErtsDigit d_next = *x;
ErtsDigit d;
ErtsDigit* r0 = r;
ErtsDigit* s = r;
if ((r + xl) == x) /* "Inline" operation */
*x = 0;
- x++;
while(xl--) {
- ErtsDigit* y = x;
+ ErtsDigit* y;
ErtsDigit y_0 = 0, y_1 = 0, y_2 = 0, y_3 = 0;
ErtsDigit b0, b1;
ErtsDigit z0, z1, z2;
ErtsDigit t;
dsize_t y_l = xl;
-
+
+ d = *x;
+ x++;
+ y = x;
s = r;
- d = d_next;
- d_next = *x;
- x++;
DMUL(d, d, b1, b0);
DSUMc(*s, b0, y_3, t);
@@ -1159,8 +1157,11 @@ static dsize_t I_band(ErtsDigit* x, dsize_t xl, short xsgn,
*r++ = ~c1 & ~c2;
x++; y++;
}
- while(xl--)
- *r++ = ~*x++;
+ while(xl--) {
+ DSUBb(*x,0,b1,c1);
+ *r++ = ~c1;
+ x++;
+ }
}
}
return I_btrail(r0, r, sign);
diff --git a/erts/emulator/beam/copy.c b/erts/emulator/beam/copy.c
index e7bfd04b73..e7bd046e18 100644
--- a/erts/emulator/beam/copy.c
+++ b/erts/emulator/beam/copy.c
@@ -1074,6 +1074,7 @@ Uint copy_shared_calculate(Eterm obj, erts_shcopy_t *info)
Eterm* ptr;
Eterm *lit_purge_ptr = info->lit_purge_ptr;
Uint lit_purge_sz = info->lit_purge_sz;
+ int copy_literals = info->copy_literals;
#ifdef DEBUG
Eterm mypid = erts_get_current_pid();
#endif
@@ -1119,7 +1120,7 @@ Uint copy_shared_calculate(Eterm obj, erts_shcopy_t *info)
/* off heap list pointers are copied verbatim */
if (erts_is_literal(obj,ptr)) {
VERBOSE(DEBUG_SHCOPY, ("[pid=%T] bypassed copying %p is %T\n", mypid, ptr, obj));
- if (in_literal_purge_area(ptr))
+ if (copy_literals || in_literal_purge_area(ptr))
info->literal_size += size_object(obj);
goto pop_next;
}
@@ -1170,7 +1171,7 @@ Uint copy_shared_calculate(Eterm obj, erts_shcopy_t *info)
/* off heap pointers to boxes are copied verbatim */
if (erts_is_literal(obj,ptr)) {
VERBOSE(DEBUG_SHCOPY, ("[pid=%T] bypassed copying %p is %T\n", mypid, ptr, obj));
- if (in_literal_purge_area(ptr))
+ if (copy_literals || in_literal_purge_area(ptr))
info->literal_size += size_object(obj);
goto pop_next;
}
@@ -1338,6 +1339,7 @@ Uint copy_shared_perform(Eterm obj, Uint size, erts_shcopy_t *info,
unsigned remaining;
Eterm *lit_purge_ptr = info->lit_purge_ptr;
Uint lit_purge_sz = info->lit_purge_sz;
+ int copy_literals = info->copy_literals;
#ifdef DEBUG
Eterm mypid = erts_get_current_pid();
Eterm saved_obj = obj;
@@ -1387,7 +1389,7 @@ Uint copy_shared_perform(Eterm obj, Uint size, erts_shcopy_t *info,
ptr = list_val(obj);
/* off heap list pointers are copied verbatim */
if (erts_is_literal(obj,ptr)) {
- if (!in_literal_purge_area(ptr)) {
+ if (!(copy_literals || in_literal_purge_area(ptr))) {
*resp = obj;
} else {
Uint bsz = 0;
@@ -1455,7 +1457,7 @@ Uint copy_shared_perform(Eterm obj, Uint size, erts_shcopy_t *info,
ptr = boxed_val(obj);
/* off heap pointers to boxes are copied verbatim */
if (erts_is_literal(obj,ptr)) {
- if (!in_literal_purge_area(ptr)) {
+ if (!(copy_literals || in_literal_purge_area(ptr))) {
*resp = obj;
} else {
Uint bsz = 0;
diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c
index 8fe1ccb758..9e36d5e0d1 100644
--- a/erts/emulator/beam/erl_alloc.c
+++ b/erts/emulator/beam/erl_alloc.c
@@ -4030,6 +4030,9 @@ debug_free(ErtsAlcType_t n, void *extra, void *ptr)
ASSERT(ERTS_ALC_N_MIN <= n && n <= ERTS_ALC_N_MAX);
+ if (!ptr)
+ return;
+
dptr = check_memory_fence(ptr, &size, n, ERTS_ALC_O_FREE);
#ifdef ERTS_ALC_A_EXEC
diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types
index 5409b89bab..4f03a34390 100644
--- a/erts/emulator/beam/erl_alloc.types
+++ b/erts/emulator/beam/erl_alloc.types
@@ -274,9 +274,13 @@ type ML_YIELD_STATE SHORT_LIVED SYSTEM monitor_link_yield_state
type ML_DIST STANDARD SYSTEM monitor_link_dist
type PF3_ARGS SHORT_LIVED PROCESSES process_flag_3_arguments
type SETUP_CONN_ARG SHORT_LIVED PROCESSES setup_connection_argument
+type LIST_TRAP SHORT_LIVED PROCESSES list_bif_trap_state
type ENVIRONMENT SYSTEM SYSTEM environment
+type PERSISTENT_TERM LONG_LIVED CODE persisten_term
+type PERSISTENT_LOCK_Q SHORT_LIVED SYSTEM persistent_lock_q
+
#
# Types used for special emulators
#
@@ -334,6 +338,8 @@ type GC_INFO_REQ SHORT_LIVED SYSTEM gc_info_request
type PORT_DATA_HEAP STANDARD SYSTEM port_data_heap
type MSACC DRIVER SYSTEM microstate_accounting
type SYS_CHECK_REQ SHORT_LIVED SYSTEM system_check_request
+type ATOMICS STANDARD SYSTEM erl_bif_atomics
+type COUNTERS STANDARD SYSTEM erl_bif_counters
#
# Types used by system specific code
diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c
index a5740a08cf..0be4562785 100644
--- a/erts/emulator/beam/erl_alloc_util.c
+++ b/erts/emulator/beam/erl_alloc_util.c
@@ -834,6 +834,8 @@ static ERTS_INLINE void clr_bit(UWord* map, Uint ix)
&= ~((UWord)1 << (ix % ERTS_VSPACE_WORD_BITS));
}
+#ifdef DEBUG
+
static ERTS_INLINE int is_bit_set(UWord* map, Uint ix)
{
ASSERT(ix / ERTS_VSPACE_WORD_BITS < VSPACE_MAP_SZ);
@@ -841,6 +843,8 @@ static ERTS_INLINE int is_bit_set(UWord* map, Uint ix)
& ((UWord)1 << (ix % ERTS_VSPACE_WORD_BITS));
}
+#endif
+
UWord erts_literal_vspace_map[VSPACE_MAP_SZ];
static void set_literal_range(void* start, Uint size)
diff --git a/erts/emulator/beam/erl_async.c b/erts/emulator/beam/erl_async.c
index 605a2b3461..44655ad5df 100644
--- a/erts/emulator/beam/erl_async.c
+++ b/erts/emulator/beam/erl_async.c
@@ -336,7 +336,7 @@ static ERTS_INLINE ErtsAsync *async_get(ErtsThrQ_t *q,
case ERTS_THR_Q_NEED_THR_PRGR:
{
ErtsThrPrgrVal prgr = erts_thr_q_need_thr_progress(q);
- erts_thr_progress_wakeup(NULL, prgr);
+ erts_thr_progress_wakeup(erts_thr_prgr_data(NULL), prgr);
/*
* We do no dequeue finalizing in hope that a new async
* job will arrive before we are woken due to thread
diff --git a/erts/emulator/beam/erl_bif_atomics.c b/erts/emulator/beam/erl_bif_atomics.c
new file mode 100644
index 0000000000..029831bd95
--- /dev/null
+++ b/erts/emulator/beam/erl_bif_atomics.c
@@ -0,0 +1,256 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2018. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Purpose: High performance atomics.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stddef.h> /* offsetof */
+
+#include "sys.h"
+#include "export.h"
+#include "bif.h"
+#include "erl_threads.h"
+#include "big.h"
+#include "erl_binary.h"
+#include "erl_bif_unique.h"
+#include "erl_map.h"
+
+typedef struct
+{
+ int is_signed;
+ UWord vlen;
+ erts_atomic64_t v[1];
+}AtomicsRef;
+
+static int atomics_destructor(Binary *unused)
+{
+ return 1;
+}
+
+#define OPT_SIGNED (1 << 0)
+
+BIF_RETTYPE erts_internal_atomics_new_2(BIF_ALIST_2)
+{
+ AtomicsRef* p;
+ Binary* mbin;
+ UWord i, cnt, opts;
+ Uint bytes;
+ Eterm* hp;
+
+ if (!term_to_UWord(BIF_ARG_1, &cnt)
+ || cnt == 0
+ || !term_to_UWord(BIF_ARG_2, &opts)) {
+
+ BIF_ERROR(BIF_P, BADARG);
+ }
+
+ if (cnt > (ERTS_UWORD_MAX / sizeof(p->v[0])))
+ BIF_ERROR(BIF_P, SYSTEM_LIMIT);
+
+ bytes = offsetof(AtomicsRef, v) + cnt*sizeof(p->v[0]);
+ mbin = erts_create_magic_binary_x(bytes,
+ atomics_destructor,
+ ERTS_ALC_T_ATOMICS,
+ 0);
+ p = ERTS_MAGIC_BIN_DATA(mbin);
+ p->is_signed = opts & OPT_SIGNED;
+ p->vlen = cnt;
+ for (i=0; i < cnt; i++)
+ erts_atomic64_init_nob(&p->v[i], 0);
+ hp = HAlloc(BIF_P, ERTS_MAGIC_REF_THING_SIZE);
+ return erts_mk_magic_ref(&hp, &MSO(BIF_P), mbin);
+}
+
+static ERTS_INLINE int get_ref(Eterm ref, AtomicsRef** pp)
+{
+ Binary* mbin;
+ if (!is_internal_magic_ref(ref))
+ return 0;
+
+ mbin = erts_magic_ref2bin(ref);
+ if (ERTS_MAGIC_BIN_DESTRUCTOR(mbin) != atomics_destructor)
+ return 0;
+ *pp = ERTS_MAGIC_BIN_DATA(mbin);
+ return 1;
+}
+
+static ERTS_INLINE int get_ref_ix(Eterm ref, Eterm ix,
+ AtomicsRef** pp, UWord* ixp)
+{
+ return (get_ref(ref, pp)
+ && term_to_UWord(ix, ixp)
+ && --(*ixp) < (*pp)->vlen);
+}
+
+static ERTS_INLINE int get_value(AtomicsRef* p, Eterm term, erts_aint64_t *valp)
+{
+ return (p->is_signed ?
+ term_to_Sint64(term, (Sint64*)valp) :
+ term_to_Uint64(term, (Uint64*)valp));
+}
+
+static ERTS_INLINE int get_incr(AtomicsRef* p, Eterm term, erts_aint64_t *valp)
+{
+ return (term_to_Sint64(term, (Sint64*)valp)
+ || term_to_Uint64(term, (Uint64*)valp));
+}
+
+static ERTS_INLINE Eterm bld_atomic(Process* proc, AtomicsRef* p,
+ erts_aint64_t val)
+{
+ if (p->is_signed) {
+ if (IS_SSMALL(val))
+ return make_small((Sint) val);
+ else {
+ Uint hsz = ERTS_SINT64_HEAP_SIZE(val);
+ Eterm* hp = HAlloc(proc, hsz);
+ return erts_sint64_to_big(val, &hp);
+ }
+ }
+ else {
+ if ((Uint64)val <= MAX_SMALL)
+ return make_small((Sint) val);
+ else {
+ Uint hsz = ERTS_UINT64_HEAP_SIZE((Uint64)val);
+ Eterm* hp = HAlloc(proc, hsz);
+ return erts_uint64_to_big(val, &hp);
+ }
+ }
+}
+
+BIF_RETTYPE atomics_put_3(BIF_ALIST_3)
+{
+ AtomicsRef* p;
+ UWord ix;
+ erts_aint64_t val;
+
+ if (!get_ref_ix(BIF_ARG_1, BIF_ARG_2, &p, &ix)
+ || !get_value(p, BIF_ARG_3, &val)) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+ erts_atomic64_set_mb(&p->v[ix], val);
+ return am_ok;
+}
+
+BIF_RETTYPE atomics_get_2(BIF_ALIST_2)
+{
+ AtomicsRef* p;
+ UWord ix;
+
+ if (!get_ref_ix(BIF_ARG_1, BIF_ARG_2, &p, &ix)) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+ return bld_atomic(BIF_P, p, erts_atomic64_read_mb(&p->v[ix]));
+}
+
+BIF_RETTYPE atomics_add_3(BIF_ALIST_3)
+{
+ AtomicsRef* p;
+ UWord ix;
+ erts_aint64_t incr;
+
+ if (!get_ref_ix(BIF_ARG_1, BIF_ARG_2, &p, &ix)
+ || !get_incr(p, BIF_ARG_3, &incr)) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+ erts_atomic64_add_mb(&p->v[ix], incr);
+ return am_ok;
+}
+
+BIF_RETTYPE atomics_add_get_3(BIF_ALIST_3)
+{
+ AtomicsRef* p;
+ UWord ix;
+ erts_aint64_t incr;
+
+ if (!get_ref_ix(BIF_ARG_1, BIF_ARG_2, &p, &ix)
+ || !get_incr(p, BIF_ARG_3, &incr)) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+ return bld_atomic(BIF_P, p, erts_atomic64_add_read_mb(&p->v[ix], incr));
+}
+
+BIF_RETTYPE atomics_exchange_3(BIF_ALIST_3)
+{
+ AtomicsRef* p;
+ UWord ix;
+ erts_aint64_t desired, was;
+
+ if (!get_ref_ix(BIF_ARG_1, BIF_ARG_2, &p, &ix)
+ || !get_value(p, BIF_ARG_3, &desired)) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+ was = erts_atomic64_xchg_mb(&p->v[ix], desired);
+ return bld_atomic(BIF_P, p, was);
+}
+
+BIF_RETTYPE atomics_compare_exchange_4(BIF_ALIST_4)
+{
+ AtomicsRef* p;
+ UWord ix;
+ erts_aint64_t expected, desired, was;
+
+ if (!get_ref_ix(BIF_ARG_1, BIF_ARG_2, &p, &ix)
+ || !get_value(p, BIF_ARG_3, &expected)
+ || !get_value(p, BIF_ARG_4, &desired)) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+ was = erts_atomic64_cmpxchg_mb(&p->v[ix], desired, expected);
+ return was == expected ? am_ok : bld_atomic(BIF_P, p, was);
+}
+
+BIF_RETTYPE atomics_info_1(BIF_ALIST_1)
+{
+ AtomicsRef* p;
+ Uint hsz = MAP4_SZ;
+ Eterm *hp;
+ Uint64 max;
+ Sint64 min;
+ UWord memory;
+ Eterm max_val, min_val, sz_val, mem_val;
+
+ if (!get_ref(BIF_ARG_1, &p))
+ BIF_ERROR(BIF_P, BADARG);
+
+ max = p->is_signed ? ERTS_SINT64_MAX : ERTS_UINT64_MAX;
+ min = p->is_signed ? ERTS_SINT64_MIN : 0;
+ memory = erts_magic_ref2bin(BIF_ARG_1)->orig_size;
+
+ erts_bld_uint64(NULL, &hsz, max);
+ erts_bld_sint64(NULL, &hsz, min);
+ erts_bld_uword(NULL, &hsz, p->vlen);
+ erts_bld_uword(NULL, &hsz, memory);
+
+ hp = HAlloc(BIF_P, hsz);
+ max_val = erts_bld_uint64(&hp, NULL, max);
+ min_val = erts_bld_sint64(&hp, NULL, min);
+ sz_val = erts_bld_uword(&hp, NULL, p->vlen);
+ mem_val = erts_bld_uword(&hp, NULL, memory);
+
+ return MAP4(hp, am_max, max_val,
+ am_memory, mem_val,
+ am_min, min_val,
+ am_size, sz_val);
+}
diff --git a/erts/emulator/beam/erl_bif_counters.c b/erts/emulator/beam/erl_bif_counters.c
new file mode 100644
index 0000000000..7c8884ba32
--- /dev/null
+++ b/erts/emulator/beam/erl_bif_counters.c
@@ -0,0 +1,255 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2018. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Purpose: The implementation for 'counters' with 'write_concurrency'.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stddef.h> /* offsetof */
+
+#include "sys.h"
+#include "export.h"
+#include "bif.h"
+#include "erl_threads.h"
+#include "big.h"
+#include "erl_binary.h"
+#include "erl_bif_unique.h"
+#include "erl_map.h"
+
+/*
+ * Each logical counter consists of one 64-bit atomic instance per scheduler
+ * plus one instance for the "base value".
+ *
+ * get() reads all atomics for the counter and returns the sum.
+ * add() reads and writes only its own scheduler specific atomic instance.
+ * put() reads all scheduler specific atomics and writes a new base value.
+ */
+#define ATOMICS_PER_COUNTER (erts_no_schedulers + 1)
+
+#define ATOMICS_PER_CACHE_LINE (ERTS_CACHE_LINE_SIZE / sizeof(erts_atomic64_t))
+
+typedef struct
+{
+ UWord arity;
+#ifdef DEBUG
+ UWord ulen;
+#endif
+ union {
+ erts_atomic64_t v[ATOMICS_PER_CACHE_LINE];
+ byte cache_line__[ERTS_CACHE_LINE_SIZE];
+ } u[1];
+}CountersRef;
+
+static int counters_destructor(Binary *mbin)
+{
+ return 1;
+}
+
+
+static UWord ERTS_INLINE div_ceil(UWord dividend, UWord divisor)
+{
+ return (dividend + divisor - 1) / divisor;
+}
+
+BIF_RETTYPE erts_internal_counters_new_1(BIF_ALIST_1)
+{
+ CountersRef* p;
+ Binary* mbin;
+ UWord ui, vi, cnt;
+ Uint bytes, cache_lines;
+ Eterm* hp;
+
+ if (!term_to_UWord(BIF_ARG_1, &cnt)
+ || cnt == 0) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+
+ if (cnt > (ERTS_UWORD_MAX / (sizeof(erts_atomic64_t)*2*ATOMICS_PER_COUNTER)))
+ BIF_ERROR(BIF_P, SYSTEM_LIMIT);
+
+ cache_lines = ATOMICS_PER_COUNTER * div_ceil(cnt, ATOMICS_PER_CACHE_LINE);
+ bytes = offsetof(CountersRef, u) + cache_lines * ERTS_CACHE_LINE_SIZE;
+ mbin = erts_create_magic_binary_x(bytes,
+ counters_destructor,
+ ERTS_ALC_T_ATOMICS,
+ 0);
+ p = ERTS_MAGIC_BIN_DATA(mbin);
+ p->arity = cnt;
+
+#ifdef DEBUG
+ p->ulen = cache_lines;
+#endif
+ ASSERT((byte*)&p->u[cache_lines] <= ((byte*)p + bytes));
+ for (ui=0; ui < cache_lines; ui++)
+ for (vi=0; vi < ATOMICS_PER_CACHE_LINE; vi++)
+ erts_atomic64_init_nob(&p->u[ui].v[vi], 0);
+ hp = HAlloc(BIF_P, ERTS_MAGIC_REF_THING_SIZE);
+ return erts_mk_magic_ref(&hp, &MSO(BIF_P), mbin);
+}
+
+static ERTS_INLINE int get_ref(Eterm ref, CountersRef** pp)
+{
+ Binary* mbin;
+ if (!is_internal_magic_ref(ref))
+ return 0;
+
+ mbin = erts_magic_ref2bin(ref);
+ if (ERTS_MAGIC_BIN_DESTRUCTOR(mbin) != counters_destructor)
+ return 0;
+ *pp = ERTS_MAGIC_BIN_DATA(mbin);
+ return 1;
+}
+
+static ERTS_INLINE int get_ref_cnt(Eterm ref, Eterm index,
+ CountersRef** pp,
+ erts_atomic64_t** app,
+ UWord sched_ix)
+{
+ CountersRef* p;
+ UWord ix, ui, vi;
+ if (!get_ref(ref, &p) || !term_to_UWord(index, &ix) || --ix >= p->arity)
+ return 0;
+ ui = (ix / ATOMICS_PER_CACHE_LINE) * ATOMICS_PER_COUNTER + sched_ix;
+ vi = ix % ATOMICS_PER_CACHE_LINE;
+ ASSERT(ui < p->ulen);
+ *pp = p;
+ *app = &p->u[ui].v[vi];
+ return 1;
+}
+
+static ERTS_INLINE int get_ref_my_cnt(Eterm ref, Eterm index,
+ CountersRef** pp,
+ erts_atomic64_t** app)
+{
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ ASSERT(esdp && !ERTS_SCHEDULER_IS_DIRTY(esdp));
+ ASSERT(esdp->no > 0 && esdp->no < ATOMICS_PER_COUNTER);
+ return get_ref_cnt(ref, index, pp, app, esdp->no);
+}
+
+static ERTS_INLINE int get_ref_first_cnt(Eterm ref, Eterm index,
+ CountersRef** pp,
+ erts_atomic64_t** app)
+{
+ return get_ref_cnt(ref, index, pp, app, 0);
+}
+
+static ERTS_INLINE int get_incr(CountersRef* p, Eterm term, erts_aint64_t *valp)
+{
+ return (term_to_Sint64(term, (Sint64*)valp)
+ || term_to_Uint64(term, (Uint64*)valp));
+}
+
+static ERTS_INLINE Eterm bld_counter(Process* proc, CountersRef* p,
+ erts_aint64_t val)
+{
+ if (IS_SSMALL(val))
+ return make_small((Sint) val);
+ else {
+ Uint hsz = ERTS_SINT64_HEAP_SIZE(val);
+ Eterm* hp = HAlloc(proc, hsz);
+ return erts_sint64_to_big(val, &hp);
+ }
+}
+
+BIF_RETTYPE erts_internal_counters_get_2(BIF_ALIST_2)
+{
+ CountersRef* p;
+ erts_atomic64_t* ap;
+ erts_aint64_t acc = 0;
+ int j;
+
+ if (!get_ref_first_cnt(BIF_ARG_1, BIF_ARG_2, &p, &ap)) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+ for (j = ATOMICS_PER_COUNTER; j ; --j) {
+ acc += erts_atomic64_read_nob(ap);
+ ap = (erts_atomic64_t*) ((byte*)ap + ERTS_CACHE_LINE_SIZE);
+ }
+ return bld_counter(BIF_P, p, acc);
+}
+
+BIF_RETTYPE erts_internal_counters_add_3(BIF_ALIST_3)
+{
+ CountersRef* p;
+ erts_atomic64_t* ap;
+ erts_aint64_t incr, sum;
+
+ if (!get_ref_my_cnt(BIF_ARG_1, BIF_ARG_2, &p, &ap)
+ || !get_incr(p, BIF_ARG_3, &incr)) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+ sum = incr + erts_atomic64_read_nob(ap);
+ erts_atomic64_set_nob(ap, sum);
+ return am_ok;
+}
+
+BIF_RETTYPE erts_internal_counters_put_3(BIF_ALIST_3)
+{
+ CountersRef* p;
+ erts_atomic64_t* first_ap;
+ erts_atomic64_t* ap;
+ erts_aint64_t acc;
+ erts_aint64_t val;
+ int j;
+
+ if (!get_ref_first_cnt(BIF_ARG_1, BIF_ARG_2, &p, &first_ap)
+ || !term_to_Sint64(BIF_ARG_3, &val)) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+
+ ap = first_ap;
+ acc = 0;
+ j = ATOMICS_PER_COUNTER - 1;
+ do {
+ ap = (erts_atomic64_t*) ((byte*)ap + ERTS_CACHE_LINE_SIZE);
+ acc += erts_atomic64_read_nob(ap);
+ } while (--j);
+ erts_atomic64_set_nob(first_ap, val-acc);
+
+ return am_ok;
+}
+
+BIF_RETTYPE erts_internal_counters_info_1(BIF_ALIST_1)
+{
+ CountersRef* p;
+ Uint hsz = MAP2_SZ;
+ Eterm *hp;
+ UWord memory;
+ Eterm sz_val, mem_val;
+
+ if (!get_ref(BIF_ARG_1, &p))
+ BIF_ERROR(BIF_P, BADARG);
+
+ memory = erts_magic_ref2bin(BIF_ARG_1)->orig_size;
+ erts_bld_uword(NULL, &hsz, p->arity);
+ erts_bld_uword(NULL, &hsz, memory);
+
+ hp = HAlloc(BIF_P, hsz);
+ sz_val = erts_bld_uword(&hp, NULL, p->arity);
+ mem_val = erts_bld_uword(&hp, NULL, memory);
+
+ return MAP2(hp, am_memory, mem_val,
+ am_size, sz_val);
+}
diff --git a/erts/emulator/beam/erl_bif_lists.c b/erts/emulator/beam/erl_bif_lists.c
index 395be67a90..aaf262780f 100644
--- a/erts/emulator/beam/erl_bif_lists.c
+++ b/erts/emulator/beam/erl_bif_lists.c
@@ -29,12 +29,13 @@
#include "sys.h"
#include "erl_vm.h"
#include "global.h"
-#include "erl_process.h"
-#include "error.h"
#include "bif.h"
+#include "erl_binary.h"
+
static Eterm keyfind(int Bif, Process* p, Eterm Key, Eterm Pos, Eterm List);
+
static BIF_RETTYPE append(Process* p, Eterm A, Eterm B)
{
Eterm list;
@@ -146,110 +147,732 @@ BIF_RETTYPE append_2(BIF_ALIST_2)
return append(BIF_P, BIF_ARG_1, BIF_ARG_2);
}
-/*
- * erlang:'--'/2
- */
+/* erlang:'--'/2
+ *
+ * Subtracts a list from another (LHS -- RHS), removing the first occurrence of
+ * each element in LHS from RHS. There is no type coercion so the elements must
+ * match exactly.
+ *
+ * The BIF is broken into several stages that can all trap individually, and it
+ * chooses its algorithm based on input size. If either input is small it will
+ * use a linear scan tuned to which side it's on, and if both inputs are large
+ * enough it will convert RHS into a multiset to provide good asymptotic
+ * behavior. */
+
+#define SUBTRACT_LHS_THRESHOLD 16
+#define SUBTRACT_RHS_THRESHOLD 16
+
+typedef enum {
+ SUBTRACT_STAGE_START,
+ SUBTRACT_STAGE_LEN_LHS,
+
+ /* Naive linear scan that's efficient when
+ * LEN_LHS <= SUBTRACT_LHS_THRESHOLD. */
+ SUBTRACT_STAGE_NAIVE_LHS,
+
+ SUBTRACT_STAGE_LEN_RHS,
+
+ /* As SUBTRACT_STAGE_NAIVE_LHS but for RHS. */
+ SUBTRACT_STAGE_NAIVE_RHS,
+
+ /* Creates a multiset from RHS for faster lookups before sweeping through
+ * LHS. The set is implemented as a red-black tree and duplicate elements
+ * are handled by a counter on each node. */
+ SUBTRACT_STAGE_SET_BUILD,
+ SUBTRACT_STAGE_SET_FINISH
+} ErtsSubtractCtxStage;
+
+typedef struct subtract_node__ {
+ struct subtract_node__ *parent;
+ struct subtract_node__ *left;
+ struct subtract_node__ *right;
+ int is_red;
+
+ Eterm key;
+ Uint count;
+} subtract_tree_t;
+
+typedef struct {
+ ErtsSubtractCtxStage stage;
+
+ Eterm lhs_original;
+ Eterm rhs_original;
+
+ Uint lhs_remaining;
+ Uint rhs_remaining;
+
+ Eterm iterator;
+
+ Eterm *result_cdr;
+ Eterm result;
+
+ union {
+ Eterm lhs_elements[SUBTRACT_LHS_THRESHOLD];
+ Eterm rhs_elements[SUBTRACT_RHS_THRESHOLD];
+
+ struct {
+ subtract_tree_t *tree;
+
+ /* A memory area for the tree's nodes, saving us the need to have
+ * one allocation per node. */
+ subtract_tree_t *alloc_start;
+ subtract_tree_t *alloc;
+ } rhs_set;
+ } u;
+} ErtsSubtractContext;
+
+#define ERTS_RBT_PREFIX subtract
+#define ERTS_RBT_T subtract_tree_t
+#define ERTS_RBT_KEY_T Eterm
+#define ERTS_RBT_FLAGS_T int
+#define ERTS_RBT_INIT_EMPTY_TNODE(T) \
+ do { \
+ (T)->parent = NULL; \
+ (T)->left = NULL; \
+ (T)->right = NULL; \
+ } while(0)
+#define ERTS_RBT_IS_RED(T) ((T)->is_red)
+#define ERTS_RBT_SET_RED(T) ((T)->is_red = 1)
+#define ERTS_RBT_IS_BLACK(T) (!ERTS_RBT_IS_RED(T))
+#define ERTS_RBT_SET_BLACK(T) ((T)->is_red = 0)
+#define ERTS_RBT_GET_FLAGS(T) ((T)->is_red)
+#define ERTS_RBT_SET_FLAGS(T, F) ((T)->is_red = F)
+#define ERTS_RBT_GET_PARENT(T) ((T)->parent)
+#define ERTS_RBT_SET_PARENT(T, P) ((T)->parent = P)
+#define ERTS_RBT_GET_RIGHT(T) ((T)->right)
+#define ERTS_RBT_SET_RIGHT(T, R) ((T)->right = (R))
+#define ERTS_RBT_GET_LEFT(T) ((T)->left)
+#define ERTS_RBT_SET_LEFT(T, L) ((T)->left = (L))
+#define ERTS_RBT_GET_KEY(T) ((T)->key)
+#define ERTS_RBT_CMP_KEYS(KX, KY) CMP_TERM(KX, KY)
+#define ERTS_RBT_WANT_LOOKUP_INSERT
+#define ERTS_RBT_WANT_LOOKUP
+#define ERTS_RBT_WANT_DELETE
+#define ERTS_RBT_UNDEF
+
+#include "erl_rbtree.h"
+
+static int subtract_continue(Process *p, ErtsSubtractContext *context);
+
+static void subtract_ctx_dtor(ErtsSubtractContext *context) {
+ switch (context->stage) {
+ case SUBTRACT_STAGE_SET_BUILD:
+ case SUBTRACT_STAGE_SET_FINISH:
+ erts_free(ERTS_ALC_T_LIST_TRAP, context->u.rhs_set.alloc_start);
+ break;
+ default:
+ break;
+ }
+}
-#define SMALL_VEC_SIZE 10
-static Eterm subtract(Process* p, Eterm A, Eterm B)
-{
- Eterm list;
- Eterm* hp;
- Uint need;
- Eterm res;
- Eterm small_vec[SMALL_VEC_SIZE]; /* Preallocated memory for small lists */
- Eterm* vec_p;
- Eterm* vp;
- Sint i;
- Sint n;
- Sint m;
-
- if ((n = erts_list_length(A)) < 0) {
- BIF_ERROR(p, BADARG);
+static int subtract_ctx_bin_dtor(Binary *context_bin) {
+ ErtsSubtractContext *context = ERTS_MAGIC_BIN_DATA(context_bin);
+ subtract_ctx_dtor(context);
+ return 1;
+}
+
+static void subtract_ctx_move(ErtsSubtractContext *from,
+ ErtsSubtractContext *to) {
+ int uses_result_cdr = 0;
+
+ to->stage = from->stage;
+
+ to->lhs_original = from->lhs_original;
+ to->rhs_original = from->rhs_original;
+
+ to->lhs_remaining = from->lhs_remaining;
+ to->rhs_remaining = from->rhs_remaining;
+
+ to->iterator = from->iterator;
+ to->result = from->result;
+
+ switch (to->stage) {
+ case SUBTRACT_STAGE_NAIVE_LHS:
+ sys_memcpy(to->u.lhs_elements,
+ from->u.lhs_elements,
+ sizeof(Eterm) * to->lhs_remaining);
+ break;
+ case SUBTRACT_STAGE_NAIVE_RHS:
+ sys_memcpy(to->u.rhs_elements,
+ from->u.rhs_elements,
+ sizeof(Eterm) * to->rhs_remaining);
+
+ uses_result_cdr = 1;
+ break;
+ case SUBTRACT_STAGE_SET_FINISH:
+ uses_result_cdr = 1;
+ /* FALL THROUGH */
+ case SUBTRACT_STAGE_SET_BUILD:
+ to->u.rhs_set.alloc_start = from->u.rhs_set.alloc_start;
+ to->u.rhs_set.alloc = from->u.rhs_set.alloc;
+ to->u.rhs_set.tree = from->u.rhs_set.tree;
+ break;
+ default:
+ break;
}
- if ((m = erts_list_length(B)) < 0) {
- BIF_ERROR(p, BADARG);
+
+ if (uses_result_cdr) {
+ if (from->result_cdr == &from->result) {
+ to->result_cdr = &to->result;
+ } else {
+ to->result_cdr = from->result_cdr;
+ }
}
-
- if (n == 0)
- BIF_RET(NIL);
- if (m == 0)
- BIF_RET(A);
-
- /* allocate element vector */
- if (n <= SMALL_VEC_SIZE)
- vec_p = small_vec;
- else
- vec_p = (Eterm*) erts_alloc(ERTS_ALC_T_TMP, n * sizeof(Eterm));
-
- /* PUT ALL ELEMENTS IN VP */
- vp = vec_p;
- list = A;
- i = n;
- while(i--) {
- Eterm* listp = list_val(list);
- *vp++ = CAR(listp);
- list = CDR(listp);
+}
+
+static Eterm subtract_create_trap_state(Process *p,
+ ErtsSubtractContext *context) {
+ Binary *state_bin;
+ Eterm *hp;
+
+ state_bin = erts_create_magic_binary(sizeof(ErtsSubtractContext),
+ subtract_ctx_bin_dtor);
+
+ subtract_ctx_move(context, ERTS_MAGIC_BIN_DATA(state_bin));
+
+ hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE);
+
+ return erts_mk_magic_ref(&hp, &MSO(p), state_bin);
+}
+
+static int subtract_enter_len_lhs(Process *p, ErtsSubtractContext *context) {
+ context->stage = SUBTRACT_STAGE_LEN_LHS;
+
+ context->iterator = context->lhs_original;
+ context->lhs_remaining = 0;
+
+ return subtract_continue(p, context);
+}
+
+static int subtract_enter_len_rhs(Process *p, ErtsSubtractContext *context) {
+ context->stage = SUBTRACT_STAGE_LEN_RHS;
+
+ context->iterator = context->rhs_original;
+ context->rhs_remaining = 0;
+
+ return subtract_continue(p, context);
+}
+
+static int subtract_get_length(Process *p, Eterm *iterator_p, Uint *count_p) {
+ static const Sint ELEMENTS_PER_RED = 32;
+
+ Sint budget, count;
+ Eterm iterator;
+
+ budget = ELEMENTS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+ iterator = *iterator_p;
+
+#ifdef DEBUG
+ budget = budget / 10 + 1;
+#endif
+
+ for (count = 0; count < budget && is_list(iterator); count++) {
+ iterator = CDR(list_val(iterator));
}
-
- /* UNMARK ALL DELETED CELLS */
- list = B;
- m = 0; /* number of deleted elements */
- while(is_list(list)) {
- Eterm* listp = list_val(list);
- Eterm elem = CAR(listp);
- i = n;
- vp = vec_p;
- while(i--) {
- if (is_value(*vp) && eq(*vp, elem)) {
- *vp = THE_NON_VALUE;
- m++;
- break;
- }
- vp++;
- }
- list = CDR(listp);
+
+ if (!is_list(iterator) && !is_nil(iterator)) {
+ return -1;
}
-
- if (m == n) /* All deleted ? */
- res = NIL;
- else if (m == 0) /* None deleted ? */
- res = A;
- else { /* REBUILD LIST */
- res = NIL;
- need = 2*(n - m);
- hp = HAlloc(p, need);
- vp = vec_p + n - 1;
- while(vp >= vec_p) {
- if (is_value(*vp)) {
- res = CONS(hp, *vp, res);
- hp += 2;
- }
- vp--;
- }
+
+ BUMP_REDS(p, count / ELEMENTS_PER_RED);
+
+ *iterator_p = iterator;
+ *count_p += count;
+
+ if (is_nil(iterator)) {
+ return 1;
}
- if (vec_p != small_vec)
- erts_free(ERTS_ALC_T_TMP, (void *) vec_p);
- BIF_RET(res);
+
+ return 0;
}
-BIF_RETTYPE ebif_minusminus_2(BIF_ALIST_2)
-{
- return subtract(BIF_P, BIF_ARG_1, BIF_ARG_2);
+static int subtract_enter_naive_lhs(Process *p, ErtsSubtractContext *context) {
+ Eterm iterator;
+ int i = 0;
+
+ context->stage = SUBTRACT_STAGE_NAIVE_LHS;
+
+ context->iterator = context->rhs_original;
+ context->result = NIL;
+
+ iterator = context->lhs_original;
+
+ while (is_list(iterator)) {
+ const Eterm *cell = list_val(iterator);
+
+ ASSERT(i < SUBTRACT_LHS_THRESHOLD);
+
+ context->u.lhs_elements[i++] = CAR(cell);
+ iterator = CDR(cell);
+ }
+
+ ASSERT(i == context->lhs_remaining);
+
+ return subtract_continue(p, context);
}
-BIF_RETTYPE subtract_2(BIF_ALIST_2)
-{
- return subtract(BIF_P, BIF_ARG_1, BIF_ARG_2);
+static int subtract_naive_lhs(Process *p, ErtsSubtractContext *context) {
+ const Sint CHECKS_PER_RED = 16;
+ Sint checks, budget;
+
+ budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+ checks = 0;
+
+ while (checks < budget && is_list(context->iterator)) {
+ const Eterm *cell;
+ Eterm value, next;
+ int found_at;
+
+ cell = list_val(context->iterator);
+
+ value = CAR(cell);
+ next = CDR(cell);
+
+ for (found_at = 0; found_at < context->lhs_remaining; found_at++) {
+ if (EQ(value, context->u.lhs_elements[found_at])) {
+ /* We shift the array one step down as we have to preserve
+ * order.
+ *
+ * Note that we can't exit early as that would suppress errors
+ * in the right-hand side (this runs prior to determining the
+ * length of RHS). */
+
+ context->lhs_remaining--;
+ sys_memmove(&context->u.lhs_elements[found_at],
+ &context->u.lhs_elements[found_at + 1],
+ (context->lhs_remaining - found_at) * sizeof(Eterm));
+ break;
+ }
+ }
+
+ checks += MAX(1, context->lhs_remaining);
+ context->iterator = next;
+ }
+
+ BUMP_REDS(p, MIN(checks, budget) / CHECKS_PER_RED);
+
+ if (is_list(context->iterator)) {
+ return 0;
+ } else if (!is_nil(context->iterator)) {
+ return -1;
+ }
+
+ if (context->lhs_remaining > 0) {
+ Eterm *hp;
+ int i;
+
+ hp = HAlloc(p, context->lhs_remaining * 2);
+
+ for (i = context->lhs_remaining - 1; i >= 0; i--) {
+ Eterm value = context->u.lhs_elements[i];
+
+ context->result = CONS(hp, value, context->result);
+ hp += 2;
+ }
+ }
+
+ ASSERT(context->lhs_remaining > 0 || context->result == NIL);
+
+ return 1;
}
+static int subtract_enter_naive_rhs(Process *p, ErtsSubtractContext *context) {
+ Eterm iterator;
+ int i = 0;
+
+ context->stage = SUBTRACT_STAGE_NAIVE_RHS;
+
+ context->iterator = context->lhs_original;
+ context->result_cdr = &context->result;
+ context->result = NIL;
+
+ iterator = context->rhs_original;
+
+ while (is_list(iterator)) {
+ const Eterm *cell = list_val(iterator);
+
+ ASSERT(i < SUBTRACT_RHS_THRESHOLD);
+
+ context->u.rhs_elements[i++] = CAR(cell);
+ iterator = CDR(cell);
+ }
+
+ ASSERT(i == context->rhs_remaining);
+
+ return subtract_continue(p, context);
+}
+
+static int subtract_naive_rhs(Process *p, ErtsSubtractContext *context) {
+ const Sint CHECKS_PER_RED = 16;
+ Sint checks, budget;
+
+ budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+ checks = 0;
+
+#ifdef DEBUG
+ budget = budget / 10 + 1;
+#endif
+
+ while (checks < budget && is_list(context->iterator)) {
+ const Eterm *cell;
+ Eterm value, next;
+ int found_at;
+
+ cell = list_val(context->iterator);
+ value = CAR(cell);
+ next = CDR(cell);
+
+ for (found_at = context->rhs_remaining - 1; found_at >= 0; found_at--) {
+ if (EQ(value, context->u.rhs_elements[found_at])) {
+ break;
+ }
+ }
+
+ if (found_at < 0) {
+ /* Destructively add the value to the result. This is safe
+ * since the GC is disabled and the unfinished term is never
+ * leaked to the outside world. */
+ Eterm *hp = HAllocX(p, 2, context->lhs_remaining * 2);
+
+ *context->result_cdr = make_list(hp);
+ context->result_cdr = &CDR(hp);
+
+ CAR(hp) = value;
+ } else if (found_at >= 0) {
+ Eterm swap;
+
+ if (context->rhs_remaining-- == 1) {
+ /* We've run out of items to remove, so the rest of the
+ * result will be equal to the remainder of the input. We know
+ * that LHS is well-formed as any errors would've been reported
+ * during length determination. */
+ *context->result_cdr = next;
+
+ BUMP_REDS(p, MIN(budget, checks) / CHECKS_PER_RED);
+
+ return 1;
+ }
+
+ swap = context->u.rhs_elements[context->rhs_remaining];
+ context->u.rhs_elements[found_at] = swap;
+ }
+
+ checks += context->rhs_remaining;
+ context->iterator = next;
+ context->lhs_remaining--;
+ }
+
+ /* The result only has to be terminated when returning it to the user, but
+ * we're doing it when trapping as well to prevent headaches when
+ * debugging. */
+ *context->result_cdr = NIL;
+
+ BUMP_REDS(p, MIN(budget, checks) / CHECKS_PER_RED);
+
+ if (is_list(context->iterator)) {
+ ASSERT(context->lhs_remaining > 0 && context->rhs_remaining > 0);
+ return 0;
+ }
+
+ return 1;
+}
+
+static int subtract_enter_set_build(Process *p, ErtsSubtractContext *context) {
+ context->stage = SUBTRACT_STAGE_SET_BUILD;
+
+ context->u.rhs_set.alloc_start =
+ erts_alloc(ERTS_ALC_T_LIST_TRAP,
+ context->rhs_remaining * sizeof(subtract_tree_t));
+
+ context->u.rhs_set.alloc = context->u.rhs_set.alloc_start;
+ context->u.rhs_set.tree = NULL;
+
+ context->iterator = context->rhs_original;
+
+ return subtract_continue(p, context);
+}
+
+static int subtract_set_build(Process *p, ErtsSubtractContext *context) {
+ const static Sint INSERTIONS_PER_RED = 16;
+ Sint budget, insertions;
+
+ budget = INSERTIONS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+ insertions = 0;
+
+#ifdef DEBUG
+ budget = budget / 10 + 1;
+#endif
+
+ while (insertions < budget && is_list(context->iterator)) {
+ subtract_tree_t *existing_node, *new_node;
+ const Eterm *cell;
+ Eterm value, next;
+
+ cell = list_val(context->iterator);
+ value = CAR(cell);
+ next = CDR(cell);
+
+ new_node = context->u.rhs_set.alloc;
+ new_node->key = value;
+ new_node->count = 1;
+
+ existing_node = subtract_rbt_lookup_insert(&context->u.rhs_set.tree,
+ new_node);
+
+ if (existing_node != NULL) {
+ existing_node->count++;
+ } else {
+ context->u.rhs_set.alloc++;
+ }
+
+ context->iterator = next;
+ insertions++;
+ }
+
+ BUMP_REDS(p, insertions / INSERTIONS_PER_RED);
+
+ ASSERT(is_list(context->iterator) || is_nil(context->iterator));
+ ASSERT(context->u.rhs_set.tree != NULL);
+
+ return is_nil(context->iterator);
+}
+
+static int subtract_enter_set_finish(Process *p, ErtsSubtractContext *context) {
+ context->stage = SUBTRACT_STAGE_SET_FINISH;
+
+ context->result_cdr = &context->result;
+ context->result = NIL;
+
+ context->iterator = context->lhs_original;
+
+ return subtract_continue(p, context);
+}
+
+static int subtract_set_finish(Process *p, ErtsSubtractContext *context) {
+ const Sint CHECKS_PER_RED = 8;
+ Sint checks, budget;
+
+ budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+ checks = 0;
+
+#ifdef DEBUG
+ budget = budget / 10 + 1;
+#endif
+
+ while (checks < budget && is_list(context->iterator)) {
+ subtract_tree_t *node;
+ const Eterm *cell;
+ Eterm value, next;
+
+ cell = list_val(context->iterator);
+ value = CAR(cell);
+ next = CDR(cell);
+
+ ASSERT(context->rhs_remaining > 0);
+
+ node = subtract_rbt_lookup(context->u.rhs_set.tree, value);
+
+ if (node == NULL) {
+ Eterm *hp = HAllocX(p, 2, context->lhs_remaining * 2);
+
+ *context->result_cdr = make_list(hp);
+ context->result_cdr = &CDR(hp);
+
+ CAR(hp) = value;
+ } else {
+ if (context->rhs_remaining-- == 1) {
+ *context->result_cdr = next;
+
+ BUMP_REDS(p, checks / CHECKS_PER_RED);
+
+ return 1;
+ }
+
+ if (node->count-- == 1) {
+ subtract_rbt_delete(&context->u.rhs_set.tree, node);
+ }
+ }
+
+ context->iterator = next;
+ context->lhs_remaining--;
+ checks++;
+ }
+
+ *context->result_cdr = NIL;
+
+ BUMP_REDS(p, checks / CHECKS_PER_RED);
+
+ if (is_list(context->iterator)) {
+ ASSERT(context->lhs_remaining > 0 && context->rhs_remaining > 0);
+ return 0;
+ }
+
+ return 1;
+}
+
+static int subtract_continue(Process *p, ErtsSubtractContext *context) {
+ switch (context->stage) {
+ case SUBTRACT_STAGE_START: {
+ return subtract_enter_len_lhs(p, context);
+ }
+
+ case SUBTRACT_STAGE_LEN_LHS: {
+ int res = subtract_get_length(p,
+ &context->iterator,
+ &context->lhs_remaining);
+
+ if (res != 1) {
+ return res;
+ }
+
+ if (context->lhs_remaining <= SUBTRACT_LHS_THRESHOLD) {
+ return subtract_enter_naive_lhs(p, context);
+ }
+
+ return subtract_enter_len_rhs(p, context);
+ }
+
+ case SUBTRACT_STAGE_NAIVE_LHS: {
+ return subtract_naive_lhs(p, context);
+ }
+
+ case SUBTRACT_STAGE_LEN_RHS: {
+ int res = subtract_get_length(p,
+ &context->iterator,
+ &context->rhs_remaining);
+
+ if (res != 1) {
+ return res;
+ }
+
+ /* We've walked through both lists fully now so we no longer need
+ * to check for errors past this point. */
+
+ if (context->rhs_remaining <= SUBTRACT_RHS_THRESHOLD) {
+ return subtract_enter_naive_rhs(p, context);
+ }
+
+ return subtract_enter_set_build(p, context);
+ }
+
+ case SUBTRACT_STAGE_NAIVE_RHS: {
+ return subtract_naive_rhs(p, context);
+ }
+
+ case SUBTRACT_STAGE_SET_BUILD: {
+ int res = subtract_set_build(p, context);
+
+ if (res != 1) {
+ return res;
+ }
+
+ return subtract_enter_set_finish(p, context);
+ }
+
+ case SUBTRACT_STAGE_SET_FINISH: {
+ return subtract_set_finish(p, context);
+ }
+
+ default:
+ ERTS_ASSERT(!"unreachable");
+ }
+}
+
+static int subtract_start(Process *p, Eterm lhs, Eterm rhs,
+ ErtsSubtractContext *context) {
+ context->stage = SUBTRACT_STAGE_START;
+
+ context->lhs_original = lhs;
+ context->rhs_original = rhs;
+
+ return subtract_continue(p, context);
+}
+
+/* erlang:'--'/2 */
+static Eterm subtract(Export *bif_entry, BIF_ALIST_2) {
+ Eterm lhs = BIF_ARG_1, rhs = BIF_ARG_2;
+
+ if ((is_list(lhs) || is_nil(lhs)) && (is_list(rhs) || is_nil(rhs))) {
+ /* We start with the context on the stack in the hopes that we won't
+ * have to trap. */
+ ErtsSubtractContext context;
+ int res;
+
+ res = subtract_start(BIF_P, lhs, rhs, &context);
+
+ if (res == 0) {
+ Eterm state_mref;
+
+ state_mref = subtract_create_trap_state(BIF_P, &context);
+ erts_set_gc_state(BIF_P, 0);
+
+ BIF_TRAP2(bif_entry, BIF_P, state_mref, NIL);
+ }
+
+ subtract_ctx_dtor(&context);
+
+ if (res < 0) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+
+ BIF_RET(context.result);
+ } else if (is_internal_magic_ref(lhs)) {
+ ErtsSubtractContext *context;
+ int (*dtor)(Binary*);
+ Binary *magic_bin;
+
+ int res;
+
+ magic_bin = erts_magic_ref2bin(lhs);
+ dtor = ERTS_MAGIC_BIN_DESTRUCTOR(magic_bin);
+
+ if (dtor != subtract_ctx_bin_dtor) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+
+ ASSERT(BIF_P->flags & F_DISABLE_GC);
+ ASSERT(rhs == NIL);
+
+ context = ERTS_MAGIC_BIN_DATA(magic_bin);
+ res = subtract_continue(BIF_P, context);
+
+ if (res == 0) {
+ BIF_TRAP2(bif_entry, BIF_P, lhs, NIL);
+ }
+
+ erts_set_gc_state(BIF_P, 1);
+
+ if (res < 0) {
+ ERTS_BIF_ERROR_TRAPPED2(BIF_P, BADARG, bif_entry,
+ context->lhs_original,
+ context->rhs_original);
+ }
+
+ BIF_RET(context->result);
+ }
+
+ ASSERT(!(BIF_P->flags & F_DISABLE_GC));
+
+ BIF_ERROR(BIF_P, BADARG);
+}
+
+BIF_RETTYPE ebif_minusminus_2(BIF_ALIST_2) {
+ return subtract(bif_export[BIF_ebif_minusminus_2], BIF_CALL_ARGS);
+}
+
+BIF_RETTYPE subtract_2(BIF_ALIST_2) {
+ return subtract(bif_export[BIF_subtract_2], BIF_CALL_ARGS);
+}
+
+
BIF_RETTYPE lists_member_2(BIF_ALIST_2)
{
Eterm term;
Eterm list;
Eterm item;
int non_immed_key;
- int max_iter = 10 * CONTEXT_REDS;
+ int reds_left = ERTS_BIF_REDS_LEFT(BIF_P);
+ int max_iter = 16 * reds_left;
if (is_nil(BIF_ARG_2)) {
BIF_RET(am_false);
@@ -267,14 +890,15 @@ BIF_RETTYPE lists_member_2(BIF_ALIST_2)
}
item = CAR(list_val(list));
if ((item == term) || (non_immed_key && eq(item, term))) {
- BIF_RET2(am_true, CONTEXT_REDS - max_iter/10);
+ BIF_RET2(am_true, reds_left - max_iter/16);
}
list = CDR(list_val(list));
}
if (is_not_nil(list)) {
+ BUMP_REDS(BIF_P, reds_left - max_iter/16);
BIF_ERROR(BIF_P, BADARG);
}
- BIF_RET2(am_false, CONTEXT_REDS - max_iter/10);
+ BIF_RET2(am_false, reds_left - max_iter/16);
}
static BIF_RETTYPE lists_reverse_alloc(Process *c_p,
@@ -283,7 +907,7 @@ static BIF_RETTYPE lists_reverse_alloc(Process *c_p,
{
static const Uint CELLS_PER_RED = 40;
- Eterm *heap_top, *heap_end;
+ Eterm *alloc_top, *alloc_end;
Uint cells_left, max_cells;
Eterm list, tail;
Eterm lookahead;
@@ -305,18 +929,18 @@ static BIF_RETTYPE lists_reverse_alloc(Process *c_p,
BIF_ERROR(c_p, BADARG);
}
- heap_top = HAlloc(c_p, 2 * (max_cells - cells_left));
- heap_end = heap_top + 2 * (max_cells - cells_left);
+ alloc_top = HAlloc(c_p, 2 * (max_cells - cells_left));
+ alloc_end = alloc_top + 2 * (max_cells - cells_left);
- while (heap_top < heap_end) {
+ while (alloc_top < alloc_end) {
Eterm *pair = list_val(list);
- tail = CONS(heap_top, CAR(pair), tail);
+ tail = CONS(alloc_top, CAR(pair), tail);
list = CDR(pair);
ASSERT(is_list(list) || is_nil(list));
- heap_top += 2;
+ alloc_top += 2;
}
if (is_nil(list)) {
@@ -333,7 +957,7 @@ static BIF_RETTYPE lists_reverse_onheap(Process *c_p,
{
static const Uint CELLS_PER_RED = 60;
- Eterm *heap_top, *heap_end;
+ Eterm *alloc_start, *alloc_top, *alloc_end;
Uint cells_left, max_cells;
Eterm list, tail;
@@ -343,21 +967,27 @@ static BIF_RETTYPE lists_reverse_onheap(Process *c_p,
cells_left = max_cells = CELLS_PER_RED * (1 + ERTS_BIF_REDS_LEFT(c_p));
ASSERT(HEAP_LIMIT(c_p) >= HEAP_TOP(c_p) + 2);
- heap_end = HEAP_LIMIT(c_p) - 2;
- heap_top = HEAP_TOP(c_p);
+ alloc_start = HEAP_TOP(c_p);
+ alloc_end = HEAP_LIMIT(c_p) - 2;
+ alloc_top = alloc_start;
+
+ /* Don't process more cells than we have reductions for. */
+ alloc_end = MIN(alloc_top + (cells_left * 2), alloc_end);
- while (heap_top < heap_end && is_list(list)) {
+ while (alloc_top < alloc_end && is_list(list)) {
Eterm *pair = list_val(list);
- tail = CONS(heap_top, CAR(pair), tail);
+ tail = CONS(alloc_top, CAR(pair), tail);
list = CDR(pair);
- heap_top += 2;
+ alloc_top += 2;
}
- cells_left -= (heap_top - heap_end) / 2;
+ cells_left -= (alloc_top - alloc_start) / 2;
+ HEAP_TOP(c_p) = alloc_top;
+
+ ASSERT(cells_left >= 0 && cells_left <= max_cells);
BUMP_REDS(c_p, (max_cells - cells_left) / CELLS_PER_RED);
- HEAP_TOP(c_p) = heap_top;
if (is_nil(list)) {
BIF_RET(tail);
diff --git a/erts/emulator/beam/erl_bif_persistent.c b/erts/emulator/beam/erl_bif_persistent.c
new file mode 100644
index 0000000000..9dca768a18
--- /dev/null
+++ b/erts/emulator/beam/erl_bif_persistent.c
@@ -0,0 +1,983 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2018. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Purpose: Implement persistent term storage.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "sys.h"
+#include "erl_vm.h"
+#include "global.h"
+#include "erl_process.h"
+#include "error.h"
+#include "erl_driver.h"
+#include "bif.h"
+#include "erl_map.h"
+#include "erl_binary.h"
+
+/*
+ * The limit for the number of persistent terms before
+ * a warning is issued.
+ */
+
+#define WARNING_LIMIT 20000
+#define XSTR(s) STR(s)
+#define STR(s) #s
+
+/*
+ * Parameters for the hash table.
+ */
+#define INITIAL_SIZE 8
+#define LOAD_FACTOR ((Uint)50)
+#define MUST_GROW(t) (((Uint)100) * t->num_entries >= LOAD_FACTOR * t->allocated)
+#define MUST_SHRINK(t) (((Uint)200) * t->num_entries <= LOAD_FACTOR * t->allocated && \
+ t->allocated > INITIAL_SIZE)
+
+typedef struct hash_table {
+ Uint allocated;
+ Uint num_entries;
+ Uint mask;
+ Uint first_to_delete;
+ Uint num_to_delete;
+ erts_atomic_t refc;
+ struct hash_table* delete_next;
+ ErtsThrPrgrLaterOp thr_prog_op;
+ Eterm term[1];
+} HashTable;
+
+typedef struct trap_data {
+ HashTable* table;
+ Uint idx;
+ Uint remaining;
+ Uint memory; /* Used by info/0 to count used memory */
+} TrapData;
+
+/*
+ * Declarations of local functions.
+ */
+
+static HashTable* create_initial_table(void);
+static Uint lookup(HashTable* hash_table, Eterm key);
+static HashTable* copy_table(HashTable* old_table, Uint new_size, int rehash);
+static HashTable* tmp_table_copy(HashTable* old_table);
+static int try_seize_update_permission(Process* c_p);
+static void release_update_permission(int release_updater);
+static void table_updater(void* table);
+static void table_deleter(void* hash_table);
+static void dec_table_refc(Process* c_p, HashTable* old_table);
+static void delete_table(Process* c_p, HashTable* table);
+static void mark_for_deletion(HashTable* hash_table, Uint entry_index);
+static ErtsLiteralArea* term_to_area(Eterm tuple);
+static void suspend_updater(Process* c_p);
+static Eterm do_get_all(Process* c_p, TrapData* trap_data, Eterm res);
+static Eterm do_info(Process* c_p, TrapData* trap_data);
+static void append_to_delete_queue(HashTable* table);
+static HashTable* next_to_delete(void);
+static Eterm alloc_trap_data(Process* c_p);
+static int cleanup_trap_data(Binary *bp);
+
+/*
+ * Traps
+ */
+
+static Export persistent_term_get_all_export;
+static BIF_RETTYPE persistent_term_get_all_trap(BIF_ALIST_2);
+static Export persistent_term_info_export;
+static BIF_RETTYPE persistent_term_info_trap(BIF_ALIST_1);
+
+/*
+ * Pointer to the current hash table.
+ */
+
+static erts_atomic_t the_hash_table;
+
+/*
+ * Queue of processes waiting to update the hash table.
+ */
+
+struct update_queue_item {
+ Process *p;
+ struct update_queue_item* next;
+};
+
+static erts_mtx_t update_table_permission_mtx;
+static struct update_queue_item* update_queue = NULL;
+static Process* updater_process = NULL;
+
+/* Protected by update_table_permission_mtx */
+static ErtsThrPrgrLaterOp thr_prog_op;
+static int issued_warning = 0;
+
+/*
+ * Queue of hash tables to be deleted.
+ */
+
+static erts_mtx_t delete_queue_mtx;
+static HashTable* delete_queue_head = NULL;
+static HashTable** delete_queue_tail = &delete_queue_head;
+
+/*
+ * The following variables are only used during crash dumping. They
+ * are intialized by erts_init_persistent_dumping().
+ */
+
+ErtsLiteralArea** erts_persistent_areas;
+Uint erts_num_persistent_areas;
+
+void erts_init_bif_persistent_term(void)
+{
+ HashTable* hash_table;
+
+ /*
+ * Initialize the mutex protecting updates.
+ */
+
+ erts_mtx_init(&update_table_permission_mtx,
+ "update_persistent_term_permission",
+ NIL,
+ ERTS_LOCK_FLAGS_PROPERTY_STATIC |
+ ERTS_LOCK_FLAGS_CATEGORY_GENERIC);
+
+ /*
+ * Initialize delete queue.
+ */
+
+ erts_mtx_init(&delete_queue_mtx,
+ "persistent_term_delete_permission",
+ NIL,
+ ERTS_LOCK_FLAGS_PROPERTY_STATIC |
+ ERTS_LOCK_FLAGS_CATEGORY_GENERIC);
+
+ /*
+ * Allocate a small initial hash table.
+ */
+
+ hash_table = create_initial_table();
+ erts_atomic_init_nob(&the_hash_table, (erts_aint_t)hash_table);
+
+ /*
+ * Initialize export entry for traps
+ */
+
+ erts_init_trap_export(&persistent_term_get_all_export,
+ am_persistent_term, am_get_all_trap, 2,
+ &persistent_term_get_all_trap);
+ erts_init_trap_export(&persistent_term_info_export,
+ am_persistent_term, am_info_trap, 1,
+ &persistent_term_info_trap);
+}
+
+BIF_RETTYPE persistent_term_put_2(BIF_ALIST_2)
+{
+ Eterm key;
+ Eterm term;
+ Eterm heap[3];
+ Eterm tuple;
+ HashTable* hash_table;
+ Uint term_size;
+ Uint lit_area_size;
+ ErlOffHeap code_off_heap;
+ ErtsLiteralArea* literal_area;
+ erts_shcopy_t info;
+ Eterm* ptr;
+ Uint entry_index;
+
+ if (!try_seize_update_permission(BIF_P)) {
+ ERTS_BIF_YIELD2(bif_export[BIF_persistent_term_put_2],
+ BIF_P, BIF_ARG_1, BIF_ARG_2);
+ }
+
+ hash_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+
+ key = BIF_ARG_1;
+ term = BIF_ARG_2;
+
+ entry_index = lookup(hash_table, key);
+
+ heap[0] = make_arityval(2);
+ heap[1] = key;
+ heap[2] = term;
+ tuple = make_tuple(heap);
+
+ if (is_nil(hash_table->term[entry_index])) {
+ Uint size = hash_table->allocated;
+ if (MUST_GROW(hash_table)) {
+ size *= 2;
+ }
+ hash_table = copy_table(hash_table, size, 0);
+ entry_index = lookup(hash_table, key);
+ hash_table->num_entries++;
+ } else {
+ Eterm tuple = hash_table->term[entry_index];
+ Eterm old_term;
+
+ ASSERT(is_tuple_arity(tuple, 2));
+ old_term = boxed_val(tuple)[2];
+ if (EQ(term, old_term)) {
+ /* Same value. No need to update anything. */
+ release_update_permission(0);
+ BIF_RET(am_ok);
+ } else {
+ /* Mark the old term for deletion. */
+ mark_for_deletion(hash_table, entry_index);
+ hash_table = copy_table(hash_table, hash_table->allocated, 0);
+ }
+ }
+
+ /*
+ * Preserve internal sharing in the term by using the
+ * sharing-preserving functions. However, literals must
+ * be copied in case the module holding them are unloaded.
+ */
+ INITIALIZE_SHCOPY(info);
+ info.copy_literals = 1;
+ term_size = copy_shared_calculate(tuple, &info);
+ ERTS_INIT_OFF_HEAP(&code_off_heap);
+ lit_area_size = ERTS_LITERAL_AREA_ALLOC_SIZE(term_size);
+ literal_area = erts_alloc(ERTS_ALC_T_LITERAL, lit_area_size);
+ ptr = &literal_area->start[0];
+ literal_area->end = ptr + term_size;
+ tuple = copy_shared_perform(tuple, term_size, &info, &ptr, &code_off_heap);
+ ASSERT(tuple_val(tuple) == literal_area->start);
+ literal_area->off_heap = code_off_heap.first;
+ DESTROY_SHCOPY(info);
+ erts_set_literal_tag(&tuple, literal_area->start, term_size);
+ hash_table->term[entry_index] = tuple;
+
+ erts_schedule_thr_prgr_later_op(table_updater, hash_table, &thr_prog_op);
+ suspend_updater(BIF_P);
+
+ /*
+ * Issue a warning once if the warning limit has been exceeded.
+ */
+
+ if (hash_table->num_entries > WARNING_LIMIT && issued_warning == 0) {
+ static char w[] =
+ "More than " XSTR(WARNING_LIMIT) " persistent terms "
+ "have been created.\n"
+ "It is recommended to avoid creating an excessive number of\n"
+ "persistent terms, as creation and deletion of persistent terms\n"
+ "will be slower as the number of persistent terms increases.\n";
+ issued_warning = 1;
+ erts_send_warning_to_logger_str(BIF_P->group_leader, w);
+ }
+
+ ERTS_BIF_YIELD_RETURN(BIF_P, am_ok);
+}
+
+BIF_RETTYPE persistent_term_get_0(BIF_ALIST_0)
+{
+ HashTable* hash_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+ TrapData* trap_data;
+ Eterm res = NIL;
+ Eterm magic_ref;
+ Binary* mbp;
+
+ magic_ref = alloc_trap_data(BIF_P);
+ mbp = erts_magic_ref2bin(magic_ref);
+ trap_data = ERTS_MAGIC_BIN_DATA(mbp);
+ trap_data->table = hash_table;
+ trap_data->idx = 0;
+ trap_data->remaining = hash_table->num_entries;
+ res = do_get_all(BIF_P, trap_data, res);
+ if (trap_data->remaining == 0) {
+ BUMP_REDS(BIF_P, hash_table->num_entries);
+ trap_data->table = NULL; /* Prevent refc decrement */
+ BIF_RET(res);
+ } else {
+ /*
+ * Increment the ref counter to prevent an update operation (by put/2
+ * or erase/1) to delete this hash table.
+ */
+ erts_atomic_inc_nob(&hash_table->refc);
+ BUMP_ALL_REDS(BIF_P);
+ BIF_TRAP2(&persistent_term_get_all_export, BIF_P, magic_ref, res);
+ }
+}
+
+BIF_RETTYPE persistent_term_get_1(BIF_ALIST_1)
+{
+ Eterm key = BIF_ARG_1;
+ HashTable* hash_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+ Uint entry_index;
+ Eterm term;
+
+ entry_index = lookup(hash_table, key);
+ term = hash_table->term[entry_index];
+ if (is_boxed(term)) {
+ ASSERT(is_tuple_arity(term, 2));
+ BIF_RET(tuple_val(term)[2]);
+ }
+ BIF_ERROR(BIF_P, BADARG);
+}
+
+BIF_RETTYPE persistent_term_erase_1(BIF_ALIST_1)
+{
+ Eterm key = BIF_ARG_1;
+ HashTable* old_table;
+ HashTable* new_table;
+ Uint entry_index;
+ Eterm old_term;
+
+ if (!try_seize_update_permission(BIF_P)) {
+ ERTS_BIF_YIELD1(bif_export[BIF_persistent_term_erase_1],
+ BIF_P, BIF_ARG_1);
+ }
+
+ old_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+ entry_index = lookup(old_table, key);
+ old_term = old_table->term[entry_index];
+ if (is_boxed(old_term)) {
+ Uint new_size;
+ HashTable* tmp_table;
+
+ /*
+ * Since we don't use any delete markers, we must rehash
+ * the table when deleting terms to ensure that all terms
+ * can still be reached if there are hash collisions.
+ * We can't rehash in place and it would not be safe to modify
+ * the old table yet, so we will first need a new
+ * temporary table copy of the same size as the old one.
+ */
+
+ ASSERT(is_tuple_arity(old_term, 2));
+ tmp_table = tmp_table_copy(old_table);
+
+ /*
+ * Delete the term from the temporary table. Then copy the
+ * temporary table to a new table, rehashing the entries
+ * while copying.
+ */
+
+ tmp_table->term[entry_index] = NIL;
+ tmp_table->num_entries--;
+ new_size = tmp_table->allocated;
+ if (MUST_SHRINK(tmp_table)) {
+ new_size /= 2;
+ }
+ new_table = copy_table(tmp_table, new_size, 1);
+ erts_free(ERTS_ALC_T_TMP, tmp_table);
+
+ mark_for_deletion(old_table, entry_index);
+ erts_schedule_thr_prgr_later_op(table_updater, new_table, &thr_prog_op);
+ suspend_updater(BIF_P);
+ ERTS_BIF_YIELD_RETURN(BIF_P, am_true);
+ }
+
+ /*
+ * Key is not present. Nothing to do.
+ */
+
+ ASSERT(is_nil(old_term));
+ release_update_permission(0);
+ BIF_RET(am_false);
+}
+
+BIF_RETTYPE erts_internal_erase_persistent_terms_0(BIF_ALIST_0)
+{
+ HashTable* old_table;
+ HashTable* new_table;
+
+ if (!try_seize_update_permission(BIF_P)) {
+ ERTS_BIF_YIELD0(bif_export[BIF_erts_internal_erase_persistent_terms_0],
+ BIF_P);
+ }
+ old_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+ old_table->first_to_delete = 0;
+ old_table->num_to_delete = old_table->allocated;
+ new_table = create_initial_table();
+ erts_schedule_thr_prgr_later_op(table_updater, new_table, &thr_prog_op);
+ suspend_updater(BIF_P);
+ ERTS_BIF_YIELD_RETURN(BIF_P, am_true);
+}
+
+BIF_RETTYPE persistent_term_info_0(BIF_ALIST_0)
+{
+ HashTable* hash_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+ TrapData* trap_data;
+ Eterm res = NIL;
+ Eterm magic_ref;
+ Binary* mbp;
+
+ magic_ref = alloc_trap_data(BIF_P);
+ mbp = erts_magic_ref2bin(magic_ref);
+ trap_data = ERTS_MAGIC_BIN_DATA(mbp);
+ trap_data->table = hash_table;
+ trap_data->idx = 0;
+ trap_data->remaining = hash_table->num_entries;
+ trap_data->memory = 0;
+ res = do_info(BIF_P, trap_data);
+ if (trap_data->remaining == 0) {
+ BUMP_REDS(BIF_P, hash_table->num_entries);
+ trap_data->table = NULL; /* Prevent refc decrement */
+ BIF_RET(res);
+ } else {
+ /*
+ * Increment the ref counter to prevent an update operation (by put/2
+ * or erase/1) to delete this hash table.
+ */
+ erts_atomic_inc_nob(&hash_table->refc);
+ BUMP_ALL_REDS(BIF_P);
+ BIF_TRAP2(&persistent_term_info_export, BIF_P, magic_ref, res);
+ }
+}
+
+Uint
+erts_persistent_term_count(void)
+{
+ HashTable* hash_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+ return hash_table->num_entries;
+}
+
+void
+erts_init_persistent_dumping(void)
+{
+ HashTable* hash_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+ ErtsLiteralArea** area_p;
+ Uint i;
+
+ /*
+ * Overwrite the array of Eterms in the current hash table
+ * with pointers to literal areas.
+ */
+
+ erts_persistent_areas = (ErtsLiteralArea **) hash_table->term;
+ erts_num_persistent_areas = hash_table->num_entries;
+ area_p = erts_persistent_areas;
+ for (i = 0; i < hash_table->allocated; i++) {
+ Eterm term = hash_table->term[i];
+
+ if (is_boxed(term)) {
+ *area_p++ = term_to_area(term);
+ }
+ }
+}
+
+/*
+ * Local functions.
+ */
+
+static HashTable*
+create_initial_table(void)
+{
+ HashTable* hash_table;
+ int i;
+
+ hash_table = (HashTable *) erts_alloc(ERTS_ALC_T_PERSISTENT_TERM,
+ sizeof(HashTable)+sizeof(Eterm) *
+ (INITIAL_SIZE-1));
+ hash_table->allocated = INITIAL_SIZE;
+ hash_table->num_entries = 0;
+ hash_table->mask = INITIAL_SIZE-1;
+ hash_table->first_to_delete = 0;
+ hash_table->num_to_delete = 0;
+ erts_atomic_init_nob(&hash_table->refc, (erts_aint_t)1);
+ for (i = 0; i < INITIAL_SIZE; i++) {
+ hash_table->term[i] = NIL;
+ }
+ return hash_table;
+}
+
+static BIF_RETTYPE
+persistent_term_get_all_trap(BIF_ALIST_2)
+{
+ TrapData* trap_data;
+ Eterm res = BIF_ARG_2;
+ Uint bump_reds;
+ Binary* mbp;
+
+ ASSERT(is_list(BIF_ARG_2));
+ mbp = erts_magic_ref2bin(BIF_ARG_1);
+ trap_data = ERTS_MAGIC_BIN_DATA(mbp);
+ bump_reds = trap_data->remaining;
+ res = do_get_all(BIF_P, trap_data, res);
+ ASSERT(is_list(res));
+ if (trap_data->remaining > 0) {
+ BUMP_ALL_REDS(BIF_P);
+ BIF_TRAP2(&persistent_term_get_all_export, BIF_P, BIF_ARG_1, res);
+ } else {
+ /*
+ * Decrement ref count (and possibly delete the hash table
+ * and associated literal area).
+ */
+ dec_table_refc(BIF_P, trap_data->table);
+ trap_data->table = NULL; /* Prevent refc decrement */
+ BUMP_REDS(BIF_P, bump_reds);
+ BIF_RET(res);
+ }
+}
+
+static Eterm
+do_get_all(Process* c_p, TrapData* trap_data, Eterm res)
+{
+ HashTable* hash_table;
+ Uint remaining;
+ Uint idx;
+ Uint max_iter;
+ Uint i;
+ Eterm* hp;
+ Uint heap_size;
+ struct copy_term {
+ Uint key_size;
+ Eterm* tuple_ptr;
+ } *copy_data;
+
+ hash_table = trap_data->table;
+ idx = trap_data->idx;
+#if defined(DEBUG) || defined(VALGRIND)
+ max_iter = 50;
+#else
+ max_iter = ERTS_BIF_REDS_LEFT(c_p);
+#endif
+ remaining = trap_data->remaining < max_iter ?
+ trap_data->remaining : max_iter;
+ trap_data->remaining -= remaining;
+
+ copy_data = (struct copy_term *) erts_alloc(ERTS_ALC_T_TMP,
+ remaining *
+ sizeof(struct copy_term));
+ i = 0;
+ heap_size = (2 + 3) * remaining;
+ while (remaining != 0) {
+ Eterm term = hash_table->term[idx];
+ if (is_tuple(term)) {
+ Uint key_size;
+ Eterm* tup_val;
+
+ ASSERT(is_tuple_arity(term, 2));
+ tup_val = tuple_val(term);
+ key_size = size_object(tup_val[1]);
+ copy_data[i].key_size = key_size;
+ copy_data[i].tuple_ptr = tup_val;
+ heap_size += key_size;
+ i++;
+ remaining--;
+ }
+ idx++;
+ }
+ trap_data->idx = idx;
+
+ hp = HAlloc(c_p, heap_size);
+ remaining = i;
+ for (i = 0; i < remaining; i++) {
+ Eterm* tuple_ptr;
+ Uint key_size;
+ Eterm key;
+ Eterm tup;
+
+ tuple_ptr = copy_data[i].tuple_ptr;
+ key_size = copy_data[i].key_size;
+ key = copy_struct(tuple_ptr[1], key_size, &hp, &c_p->off_heap);
+ tup = TUPLE2(hp, key, tuple_ptr[2]);
+ hp += 3;
+ res = CONS(hp, tup, res);
+ hp += 2;
+ }
+ erts_free(ERTS_ALC_T_TMP, copy_data);
+ return res;
+}
+
+static BIF_RETTYPE
+persistent_term_info_trap(BIF_ALIST_1)
+{
+ TrapData* trap_data = (TrapData *) BIF_ARG_1;
+ Eterm res;
+ Uint bump_reds;
+ Binary* mbp;
+
+ mbp = erts_magic_ref2bin(BIF_ARG_1);
+ trap_data = ERTS_MAGIC_BIN_DATA(mbp);
+ bump_reds = trap_data->remaining;
+ res = do_info(BIF_P, trap_data);
+ if (trap_data->remaining > 0) {
+ ASSERT(res == am_ok);
+ BUMP_ALL_REDS(BIF_P);
+ BIF_TRAP1(&persistent_term_info_export, BIF_P, BIF_ARG_1);
+ } else {
+ /*
+ * Decrement ref count (and possibly delete the hash table
+ * and associated literal area).
+ */
+ dec_table_refc(BIF_P, trap_data->table);
+ trap_data->table = NULL; /* Prevent refc decrement */
+ BUMP_REDS(BIF_P, bump_reds);
+ ASSERT(is_map(res));
+ BIF_RET(res);
+ }
+}
+
+#define DECL_AM(S) Eterm AM_ ## S = am_atom_put(#S, sizeof(#S) - 1)
+
+static Eterm
+do_info(Process* c_p, TrapData* trap_data)
+{
+ HashTable* hash_table;
+ Uint remaining;
+ Uint idx;
+ Uint max_iter;
+
+ hash_table = trap_data->table;
+ idx = trap_data->idx;
+#if defined(DEBUG) || defined(VALGRIND)
+ max_iter = 50;
+#else
+ max_iter = ERTS_BIF_REDS_LEFT(c_p);
+#endif
+ remaining = trap_data->remaining < max_iter ? trap_data->remaining : max_iter;
+ trap_data->remaining -= remaining;
+ while (remaining != 0) {
+ if (is_boxed(hash_table->term[idx])) {
+ ErtsLiteralArea* area;
+ area = term_to_area(hash_table->term[idx]);
+ trap_data->memory += sizeof(ErtsLiteralArea) +
+ sizeof(Eterm) * (area->end - area->start - 1);
+ remaining--;
+ }
+ idx++;
+ }
+ trap_data->idx = idx;
+ if (trap_data->remaining > 0) {
+ return am_ok; /* Dummy return value */
+ } else {
+ Eterm* hp;
+ Eterm count_term;
+ Eterm memory_term;
+ Eterm res;
+ Uint memory;
+ Uint hsz = MAP_SZ(2);
+
+ memory = sizeof(HashTable) + (trap_data->table->allocated-1) *
+ sizeof(Eterm) + trap_data->memory;
+ (void) erts_bld_uint(NULL, &hsz, hash_table->num_entries);
+ (void) erts_bld_uint(NULL, &hsz, memory);
+ hp = HAlloc(c_p, hsz);
+ count_term = erts_bld_uint(&hp, NULL, hash_table->num_entries);
+ memory_term = erts_bld_uint(&hp, NULL, memory);
+ res = MAP2(hp, am_count, count_term, am_memory, memory_term);
+ return res;
+ }
+}
+
+#undef DECL_AM
+
+static Eterm
+alloc_trap_data(Process* c_p)
+{
+ Binary* mbp = erts_create_magic_binary(sizeof(TrapData),
+ cleanup_trap_data);
+ Eterm* hp;
+
+ hp = HAlloc(c_p, ERTS_MAGIC_REF_THING_SIZE);
+ return erts_mk_magic_ref(&hp, &MSO(c_p), mbp);
+}
+
+static int
+cleanup_trap_data(Binary *bp)
+{
+ TrapData* trap_data = ERTS_MAGIC_BIN_DATA(bp);
+
+ if (trap_data->table) {
+ /*
+ * The process has been killed and is now exiting.
+ * Decrement the reference counter for the table.
+ */
+ dec_table_refc(NULL, trap_data->table);
+ }
+ return 1;
+}
+
+static Uint
+lookup(HashTable* hash_table, Eterm key)
+{
+ Uint mask = hash_table->mask;
+ Eterm* table = hash_table->term;
+ Uint32 idx = make_internal_hash(key, 0);
+ Eterm term;
+
+ do {
+ idx++;
+ term = table[idx & mask];
+ } while (is_boxed(term) && !EQ(key, (tuple_val(term))[1]));
+ return idx & mask;
+}
+
+static HashTable*
+tmp_table_copy(HashTable* old_table)
+{
+ Uint size = old_table->allocated;
+ HashTable* tmp_table;
+ Uint i;
+
+ tmp_table = (HashTable *) erts_alloc(ERTS_ALC_T_TMP,
+ sizeof(HashTable) +
+ sizeof(Eterm) * (size-1));
+ *tmp_table = *old_table;
+ for (i = 0; i < size; i++) {
+ tmp_table->term[i] = old_table->term[i];
+ }
+ return tmp_table;
+}
+
+static HashTable*
+copy_table(HashTable* old_table, Uint new_size, int rehash)
+{
+ HashTable* new_table;
+ Uint old_size = old_table->allocated;
+ Uint i;
+
+ new_table = (HashTable *) erts_alloc(ERTS_ALC_T_PERSISTENT_TERM,
+ sizeof(HashTable) +
+ sizeof(Eterm) * (new_size-1));
+ if (old_table->allocated == new_size && !rehash) {
+ /*
+ * Same size and no key deleted. Make an exact copy of the table.
+ */
+ *new_table = *old_table;
+ for (i = 0; i < new_size; i++) {
+ new_table->term[i] = old_table->term[i];
+ }
+ } else {
+ /*
+ * The size of the table has changed or an element has been
+ * deleted. Must rehash, by inserting all old terms into the
+ * new (empty) table.
+ */
+ new_table->allocated = new_size;
+ new_table->num_entries = old_table->num_entries;
+ new_table->mask = new_size - 1;
+ for (i = 0; i < new_size; i++) {
+ new_table->term[i] = NIL;
+ }
+ for (i = 0; i < old_size; i++) {
+ if (is_tuple(old_table->term[i])) {
+ Eterm key = tuple_val(old_table->term[i])[1];
+ Uint entry_index = lookup(new_table, key);
+ ASSERT(is_nil(new_table->term[entry_index]));
+ new_table->term[entry_index] = old_table->term[i];
+ }
+ }
+ }
+ new_table->first_to_delete = 0;
+ new_table->num_to_delete = 0;
+ erts_atomic_init_nob(&new_table->refc, (erts_aint_t)1);
+ return new_table;
+}
+
+static void
+mark_for_deletion(HashTable* hash_table, Uint entry_index)
+{
+ hash_table->first_to_delete = entry_index;
+ hash_table->num_to_delete = 1;
+}
+
+static ErtsLiteralArea*
+term_to_area(Eterm tuple)
+{
+ ASSERT(is_tuple_arity(tuple, 2));
+ return (ErtsLiteralArea *) (((char *) tuple_val(tuple)) -
+ offsetof(ErtsLiteralArea, start));
+}
+
+static void
+table_updater(void* data)
+{
+ HashTable* old_table;
+ HashTable* new_table;
+
+ old_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+ new_table = (HashTable *) data;
+ ASSERT(new_table->num_to_delete == 0);
+ erts_atomic_set_nob(&the_hash_table, (erts_aint_t)new_table);
+ append_to_delete_queue(old_table);
+ erts_schedule_thr_prgr_later_op(table_deleter,
+ old_table,
+ &old_table->thr_prog_op);
+ release_update_permission(1);
+}
+
+static void
+table_deleter(void* data)
+{
+ HashTable* old_table = (HashTable *) data;
+
+ dec_table_refc(NULL, old_table);
+}
+
+static void
+dec_table_refc(Process* c_p, HashTable* old_table)
+{
+ erts_aint_t refc = erts_atomic_dec_read_nob(&old_table->refc);
+
+ if (refc == 0) {
+ HashTable* to_delete;
+
+ while ((to_delete = next_to_delete()) != NULL) {
+ delete_table(c_p, to_delete);
+ }
+ }
+}
+
+static void
+delete_table(Process* c_p, HashTable* table)
+{
+ Uint idx = table->first_to_delete;
+ Uint n = table->num_to_delete;
+
+ /*
+ * There are no longer any references to this hash table.
+ *
+ * Any literals pointed for deletion can be queued for
+ * deletion and the table itself can be deallocated.
+ */
+
+#ifdef DEBUG
+ if (n == 1) {
+ ASSERT(is_tuple_arity(table->term[idx], 2));
+ }
+#endif
+
+ while (n > 0) {
+ Eterm term = table->term[idx];
+
+ if (is_tuple_arity(term, 2)) {
+ if (is_immed(tuple_val(term)[2])) {
+ erts_release_literal_area(term_to_area(term));
+ } else {
+ erts_queue_release_literals(c_p, term_to_area(term));
+ }
+ }
+ idx++, n--;
+ }
+ erts_free(ERTS_ALC_T_PERSISTENT_TERM, table);
+}
+
+/*
+ * Caller *must* yield if this function returns 0.
+ */
+
+static int
+try_seize_update_permission(Process* c_p)
+{
+ int success;
+
+ ASSERT(!erts_thr_progress_is_blocking()); /* to avoid deadlock */
+ ASSERT(c_p != NULL);
+
+ erts_mtx_lock(&update_table_permission_mtx);
+ ASSERT(updater_process != c_p);
+ success = (updater_process == NULL);
+ if (success) {
+ updater_process = c_p;
+ } else {
+ struct update_queue_item* qitem;
+ qitem = erts_alloc(ERTS_ALC_T_PERSISTENT_LOCK_Q, sizeof(*qitem));
+ qitem->p = c_p;
+ erts_proc_inc_refc(c_p);
+ qitem->next = update_queue;
+ update_queue = qitem;
+ erts_suspend(c_p, ERTS_PROC_LOCK_MAIN, NULL);
+ }
+ erts_mtx_unlock(&update_table_permission_mtx);
+ return success;
+}
+
+static void
+release_update_permission(int release_updater)
+{
+ erts_mtx_lock(&update_table_permission_mtx);
+ ASSERT(updater_process != NULL);
+
+ if (release_updater) {
+ erts_proc_lock(updater_process, ERTS_PROC_LOCK_STATUS);
+ if (!ERTS_PROC_IS_EXITING(updater_process)) {
+ erts_resume(updater_process, ERTS_PROC_LOCK_STATUS);
+ }
+ erts_proc_unlock(updater_process, ERTS_PROC_LOCK_STATUS);
+ }
+ updater_process = NULL;
+
+ while (update_queue != NULL) { /* Unleash the entire herd */
+ struct update_queue_item* qitem = update_queue;
+ erts_proc_lock(qitem->p, ERTS_PROC_LOCK_STATUS);
+ if (!ERTS_PROC_IS_EXITING(qitem->p)) {
+ erts_resume(qitem->p, ERTS_PROC_LOCK_STATUS);
+ }
+ erts_proc_unlock(qitem->p, ERTS_PROC_LOCK_STATUS);
+ update_queue = qitem->next;
+ erts_proc_dec_refc(qitem->p);
+ erts_free(ERTS_ALC_T_PERSISTENT_LOCK_Q, qitem);
+ }
+ erts_mtx_unlock(&update_table_permission_mtx);
+}
+
+static void
+suspend_updater(Process* c_p)
+{
+#ifdef DEBUG
+ ASSERT(c_p != NULL);
+ erts_mtx_lock(&update_table_permission_mtx);
+ ASSERT(updater_process == c_p);
+ erts_mtx_unlock(&update_table_permission_mtx);
+#endif
+ erts_suspend(c_p, ERTS_PROC_LOCK_MAIN, NULL);
+}
+
+static void
+append_to_delete_queue(HashTable* table)
+{
+ erts_mtx_lock(&delete_queue_mtx);
+ table->delete_next = NULL;
+ *delete_queue_tail = table;
+ delete_queue_tail = &table->delete_next;
+ erts_mtx_unlock(&delete_queue_mtx);
+}
+
+static HashTable*
+next_to_delete(void)
+{
+ HashTable* table;
+
+ erts_mtx_lock(&delete_queue_mtx);
+ table = delete_queue_head;
+ if (table) {
+ if (erts_atomic_read_nob(&table->refc)) {
+ /*
+ * This hash table is still referenced. Hash tables
+ * must be deleted in order, so we return a NULL
+ * pointer.
+ */
+ table = NULL;
+ } else {
+ /*
+ * Remove the first hash table from the queue.
+ */
+ delete_queue_head = table->delete_next;
+ if (delete_queue_head == NULL) {
+ delete_queue_tail = &delete_queue_head;
+ }
+ }
+ }
+ erts_mtx_unlock(&delete_queue_mtx);
+ return table;
+}
diff --git a/erts/emulator/beam/erl_bif_unique.h b/erts/emulator/beam/erl_bif_unique.h
index 40b70667c0..944788c67c 100644
--- a/erts/emulator/beam/erl_bif_unique.h
+++ b/erts/emulator/beam/erl_bif_unique.h
@@ -242,11 +242,11 @@ erts_internal_ref_number_cmp(Uint32 num1[ERTS_REF_NUMBERS],
Uint32 num2[ERTS_REF_NUMBERS])
{
if (num1[2] != num2[2])
- return (int) ((Sint64) num1[2] - (Sint64) num2[2]);
+ return num1[2] > num2[2] ? 1 : -1;
if (num1[1] != num2[1])
- return (int) ((Sint64) num1[1] - (Sint64) num2[1]);
+ return num1[1] > num2[1] ? 1 : -1;
if (num1[0] != num2[0])
- return (int) ((Sint64) num1[0] - (Sint64) num2[0]);
+ return num1[0] > num2[0] ? 1 : -1;
return 0;
}
diff --git a/erts/emulator/beam/erl_db_tree.c b/erts/emulator/beam/erl_db_tree.c
index 788718ab09..45e4be2426 100644
--- a/erts/emulator/beam/erl_db_tree.c
+++ b/erts/emulator/beam/erl_db_tree.c
@@ -1860,22 +1860,14 @@ static int db_select_replace_tree(Process *p, DbTable *tbl, Eterm tid,
sc.mp = mpi.mp;
- stack = get_static_stack(tb);
if (!mpi.got_partial && mpi.some_limitation &&
CMP_EQ(mpi.least,mpi.most)) {
- TreeDbTerm* term = *(mpi.save_term);
doit_select_replace(tb,mpi.save_term,&sc,0 /* dummy */);
- if (stack != NULL) {
- if (TOP_NODE(stack) == term)
- // throw away potentially invalid reference
- REPLACE_TOP_NODE(stack, *(mpi.save_term));
- release_stack(tb, stack);
- }
+ reset_static_stack(tb); /* may refer replaced term */
RET_TO_BIF(erts_make_integer(sc.replaced,p),DB_ERROR_NONE);
}
- if (stack == NULL)
- stack = get_any_stack(tb);
+ stack = get_any_stack(tb);
if (mpi.some_limitation) {
if ((this = find_next_from_pb_key(tb, stack, mpi.most)) != NULL) {
diff --git a/erts/emulator/beam/erl_dirty_bif.tab b/erts/emulator/beam/erl_dirty_bif.tab
index 086275fbe5..20299ff604 100644
--- a/erts/emulator/beam/erl_dirty_bif.tab
+++ b/erts/emulator/beam/erl_dirty_bif.tab
@@ -59,8 +59,6 @@ dirty-cpu erts_debug:lcnt_clear/0
dirty-cpu-test erlang:'++'/2
dirty-cpu-test erlang:append/2
-dirty-cpu-test erlang:'--'/2
-dirty-cpu-test erlang:subtract/2
dirty-cpu-test erlang:iolist_size/1
dirty-cpu-test erlang:make_tuple/2
dirty-cpu-test erlang:make_tuple/3
diff --git a/erts/emulator/beam/erl_gc.c b/erts/emulator/beam/erl_gc.c
index 4a80c00416..3a50b294d1 100644
--- a/erts/emulator/beam/erl_gc.c
+++ b/erts/emulator/beam/erl_gc.c
@@ -681,7 +681,7 @@ garbage_collect(Process* p, ErlHeapFragment *live_hf_end,
ErtsMonotonicTime start_time;
ErtsSchedulerData *esdp = erts_proc_sched_data(p);
erts_aint32_t state;
- ERTS_MSACC_PUSH_STATE_M();
+ ERTS_MSACC_PUSH_STATE();
#ifdef USE_VM_PROBES
DTRACE_CHARBUF(pidbuf, DTRACE_TERM_BUF_SIZE);
#endif
@@ -711,7 +711,7 @@ garbage_collect(Process* p, ErlHeapFragment *live_hf_end,
else if (p->live_hf_end != ERTS_INVALID_HFRAG_PTR)
live_hf_end = p->live_hf_end;
- ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_GC);
+ ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_GC);
erts_atomic32_read_bor_nob(&p->state, ERTS_PSFLG_GC);
if (erts_system_monitor_long_gc != 0)
@@ -759,7 +759,7 @@ garbage_collect(Process* p, ErlHeapFragment *live_hf_end,
gc_trace_end_tag = am_gc_minor_end;
} else {
do_major_collection:
- ERTS_MSACC_SET_STATE_CACHED_M_X(ERTS_MSACC_STATE_GC_FULL);
+ ERTS_MSACC_SET_STATE_CACHED_X(ERTS_MSACC_STATE_GC_FULL);
if (IS_TRACED_FL(p, F_TRACE_GC)) {
trace_gc(p, am_gc_major_start, need, THE_NON_VALUE);
}
@@ -770,7 +770,7 @@ do_major_collection:
p->flags &= ~(F_DIRTY_MAJOR_GC|F_DIRTY_MINOR_GC);
DTRACE2(gc_major_end, pidbuf, reclaimed_now);
gc_trace_end_tag = am_gc_major_end;
- ERTS_MSACC_SET_STATE_CACHED_M_X(ERTS_MSACC_STATE_GC);
+ ERTS_MSACC_SET_STATE_CACHED_X(ERTS_MSACC_STATE_GC);
}
reset_active_writer(p);
@@ -800,7 +800,7 @@ do_major_collection:
/* We have to make sure that we have space for need on the heap */
res = delay_garbage_collection(p, live_hf_end, need, fcalls);
- ERTS_MSACC_POP_STATE_M();
+ ERTS_MSACC_POP_STATE();
return res;
}
@@ -843,7 +843,7 @@ do_major_collection:
FLAGS(p) &= ~(F_FORCE_GC|F_HIBERNATED);
p->live_hf_end = ERTS_INVALID_HFRAG_PTR;
- ERTS_MSACC_POP_STATE_M();
+ ERTS_MSACC_POP_STATE();
#ifdef CHECK_FOR_HOLES
/*
@@ -1133,9 +1133,28 @@ erts_garbage_collect_literals(Process* p, Eterm* literals,
reds = (Sint64) garbage_collect(p, ERTS_INVALID_HFRAG_PTR, 0,
p->arg_reg, p->arity, fcalls,
ygen_usage);
+ if (ERTS_PROC_IS_EXITING(p)) {
+ return 0;
+ }
ASSERT(!(p->flags & (F_DIRTY_MAJOR_GC|F_DIRTY_MINOR_GC)));
+ if (MAX_HEAP_SIZE_GET(p)) {
+ Uint new_heap_size;
+ Uint old_heap_size;
+ Uint total_heap_size;
+
+ new_heap_size = HEAP_END(p) - HEAP_START(p);
+ old_heap_size = erts_next_heap_size(lit_size, 0);
+ total_heap_size = new_heap_size + old_heap_size;
+ if (MAX_HEAP_SIZE_GET(p) < total_heap_size &&
+ reached_max_heap_size(p, total_heap_size,
+ new_heap_size, old_heap_size)) {
+ erts_set_self_exiting(p, am_killed);
+ return 0;
+ }
+ }
+
/*
* Set GC state.
*/
@@ -2440,7 +2459,7 @@ erts_copy_one_frag(Eterm** hpp, ErlOffHeap* off_heap,
*hpp = hp;
for (i = 0; i < nrefs; i++) {
- if (is_not_immed(refs[i]))
+ if (is_not_immed(refs[i]) && !erts_is_literal(refs[i],ptr_val(refs[i])))
refs[i] = offset_ptr(refs[i], offs);
}
bp->off_heap.first = NULL;
diff --git a/erts/emulator/beam/erl_hl_timer.c b/erts/emulator/beam/erl_hl_timer.c
index 6ec6f8065e..ef7a55fa38 100644
--- a/erts/emulator/beam/erl_hl_timer.c
+++ b/erts/emulator/beam/erl_hl_timer.c
@@ -3041,15 +3041,23 @@ erts_set_port_timer(Port *c_prt, Sint64 tmo)
check_canceled_queue(esdp, esdp->timer_service);
- timeout_pos = get_timeout_pos(erts_get_monotonic_time(esdp), tmo);
-
- create_timer = (tmo < ERTS_TIMER_WHEEL_MSEC
- ? create_tw_timer
- : create_hl_timer);
- tmr = (void *) create_timer(esdp, timeout_pos, 0, ERTS_TMR_PORT,
- (void *) c_prt, c_prt->common.id,
- THE_NON_VALUE, NULL, NULL, NULL);
- erts_atomic_set_relb(&c_prt->common.timer, (erts_aint_t) tmr);
+ if (tmo == 0) {
+ erts_atomic_set_relb(&c_prt->common.timer, ERTS_PTMR_TIMEDOUT);
+ erts_port_task_schedule(c_prt->common.id,
+ &c_prt->timeout_task,
+ ERTS_PORT_TASK_TIMEOUT);
+ } else {
+
+ timeout_pos = get_timeout_pos(erts_get_monotonic_time(esdp), tmo);
+
+ create_timer = (tmo < ERTS_TIMER_WHEEL_MSEC
+ ? create_tw_timer
+ : create_hl_timer);
+ tmr = (void *) create_timer(esdp, timeout_pos, 0, ERTS_TMR_PORT,
+ (void *) c_prt, c_prt->common.id,
+ THE_NON_VALUE, NULL, NULL, NULL);
+ erts_atomic_set_relb(&c_prt->common.timer, (erts_aint_t) tmr);
+ }
}
void
diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c
index 57c6c10c7f..99e788c718 100644
--- a/erts/emulator/beam/erl_init.c
+++ b/erts/emulator/beam/erl_init.c
@@ -354,6 +354,7 @@ erl_init(int ncpu,
erts_init_bif();
erts_init_bif_chksum();
erts_init_bif_binary();
+ erts_init_bif_persistent_term();
erts_init_bif_re();
erts_init_unicode(); /* after RE to get access to PCRE unicode */
erts_init_external();
@@ -2358,8 +2359,8 @@ system_cleanup(int flush_async)
* The exiting thread might be waiting for
* us to block; need to update status...
*/
- erts_thr_progress_active(NULL, 0);
- erts_thr_progress_prepare_wait(NULL);
+ erts_thr_progress_active(erts_thr_prgr_data(NULL), 0);
+ erts_thr_progress_prepare_wait(erts_thr_prgr_data(NULL));
}
/* Wait forever... */
while (1)
diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c
index 463ae898a3..1416c5f96c 100644
--- a/erts/emulator/beam/erl_lock_check.c
+++ b/erts/emulator/beam/erl_lock_check.c
@@ -97,6 +97,8 @@ static erts_lc_lock_order_t erts_lock_order[] = {
{ "proc_btm", "pid" },
{ "dist_entry", "address" },
{ "dist_entry_links", "address" },
+ { "update_persistent_term_permission", NULL },
+ { "persistent_term_delete_permission", NULL },
{ "code_write_permission", NULL },
{ "purge_state", NULL },
{ "proc_status", "pid" },
diff --git a/erts/emulator/beam/erl_monitor_link.h b/erts/emulator/beam/erl_monitor_link.h
index 9ff8aa509a..ed7bf7d54a 100644
--- a/erts/emulator/beam/erl_monitor_link.h
+++ b/erts/emulator/beam/erl_monitor_link.h
@@ -1387,11 +1387,14 @@ ERTS_GLB_INLINE void
erts_monitor_release(ErtsMonitor *mon)
{
ErtsMonitorData *mdp = erts_monitor_to_data(mon);
- ERTS_ML_ASSERT(!(mon->flags & ERTS_ML_FLG_IN_TABLE));
ERTS_ML_ASSERT(erts_atomic32_read_nob(&mdp->refc) > 0);
- if (erts_atomic32_dec_read_nob(&mdp->refc) == 0)
+ if (erts_atomic32_dec_read_nob(&mdp->refc) == 0) {
+ ERTS_ML_ASSERT(!(mdp->origin.flags & ERTS_ML_FLG_IN_TABLE));
+ ERTS_ML_ASSERT(!(mdp->target.flags & ERTS_ML_FLG_IN_TABLE));
+
erts_monitor_destroy__(mdp);
+ }
}
ERTS_GLB_INLINE void
@@ -1399,12 +1402,14 @@ erts_monitor_release_both(ErtsMonitorData *mdp)
{
ERTS_ML_ASSERT((mdp->origin.flags & ERTS_ML_FLGS_SAME)
== (mdp->target.flags & ERTS_ML_FLGS_SAME));
- ERTS_ML_ASSERT(!(mdp->origin.flags & ERTS_ML_FLG_IN_TABLE));
- ERTS_ML_ASSERT(!(mdp->target.flags & ERTS_ML_FLG_IN_TABLE));
ERTS_ML_ASSERT(erts_atomic32_read_nob(&mdp->refc) >= 2);
- if (erts_atomic32_add_read_nob(&mdp->refc, (erts_aint32_t) -2) == 0)
+ if (erts_atomic32_add_read_nob(&mdp->refc, (erts_aint32_t) -2) == 0) {
+ ERTS_ML_ASSERT(!(mdp->origin.flags & ERTS_ML_FLG_IN_TABLE));
+ ERTS_ML_ASSERT(!(mdp->target.flags & ERTS_ML_FLG_IN_TABLE));
+
erts_monitor_destroy__(mdp);
+ }
}
ERTS_GLB_INLINE int
diff --git a/erts/emulator/beam/erl_node_tables.c b/erts/emulator/beam/erl_node_tables.c
index f4dc60941a..18ed782ae3 100644
--- a/erts/emulator/beam/erl_node_tables.c
+++ b/erts/emulator/beam/erl_node_tables.c
@@ -421,8 +421,25 @@ static void schedule_delete_dist_entry(DistEntry* dep)
*
* Note that timeouts do not guarantee thread progress.
*/
- erts_schedule_thr_prgr_later_op(start_timer_delete_dist_entry,
- dep, &dep->later_op);
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ if (esdp && !ERTS_SCHEDULER_IS_DIRTY(esdp)) {
+ erts_schedule_thr_prgr_later_op(start_timer_delete_dist_entry,
+ dep, &dep->later_op);
+ } else {
+ /*
+ * Since OTP 20, it's possible that destructor is executed on
+ * a dirty scheduler. Aux work cannot be done on a dirty
+ * scheduler, and scheduling any aux work on a dirty scheduler
+ * makes the scheduler to loop infinitely.
+ * To avoid this, make a spot jump: schedule this function again
+ * on a first normal scheduler. It is guaranteed to be always
+ * online. Since it's a rare event, this shall not pose a big
+ * utilisation hit.
+ */
+ erts_schedule_misc_aux_work(1,
+ (void (*)(void *))schedule_delete_dist_entry,
+ (void *) dep);
+ }
}
static void
diff --git a/erts/emulator/beam/erl_port.h b/erts/emulator/beam/erl_port.h
index 2be0a5bf74..25976d38cc 100644
--- a/erts/emulator/beam/erl_port.h
+++ b/erts/emulator/beam/erl_port.h
@@ -334,6 +334,8 @@ Eterm erts_request_io_bytes(Process *c_p);
#define ERTS_PORT_SFLG_INVALID ((Uint32) (1 << 11))
/* Last port to terminate halts the emulator */
#define ERTS_PORT_SFLG_HALT ((Uint32) (1 << 12))
+/* Check if the event in ready_input should be cleaned */
+#define ERTS_PORT_SFLG_CHECK_FD_CLEANUP ((Uint32) (1 << 13))
#ifdef DEBUG
/* Only debug: make sure all flags aren't cleared unintentionally */
#define ERTS_PORT_SFLG_PORT_DEBUG ((Uint32) (1 << 31))
diff --git a/erts/emulator/beam/erl_port_task.c b/erts/emulator/beam/erl_port_task.c
index 4928d80f27..c8f2e88127 100644
--- a/erts/emulator/beam/erl_port_task.c
+++ b/erts/emulator/beam/erl_port_task.c
@@ -97,6 +97,9 @@ static void chk_task_queues(Port *pp, ErtsPortTask *execq, int processing_busy_q
typedef union {
struct { /* I/O tasks */
ErlDrvEvent event;
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ int is_scheduler_event;
+#endif
} io;
struct {
ErtsProc2PortSigCallback callback;
@@ -141,6 +144,9 @@ struct ErtsPortTaskBusyCallerTable_ {
ErtsPortTaskBusyCaller pre_alloc_busy_caller;
};
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+erts_atomic_t erts_port_task_outstanding_io_tasks;
+#endif
static void begin_port_cleanup(Port *pp,
ErtsPortTask **execq,
@@ -578,13 +584,26 @@ reset_handle(ErtsPortTask *ptp)
}
static ERTS_INLINE void
-reset_executed_io_task_handle(ErtsPortTask *ptp)
+reset_executed_io_task_handle(Port *prt, ErtsPortTask *ptp)
{
if (ptp->u.alive.handle) {
ASSERT(ptp == handle2task(ptp->u.alive.handle));
- /* The port task handle is reset inside task_executed */
- erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle,
- reset_port_task_handle);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (ptp->u.alive.td.io.is_scheduler_event) {
+ if ((erts_atomic32_read_nob(&prt->state) & ERTS_PORT_SFLG_CHECK_FD_CLEANUP)) {
+ erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle,
+ reset_port_task_handle);
+ erts_atomic32_read_band_nob(&prt->state, ~ERTS_PORT_SFLG_CHECK_FD_CLEANUP);
+ } else {
+ reset_port_task_handle(ptp->u.alive.handle);
+ }
+ } else
+#endif
+ {
+ /* The port task handle is reset inside task_executed */
+ erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle,
+ reset_port_task_handle);
+ }
}
}
@@ -1307,6 +1326,22 @@ erts_port_task_abort(ErtsPortTaskHandle *pthp)
res = - 1; /* Task already aborted, executing, or executed */
else {
reset_port_task_handle(pthp);
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ switch (ptp->type) {
+ case ERTS_PORT_TASK_INPUT:
+ case ERTS_PORT_TASK_OUTPUT:
+ if (ptp->u.alive.td.io.is_scheduler_event) {
+ ASSERT(erts_atomic_read_nob(
+ &erts_port_task_outstanding_io_tasks) > 0);
+ erts_atomic_dec_relb(&erts_port_task_outstanding_io_tasks);
+ }
+ break;
+ default:
+ break;
+ }
+#endif
+
res = 0;
}
}
@@ -1442,7 +1477,14 @@ erts_port_task_schedule(Eterm id,
va_list argp;
va_start(argp, type);
ptp->u.alive.td.io.event = va_arg(argp, ErlDrvEvent);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ ptp->u.alive.td.io.is_scheduler_event = va_arg(argp, int);
+#endif
va_end(argp);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (ptp->u.alive.td.io.is_scheduler_event)
+ erts_atomic_inc_relb(&erts_port_task_outstanding_io_tasks);
+#endif
break;
}
case ERTS_PORT_TASK_PROC_SIG: {
@@ -1621,12 +1663,14 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
int processing_busy_q;
int vreds = 0;
int reds = 0;
- erts_aint_t io_tasks_executed = 0;
int fpe_was_unmasked;
erts_aint32_t state;
int active;
Uint64 start_time = 0;
ErtsSchedulerData *esdp = runq->scheduler;
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_aint_t io_tasks_executed = 0;
+#endif
ERTS_MSACC_PUSH_STATE_M();
ERTS_LC_ASSERT(erts_lc_runq_is_locked(runq));
@@ -1722,8 +1766,11 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
for input and output */
(*pp->drv_ptr->ready_input)((ErlDrvData) pp->drv_data,
ptp->u.alive.td.io.event);
- reset_executed_io_task_handle(ptp);
- io_tasks_executed++;
+ reset_executed_io_task_handle(pp, ptp);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (ptp->u.alive.td.io.is_scheduler_event)
+ io_tasks_executed++;
+#endif
break;
case ERTS_PORT_TASK_OUTPUT:
reds = ERTS_PORT_REDS_OUTPUT;
@@ -1732,8 +1779,11 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
LTTNG_DRIVER(driver_ready_output, pp);
(*pp->drv_ptr->ready_output)((ErlDrvData) pp->drv_data,
ptp->u.alive.td.io.event);
- reset_executed_io_task_handle(ptp);
- io_tasks_executed++;
+ reset_executed_io_task_handle(pp, ptp);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (ptp->u.alive.td.io.is_scheduler_event)
+ io_tasks_executed++;
+#endif
break;
case ERTS_PORT_TASK_PROC_SIG: {
ErtsProc2PortSigData *sigdp = &ptp->u.alive.td.psig.data;
@@ -1799,6 +1849,15 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
erts_unblock_fpe(fpe_was_unmasked);
ERTS_MSACC_POP_STATE_M();
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (io_tasks_executed) {
+ ASSERT(erts_atomic_read_nob(&erts_port_task_outstanding_io_tasks)
+ >= io_tasks_executed);
+ erts_atomic_add_relb(&erts_port_task_outstanding_io_tasks,
+ -1*io_tasks_executed);
+ }
+#endif
+
ASSERT(runq == erts_get_runq_port(pp));
active = finalize_exec(pp, &execq, processing_busy_q);
@@ -2086,6 +2145,10 @@ erts_dequeue_port(ErtsRunQueue *rq)
void
erts_port_task_init(void)
{
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_atomic_init_nob(&erts_port_task_outstanding_io_tasks,
+ (erts_aint_t) 0);
+#endif
init_port_task_alloc(erts_no_schedulers + erts_no_poll_threads
+ 1); /* aux_thread */
init_busy_caller_table_alloc();
diff --git a/erts/emulator/beam/erl_port_task.h b/erts/emulator/beam/erl_port_task.h
index ae78a7d8a3..ca5183b305 100644
--- a/erts/emulator/beam/erl_port_task.h
+++ b/erts/emulator/beam/erl_port_task.h
@@ -38,6 +38,8 @@ typedef erts_atomic_t ErtsPortTaskHandle;
#ifndef ERL_PORT_TASK_H__
#define ERL_PORT_TASK_H__
+#include "erl_poll.h"
+
#undef ERTS_INCLUDE_SCHEDULER_INTERNALS
#if (defined(ERL_PROCESS_C__) \
|| defined(ERL_PORT_TASK_C__) \
@@ -54,8 +56,8 @@ typedef erts_atomic_t ErtsPortTaskHandle;
#define ERTS_PT_FLG_BAD_OUTPUT (1 << 4)
typedef enum {
- ERTS_PORT_TASK_INPUT,
- ERTS_PORT_TASK_OUTPUT,
+ ERTS_PORT_TASK_INPUT = 0,
+ ERTS_PORT_TASK_OUTPUT = 1,
ERTS_PORT_TASK_TIMEOUT,
ERTS_PORT_TASK_DIST_CMD,
ERTS_PORT_TASK_PROC_SIG
@@ -134,6 +136,12 @@ ERTS_GLB_INLINE void erts_port_task_sched_unlock(ErtsPortTaskSched *ptsp);
ERTS_GLB_INLINE int erts_port_task_sched_lock_is_locked(ErtsPortTaskSched *ptsp);
ERTS_GLB_INLINE void erts_port_task_sched_enter_exiting_state(ErtsPortTaskSched *ptsp);
+#if defined(ERTS_INCLUDE_SCHEDULER_INTERNALS) && ERTS_POLL_USE_SCHEDULER_POLLING
+ERTS_GLB_INLINE int erts_port_task_have_outstanding_io_tasks(void);
+/* NOTE: Do not access any of the exported variables directly */
+extern erts_atomic_t erts_port_task_outstanding_io_tasks;
+#endif
+
#if ERTS_GLB_INLINE_INCL_FUNC_DEF
ERTS_GLB_INLINE void
@@ -211,6 +219,15 @@ erts_port_task_sched_enter_exiting_state(ErtsPortTaskSched *ptsp)
erts_atomic32_read_bor_nob(&ptsp->flags, ERTS_PTS_FLG_EXITING);
}
+#if defined(ERTS_INCLUDE_SCHEDULER_INTERNALS) && ERTS_POLL_USE_SCHEDULER_POLLING
+ERTS_GLB_INLINE int
+erts_port_task_have_outstanding_io_tasks(void)
+{
+ return (erts_atomic_read_acqb(&erts_port_task_outstanding_io_tasks)
+ != 0);
+}
+#endif
+
#endif
#ifdef ERTS_INCLUDE_SCHEDULER_INTERNALS
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index 0f7f1598fd..2427d87f66 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -174,7 +174,6 @@ ErtsLcPSDLocks erts_psd_required_locks[ERTS_PSD_SIZE];
typedef struct {
int aux_work;
int tse;
- int sys_schedule;
} ErtsBusyWaitParams;
static ErtsBusyWaitParams sched_busy_wait_params[ERTS_SCHED_TYPE_LAST + 1];
@@ -344,6 +343,9 @@ erts_sched_stat_t erts_sched_stat;
static erts_tsd_key_t ERTS_WRITE_UNLIKELY(sched_data_key);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+static erts_atomic32_t doing_sys_schedule;
+#endif
static erts_atomic32_t no_empty_run_queues;
long erts_runq_supervision_interval = 0;
static ethr_event runq_supervision_event;
@@ -1646,7 +1648,7 @@ haw_thr_prgr_wakeup(ErtsAuxWorkData *awdp, ErtsThrPrgrVal val)
awdp->latest_wakeup = val;
haw_chk_later_cleanup_op_wakeup(awdp, val);
}
- erts_thr_progress_wakeup(awdp->esdp, val);
+ erts_thr_progress_wakeup(erts_thr_prgr_data(awdp->esdp), val);
}
}
@@ -1656,7 +1658,7 @@ haw_thr_prgr_soft_wakeup(ErtsAuxWorkData *awdp, ErtsThrPrgrVal val)
if (erts_thr_progress_cmp(val, awdp->latest_wakeup) > 0) {
awdp->latest_wakeup = val;
haw_chk_later_cleanup_op_wakeup(awdp, val);
- erts_thr_progress_wakeup(awdp->esdp, val);
+ erts_thr_progress_wakeup(erts_thr_prgr_data(awdp->esdp), val);
}
}
@@ -1670,7 +1672,7 @@ haw_thr_prgr_later_cleanup_op_wakeup(ErtsAuxWorkData *awdp, ErtsThrPrgrVal val,
else {
awdp->latest_wakeup = val;
awdp->later_op.size = thr_prgr_later_cleanup_op_threshold;
- erts_thr_progress_wakeup(awdp->esdp, val);
+ erts_thr_progress_wakeup(erts_thr_prgr_data(awdp->esdp), val);
}
}
}
@@ -3066,6 +3068,7 @@ aux_thread(void *unused)
ErtsSchedulerSleepInfo *ssi = ERTS_SCHED_SLEEP_INFO_IX(-1);
erts_aint32_t aux_work;
ErtsThrPrgrCallbacks callbacks;
+ ErtsThrPrgrData *tpd;
int thr_prgr_active = 1;
ERTS_MSACC_DECLARE_CACHE();
@@ -3087,12 +3090,16 @@ aux_thread(void *unused)
callbacks.wait = thr_prgr_wait;
callbacks.finalize_wait = thr_prgr_fin_wait;
- erts_thr_progress_register_managed_thread(NULL, &callbacks, 1);
+ tpd = erts_thr_progress_register_managed_thread(NULL, &callbacks, 1);
init_aux_work_data(awdp, NULL, NULL);
awdp->ssi = ssi;
#if ERTS_POLL_USE_FALLBACK
- ssi->psi = erts_create_pollset_thread(-1);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ ssi->psi = erts_create_pollset_thread(-2, tpd);
+#else
+ ssi->psi = erts_create_pollset_thread(-1, tpd);
+#endif
#endif
sched_prep_spin_wait(ssi);
@@ -3105,11 +3112,11 @@ aux_thread(void *unused)
aux_work = erts_atomic32_read_acqb(&ssi->aux_work);
if (aux_work) {
if (!thr_prgr_active)
- erts_thr_progress_active(NULL, thr_prgr_active = 1);
+ erts_thr_progress_active(tpd, thr_prgr_active = 1);
aux_work = handle_aux_work(awdp, aux_work, 1);
ERTS_MSACC_UPDATE_CACHE();
- if (aux_work && erts_thr_progress_update(NULL))
- erts_thr_progress_leader_update(NULL);
+ if (aux_work && erts_thr_progress_update(tpd))
+ erts_thr_progress_leader_update(tpd);
}
if (!aux_work) {
@@ -3120,7 +3127,7 @@ aux_thread(void *unused)
#endif
if (thr_prgr_active)
- erts_thr_progress_active(NULL, thr_prgr_active = 0);
+ erts_thr_progress_active(tpd, thr_prgr_active = 0);
#if ERTS_POLL_USE_FALLBACK
@@ -3132,11 +3139,11 @@ aux_thread(void *unused)
if (flgs & ERTS_SSI_FLG_SLEEPING) {
ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING);
ASSERT(flgs & ERTS_SSI_FLG_WAITING);
- erts_check_io(ssi->psi);
+ erts_check_io(ssi->psi, ERTS_POLL_INF_TIMEOUT);
}
}
#else
- erts_thr_progress_prepare_wait(NULL);
+ erts_thr_progress_prepare_wait(tpd);
flgs = sched_spin_wait(ssi, 0);
@@ -3153,7 +3160,7 @@ aux_thread(void *unused)
ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_OTHER);
}
}
- erts_thr_progress_finalize_wait(NULL);
+ erts_thr_progress_finalize_wait(tpd);
#endif
}
@@ -3171,7 +3178,8 @@ poll_thread(void *arg)
erts_aint32_t aux_work;
ErtsThrPrgrCallbacks callbacks;
int thr_prgr_active = 1;
- struct erts_poll_thread *psi = erts_create_pollset_thread(id);
+ struct erts_poll_thread *psi;
+ ErtsThrPrgrData *tpd;
ERTS_MSACC_DECLARE_CACHE();
#ifdef ERTS_ENABLE_LOCK_CHECK
@@ -3192,9 +3200,12 @@ poll_thread(void *arg)
callbacks.wait = thr_prgr_wait;
callbacks.finalize_wait = thr_prgr_fin_wait;
- erts_thr_progress_register_managed_thread(NULL, &callbacks, 0);
+ tpd = erts_thr_progress_register_managed_thread(NULL, &callbacks, 0);
init_aux_work_data(awdp, NULL, NULL);
awdp->ssi = ssi;
+
+ psi = erts_create_pollset_thread(id, tpd);
+
ssi->psi = psi;
sched_prep_spin_wait(ssi);
@@ -3207,16 +3218,16 @@ poll_thread(void *arg)
aux_work = erts_atomic32_read_acqb(&ssi->aux_work);
if (aux_work) {
if (!thr_prgr_active)
- erts_thr_progress_active(NULL, thr_prgr_active = 1);
+ erts_thr_progress_active(tpd, thr_prgr_active = 1);
aux_work = handle_aux_work(awdp, aux_work, 1);
ERTS_MSACC_UPDATE_CACHE();
- if (aux_work && erts_thr_progress_update(NULL))
- erts_thr_progress_leader_update(NULL);
+ if (aux_work && erts_thr_progress_update(tpd))
+ erts_thr_progress_leader_update(tpd);
}
if (!aux_work) {
if (thr_prgr_active)
- erts_thr_progress_active(NULL, thr_prgr_active = 0);
+ erts_thr_progress_active(tpd, thr_prgr_active = 0);
flgs = sched_spin_wait(ssi, 0);
@@ -3226,7 +3237,7 @@ poll_thread(void *arg)
if (flgs & ERTS_SSI_FLG_SLEEPING) {
ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING);
ASSERT(flgs & ERTS_SSI_FLG_WAITING);
- erts_check_io(psi);
+ erts_check_io(psi, ERTS_POLL_INF_TIMEOUT);
}
}
}
@@ -3236,6 +3247,59 @@ poll_thread(void *arg)
return NULL;
}
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+static ERTS_INLINE void
+clear_sys_scheduling(void)
+{
+ erts_atomic32_set_mb(&doing_sys_schedule, 0);
+}
+
+static ERTS_INLINE int
+try_set_sys_scheduling(void)
+{
+ return 0 == erts_atomic32_cmpxchg_acqb(&doing_sys_schedule, 1, 0);
+}
+
+
+static ERTS_INLINE int
+prepare_for_sys_schedule(void)
+{
+ while (!erts_port_task_have_outstanding_io_tasks()
+ && try_set_sys_scheduling()) {
+ if (!erts_port_task_have_outstanding_io_tasks())
+ return 1;
+ clear_sys_scheduling();
+ }
+ return 0;
+}
+
+static void
+check_io_timer(void *null)
+{
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ if (prepare_for_sys_schedule()) {
+ erts_check_io(esdp->ssi->psi, ERTS_POLL_NO_TIMEOUT);
+ clear_sys_scheduling();
+ }
+
+ /* The timer is cleared if this schedulers run-queue became empty
+ or if the CHECKIO flag was cleared. The CHECKIO flags is cleared
+ when a check_balance assigns another scheduler to be the poller in
+ the overload scenario. */
+ if ((ERTS_RUNQ_FLGS_GET_NOB(esdp->run_queue) & (ERTS_RUNQ_FLG_OUT_OF_WORK|ERTS_RUNQ_FLG_CHECKIO))
+ == ERTS_RUNQ_FLG_CHECKIO) {
+ erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT,
+ check_io_timer, NULL);
+ } else {
+ ERTS_RUNQ_FLGS_UNSET(esdp->run_queue, ERTS_RUNQ_FLG_CHECKIO);
+ }
+}
+
+#else
+#define clear_sys_scheduling()
+#define prepare_for_sys_schedule() 0
+#endif
+
static void
scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
{
@@ -3286,13 +3350,13 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
aux_work = erts_atomic32_read_acqb(&ssi->aux_work);
if (aux_work && !ERTS_SCHEDULER_IS_DIRTY(esdp)) {
if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
}
aux_work = handle_aux_work(&esdp->aux_work_data, aux_work, 1);
ERTS_MSACC_UPDATE_CACHE();
- if (aux_work && erts_thr_progress_update(esdp))
- erts_thr_progress_leader_update(esdp);
+ if (aux_work && erts_thr_progress_update(erts_thr_prgr_data(esdp)))
+ erts_thr_progress_leader_update(erts_thr_prgr_data(esdp));
}
if (aux_work) {
@@ -3301,7 +3365,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
current_time = erts_get_monotonic_time(esdp);
if (current_time >= erts_next_timeout_time(esdp->next_tmo_ref)) {
if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
}
erts_bump_timers(esdp->timer_wheel, current_time);
@@ -3321,19 +3385,36 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
}
if (do_timeout) {
if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
}
}
- else {
+ else if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && prepare_for_sys_schedule()) {
+ /* We sleep in check_io, only for normal schedulers */
+ if (thr_prgr_active) {
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0);
+ sched_wall_time_change(esdp, 0);
+ }
+ flgs = sched_spin_wait(ssi, 0);
+ if (flgs & ERTS_SSI_FLG_SLEEPING) {
+ ASSERT(flgs & ERTS_SSI_FLG_WAITING);
+ flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING);
+ if (flgs & ERTS_SSI_FLG_SLEEPING) {
+ ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING);
+ ASSERT(flgs & ERTS_SSI_FLG_WAITING);
+ erts_check_io(ssi->psi, timeout_time);
+ current_time = erts_get_monotonic_time(esdp);
+ }
+ }
+ clear_sys_scheduling();
+ } else {
if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) {
if (thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 0);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0);
sched_wall_time_change(esdp, 0);
}
- erts_thr_progress_prepare_wait(esdp);
+ erts_thr_progress_prepare_wait(erts_thr_prgr_data(esdp));
}
-
flgs = sched_spin_wait(ssi, spincount);
if (flgs & ERTS_SSI_FLG_SLEEPING) {
ASSERT(flgs & ERTS_SSI_FLG_WAITING);
@@ -3363,7 +3444,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
}
}
if (!ERTS_SCHEDULER_IS_DIRTY(esdp))
- erts_thr_progress_finalize_wait(esdp);
+ erts_thr_progress_finalize_wait(erts_thr_prgr_data(esdp));
}
if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && current_time >= timeout_time)
erts_bump_timers(esdp->timer_wheel, current_time);
@@ -3392,7 +3473,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
if (ERTS_SCHEDULER_IS_DIRTY(esdp))
dirty_sched_wall_time_change(esdp, working = 1);
else if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
}
@@ -4580,6 +4661,15 @@ check_balance(ErtsRunQueue *c_rq)
if (blnc_no_rqs == 1) {
c_rq->check_balance_reds = INT_MAX;
erts_atomic32_set_nob(&balance_info.checking_balance, 0);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ c_rq->check_balance_reds = ERTS_RUNQ_CALL_CHECK_BALANCE_REDS;
+ if ((ERTS_RUNQ_FLGS_GET_NOB(c_rq) & (ERTS_RUNQ_FLG_OUT_OF_WORK|ERTS_RUNQ_FLG_CHECKIO))
+ == 0) {
+ ERTS_RUNQ_FLGS_SET(c_rq, ERTS_RUNQ_FLG_CHECKIO);
+ erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT, check_io_timer, NULL);
+ }
+ ERTS_RUNQ_FLGS_UNSET(c_rq, ERTS_RUNQ_FLGS_MIGRATION_INFO);
+#endif
return;
}
@@ -5099,6 +5189,19 @@ erts_fprintf(stderr, "--------------------------------\n");
/* Publish new migration paths... */
erts_atomic_set_wb(&erts_migration_paths, (erts_aint_t) new_mpaths);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (full_scheds == current_active) {
+ ERTS_ASSERT(full_scheds <= current_active);
+ /* All active schedulers ran for full, we need to do active polling,
+ so we setup a timer that does active polling */
+ if (!(ERTS_RUNQ_FLGS_GET_NOB(c_rq) & ERTS_RUNQ_FLG_CHECKIO)) {
+ /* Active polling is not running, start it */
+ erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT, check_io_timer, NULL);
+ }
+ run_queue_info[c_rq->ix].flags |= ERTS_RUNQ_FLG_CHECKIO;
+ }
+#endif
+
/* Reset balance statistics in all online queues */
for (qix = 0; qix < blnc_no_rqs; qix++) {
Uint32 flags = run_queue_info[qix].flags;
@@ -5108,6 +5211,8 @@ erts_fprintf(stderr, "--------------------------------\n");
ASSERT(!(flags & ERTS_RUNQ_FLG_OUT_OF_WORK));
if (rq->waiting)
flags |= ERTS_RUNQ_FLG_OUT_OF_WORK;
+ if (rq != c_rq)
+ flags &= ~ERTS_RUNQ_FLG_CHECKIO;
rq->full_reds_history_sum
= run_queue_info[qix].full_reds_history_sum;
@@ -5117,8 +5222,7 @@ erts_fprintf(stderr, "--------------------------------\n");
ERTS_DBG_CHK_FULL_REDS_HISTORY(rq);
rq->out_of_work_count = 0;
- (void) ERTS_RUNQ_FLGS_READ_BSET(rq, ERTS_RUNQ_FLGS_MIGRATION_INFO, flags);
-
+ (void) ERTS_RUNQ_FLGS_READ_BSET(rq, ERTS_RUNQ_FLGS_MIGRATION_INFO|ERTS_RUNQ_FLG_CHECKIO, flags);
rq->max_len = erts_atomic32_read_dirty(&rq->len);
for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) {
ErtsRunQueueInfo *rqi;
@@ -5557,7 +5661,6 @@ erts_sched_set_busy_wait_threshold(ErtsSchedType sched_type, char *str)
return EINVAL;
}
- params->sys_schedule = sys_sched;
params->tse = sys_sched * ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT;
params->aux_work = sys_sched * aux_work_fact;
@@ -5768,6 +5871,9 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online, int no_poll_th
size_runqs = sizeof(ErtsAlignedRunQueue) * tot_rqs;
erts_aligned_run_queues =
erts_alloc_permanent_cache_aligned(ERTS_ALC_T_RUNQS, size_runqs);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_atomic32_init_nob(&doing_sys_schedule, 0);
+#endif
erts_atomic32_init_nob(&no_empty_run_queues, 0);
erts_no_run_queues = n;
@@ -7565,7 +7671,8 @@ suspend_scheduler(ErtsSchedulerData *esdp)
if (aux_work|evacuate) {
if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp),
+ thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
}
if (aux_work)
@@ -7573,8 +7680,8 @@ suspend_scheduler(ErtsSchedulerData *esdp)
aux_work,
1);
- if (aux_work && erts_thr_progress_update(esdp))
- erts_thr_progress_leader_update(esdp);
+ if (aux_work && erts_thr_progress_update(erts_thr_prgr_data(esdp)))
+ erts_thr_progress_leader_update(erts_thr_prgr_data(esdp));
if (evacuate) {
erts_runq_lock(esdp->run_queue);
evacuate_run_queue(esdp->run_queue, &sbp);
@@ -7593,18 +7700,18 @@ suspend_scheduler(ErtsSchedulerData *esdp)
if (!aux_work && current_time < timeout_time) {
/* go to sleep... */
if (thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 0);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0);
sched_wall_time_change(esdp, 0);
}
- erts_thr_progress_prepare_wait(NULL);
+ erts_thr_progress_prepare_wait(erts_thr_prgr_data(NULL));
suspend_normal_scheduler_sleep(esdp);
- erts_thr_progress_finalize_wait(NULL);
+ erts_thr_progress_finalize_wait(erts_thr_prgr_data(NULL));
current_time = erts_get_monotonic_time(esdp);
}
if (current_time >= timeout_time) {
if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
}
erts_bump_timers(esdp->timer_wheel, current_time);
@@ -7661,7 +7768,7 @@ suspend_scheduler(ErtsSchedulerData *esdp)
profile_scheduler(make_small(esdp->no), am_active);
if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
}
}
@@ -8296,6 +8403,11 @@ sched_thread_func(void *vesdp)
erts_msacc_init_thread("scheduler", no, 1);
erts_thr_progress_register_managed_thread(esdp, &callbacks, 0);
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ esdp->ssi->psi = erts_create_pollset_thread(-1, NULL);
+#endif
+
erts_alloc_register_scheduler(vesdp);
#ifdef ERTS_ENABLE_LOCK_CHECK
{
@@ -9313,12 +9425,12 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls)
}
}
- leader_update = erts_thr_progress_update(esdp);
+ leader_update = erts_thr_progress_update(erts_thr_prgr_data(esdp));
aux_work = erts_atomic32_read_acqb(&esdp->ssi->aux_work);
if (aux_work | leader_update) {
erts_runq_unlock(rq);
if (leader_update)
- erts_thr_progress_leader_update(esdp);
+ erts_thr_progress_leader_update(erts_thr_prgr_data(esdp));
if (aux_work)
handle_aux_work(&esdp->aux_work_data, aux_work, 0);
erts_runq_lock(rq);
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index 8d20ccdf90..a1b029adbe 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -173,8 +173,10 @@ extern int erts_dio_sched_thread_suggested_stack_size;
(((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 9))
#define ERTS_RUNQ_FLG_HALTING \
(((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 10))
+#define ERTS_RUNQ_FLG_CHECKIO \
+ (((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 11))
-#define ERTS_RUNQ_FLG_MAX (ERTS_RUNQ_FLG_BASE2 + 11)
+#define ERTS_RUNQ_FLG_MAX (ERTS_RUNQ_FLG_BASE2 + 12)
#define ERTS_RUNQ_FLGS_MIGRATION_QMASKS \
(ERTS_RUNQ_FLGS_EMIGRATE_QMASK \
diff --git a/erts/emulator/beam/erl_process_dump.c b/erts/emulator/beam/erl_process_dump.c
index 243db4c734..ac5054ea10 100644
--- a/erts/emulator/beam/erl_process_dump.c
+++ b/erts/emulator/beam/erl_process_dump.c
@@ -58,6 +58,7 @@ static void dump_externally(fmtfn_t to, void *to_arg, Eterm term);
static void mark_literal(Eterm* ptr);
static void init_literal_areas(void);
static void dump_literals(fmtfn_t to, void *to_arg);
+static void dump_persistent_terms(fmtfn_t to, void *to_arg);
static void dump_module_literals(fmtfn_t to, void *to_arg,
ErtsLiteralArea* lit_area);
@@ -74,6 +75,7 @@ erts_deep_process_dump(fmtfn_t to, void *to_arg)
all_binaries = NULL;
init_literal_areas();
+ erts_init_persistent_dumping();
for (i = 0; i < max; i++) {
Process *p = erts_pix2proc(i);
@@ -93,6 +95,7 @@ erts_deep_process_dump(fmtfn_t to, void *to_arg)
}
}
+ dump_persistent_terms(to, to_arg);
dump_literals(to, to_arg);
dump_binaries(to, to_arg, all_binaries);
}
@@ -775,6 +778,9 @@ init_literal_areas(void)
qsort(lit_areas, num_lit_areas, sizeof(ErtsLiteralArea *),
compare_areas);
+ qsort(erts_persistent_areas, erts_num_persistent_areas,
+ sizeof(ErtsLiteralArea *), compare_areas);
+
erts_runlock_old_code(code_ix);
}
@@ -796,6 +802,13 @@ static void mark_literal(Eterm* ptr)
ap = bsearch(ptr, lit_areas, num_lit_areas, sizeof(ErtsLiteralArea*),
search_areas);
+ if (ap == 0) {
+ ap = bsearch(ptr, erts_persistent_areas,
+ erts_num_persistent_areas,
+ sizeof(ErtsLiteralArea*),
+ search_areas);
+ }
+
/*
* If the literal was created by native code, this search will not
@@ -807,12 +820,12 @@ static void mark_literal(Eterm* ptr)
}
}
-
static void
dump_literals(fmtfn_t to, void *to_arg)
{
ErtsCodeIndex code_ix;
int i;
+ Uint idx;
code_ix = erts_active_code_ix();
erts_rlock_old_code(code_ix);
@@ -825,6 +838,28 @@ dump_literals(fmtfn_t to, void *to_arg)
}
erts_runlock_old_code(code_ix);
+
+ for (idx = 0; idx < erts_num_persistent_areas; idx++) {
+ dump_module_literals(to, to_arg, erts_persistent_areas[idx]);
+ }
+}
+
+static void
+dump_persistent_terms(fmtfn_t to, void *to_arg)
+{
+ Uint idx;
+
+ erts_print(to, to_arg, "=persistent_terms\n");
+
+ for (idx = 0; idx < erts_num_persistent_areas; idx++) {
+ ErtsLiteralArea* ap = erts_persistent_areas[idx];
+ Eterm tuple = make_tuple(ap->start);
+ Eterm* tup_val = tuple_val(tuple);
+
+ dump_element(to, to_arg, tup_val[1]);
+ erts_putc(to, to_arg, '|');
+ dump_element_nl(to, to_arg, tup_val[2]);
+ }
}
static void
@@ -963,7 +998,8 @@ dump_module_literals(fmtfn_t to, void *to_arg, ErtsLiteralArea* lit_area)
}
erts_putc(to, to_arg, '\n');
}
- } else if (is_export_header(w)) {
+ } else {
+ /* Dump everything else in the external format */
dump_externally(to, to_arg, term);
erts_putc(to, to_arg, '\n');
}
diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.h b/erts/emulator/beam/erl_sched_spec_pre_alloc.h
index b119c59ab3..74cc966cbe 100644
--- a/erts/emulator/beam/erl_sched_spec_pre_alloc.h
+++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.h
@@ -188,6 +188,7 @@ erts_sspa_alloc(erts_sspa_data_t *data, int cix)
erts_sspa_chunk_t *chnk;
erts_sspa_chunk_header_t *chdr;
erts_sspa_blk_t *res;
+ ERTS_MSACC_PUSH_AND_SET_STATE_M_X(ERTS_MSACC_STATE_ALLOC);
chnk = erts_sspa_cix2chunk(data, cix);
chdr = &chnk->aligned.header;
@@ -201,11 +202,15 @@ erts_sspa_alloc(erts_sspa_data_t *data, int cix)
chdr->local.last = NULL;
ERTS_SSPA_DBG_CHK_LCL(chdr);
}
- if (chdr->local.cnt <= chdr->local.lim)
- return (char *) erts_sspa_process_remote_frees(chdr, res);
+ if (chdr->local.cnt <= chdr->local.lim) {
+ res = erts_sspa_process_remote_frees(chdr, res);
+ ERTS_MSACC_POP_STATE_M_X();
+ return (char*) res;
+ }
else if (chdr->head.no_thr_progress_check < ERTS_SSPA_FORCE_THR_CHECK_PROGRESS)
chdr->head.no_thr_progress_check++;
ASSERT(res);
+ ERTS_MSACC_POP_STATE_M_X();
return (char *) res;
}
diff --git a/erts/emulator/beam/erl_thr_progress.c b/erts/emulator/beam/erl_thr_progress.c
index aa08eb40ec..bac437efe9 100644
--- a/erts/emulator/beam/erl_thr_progress.c
+++ b/erts/emulator/beam/erl_thr_progress.c
@@ -508,6 +508,10 @@ init_wakeup_request_array(ErtsThrPrgrVal *w)
}
}
+ErtsThrPrgrData *erts_thr_progress_data(void) {
+ return erts_tsd_get(erts_thr_prgr_data_key__);
+}
+
void
erts_thr_progress_register_unmanaged_thread(ErtsThrPrgrCallbacks *callbacks)
{
@@ -551,7 +555,7 @@ erts_thr_progress_register_unmanaged_thread(ErtsThrPrgrCallbacks *callbacks)
}
-void
+ErtsThrPrgrData *
erts_thr_progress_register_managed_thread(ErtsSchedulerData *esdp,
ErtsThrPrgrCallbacks *callbacks,
int pref_wakeup)
@@ -630,6 +634,7 @@ erts_thr_progress_register_managed_thread(ErtsSchedulerData *esdp,
wakeup_managed(id);
}
callbacks->finalize_wait(callbacks->arg);
+ return tpd;
}
static ERTS_INLINE int
@@ -796,7 +801,7 @@ leader_update(ErtsThrPrgrData *tpd)
== ERTS_THR_PRGR_LFLG_NO_LEADER))
&& got_sched_wakeups()) {
/* Someone need to make progress */
- wakeup_managed(0);
+ wakeup_managed(tpd->id);
}
}
}
@@ -849,23 +854,22 @@ update(ErtsThrPrgrData *tpd)
}
int
-erts_thr_progress_update(ErtsSchedulerData *esdp)
+erts_thr_progress_update(ErtsThrPrgrData *tpd)
{
- return update(thr_prgr_data(esdp));
+ return update(tpd);
}
int
-erts_thr_progress_leader_update(ErtsSchedulerData *esdp)
+erts_thr_progress_leader_update(ErtsThrPrgrData *tpd)
{
- return leader_update(thr_prgr_data(esdp));
+ return leader_update(tpd);
}
void
-erts_thr_progress_prepare_wait(ErtsSchedulerData *esdp)
+erts_thr_progress_prepare_wait(ErtsThrPrgrData *tpd)
{
erts_aint32_t lflgs;
- ErtsThrPrgrData *tpd = thr_prgr_data(esdp);
#ifdef ERTS_ENABLE_LOCK_CHECK
erts_lc_check_exact(NULL, 0);
@@ -884,14 +888,13 @@ erts_thr_progress_prepare_wait(ErtsSchedulerData *esdp)
== ERTS_THR_PRGR_LFLG_NO_LEADER
&& got_sched_wakeups()) {
/* Someone need to make progress */
- wakeup_managed(0);
+ wakeup_managed(tpd->id);
}
}
void
-erts_thr_progress_finalize_wait(ErtsSchedulerData *esdp)
+erts_thr_progress_finalize_wait(ErtsThrPrgrData *tpd)
{
- ErtsThrPrgrData *tpd = thr_prgr_data(esdp);
ErtsThrPrgrVal current, val;
#ifdef ERTS_ENABLE_LOCK_CHECK
@@ -921,9 +924,8 @@ erts_thr_progress_finalize_wait(ErtsSchedulerData *esdp)
}
void
-erts_thr_progress_active(ErtsSchedulerData *esdp, int on)
+erts_thr_progress_active(ErtsThrPrgrData *tpd, int on)
{
- ErtsThrPrgrData *tpd = thr_prgr_data(esdp);
#ifdef ERTS_ENABLE_LOCK_CHECK
erts_lc_check_exact(NULL, 0);
@@ -973,7 +975,7 @@ unmanaged_continue(ErtsThrPrgrDelayHandle handle)
== (ERTS_THR_PRGR_LFLG_NO_LEADER|ERTS_THR_PRGR_LFLG_WAITING_UM)
&& got_sched_wakeups()) {
/* Others waiting for us... */
- wakeup_managed(0);
+ wakeup_managed(1);
}
}
}
@@ -1182,10 +1184,10 @@ request_wakeup_unmanaged(ErtsThrPrgrData *tpd, ErtsThrPrgrVal value)
}
void
-erts_thr_progress_wakeup(ErtsSchedulerData *esdp,
+erts_thr_progress_wakeup(ErtsThrPrgrData *tpd,
ErtsThrPrgrVal value)
{
- ErtsThrPrgrData *tpd = thr_prgr_data(esdp);
+
ASSERT(!tpd->is_temporary);
if (tpd->is_managed)
request_wakeup_managed(tpd, value);
diff --git a/erts/emulator/beam/erl_thr_progress.h b/erts/emulator/beam/erl_thr_progress.h
index 8329995b24..00a9e61407 100644
--- a/erts/emulator/beam/erl_thr_progress.h
+++ b/erts/emulator/beam/erl_thr_progress.h
@@ -123,22 +123,24 @@ extern ErtsThrPrgr erts_thr_prgr__;
void erts_thr_progress_pre_init(void);
void erts_thr_progress_init(int no_schedulers, int managed, int unmanaged);
-void erts_thr_progress_register_managed_thread(ErtsSchedulerData *esdp,
- ErtsThrPrgrCallbacks *,
- int);
+ErtsThrPrgrData *erts_thr_progress_register_managed_thread(
+ ErtsSchedulerData *esdp, ErtsThrPrgrCallbacks *, int);
void erts_thr_progress_register_unmanaged_thread(ErtsThrPrgrCallbacks *);
-void erts_thr_progress_active(ErtsSchedulerData *esdp, int on);
-void erts_thr_progress_wakeup(ErtsSchedulerData *esdp,
+void erts_thr_progress_active(ErtsThrPrgrData *, int on);
+void erts_thr_progress_wakeup(ErtsThrPrgrData *,
ErtsThrPrgrVal value);
-int erts_thr_progress_update(ErtsSchedulerData *esdp);
-int erts_thr_progress_leader_update(ErtsSchedulerData *esdp);
-void erts_thr_progress_prepare_wait(ErtsSchedulerData *esdp);
-void erts_thr_progress_finalize_wait(ErtsSchedulerData *esdp);
+int erts_thr_progress_update(ErtsThrPrgrData *);
+int erts_thr_progress_leader_update(ErtsThrPrgrData *);
+void erts_thr_progress_prepare_wait(ErtsThrPrgrData *);
+void erts_thr_progress_finalize_wait(ErtsThrPrgrData *);
ErtsThrPrgrDelayHandle erts_thr_progress_unmanaged_delay__(void);
void erts_thr_progress_unmanaged_continue__(int umrefc_ix);
+ErtsThrPrgrData *erts_thr_progress_data(void);
void erts_thr_progress_dbg_print_state(void);
+ERTS_GLB_INLINE ErtsThrPrgrData *erts_thr_prgr_data(ErtsSchedulerData *esdp);
+
ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_prgr_read_nob__(ERTS_THR_PRGR_ATOMIC *atmc);
ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_prgr_read_acqb__(ERTS_THR_PRGR_ATOMIC *atmc);
ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_prgr_read_mb__(ERTS_THR_PRGR_ATOMIC *atmc);
@@ -161,6 +163,15 @@ ERTS_GLB_INLINE int erts_thr_progress_has_reached(ErtsThrPrgrVal val);
#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+ERTS_GLB_INLINE ErtsThrPrgrData *
+erts_thr_prgr_data(ErtsSchedulerData *esdp) {
+ if (esdp) {
+ return &esdp->thr_progress_data;
+ } else {
+ return erts_thr_progress_data();
+ }
+}
+
ERTS_GLB_INLINE ErtsThrPrgrVal
erts_thr_prgr_read_nob__(ERTS_THR_PRGR_ATOMIC *atmc)
{
diff --git a/erts/emulator/beam/erl_trace.c b/erts/emulator/beam/erl_trace.c
index 53a020e7a5..2350d4c02f 100644
--- a/erts/emulator/beam/erl_trace.c
+++ b/erts/emulator/beam/erl_trace.c
@@ -2177,6 +2177,7 @@ sys_msg_dispatcher_func(void *unused)
{
ErtsThrPrgrCallbacks callbacks;
ErtsSysMsgQ *local_sys_message_queue = NULL;
+ ErtsThrPrgrData *tpd;
int wait = 0;
#ifdef ERTS_ENABLE_LOCK_CHECK
@@ -2189,7 +2190,7 @@ sys_msg_dispatcher_func(void *unused)
callbacks.wait = sys_msg_dispatcher_wait;
callbacks.finalize_wait = sys_msg_dispatcher_fin_wait;
- erts_thr_progress_register_managed_thread(NULL, &callbacks, 0);
+ tpd = erts_thr_progress_register_managed_thread(NULL, &callbacks, 0);
while (1) {
int end_wait = 0;
@@ -2210,8 +2211,8 @@ sys_msg_dispatcher_func(void *unused)
if (!sys_message_queue) {
erts_mtx_unlock(&smq_mtx);
end_wait = 1;
- erts_thr_progress_active(NULL, 0);
- erts_thr_progress_prepare_wait(NULL);
+ erts_thr_progress_active(tpd, 0);
+ erts_thr_progress_prepare_wait(tpd);
erts_mtx_lock(&smq_mtx);
}
@@ -2225,8 +2226,8 @@ sys_msg_dispatcher_func(void *unused)
erts_mtx_unlock(&smq_mtx);
if (end_wait) {
- erts_thr_progress_finalize_wait(NULL);
- erts_thr_progress_active(NULL, 1);
+ erts_thr_progress_finalize_wait(tpd);
+ erts_thr_progress_active(tpd, 1);
}
/* Send trace messages ... */
@@ -2239,8 +2240,8 @@ sys_msg_dispatcher_func(void *unused)
Process *proc = NULL;
Port *port = NULL;
- if (erts_thr_progress_update(NULL))
- erts_thr_progress_leader_update(NULL);
+ if (erts_thr_progress_update(tpd))
+ erts_thr_progress_leader_update(tpd);
#ifdef DEBUG_PRINTOUTS
print_msg_type(smqp);
diff --git a/erts/emulator/beam/erl_utils.h b/erts/emulator/beam/erl_utils.h
index b3bfa69052..880febba8b 100644
--- a/erts/emulator/beam/erl_utils.h
+++ b/erts/emulator/beam/erl_utils.h
@@ -22,6 +22,7 @@
#define ERL_UTILS_H__
#include "sys.h"
+#include "atom.h"
#include "erl_printf.h"
struct process;
@@ -112,10 +113,12 @@ int eq(Eterm, Eterm);
#define EQ(x,y) (((x) == (y)) || (is_not_both_immed((x),(y)) && eq((x),(y))))
-int erts_cmp_atoms(Eterm a, Eterm b);
-Sint erts_cmp(Eterm, Eterm, int, int);
-Sint erts_cmp_compound(Eterm, Eterm, int, int);
+ERTS_GLB_INLINE Sint erts_cmp(Eterm, Eterm, int, int);
+ERTS_GLB_INLINE int erts_cmp_atoms(Eterm a, Eterm b);
+
Sint cmp(Eterm a, Eterm b);
+Sint erts_cmp_compound(Eterm, Eterm, int, int);
+
#define CMP(A,B) erts_cmp(A,B,0,0)
#define CMP_TERM(A,B) erts_cmp(A,B,1,0)
#define CMP_EQ_ONLY(A,B) erts_cmp(A,B,0,1)
@@ -150,4 +153,56 @@ Sint cmp(Eterm a, Eterm b);
if (erts_cmp_compound(X,Y,0,EqOnly) Op 0) { Action; }; \
}
+#define erts_float_comp(x,y) (((x)<(y)) ? -1 : (((x)==(y)) ? 0 : 1))
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+ERTS_GLB_INLINE int erts_cmp_atoms(Eterm a, Eterm b) {
+ Atom *aa = atom_tab(atom_val(a));
+ Atom *bb = atom_tab(atom_val(b));
+
+ byte *name_a, *name_b;
+ int len_a, len_b, diff;
+
+ diff = aa->ord0 - bb->ord0;
+
+ if (diff != 0) {
+ return diff;
+ }
+
+ name_a = &aa->name[3];
+ name_b = &bb->name[3];
+ len_a = aa->len-3;
+ len_b = bb->len-3;
+
+ if (len_a > 0 && len_b > 0) {
+ diff = sys_memcmp(name_a, name_b, MIN(len_a, len_b));
+
+ if (diff != 0) {
+ return diff;
+ }
+ }
+
+ return len_a - len_b;
+}
+
+ERTS_GLB_INLINE Sint erts_cmp(Eterm a, Eterm b, int exact, int eq_only) {
+ if (is_atom(a) && is_atom(b)) {
+ return erts_cmp_atoms(a, b);
+ } else if (is_both_small(a, b)) {
+ return (signed_val(a) - signed_val(b));
+ } else if (is_float(a) && is_float(b)) {
+ FloatDef af, bf;
+
+ GET_DOUBLE(a, af);
+ GET_DOUBLE(b, bf);
+
+ return erts_float_comp(af.fd, bf.fd);
+ }
+
+ return erts_cmp_compound(a,b,exact,eq_only);
+}
+
+#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
+
#endif
diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c
index 621ba108ba..9a66e491f3 100644
--- a/erts/emulator/beam/external.c
+++ b/erts/emulator/beam/external.c
@@ -1953,7 +1953,8 @@ static Eterm erts_term_to_binary_int(Process* p, Eterm Term, int level, Uint fla
#define RETURN_STATE() \
do { \
- hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE+3); \
+ static const int TUPLE2_SIZE = 2 + 1; \
+ hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE + TUPLE2_SIZE); \
c_term = erts_mk_magic_ref(&hp, &MSO(p), context_b); \
res = TUPLE2(hp, Term, c_term); \
BUMP_ALL_REDS(p); \
diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h
index 21ae205237..0631404599 100644
--- a/erts/emulator/beam/global.h
+++ b/erts/emulator/beam/global.h
@@ -906,6 +906,8 @@ typedef struct ErtsLiteralArea_ {
Eterm start[1]; /* beginning of area */
} ErtsLiteralArea;
+void erts_queue_release_literals(Process *c_p, ErtsLiteralArea* literals);
+
#define ERTS_LITERAL_AREA_ALLOC_SIZE(N) \
(sizeof(ErtsLiteralArea) + sizeof(Eterm)*((N) - 1))
@@ -1001,6 +1003,7 @@ typedef struct {
Uint literal_size;
Eterm *lit_purge_ptr;
Uint lit_purge_sz;
+ int copy_literals;
} erts_shcopy_t;
#define INITIALIZE_SHCOPY(info) \
@@ -1010,6 +1013,7 @@ typedef struct {
info.bitstore_start = info.bitstore_default; \
info.shtable_start = info.shtable_default; \
info.literal_size = 0; \
+ info.copy_literals = 0; \
if (larea__) { \
info.lit_purge_ptr = &larea__->start[0]; \
info.lit_purge_sz = larea__->end - info.lit_purge_ptr; \
@@ -1238,6 +1242,13 @@ Sint erts_re_set_loop_limit(Sint limit);
void erts_init_bif_binary(void);
Sint erts_binary_set_loop_limit(Sint limit);
+/* erl_bif_persistent.c */
+void erts_init_bif_persistent_term(void);
+Uint erts_persistent_term_count(void);
+void erts_init_persistent_dumping(void);
+extern ErtsLiteralArea** erts_persistent_areas;
+extern Uint erts_num_persistent_areas;
+
/* external.c */
void erts_init_external(void);
diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h
index bb22548587..869a575cb4 100644
--- a/erts/emulator/beam/sys.h
+++ b/erts/emulator/beam/sys.h
@@ -325,6 +325,7 @@ typedef long Sint erts_align_attribute(sizeof(long));
#define UWORD_CONSTANT(Const) Const##UL
#define ERTS_UWORD_MAX ULONG_MAX
#define ERTS_SWORD_MAX LONG_MAX
+#define ERTS_SWORD_MIN LONG_MIN
#define ERTS_SIZEOF_ETERM SIZEOF_LONG
#define ErtsStrToSint strtol
#elif SIZEOF_VOID_P == SIZEOF_INT
@@ -335,6 +336,7 @@ typedef int Sint erts_align_attribute(sizeof(int));
#define UWORD_CONSTANT(Const) Const##U
#define ERTS_UWORD_MAX UINT_MAX
#define ERTS_SWORD_MAX INT_MAX
+#define ERTS_SWORD_MIN INT_MIN
#define ERTS_SIZEOF_ETERM SIZEOF_INT
#define ErtsStrToSint strtol
#elif SIZEOF_VOID_P == SIZEOF_LONG_LONG
@@ -345,6 +347,7 @@ typedef long long Sint erts_align_attribute(sizeof(long long));
#define UWORD_CONSTANT(Const) Const##ULL
#define ERTS_UWORD_MAX ULLONG_MAX
#define ERTS_SWORD_MAX LLONG_MAX
+#define ERTS_SWORD_MIN LLONG_MIN
#define ERTS_SIZEOF_ETERM SIZEOF_LONG_LONG
#if defined(__WIN32__)
#define ErtsStrToSint _strtoi64
diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c
index 08f8ca9788..d81bd89a48 100644
--- a/erts/emulator/beam/utils.c
+++ b/erts/emulator/beam/utils.c
@@ -2615,27 +2615,6 @@ not_equal:
}
-/*
- * Lexically compare two strings of bytes (string s1 length l1 and s2 l2).
- *
- * s1 < s2 return -1
- * s1 = s2 return 0
- * s1 > s2 return +1
- */
-static int cmpbytes(byte *s1, int l1, byte *s2, int l2)
-{
- int i;
- i = 0;
- while((i < l1) && (i < l2)) {
- if (s1[i] < s2[i]) return(-1);
- if (s1[i] > s2[i]) return(1);
- i++;
- }
- if (l1 < l2) return(-1);
- if (l1 > l2) return(1);
- return(0);
-}
-
/*
* Compare objects.
@@ -2649,20 +2628,6 @@ static int cmpbytes(byte *s1, int l1, byte *s2, int l2)
*
*/
-
-#define float_comp(x,y) (((x)<(y)) ? -1 : (((x)==(y)) ? 0 : 1))
-
-int erts_cmp_atoms(Eterm a, Eterm b)
-{
- Atom *aa = atom_tab(atom_val(a));
- Atom *bb = atom_tab(atom_val(b));
- int diff = aa->ord0 - bb->ord0;
- if (diff)
- return diff;
- return cmpbytes(aa->name+3, aa->len-3,
- bb->name+3, bb->len-3);
-}
-
/* cmp(Eterm a, Eterm b)
* For compatibility with HiPE - arith-based compare.
*/
@@ -2673,22 +2638,6 @@ Sint cmp(Eterm a, Eterm b)
Sint erts_cmp_compound(Eterm a, Eterm b, int exact, int eq_only);
-Sint erts_cmp(Eterm a, Eterm b, int exact, int eq_only)
-{
- if (is_atom(a) && is_atom(b)) {
- return erts_cmp_atoms(a, b);
- } else if (is_both_small(a, b)) {
- return (signed_val(a) - signed_val(b));
- } else if (is_float(a) && is_float(b)) {
- FloatDef af, bf;
- GET_DOUBLE(a, af);
- GET_DOUBLE(b, bf);
- return float_comp(af.fd, bf.fd);
- }
- return erts_cmp_compound(a,b,exact,eq_only);
-}
-
-
/* erts_cmp(Eterm a, Eterm b, int exact)
* exact = 1 -> term-based compare
* exact = 0 -> arith-based compare
@@ -2985,7 +2934,7 @@ tailrecur_ne:
GET_DOUBLE(a, af);
GET_DOUBLE(b, bf);
- ON_CMP_GOTO(float_comp(af.fd, bf.fd));
+ ON_CMP_GOTO(erts_float_comp(af.fd, bf.fd));
}
case (_TAG_HEADER_POS_BIG >> _TAG_PRIMARY_SIZE):
case (_TAG_HEADER_NEG_BIG >> _TAG_PRIMARY_SIZE):
@@ -3022,10 +2971,7 @@ tailrecur_ne:
ErlFunThing* f2 = (ErlFunThing *) fun_val(b);
Sint diff;
- diff = cmpbytes(atom_tab(atom_val(f1->fe->module))->name,
- atom_tab(atom_val(f1->fe->module))->len,
- atom_tab(atom_val(f2->fe->module))->name,
- atom_tab(atom_val(f2->fe->module))->len);
+ diff = erts_cmp_atoms((f1->fe)->module, (f2->fe)->module);
if (diff != 0) {
RETURN_NEQ(diff);
}
@@ -3219,7 +3165,7 @@ tailrecur_ne:
if (f2.fd < MAX_LOSSLESS_FLOAT && f2.fd > MIN_LOSSLESS_FLOAT) {
/* Float is within the no loss limit */
f1.fd = signed_val(aw);
- j = float_comp(f1.fd, f2.fd);
+ j = erts_float_comp(f1.fd, f2.fd);
}
#if ERTS_SIZEOF_ETERM == 8
else if (f2.fd > (double) (MAX_SMALL + 1)) {
@@ -3266,7 +3212,7 @@ tailrecur_ne:
if (big_to_double(aw, &f1.fd) < 0) {
j = big_sign(aw) ? -1 : 1;
} else {
- j = float_comp(f1.fd, f2.fd);
+ j = erts_float_comp(f1.fd, f2.fd);
}
} else {
big = double_to_big(f2.fd, big_buf, sizeof(big_buf)/sizeof(Eterm));
@@ -3282,7 +3228,7 @@ tailrecur_ne:
if (f1.fd < MAX_LOSSLESS_FLOAT && f1.fd > MIN_LOSSLESS_FLOAT) {
/* Float is within the no loss limit */
f2.fd = signed_val(bw);
- j = float_comp(f1.fd, f2.fd);
+ j = erts_float_comp(f1.fd, f2.fd);
}
#if ERTS_SIZEOF_ETERM == 8
else if (f1.fd > (double) (MAX_SMALL + 1)) {
diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c
index 7f20477363..47eb5df7dd 100644
--- a/erts/emulator/drivers/common/inet_drv.c
+++ b/erts/emulator/drivers/common/inet_drv.c
@@ -38,6 +38,7 @@
#include <ctype.h>
#include <sys/types.h>
#include <errno.h>
+#include <stdint.h>
#define IDENTITY(c) c
#define STRINGIFY_1(b) IDENTITY(#b)
@@ -812,6 +813,7 @@ static size_t my_strnlen(const char *s, size_t maxlen)
#define INET_OPT_PKTOPTIONS 45 /* IP(V6)_PKTOPTIONS get ancillary data */
#define INET_OPT_TTL 46 /* IP_TTL */
#define INET_OPT_RECVTTL 47 /* IP_RECVTTL ancillary data */
+#define TCP_OPT_NOPUSH 48 /* super-Nagle, aka TCP_CORK */
/* SCTP options: a separate range, from 100: */
#define SCTP_OPT_RTOINFO 100
#define SCTP_OPT_ASSOCINFO 101
@@ -954,6 +956,13 @@ static size_t my_strnlen(const char *s, size_t maxlen)
#endif
#endif
+typedef struct _tcp_descriptor tcp_descriptor;
+
+#if defined(TCP_CORK)
+#define INET_TCP_NOPUSH TCP_CORK
+#elif defined(TCP_NOPUSH) && !defined(__DARWIN__)
+#define INET_TCP_NOPUSH TCP_NOPUSH
+#endif
#define BIN_REALLOC_MARGIN(x) ((x)/4) /* 25% */
@@ -1003,16 +1012,19 @@ typedef struct _multi_timer_data {
struct _multi_timer_data *prev;
} MultiTimerData;
-static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port,
- ErlDrvTermData caller, unsigned timeout,
- void (*timeout_fun)(ErlDrvData drv_data,
- ErlDrvTermData caller));
-static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port,
+static MultiTimerData *add_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
+ ErlDrvTermData caller, unsigned timeout,
+ void (*timeout_fun)(ErlDrvData drv_data,
+ ErlDrvTermData caller));
+static void fire_multi_timers(tcp_descriptor *desc, ErlDrvPort port,
ErlDrvData data);
-static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTimerData *p);
+static void remove_multi_timer(tcp_descriptor *desc, ErlDrvPort port, MultiTimerData *p);
+static void cancel_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
+ void (*timeout_fun)(ErlDrvData drv_data,
+ ErlDrvTermData caller));
static void tcp_inet_multi_timeout(ErlDrvData e, ErlDrvTermData caller);
-static void clean_multi_timers(MultiTimerData **first, ErlDrvPort port);
+static void clean_multi_timers(tcp_descriptor *desc, ErlDrvPort port);
typedef struct {
int id; /* id used to identify reply */
@@ -1271,7 +1283,7 @@ static struct erl_drv_entry sctp_inet_driver_entry =
};
#endif
-typedef struct {
+struct _tcp_descriptor {
inet_descriptor inet; /* common data structure (DON'T MOVE) */
int high; /* high watermark */
int low; /* low watermark */
@@ -1287,7 +1299,8 @@ typedef struct {
int http_state; /* 0 = response|request 1=headers fields */
inet_async_multi_op *multi_first;/* NULL == no multi-accept-queue, op is in ordinary queue */
inet_async_multi_op *multi_last;
- MultiTimerData *mtd; /* Timer structures for multiple accept */
+ MultiTimerData *mtd; /* Timer structures for multiple accept */
+ MultiTimerData *mtd_cache; /* A cache for timer allocations */
#ifdef HAVE_SENDFILE
struct {
ErlDrvSizeT ioq_skip; /* The number of bytes in the queue at the time
@@ -1303,7 +1316,7 @@ typedef struct {
Uint64 length;
} sendfile;
#endif
-} tcp_descriptor;
+};
/* send function */
static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len);
@@ -1313,7 +1326,10 @@ static int tcp_deliver(tcp_descriptor* desc, int len);
static int tcp_shutdown_error(tcp_descriptor* desc, int err);
+#ifdef HAVE_SENDFILE
static int tcp_inet_sendfile(tcp_descriptor* desc);
+static int tcp_sendfile_aborted(tcp_descriptor* desc, int socket_error);
+#endif
static int tcp_inet_output(tcp_descriptor* desc, HANDLE event);
static int tcp_inet_input(tcp_descriptor* desc, HANDLE event);
@@ -5178,6 +5194,71 @@ static int hwaddr_libdlpi_lookup(const char *ifnm,
}
#endif
+#ifdef HAVE_GETIFADDRS
+/* Returns 0 for success and errno() for failure */
+static int call_getifaddrs(inet_descriptor* desc_p, struct ifaddrs **ifa_pp)
+{
+ int result, save_errno;
+#ifdef HAVE_SETNS
+ int current_ns;
+
+ current_ns = 0;
+ if (desc_p->netns != NULL) {
+ int new_ns;
+ /* Temporarily change network namespace for this thread
+ * over the getifaddrs() call
+ */
+ current_ns = open("/proc/self/ns/net", O_RDONLY);
+ if (current_ns == INVALID_SOCKET)
+ return sock_errno();
+ new_ns = open(desc_p->netns, O_RDONLY);
+ if (new_ns == INVALID_SOCKET) {
+ save_errno = sock_errno();
+ while (close(current_ns) == INVALID_SOCKET &&
+ sock_errno() == EINTR);
+ return save_errno;
+ }
+ if (setns(new_ns, CLONE_NEWNET) != 0) {
+ save_errno = sock_errno();
+ while (close(new_ns) == INVALID_SOCKET &&
+ sock_errno() == EINTR);
+ while (close(current_ns) == INVALID_SOCKET &&
+ sock_errno() == EINTR);
+ return save_errno;
+ }
+ else {
+ while (close(new_ns) == INVALID_SOCKET &&
+ sock_errno() == EINTR);
+ }
+ }
+#endif
+ save_errno = 0;
+ result = getifaddrs(ifa_pp);
+ if (result < 0)
+ save_errno = sock_errno();
+#ifdef HAVE_SETNS
+ if (desc_p->netns != NULL) {
+ /* Restore network namespace */
+ if (setns(current_ns, CLONE_NEWNET) != 0) {
+ /* XXX Failed to restore network namespace.
+ * What to do? Tidy up and return an error...
+ * Note that the thread now might still be in the set namespace.
+ * Can this even happen? Should the emulator be aborted?
+ */
+ if (result >= 0) {
+ /* We got a result but have to waste it */
+ save_errno = sock_errno();
+ freeifaddrs(*ifa_pp);
+ }
+ }
+ while (close(current_ns) == INVALID_SOCKET &&
+ sock_errno() == EINTR);
+ }
+#endif
+ return save_errno;
+}
+#endif /* #ifdef HAVE_GETIFADDRS */
+
/* FIXME: temporary hack */
#ifndef IFHWADDRLEN
#define IFHWADDRLEN 6
@@ -5255,8 +5336,8 @@ static ErlDrvSSizeT inet_ctl_ifget(inet_descriptor* desc,
struct sockaddr_dl *sdlp;
int found = 0;
- if (getifaddrs(&ifa) == -1)
- goto error;
+ if (call_getifaddrs(desc, &ifa) != 0)
+ goto error;
for (ifp = ifa; ifp; ifp = ifp->ifa_next) {
if ((ifp->ifa_addr->sa_family == AF_LINK) &&
@@ -5974,6 +6055,7 @@ static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p,
ErlDrvSizeT buf_size;
char *buf_p;
char *buf_alloc_p;
+ int save_errno;
buf_size = GETIFADDRS_BUFSZ;
buf_alloc_p = ALLOC(GETIFADDRS_BUFSZ);
@@ -6008,9 +6090,9 @@ static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p,
} \
} while (0)
- if (getifaddrs(&ifa_p) < 0) {
- return ctl_error(sock_errno(), rbuf_pp, rsize);
- }
+ if ((save_errno = call_getifaddrs(desc_p, &ifa_p)) != 0)
+ return ctl_error(save_errno, rbuf_pp, rsize);
+
ifa_free_p = ifa_p;
*buf_p++ = INET_REP_OK;
for (; ifa_p; ifa_p = ifa_p->ifa_next) {
@@ -6532,6 +6614,19 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
(long)desc->port, desc->s, ival));
break;
+ case TCP_OPT_NOPUSH:
+#if defined(INET_TCP_NOPUSH)
+ proto = IPPROTO_TCP;
+ type = INET_TCP_NOPUSH;
+ DEBUGF(("inet_set_opts(%ld): s=%d, t=%d TCP_NOPUSH=%d\r\n",
+ (long)desc->port, desc->s, type, ival));
+ break;
+#else
+ /* inet_fill_opts always returns a value for this option,
+ * so we need to ignore it if not implemented, just in case */
+ continue;
+#endif
+
#if defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP)
case UDP_OPT_MULTICAST_TTL:
@@ -7693,6 +7788,16 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
proto = IPPROTO_TCP;
type = TCP_NODELAY;
break;
+ case TCP_OPT_NOPUSH:
+#if defined(INET_TCP_NOPUSH)
+ proto = IPPROTO_TCP;
+ type = INET_TCP_NOPUSH;
+ break;
+#else
+ *ptr++ = opt;
+ put_int32(0, ptr);
+ continue;
+#endif
#if defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP)
case UDP_OPT_MULTICAST_TTL:
@@ -7839,8 +7944,8 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
* cmsg options and values
*/
PLACE_FOR(1+4, ptr);
- *ptr = opt;
- arg_ptr = ptr+1; /* Where to put total length */
+ *ptr++ = opt;
+ arg_ptr = ptr; /* Where to put total length */
arg_sz = 0; /* Total length */
for (cmsg_top = (struct cmsghdr*)(cmsgbuf.buf + cmsg_sz),
cmsg = (struct cmsghdr*)cmsgbuf.buf;
@@ -7852,7 +7957,6 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
PLACE_FOR(1+4, ptr); \
*ptr++ = OPT; \
put_cmsg_int32(cmsg, ptr); \
- ptr += 4; \
arg_sz += 1+4; \
continue; \
}
@@ -7866,7 +7970,6 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
PUT_CMSG_INT32(IPPROTO_IP, IP_TTL, INET_OPT_TTL);
#endif
/* BSD uses the RECV* names in CMSG fields */
- }
#if defined(IPPROTO_IP) && defined(IP_RECVTOS)
PUT_CMSG_INT32(IPPROTO_IP, IP_RECVTOS, INET_OPT_TOS);
#endif
@@ -7877,6 +7980,7 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
PUT_CMSG_INT32(IPPROTO_IP, IP_RECVTTL, INET_OPT_TTL);
#endif
#undef PUT_CMSG_INT32
+ }
put_int32(arg_sz, arg_ptr); /* Put total length */
continue;
}
@@ -9677,6 +9781,7 @@ static ErlDrvData prep_tcp_inet_start(ErlDrvPort port, char* args)
desc->tcp_add_flags = 0;
desc->http_state = 0;
desc->mtd = NULL;
+ desc->mtd_cache = NULL;
desc->multi_first = desc->multi_last = NULL;
DEBUGF(("tcp_inet_start(%ld) }\r\n", (long)port));
return (ErlDrvData) desc;
@@ -9780,15 +9885,14 @@ static void tcp_close_check(tcp_descriptor* desc)
driver_demonitor_process(desc->inet.port, &monitor);
send_async_error(desc->inet.dport, id, caller, am_closed);
}
- clean_multi_timers(&(desc->mtd), desc->inet.port);
}
-
else if (desc->inet.state == INET_STATE_CONNECTING) {
async_error_am(INETP(desc), am_closed);
}
else if (desc->inet.state == INET_STATE_CONNECTED) {
async_error_am_all(INETP(desc), am_closed);
}
+ clean_multi_timers(desc, desc->inet.port);
}
/*
@@ -9831,6 +9935,15 @@ static void tcp_desc_close(tcp_descriptor* desc)
erl_inet_close(INETP(desc));
}
+static void tcp_inet_recv_timeout(ErlDrvData e, ErlDrvTermData dummy)
+{
+ tcp_descriptor* desc = (tcp_descriptor*)e;
+ ASSERT(!desc->inet.active);
+ sock_select(INETP(desc),(FD_READ|FD_CLOSE),0);
+ desc->i_remain = 0;
+ async_error_am(INETP(desc), am_timeout);
+}
+
/* TCP requests from Erlang */
static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
char* buf, ErlDrvSizeT len,
@@ -10001,12 +10114,12 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
if (time_left <= 0) {
time_left = 1;
}
- omtd = add_multi_timer(&(desc->mtd), desc->inet.port, ocaller,
+ omtd = add_multi_timer(desc, desc->inet.port, ocaller,
time_left, &tcp_inet_multi_timeout);
}
enq_old_multi_op(desc, oid, oreq, ocaller, omtd, &omonitor);
if (timeout != INET_INFINITY) {
- mtd = add_multi_timer(&(desc->mtd), desc->inet.port, caller,
+ mtd = add_multi_timer(desc, desc->inet.port, caller,
timeout, &tcp_inet_multi_timeout);
}
enq_multi_op(desc, tbuf, INET_REQ_ACCEPT, caller, mtd, &monitor);
@@ -10021,7 +10134,7 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
return ctl_xerror("noproc", rbuf, rsize);
}
if (timeout != INET_INFINITY) {
- mtd = add_multi_timer(&(desc->mtd), desc->inet.port, caller,
+ mtd = add_multi_timer(desc, desc->inet.port, caller,
timeout, &tcp_inet_multi_timeout);
}
enq_multi_op(desc, tbuf, INET_REQ_ACCEPT, caller, mtd, &monitor);
@@ -10118,7 +10231,8 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
async_error_am(INETP(desc), am_timeout);
else {
if (timeout != INET_INFINITY)
- driver_set_timer(desc->inet.port, timeout);
+ add_multi_timer(desc, INETP(desc)->port, 0,
+ timeout, &tcp_inet_recv_timeout);
if (!INETP(desc)->is_ignored)
sock_select(INETP(desc),(FD_READ|FD_CLOSE),1);
else
@@ -10205,12 +10319,11 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
desc->tcp_add_flags |= TCP_ADDF_SENDFILE;
/* See if we can finish sending without selecting & rescheduling. */
- tcp_inet_sendfile(desc);
-
- if(desc->sendfile.length > 0) {
- sock_select(INETP(desc), FD_WRITE, 1);
+ if (tcp_inet_sendfile(desc) == 0) {
+ if(desc->sendfile.length > 0) {
+ sock_select(INETP(desc), FD_WRITE, 1);
+ }
}
-
return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
#else
return ctl_error(ENOTSUP, rbuf, rsize);
@@ -10224,12 +10337,27 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
}
+static void tcp_inet_send_timeout(ErlDrvData e, ErlDrvTermData dummy)
+{
+ tcp_descriptor* desc = (tcp_descriptor*)e;
+ ASSERT(IS_BUSY(INETP(desc)));
+ ASSERT(desc->busy_on_send);
+ desc->inet.caller = desc->inet.busy_caller;
+ desc->inet.state &= ~INET_F_BUSY;
+ desc->busy_on_send = 0;
+ set_busy_port(desc->inet.port, 0);
+ inet_reply_error_am(INETP(desc), am_timeout);
+ if (desc->send_timeout_close) {
+ tcp_desc_close(desc);
+ }
+}
+
/*
** tcp_inet_timeout:
** called when timer expire:
** TCP socket may be:
**
-** a) receiving -- deselect
+** a) receiving -- send timeout
** b) connecting -- close socket
** c) accepting -- reset listener
**
@@ -10243,26 +10371,9 @@ static void tcp_inet_timeout(ErlDrvData e)
DEBUGF(("tcp_inet_timeout(%ld) {s=%d\r\n",
(long)desc->inet.port, desc->inet.s));
if ((state & INET_F_MULTI_CLIENT)) { /* Multi-client always means multi-timers */
- fire_multi_timers(&(desc->mtd), desc->inet.port, e);
+ fire_multi_timers(desc, desc->inet.port, e);
} else if ((state & INET_STATE_CONNECTED) == INET_STATE_CONNECTED) {
- if (desc->busy_on_send) {
- ASSERT(IS_BUSY(INETP(desc)));
- desc->inet.caller = desc->inet.busy_caller;
- desc->inet.state &= ~INET_F_BUSY;
- desc->busy_on_send = 0;
- set_busy_port(desc->inet.port, 0);
- inet_reply_error_am(INETP(desc), am_timeout);
- if (desc->send_timeout_close) {
- tcp_desc_close(desc);
- }
- }
- else {
- /* assume recv timeout */
- ASSERT(!desc->inet.active);
- sock_select(INETP(desc),(FD_READ|FD_CLOSE),0);
- desc->i_remain = 0;
- async_error_am(INETP(desc), am_timeout);
- }
+ fire_multi_timers(desc, desc->inet.port, e);
}
else if ((state & INET_STATE_CONNECTING) == INET_STATE_CONNECTING) {
/* assume connect timeout */
@@ -10392,7 +10503,7 @@ static void tcp_inet_process_exit(ErlDrvData e, ErlDrvMonitor *monitorp)
return;
}
if (timeout != NULL) {
- remove_multi_timer(&(desc->mtd), desc->inet.port, timeout);
+ remove_multi_timer(desc, desc->inet.port, timeout);
}
if (desc->multi_first == NULL) {
sock_select(INETP(desc),FD_ACCEPT,0);
@@ -10423,6 +10534,7 @@ static int tcp_recv_closed(tcp_descriptor* desc)
#ifdef DEBUG
long port = (long) desc->inet.port; /* Used after driver_exit() */
#endif
+ int blocking_send = 0;
DEBUGF(("tcp_recv_closed(%ld): s=%d, in %s, line %d\r\n",
port, desc->inet.s, __FILE__, __LINE__));
if (IS_BUSY(INETP(desc))) {
@@ -10430,7 +10542,7 @@ static int tcp_recv_closed(tcp_descriptor* desc)
desc->inet.caller = desc->inet.busy_caller;
tcp_clear_output(desc);
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
desc->busy_on_send = 0;
DEBUGF(("tcp_recv_closed(%ld): busy on send\r\n", port));
}
@@ -10438,16 +10550,25 @@ static int tcp_recv_closed(tcp_descriptor* desc)
set_busy_port(desc->inet.port, 0);
inet_reply_error_am(INETP(desc), am_closed);
DEBUGF(("tcp_recv_closed(%ld): busy reply 'closed'\r\n", port));
- } else {
+ blocking_send = 1;
+ }
+#ifdef HAVE_SENDFILE
+ if (desc->tcp_add_flags & TCP_ADDF_SENDFILE) {
+ tcp_sendfile_aborted(desc, ENOTCONN);
+ blocking_send = 1;
+ }
+#endif
+ if (!blocking_send) {
/* No blocking send op to reply to right now.
* If next op is a send, make sure it returns {error,closed}
* rather than {error,enotconn}.
*/
desc->tcp_add_flags |= TCP_ADDF_DELAYED_CLOSE_SEND;
}
+
if (!desc->inet.active) {
- /* We must cancel any timer here ! */
- driver_cancel_timer(desc->inet.port);
+ /* We must cancel any timer here ! */
+ clean_multi_timers(desc, INETP(desc)->port);
/* passive mode do not terminate port ! */
tcp_clear_input(desc);
if (desc->inet.exitf) {
@@ -10482,16 +10603,21 @@ static int tcp_recv_error(tcp_descriptor* desc, int err)
desc->inet.caller = desc->inet.busy_caller;
tcp_clear_output(desc);
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
desc->busy_on_send = 0;
}
desc->inet.state &= ~INET_F_BUSY;
set_busy_port(desc->inet.port, 0);
inet_reply_error_am(INETP(desc), am_closed);
}
+#ifdef HAVE_SENDFILE
+ if (desc->tcp_add_flags & TCP_ADDF_SENDFILE) {
+ tcp_sendfile_aborted(desc, err);
+ }
+#endif
if (!desc->inet.active) {
/* We must cancel any timer here ! */
- driver_cancel_timer(desc->inet.port);
+ clean_multi_timers(desc, INETP(desc)->port);
tcp_clear_input(desc);
if (desc->inet.exitf) {
tcp_desc_close(desc);
@@ -10596,13 +10722,13 @@ static int tcp_deliver(tcp_descriptor* desc, int len)
if (len == 0) {
/* empty buffer or waiting for more input */
if ((desc->i_buf == NULL) || (desc->i_remain > 0))
- return count;
+ return 0;
if ((n = tcp_remain(desc, &len)) != 0) {
if (n < 0) /* packet error */
return n;
if (len > 0) /* more data pending */
desc->i_remain = len;
- return count;
+ return 0;
}
}
@@ -10654,9 +10780,7 @@ static int tcp_deliver(tcp_descriptor* desc, int len)
len = 0;
if (!desc->inet.active) {
- if (!desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
- }
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_recv_timeout);
sock_select(INETP(desc),(FD_READ|FD_CLOSE),0);
if (desc->i_buf != NULL)
tcp_restart_input(desc);
@@ -10682,7 +10806,7 @@ static int tcp_recv(tcp_descriptor* desc, int request_len)
int len;
int nread;
- if (desc->i_buf == NULL) { /* allocte a read buffer */
+ if (desc->i_buf == NULL) { /* allocate a read buffer */
int sz = (request_len > 0) ? request_len : desc->inet.bufsz;
if ((desc->i_buf = alloc_buffer(sz)) == NULL)
@@ -10755,10 +10879,11 @@ static int tcp_recv(tcp_descriptor* desc, int request_len)
return tcp_deliver(desc, desc->i_ptr - desc->i_ptr_start);
}
else {
- if ((nread = tcp_remain(desc, &len)) < 0)
+ nread = tcp_remain(desc, &len);
+ if (nread < 0)
return tcp_recv_error(desc, EMSGSIZE);
else if (nread == 0)
- return tcp_deliver(desc, len);
+ return tcp_deliver(desc, len);
else if (len > 0)
desc->i_remain = len; /* set remain */
}
@@ -11077,7 +11202,7 @@ static int tcp_inet_input(tcp_descriptor* desc, HANDLE event)
}
if (timeout != NULL) {
- remove_multi_timer(&(desc->mtd), desc->inet.port, timeout);
+ remove_multi_timer(desc, desc->inet.port, timeout);
}
driver_demonitor_process(desc->inet.port, &monitor);
@@ -11136,8 +11261,8 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err)
if (IS_BUSY(INETP(desc))) {
desc->inet.caller = desc->inet.busy_caller;
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
- desc->busy_on_send = 0;
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
+ desc->busy_on_send = 0;
}
desc->inet.state &= ~INET_F_BUSY;
set_busy_port(desc->inet.port, 0);
@@ -11152,27 +11277,31 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err)
DEBUGF(("driver_failure_eof(%ld) in %s, line %d\r\n",
(long)desc->inet.port, __FILE__, __LINE__));
if (desc->inet.active) {
+ ErlDrvTermData err_atom;
if (show_econnreset) {
tcp_error_message(desc, err);
- tcp_closed_message(desc);
- inet_reply_error(INETP(desc), err);
+ err_atom = error_atom(err);
} else {
- tcp_closed_message(desc);
- inet_reply_error_am(INETP(desc), am_closed);
+ err_atom = am_closed;
}
+ tcp_closed_message(desc);
+ if (!(desc->tcp_add_flags & TCP_ADDF_SENDFILE))
+ inet_reply_error_am(INETP(desc), err_atom);
+
if (desc->inet.exitf)
driver_exit(desc->inet.port, 0);
else
tcp_desc_close(desc);
} else {
tcp_close_check(desc);
- tcp_desc_close(desc);
if (desc->inet.caller) {
- if (show_econnreset)
- inet_reply_error(INETP(desc), err);
- else
- inet_reply_error_am(INETP(desc), am_closed);
+ if (!(desc->tcp_add_flags & TCP_ADDF_SENDFILE)) {
+ if (show_econnreset)
+ inet_reply_error(INETP(desc), err);
+ else
+ inet_reply_error_am(INETP(desc), am_closed);
+ }
}
else {
/* No blocking send op to reply to right now.
@@ -11181,6 +11310,7 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err)
*/
desc->tcp_add_flags |= TCP_ADDF_DELAYED_CLOSE_SEND;
}
+ tcp_desc_close(desc);
/*
* Make sure that the next receive operation gets an {error,closed}
@@ -11237,6 +11367,12 @@ static int tcp_shutdown_error(tcp_descriptor* desc, int err)
return tcp_send_or_shutdown_error(desc, err);
}
+static void tcp_inet_delay_send(ErlDrvData data, ErlDrvTermData dummy)
+{
+ tcp_descriptor *desc = (tcp_descriptor*)data;
+ (void)tcp_inet_output(desc, INETP(desc)->s);
+}
+
/*
** Send non-blocking vector data
*/
@@ -11289,7 +11425,9 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev)
set_busy_port(desc->inet.port, 1);
if (desc->send_timeout != INET_INFINITY) {
desc->busy_on_send = 1;
- driver_set_timer(desc->inet.port, desc->send_timeout);
+ add_multi_timer(desc, INETP(desc)->port,
+ 0 /* arg */, desc->send_timeout /* timeout */,
+ &tcp_inet_send_timeout);
}
return 1;
}
@@ -11304,7 +11442,10 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev)
INETP(desc)->is_ignored |= INET_IGNORE_WRITE;
n = 0;
} else if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND) {
- n = 0;
+ driver_enqv(ix, ev, 0);
+ add_multi_timer(desc, INETP(desc)->port, 0,
+ 0, &tcp_inet_delay_send);
+ return 0;
} else if (IS_SOCKET_ERROR(sock_sendv(desc->inet.s, ev->iov,
vsize, &n, 0))) {
if ((sock_errno() != ERRNO_BLOCK) && (sock_errno() != EINTR)) {
@@ -11387,7 +11528,9 @@ static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len)
set_busy_port(desc->inet.port, 1);
if (desc->send_timeout != INET_INFINITY) {
desc->busy_on_send = 1;
- driver_set_timer(desc->inet.port, desc->send_timeout);
+ add_multi_timer(desc, INETP(desc)->port,
+ 0 /* arg */, desc->send_timeout /* timeout */,
+ &tcp_inet_send_timeout);
}
return 1;
}
@@ -11491,7 +11634,8 @@ static int tcp_sendfile_completed(tcp_descriptor* desc) {
/* if we have a timer then cancel and send ok to client */
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
+ cancel_multi_timer(desc, INETP(desc)->port,
+ &tcp_inet_send_timeout);
desc->busy_on_send = 0;
}
@@ -11693,8 +11837,8 @@ socket_error: {
DEBUGF(("tcp_inet_sendfile(%ld): send errno = %d (errno %d)\r\n",
(long)desc->inet.port, socket_errno, errno));
- result = tcp_send_error(desc, socket_errno);
tcp_sendfile_aborted(desc, socket_errno);
+ result = tcp_send_error(desc, socket_errno);
goto done;
}
@@ -11798,6 +11942,12 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event)
#ifdef __WIN32__
desc->inet.send_would_block = 1;
#endif
+ /* If DELAY_SEND is set ready_output may have
+ been called without doing select so we do
+ a select in order to get into the correct
+ state */
+ if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND)
+ sock_select(INETP(desc), FD_WRITE, 1);
goto done;
} else if (n == 0) { /* Workaround for redhat/CentOS 6.3 returning
0 when sending packets with
@@ -11823,7 +11973,7 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event)
set_busy_port(desc->inet.port, 0);
/* if we have a timer then cancel and send ok to client */
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
desc->busy_on_send = 0;
}
inet_reply_ok(INETP(desc));
@@ -12635,7 +12785,7 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
udesc->i_buf = NULL;
if (!desc->active) {
async_error(desc, err);
- driver_cancel_timer(desc->port);
+ driver_cancel_timer(desc->port);
sock_select(desc,FD_READ,0);
}
else {
@@ -12724,7 +12874,7 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
return count;
count++;
if (!desc->active) {
- driver_cancel_timer(desc->port); /* possibly cancel */
+ driver_cancel_timer(desc->port);
sock_select(desc,FD_READ,0);
return count; /* passive mode (read one packet only) */
}
@@ -12803,55 +12953,69 @@ make_noninheritable_handle(SOCKET s)
* Multi-timers
*/
-static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port,
+static void fire_multi_timers(tcp_descriptor *desc, ErlDrvPort port,
ErlDrvData data)
{
ErlDrvTime next_timeout;
- if (!*first) {
+ MultiTimerData *curr = desc->mtd;
+ if (!curr) {
ASSERT(0);
return;
}
#ifdef DEBUG
{
ErlDrvTime chk = erl_drv_monotonic_time(ERL_DRV_MSEC);
- ASSERT(chk >= (*first)->when);
+ ASSERT(chk >= curr->when);
}
#endif
do {
- MultiTimerData *save = *first;
- *first = save->next;
+ MultiTimerData *save = curr;
+
(*(save->timeout_function))(data,save->caller);
- FREE(save);
- if (*first == NULL) {
+
+ curr = curr->next;
+
+ if (desc->mtd_cache == NULL)
+ desc->mtd_cache = save;
+ else
+ FREE(save);
+
+ if (curr == NULL) {
+ desc->mtd = NULL;
return;
}
- (*first)->prev = NULL;
- next_timeout = (*first)->when - erl_drv_monotonic_time(ERL_DRV_MSEC);
+ curr->prev = NULL;
+ next_timeout = curr->when - erl_drv_monotonic_time(ERL_DRV_MSEC);
} while (next_timeout <= 0);
+ desc->mtd = curr;
driver_set_timer(port, (unsigned long) next_timeout);
}
-static void clean_multi_timers(MultiTimerData **first, ErlDrvPort port)
+static void clean_multi_timers(tcp_descriptor *desc, ErlDrvPort port)
{
- MultiTimerData *p;
- if (*first) {
+ if (desc->mtd) {
driver_cancel_timer(port);
}
- while (*first) {
- p = *first;
- *first = p->next;
- FREE(p);
+ while (desc->mtd) {
+ MultiTimerData *p = desc->mtd;
+ desc->mtd = p->next;
+ FREE(p);
+ }
+ desc->mtd = NULL;
+ if (desc->mtd_cache) {
+ FREE(desc->mtd_cache);
+ desc->mtd_cache = NULL;
}
}
-static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTimerData *p)
+static void remove_multi_timer(tcp_descriptor *desc, ErlDrvPort port, MultiTimerData *p)
{
if (p->prev != NULL) {
p->prev->next = p->next;
} else {
driver_cancel_timer(port);
- *first = p->next;
- if (*first) {
- ErlDrvTime ntmo = (*first)->when - erl_drv_monotonic_time(ERL_DRV_MSEC);
+ desc->mtd = p->next;
+ if (desc->mtd) {
+ ErlDrvTime ntmo = desc->mtd->when - erl_drv_monotonic_time(ERL_DRV_MSEC);
if (ntmo < 0)
ntmo = 0;
driver_set_timer(port, (unsigned long) ntmo);
@@ -12860,36 +13024,67 @@ static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTim
if (p->next != NULL) {
p->next->prev = p->prev;
}
- FREE(p);
+ if (desc->mtd_cache == NULL)
+ desc->mtd_cache = p;
+ else
+ FREE(p);
}
-static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port,
+/* Cancel a timer based on the timeout_fun */
+static void cancel_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
+ void (*timeout_fun)(ErlDrvData drv_data,
+ ErlDrvTermData caller))
+{
+ MultiTimerData *timer = desc->mtd;
+ while(timer && timer->timeout_function != timeout_fun) {
+ timer = timer->next;
+ }
+ if (timer) {
+ remove_multi_timer(desc, port, timer);
+ }
+}
+
+static MultiTimerData *add_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
ErlDrvTermData caller, unsigned timeout,
void (*timeout_fun)(ErlDrvData drv_data,
ErlDrvTermData caller))
{
MultiTimerData *mtd, *p, *s;
- mtd = ALLOC(sizeof(MultiTimerData));
- mtd->when = erl_drv_monotonic_time(ERL_DRV_MSEC) + ((ErlDrvTime) timeout) + 1;
+
+ /* Use cached timer if available */
+ if (desc->mtd_cache != NULL) {
+ mtd = desc->mtd_cache;
+ desc->mtd_cache = NULL;
+ } else
+ mtd = ALLOC(sizeof(MultiTimerData));
+
+ if (timeout)
+ mtd->when = erl_drv_monotonic_time(ERL_DRV_MSEC) + ((ErlDrvTime) timeout);
+ else
+ mtd->when = INT64_MIN; /* Don't have to get the time for 0 msec timeouts */
+
mtd->timeout_function = timeout_fun;
mtd->caller = caller;
mtd->next = mtd->prev = NULL;
- for(p = *first,s = NULL; p != NULL; s = p, p = p->next) {
+
+ /* Find correct slot in timer linked list */
+ for(p = desc->mtd,s = NULL; p != NULL; s = p, p = p->next) {
if (p->when >= mtd->when) {
break;
}
}
+ /* Insert in linked list */
if (!p) {
if (!s) {
- *first = mtd;
+ desc->mtd = mtd;
} else {
s->next = mtd;
mtd->prev = s;
}
} else {
if (!s) {
- *first = mtd;
+ desc->mtd = mtd;
} else {
s->next = mtd;
mtd->prev = s;
@@ -12897,10 +13092,8 @@ static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port,
mtd->next = p;
p->prev = mtd;
}
+ /* Possibly set new timer */
if (!s) {
- if (mtd->next) {
- driver_cancel_timer(port);
- }
driver_set_timer(port,timeout);
}
return mtd;
diff --git a/erts/emulator/nifs/common/prim_file_nif.c b/erts/emulator/nifs/common/prim_file_nif.c
index a05d50b333..5fff55b467 100644
--- a/erts/emulator/nifs/common/prim_file_nif.c
+++ b/erts/emulator/nifs/common/prim_file_nif.c
@@ -38,6 +38,9 @@ static void unload(ErlNifEnv *env, void* priv_data);
static ErlNifResourceType *efile_resource_type;
+static ERL_NIF_TERM am_erts_prim_file;
+static ERL_NIF_TERM am_close;
+
static ERL_NIF_TERM am_ok;
static ERL_NIF_TERM am_error;
static ERL_NIF_TERM am_continue;
@@ -96,11 +99,14 @@ static ERL_NIF_TERM set_cwd_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM arg
static ERL_NIF_TERM read_file_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
+static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
+static ERL_NIF_TERM close_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
+
+/* Internal ops */
+static ERL_NIF_TERM delayed_close_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
static ERL_NIF_TERM get_handle_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
static ERL_NIF_TERM altname_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
-static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
-
/* All file handle operations are passed through a wrapper that handles state
* transitions, marking it as busy during the course of the operation, and
* closing on completion if the owner died in the middle of an operation.
@@ -128,7 +134,11 @@ static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]
*
* CLOSE_PENDING ->
* CLOSED (file_handle_wrapper)
- */
+ *
+ * Should the owner of a file die, we can't close it immediately as that could
+ * potentially block a normal scheduler. When entering the CLOSED state from
+ * owner_death_callback, we will instead send a message to the erts_prim_file
+ * process that will then close the file through delayed_close_nif. */
typedef ERL_NIF_TERM (*file_op_impl_t)(efile_data_t *d, ErlNifEnv *env,
int argc, const ERL_NIF_TERM argv[]);
@@ -142,7 +152,6 @@ static ERL_NIF_TERM file_handle_wrapper(file_op_impl_t operation, ErlNifEnv *env
return file_handle_wrapper( name ## _impl , env, argc, argv); \
}
-WRAP_FILE_HANDLE_EXPORT(close_nif)
WRAP_FILE_HANDLE_EXPORT(read_nif)
WRAP_FILE_HANDLE_EXPORT(write_nif)
WRAP_FILE_HANDLE_EXPORT(pread_nif)
@@ -193,18 +202,27 @@ static ErlNifFunc nif_funcs[] = {
/* Internal ops. */
{"get_handle_nif", 1, get_handle_nif},
+ {"delayed_close_nif", 1, delayed_close_nif, ERL_NIF_DIRTY_JOB_IO_BOUND},
{"altname_nif", 1, altname_nif, ERL_NIF_DIRTY_JOB_IO_BOUND},
};
ERL_NIF_INIT(prim_file, nif_funcs, load, NULL, upgrade, unload)
+static ErlNifPid erts_prim_file_pid;
+
static void owner_death_callback(ErlNifEnv* env, void* obj, ErlNifPid* pid, ErlNifMonitor* mon);
-static void gc_callback(ErlNifEnv *env, void* data);
-static int load(ErlNifEnv *env, void** priv_data, ERL_NIF_TERM load_info)
+static int load(ErlNifEnv *env, void** priv_data, ERL_NIF_TERM prim_file_pid)
{
ErlNifResourceTypeInit callbacks;
+ if(!enif_get_local_pid(env, prim_file_pid, &erts_prim_file_pid)) {
+ ASSERT(!"bad pid passed to prim_file_nif");
+ }
+
+ am_erts_prim_file = enif_make_atom(env, "erts_prim_file");
+ am_close = enif_make_atom(env, "close");
+
am_ok = enif_make_atom(env, "ok");
am_error = enif_make_atom(env, "error");
am_continue = enif_make_atom(env, "continue");
@@ -239,7 +257,7 @@ static int load(ErlNifEnv *env, void** priv_data, ERL_NIF_TERM load_info)
am_eof = enif_make_atom(env, "eof");
callbacks.down = owner_death_callback;
- callbacks.dtor = gc_callback;
+ callbacks.dtor = NULL;
callbacks.stop = NULL;
efile_resource_type = enif_open_resource_type_x(env, "efile", &callbacks,
@@ -305,8 +323,10 @@ static ERL_NIF_TERM file_handle_wrapper(file_op_impl_t operation, ErlNifEnv *env
/* This is the only point where a change from CLOSE_PENDING is
* possible, and we're running synchronously, so we can't race with
* anything else here. */
+ posix_errno_t ignored;
+
erts_atomic32_set_acqb(&d->state, EFILE_STATE_CLOSED);
- efile_close(d);
+ efile_close(d, &ignored);
}
} else {
/* CLOSE_PENDING should be impossible at this point since it requires
@@ -319,6 +339,24 @@ static ERL_NIF_TERM file_handle_wrapper(file_op_impl_t operation, ErlNifEnv *env
return result;
}
+/* This is a special close operation used by the erts_prim_file process for
+ * cleaning up orphaned files. It differs from the ordinary close_nif in that
+ * it only works for files that have already entered the CLOSED state. */
+static ERL_NIF_TERM delayed_close_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
+ posix_errno_t ignored;
+ efile_data_t *d;
+
+ ASSERT(argc == 1);
+ if(!get_file_data(env, argv[0], &d)) {
+ return enif_make_badarg(env);
+ }
+
+ ASSERT(erts_atomic32_read_acqb(&d->state) == EFILE_STATE_CLOSED);
+ efile_close(d, &ignored);
+
+ return am_ok;
+}
+
static void owner_death_callback(ErlNifEnv* env, void* obj, ErlNifPid* pid, ErlNifMonitor* mon) {
efile_data_t *d = (efile_data_t*)obj;
@@ -334,8 +372,24 @@ static void owner_death_callback(ErlNifEnv* env, void* obj, ErlNifPid* pid, ErlN
switch(previous_state) {
case EFILE_STATE_IDLE:
- efile_close(d);
- return;
+ {
+ /* We cannot close the file here as that could block a normal
+ * scheduler, so we tell erts_prim_file to do it for us.
+ *
+ * This can in turn become a bottleneck (especially in cases
+ * like NFS failure), but it's less problematic than blocking
+ * thread progress. */
+ ERL_NIF_TERM message, file_ref;
+
+ file_ref = enif_make_resource(env, d);
+ message = enif_make_tuple2(env, am_close, file_ref);
+
+ if(!enif_send(env, &erts_prim_file_pid, NULL, message)) {
+ ERTS_INTERNAL_ERROR("Failed to defer prim_file close.");
+ }
+
+ return;
+ }
case EFILE_STATE_CLOSE_PENDING:
case EFILE_STATE_CLOSED:
/* We're either already closed or managed to mark ourselves for
@@ -352,24 +406,6 @@ static void owner_death_callback(ErlNifEnv* env, void* obj, ErlNifPid* pid, ErlN
}
}
-static void gc_callback(ErlNifEnv *env, void* data) {
- efile_data_t *d = (efile_data_t*)data;
-
- enum efile_state_t previous_state;
-
- (void)env;
-
- previous_state = erts_atomic32_cmpxchg_acqb(&d->state,
- EFILE_STATE_CLOSED, EFILE_STATE_IDLE);
-
- ASSERT(previous_state != EFILE_STATE_CLOSE_PENDING &&
- previous_state != EFILE_STATE_BUSY);
-
- if(previous_state == EFILE_STATE_IDLE) {
- efile_close(d);
- }
-}
-
static ERL_NIF_TERM efile_filetype_to_atom(enum efile_filetype_t type) {
switch(type) {
case EFILE_FILETYPE_DEVICE: return am_device;
@@ -454,40 +490,62 @@ static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]
return posix_error_to_tuple(env, posix_errno);
}
- result = enif_make_resource(env, d);
- enif_release_resource(d);
-
enif_self(env, &controlling_process);
if(enif_monitor_process(env, d, &controlling_process, &d->monitor)) {
+ /* We need to close the file manually as we haven't registered a
+ * destructor. */
+ posix_errno_t ignored;
+
+ erts_atomic32_set_acqb(&d->state, EFILE_STATE_CLOSED);
+ efile_close(d, &ignored);
+
return posix_error_to_tuple(env, EINVAL);
}
+ /* Note that we do not call enif_release_resource at this point. While it's
+ * normally safe to leave resource management to the GC, efile_close is a
+ * blocking operation which must not be done in the GC callback, and we
+ * can't defer it as the resource is gone as soon as it returns.
+ *
+ * We instead keep the resource alive until efile_close is called, after
+ * which it's safe to leave things to the GC. If the controlling process
+ * were to die before the user had a chance to close their file, the above
+ * monitor will tell the erts_prim_file process to close it for them. */
+ result = enif_make_resource(env, d);
+
return enif_make_tuple2(env, am_ok, result);
}
-static ERL_NIF_TERM close_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
+static ERL_NIF_TERM close_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
enum efile_state_t previous_state;
+ efile_data_t *d;
- if(argc != 0) {
+ ASSERT(argc == 1);
+ if(!get_file_data(env, argv[0], &d)) {
return enif_make_badarg(env);
}
previous_state = erts_atomic32_cmpxchg_acqb(&d->state,
- EFILE_STATE_CLOSED, EFILE_STATE_BUSY);
+ EFILE_STATE_CLOSED, EFILE_STATE_IDLE);
- ASSERT(previous_state == EFILE_STATE_CLOSE_PENDING ||
- previous_state == EFILE_STATE_BUSY);
+ if(previous_state == EFILE_STATE_IDLE) {
+ posix_errno_t error;
- if(previous_state == EFILE_STATE_BUSY) {
enif_demonitor_process(env, d, &d->monitor);
- if(!efile_close(d)) {
- return posix_error_to_tuple(env, d->posix_errno);
+ if(!efile_close(d, &error)) {
+ return posix_error_to_tuple(env, error);
}
- }
- return am_ok;
+ return am_ok;
+ } else {
+ /* CLOSE_PENDING should be impossible at this point since it requires
+ * a transition from BUSY; the only valid state here is CLOSED. */
+ ASSERT(previous_state == EFILE_STATE_CLOSED);
+
+ return posix_error_to_tuple(env, EINVAL);
+ }
}
static ERL_NIF_TERM read_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
@@ -514,6 +572,7 @@ static ERL_NIF_TERM read_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, con
ASSERT(bytes_read <= block_size);
if(bytes_read < 0) {
+ enif_release_binary(&result);
return posix_error_to_tuple(env, d->posix_errno);
} else if(bytes_read == 0) {
enif_release_binary(&result);
@@ -576,6 +635,7 @@ static ERL_NIF_TERM pread_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, co
bytes_read = efile_preadv(d, offset, read_vec, 1);
if(bytes_read < 0) {
+ enif_release_binary(&result);
return posix_error_to_tuple(env, d->posix_errno);
} else if(bytes_read == 0) {
enif_release_binary(&result);
@@ -802,6 +862,7 @@ static ERL_NIF_TERM ipread_s32bu_p32bu_nif_impl(efile_data_t *d, ErlNifEnv *env,
bytes_read = efile_preadv(d, payload_offset, read_vec, 1);
if(bytes_read < 0) {
+ enif_release_binary(&payload);
return posix_error_to_tuple(env, d->posix_errno);
} else if(bytes_read == 0) {
enif_release_binary(&payload);
@@ -872,7 +933,7 @@ static ERL_NIF_TERM set_permissions_nif(ErlNifEnv *env, int argc, const ERL_NIF_
posix_errno_t posix_errno;
efile_path_t path;
- Uint32 permissions;
+ unsigned int permissions;
if(argc != 2 || !enif_get_uint(env, argv[1], &permissions)) {
return enif_make_badarg(env);
@@ -891,7 +952,7 @@ static ERL_NIF_TERM set_owner_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM a
posix_errno_t posix_errno;
efile_path_t path;
- Sint32 uid, gid;
+ int uid, gid;
if(argc != 3 || !enif_get_int(env, argv[1], &uid)
|| !enif_get_int(env, argv[2], &gid)) {
@@ -1187,7 +1248,7 @@ static posix_errno_t read_file(efile_data_t *d, size_t size, ErlNifBinary *resul
}
static ERL_NIF_TERM read_file_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
- posix_errno_t posix_errno;
+ posix_errno_t posix_errno, ignored;
efile_fileinfo_t info = {0};
efile_path_t path;
@@ -1208,7 +1269,9 @@ static ERL_NIF_TERM read_file_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM a
}
posix_errno = read_file(d, info.size, &result);
- enif_release_resource(d);
+
+ erts_atomic32_set_acqb(&d->state, EFILE_STATE_CLOSED);
+ efile_close(d, &ignored);
if(posix_errno) {
return posix_error_to_tuple(env, posix_errno);
diff --git a/erts/emulator/nifs/common/prim_file_nif.h b/erts/emulator/nifs/common/prim_file_nif.h
index 099c06c48c..b2e30c59dd 100644
--- a/erts/emulator/nifs/common/prim_file_nif.h
+++ b/erts/emulator/nifs/common/prim_file_nif.h
@@ -159,8 +159,11 @@ posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes,
ErlNifResourceType *nif_type, efile_data_t **d);
/** @brief Closes a file. The file must have entered the CLOSED state prior to
- * calling this to prevent double close. */
-int efile_close(efile_data_t *d);
+ * calling this to prevent double close.
+ *
+ * Note that the file is completely invalid after this point, so the error code
+ * is provided in \c error rather than d->posix_errno. */
+int efile_close(efile_data_t *d, posix_errno_t *error);
/* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */
diff --git a/erts/emulator/nifs/unix/unix_prim_file.c b/erts/emulator/nifs/unix/unix_prim_file.c
index dea73db18a..169b193993 100644
--- a/erts/emulator/nifs/unix/unix_prim_file.c
+++ b/erts/emulator/nifs/unix/unix_prim_file.c
@@ -202,21 +202,24 @@ posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes,
return errno;
}
-int efile_close(efile_data_t *d) {
+int efile_close(efile_data_t *d, posix_errno_t *error) {
efile_unix_t *u = (efile_unix_t*)d;
int fd;
+ ASSERT(enif_thread_type() == ERL_NIF_THR_DIRTY_IO_SCHEDULER);
ASSERT(erts_atomic32_read_nob(&d->state) == EFILE_STATE_CLOSED);
ASSERT(u->fd != -1);
fd = u->fd;
u->fd = -1;
+ enif_release_resource(d);
+
/* close(2) either always closes (*BSD, Linux) or leaves the fd in an
* undefined state (POSIX 2008, Solaris), so we must not retry on EINTR. */
if(close(fd) < 0) {
- u->common.posix_errno = errno;
+ *error = errno;
return 0;
}
diff --git a/erts/emulator/nifs/win32/win_prim_file.c b/erts/emulator/nifs/win32/win_prim_file.c
index f7fae3c637..d0aa70542f 100644
--- a/erts/emulator/nifs/win32/win_prim_file.c
+++ b/erts/emulator/nifs/win32/win_prim_file.c
@@ -33,16 +33,32 @@
#define FILE_SHARE_FLAGS (FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE)
-#define LP_PREFIX L"\\\\?\\"
-#define LP_PREFIX_SIZE (sizeof(LP_PREFIX) - sizeof(WCHAR))
+/* Long paths can either be in the file (?) or the device (.) namespace. UNC
+ * paths are always in the file namespace. */
+#define LP_FILE_PREFIX L"\\\\?\\"
+#define LP_DEV_PREFIX L"\\\\.\\"
+#define LP_UNC_PREFIX (LP_FILE_PREFIX L"UNC\\")
+
+#define LP_PREFIX_SIZE (sizeof(LP_FILE_PREFIX) - sizeof(WCHAR))
#define LP_PREFIX_LENGTH (LP_PREFIX_SIZE / sizeof(WCHAR))
+#define LP_UNC_PREFIX_SIZE (sizeof(LP_UNC_PREFIX) - sizeof(WCHAR))
+#define LP_UNC_PREFIX_LENGTH (LP_UNC_PREFIX_SIZE / sizeof(WCHAR))
+
+#define IS_LONG_PATH(length, data) \
+ ((length) >= LP_PREFIX_LENGTH && \
+ (!sys_memcmp((data), LP_FILE_PREFIX, LP_PREFIX_SIZE) || \
+ !sys_memcmp((data), LP_DEV_PREFIX, LP_PREFIX_SIZE)))
+
+#define IS_LONG_UNC_PATH(length, data) \
+ ((length) >= LP_UNC_PREFIX_LENGTH && \
+ !sys_memcmp((data), LP_UNC_PREFIX, LP_UNC_PREFIX_SIZE))
+
#define PATH_LENGTH(path) (path->size / sizeof(WCHAR) - 1)
#define ASSERT_PATH_FORMAT(path) \
do { \
- ASSERT(PATH_LENGTH(path) >= 4 && \
- !memcmp(path->data, LP_PREFIX, LP_PREFIX_SIZE)); \
+ ASSERT(IS_LONG_PATH(PATH_LENGTH(path), (path)->data)); \
ASSERT(PATH_LENGTH(path) == wcslen((WCHAR*)path->data)); \
} while(0)
@@ -106,7 +122,7 @@ static posix_errno_t get_full_path(ErlNifEnv *env, WCHAR *input, efile_path_t *r
return ENOENT;
}
- maximum_length += LP_PREFIX_LENGTH;
+ maximum_length += MAX(LP_PREFIX_LENGTH, LP_UNC_PREFIX_LENGTH);
if(!enif_alloc_binary(maximum_length * sizeof(WCHAR), result)) {
return ENOMEM;
@@ -115,18 +131,28 @@ static posix_errno_t get_full_path(ErlNifEnv *env, WCHAR *input, efile_path_t *r
actual_length = GetFullPathNameW(input, maximum_length, (WCHAR*)result->data, NULL);
if(actual_length < maximum_length) {
- int has_long_path_prefix;
+ int is_long_path, maybe_unc_path;
WCHAR *path_start;
- /* Make sure we have a long-path prefix; GetFullPathNameW only adds one
- * if the path is relative. */
- has_long_path_prefix = actual_length >= LP_PREFIX_LENGTH &&
- !sys_memcmp(result->data, LP_PREFIX, LP_PREFIX_SIZE);
-
- if(!has_long_path_prefix) {
+ /* The APIs we use have varying path length limits and sometimes
+ * behave differently when given a long-path prefix, so it's simplest
+ * to always use long paths. */
+
+ is_long_path = IS_LONG_PATH(actual_length, result->data);
+ maybe_unc_path = !sys_memcmp(result->data, L"\\\\", sizeof(WCHAR) * 2);
+
+ if(maybe_unc_path && !is_long_path) {
+ /* \\localhost\c$\gurka -> \\?\UNC\localhost\c$\gurka */
+ sys_memmove(result->data + LP_UNC_PREFIX_SIZE,
+ &((WCHAR*)result->data)[2],
+ (actual_length - 1) * sizeof(WCHAR));
+ sys_memcpy(result->data, LP_UNC_PREFIX, LP_UNC_PREFIX_SIZE);
+ actual_length += LP_UNC_PREFIX_LENGTH;
+ } else if(!is_long_path) {
+ /* C:\gurka -> \\?\C:\gurka */
sys_memmove(result->data + LP_PREFIX_SIZE, result->data,
(actual_length + 1) * sizeof(WCHAR));
- sys_memcpy(result->data, LP_PREFIX, LP_PREFIX_SIZE);
+ sys_memcpy(result->data, LP_FILE_PREFIX, LP_PREFIX_SIZE);
actual_length += LP_PREFIX_LENGTH;
}
@@ -200,13 +226,19 @@ static int normalize_path_result(ErlNifBinary *path) {
ASSERT(length < path->size / sizeof(WCHAR));
/* Get rid of the long-path prefix, if present. */
- if(length >= LP_PREFIX_LENGTH) {
- if(!sys_memcmp(path_start, LP_PREFIX, LP_PREFIX_SIZE)) {
- length -= LP_PREFIX_LENGTH;
- sys_memmove(path_start, &path_start[LP_PREFIX_LENGTH],
- length * sizeof(WCHAR));
- }
+ if(IS_LONG_UNC_PATH(length, path_start)) {
+ /* The first two characters (\\) are the same for both long and short
+ * UNC paths. */
+ sys_memmove(&path_start[2], &path_start[LP_UNC_PREFIX_LENGTH],
+ (length - LP_UNC_PREFIX_LENGTH) * sizeof(WCHAR));
+
+ length -= LP_UNC_PREFIX_LENGTH - 2;
+ } else if(IS_LONG_PATH(length, path_start)) {
+ length -= LP_PREFIX_LENGTH;
+
+ sys_memmove(path_start, &path_start[LP_PREFIX_LENGTH],
+ length * sizeof(WCHAR));
}
path_end = &path_start[length];
@@ -318,49 +350,55 @@ static int has_same_mount_point(const efile_path_t *path_a, const efile_path_t *
/* Mirrors the PathIsRootW function of the shell API, but doesn't choke on
* paths longer than MAX_PATH. */
static int is_path_root(const efile_path_t *path) {
- const WCHAR *path_start, *path_end;
+ const WCHAR *path_start, *path_end, *path_iterator;
int length;
ASSERT_PATH_FORMAT(path);
- path_start = (WCHAR*)path->data + LP_PREFIX_LENGTH;
- length = PATH_LENGTH(path) - LP_PREFIX_LENGTH;
-
- path_end = &path_start[length];
+ if(!IS_LONG_UNC_PATH(PATH_LENGTH(path), path->data)) {
+ path_start = (WCHAR*)path->data + LP_PREFIX_LENGTH;
+ length = PATH_LENGTH(path) - LP_PREFIX_LENGTH;
- if(length == 1) {
/* A single \ refers to the root of the current working directory. */
- return IS_SLASH(path_start[0]);
- } else if(length == 3 && iswalpha(path_start[0]) && path_start[1] == L':') {
- /* Drive letter. */
- return IS_SLASH(path_start[2]);
- } else if(length >= 4) {
- /* Check whether we're a UNC root, eg. \\server, \\server\share */
- const WCHAR *path_iterator;
+ if(length == 1) {
+ return IS_SLASH(path_start[0]);
+ }
- if(!IS_SLASH(path_start[0]) || !IS_SLASH(path_start[1])) {
- return 0;
+ /* Drive letter. */
+ if(length == 3 && iswalpha(path_start[0]) && path_start[1] == L':') {
+ return IS_SLASH(path_start[2]);
}
- path_iterator = path_start + 2;
+ return 0;
+ }
- /* Slide to the slash between the server and share names, if present. */
- while(path_iterator < path_end && !IS_SLASH(*path_iterator)) {
- path_iterator++;
- }
+ /* Check whether we're a UNC root, eg. \\server, \\server\share */
- /* Slide past the end of the string, stopping at the first slash we
- * encounter. */
- do {
- path_iterator++;
- } while(path_iterator < path_end && !IS_SLASH(*path_iterator));
+ path_start = (WCHAR*)path->data + LP_UNC_PREFIX_LENGTH;
+ length = PATH_LENGTH(path) - LP_UNC_PREFIX_LENGTH;
- /* If we're past the end of the string and it didnt't end with a slash,
- * then we're a root path. */
- return path_iterator >= path_end && !IS_SLASH(path_start[length - 1]);
+ path_end = &path_start[length];
+ path_iterator = path_start;
+
+ /* Server name must be at least one character. */
+ if(length <= 1) {
+ return 0;
}
- return 0;
+ /* Slide to the slash between the server and share names, if present. */
+ while(path_iterator < path_end && !IS_SLASH(*path_iterator)) {
+ path_iterator++;
+ }
+
+ /* Slide past the end of the string, stopping at the first slash we
+ * encounter. */
+ do {
+ path_iterator++;
+ } while(path_iterator < path_end && !IS_SLASH(*path_iterator));
+
+ /* If we're past the end of the string and it didnt't end with a slash,
+ * then we're a root path. */
+ return path_iterator >= path_end && !IS_SLASH(path_start[length - 1]);
}
posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes,
@@ -428,18 +466,21 @@ posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes,
}
}
-int efile_close(efile_data_t *d) {
+int efile_close(efile_data_t *d, posix_errno_t *error) {
efile_win_t *w = (efile_win_t*)d;
HANDLE handle;
+ ASSERT(enif_thread_type() == ERL_NIF_THR_DIRTY_IO_SCHEDULER);
ASSERT(erts_atomic32_read_nob(&d->state) == EFILE_STATE_CLOSED);
ASSERT(w->handle != INVALID_HANDLE_VALUE);
handle = w->handle;
w->handle = INVALID_HANDLE_VALUE;
+ enif_release_resource(d);
+
if(!CloseHandle(handle)) {
- w->common.posix_errno = windows_to_posix_errno(GetLastError());
+ *error = windows_to_posix_errno(GetLastError());
return 0;
}
@@ -687,7 +728,7 @@ static int is_name_surrogate(const efile_path_t *path) {
if(handle != INVALID_HANDLE_VALUE) {
REPARSE_GUID_DATA_BUFFER reparse_buffer;
- LPDWORD unused_length;
+ DWORD unused_length;
BOOL success;
success = DeviceIoControl(handle,
@@ -1248,11 +1289,22 @@ posix_errno_t efile_set_cwd(const efile_path_t *path) {
/* We have to use _wchdir since that's the only function that updates the
* per-drive working directory, but it naively assumes that all paths
- * starting with \\ are UNC paths, so we have to skip the \\?\-prefix. */
- path_start = (WCHAR*)path->data + LP_PREFIX_LENGTH;
+ * starting with \\ are UNC paths, so we have to skip the long-path prefix.
+ *
+ * _wchdir doesn't handle long-prefixed UNC paths either so we hand those
+ * to SetCurrentDirectoryW instead. The per-drive working directory is
+ * irrelevant for such paths anyway. */
- if(_wchdir(path_start)) {
- return windows_to_posix_errno(GetLastError());
+ if(!IS_LONG_UNC_PATH(PATH_LENGTH(path), path->data)) {
+ path_start = (WCHAR*)path->data + LP_PREFIX_LENGTH;
+
+ if(_wchdir(path_start)) {
+ return windows_to_posix_errno(GetLastError());
+ }
+ } else {
+ if(!SetCurrentDirectoryW((WCHAR*)path->data)) {
+ return windows_to_posix_errno(GetLastError());
+ }
}
return 0;
@@ -1333,7 +1385,7 @@ posix_errno_t efile_altname(ErlNifEnv *env, const efile_path_t *path, ERL_NIF_TE
int name_length;
/* Reject path wildcards. */
- if(wcspbrk(&((const WCHAR*)path->data)[4], L"?*")) {
+ if(wcspbrk(&((const WCHAR*)path->data)[LP_PREFIX_LENGTH], L"?*")) {
return ENOENT;
}
diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c
index 9f115706dc..c681fa481f 100644
--- a/erts/emulator/sys/common/erl_check_io.c
+++ b/erts/emulator/sys/common/erl_check_io.c
@@ -46,11 +46,11 @@
#if 0
#define DEBUG_PRINT(FMT, ...) erts_printf(FMT "\r\n", ##__VA_ARGS__)
#define DEBUG_PRINT_FD(FMT, STATE, ...) \
- DEBUG_PRINT("%d: " FMT " (ev=%s, ac=%s, flg=%d)", \
+ DEBUG_PRINT("%d: " FMT " (ev=%s, ac=%s, flg=%s)", \
(STATE) ? (STATE)->fd : (ErtsSysFdType)-1, ##__VA_ARGS__, \
ev2str((STATE) ? (STATE)->events : ERTS_POLL_EV_NONE), \
ev2str((STATE) ? (STATE)->active_events : ERTS_POLL_EV_NONE), \
- (STATE) ? (STATE)->flags : ERTS_EV_FLAG_CLEAR)
+ (STATE) ? flag2str((STATE)->flags) : ERTS_EV_FLAG_CLEAR)
#define DEBUG_PRINT_MODE
#else
#define DEBUG_PRINT(...)
@@ -76,22 +76,40 @@ typedef enum {
typedef enum {
ERTS_EV_FLAG_CLEAR = 0,
ERTS_EV_FLAG_USED = 1, /* ERL_DRV_USE has been turned on */
-#ifdef ERTS_ENABLE_KERNEL_POLL
- ERTS_EV_FLAG_FALLBACK = 2, /* Set when kernel poll rejected fd
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ ERTS_EV_FLAG_SCHEDULER = 2, /* Set when the fd has been migrated
+ to scheduler pollset */
+ ERTS_EV_FLAG_IN_SCHEDULER = 4, /* Set when the fd is currently in
+ scheduler pollset */
+#else
+ ERTS_EV_FLAG_SCHEDULER = ERTS_EV_FLAG_CLEAR,
+ ERTS_EV_FLAG_IN_SCHEDULER = ERTS_EV_FLAG_CLEAR,
+#endif
+#ifdef ERTS_POLL_USE_FALLBACK
+ ERTS_EV_FLAG_FALLBACK = 8, /* Set when kernel poll rejected fd
and it was put in the nkp version */
#else
ERTS_EV_FLAG_FALLBACK = ERTS_EV_FLAG_CLEAR,
#endif
/* Combinations */
- ERTS_EV_FLAG_USED_FALLBACK = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_FALLBACK
+ ERTS_EV_FLAG_USED_FALLBACK = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_FALLBACK,
+ ERTS_EV_FLAG_USED_SCHEDULER = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_SCHEDULER,
+ ERTS_EV_FLAG_USED_IN_SCHEDULER = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_SCHEDULER | ERTS_EV_FLAG_IN_SCHEDULER,
+ ERTS_EV_FLAG_UNUSED_SCHEDULER = ERTS_EV_FLAG_SCHEDULER,
+ ERTS_EV_FLAG_UNUSED_IN_SCHEDULER = ERTS_EV_FLAG_SCHEDULER | ERTS_EV_FLAG_IN_SCHEDULER
} EventStateFlags;
#define flag2str(flags) \
((flags) == ERTS_EV_FLAG_CLEAR ? "CLEAR" : \
((flags) == ERTS_EV_FLAG_USED ? "USED" : \
((flags) == ERTS_EV_FLAG_FALLBACK ? "FLBK" : \
- ((flags) == ERTS_EV_FLAG_USED_FALLBACK ? "USED|FLBK" : "ERROR"))))
+ ((flags) == ERTS_EV_FLAG_USED_FALLBACK ? "USED|FLBK" : \
+ ((flags) == ERTS_EV_FLAG_USED_SCHEDULER ? "USED|SCHD" : \
+ ((flags) == ERTS_EV_FLAG_UNUSED_SCHEDULER ? "SCHD" : \
+ ((flags) == ERTS_EV_FLAG_USED_IN_SCHEDULER ? "USED|IN_SCHD" : \
+ ((flags) == ERTS_EV_FLAG_UNUSED_IN_SCHEDULER ? "IN_SCHD" : \
+ "ERROR"))))))))
/* How many events that can be handled at once by one erts_poll_wait call */
#define ERTS_CHECK_IO_POLL_RES_LEN 512
@@ -105,6 +123,7 @@ typedef struct erts_poll_thread
{
ErtsPollSet *ps;
ErtsPollResFd *pollres;
+ ErtsThrPrgrData *tpd;
int pollres_len;
} ErtsPollThread;
@@ -112,10 +131,13 @@ typedef struct erts_poll_thread
* Which pollset to use is determined by hashing the fd.
*/
static ErtsPollSet **pollsetv;
+static ErtsPollThread *psiv;
#if ERTS_POLL_USE_FALLBACK
static ErtsPollSet *flbk_pollset;
#endif
-static ErtsPollThread *psiv;
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+static ErtsPollSet *sched_pollset;
+#endif
typedef struct {
#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
@@ -130,10 +152,12 @@ typedef struct {
ErtsResource* resource; /* ERTS_EV_TYPE_STOP_NIF */
} stop;
} driver;
- ErtsPollEvents events; /* The events that have been selected upon */
+ ErtsPollEvents events; /* The events that have been selected upon */
ErtsPollEvents active_events; /* The events currently active in the pollset */
EventStateType type;
EventStateFlags flags;
+ int count; /* Number of times this fd has triggered
+ without being deselected. */
} ErtsDrvEventState;
struct drv_ev_state_shared {
@@ -370,12 +394,22 @@ get_pollset(ErtsSysFdType fd)
#if ERTS_POLL_USE_FALLBACK
static ERTS_INLINE ErtsPollSet *
-get_fallback(void)
+get_fallback_pollset(void)
{
return flbk_pollset;
}
#endif
+static ERTS_INLINE ErtsPollSet *
+get_scheduler_pollset(ErtsSysFdType fd)
+{
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ return sched_pollset;
+#else
+ return get_pollset(fd);
+#endif
+}
+
/*
* Place a fd within a pollset. This will automatically use
* the fallback ps if needed.
@@ -391,18 +425,27 @@ erts_io_control_wakeup(ErtsDrvEventState *state, ErtsPollOp op,
ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd)));
if (!(flags & ERTS_EV_FLAG_FALLBACK)) {
- res = erts_poll_control(get_pollset(fd), fd, op, pe, wake_poller);
+
+ if (op == ERTS_POLL_OP_DEL && (flags & ERTS_EV_FLAG_SCHEDULER)) {
+ erts_poll_control(get_scheduler_pollset(fd), fd, op, pe, wake_poller);
+ flags &= ~ERTS_EV_FLAG_IN_SCHEDULER;
+ }
+ if (!(flags & ERTS_EV_FLAG_IN_SCHEDULER) || (pe & ERTS_POLL_EV_OUT)) {
+ res = erts_poll_control(get_pollset(fd), fd, op, pe, wake_poller);
+ } else {
+ res = erts_poll_control(get_scheduler_pollset(fd), fd, op, pe, wake_poller);
+ }
#if ERTS_POLL_USE_FALLBACK
if (op == ERTS_POLL_OP_ADD && res == ERTS_POLL_EV_NVAL) {
/* When an add fails with NVAL, the poll/kevent operation could not
put that fd in the pollset, so we instead put it into a fallback pollset */
state->flags |= ERTS_EV_FLAG_FALLBACK;
- res = erts_poll_control_flbk(get_fallback(), fd, op, pe, wake_poller);
+ res = erts_poll_control_flbk(get_fallback_pollset(), fd, op, pe, wake_poller);
}
} else {
ASSERT(op != ERTS_POLL_OP_ADD);
- res = erts_poll_control_flbk(get_fallback(), fd, op, pe, wake_poller);
+ res = erts_poll_control_flbk(get_fallback_pollset(), fd, op, pe, wake_poller);
#endif
}
@@ -425,59 +468,77 @@ erts_io_notify_port_task_executed(ErtsPortTaskType type,
ErtsIoTask *itp = ErtsContainerStruct(pthp, ErtsIoTask, task);
ErtsSysFdType fd = itp->fd;
erts_mtx_t *mtx = fd_mtx(fd);
- int active_events;
+ ErtsPollOp op = ERTS_POLL_OP_MOD;
+ int active_events, new_events = 0;
ErtsDrvEventState *state;
ErtsDrvSelectDataState *free_select = NULL;
ErtsNifSelectDataState *free_nif = NULL;
+ ERTS_MSACC_PUSH_AND_SET_STATE_M_X(ERTS_MSACC_STATE_CHECK_IO);
+
erts_mtx_lock(mtx);
state = get_drv_ev_state(fd);
+ reset_handle(pthp);
+
active_events = state->active_events;
- switch (type) {
- case ERTS_PORT_TASK_INPUT:
+ if (!(state->flags & ERTS_EV_FLAG_IN_SCHEDULER) || type == ERTS_PORT_TASK_OUTPUT) {
+ switch (type) {
+ case ERTS_PORT_TASK_INPUT:
+
+ DEBUG_PRINT_FD("executed ready_input", state);
+
+ ASSERT(!(state->active_events & ERTS_POLL_EV_IN));
+ if (state->events & ERTS_POLL_EV_IN) {
+ active_events |= ERTS_POLL_EV_IN;
+ if (state->count > 10 && ERTS_POLL_USE_SCHEDULER_POLLING) {
+ if (!(state->flags & ERTS_EV_FLAG_SCHEDULER))
+ op = ERTS_POLL_OP_ADD;
+ state->flags |= ERTS_EV_FLAG_IN_SCHEDULER|ERTS_EV_FLAG_SCHEDULER;
+ new_events = ERTS_POLL_EV_IN;
+ DEBUG_PRINT_FD("moving to scheduler ps", state);
+ } else
+ new_events = active_events;
+ if (!(state->flags & ERTS_EV_FLAG_FALLBACK) && ERTS_POLL_USE_SCHEDULER_POLLING)
+ state->count++;
+ }
+ break;
+ case ERTS_PORT_TASK_OUTPUT:
- DEBUG_PRINT_FD("executed ready_input", state);
+ DEBUG_PRINT_FD("executed ready_output", state);
- ASSERT(!(state->active_events & ERTS_POLL_EV_IN));
- if (state->events & ERTS_POLL_EV_IN)
- active_events |= ERTS_POLL_EV_IN;
- break;
- case ERTS_PORT_TASK_OUTPUT:
+ ASSERT(!(state->active_events & ERTS_POLL_EV_OUT));
+ if (state->events & ERTS_POLL_EV_OUT) {
+ active_events |= ERTS_POLL_EV_OUT;
+ if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER && active_events & ERTS_POLL_EV_IN)
+ new_events = ERTS_POLL_EV_OUT;
+ else
+ new_events = active_events;
+ }
+ break;
+ default:
+ erts_exit(ERTS_ABORT_EXIT, "Invalid IO port task type");
+ break;
+ }
- DEBUG_PRINT_FD("executed ready_output", state);
+ if (state->active_events != active_events && new_events) {
+ state->active_events = active_events;
+ new_events = erts_io_control(state, op, new_events);
+ }
- ASSERT(!(state->active_events & ERTS_POLL_EV_OUT));
- if (state->events & ERTS_POLL_EV_OUT)
- active_events |= ERTS_POLL_EV_OUT;
- break;
- default:
- erts_exit(ERTS_ABORT_EXIT, "Invalid IO port task type");
- break;
+ /* We were unable to re-insert the fd into the pollset, signal the callback. */
+ if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
+ if (state->active_events & ERTS_POLL_EV_IN)
+ iready(state->driver.select->inport, state);
+ if (state->active_events & ERTS_POLL_EV_OUT)
+ oready(state->driver.select->outport, state);
+ state->active_events = 0;
+ }
}
- reset_handle(pthp);
-
- if (active_events) {
- /* This is not needed if active_events has not changed */
- if (state->active_events != active_events) {
- ErtsPollEvents new_events;
- state->active_events = active_events;
- new_events = erts_io_control(state, ERTS_POLL_OP_MOD, active_events);
-
- /* We were unable to re-insert the fd into the pollset, signal the callback. */
- if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
- if (active_events & ERTS_POLL_EV_IN)
- iready(state->driver.select->inport, state);
- if (active_events & ERTS_POLL_EV_OUT)
- oready(state->driver.select->outport, state);
- state->active_events = 0;
- }
- }
- } else {
+ if (!active_events)
check_fd_cleanup(state, &free_select, &free_nif);
- }
erts_mtx_unlock(mtx);
@@ -485,6 +546,8 @@ erts_io_notify_port_task_executed(ErtsPortTaskType type,
free_drv_select_data(free_select);
if (free_nif)
free_nif_select_data(free_nif);
+
+ ERTS_MSACC_POP_STATE_M_X();
}
static ERTS_INLINE void
@@ -755,11 +818,22 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on)
if (old_events == 0 && !(state->flags & ERTS_EV_FLAG_USED)) {
ctl_op = ERTS_POLL_OP_ADD;
}
+ new_events = state->active_events;
+ if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER)
+ new_events &= ~ERTS_POLL_EV_IN;
}
else {
ctl_events &= old_events;
state->events &= ~ctl_events;
state->active_events &= ~ctl_events;
+ new_events = state->active_events;
+
+ if (ctl_events & ERTS_POLL_EV_IN) {
+ state->count = 0;
+ if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) {
+ new_events = 0;
+ }
+ }
if (!state->events) {
if (!(state->flags & ERTS_EV_FLAG_USED) || mode & ERL_DRV_USE)
@@ -770,7 +844,7 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on)
if (ctl_events || ctl_op == ERTS_POLL_OP_DEL) {
new_events = erts_io_control_wakeup(state, ctl_op,
- state->active_events,
+ new_events,
&wake_poller);
ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL || state->type == ERTS_EV_TYPE_NONE);
@@ -802,6 +876,7 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on)
if (ctl_events & ERTS_POLL_EV_IN) {
abort_tasks(state, ERL_DRV_READ);
state->driver.select->inport = NIL;
+ state->flags &= ~ERTS_EV_FLAG_IN_SCHEDULER;
}
if (ctl_events & ERTS_POLL_EV_OUT) {
abort_tasks(state, ERL_DRV_WRITE);
@@ -810,6 +885,8 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on)
if (state->events == 0) {
if ((mode & ERL_DRV_USE) || !(state->flags & ERTS_EV_FLAG_USED)) {
state->type = ERTS_EV_TYPE_NONE;
+ if (state->flags & ERTS_EV_FLAG_SCHEDULER)
+ erts_atomic32_read_bor_nob(&prt->state, ERTS_PORT_SFLG_CHECK_FD_CLEANUP);
state->flags = 0;
}
/*else keep it, as fd will probably be selected upon again */
@@ -1426,7 +1503,8 @@ iready(Eterm id, ErtsDrvEventState *state)
if (erts_port_task_schedule(id,
&iotask->task,
ERTS_PORT_TASK_INPUT,
- (ErlDrvEvent) state->fd) != 0) {
+ (ErlDrvEvent) state->fd,
+ state->flags & ERTS_EV_FLAG_IN_SCHEDULER) != 0) {
stale_drv_select(id, state, ERL_DRV_READ);
} else {
DEBUG_PRINT_FD("schedule ready_input(%T, %d)",
@@ -1444,7 +1522,8 @@ oready(Eterm id, ErtsDrvEventState *state)
if (erts_port_task_schedule(id,
&iotask->task,
ERTS_PORT_TASK_OUTPUT,
- (ErlDrvEvent) state->fd) != 0) {
+ (ErlDrvEvent) state->fd,
+ 0) != 0) {
stale_drv_select(id, state, ERL_DRV_WRITE);
} else {
DEBUG_PRINT_FD("schedule ready_output(%T, %d)", state, id, state->fd);
@@ -1506,7 +1585,7 @@ erts_check_io_interrupt(ErtsPollThread *psi, int set)
{
if (psi) {
#if ERTS_POLL_USE_FALLBACK
- if (psi->ps == get_fallback()) {
+ if (psi->ps == get_fallback_pollset()) {
erts_poll_interrupt_flbk(psi->ps, set);
return;
}
@@ -1516,12 +1595,13 @@ erts_check_io_interrupt(ErtsPollThread *psi, int set)
}
ErtsPollThread *
-erts_create_pollset_thread(int id) {
+erts_create_pollset_thread(int id, ErtsThrPrgrData *tpd) {
+ psiv[id].tpd = tpd;
return psiv+id;
}
void
-erts_check_io(ErtsPollThread *psi)
+erts_check_io(ErtsPollThread *psi, ErtsMonotonicTime timeout_time)
{
int pollres_len;
int poll_ret, i;
@@ -1536,14 +1616,14 @@ erts_check_io(ErtsPollThread *psi)
pollres_len = psi->pollres_len;
#if ERTS_POLL_USE_FALLBACK
- if (psi->ps == get_fallback()) {
+ if (psi->ps == get_fallback_pollset()) {
- poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len);
+ poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len, psi->tpd, timeout_time);
} else
#endif
{
- poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len);
+ poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len, psi->tpd, timeout_time);
}
#ifdef ERTS_ENABLE_LOCK_CHECK
@@ -1579,7 +1659,12 @@ erts_check_io(ErtsPollThread *psi)
ErtsNifSelectDataState *free_nif = NULL;
ErtsSysFdType fd = (ErtsSysFdType) ERTS_POLL_RES_GET_FD(&psi->pollres[i]);
ErtsDrvEventState *state;
- ErtsPollEvents revents;
+ ErtsPollEvents revents = ERTS_POLL_RES_GET_EVTS(&psi->pollres[i]);
+
+ /* The fd will be set to -1 if a pollset internal fd was triggered
+ that was determined to be too expensive to remove from the result.
+ */
+ if (fd == -1) continue;
erts_mtx_lock(fd_mtx(fd));
@@ -1590,8 +1675,6 @@ erts_check_io(ErtsPollThread *psi)
continue;
}
- revents = ERTS_POLL_RES_GET_EVTS(&psi->pollres[i]);
-
DEBUG_PRINT_FD("triggered %s", state, ev2str(revents));
if (revents & ERTS_POLL_EV_ERR) {
@@ -1603,25 +1686,39 @@ erts_check_io(ErtsPollThread *psi)
*/
revents = state->active_events;
state->active_events = 0;
+
+ if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) {
+ erts_io_control(state, ERTS_POLL_OP_MOD, 0);
+ state->flags &= ~ERTS_EV_FLAG_IN_SCHEDULER;
+ }
} else {
/* Disregard any events that are not active at the moment,
for instance this could happen if the driver/nif does
select/deselect in rapid succession. */
revents &= state->active_events | ERTS_POLL_EV_NVAL;
- state->active_events &= ~revents;
- /* Reactivate the poll op if there are still active events */
- if (state->active_events) {
- ErtsPollEvents new_events;
- DEBUG_PRINT_FD("re-enable %s", state, ev2str(state->active_events));
+ if (psi->ps != get_scheduler_pollset(fd) || !ERTS_POLL_USE_SCHEDULER_POLLING) {
+ ErtsPollEvents reactive_events;
+ state->active_events &= ~revents;
- new_events = erts_io_control(state, ERTS_POLL_OP_MOD, state->active_events);
+ reactive_events = state->active_events;
- /* Unable to re-enable the fd, signal all callbacks */
- if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
- revents |= state->active_events;
- state->active_events = 0;
+ if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER)
+ reactive_events &= ~ERTS_POLL_EV_IN;
+
+ /* Reactivate the poll op if there are still active events */
+ if (reactive_events) {
+ ErtsPollEvents new_events;
+ DEBUG_PRINT_FD("re-enable %s", state, ev2str(reactive_events));
+
+ new_events = erts_io_control(state, ERTS_POLL_OP_MOD, reactive_events);
+
+ /* Unable to re-enable the fd, signal all callbacks */
+ if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
+ revents |= reactive_events;
+ state->active_events &= ~reactive_events;
+ }
}
}
}
@@ -1697,7 +1794,7 @@ erts_check_io(ErtsPollThread *psi)
case ERTS_EV_TYPE_STOP_USE: {
#if ERTS_POLL_USE_FALLBACK
- ASSERT(psi->ps == get_fallback());
+ ASSERT(psi->ps == get_fallback_pollset());
#endif
drv_ptr = state->driver.stop.drv_ptr;
state->type = ERTS_EV_TYPE_NONE;
@@ -2035,12 +2132,17 @@ erts_init_check_io(int *argc, char **argv)
for (j=0; j < erts_no_pollsets; j++)
pollsetv[j] = erts_poll_create_pollset(j);
-#if ERTS_POLL_USE_FALLBACK
- flbk_pollset = erts_poll_create_pollset_flbk(-1);
+ no_poll_threads = erts_no_poll_threads;
+
+ j = -1;
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ sched_pollset = erts_poll_create_pollset(j--);
+ no_poll_threads++;
#endif
- no_poll_threads = erts_no_poll_threads;
#if ERTS_POLL_USE_FALLBACK
+ flbk_pollset = erts_poll_create_pollset_flbk(j--);
no_poll_threads++;
#endif
@@ -2050,7 +2152,15 @@ erts_init_check_io(int *argc, char **argv)
psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN;
psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET,
sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN);
- psiv[0].ps = get_fallback();
+ psiv[0].ps = get_fallback_pollset();
+ psiv++;
+#endif
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN;
+ psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET,
+ sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN);
+ psiv[0].ps = get_scheduler_pollset(0);
psiv++;
#endif
@@ -2107,7 +2217,12 @@ erts_check_io_size(void)
int i;
#if ERTS_POLL_USE_FALLBACK
- erts_poll_info(get_fallback(), &pi);
+ erts_poll_info(get_fallback_pollset(), &pi);
+ res += pi.memory_size;
+#endif
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_poll_info(get_scheduler_pollset(0), &pi);
res += pi.memory_size;
#endif
@@ -2139,13 +2254,21 @@ erts_check_io_info(void *proc)
Uint sz, *szp, *hp, **hpp;
ErtsPollInfo *piv;
Sint i, j = 0, len;
- int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK;
+ int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK + ERTS_POLL_USE_SCHEDULER_POLLING;
ERTS_CT_ASSERT(ERTS_POLL_USE_FALLBACK == 0 || ERTS_POLL_USE_FALLBACK == 1);
+ ERTS_CT_ASSERT(ERTS_POLL_USE_SCHEDULER_POLLING == 0 || ERTS_POLL_USE_SCHEDULER_POLLING == 1);
piv = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollInfo) * no_pollsets);
#if ERTS_POLL_USE_FALLBACK
- erts_poll_info_flbk(get_fallback(), &piv[0]);
+ erts_poll_info_flbk(get_fallback_pollset(), &piv[0]);
+ piv[0].poll_threads = 1;
+ piv[0].active_fds = 0;
+ piv++;
+#endif
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_poll_info(get_scheduler_pollset(0), &piv[0]);
piv[0].poll_threads = 1;
piv[0].active_fds = 0;
piv++;
@@ -2199,6 +2322,7 @@ erts_check_io_info(void *proc)
sz = 0;
piv -= ERTS_POLL_USE_FALLBACK;
+ piv -= ERTS_POLL_USE_SCHEDULER_POLLING;
bld_it:
@@ -2303,15 +2427,7 @@ print_events(erts_dsprintf_buf_t *dsbufp, ErtsPollEvents ev)
static ERTS_INLINE void
print_flags(erts_dsprintf_buf_t *dsbufp, EventStateFlags f)
{
- const char* delim = "";
- if(f & ERTS_EV_FLAG_USED) {
- erts_dsprintf(dsbufp, "%s","USED");
- delim = "|";
- }
- if(f & ERTS_EV_FLAG_FALLBACK) {
- erts_dsprintf(dsbufp, "%s%s", delim, "FLBK");
- delim = "|";
- }
+ erts_dsprintf(dsbufp, "%s", flag2str(f));
}
#ifdef DEBUG_PRINT_MODE
@@ -2653,13 +2769,26 @@ erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip)
#if ERTS_POLL_USE_FALLBACK
erts_dsprintf(dsbufp, "--- fds in flbk pollset ---------------------------------\n");
- erts_poll_get_selected_events_flbk(get_fallback(), counters.epep,
+ erts_poll_get_selected_events_flbk(get_fallback_pollset(), counters.epep,
drv_ev_state.max_fds);
for (fd = 0; fd < len; fd++) {
if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK)
doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp);
}
#endif
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_dsprintf(dsbufp, "--- fds in scheduler pollset ----------------------------\n");
+ erts_poll_get_selected_events(get_scheduler_pollset(0), counters.epep,
+ drv_ev_state.max_fds);
+ for (fd = 0; fd < len; fd++) {
+ if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_SCHEDULER) {
+ if (drv_ev_state.v[fd].events && drv_ev_state.v[fd].events != ERTS_POLL_EV_NONE)
+ counters.epep[fd] &= ~ERTS_POLL_EV_OUT;
+ doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp);
+ }
+ }
+#endif
+
erts_dsprintf(dsbufp, "--- fds in pollset --------------------------------------\n");
for (i = 0; i < erts_no_pollsets; i++) {
@@ -2668,8 +2797,15 @@ erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip)
drv_ev_state.max_fds);
for (fd = 0; fd < len; fd++) {
if (!(drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK)
- && get_pollset_id(fd) == i)
+ && get_pollset_id(fd) == i) {
+ if (counters.epep[fd] != ERTS_POLL_EV_NONE &&
+ drv_ev_state.v[fd].flags & ERTS_EV_FLAG_IN_SCHEDULER) {
+ /* We add the in flag if it is enabled in the scheduler pollset
+ and get_selected_events works on the platform */
+ counters.epep[fd] |= ERTS_POLL_EV_IN;
+ }
doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp);
+ }
}
}
for (fd = len ; fd < drv_ev_state.max_fds; fd++) {
@@ -2716,7 +2852,7 @@ void erts_lcnt_update_cio_locks(int enable) {
#endif
#if ERTS_POLL_USE_FALLBACK
- erts_lcnt_enable_pollset_lock_count_flbk(get_fallback(), enable);
+ erts_lcnt_enable_pollset_lock_count_flbk(get_fallback_pollset(), enable);
#endif
for (i = 0; i < erts_no_pollsets; i++)
diff --git a/erts/emulator/sys/common/erl_check_io.h b/erts/emulator/sys/common/erl_check_io.h
index 443ef1264c..31182be5ec 100644
--- a/erts/emulator/sys/common/erl_check_io.h
+++ b/erts/emulator/sys/common/erl_check_io.h
@@ -68,7 +68,7 @@ int erts_check_io_max_files(void);
*
* @param pt the poll thread structure to use.
*/
-void erts_check_io(struct erts_poll_thread *pt);
+void erts_check_io(struct erts_poll_thread *pt, ErtsMonotonicTime timeout_time);
/**
* Initialize the check io framework. This function will parse the arguments
* and delete any entries that it is interested in.
@@ -90,8 +90,11 @@ void erts_check_io_interrupt(struct erts_poll_thread *pt, int set);
/**
* Create a new poll thread structure that is associated with the number no.
* It is the callers responsibility that no is unique.
+ *
+ * @param no the id of the pollset thread, -2 = aux thread, -1 = scheduler
+ * @param tpd the thread progress data of the pollset thread
*/
-struct erts_poll_thread* erts_create_pollset_thread(int no);
+struct erts_poll_thread* erts_create_pollset_thread(int no, ErtsThrPrgrData *tpd);
#ifdef ERTS_ENABLE_LOCK_COUNT
/**
* Toggle lock counting on all check io locks
@@ -126,16 +129,6 @@ extern int erts_no_poll_threads;
#include "erl_poll.h"
#include "erl_port_task.h"
-#ifdef __WIN32__
-/*
- * Current erts_poll implementation for Windows cannot handle
- * active events in the set of events polled.
- */
-# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1
-#else
-# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1
-#endif
-
typedef struct {
Eterm inport;
Eterm outport;
@@ -147,10 +140,6 @@ struct erts_nif_select_event {
Eterm pid;
Eterm immed;
Uint32 refn[ERTS_REF_NUMBERS];
- Sint32 ddeselect_cnt; /* 0: No delayed deselect in progress
- * 1: Do deselect before next poll
- * >1: Countdown of ignored events
- */
};
typedef struct {
diff --git a/erts/emulator/sys/common/erl_poll.c b/erts/emulator/sys/common/erl_poll.c
index b4d1575ee5..51d50933ff 100644
--- a/erts/emulator/sys/common/erl_poll.c
+++ b/erts/emulator/sys/common/erl_poll.c
@@ -75,6 +75,7 @@
# define WANT_NONBLOCKING
#endif
+#include "erl_thr_progress.h"
#include "erl_poll.h"
#if ERTS_POLL_USE_KQUEUE
# include <sys/types.h>
@@ -95,7 +96,6 @@
# include <limits.h>
# endif
#endif
-#include "erl_thr_progress.h"
#include "erl_driver.h"
#include "erl_alloc.h"
#include "erl_msacc.h"
@@ -121,7 +121,8 @@
/* Define to print info about modifications done to each fd */
#define DEBUG_PRINT_FD(FMT, PS, FD, ...) DEBUG_PRINT("%d: " FMT, PS, FD, ##__VA_ARGS__)
/* Define to print entry and exit from erts_poll_wait (can be very spammy) */
-//#define DEBUG_PRINT_WAIT(FMT, PS, ...) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__)
+// #define DEBUG_PRINT_WAIT(FMT, PS, ...) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__)
+// #define DEBUG_PRINT_WAIT(FMT, PS, ...) do { if ((PS)->id != -1) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__); } while(0)
#else
#define ERTS_POLL_DEBUG_PRINT 0
@@ -200,7 +201,7 @@ int ERTS_SELECT(int nfds, ERTS_fd_set *readfds, ERTS_fd_set *writefds,
#define ERTS_POLL_USE_CONCURRENT_UPDATE (ERTS_POLL_USE_EPOLL || ERTS_POLL_USE_KQUEUE)
-#define ERTS_POLL_USE_WAKEUP_PIPE (!ERTS_POLL_USE_CONCURRENT_UPDATE)
+#define ERTS_POLL_USE_WAKEUP(ps) (!ERTS_POLL_USE_CONCURRENT_UPDATE || (ps)->id < 0)
#if !ERTS_POLL_USE_CONCURRENT_UPDATE
@@ -269,6 +270,7 @@ struct ERTS_POLL_EXPORT(erts_pollset) {
#if ERTS_POLL_USE_KERNEL_POLL
int kp_fd;
+ int oneshot;
#endif /* ERTS_POLL_USE_KERNEL_POLL */
#if ERTS_POLL_USE_POLL
@@ -295,12 +297,16 @@ struct ERTS_POLL_EXPORT(erts_pollset) {
ErtsPollSetUpdateRequestsBlock *curr_upd_req_block;
erts_atomic32_t have_update_requests;
erts_mtx_t mtx;
- erts_atomic32_t wakeup_state;
+#else
+ int do_wakeup;
#endif
-#if ERTS_POLL_USE_WAKEUP_PIPE
- int wake_fds[2];
+#if ERTS_POLL_USE_TIMERFD
+ int timer_fd;
#endif
+ ErtsMonotonicTime timeout_time;
+ erts_atomic32_t wakeup_state;
+ int wake_fds[2];
};
void erts_silence_warn_unused_result(long unused);
@@ -365,63 +371,47 @@ static void print_misc_debug_info(void);
uint32_t epoll_events(int kp_fd, int fd);
#endif
-
#define ERTS_POLL_NOT_WOKEN 0
#define ERTS_POLL_WOKEN -1
#define ERTS_POLL_WOKEN_INTR 1
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
static ERTS_INLINE void
reset_wakeup_state(ErtsPollSet *ps)
{
erts_atomic32_set_mb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN);
}
-#endif
static ERTS_INLINE int
is_woken(ErtsPollSet *ps)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
return erts_atomic32_read_acqb(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN;
-#else
- return 0;
-#endif
}
static ERTS_INLINE int
is_interrupted_reset(ErtsPollSet *ps)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
return (erts_atomic32_xchg_acqb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN)
== ERTS_POLL_WOKEN_INTR);
-#else
- return 0;
-#endif
}
static ERTS_INLINE void
woke_up(ErtsPollSet *ps)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
erts_aint32_t wakeup_state = erts_atomic32_read_acqb(&ps->wakeup_state);
if (wakeup_state == ERTS_POLL_NOT_WOKEN)
(void) erts_atomic32_cmpxchg_nob(&ps->wakeup_state,
ERTS_POLL_WOKEN,
ERTS_POLL_NOT_WOKEN);
ASSERT(erts_atomic32_read_nob(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN);
-#endif
}
/*
* --- Wakeup pipe -----------------------------------------------------------
*/
-#if ERTS_POLL_USE_WAKEUP_PIPE
-
static ERTS_INLINE void
wake_poller(ErtsPollSet *ps, int interrupted)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
int wake;
erts_aint32_t wakeup_state;
if (!interrupted)
@@ -434,9 +424,9 @@ wake_poller(ErtsPollSet *ps, int interrupted)
wake = wakeup_state == ERTS_POLL_NOT_WOKEN;
if (wake)
-#endif
{
ssize_t res;
+ DEBUG_PRINT_WAIT("wake_poller(%d)", ps, interrupted);
if (ps->wake_fds[1] < 0)
return; /* Not initialized yet */
do {
@@ -474,10 +464,8 @@ cleanup_wakeup_pipe(ErtsPollSet *ps)
fd,
erl_errno_id(errno), errno);
}
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
if (intr)
erts_atomic32_set_nob(&ps->wakeup_state, ERTS_POLL_WOKEN_INTR);
-#endif
}
static void
@@ -513,7 +501,67 @@ create_wakeup_pipe(ErtsPollSet *ps)
ps->wake_fds[1] = wake_fds[1];
}
+/*
+ * --- timer fd -----------------------------------------------------------
+ */
+
+#if ERTS_POLL_USE_TIMERFD
+
+/* We use the timerfd when using epoll_wait to get high accuracy
+ timeouts, i.e. we want to sleep with < ms accuracy. */
+
+static void
+create_timerfd(ErtsPollSet *ps)
+{
+ int do_wake = 0;
+ int timer_fd = timerfd_create(CLOCK_MONOTONIC,0);
+ ERTS_POLL_EXPORT(erts_poll_control)(ps,
+ timer_fd,
+ ERTS_POLL_OP_ADD,
+ ERTS_POLL_EV_IN,
+ &do_wake);
+ if (ps->internal_fd_limit <= timer_fd)
+ ps->internal_fd_limit = timer_fd + 1;
+ ps->timer_fd = timer_fd;
+}
+
+static ERTS_INLINE void
+timerfd_set(ErtsPollSet *ps, struct itimerspec *its)
+{
+#ifdef DEBUG
+ struct itimerspec old_its;
+ int res;
+ res = timerfd_settime(ps->timer_fd, 0, its, &old_its);
+ ASSERT(res == 0);
+ ASSERT(old_its.it_interval.tv_sec == 0 &&
+ old_its.it_interval.tv_nsec == 0 &&
+ old_its.it_value.tv_sec == 0 &&
+ old_its.it_value.tv_nsec == 0);
+
+#else
+ timerfd_settime(ps->timer_fd, 0, its, NULL);
#endif
+}
+
+static ERTS_INLINE int
+timerfd_clear(ErtsPollSet *ps, ErtsPollResFd pr[], int res, int max_res) {
+
+ struct itimerspec its;
+ /* we always have to clear the timer */
+ its.it_interval.tv_sec = 0;
+ its.it_interval.tv_nsec = 0;
+ its.it_value.tv_sec = 0;
+ its.it_value.tv_nsec = 0;
+ timerfd_settime(ps->timer_fd, 0, &its, NULL);
+
+ /* only timeout fd triggered */
+ if (res == 1 && pr[0].data.fd == ps->timer_fd)
+ return 0;
+
+ return res;
+}
+
+#endif /* ERTS_POLL_USE_TIMERFD */
/*
* --- Poll set update requests ----------------------------------------------
@@ -691,9 +739,12 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
struct epoll_event epe_templ;
struct epoll_event epe;
- epe_templ.events = ERTS_POLL_EV_E2N(events) | EPOLLONESHOT;
+ epe_templ.events = ERTS_POLL_EV_E2N(events);
epe_templ.data.fd = fd;
+ if (ps->oneshot)
+ epe_templ.events |= EPOLLONESHOT;
+
#ifdef VALGRIND
/* Silence invalid valgrind warning ... */
memset((void *) &epe.data, 0, sizeof(epoll_data_t));
@@ -802,6 +853,7 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
int res = 0, len = 0;
struct kevent evts[2];
struct timespec ts = {0, 0};
+ uint32_t oneshot = 0;
if (op == ERTS_POLL_OP_ADD) {
/* This is a hack to make the "noshell" option work; kqueue can poll
@@ -840,6 +892,9 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
man page), but it seems to be the way it works...
*/
+ if (ps->oneshot)
+ oneshot = EV_DISPATCH;
+
if (op == ERTS_POLL_OP_DEL) {
erts_atomic_dec_nob(&ps->no_of_user_fds);
/* We could probably skip this delete, do we want to? */
@@ -849,27 +904,29 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
uint32_t flags;
erts_atomic_inc_nob(&ps->no_of_user_fds);
- flags = EV_ADD|EV_DISPATCH;
+ flags = EV_ADD|oneshot;
flags |= ((events & ERTS_POLL_EV_IN) ? 0 : EV_DISABLE);
ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN);
- flags = EV_ADD|EV_DISPATCH;
+ flags = EV_ADD|oneshot;
flags |= ((events & ERTS_POLL_EV_OUT) ? 0 : EV_DISABLE);
ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT);
} else {
uint32_t flags;
ASSERT(op == ERTS_POLL_OP_MOD);
- flags = EV_DISPATCH;
+ flags = oneshot;
flags |= (events & ERTS_POLL_EV_IN) ? EV_ENABLE : EV_DISABLE;
ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN);
- flags = EV_DISPATCH;
+ flags = oneshot;
flags |= (events & ERTS_POLL_EV_OUT) ? EV_ENABLE : EV_DISABLE;
ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT);
}
#else
- uint32_t flags = EV_ADD|EV_ONESHOT;
+ uint32_t flags = EV_ADD;
+
+ if (ps->oneshot) flags |= EV_ONESHOT;
if (op == ERTS_POLL_OP_DEL) {
erts_atomic_dec_nob(&ps->no_of_user_fds);
@@ -903,14 +960,17 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
keventbp += sprintf(keventbp, "kevent(%d, {",ps->kp_fd);
for (i = 0; i < len; i++) {
const char *flags = "UNKNOWN";
- if (evts[i].flags == EV_DELETE) flags = "EV_DELETE";
+ if (evts[i].flags == (EV_DELETE)) flags = "EV_DELETE";
if (evts[i].flags == (EV_ADD|EV_ONESHOT)) flags = "EV_ADD|EV_ONESHOT";
+ if (evts[i].flags == (EV_ADD)) flags = "EV_ADD";
#ifdef EV_DISPATCH
if (evts[i].flags == (EV_ADD|EV_DISPATCH)) flags = "EV_ADD|EV_DISPATCH";
if (evts[i].flags == (EV_ADD|EV_DISABLE)) flags = "EV_ADD|EV_DISABLE";
if (evts[i].flags == (EV_ENABLE|EV_DISPATCH)) flags = "EV_ENABLE|EV_DISPATCH";
- if (evts[i].flags == EV_DISABLE) flags = "EV_DISABLE";
+ if (evts[i].flags == (EV_ENABLE)) flags = "EV_ENABLE";
+ if (evts[i].flags == (EV_DISABLE)) flags = "EV_DISABLE";
if (evts[i].flags == (EV_DISABLE|EV_DISPATCH)) flags = "EV_DISABLE|EV_DISABLE";
+ if (evts[i].flags == (EV_DISABLE)) flags = "EV_DISABLE";
#endif
keventbp += sprintf(keventbp, "%s{%lu, %s, %s}",i > 0 ? ", " : "",
@@ -1273,11 +1333,15 @@ poll_control(ErtsPollSet *ps, int fd, ErtsPollOp op,
goto done;
}
#endif
-#if ERTS_POLL_USE_WAKEUP_PIPE
if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1]) {
new_events = ERTS_POLL_EV_NVAL;
goto done;
}
+#if ERTS_POLL_USE_TIMERFD
+ if (fd == ps->timer_fd) {
+ new_events = ERTS_POLL_EV_NVAL;
+ goto done;
+ }
#endif
}
@@ -1333,11 +1397,8 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps,
ERTS_POLLSET_UNLOCK(ps);
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
- if (*do_wake) {
+ if (*do_wake)
wake_poller(ps, 0);
- }
-#endif
return res;
}
@@ -1351,52 +1412,61 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps,
static ERTS_INLINE int
ERTS_POLL_EXPORT(save_result)(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, int chk_fds_res, int ebadf)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE || ERTS_POLL_DEBUG_PRINT || ERTS_POLL_USE_WAKEUP_PIPE
int n = chk_fds_res < max_res ? chk_fds_res : max_res, i;
int res = n;
-#if ERTS_POLL_USE_WAKEUP_PIPE
int wake_fd = ps->wake_fds[0];
-#endif
- for (i = 0; i < n; i++) {
- int fd = ERTS_POLL_RES_GET_FD(&pr[i]);
-#ifdef DEBUG_PRINT_MODE
- ErtsPollEvents evts = ERTS_POLL_RES_GET_EVTS(pr+i);
-#endif
+ if (ERTS_POLL_USE_WAKEUP(ps) || ERTS_POLL_DEBUG_PRINT || ERTS_POLL_USE_TIMERFD) {
+
+ for (i = 0; i < n; i++) {
+ int fd = ERTS_POLL_RES_GET_FD(&pr[i]);
+#if ERTS_POLL_DEBUG_PRINT
+ ErtsPollEvents evts = ERTS_POLL_RES_GET_EVTS(pr+i);
- DEBUG_PRINT_FD("trig %s (%s)", ps, fd,
- ev2str(evts),
+ if (fd != wake_fd
+#if ERTS_POLL_USE_TIMERFD
+ && fd != ps->timer_fd
+#endif
+ )
+ DEBUG_PRINT_FD("trig %s (%s)", ps, fd,
+ ev2str(evts),
#if ERTS_POLL_USE_KQUEUE
- "kqueue"
+ "kqueue"
#elif ERTS_POLL_USE_EPOLL
- "epoll"
+ "epoll"
#else
- "/dev/poll"
+ "/dev/poll"
+#endif
+ );
#endif
- );
-#if ERTS_POLL_USE_WAKEUP_PIPE
- if (fd == wake_fd) {
- cleanup_wakeup_pipe(ps);
- ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE);
- if (n == 1)
- return 0;
- }
+ if (ERTS_POLL_USE_WAKEUP(ps) && fd == wake_fd) {
+ cleanup_wakeup_pipe(ps);
+ ERTS_POLL_RES_SET_FD(&pr[i], -1);
+ ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE);
+ res--;
+ }
+#if ERTS_POLL_USE_TIMERFD
+ else if (fd == ps->timer_fd) {
+ ERTS_POLL_RES_SET_FD(&pr[i], -1);
+ ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE);
+ res--;
+ }
#endif
#if !ERTS_POLL_USE_CONCURRENT_UPDATE
- else {
- /* Reset the events to emulate ONESHOT semantics */
- ps->fds_status[fd].events = 0;
- enqueue_update_request(ps, fd);
- }
+ else {
+ /* Reset the events to emulate ONESHOT semantics */
+ ps->fds_status[fd].events = 0;
+ enqueue_update_request(ps, fd);
+ }
#endif
+ }
}
- return res;
-#else
- ASSERT(chk_fds_res <= max_res);
- return chk_fds_res;
-#endif
+ if (res == 0)
+ return res;
+ else
+ return n;
}
#else /* !ERTS_POLL_USE_KERNEL_POLL */
@@ -1577,19 +1647,168 @@ ERTS_POLL_EXPORT(save_result)(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res,
#endif /* !ERTS_POLL_USE_KERNEL_POLL */
+static ERTS_INLINE ErtsMonotonicTime
+get_timeout(ErtsPollSet *ps,
+ int resolution,
+ ErtsMonotonicTime timeout_time)
+{
+ ErtsMonotonicTime timeout;
+
+ if (timeout_time == ERTS_POLL_NO_TIMEOUT) {
+ timeout = 0;
+ }
+ else if (timeout_time == ERTS_POLL_INF_TIMEOUT) {
+ timeout = -1;
+ }
+ else {
+ ErtsMonotonicTime diff_time, current_time;
+ current_time = erts_get_monotonic_time(NULL);
+ diff_time = timeout_time - current_time;
+ if (diff_time <= 0) {
+ timeout = 0;
+ }
+ else {
+ switch (resolution) {
+ case 1000:
+ /* Round up to nearest even milli second */
+ timeout = ERTS_MONOTONIC_TO_MSEC(diff_time - 1) + 1;
+ if (timeout > (ErtsMonotonicTime) INT_MAX)
+ timeout = (ErtsMonotonicTime) INT_MAX;
+ timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000);
+ break;
+ case 1000000:
+ /* Round up to nearest even micro second */
+ timeout = ERTS_MONOTONIC_TO_USEC(diff_time - 1) + 1;
+ timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000*1000);
+ break;
+ case 1000000000:
+ /* Round up to nearest even nano second */
+ timeout = ERTS_MONOTONIC_TO_NSEC(diff_time - 1) + 1;
+ timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000*1000*1000);
+ break;
+ default:
+ ERTS_INTERNAL_ERROR("Invalid resolution");
+ timeout = 0;
+ break;
+ }
+ }
+ }
+ return timeout;
+}
+
+#if ERTS_POLL_USE_SELECT
+
+static ERTS_INLINE int
+get_timeout_timeval(ErtsPollSet *ps,
+ SysTimeval *tvp,
+ ErtsMonotonicTime timeout_time)
+{
+ ErtsMonotonicTime timeout = get_timeout(ps,
+ 1000*1000,
+ timeout_time);
+
+ if (!timeout) {
+ tvp->tv_sec = 0;
+ tvp->tv_usec = 0;
+
+ return 0;
+ }
+ else if (timeout == -1) {
+ return -1;
+ }
+ else {
+ ErtsMonotonicTime sec = timeout/(1000*1000);
+ tvp->tv_sec = sec;
+ tvp->tv_usec = timeout - sec*(1000*1000);
+
+ ASSERT(tvp->tv_sec >= 0);
+ ASSERT(tvp->tv_usec >= 0);
+ ASSERT(tvp->tv_usec < 1000*1000);
+
+ return 1;
+ }
+
+}
+
+#endif
+
+#if ERTS_POLL_USE_KQUEUE || (ERTS_POLL_USE_POLL && defined(HAVE_PPOLL)) || ERTS_POLL_USE_TIMERFD
+
+static ERTS_INLINE int
+get_timeout_timespec(ErtsPollSet *ps,
+ struct timespec *tsp,
+ ErtsMonotonicTime timeout_time)
+{
+ ErtsMonotonicTime timeout = get_timeout(ps,
+ 1000*1000*1000,
+ timeout_time);
+
+ if (!timeout) {
+ tsp->tv_sec = 0;
+ tsp->tv_nsec = 0;
+ return 0;
+ }
+ else if (timeout == -1) {
+ return -1;
+ }
+ else {
+ ErtsMonotonicTime sec = timeout/(1000*1000*1000);
+ tsp->tv_sec = sec;
+ tsp->tv_nsec = timeout - sec*(1000*1000*1000);
+
+ ASSERT(tsp->tv_sec >= 0);
+ ASSERT(tsp->tv_nsec >= 0);
+ ASSERT(tsp->tv_nsec < 1000*1000*1000);
+
+ return 1;
+ }
+}
+
+#endif
+
+#if ERTS_POLL_USE_TIMERFD
+
+static ERTS_INLINE int
+get_timeout_itimerspec(ErtsPollSet *ps,
+ struct itimerspec *itsp,
+ ErtsMonotonicTime timeout_time)
+{
+
+ itsp->it_interval.tv_sec = 0;
+ itsp->it_interval.tv_nsec = 0;
+
+ return get_timeout_timespec(ps, &itsp->it_value, timeout_time);
+}
+
+#endif
+
static ERTS_INLINE int
-check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res)
+check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, ErtsMonotonicTime timeout_time)
{
int res;
- int timeout = do_wait ? -1 : 0;
- DEBUG_PRINT_WAIT("Entering check_fd_events(), do_wait=%d", ps, do_wait);
+ int timeout;
+ DEBUG_PRINT_WAIT("Entering check_fd_events(), timeout=%d", ps, timeout_time);
{
#if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */
+#if ERTS_POLL_USE_TIMERFD
+ struct itimerspec its;
+ timeout = get_timeout_itimerspec(ps, &its, timeout_time);
+ if (timeout > 0) {
+ timerfd_set(ps, &its);
+ res = epoll_wait(ps->kp_fd, pr, max_res, -1);
+ res = timerfd_clear(ps, pr, res, max_res);
+ } else {
+ res = epoll_wait(ps->kp_fd, pr, max_res, timeout);
+ }
+#else /* !ERTS_POLL_USE_TIMERFD */
+ timeout = (int) get_timeout(ps, 1000, timeout_time);
res = epoll_wait(ps->kp_fd, pr, max_res, timeout);
-
+#endif /* !ERTS_POLL_USE_TIMERFD */
#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */
- struct timespec ts = {0, 0};
- struct timespec *tsp = timeout ? NULL : &ts;
+ struct timespec ts;
+ struct timespec *tsp;
+ timeout = get_timeout_timespec(ps, &ts, timeout_time);
+ tsp = timeout < 0 ? NULL : &ts;
res = kevent(ps->kp_fd, NULL, 0, pr, max_res, tsp);
#elif ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */
/*
@@ -1601,16 +1820,22 @@ check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res)
int nfds = (int) erts_atomic_read_nob(&ps->no_of_user_fds) + 1 /* wakeup pipe */;
poll_res.dp_nfds = nfds < max_res ? nfds : max_res;
poll_res.dp_fds = pr;
- poll_res.dp_timeout = timeout;
+ poll_res.dp_timeout = (int) get_timeout(ps, 1000, timeout_time);
res = ioctl(ps->kp_fd, DP_POLL, &poll_res);
-
+#elif ERTS_POLL_USE_POLL && defined(HAVE_PPOLL) /* --- ppoll ---------------- */
+ struct timespec ts;
+ struct timespec *tsp = &ts;
+ timeout = get_timeout_timespec(ps, &ts, timeout_time);
+ if (timeout < 0) tsp = NULL;
+ res = ppoll(ps->poll_fds, ps->no_poll_fds, tsp, NULL);
#elif ERTS_POLL_USE_POLL /* --- poll --------------------------------- */
-
+ timeout = (int) get_timeout(ps, 1000, timeout_time);
res = poll(ps->poll_fds, ps->no_poll_fds, timeout);
-
#elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */
- SysTimeval tv = {0, 0};
- SysTimeval *tvp = timeout ? NULL : &tv;
+ SysTimeval tv;
+ SysTimeval *tvp;
+ timeout = get_timeout_timeval(ps, &tv, timeout_time);
+ tvp = timeout < 0 ? NULL : &tv;
ERTS_FD_COPY(&ps->input_fds, &ps->res_input_fds);
ERTS_FD_COPY(&ps->output_fds, &ps->res_output_fds);
@@ -1629,7 +1854,9 @@ check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res)
int
ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
ErtsPollResFd pr[],
- int *len)
+ int *len,
+ ErtsThrPrgrData *tpd,
+ ErtsMonotonicTime timeout_time)
{
int res, no_fds, used_fds = 0;
int ebadf = 0;
@@ -1654,61 +1881,65 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
}
#endif
- do_wait = !is_woken(ps) && used_fds == 0;
+ do_wait = !is_woken(ps) && used_fds == 0 && timeout_time != ERTS_POLL_NO_TIMEOUT;
DEBUG_PRINT_WAIT("Entering %s(), do_wait=%d", ps, __FUNCTION__, do_wait);
if (do_wait) {
- erts_thr_progress_prepare_wait(NULL);
+ tpd = tpd ? tpd : erts_thr_prgr_data(NULL);
+ erts_thr_progress_prepare_wait(tpd);
ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP);
- }
+ } else
+ timeout_time = ERTS_POLL_NO_TIMEOUT;
while (1) {
- res = check_fd_events(ps, pr + used_fds, do_wait, no_fds - used_fds);
+ res = check_fd_events(ps, pr + used_fds, no_fds - used_fds, timeout_time);
+ if (res != 0)
+ break;
+ if (timeout_time == ERTS_POLL_NO_TIMEOUT)
+ break;
+ if (erts_get_monotonic_time(NULL) >= timeout_time)
+ break;
+ }
#if !ERTS_POLL_USE_CONCURRENT_UPDATE
- if (res < 0
- && errno == EBADF
- && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
- /*
- * This may have happened because another thread deselected
- * a fd in our poll set and then closed it, i.e. the driver
- * behaved correctly. We wan't to avoid looking for a bad
- * fd, that may even not exist anymore. Therefore, handle
- * update requests and try again. This behaviour should only
- * happen when using SELECT as the polling mechanism.
- */
- ERTS_POLLSET_LOCK(ps);
- used_fds += handle_update_requests(ps, pr + used_fds, no_fds - used_fds);
- if (used_fds == no_fds) {
- *len = used_fds;
- ERTS_POLLSET_UNLOCK(ps);
- return 0;
- }
- res = check_fd_events(ps, pr + used_fds, 0, no_fds - used_fds);
- /* Keep the lock over the non-blocking poll in order to not
- get any nasty races happening. */
+ if (res < 0
+ && errno == EBADF
+ && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
+ /*
+ * This may have happened because another thread deselected
+ * a fd in our poll set and then closed it, i.e. the driver
+ * behaved correctly. We wan't to avoid looking for a bad
+ * fd, that may even not exist anymore. Therefore, handle
+ * update requests and try again. This behaviour should only
+ * happen when using SELECT as the polling mechanism.
+ */
+ ERTS_POLLSET_LOCK(ps);
+ used_fds += handle_update_requests(ps, pr + used_fds, no_fds - used_fds);
+ if (used_fds == no_fds) {
+ *len = used_fds;
ERTS_POLLSET_UNLOCK(ps);
- if (res == 0) {
- errno = EAGAIN;
- res = -1;
- }
+ return 0;
+ }
+ res = check_fd_events(ps, pr + used_fds, no_fds - used_fds, ERTS_POLL_NO_TIMEOUT);
+ /* Keep the lock over the non-blocking poll in order to not
+ get any nasty races happening. */
+ ERTS_POLLSET_UNLOCK(ps);
+ if (res == 0) {
+ errno = EAGAIN;
+ res = -1;
}
-#endif
-
- if (res != 0)
- break;
- if (!do_wait)
- break;
}
+#endif
if (do_wait) {
- erts_thr_progress_finalize_wait(NULL);
+ erts_thr_progress_finalize_wait(tpd);
ERTS_MSACC_UPDATE_CACHE();
ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_CHECK_IO);
}
- woke_up(ps);
+ if (ERTS_POLL_USE_WAKEUP(ps))
+ woke_up(ps);
if (res < 0) {
#if ERTS_POLL_USE_SELECT
@@ -1719,11 +1950,16 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
#endif
res = errno;
}
- else {
+ else if (res == 0) {
+ res = used_fds == 0 ? ETIMEDOUT : 0;
+#ifdef HARD_DEBUG
+ check_poll_result(pr, used_fds);
+#endif
+ *len = used_fds;
+ } else {
#if ERTS_POLL_USE_SELECT
save_results:
#endif
-
ps_locked = 1;
ERTS_POLLSET_LOCK(ps);
@@ -1753,12 +1989,13 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
void
ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet *ps, int set)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
- if (!set)
- reset_wakeup_state(ps);
- else
- wake_poller(ps, 1);
-#endif
+ DEBUG_PRINT_WAIT("poll_interrupt(%d)", ps, set);
+ if (ERTS_POLL_USE_WAKEUP(ps)) {
+ if (!set)
+ reset_wakeup_state(ps);
+ else
+ wake_poller(ps, 1);
+ }
}
int
@@ -1874,10 +2111,20 @@ ERTS_POLL_EXPORT(erts_poll_create_pollset)(int id)
if (ps->internal_fd_limit <= kp_fd)
ps->internal_fd_limit = kp_fd + 1;
ps->kp_fd = kp_fd;
+ if (ps->id == -1)
+ ps->oneshot = 0;
+ else
+ ps->oneshot = 1;
#endif
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
+
erts_atomic32_init_nob(&ps->wakeup_state, (erts_aint32_t) 0);
create_wakeup_pipe(ps);
+
+#if ERTS_POLL_USE_TIMERFD
+ create_timerfd(ps);
+#endif
+
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
handle_update_requests(ps, NULL, 0);
cleanup_wakeup_pipe(ps);
#endif
@@ -1992,9 +2239,7 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet *ps, ErtsPollInfo *pip)
pip->memory_size = size;
pip->poll_set_size = (int) erts_atomic_read_nob(&ps->no_of_user_fds);
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
pip->poll_set_size++; /* Wakeup pipe */
-#endif
pip->lazy_updates =
#if !ERTS_POLL_USE_CONCURRENT_UPDATE
@@ -2177,6 +2422,12 @@ ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet *ps,
ASSERT(0);
return;
}
+ if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1])
+ continue;
+#if ERTS_POLL_USE_TIMERFD
+ if (fd == ps->timer_fd)
+ continue;
+#endif
data &= 0xFFFFFFFF;
ASSERT(fd == data);
/* Events are the events that are being monitored, which of course include
diff --git a/erts/emulator/sys/common/erl_poll.h b/erts/emulator/sys/common/erl_poll.h
index e1cea7eb8b..d40dabc529 100644
--- a/erts/emulator/sys/common/erl_poll.h
+++ b/erts/emulator/sys/common/erl_poll.h
@@ -51,6 +51,7 @@
#include "sys.h"
#define ERTS_POLL_NO_TIMEOUT ERTS_MONOTONIC_TIME_MIN
+#define ERTS_POLL_INF_TIMEOUT ERTS_MONOTONIC_TIME_MAX
#ifdef ERTS_ENABLE_KERNEL_POLL
# undef ERTS_ENABLE_KERNEL_POLL
@@ -130,6 +131,9 @@
#endif
#define ERTS_POLL_USE_FALLBACK (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL)
+#define ERTS_POLL_USE_SCHEDULER_POLLING (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL)
+#define ERTS_POLL_SCHEDULER_POLLING_TIMEOUT 10
+#define ERTS_POLL_USE_TIMERFD 0
typedef Uint32 ErtsPollEvents;
@@ -156,6 +160,14 @@ typedef enum {
#include <sys/epoll.h>
+#if ERTS_POLL_USE_EPOLL
+#ifdef HAVE_SYS_TIMERFD_H
+#include <sys/timerfd.h>
+#undef ERTS_POLL_USE_TIMERFD
+#define ERTS_POLL_USE_TIMERFD 1
+#endif
+#endif
+
#define ERTS_POLL_EV_E2N(EV) \
((uint32_t) (EV))
#define ERTS_POLL_EV_N2E(EV) \
@@ -276,7 +288,7 @@ typedef struct _ErtsPollResFd {
#endif
-#define ERTS_POLL_EV_NONE (UINT_MAX & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT|ERTS_POLL_EV_NVAL|ERTS_POLL_EV_ERR))
+#define ERTS_POLL_EV_NONE ERTS_POLL_EV_N2E((UINT_MAX & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT|ERTS_POLL_EV_NVAL|ERTS_POLL_EV_ERR)))
#define ev2str(ev) \
(((ev) == 0 || (ev) == ERTS_POLL_EV_NONE) ? "NONE" : \
diff --git a/erts/emulator/sys/common/erl_poll_api.h b/erts/emulator/sys/common/erl_poll_api.h
index 1170a549b9..f3a91e54f7 100644
--- a/erts/emulator/sys/common/erl_poll_api.h
+++ b/erts/emulator/sys/common/erl_poll_api.h
@@ -72,11 +72,15 @@ ErtsPollEvents ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps,
* @param res an array of fd results that the ready fds are put in.
* @param[in] length the length of the res array
* @param[out] length the number of ready events returned in res
+ * @param tpd the thread progress data to note sleep state in
+ * @param timeout_time the time in native to wake up at
* @return 0 on success, else the ERRNO of the error that happened.
*/
int ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
ErtsPollResFd res[],
- int *length);
+ int *length,
+ ErtsThrPrgrData *tpd,
+ ErtsMonotonicTime timeout_time);
/**
* Interrupt the thread waiting in the pollset. This function should be called
* with set = 0 before any thread calls erts_poll_wait in order to clear any
diff --git a/erts/emulator/sys/unix/sys_uds.c b/erts/emulator/sys/unix/sys_uds.c
index 39a4866065..c9f73622ba 100644
--- a/erts/emulator/sys/unix/sys_uds.c
+++ b/erts/emulator/sys/unix/sys_uds.c
@@ -88,8 +88,9 @@ sys_uds_readv(int fd, struct iovec *iov, size_t iov_len,
if((msg.msg_flags & MSG_CTRUNC) == MSG_CTRUNC)
{
/* We assume that we have given enough space for any header
- that are sent to us. So the only remaining reason to get
- this flag set is if the caller has run out of file descriptors.
+ that are sent to us. So the only remaining reasons to get
+ this flag set is if the caller has run out of file descriptors
+ or an SELinux policy prunes the response (eg. O_APPEND on STDERR).
*/
errno = EMFILE;
return -1;
diff --git a/erts/emulator/sys/win32/erl_poll.c b/erts/emulator/sys/win32/erl_poll.c
index 39bb4d515e..3843a27a6e 100644
--- a/erts/emulator/sys/win32/erl_poll.c
+++ b/erts/emulator/sys/win32/erl_poll.c
@@ -1017,10 +1017,12 @@ ErtsPollEvents erts_poll_control(ErtsPollSet *ps,
int erts_poll_wait(ErtsPollSet *ps,
ErtsPollResFd pr[],
- int *len)
+ int *len,
+ ErtsThrPrgrData *tpd,
+ Sint64 timeout_in)
{
int no_fds;
- DWORD timeout = INFINITE;
+ DWORD timeout = timeout_in == -1 ? INFINITE : timeout_in;
EventData* ev;
int res = 0;
int num = 0;
@@ -1056,10 +1058,10 @@ int erts_poll_wait(ErtsPollSet *ps,
HARDDEBUGF(("Start waiting %d [%d]",num_h, (int) timeout));
ERTS_POLLSET_UNLOCK(ps);
- erts_thr_progress_prepare_wait(NULL);
+ erts_thr_progress_prepare_wait(tpd);
ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP);
handle = WaitForMultipleObjects(num_h, harr, FALSE, timeout);
- erts_thr_progress_finalize_wait(NULL);
+ erts_thr_progress_finalize_wait(tpd);
ERTS_MSACC_POP_STATE();
ERTS_POLLSET_LOCK(ps);
HARDDEBUGF(("Stop waiting %d [%d]",num_h, (int) timeout));
diff --git a/erts/emulator/test/Makefile b/erts/emulator/test/Makefile
index bf00de2204..6a064ec8d4 100644
--- a/erts/emulator/test/Makefile
+++ b/erts/emulator/test/Makefile
@@ -33,6 +33,7 @@ MODULES= \
after_SUITE \
alloc_SUITE \
async_ports_SUITE \
+ atomics_SUITE \
beam_SUITE \
beam_literals_SUITE \
bif_SUITE \
@@ -50,6 +51,7 @@ MODULES= \
call_trace_SUITE \
code_SUITE \
code_parallel_load_SUITE \
+ counters_SUITE \
crypto_SUITE \
ddll_SUITE \
decode_packet_SUITE \
@@ -92,6 +94,7 @@ MODULES= \
port_SUITE \
port_bif_SUITE \
prim_eval_SUITE \
+ persistent_term_SUITE \
process_SUITE \
pseudoknot_SUITE \
receive_SUITE \
diff --git a/erts/emulator/test/atomics_SUITE.erl b/erts/emulator/test/atomics_SUITE.erl
new file mode 100644
index 0000000000..a5407c42ee
--- /dev/null
+++ b/erts/emulator/test/atomics_SUITE.erl
@@ -0,0 +1,150 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+-module(atomics_SUITE).
+
+-include_lib("common_test/include/ct.hrl").
+
+-compile(export_all).
+
+suite() -> [{ct_hooks,[ts_install_cth]}].
+
+all() ->
+ [signed, unsigned, bad, signed_limits, unsigned_limits].
+
+signed(Config) when is_list(Config) ->
+ Size = 10,
+ Ref = atomics:new(Size,[]),
+ #{size:=Size, memory:=Memory} = atomics:info(Ref),
+ {_,true} = {Memory, Memory > Size*8},
+ {_,true} = {Memory, Memory < Size*max_atomic_sz() + 100},
+ [signed_do(Ref, Ix) || Ix <- lists:seq(1, Size)],
+ ok.
+
+signed_do(Ref, Ix) ->
+ 0 = atomics:get(Ref, Ix),
+ ok = atomics:put(Ref, Ix, 3),
+ ok = atomics:add(Ref, Ix, 14),
+ 17 = atomics:get(Ref, Ix),
+ 20 = atomics:add_get(Ref, Ix, 3),
+ -3 = atomics:add_get(Ref, Ix, -23),
+ 17 = atomics:add_get(Ref, Ix, 20),
+ ok = atomics:sub(Ref, Ix, 4),
+ 13 = atomics:get(Ref, Ix),
+ -7 = atomics:sub_get(Ref, Ix, 20),
+ 3 = atomics:sub_get(Ref, Ix, -10),
+ 3 = atomics:exchange(Ref, Ix, 666),
+ ok = atomics:compare_exchange(Ref, Ix, 666, 777),
+ 777 = atomics:compare_exchange(Ref, Ix, 666, -666),
+ ok.
+
+unsigned(Config) when is_list(Config) ->
+ Size = 10,
+ Ref = atomics:new(Size,[{signed, false}]),
+ #{size:=Size, memory:=Memory} = atomics:info(Ref),
+ true = Memory > Size*8,
+ true = Memory < Size*max_atomic_sz() + 100,
+ [unsigned_do(Ref, Ix) || Ix <- lists:seq(1, Size)],
+ ok.
+
+unsigned_do(Ref, Ix) ->
+ 0 = atomics:get(Ref, Ix),
+ ok = atomics:put(Ref, Ix, 3),
+ ok = atomics:add(Ref, Ix, 14),
+ 17 = atomics:get(Ref, Ix),
+ 20 = atomics:add_get(Ref, Ix, 3),
+ ok = atomics:sub(Ref, Ix, 7),
+ 13 = atomics:get(Ref, Ix),
+ 3 = atomics:sub_get(Ref, Ix, 10),
+ 3 = atomics:exchange(Ref, Ix, 666),
+ ok = atomics:compare_exchange(Ref, Ix, 666, 777),
+ 777 = atomics:compare_exchange(Ref, Ix, 666, 888),
+ ok.
+
+bad(Config) when is_list(Config) ->
+ {'EXIT',{badarg,_}} = (catch atomics:new(0,[])),
+ {'EXIT',{badarg,_}} = (catch atomics:new(10,[bad])),
+ {'EXIT',{badarg,_}} = (catch atomics:new(10,[{signed,bad}])),
+ {'EXIT',{badarg,_}} = (catch atomics:new(10,[{signed,true}, bad])),
+ {'EXIT',{badarg,_}} = (catch atomics:new(10,[{signed,false} | bad])),
+ Ref = atomics:new(10,[]),
+ {'EXIT',{badarg,_}} = (catch atomics:get(1742, 7)),
+ {'EXIT',{badarg,_}} = (catch atomics:get(make_ref(), 7)),
+ {'EXIT',{badarg,_}} = (catch atomics:get(Ref, -1)),
+ {'EXIT',{badarg,_}} = (catch atomics:get(Ref, 0)),
+ {'EXIT',{badarg,_}} = (catch atomics:get(Ref, 11)),
+ {'EXIT',{badarg,_}} = (catch atomics:get(Ref, 7.0)),
+ ok.
+
+
+signed_limits(Config) when is_list(Config) ->
+ Bits = 64,
+ Max = (1 bsl (Bits-1)) - 1,
+ Min = -(1 bsl (Bits-1)),
+
+ Ref = atomics:new(1,[{signed, true}]),
+ #{max:=Max, min:=Min} = atomics:info(Ref),
+ 0 = atomics:get(Ref, 1),
+ ok = atomics:add(Ref, 1, Max),
+ Min = atomics:add_get(Ref, 1, 1),
+ Max = atomics:sub_get(Ref, 1, 1),
+
+ IncrMax = (Max bsl 1) bor 1,
+ ok = atomics:put(Ref, 1, 0),
+ ok = atomics:add(Ref, 1, IncrMax),
+ -1 = atomics:get(Ref, 1),
+ {'EXIT',{badarg,_}} = (catch atomics:add(Ref, 1, IncrMax+1)),
+ {'EXIT',{badarg,_}} = (catch atomics:add(Ref, 1, Min-1)),
+
+ ok.
+
+unsigned_limits(Config) when is_list(Config) ->
+ Bits = 64,
+ Max = (1 bsl Bits) - 1,
+ Min = 0,
+
+ Ref = atomics:new(1,[{signed,false}]),
+ #{max:=Max, min:=Min} = atomics:info(Ref),
+ 0 = atomics:get(Ref, 1),
+ ok = atomics:add(Ref, 1, Max),
+ Min = atomics:add_get(Ref, 1, 1),
+ Max = atomics:sub_get(Ref, 1, 1),
+
+ atomics:put(Ref, 1, Max),
+ io:format("Max=~p~n", [atomics:get(Ref, 1)]),
+
+ {'EXIT',{badarg,_}} = (catch atomics:add(Ref, 1, Max+1)),
+ IncrMin = -(1 bsl (Bits-1)),
+ ok = atomics:put(Ref, 1, -IncrMin),
+ ok = atomics:add(Ref, 1, IncrMin),
+ 0 = atomics:get(Ref, 1),
+ {'EXIT',{badarg,_}} = (catch atomics:add(Ref, 1, IncrMin-1)),
+
+ ok.
+
+max_atomic_sz() ->
+ case erlang:system_info({wordsize, external}) of
+ 4 -> 16;
+ 8 ->
+ EI = erlang:system_info(ethread_info),
+ case lists:keyfind("64-bit native atomics", 1, EI) of
+ {_, "no", _} -> 16;
+ _ -> 8
+ end
+ end.
diff --git a/erts/emulator/test/big_SUITE.erl b/erts/emulator/test/big_SUITE.erl
index 0a42b09903..3b9b9e5989 100644
--- a/erts/emulator/test/big_SUITE.erl
+++ b/erts/emulator/test/big_SUITE.erl
@@ -24,7 +24,7 @@
-export([t_div/1, eq_28/1, eq_32/1, eq_big/1, eq_math/1, big_literals/1,
borders/1, negative/1, big_float_1/1, big_float_2/1,
- bxor_2pow/1,
+ bxor_2pow/1, band_2pow/1,
shift_limit_1/1, powmod/1, system_limit/1, toobig/1, otp_6692/1]).
%% Internal exports.
@@ -43,7 +43,7 @@ suite() ->
all() ->
[t_div, eq_28, eq_32, eq_big, eq_math, big_literals,
borders, negative, {group, big_float}, shift_limit_1,
- bxor_2pow,
+ bxor_2pow, band_2pow,
powmod, system_limit, toobig, otp_6692].
groups() ->
@@ -168,7 +168,11 @@ eval({op,_,Op,A0,B0}, LFH) ->
Res = eval_op(Op, A, B),
erlang:garbage_collect(),
Res;
-eval({integer,_,I}, _) -> I;
+eval({integer,_,I}, _) ->
+ %% "Parasitic" ("symbiotic"?) test of squaring all numbers
+ %% found in the test data.
+ test_squaring(I),
+ I;
eval({call,_,{atom,_,Local},Args0}, LFH) ->
Args = eval_list(Args0, LFH),
LFH(Local, Args).
@@ -192,6 +196,18 @@ eval_op('bxor', A, B) -> A bxor B;
eval_op('bsl', A, B) -> A bsl B;
eval_op('bsr', A, B) -> A bsr B.
+test_squaring(I) ->
+ %% Multiplying an integer by itself is specially optimized, so we
+ %% should take special care to test squaring. The optimization
+ %% will kick in when the two operands have the same address.
+ Sqr = I * I,
+
+ %% This expression will be multiplied in the usual way, because
+ %% the the two operands for '*' are stored at different addresses.
+ Sqr = I * ((I + id(1)) - id(1)),
+
+ ok.
+
%% Built in test functions
fac(0) -> 1;
@@ -456,3 +472,38 @@ my_bxor(A, B, N, Acc0) ->
false -> Acc0 bor (1 bsl N)
end,
my_bxor(A bsr 1, B bsr 1, N+1, Acc1).
+
+
+%% ERL-804
+band_2pow(_Config) ->
+ IL = lists:seq(8*3, 8*16, 4),
+ JL = lists:seq(0, 64),
+ [band_2pow_1((1 bsl I), (1 bsl J))
+ || I <- IL, J <- JL],
+ ok.
+
+band_2pow_1(A, B) ->
+ for(-1,1, fun(Ad) ->
+ for(-1,1, fun(Bd) ->
+ band_2pow_2(A+Ad, B+Bd),
+ band_2pow_2(-A+Ad, B+Bd),
+ band_2pow_2(A+Ad, -B+Bd),
+ band_2pow_2(-A+Ad, -B+Bd)
+ end)
+ end).
+
+band_2pow_2(A, B) ->
+ Correct = my_band(A, B),
+ case A band B of
+ Correct -> ok;
+ Wrong ->
+ io:format("~.16# band ~.16#\n", [A,B]),
+ io:format("Expected ~.16#\n", [Correct]),
+ io:format("Got ~.16#\n", [Wrong]),
+ ct:fail({failed, 'band'})
+
+ end.
+
+%% Implement band without band
+my_band(A, B) ->
+ bnot ((bnot A) bor (bnot B)).
diff --git a/erts/emulator/test/code_SUITE.erl b/erts/emulator/test/code_SUITE.erl
index 9c6dc3ff83..0444ba4f89 100644
--- a/erts/emulator/test/code_SUITE.erl
+++ b/erts/emulator/test/code_SUITE.erl
@@ -28,7 +28,7 @@
fake_literals/1,
false_dependency/1,coverage/1,fun_confusion/1,
t_copy_literals/1, t_copy_literals_frags/1,
- erl_544/1]).
+ erl_544/1, max_heap_size/1]).
-define(line_trace, 1).
-include_lib("common_test/include/ct.hrl").
@@ -43,7 +43,7 @@ all() ->
constant_pools, constant_refc_binaries, fake_literals,
false_dependency,
coverage, fun_confusion, t_copy_literals, t_copy_literals_frags,
- erl_544].
+ erl_544, max_heap_size].
init_per_suite(Config) ->
erts_debug:set_internal_state(available_internal_state, true),
@@ -968,6 +968,39 @@ erl_544(Config) when is_list(Config) ->
{skipped, "Only run when native file name encoding is utf8"}
end.
+%% Test that the copying of literals to a process during purging of
+%% literals will cause the process to be killed if the max heap size
+%% is exceeded.
+max_heap_size(_Config) ->
+ Mod = ?FUNCTION_NAME,
+ Value = [I || I <- lists:seq(1, 5000)],
+ Code = gen_lit(Mod, [{term,Value}]),
+ {module,Mod} = erlang:load_module(Mod, Code),
+ SpawnOpts = [monitor,
+ {max_heap_size,
+ #{size=>1024,
+ kill=>true,
+ error_logger=>true}}],
+ {Pid,Ref} = spawn_opt(fun() ->
+ max_heap_size_proc(Mod)
+ end, SpawnOpts),
+ receive
+ {'DOWN',Ref,process,Pid,Reason} ->
+ killed = Reason;
+ Other ->
+ ct:fail({unexpected_message,Other})
+ after 10000 ->
+ ct:fail({process_did_not_die, Pid, erlang:process_info(Pid)})
+ end.
+
+max_heap_size_proc(Mod) ->
+ Value = Mod:term(),
+ code:delete(Mod),
+ code:purge(Mod),
+ receive
+ _ -> Value
+ end.
+
%% Utilities.
make_sub_binary(Bin) when is_binary(Bin) ->
diff --git a/erts/emulator/test/counters_SUITE.erl b/erts/emulator/test/counters_SUITE.erl
new file mode 100644
index 0000000000..b3f0358c1e
--- /dev/null
+++ b/erts/emulator/test/counters_SUITE.erl
@@ -0,0 +1,234 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+-module(counters_SUITE).
+
+-include_lib("common_test/include/ct.hrl").
+
+-export([suite/0, all/0]).
+-export([basic/1, bad/1, limits/1, indep/1, write_concurrency/1]).
+
+suite() -> [{ct_hooks,[ts_install_cth]}].
+
+all() ->
+ [basic, bad, limits, indep, write_concurrency].
+
+basic(Config) when is_list(Config) ->
+ Size = 10,
+ [begin
+ Ref = counters:new(Size,[Type]),
+ #{size:=Size, memory:=Memory} = counters:info(Ref),
+ check_memory(Type, Memory, Size),
+ [basic_do(Ref, Ix) || Ix <- lists:seq(1, Size)]
+ end
+ || Type <- [atomics, write_concurrency]],
+ ok.
+
+basic_do(Ref, Ix) ->
+ 0 = counters:get(Ref, Ix),
+ ok = counters:add(Ref, Ix, 3),
+ 3 = counters:get(Ref, Ix),
+ ok = counters:add(Ref, Ix, 14),
+ 17 = counters:get(Ref, Ix),
+ ok = counters:add(Ref, Ix, -20),
+ -3 = counters:get(Ref, Ix),
+ ok = counters:add(Ref, Ix, 100),
+ 97 = counters:get(Ref, Ix),
+ ok = counters:sub(Ref, Ix, 20),
+ 77 = counters:get(Ref, Ix),
+ ok = counters:sub(Ref, Ix, -10),
+ 87 = counters:get(Ref, Ix),
+ ok = counters:put(Ref, Ix, 0),
+ 0 = counters:get(Ref, Ix),
+ ok = counters:put(Ref, Ix, 123),
+ 123 = counters:get(Ref, Ix),
+ ok = counters:put(Ref, Ix, -321),
+ -321 = counters:get(Ref, Ix),
+ ok.
+
+check_memory(atomics, Memory, Size) ->
+ {_,true} = {Memory, Memory > Size*8},
+ {_,true} = {Memory, Memory < Size*max_atomic_sz() + 100};
+check_memory(write_concurrency, Memory, Size) ->
+ NWords = erlang:system_info(schedulers) + 1,
+ {_,true} = {Memory, Memory > NWords*Size*8},
+ {_,true} = {Memory, Memory < NWords*(Size+7)*max_atomic_sz() + 100}.
+
+max_atomic_sz() ->
+ case erlang:system_info({wordsize, external}) of
+ 4 -> 16;
+ 8 ->
+ EI = erlang:system_info(ethread_info),
+ case lists:keyfind("64-bit native atomics", 1, EI) of
+ {_, "no", _} -> 16;
+ _ -> 8
+ end
+ end.
+
+bad(Config) when is_list(Config) ->
+ {'EXIT',{badarg,_}} = (catch counters:new(0,[])),
+ {'EXIT',{badarg,_}} = (catch counters:new(10,[bad])),
+ {'EXIT',{badarg,_}} = (catch counters:new(10,[atomic, bad])),
+ {'EXIT',{badarg,_}} = (catch counters:new(10,[write_concurrency | bad])),
+ Ref = counters:new(10,[]),
+ {'EXIT',{badarg,_}} = (catch counters:get(1742, 7)),
+ {'EXIT',{badarg,_}} = (catch counters:get(make_ref(), 7)),
+ {'EXIT',{badarg,_}} = (catch counters:get(Ref, -1)),
+ {'EXIT',{badarg,_}} = (catch counters:get(Ref, 0)),
+ {'EXIT',{badarg,_}} = (catch counters:get(Ref, 11)),
+ {'EXIT',{badarg,_}} = (catch counters:get(Ref, 7.0)),
+ ok.
+
+
+limits(Config) when is_list(Config) ->
+ limits_do(counters:new(1,[atomics])),
+ limits_do(counters:new(1,[write_concurrency])),
+ ok.
+
+limits_do(Ref) ->
+ Bits = 64,
+ Max = (1 bsl (Bits-1)) - 1,
+ Min = -(1 bsl (Bits-1)),
+
+ 0 = counters:get(Ref, 1),
+ ok = counters:put(Ref, 1, Max),
+ Max = counters:get(Ref, 1),
+ ok = counters:add(Ref, 1, 1),
+ Min = counters:get(Ref, 1),
+ ok = counters:sub(Ref, 1, 1),
+ Max = counters:get(Ref, 1),
+ ok = counters:put(Ref, 1, Min),
+ Min = counters:get(Ref, 1),
+
+ IncrMax = (Max bsl 1) bor 1,
+ ok = counters:put(Ref, 1, 0),
+ ok = counters:add(Ref, 1, IncrMax),
+ -1 = counters:get(Ref, 1),
+ {'EXIT',{badarg,_}} = (catch counters:add(Ref, 1, IncrMax+1)),
+ {'EXIT',{badarg,_}} = (catch counters:add(Ref, 1, Min-1)),
+ {'EXIT',{badarg,_}} = (catch counters:put(Ref, 1, Max+1)),
+ {'EXIT',{badarg,_}} = (catch counters:add(Ref, 1, Min-1)),
+ ok.
+
+
+%% Verify that independent workers, using different counters
+%% within the same array, do not interfere with each other.
+indep(Config) when is_list(Config) ->
+ NScheds = erlang:system_info(schedulers),
+ Ref = counters:new(NScheds,[write_concurrency]),
+ Rounds = 100,
+ Papa = self(),
+ Pids = [spawn_opt(fun () ->
+ Val = I*197,
+ counters:put(Ref, I, Val),
+ indep_looper(Rounds, Ref, I, Val),
+ Papa ! {self(), done}
+ end,
+ [link, {scheduler, I}])
+ || I <- lists:seq(1, NScheds)],
+ [receive {P,done} -> ok end || P <- Pids],
+ ok.
+
+indep_looper(0, _, _ , _) ->
+ ok;
+indep_looper(N, Ref, I, Val0) ->
+ %%io:format("Val0 = ~p\n", [Val0]),
+ Val0 = counters:get(Ref, I),
+ Val1 = indep_adder(Ref, I, Val0),
+ indep_subber(Ref, I, Val1),
+ Val2 = N*7 + I,
+ counters:put(Ref, I, Val2),
+ indep_looper(N-1, Ref, I, Val2).
+
+indep_adder(Ref, I, Val) when Val < (1 bsl 62) ->
+ %%io:format("adder Val = ~p\n", [Val]),
+ Incr = abs(Val div 2) + I + 984735,
+ counters:add(Ref, I, Incr),
+ Res = Val + Incr,
+ Res = counters:get(Ref, I),
+ indep_adder(Ref, I, Res);
+indep_adder(_Ref, _I, Val) ->
+ Val.
+
+indep_subber(Ref, I, Val) when Val > -(1 bsl 62) ->
+ %%io:format("subber Val = ~p\n", [Val]),
+ Decr = (abs(Val div 2) + I + 725634),
+ counters:sub(Ref, I, Decr),
+ Res = Val - Decr,
+ Res = counters:get(Ref, I),
+ indep_subber(Ref, I, Res);
+indep_subber(_Ref, _I, Val) ->
+ Val.
+
+
+
+%% Verify write_concurrency yields correct results.
+write_concurrency(Config) when is_list(Config) ->
+ rand:seed(exs1024s),
+ io:format("*** SEED: ~p ***\n", [rand:export_seed()]),
+ NScheds = erlang:system_info(schedulers),
+ Size = 100,
+ Ref = counters:new(Size,[write_concurrency]),
+ Rounds = 1000,
+ Papa = self(),
+ Pids = [spawn_opt(fun Worker() ->
+ receive
+ {go, Ix, Incr} ->
+ wc_looper(Rounds, Ref, Ix, Incr),
+ Papa ! {self(), done, Rounds*Incr},
+ Worker();
+ stop ->
+ ok
+ end
+ end,
+ [link, {scheduler, N}])
+ || N <- lists:seq(1, NScheds)],
+ [begin
+ Base = rand_log64(),
+ counters:put(Ref, Index, Base),
+ SendList = [{P,{go, Index, rand_log64()}} || P <- Pids],
+ [P ! Msg || {P,Msg} <- SendList],
+ Added = lists:sum([receive {P,done,Contrib} -> Contrib end || P <- Pids]),
+ Result = mask_sint64(Base+Added),
+ {_,Result} = {Result, counters:get(Ref, Index)}
+ end
+ || Index <- lists:seq(1, Size)],
+
+ [begin unlink(P), P ! stop end || P <- Pids],
+ ok.
+
+wc_looper(0, _, _, _) ->
+ ok;
+wc_looper(N, Ref, Ix, Incr) ->
+ counters:add(Ref, Ix, Incr),
+ wc_looper(N-1, Ref, Ix, Incr).
+
+mask_sint64(X) ->
+ SMask = 1 bsl 63,
+ UMask = SMask - 1,
+ (X band UMask) - (X band SMask).
+
+%% A random signed 64-bit integer
+%% with a uniformly distributed number of significant bits.
+rand_log64() ->
+ Uint = round(math:pow(2, rand:uniform()*63)),
+ case rand:uniform(2) of
+ 1 -> -Uint;
+ 2 -> Uint
+ end.
diff --git a/erts/emulator/test/driver_SUITE.erl b/erts/emulator/test/driver_SUITE.erl
index 6f5d639d04..bd62708aa7 100644
--- a/erts/emulator/test/driver_SUITE.erl
+++ b/erts/emulator/test/driver_SUITE.erl
@@ -1754,7 +1754,7 @@ smp_select0(Config) ->
ProcFun = fun()-> io:format("Worker ~p starting\n",[self()]),
Port = open_port({spawn, DrvName}, []),
smp_select_loop(Port, 100000),
- sleep(1000), % wait for driver to handle pending events
+ smp_select_done(Port),
true = erlang:port_close(Port),
Master ! {ok,self()},
io:format("Worker ~p finished\n",[self()])
@@ -1784,6 +1784,21 @@ smp_select_loop(Port, N) ->
smp_select_loop(Port, N-1)
end.
+smp_select_done(Port) ->
+ case erlang:port_control(Port, ?CHKIO_SMP_SELECT, "done") of
+ "wait" ->
+ receive
+ {Port, done} ->
+ ok
+ after 10*1000 ->
+ %% Seems we have a lost ready_input event.
+ %% Go ahead anyway, port will crash VM when closed.
+ ok
+ end;
+
+ "ok" -> ok
+ end.
+
smp_select_wait([], _) ->
ok;
smp_select_wait(Pids, TimeoutMsg) ->
diff --git a/erts/emulator/test/driver_SUITE_data/chkio_drv.c b/erts/emulator/test/driver_SUITE_data/chkio_drv.c
index ee8f28e8b1..b9ee155b4b 100644
--- a/erts/emulator/test/driver_SUITE_data/chkio_drv.c
+++ b/erts/emulator/test/driver_SUITE_data/chkio_drv.c
@@ -90,7 +90,7 @@ typedef struct chkio_smp_select {
int next_read;
int next_write;
int first_write;
- enum {Closed, Opened, Selected, Waiting} state;
+ enum {Closed, Opened, Selected, Waiting, WaitingDone} state;
int wasSelected;
unsigned rand_state;
}ChkioSmpSelect;
@@ -292,18 +292,20 @@ stop_steal_aux(ChkioDrvData *cddp)
static void free_smp_select(ChkioSmpSelect* pip, ErlDrvPort port)
{
switch (pip->state) {
+ case WaitingDone:
case Waiting: {
int word;
- fprintf(stderr, "Closing pipe in state Waiting. Event lost?\n");
+ fprintf(stderr, "Closing pipe in state Waiting*. Event lost?\r\n");
for (;;) {
int bytes = read(pip->read_fd, &word, sizeof(word));
if (bytes != sizeof(word)) {
if (bytes != 0) {
- fprintf(stderr, "Failed to read from pipe, bytes=%d, errno=%d\n", bytes, errno);
+ fprintf(stderr, "Failed to read from pipe, bytes=%d, errno=%d\r\n",
+ bytes, errno);
}
break;
}
- fprintf(stderr, "Read from pipe: %d\n", word);
+ fprintf(stderr, "Read from pipe: %d\r\n", word);
}
abort();
}
@@ -318,6 +320,8 @@ static void free_smp_select(ChkioSmpSelect* pip, ErlDrvPort port)
close(pip->write_fd);
pip->state = Closed;
break;
+ case Closed:
+ break;
}
driver_free(pip);
}
@@ -383,6 +387,9 @@ chkio_drv_start(ErlDrvPort port, char *command)
cddp->id = driver_mk_port(port);
cddp->test = CHKIO_STOP;
cddp->test_data = NULL;
+
+ drv_use_singleton.fd_stop_select = -2; /* disable stop_select asserts */
+
return (ErlDrvData) cddp;
#endif
}
@@ -526,7 +533,7 @@ chkio_drv_ready_input(ErlDrvData drv_data, ErlDrvEvent event)
printf("Read event on uninitiated pipe %d\n", fd);
abort();
}
- if (pip->state != Selected && pip->state != Waiting) {
+ if (pip->state != Selected && pip->state != Waiting && pip->state != WaitingDone) {
printf("Read event on pipe in strange state %d\n", pip->state);
abort();
}
@@ -536,9 +543,9 @@ chkio_drv_ready_input(ErlDrvData drv_data, ErlDrvEvent event)
inPipe = (pip->next_write - pip->next_read);
if (inPipe == 0) {
bytes = read(pip->read_fd, &word, sizeof(word));
- printf("Unexpected empty pipe, expected %u -> %u, bytes=%d, word=%d, written=%d\n",
- pip->next_read, pip->next_write-1, bytes, word,
- (pip->next_write - pip->first_write));
+ printf("Unexpected empty pipe: ptr=%p, fds=%d->%d, read bytes=%d, word=%d, written=%d\n",
+ pip, pip->write_fd, pip->read_fd,
+ bytes, word, (pip->next_write - pip->first_write));
/*abort();
Allow unexpected events as it's been seen to be triggered by epoll
on Linux. Most of the time the unwanted events are filtered by
@@ -564,7 +571,20 @@ chkio_drv_ready_input(ErlDrvData drv_data, ErlDrvEvent event)
TRACEF(("Read %d from fd=%d\n", word, fd));
pip->next_read++;
}
- pip->state = Selected; /* not Waiting anymore */
+ if (pip->state == WaitingDone) {
+ if (pip->next_write == pip->next_read) {
+ /* All data read, send {Port, done} */
+ ErlDrvTermData spec[] = {ERL_DRV_PORT, driver_mk_port(cddp->port),
+ ERL_DRV_ATOM, driver_mk_atom("done"),
+ ERL_DRV_TUPLE, 2};
+ erl_drv_output_term(driver_mk_port(cddp->port),
+ spec, sizeof(spec) / sizeof(spec[0]));
+ pip->state = Selected;
+ }
+ }
+ else {
+ pip->state = Selected; /* not Waiting anymore */
+ }
break;
}
case CHKIO_DRV_USE:
@@ -962,6 +982,16 @@ chkio_drv_control(ErlDrvData drv_data,
}
case CHKIO_SMP_SELECT: {
ChkioSmpSelect* pip = (ChkioSmpSelect*) cddp->test_data;
+ if (len == 4 && memcmp(buf, "done", 4) == 0) {
+ if (pip && pip->state == Waiting) {
+ pip->state = WaitingDone;
+ res_str = "wait";
+ }
+ else
+ res_str = "ok";
+ res_len = -1;
+ break;
+ }
if (pip == NULL) {
erl_drv_mutex_lock(smp_pipes_mtx);
if (smp_pipes) {
@@ -1014,7 +1044,6 @@ chkio_drv_control(ErlDrvData drv_data,
if (pip->wasSelected && (op & 1)) {
TRACEF(("%T: Close pipe [%d->%d]\n", cddp->id, pip->write_fd,
pip->read_fd));
- drv_use_singleton.fd_stop_select = -2; /* disable stop_select asserts */
if (driver_select(cddp->port, (ErlDrvEvent)(ErlDrvSInt)pip->read_fd,
DO_READ|ERL_DRV_USE, 0)
|| close(pip->write_fd)) {
diff --git a/erts/emulator/test/persistent_term_SUITE.erl b/erts/emulator/test/persistent_term_SUITE.erl
new file mode 100644
index 0000000000..58cd3276b0
--- /dev/null
+++ b/erts/emulator/test/persistent_term_SUITE.erl
@@ -0,0 +1,614 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2017. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%5
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-module(persistent_term_SUITE).
+-include_lib("common_test/include/ct.hrl").
+
+-export([all/0,suite/0,
+ basic/1,purging/1,sharing/1,get_trapping/1,
+ info/1,info_trapping/1,killed_while_trapping/1,
+ off_heap_values/1,keys/1,collisions/1,
+ init_restart/1]).
+
+%%
+-export([test_init_restart_cmd/1]).
+
+suite() ->
+ [{ct_hooks,[ts_install_cth]},
+ {timetrap,{minutes,10}}].
+
+all() ->
+ [basic,purging,sharing,get_trapping,info,info_trapping,
+ killed_while_trapping,off_heap_values,keys,collisions,
+ init_restart].
+
+basic(_Config) ->
+ Chk = chk(),
+ N = 777,
+ Seq = lists:seq(1, N),
+ par(2, N, Seq),
+ seq(3, Seq),
+ seq(3, Seq), %Same values.
+ _ = [begin
+ Key = {?MODULE,{key,I}},
+ true = persistent_term:erase(Key),
+ false = persistent_term:erase(Key),
+ {'EXIT',{badarg,_}} = (catch persistent_term:get(Key))
+ end || I <- Seq],
+ [] = [P || {{?MODULE,_},_}=P <- persistent_term:get()],
+ chk(Chk).
+
+par(C, N, Seq) ->
+ _ = [spawn_link(fun() ->
+ ok = persistent_term:put({?MODULE,{key,I}},
+ {value,C*I})
+ end) || I <- Seq],
+ Result = wait(N),
+ _ = [begin
+ Double = C*I,
+ {{?MODULE,{key,I}},{value,Double}} = Res
+ end || {I,Res} <- lists:zip(Seq, Result)],
+ ok.
+
+seq(C, Seq) ->
+ _ = [ok = persistent_term:put({?MODULE,{key,I}}, {value,C*I}) ||
+ I <- Seq],
+ All = persistent_term:get(),
+ All = [P || {{?MODULE,_},_}=P <- persistent_term:get()],
+ All = [{Key,persistent_term:get(Key)} || {Key,_} <- All],
+ Result = lists:sort(All),
+ _ = [begin
+ Double = C*I,
+ {{?MODULE,{key,I}},{value,Double}} = Res
+ end || {I,Res} <- lists:zip(Seq, Result)],
+ ok.
+
+wait(N) ->
+ All = [P || {{?MODULE,_},_}=P <- persistent_term:get()],
+ case length(All) of
+ N ->
+ All = [{Key,persistent_term:get(Key)} || {Key,_} <- All],
+ lists:sort(All);
+ _ ->
+ receive after 10 -> ok end,
+ wait(N)
+ end.
+
+%% Make sure that terms that have been erased are copied into all
+%% processes that still hold a pointer to them.
+
+purging(_Config) ->
+ Chk = chk(),
+ do_purging(fun(K) -> persistent_term:put(K, {?MODULE,new}) end,
+ replaced),
+ do_purging(fun persistent_term:erase/1, erased),
+ chk(Chk).
+
+do_purging(Eraser, Type) ->
+ Parent = self(),
+ Key = {?MODULE,?FUNCTION_NAME},
+ ok = persistent_term:put(Key, {term,[<<"abc",0:777/unit:8>>]}),
+ Ps0 = [spawn_monitor(fun() -> purging_tester(Parent, Key) end) ||
+ _ <- lists:seq(1, 50)],
+ Ps = maps:from_list(Ps0),
+ purging_recv(gotten, Ps),
+ Eraser(Key),
+ _ = [P ! {Parent,Type} || P <- maps:keys(Ps)],
+ purging_wait(Ps).
+
+purging_recv(Tag, Ps) when map_size(Ps) > 0 ->
+ receive
+ {Pid,Tag} ->
+ true = is_map_key(Pid, Ps),
+ purging_recv(Tag, maps:remove(Pid, Ps))
+ end;
+purging_recv(_, _) -> ok.
+
+purging_wait(Ps) when map_size(Ps) > 0 ->
+ receive
+ {'DOWN',Ref,process,Pid,Reason} ->
+ normal = Reason,
+ Ref = map_get(Pid, Ps),
+ purging_wait(maps:remove(Pid, Ps))
+ end;
+purging_wait(_) -> ok.
+
+purging_tester(Parent, Key) ->
+ Term = persistent_term:get(Key),
+ purging_check_term(Term),
+ 0 = erts_debug:size_shared(Term),
+ Parent ! {self(),gotten},
+ receive
+ {Parent,erased} ->
+ {'EXIT',{badarg,_}} = (catch persistent_term:get(Key)),
+ purging_tester_1(Term);
+ {Parent,replaced} ->
+ {?MODULE,new} = persistent_term:get(Key),
+ purging_tester_1(Term)
+ end.
+
+%% Wait for the term to be copied into this process.
+purging_tester_1(Term) ->
+ purging_check_term(Term),
+ receive after 1 -> ok end,
+ case erts_debug:size_shared(Term) of
+ 0 ->
+ purging_tester_1(Term);
+ Size ->
+ %% The term has been copied into this process.
+ purging_check_term(Term),
+ Size = erts_debug:size(Term)
+ end.
+
+purging_check_term({term,[<<"abc",0:777/unit:8>>]}) ->
+ ok.
+
+%% Test that sharing is preserved when storing terms.
+
+sharing(_Config) ->
+ Chk = chk(),
+ Depth = 10,
+ Size = 2*Depth,
+ Shared = lists:foldl(fun(_, A) -> [A|A] end,
+ [], lists:seq(1, Depth)),
+ Size = erts_debug:size(Shared),
+ Key = {?MODULE,?FUNCTION_NAME},
+ ok = persistent_term:put(Key, Shared),
+ SharedStored = persistent_term:get(Key),
+ Size = erts_debug:size(SharedStored),
+ 0 = erts_debug:size_shared(SharedStored),
+
+ {Pid,Ref} = spawn_monitor(fun() ->
+ Term = persistent_term:get(Key),
+ Size = erts_debug:size(Term),
+ 0 = erts_debug:size_shared(Term),
+ true = Term =:= SharedStored
+ end),
+ receive
+ {'DOWN',Ref,process,Pid,normal} ->
+ true = persistent_term:erase(Key),
+ Size = erts_debug:size(SharedStored),
+ chk(Chk)
+ end.
+
+%% Test trapping of persistent_term:get/0.
+
+get_trapping(_Config) ->
+ Chk = chk(),
+
+ %% Assume that the get/0 traps after 4000 iterations
+ %% in a non-debug emulator.
+ N = case test_server:timetrap_scale_factor() of
+ 1 -> 10000;
+ _ -> 1000
+ end,
+ spawn_link(fun() -> get_trapping_create(N) end),
+ All = do_get_trapping(N, []),
+ N = get_trapping_check_result(lists:sort(All), 1),
+ erlang:garbage_collect(),
+ get_trapping_erase(N),
+ chk(Chk).
+
+do_get_trapping(N, Prev) ->
+ case persistent_term:get() of
+ Prev when length(Prev) >= N ->
+ All = [P || {{?MODULE,{get_trapping,_}},_}=P <- Prev],
+ case length(All) of
+ N -> All;
+ _ -> do_get_trapping(N, Prev)
+ end;
+ New ->
+ receive after 1 -> ok end,
+ do_get_trapping(N, New)
+ end.
+
+get_trapping_create(0) ->
+ ok;
+get_trapping_create(N) ->
+ ok = persistent_term:put({?MODULE,{get_trapping,N}}, N),
+ get_trapping_create(N-1).
+
+get_trapping_check_result([{{?MODULE,{get_trapping,N}},N}|T], N) ->
+ get_trapping_check_result(T, N+1);
+get_trapping_check_result([], N) -> N-1.
+
+get_trapping_erase(0) ->
+ ok;
+get_trapping_erase(N) ->
+ true = persistent_term:erase({?MODULE,{get_trapping,N}}),
+ get_trapping_erase(N-1).
+
+%% Test retrieving information about persistent terms.
+
+info(_Config) ->
+ Chk = chk(),
+
+ %% White box test of info/0.
+ N = 100,
+ try
+ Overhead = info_literal_area_overhead(),
+ io:format("Overhead = ~p\n", [Overhead]),
+ info_wb(N, Overhead, info_info())
+ after
+ _ = [_ = persistent_term:erase({?MODULE,I}) ||
+ I <- lists:seq(1, N)]
+ end,
+
+ chk(Chk).
+
+%% White box test of persistent_term:info/0. We take into account
+%% that there might already exist persistent terms (created by the
+%% OTP standard libraries), but we assume that they are not
+%% changed during the execution of this test case.
+
+info_wb(0, _, _) ->
+ ok;
+info_wb(N, Overhead, {BaseCount,BaseMemory}) ->
+ Key = {?MODULE,N},
+ Value = lists:seq(1, N),
+ ok = persistent_term:put(Key, Value),
+
+ %% Calculate the extra memory needed for this term.
+ WordSize = erlang:system_info(wordsize),
+ ExtraMemory = Overhead + 2 * N * WordSize,
+
+ %% Call persistent_term:info/0.
+ {Count,Memory} = info_info(),
+
+ %% There should be one more persistent term.
+ Count = BaseCount + 1,
+
+ %% Verify that the amount of memory is correct.
+ case BaseMemory + ExtraMemory of
+ Memory ->
+ %% Exactly right. The size of the hash table was not changed.
+ ok;
+ Expected ->
+ %% The size of the hash table has been doubled to avoid filling
+ %% the table to more than 50 percent. The previous number
+ %% of entries must have been exactly half the size of the
+ %% hash table. The expected number of extra words added by
+ %% the resizing will be twice that number.
+ ExtraWords = BaseCount * 2,
+ true = ExtraWords * WordSize =:= (Memory - Expected)
+ end,
+ info_wb(N-1, Overhead, {Count,Memory}).
+
+info_info() ->
+ #{count:=Count,memory:=Memory} = persistent_term:info(),
+ true = is_integer(Count) andalso Count >= 0,
+ true = is_integer(Memory) andalso Memory >= 0,
+ {Count,Memory}.
+
+%% Calculate the number of extra bytes needed for storing each term in
+%% the literal, assuming that the key is a tuple of size 2 with
+%% immediate elements. The calculated number is the size of the
+%% ErtsLiteralArea struct excluding the storage for the literal term
+%% itself.
+
+info_literal_area_overhead() ->
+ Key1 = {?MODULE,1},
+ Key2 = {?MODULE,2},
+ #{memory:=Mem0} = persistent_term:info(),
+ ok = persistent_term:put(Key1, literal),
+ #{memory:=Mem1} = persistent_term:info(),
+ ok = persistent_term:put(Key2, literal),
+ #{memory:=Mem2} = persistent_term:info(),
+ true = persistent_term:erase(Key1),
+ true = persistent_term:erase(Key2),
+
+ %% The size of the hash table may have doubled when inserting
+ %% one of the keys. To avoiding counting the change in the hash
+ %% table size, take the smaller size increase.
+ min(Mem2-Mem1, Mem1-Mem0).
+
+%% Test trapping of persistent_term:info/0.
+
+info_trapping(_Config) ->
+ Chk = chk(),
+
+ %% Assume that the info/0 traps after 4000 iterations
+ %% in a non-debug emulator.
+ N = case test_server:timetrap_scale_factor() of
+ 1 -> 10000;
+ _ -> 1000
+ end,
+ spawn_link(fun() -> info_trapping_create(N) end),
+ All = do_info_trapping(N, 0),
+ N = info_trapping_check_result(lists:sort(All), 1),
+ erlang:garbage_collect(),
+ info_trapping_erase(N),
+ chk(Chk).
+
+do_info_trapping(N, PrevMem) ->
+ case info_info() of
+ {N,Mem} ->
+ true = Mem >= PrevMem,
+ All = [P || {{?MODULE,{info_trapping,_}},_}=P <- persistent_term:get()],
+ case length(All) of
+ N -> All;
+ _ -> do_info_trapping(N, PrevMem)
+ end;
+ {_,Mem} ->
+ true = Mem >= PrevMem,
+ receive after 1 -> ok end,
+ do_info_trapping(N, Mem)
+ end.
+
+info_trapping_create(0) ->
+ ok;
+info_trapping_create(N) ->
+ ok = persistent_term:put({?MODULE,{info_trapping,N}}, N),
+ info_trapping_create(N-1).
+
+info_trapping_check_result([{{?MODULE,{info_trapping,N}},N}|T], N) ->
+ info_trapping_check_result(T, N+1);
+info_trapping_check_result([], N) -> N-1.
+
+info_trapping_erase(0) ->
+ ok;
+info_trapping_erase(N) ->
+ true = persistent_term:erase({?MODULE,{info_trapping,N}}),
+ info_trapping_erase(N-1).
+
+%% Test that hash tables are deallocated if a process running
+%% persistent_term:get/0 is killed.
+
+killed_while_trapping(_Config) ->
+ Chk = chk(),
+ N = case test_server:timetrap_scale_factor() of
+ 1 -> 20000;
+ _ -> 2000
+ end,
+ kwt_put(N),
+ kwt_spawn(10),
+ kwt_erase(N),
+ chk(Chk).
+
+kwt_put(0) ->
+ ok;
+kwt_put(N) ->
+ ok = persistent_term:put({?MODULE,{kwt,N}}, N),
+ kwt_put(N-1).
+
+kwt_spawn(0) ->
+ ok;
+kwt_spawn(N) ->
+ Pids = [spawn(fun kwt_getter/0) || _ <- lists:seq(1, 20)],
+ erlang:yield(),
+ _ = [exit(Pid, kill) || Pid <- Pids],
+ kwt_spawn(N-1).
+
+kwt_getter() ->
+ _ = persistent_term:get(),
+ kwt_getter().
+
+kwt_erase(0) ->
+ ok;
+kwt_erase(N) ->
+ true = persistent_term:erase({?MODULE,{kwt,N}}),
+ kwt_erase(N-1).
+
+%% Test storing off heap values (such as ref-counted binaries).
+
+off_heap_values(_Config) ->
+ Chk = chk(),
+ Key = {?MODULE,?FUNCTION_NAME},
+ Val = {a,list_to_binary(lists:seq(0, 255)),make_ref(),fun() -> ok end},
+ ok = persistent_term:put(Key, Val),
+ FetchedVal = persistent_term:get(Key),
+ Val = FetchedVal,
+ true = persistent_term:erase(Key),
+ off_heap_values_wait(FetchedVal, Val),
+ chk(Chk).
+
+off_heap_values_wait(FetchedVal, Val) ->
+ case erts_debug:size_shared(FetchedVal) of
+ 0 ->
+ Val = FetchedVal,
+ ok;
+ _ ->
+ erlang:yield(),
+ off_heap_values_wait(FetchedVal, Val)
+ end.
+
+%% Test some more data types as keys. Use the module name as a key
+%% to minimize the risk of collision with any key used
+%% by the OTP libraries.
+
+keys(_Config) ->
+ Chk = chk(),
+ do_key(?MODULE),
+ do_key([?MODULE]),
+ do_key(?MODULE_STRING),
+ do_key(list_to_binary(?MODULE_STRING)),
+ chk(Chk).
+
+do_key(Key) ->
+ Val = term_to_binary(Key),
+ ok = persistent_term:put(Key, Val),
+ StoredVal = persistent_term:get(Key),
+ Val = StoredVal,
+ true = persistent_term:erase(Key).
+
+%% Create persistent terms with keys that are known to collide.
+%% Delete them in random order, making sure that all others
+%% terms can still be found.
+
+collisions(_Config) ->
+ Chk = chk(),
+
+ %% Create persistent terms with random keys.
+ Keys = lists:flatten(colliding_keys()),
+ Kvs = [{K,rand:uniform(1000)} || K <- Keys],
+ _ = [ok = persistent_term:put(K, V) || {K,V} <- Kvs],
+ _ = [V = persistent_term:get(K) || {K,V} <- Kvs],
+
+ %% Now delete the persistent terms in random order.
+ collisions_delete(lists:keysort(2, Kvs)),
+
+ chk(Chk).
+
+collisions_delete([{Key,Val}|Kvs]) ->
+ Val = persistent_term:get(Key),
+ true = persistent_term:erase(Key),
+ true = lists:sort(persistent_term:get()) =:= lists:sort(Kvs),
+ _ = [V = persistent_term:get(K) || {K,V} <- Kvs],
+ collisions_delete(Kvs);
+collisions_delete([]) ->
+ ok.
+
+colliding_keys() ->
+ %% Collisions found by Jesper L. Andersen for breaking maps.
+ L = [[764492191,2361333849],
+ [49527266765044,90940896816021,20062927283041,267080852079651],
+ [249858369443708,206247021789428,20287304470696,25847120931175],
+ [10645228898670,224705626119556,267405565521452,258214397180678],
+ [264783762221048,166955943492306,98802957003141,102012488332476],
+ [69425677456944,177142907243411,137138950917722,228865047699598],
+ [116031213307147,29203342183358,37406949328742,255198080174323],
+ [200358182338308,235207156008390,120922906095920,116215987197289],
+ [58728890318426,68877471005069,176496507286088,221041411345780],
+ [91094120814795,50665258299931,256093108116737,19777509566621],
+ [74646746200247,98350487270564,154448261001199,39881047281135],
+ [23408943649483,164410325820923,248161749770122,274558342231648],
+ [169531547115055,213630535746863,235098262267796,200508473898303],
+ [235098564415817,85039146398174,51721575960328,173069189684390],
+ [176136386396069,155368359051606,147817099696487,265419485459634],
+ [137542881551462,40028925519736,70525669519846,63445773516557],
+ [173854695142814,114282444507812,149945832627054,99605565798831],
+ [177686773562184,127158716984798,132495543008547],
+ [227073396444896,139667311071766,158915951283562],
+ [26212438434289,94902985796531,198145776057315],
+ [266279278943923,58550737262493,74297973216378],
+ [32373606512065,131854353044428,184642643042326],
+ [34335377662439,85341895822066,273492717750246]],
+
+ %% Verify that the keys still collide (this will fail if the
+ %% internal hash function has been changed).
+ erts_debug:set_internal_state(available_internal_state, true),
+ try
+ case erlang:system_info(wordsize) of
+ 8 ->
+ verify_colliding_keys(L);
+ 4 ->
+ %% Not guaranteed to collide on a 32-bit system.
+ ok
+ end
+ after
+ erts_debug:set_internal_state(available_internal_state, false)
+ end,
+
+ L.
+
+verify_colliding_keys([[K|Ks]|Gs]) ->
+ Hash = internal_hash(K),
+ [Hash] = lists:usort([internal_hash(Key) || Key <- Ks]),
+ verify_colliding_keys(Gs);
+verify_colliding_keys([]) ->
+ ok.
+
+internal_hash(Term) ->
+ erts_debug:get_internal_state({internal_hash,Term}).
+
+%% Test that all persistent terms are erased by init:restart/0.
+
+init_restart(_Config) ->
+ File = "command_file",
+ ok = file:write_file(File, term_to_binary(restart)),
+ {ok,[[Erl]]} = init:get_argument(progname),
+ ModPath = filename:dirname(code:which(?MODULE)),
+ Cmd = Erl ++ " -pa " ++ ModPath ++ " -noshell "
+ "-run " ++ ?MODULE_STRING ++ " test_init_restart_cmd " ++
+ File,
+ io:format("~s\n", [Cmd]),
+ Expected = "12ok",
+ case os:cmd(Cmd) of
+ Expected ->
+ ok;
+ Actual ->
+ io:format("Expected: ~s", [Expected]),
+ io:format("Actual: ~s\n", [Actual]),
+ ct:fail(unexpected_output)
+ end.
+
+test_init_restart_cmd([File]) ->
+ try
+ do_test_init_restart_cmd(File)
+ catch
+ C:R ->
+ io:format("\n~p ~p\n", [C,R]),
+ halt()
+ end,
+ receive
+ _ -> ok
+ end.
+
+do_test_init_restart_cmd(File) ->
+ {ok,Bin} = file:read_file(File),
+ Seq = lists:seq(1, 50),
+ case binary_to_term(Bin) of
+ restart ->
+ _ = [persistent_term:put({?MODULE,I}, {value,I}) ||
+ I <- Seq],
+ ok = file:write_file(File, term_to_binary(was_restarted)),
+ io:put_chars("1"),
+ init:restart(),
+ receive
+ _ -> ok
+ end;
+ was_restarted ->
+ io:put_chars("2"),
+ ok = file:delete(File),
+ _ = [begin
+ Key = {?MODULE,I},
+ {'EXIT',{badarg,_}} = (catch persistent_term:get(Key))
+ end || I <- Seq],
+ io:put_chars("ok"),
+ init:stop()
+ end.
+
+%% Check that there is the same number of persistents terms before
+%% and after each test case.
+
+chk() ->
+ persistent_term:info().
+
+chk(Chk) ->
+ Chk = persistent_term:info(),
+ Key = {?MODULE,?FUNCTION_NAME},
+ ok = persistent_term:put(Key, {term,Chk}),
+ Term = persistent_term:get(Key),
+ true = persistent_term:erase(Key),
+ chk_not_stuck(Term),
+ ok.
+
+chk_not_stuck(Term) ->
+ %% Hash tables to be deleted are put onto a queue.
+ %% Make sure that the queue isn't stuck by a table with
+ %% a non-zero ref count.
+
+ case erts_debug:size_shared(Term) of
+ 0 ->
+ erlang:yield(),
+ chk_not_stuck(Term);
+ _ ->
+ ok
+ end.
diff --git a/erts/emulator/test/scheduler_SUITE.erl b/erts/emulator/test/scheduler_SUITE.erl
index f04efb9003..2e0dfa42f3 100644
--- a/erts/emulator/test/scheduler_SUITE.erl
+++ b/erts/emulator/test/scheduler_SUITE.erl
@@ -1450,26 +1450,29 @@ poll_threads(Config) when is_list(Config) ->
{Conc, PollType, KP} = get_ioconfig(Config),
{Sched, SchedOnln, _} = get_sstate(Config, ""),
- [1, 1] = get_ionum(Config,"+IOt 2 +IOp 2"),
- [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 5"),
-
- [1, 1] = get_ionum(Config, "+S 2 +IOPt 100 +IOPp 100"),
-
if
Conc ->
- [5] = get_ionum(Config,"+IOt 5 +IOp 1"),
- [3, 2] = get_ionum(Config,"+IOt 5 +IOp 2"),
- [2, 2, 2, 2, 2] = get_ionum(Config,"+IOt 10 +IOPp 50"),
+ [1, 1, 1] = get_ionum(Config,"+IOt 2 +IOp 2"),
+ [1, 1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 5"),
+ [1, 1, 1] = get_ionum(Config, "+S 2 +IOPt 100 +IOPp 100"),
- [2] = get_ionum(Config, "+S 2 +IOPt 100"),
- [4] = get_ionum(Config, "+S 4 +IOPt 100"),
- [4] = get_ionum(Config, "+S 4:2 +IOPt 100"),
- [4, 4] = get_ionum(Config, "+S 8 +IOPt 100 +IOPp 25"),
+ [5, 1] = get_ionum(Config,"+IOt 5 +IOp 1"),
+ [3, 2, 1] = get_ionum(Config,"+IOt 5 +IOp 2"),
+ [2, 2, 2, 2, 2, 1] = get_ionum(Config,"+IOt 10 +IOPp 50"),
+
+ [2, 1] = get_ionum(Config, "+S 2 +IOPt 100"),
+ [4, 1] = get_ionum(Config, "+S 4 +IOPt 100"),
+ [4, 1] = get_ionum(Config, "+S 4:2 +IOPt 100"),
+ [4, 4, 1] = get_ionum(Config, "+S 8 +IOPt 100 +IOPp 25"),
fail = get_ionum(Config, "+IOt 1 +IOp 2"),
ok;
not Conc ->
+ [1, 1] = get_ionum(Config,"+IOt 2 +IOp 2"),
+ [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 5"),
+ [1, 1] = get_ionum(Config, "+S 2 +IOPt 100 +IOPp 100"),
+
[1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 1"),
[1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 2"),
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 10 +IOPp 50"),
diff --git a/erts/emulator/test/signal_SUITE.erl b/erts/emulator/test/signal_SUITE.erl
index fab2f45f28..4e6baa9e0e 100644
--- a/erts/emulator/test/signal_SUITE.erl
+++ b/erts/emulator/test/signal_SUITE.erl
@@ -85,7 +85,7 @@ xm_sig_order_proc() ->
receive
may_not_reach -> exit(bad_signal_order);
may_reach -> ok
- after 0 -> ok
+ after 0 -> erlang:yield()
end,
xm_sig_order_proc().