Diffstat (limited to 'erts/emulator')
63 files changed, 5190 insertions, 846 deletions
diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in index 054692819e..57a9d45887 100644 --- a/erts/emulator/Makefile.in +++ b/erts/emulator/Makefile.in @@ -633,21 +633,24 @@ GENERATE += $(TTF_DIR)/driver_tab.c # This list must be consistent with PRE_LOADED_MODULES in # erts/preloaded/src/Makefile. -PRELOAD_BEAM = $(ERL_TOP)/erts/preloaded/ebin/otp_ring0.beam \ - $(ERL_TOP)/erts/preloaded/ebin/erts_code_purger.beam \ - $(ERL_TOP)/erts/preloaded/ebin/init.beam \ - $(ERL_TOP)/erts/preloaded/ebin/prim_buffer.beam \ - $(ERL_TOP)/erts/preloaded/ebin/prim_eval.beam \ - $(ERL_TOP)/erts/preloaded/ebin/prim_inet.beam \ - $(ERL_TOP)/erts/preloaded/ebin/prim_file.beam \ - $(ERL_TOP)/erts/preloaded/ebin/zlib.beam \ - $(ERL_TOP)/erts/preloaded/ebin/prim_zip.beam \ - $(ERL_TOP)/erts/preloaded/ebin/erl_prim_loader.beam \ - $(ERL_TOP)/erts/preloaded/ebin/erlang.beam \ - $(ERL_TOP)/erts/preloaded/ebin/erts_internal.beam \ - $(ERL_TOP)/erts/preloaded/ebin/erl_tracer.beam \ - $(ERL_TOP)/erts/preloaded/ebin/erts_literal_area_collector.beam \ - $(ERL_TOP)/erts/preloaded/ebin/erts_dirty_process_signal_handler.beam +PRELOAD_BEAM = $(ERL_TOP)/erts/preloaded/ebin/otp_ring0.beam \ + $(ERL_TOP)/erts/preloaded/ebin/erts_code_purger.beam \ + $(ERL_TOP)/erts/preloaded/ebin/init.beam \ + $(ERL_TOP)/erts/preloaded/ebin/prim_buffer.beam \ + $(ERL_TOP)/erts/preloaded/ebin/prim_eval.beam \ + $(ERL_TOP)/erts/preloaded/ebin/prim_inet.beam \ + $(ERL_TOP)/erts/preloaded/ebin/prim_file.beam \ + $(ERL_TOP)/erts/preloaded/ebin/zlib.beam \ + $(ERL_TOP)/erts/preloaded/ebin/prim_zip.beam \ + $(ERL_TOP)/erts/preloaded/ebin/erl_prim_loader.beam \ + $(ERL_TOP)/erts/preloaded/ebin/erlang.beam \ + $(ERL_TOP)/erts/preloaded/ebin/erts_internal.beam \ + $(ERL_TOP)/erts/preloaded/ebin/erl_tracer.beam \ + $(ERL_TOP)/erts/preloaded/ebin/erts_literal_area_collector.beam \ + $(ERL_TOP)/erts/preloaded/ebin/erts_dirty_process_signal_handler.beam \ + $(ERL_TOP)/erts/preloaded/ebin/atomics.beam \ + $(ERL_TOP)/erts/preloaded/ebin/counters.beam \ + $(ERL_TOP)/erts/preloaded/ebin/persistent_term.beam ifeq ($(TARGET),win32) # On windows the preloaded objects are in a resource object. @@ -839,6 +842,8 @@ RUN_OBJS += \ $(OBJDIR)/erl_bif_ddll.o $(OBJDIR)/erl_bif_guard.o \ $(OBJDIR)/erl_bif_info.o $(OBJDIR)/erl_bif_op.o \ $(OBJDIR)/erl_bif_os.o $(OBJDIR)/erl_bif_lists.o \ + $(OBJDIR)/erl_bif_persistent.o \ + $(OBJDIR)/erl_bif_atomics.o $(OBJDIR)/erl_bif_counters.o \ $(OBJDIR)/erl_bif_trace.o $(OBJDIR)/erl_bif_unique.o \ $(OBJDIR)/erl_bif_wrap.o $(OBJDIR)/erl_nfunc_sched.o \ $(OBJDIR)/erl_guard_bifs.o $(OBJDIR)/erl_dirty_bif_wrap.o \ diff --git a/erts/emulator/beam/atom.c b/erts/emulator/beam/atom.c index 5381611fab..59b51fd15e 100644 --- a/erts/emulator/beam/atom.c +++ b/erts/emulator/beam/atom.c @@ -174,7 +174,7 @@ atom_alloc(Atom* tmpl) /* * Precompute ordinal value of first 3 bytes + 7 bits. - * This is used by utils.c:erts_cmp_atoms(). + * This is used by erl_utils.h:erts_cmp_atoms(). * We cannot use the full 32 bits of the first 4 bytes, * since we use the sign of the difference between two * ordinal values to represent their relative order. 
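The atom.c hunk above only retargets a comment (erts_cmp_atoms() now lives in erl_utils.h), but the ordinal scheme it refers to is worth a sketch. The following is a minimal, self-contained illustration of the packing that comment describes (the first 3 bytes plus 7 bits of the fourth byte of the atom text); atom_ord0 and cmp_atoms_fast_path are illustrative names, not the emulator's actual identifiers:

    #include <stdint.h>
    #include <string.h>

    /* Pack the first 3 bytes plus the top 7 bits of the 4th byte of the
     * atom text into a non-negative ordinal below 2^31. */
    static int32_t atom_ord0(const unsigned char *text, size_t len)
    {
        unsigned char c[4] = {0, 0, 0, 0};
        memcpy(c, text, len < 4 ? len : 4);
        return ((int32_t)c[0] << 23) | ((int32_t)c[1] << 15) |
               ((int32_t)c[2] << 7)  | ((int32_t)c[3] >> 1);
    }

    /* Both ordinals lie in [0, 2^31), so their difference cannot overflow
     * a 32-bit signed integer, and its sign orders the two atoms; the full
     * atom text only has to be compared when the ordinals tie. */
    static int32_t cmp_atoms_fast_path(int32_t ord_a, int32_t ord_b)
    {
        return ord_a - ord_b;
    }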
diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index fdce4f7b95..291bc95604 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -182,6 +182,7 @@ atom control atom copy atom copy_literals atom counters +atom count atom cpu atom cpu_timestamp atom cr @@ -287,6 +288,7 @@ atom gc_minor_end atom gc_minor_start atom Ge='>=' atom generational +atom get_all_trap atom get_seq_token atom get_tcw atom gather_gc_info_result @@ -325,6 +327,7 @@ atom index atom infinity atom info atom info_msg +atom info_trap atom init atom initial_call atom input @@ -393,6 +396,7 @@ atom microsecond atom microstate_accounting atom milli_seconds atom millisecond +atom min atom min_heap_size atom min_bin_vheap_size atom minor diff --git a/erts/emulator/beam/beam_bif_load.c b/erts/emulator/beam/beam_bif_load.c index d221e6aea6..bb1b2e5b27 100644 --- a/erts/emulator/beam/beam_bif_load.c +++ b/erts/emulator/beam/beam_bif_load.c @@ -1752,29 +1752,7 @@ BIF_RETTYPE erts_internal_purge_module_2(BIF_ALIST_2) finalize_purge_operation(BIF_P, ret == am_true); if (literals) { - ErtsLiteralAreaRef *ref; - ErtsMessage *mp; - ref = erts_alloc(ERTS_ALC_T_LITERAL_REF, - sizeof(ErtsLiteralAreaRef)); - ref->literal_area = literals; - ref->next = NULL; - erts_mtx_lock(&release_literal_areas.mtx); - if (release_literal_areas.last) { - release_literal_areas.last->next = ref; - release_literal_areas.last = ref; - } - else { - release_literal_areas.first = ref; - release_literal_areas.last = ref; - } - erts_mtx_unlock(&release_literal_areas.mtx); - mp = erts_alloc_message(0, NULL); - ERL_MESSAGE_TOKEN(mp) = am_undefined; - erts_queue_proc_message(BIF_P, - erts_literal_area_collector, - 0, - mp, - am_copy_literals); + erts_queue_release_literals(BIF_P, literals); } return ret; @@ -1786,6 +1764,41 @@ BIF_RETTYPE erts_internal_purge_module_2(BIF_ALIST_2) } } +void +erts_queue_release_literals(Process* c_p, ErtsLiteralArea* literals) +{ + ErtsLiteralAreaRef *ref; + ErtsMessage *mp; + ref = erts_alloc(ERTS_ALC_T_LITERAL_REF, + sizeof(ErtsLiteralAreaRef)); + ref->literal_area = literals; + ref->next = NULL; + erts_mtx_lock(&release_literal_areas.mtx); + if (release_literal_areas.last) { + release_literal_areas.last->next = ref; + release_literal_areas.last = ref; + } else { + release_literal_areas.first = ref; + release_literal_areas.last = ref; + } + erts_mtx_unlock(&release_literal_areas.mtx); + mp = erts_alloc_message(0, NULL); + ERL_MESSAGE_TOKEN(mp) = am_undefined; + if (c_p == NULL) { + erts_queue_message(erts_literal_area_collector, + 0, + mp, + am_copy_literals, + am_system); + } else { + erts_queue_proc_message(c_p, + erts_literal_area_collector, + 0, + mp, + am_copy_literals); + } +} + /* * Move code from current to old and null all export entries for the module */ diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index ab5920a67e..e909a0b4da 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -579,6 +579,7 @@ init_emulator(void) * the instructions' C labels to the loader. * The second call starts execution of BEAM code. This call never returns. 
*/ +ERTS_NO_RETPOLINE void process_main(Eterm * x_reg_array, FloatDef* f_reg_array) { static int init_done = 0; diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c index 79ed5a0c18..04498332b0 100644 --- a/erts/emulator/beam/bif.c +++ b/erts/emulator/beam/bif.c @@ -3622,6 +3622,10 @@ erts_internal_garbage_collect_1(BIF_ALIST_1) default: BIF_ERROR(BIF_P, BADARG); } erts_garbage_collect(BIF_P, 0, NULL, 0); + if (ERTS_PROC_IS_EXITING(BIF_P)) { + /* The max heap size limit was reached. */ + return THE_NON_VALUE; + } return am_true; } diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 7548924178..d4ba90a61a 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -40,6 +40,7 @@ # Note: Guards BIFs usually require special support in the compiler. # + gcbif erlang:abs/1 bif erlang:adler32/1 bif erlang:adler32/2 @@ -698,3 +699,29 @@ ubif erlang:map_get/2 ubif erlang:is_map_key/2 bif ets:internal_delete_all/2 bif ets:internal_select_delete/2 + +# +# New in 21.2 +# + +bif persistent_term:put/2 +bif persistent_term:get/1 +bif persistent_term:get/0 +bif persistent_term:erase/1 +bif persistent_term:info/0 +bif erts_internal:erase_persistent_terms/0 + +bif erts_internal:atomics_new/2 +bif atomics:get/2 +bif atomics:put/3 +bif atomics:add/3 +bif atomics:add_get/3 +bif atomics:exchange/3 +bif atomics:compare_exchange/4 +bif atomics:info/1 + +bif erts_internal:counters_new/1 +bif erts_internal:counters_get/2 +bif erts_internal:counters_add/3 +bif erts_internal:counters_put/3 +bif erts_internal:counters_info/1 diff --git a/erts/emulator/beam/big.c b/erts/emulator/beam/big.c index 84338769e0..7e0be2a2a7 100644 --- a/erts/emulator/beam/big.c +++ b/erts/emulator/beam/big.c @@ -668,27 +668,25 @@ static dsize_t I_mul(ErtsDigit* x, dsize_t xl, ErtsDigit* y, dsize_t yl, ErtsDig static dsize_t I_sqr(ErtsDigit* x, dsize_t xl, ErtsDigit* r) { - ErtsDigit d_next = *x; ErtsDigit d; ErtsDigit* r0 = r; ErtsDigit* s = r; if ((r + xl) == x) /* "Inline" operation */ *x = 0; - x++; while(xl--) { - ErtsDigit* y = x; + ErtsDigit* y; ErtsDigit y_0 = 0, y_1 = 0, y_2 = 0, y_3 = 0; ErtsDigit b0, b1; ErtsDigit z0, z1, z2; ErtsDigit t; dsize_t y_l = xl; - + + d = *x; + x++; + y = x; s = r; - d = d_next; - d_next = *x; - x++; DMUL(d, d, b1, b0); DSUMc(*s, b0, y_3, t); @@ -1159,8 +1157,11 @@ static dsize_t I_band(ErtsDigit* x, dsize_t xl, short xsgn, *r++ = ~c1 & ~c2; x++; y++; } - while(xl--) - *r++ = ~*x++; + while(xl--) { + DSUBb(*x,0,b1,c1); + *r++ = ~c1; + x++; + } } } return I_btrail(r0, r, sign); diff --git a/erts/emulator/beam/copy.c b/erts/emulator/beam/copy.c index e7bfd04b73..e7bd046e18 100644 --- a/erts/emulator/beam/copy.c +++ b/erts/emulator/beam/copy.c @@ -1074,6 +1074,7 @@ Uint copy_shared_calculate(Eterm obj, erts_shcopy_t *info) Eterm* ptr; Eterm *lit_purge_ptr = info->lit_purge_ptr; Uint lit_purge_sz = info->lit_purge_sz; + int copy_literals = info->copy_literals; #ifdef DEBUG Eterm mypid = erts_get_current_pid(); #endif @@ -1119,7 +1120,7 @@ Uint copy_shared_calculate(Eterm obj, erts_shcopy_t *info) /* off heap list pointers are copied verbatim */ if (erts_is_literal(obj,ptr)) { VERBOSE(DEBUG_SHCOPY, ("[pid=%T] bypassed copying %p is %T\n", mypid, ptr, obj)); - if (in_literal_purge_area(ptr)) + if (copy_literals || in_literal_purge_area(ptr)) info->literal_size += size_object(obj); goto pop_next; } @@ -1170,7 +1171,7 @@ Uint copy_shared_calculate(Eterm obj, erts_shcopy_t *info) /* off heap pointers to boxes are copied verbatim */ if (erts_is_literal(obj,ptr)) 
{ VERBOSE(DEBUG_SHCOPY, ("[pid=%T] bypassed copying %p is %T\n", mypid, ptr, obj)); - if (in_literal_purge_area(ptr)) + if (copy_literals || in_literal_purge_area(ptr)) info->literal_size += size_object(obj); goto pop_next; } @@ -1338,6 +1339,7 @@ Uint copy_shared_perform(Eterm obj, Uint size, erts_shcopy_t *info, unsigned remaining; Eterm *lit_purge_ptr = info->lit_purge_ptr; Uint lit_purge_sz = info->lit_purge_sz; + int copy_literals = info->copy_literals; #ifdef DEBUG Eterm mypid = erts_get_current_pid(); Eterm saved_obj = obj; @@ -1387,7 +1389,7 @@ Uint copy_shared_perform(Eterm obj, Uint size, erts_shcopy_t *info, ptr = list_val(obj); /* off heap list pointers are copied verbatim */ if (erts_is_literal(obj,ptr)) { - if (!in_literal_purge_area(ptr)) { + if (!(copy_literals || in_literal_purge_area(ptr))) { *resp = obj; } else { Uint bsz = 0; @@ -1455,7 +1457,7 @@ Uint copy_shared_perform(Eterm obj, Uint size, erts_shcopy_t *info, ptr = boxed_val(obj); /* off heap pointers to boxes are copied verbatim */ if (erts_is_literal(obj,ptr)) { - if (!in_literal_purge_area(ptr)) { + if (!(copy_literals || in_literal_purge_area(ptr))) { *resp = obj; } else { Uint bsz = 0; diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index 8fe1ccb758..9e36d5e0d1 100644 --- a/erts/emulator/beam/erl_alloc.c +++ b/erts/emulator/beam/erl_alloc.c @@ -4030,6 +4030,9 @@ debug_free(ErtsAlcType_t n, void *extra, void *ptr) ASSERT(ERTS_ALC_N_MIN <= n && n <= ERTS_ALC_N_MAX); + if (!ptr) + return; + dptr = check_memory_fence(ptr, &size, n, ERTS_ALC_O_FREE); #ifdef ERTS_ALC_A_EXEC diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types index 5409b89bab..4f03a34390 100644 --- a/erts/emulator/beam/erl_alloc.types +++ b/erts/emulator/beam/erl_alloc.types @@ -274,9 +274,13 @@ type ML_YIELD_STATE SHORT_LIVED SYSTEM monitor_link_yield_state type ML_DIST STANDARD SYSTEM monitor_link_dist type PF3_ARGS SHORT_LIVED PROCESSES process_flag_3_arguments type SETUP_CONN_ARG SHORT_LIVED PROCESSES setup_connection_argument +type LIST_TRAP SHORT_LIVED PROCESSES list_bif_trap_state type ENVIRONMENT SYSTEM SYSTEM environment +type PERSISTENT_TERM LONG_LIVED CODE persistent_term +type PERSISTENT_LOCK_Q SHORT_LIVED SYSTEM persistent_lock_q + # # Types used for special emulators # @@ -334,6 +338,8 @@ type GC_INFO_REQ SHORT_LIVED SYSTEM gc_info_request type PORT_DATA_HEAP STANDARD SYSTEM port_data_heap type MSACC DRIVER SYSTEM microstate_accounting type SYS_CHECK_REQ SHORT_LIVED SYSTEM system_check_request +type ATOMICS STANDARD SYSTEM erl_bif_atomics +type COUNTERS STANDARD SYSTEM erl_bif_counters # # Types used by system specific code diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c index a5740a08cf..0be4562785 100644 --- a/erts/emulator/beam/erl_alloc_util.c +++ b/erts/emulator/beam/erl_alloc_util.c @@ -834,6 +834,8 @@ static ERTS_INLINE void clr_bit(UWord* map, Uint ix) &= ~((UWord)1 << (ix % ERTS_VSPACE_WORD_BITS)); } +#ifdef DEBUG + static ERTS_INLINE int is_bit_set(UWord* map, Uint ix) { ASSERT(ix / ERTS_VSPACE_WORD_BITS < VSPACE_MAP_SZ); @@ -841,6 +843,8 @@ static ERTS_INLINE int is_bit_set(UWord* map, Uint ix) & ((UWord)1 << (ix % ERTS_VSPACE_WORD_BITS)); } +#endif + UWord erts_literal_vspace_map[VSPACE_MAP_SZ]; static void set_literal_range(void* start, Uint size) diff --git a/erts/emulator/beam/erl_async.c b/erts/emulator/beam/erl_async.c index 605a2b3461..44655ad5df 100644 --- a/erts/emulator/beam/erl_async.c +++ 
b/erts/emulator/beam/erl_async.c @@ -336,7 +336,7 @@ static ERTS_INLINE ErtsAsync *async_get(ErtsThrQ_t *q, case ERTS_THR_Q_NEED_THR_PRGR: { ErtsThrPrgrVal prgr = erts_thr_q_need_thr_progress(q); - erts_thr_progress_wakeup(NULL, prgr); + erts_thr_progress_wakeup(erts_thr_prgr_data(NULL), prgr); /* * We do no dequeue finalizing in hope that a new async * job will arrive before we are woken due to thread diff --git a/erts/emulator/beam/erl_bif_atomics.c b/erts/emulator/beam/erl_bif_atomics.c new file mode 100644 index 0000000000..029831bd95 --- /dev/null +++ b/erts/emulator/beam/erl_bif_atomics.c @@ -0,0 +1,256 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2018. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + */ + +/* + * Purpose: High performance atomics. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include <stddef.h> /* offsetof */ + +#include "sys.h" +#include "export.h" +#include "bif.h" +#include "erl_threads.h" +#include "big.h" +#include "erl_binary.h" +#include "erl_bif_unique.h" +#include "erl_map.h" + +typedef struct +{ + int is_signed; + UWord vlen; + erts_atomic64_t v[1]; +}AtomicsRef; + +static int atomics_destructor(Binary *unused) +{ + return 1; +} + +#define OPT_SIGNED (1 << 0) + +BIF_RETTYPE erts_internal_atomics_new_2(BIF_ALIST_2) +{ + AtomicsRef* p; + Binary* mbin; + UWord i, cnt, opts; + Uint bytes; + Eterm* hp; + + if (!term_to_UWord(BIF_ARG_1, &cnt) + || cnt == 0 + || !term_to_UWord(BIF_ARG_2, &opts)) { + + BIF_ERROR(BIF_P, BADARG); + } + + if (cnt > (ERTS_UWORD_MAX / sizeof(p->v[0]))) + BIF_ERROR(BIF_P, SYSTEM_LIMIT); + + bytes = offsetof(AtomicsRef, v) + cnt*sizeof(p->v[0]); + mbin = erts_create_magic_binary_x(bytes, + atomics_destructor, + ERTS_ALC_T_ATOMICS, + 0); + p = ERTS_MAGIC_BIN_DATA(mbin); + p->is_signed = opts & OPT_SIGNED; + p->vlen = cnt; + for (i=0; i < cnt; i++) + erts_atomic64_init_nob(&p->v[i], 0); + hp = HAlloc(BIF_P, ERTS_MAGIC_REF_THING_SIZE); + return erts_mk_magic_ref(&hp, &MSO(BIF_P), mbin); +} + +static ERTS_INLINE int get_ref(Eterm ref, AtomicsRef** pp) +{ + Binary* mbin; + if (!is_internal_magic_ref(ref)) + return 0; + + mbin = erts_magic_ref2bin(ref); + if (ERTS_MAGIC_BIN_DESTRUCTOR(mbin) != atomics_destructor) + return 0; + *pp = ERTS_MAGIC_BIN_DATA(mbin); + return 1; +} + +static ERTS_INLINE int get_ref_ix(Eterm ref, Eterm ix, + AtomicsRef** pp, UWord* ixp) +{ + return (get_ref(ref, pp) + && term_to_UWord(ix, ixp) + && --(*ixp) < (*pp)->vlen); +} + +static ERTS_INLINE int get_value(AtomicsRef* p, Eterm term, erts_aint64_t *valp) +{ + return (p->is_signed ? 
+ term_to_Sint64(term, (Sint64*)valp) : + term_to_Uint64(term, (Uint64*)valp)); +} + +static ERTS_INLINE int get_incr(AtomicsRef* p, Eterm term, erts_aint64_t *valp) +{ + return (term_to_Sint64(term, (Sint64*)valp) + || term_to_Uint64(term, (Uint64*)valp)); +} + +static ERTS_INLINE Eterm bld_atomic(Process* proc, AtomicsRef* p, + erts_aint64_t val) +{ + if (p->is_signed) { + if (IS_SSMALL(val)) + return make_small((Sint) val); + else { + Uint hsz = ERTS_SINT64_HEAP_SIZE(val); + Eterm* hp = HAlloc(proc, hsz); + return erts_sint64_to_big(val, &hp); + } + } + else { + if ((Uint64)val <= MAX_SMALL) + return make_small((Sint) val); + else { + Uint hsz = ERTS_UINT64_HEAP_SIZE((Uint64)val); + Eterm* hp = HAlloc(proc, hsz); + return erts_uint64_to_big(val, &hp); + } + } +} + +BIF_RETTYPE atomics_put_3(BIF_ALIST_3) +{ + AtomicsRef* p; + UWord ix; + erts_aint64_t val; + + if (!get_ref_ix(BIF_ARG_1, BIF_ARG_2, &p, &ix) + || !get_value(p, BIF_ARG_3, &val)) { + BIF_ERROR(BIF_P, BADARG); + } + erts_atomic64_set_mb(&p->v[ix], val); + return am_ok; +} + +BIF_RETTYPE atomics_get_2(BIF_ALIST_2) +{ + AtomicsRef* p; + UWord ix; + + if (!get_ref_ix(BIF_ARG_1, BIF_ARG_2, &p, &ix)) { + BIF_ERROR(BIF_P, BADARG); + } + return bld_atomic(BIF_P, p, erts_atomic64_read_mb(&p->v[ix])); +} + +BIF_RETTYPE atomics_add_3(BIF_ALIST_3) +{ + AtomicsRef* p; + UWord ix; + erts_aint64_t incr; + + if (!get_ref_ix(BIF_ARG_1, BIF_ARG_2, &p, &ix) + || !get_incr(p, BIF_ARG_3, &incr)) { + BIF_ERROR(BIF_P, BADARG); + } + erts_atomic64_add_mb(&p->v[ix], incr); + return am_ok; +} + +BIF_RETTYPE atomics_add_get_3(BIF_ALIST_3) +{ + AtomicsRef* p; + UWord ix; + erts_aint64_t incr; + + if (!get_ref_ix(BIF_ARG_1, BIF_ARG_2, &p, &ix) + || !get_incr(p, BIF_ARG_3, &incr)) { + BIF_ERROR(BIF_P, BADARG); + } + return bld_atomic(BIF_P, p, erts_atomic64_add_read_mb(&p->v[ix], incr)); +} + +BIF_RETTYPE atomics_exchange_3(BIF_ALIST_3) +{ + AtomicsRef* p; + UWord ix; + erts_aint64_t desired, was; + + if (!get_ref_ix(BIF_ARG_1, BIF_ARG_2, &p, &ix) + || !get_value(p, BIF_ARG_3, &desired)) { + BIF_ERROR(BIF_P, BADARG); + } + was = erts_atomic64_xchg_mb(&p->v[ix], desired); + return bld_atomic(BIF_P, p, was); +} + +BIF_RETTYPE atomics_compare_exchange_4(BIF_ALIST_4) +{ + AtomicsRef* p; + UWord ix; + erts_aint64_t expected, desired, was; + + if (!get_ref_ix(BIF_ARG_1, BIF_ARG_2, &p, &ix) + || !get_value(p, BIF_ARG_3, &expected) + || !get_value(p, BIF_ARG_4, &desired)) { + BIF_ERROR(BIF_P, BADARG); + } + was = erts_atomic64_cmpxchg_mb(&p->v[ix], desired, expected); + return was == expected ? am_ok : bld_atomic(BIF_P, p, was); +} + +BIF_RETTYPE atomics_info_1(BIF_ALIST_1) +{ + AtomicsRef* p; + Uint hsz = MAP4_SZ; + Eterm *hp; + Uint64 max; + Sint64 min; + UWord memory; + Eterm max_val, min_val, sz_val, mem_val; + + if (!get_ref(BIF_ARG_1, &p)) + BIF_ERROR(BIF_P, BADARG); + + max = p->is_signed ? ERTS_SINT64_MAX : ERTS_UINT64_MAX; + min = p->is_signed ? 
ERTS_SINT64_MIN : 0; + memory = erts_magic_ref2bin(BIF_ARG_1)->orig_size; + + erts_bld_uint64(NULL, &hsz, max); + erts_bld_sint64(NULL, &hsz, min); + erts_bld_uword(NULL, &hsz, p->vlen); + erts_bld_uword(NULL, &hsz, memory); + + hp = HAlloc(BIF_P, hsz); + max_val = erts_bld_uint64(&hp, NULL, max); + min_val = erts_bld_sint64(&hp, NULL, min); + sz_val = erts_bld_uword(&hp, NULL, p->vlen); + mem_val = erts_bld_uword(&hp, NULL, memory); + + return MAP4(hp, am_max, max_val, + am_memory, mem_val, + am_min, min_val, + am_size, sz_val); +} diff --git a/erts/emulator/beam/erl_bif_counters.c b/erts/emulator/beam/erl_bif_counters.c new file mode 100644 index 0000000000..7c8884ba32 --- /dev/null +++ b/erts/emulator/beam/erl_bif_counters.c @@ -0,0 +1,255 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2018. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + */ + +/* + * Purpose: The implementation for 'counters' with 'write_concurrency'. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include <stddef.h> /* offsetof */ + +#include "sys.h" +#include "export.h" +#include "bif.h" +#include "erl_threads.h" +#include "big.h" +#include "erl_binary.h" +#include "erl_bif_unique.h" +#include "erl_map.h" + +/* + * Each logical counter consists of one 64-bit atomic instance per scheduler + * plus one instance for the "base value". + * + * get() reads all atomics for the counter and returns the sum. + * add() reads and writes only its own scheduler specific atomic instance. + * put() reads all scheduler specific atomics and writes a new base value. 
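+ *
+ * A worked illustration (an editor's sketch, not part of the patch),
+ * assuming 2 schedulers, i.e. three atomics per counter: a base b and
+ * per-scheduler slots s1, s2, all starting at 0:
+ *
+ *   add(3) on scheduler 1:  s1 = 3     get() -> b + s1 + s2 = 3
+ *   add(4) on scheduler 2:  s2 = 4     get() -> 7
+ *   put(10):                b  = 10 - (s1 + s2) = 3
+ *                                      get() -> 3 + 3 + 4 = 10
+ *
+ * Neither get() nor put() is a single atomic operation, so a value read
+ * while add()s are in flight is only an approximate snapshot.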
+ */ +#define ATOMICS_PER_COUNTER (erts_no_schedulers + 1) + +#define ATOMICS_PER_CACHE_LINE (ERTS_CACHE_LINE_SIZE / sizeof(erts_atomic64_t)) + +typedef struct +{ + UWord arity; +#ifdef DEBUG + UWord ulen; +#endif + union { + erts_atomic64_t v[ATOMICS_PER_CACHE_LINE]; + byte cache_line__[ERTS_CACHE_LINE_SIZE]; + } u[1]; +}CountersRef; + +static int counters_destructor(Binary *mbin) +{ + return 1; +} + + +static UWord ERTS_INLINE div_ceil(UWord dividend, UWord divisor) +{ + return (dividend + divisor - 1) / divisor; +} + +BIF_RETTYPE erts_internal_counters_new_1(BIF_ALIST_1) +{ + CountersRef* p; + Binary* mbin; + UWord ui, vi, cnt; + Uint bytes, cache_lines; + Eterm* hp; + + if (!term_to_UWord(BIF_ARG_1, &cnt) + || cnt == 0) { + BIF_ERROR(BIF_P, BADARG); + } + + if (cnt > (ERTS_UWORD_MAX / (sizeof(erts_atomic64_t)*2*ATOMICS_PER_COUNTER))) + BIF_ERROR(BIF_P, SYSTEM_LIMIT); + + cache_lines = ATOMICS_PER_COUNTER * div_ceil(cnt, ATOMICS_PER_CACHE_LINE); + bytes = offsetof(CountersRef, u) + cache_lines * ERTS_CACHE_LINE_SIZE; + mbin = erts_create_magic_binary_x(bytes, + counters_destructor, + ERTS_ALC_T_ATOMICS, + 0); + p = ERTS_MAGIC_BIN_DATA(mbin); + p->arity = cnt; + +#ifdef DEBUG + p->ulen = cache_lines; +#endif + ASSERT((byte*)&p->u[cache_lines] <= ((byte*)p + bytes)); + for (ui=0; ui < cache_lines; ui++) + for (vi=0; vi < ATOMICS_PER_CACHE_LINE; vi++) + erts_atomic64_init_nob(&p->u[ui].v[vi], 0); + hp = HAlloc(BIF_P, ERTS_MAGIC_REF_THING_SIZE); + return erts_mk_magic_ref(&hp, &MSO(BIF_P), mbin); +} + +static ERTS_INLINE int get_ref(Eterm ref, CountersRef** pp) +{ + Binary* mbin; + if (!is_internal_magic_ref(ref)) + return 0; + + mbin = erts_magic_ref2bin(ref); + if (ERTS_MAGIC_BIN_DESTRUCTOR(mbin) != counters_destructor) + return 0; + *pp = ERTS_MAGIC_BIN_DATA(mbin); + return 1; +} + +static ERTS_INLINE int get_ref_cnt(Eterm ref, Eterm index, + CountersRef** pp, + erts_atomic64_t** app, + UWord sched_ix) +{ + CountersRef* p; + UWord ix, ui, vi; + if (!get_ref(ref, &p) || !term_to_UWord(index, &ix) || --ix >= p->arity) + return 0; + ui = (ix / ATOMICS_PER_CACHE_LINE) * ATOMICS_PER_COUNTER + sched_ix; + vi = ix % ATOMICS_PER_CACHE_LINE; + ASSERT(ui < p->ulen); + *pp = p; + *app = &p->u[ui].v[vi]; + return 1; +} + +static ERTS_INLINE int get_ref_my_cnt(Eterm ref, Eterm index, + CountersRef** pp, + erts_atomic64_t** app) +{ + ErtsSchedulerData *esdp = erts_get_scheduler_data(); + ASSERT(esdp && !ERTS_SCHEDULER_IS_DIRTY(esdp)); + ASSERT(esdp->no > 0 && esdp->no < ATOMICS_PER_COUNTER); + return get_ref_cnt(ref, index, pp, app, esdp->no); +} + +static ERTS_INLINE int get_ref_first_cnt(Eterm ref, Eterm index, + CountersRef** pp, + erts_atomic64_t** app) +{ + return get_ref_cnt(ref, index, pp, app, 0); +} + +static ERTS_INLINE int get_incr(CountersRef* p, Eterm term, erts_aint64_t *valp) +{ + return (term_to_Sint64(term, (Sint64*)valp) + || term_to_Uint64(term, (Uint64*)valp)); +} + +static ERTS_INLINE Eterm bld_counter(Process* proc, CountersRef* p, + erts_aint64_t val) +{ + if (IS_SSMALL(val)) + return make_small((Sint) val); + else { + Uint hsz = ERTS_SINT64_HEAP_SIZE(val); + Eterm* hp = HAlloc(proc, hsz); + return erts_sint64_to_big(val, &hp); + } +} + +BIF_RETTYPE erts_internal_counters_get_2(BIF_ALIST_2) +{ + CountersRef* p; + erts_atomic64_t* ap; + erts_aint64_t acc = 0; + int j; + + if (!get_ref_first_cnt(BIF_ARG_1, BIF_ARG_2, &p, &ap)) { + BIF_ERROR(BIF_P, BADARG); + } + for (j = ATOMICS_PER_COUNTER; j ; --j) { + acc += erts_atomic64_read_nob(ap); + ap = (erts_atomic64_t*) ((byte*)ap + 
ERTS_CACHE_LINE_SIZE); + } + return bld_counter(BIF_P, p, acc); +} + +BIF_RETTYPE erts_internal_counters_add_3(BIF_ALIST_3) +{ + CountersRef* p; + erts_atomic64_t* ap; + erts_aint64_t incr, sum; + + if (!get_ref_my_cnt(BIF_ARG_1, BIF_ARG_2, &p, &ap) + || !get_incr(p, BIF_ARG_3, &incr)) { + BIF_ERROR(BIF_P, BADARG); + } + sum = incr + erts_atomic64_read_nob(ap); + erts_atomic64_set_nob(ap, sum); + return am_ok; +} + +BIF_RETTYPE erts_internal_counters_put_3(BIF_ALIST_3) +{ + CountersRef* p; + erts_atomic64_t* first_ap; + erts_atomic64_t* ap; + erts_aint64_t acc; + erts_aint64_t val; + int j; + + if (!get_ref_first_cnt(BIF_ARG_1, BIF_ARG_2, &p, &first_ap) + || !term_to_Sint64(BIF_ARG_3, &val)) { + BIF_ERROR(BIF_P, BADARG); + } + + ap = first_ap; + acc = 0; + j = ATOMICS_PER_COUNTER - 1; + do { + ap = (erts_atomic64_t*) ((byte*)ap + ERTS_CACHE_LINE_SIZE); + acc += erts_atomic64_read_nob(ap); + } while (--j); + erts_atomic64_set_nob(first_ap, val-acc); + + return am_ok; +} + +BIF_RETTYPE erts_internal_counters_info_1(BIF_ALIST_1) +{ + CountersRef* p; + Uint hsz = MAP2_SZ; + Eterm *hp; + UWord memory; + Eterm sz_val, mem_val; + + if (!get_ref(BIF_ARG_1, &p)) + BIF_ERROR(BIF_P, BADARG); + + memory = erts_magic_ref2bin(BIF_ARG_1)->orig_size; + erts_bld_uword(NULL, &hsz, p->arity); + erts_bld_uword(NULL, &hsz, memory); + + hp = HAlloc(BIF_P, hsz); + sz_val = erts_bld_uword(&hp, NULL, p->arity); + mem_val = erts_bld_uword(&hp, NULL, memory); + + return MAP2(hp, am_memory, mem_val, + am_size, sz_val); +} diff --git a/erts/emulator/beam/erl_bif_lists.c b/erts/emulator/beam/erl_bif_lists.c index 395be67a90..aaf262780f 100644 --- a/erts/emulator/beam/erl_bif_lists.c +++ b/erts/emulator/beam/erl_bif_lists.c @@ -29,12 +29,13 @@ #include "sys.h" #include "erl_vm.h" #include "global.h" -#include "erl_process.h" -#include "error.h" #include "bif.h" +#include "erl_binary.h" + static Eterm keyfind(int Bif, Process* p, Eterm Key, Eterm Pos, Eterm List); + static BIF_RETTYPE append(Process* p, Eterm A, Eterm B) { Eterm list; @@ -146,110 +147,732 @@ BIF_RETTYPE append_2(BIF_ALIST_2) return append(BIF_P, BIF_ARG_1, BIF_ARG_2); } -/* - * erlang:'--'/2 - */ +/* erlang:'--'/2 + * + * Subtracts a list from another (LHS -- RHS), removing the first occurrence of + * each element in LHS from RHS. There is no type coercion so the elements must + * match exactly. + * + * The BIF is broken into several stages that can all trap individually, and it + * chooses its algorithm based on input size. If either input is small it will + * use a linear scan tuned to which side it's on, and if both inputs are large + * enough it will convert RHS into a multiset to provide good asymptotic + * behavior. */ + +#define SUBTRACT_LHS_THRESHOLD 16 +#define SUBTRACT_RHS_THRESHOLD 16 + +typedef enum { + SUBTRACT_STAGE_START, + SUBTRACT_STAGE_LEN_LHS, + + /* Naive linear scan that's efficient when + * LEN_LHS <= SUBTRACT_LHS_THRESHOLD. */ + SUBTRACT_STAGE_NAIVE_LHS, + + SUBTRACT_STAGE_LEN_RHS, + + /* As SUBTRACT_STAGE_NAIVE_LHS but for RHS. */ + SUBTRACT_STAGE_NAIVE_RHS, + + /* Creates a multiset from RHS for faster lookups before sweeping through + * LHS. The set is implemented as a red-black tree and duplicate elements + * are handled by a counter on each node. 
*/ + SUBTRACT_STAGE_SET_BUILD, + SUBTRACT_STAGE_SET_FINISH +} ErtsSubtractCtxStage; + +typedef struct subtract_node__ { + struct subtract_node__ *parent; + struct subtract_node__ *left; + struct subtract_node__ *right; + int is_red; + + Eterm key; + Uint count; +} subtract_tree_t; + +typedef struct { + ErtsSubtractCtxStage stage; + + Eterm lhs_original; + Eterm rhs_original; + + Uint lhs_remaining; + Uint rhs_remaining; + + Eterm iterator; + + Eterm *result_cdr; + Eterm result; + + union { + Eterm lhs_elements[SUBTRACT_LHS_THRESHOLD]; + Eterm rhs_elements[SUBTRACT_RHS_THRESHOLD]; + + struct { + subtract_tree_t *tree; + + /* A memory area for the tree's nodes, saving us the need to have + * one allocation per node. */ + subtract_tree_t *alloc_start; + subtract_tree_t *alloc; + } rhs_set; + } u; +} ErtsSubtractContext; + +#define ERTS_RBT_PREFIX subtract +#define ERTS_RBT_T subtract_tree_t +#define ERTS_RBT_KEY_T Eterm +#define ERTS_RBT_FLAGS_T int +#define ERTS_RBT_INIT_EMPTY_TNODE(T) \ + do { \ + (T)->parent = NULL; \ + (T)->left = NULL; \ + (T)->right = NULL; \ + } while(0) +#define ERTS_RBT_IS_RED(T) ((T)->is_red) +#define ERTS_RBT_SET_RED(T) ((T)->is_red = 1) +#define ERTS_RBT_IS_BLACK(T) (!ERTS_RBT_IS_RED(T)) +#define ERTS_RBT_SET_BLACK(T) ((T)->is_red = 0) +#define ERTS_RBT_GET_FLAGS(T) ((T)->is_red) +#define ERTS_RBT_SET_FLAGS(T, F) ((T)->is_red = F) +#define ERTS_RBT_GET_PARENT(T) ((T)->parent) +#define ERTS_RBT_SET_PARENT(T, P) ((T)->parent = P) +#define ERTS_RBT_GET_RIGHT(T) ((T)->right) +#define ERTS_RBT_SET_RIGHT(T, R) ((T)->right = (R)) +#define ERTS_RBT_GET_LEFT(T) ((T)->left) +#define ERTS_RBT_SET_LEFT(T, L) ((T)->left = (L)) +#define ERTS_RBT_GET_KEY(T) ((T)->key) +#define ERTS_RBT_CMP_KEYS(KX, KY) CMP_TERM(KX, KY) +#define ERTS_RBT_WANT_LOOKUP_INSERT +#define ERTS_RBT_WANT_LOOKUP +#define ERTS_RBT_WANT_DELETE +#define ERTS_RBT_UNDEF + +#include "erl_rbtree.h" + +static int subtract_continue(Process *p, ErtsSubtractContext *context); + +static void subtract_ctx_dtor(ErtsSubtractContext *context) { + switch (context->stage) { + case SUBTRACT_STAGE_SET_BUILD: + case SUBTRACT_STAGE_SET_FINISH: + erts_free(ERTS_ALC_T_LIST_TRAP, context->u.rhs_set.alloc_start); + break; + default: + break; + } +} -#define SMALL_VEC_SIZE 10 -static Eterm subtract(Process* p, Eterm A, Eterm B) -{ - Eterm list; - Eterm* hp; - Uint need; - Eterm res; - Eterm small_vec[SMALL_VEC_SIZE]; /* Preallocated memory for small lists */ - Eterm* vec_p; - Eterm* vp; - Sint i; - Sint n; - Sint m; - - if ((n = erts_list_length(A)) < 0) { - BIF_ERROR(p, BADARG); +static int subtract_ctx_bin_dtor(Binary *context_bin) { + ErtsSubtractContext *context = ERTS_MAGIC_BIN_DATA(context_bin); + subtract_ctx_dtor(context); + return 1; +} + +static void subtract_ctx_move(ErtsSubtractContext *from, + ErtsSubtractContext *to) { + int uses_result_cdr = 0; + + to->stage = from->stage; + + to->lhs_original = from->lhs_original; + to->rhs_original = from->rhs_original; + + to->lhs_remaining = from->lhs_remaining; + to->rhs_remaining = from->rhs_remaining; + + to->iterator = from->iterator; + to->result = from->result; + + switch (to->stage) { + case SUBTRACT_STAGE_NAIVE_LHS: + sys_memcpy(to->u.lhs_elements, + from->u.lhs_elements, + sizeof(Eterm) * to->lhs_remaining); + break; + case SUBTRACT_STAGE_NAIVE_RHS: + sys_memcpy(to->u.rhs_elements, + from->u.rhs_elements, + sizeof(Eterm) * to->rhs_remaining); + + uses_result_cdr = 1; + break; + case SUBTRACT_STAGE_SET_FINISH: + uses_result_cdr = 1; + /* FALL THROUGH */ + case 
SUBTRACT_STAGE_SET_BUILD: + to->u.rhs_set.alloc_start = from->u.rhs_set.alloc_start; + to->u.rhs_set.alloc = from->u.rhs_set.alloc; + to->u.rhs_set.tree = from->u.rhs_set.tree; + break; + default: + break; } - if ((m = erts_list_length(B)) < 0) { - BIF_ERROR(p, BADARG); + + if (uses_result_cdr) { + if (from->result_cdr == &from->result) { + to->result_cdr = &to->result; + } else { + to->result_cdr = from->result_cdr; + } } - - if (n == 0) - BIF_RET(NIL); - if (m == 0) - BIF_RET(A); - - /* allocate element vector */ - if (n <= SMALL_VEC_SIZE) - vec_p = small_vec; - else - vec_p = (Eterm*) erts_alloc(ERTS_ALC_T_TMP, n * sizeof(Eterm)); - - /* PUT ALL ELEMENTS IN VP */ - vp = vec_p; - list = A; - i = n; - while(i--) { - Eterm* listp = list_val(list); - *vp++ = CAR(listp); - list = CDR(listp); +} + +static Eterm subtract_create_trap_state(Process *p, + ErtsSubtractContext *context) { + Binary *state_bin; + Eterm *hp; + + state_bin = erts_create_magic_binary(sizeof(ErtsSubtractContext), + subtract_ctx_bin_dtor); + + subtract_ctx_move(context, ERTS_MAGIC_BIN_DATA(state_bin)); + + hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE); + + return erts_mk_magic_ref(&hp, &MSO(p), state_bin); +} + +static int subtract_enter_len_lhs(Process *p, ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_LEN_LHS; + + context->iterator = context->lhs_original; + context->lhs_remaining = 0; + + return subtract_continue(p, context); +} + +static int subtract_enter_len_rhs(Process *p, ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_LEN_RHS; + + context->iterator = context->rhs_original; + context->rhs_remaining = 0; + + return subtract_continue(p, context); +} + +static int subtract_get_length(Process *p, Eterm *iterator_p, Uint *count_p) { + static const Sint ELEMENTS_PER_RED = 32; + + Sint budget, count; + Eterm iterator; + + budget = ELEMENTS_PER_RED * ERTS_BIF_REDS_LEFT(p); + iterator = *iterator_p; + +#ifdef DEBUG + budget = budget / 10 + 1; +#endif + + for (count = 0; count < budget && is_list(iterator); count++) { + iterator = CDR(list_val(iterator)); } - - /* UNMARK ALL DELETED CELLS */ - list = B; - m = 0; /* number of deleted elements */ - while(is_list(list)) { - Eterm* listp = list_val(list); - Eterm elem = CAR(listp); - i = n; - vp = vec_p; - while(i--) { - if (is_value(*vp) && eq(*vp, elem)) { - *vp = THE_NON_VALUE; - m++; - break; - } - vp++; - } - list = CDR(listp); + + if (!is_list(iterator) && !is_nil(iterator)) { + return -1; } - - if (m == n) /* All deleted ? */ - res = NIL; - else if (m == 0) /* None deleted ? 
*/ - res = A; - else { /* REBUILD LIST */ - res = NIL; - need = 2*(n - m); - hp = HAlloc(p, need); - vp = vec_p + n - 1; - while(vp >= vec_p) { - if (is_value(*vp)) { - res = CONS(hp, *vp, res); - hp += 2; - } - vp--; - } + + BUMP_REDS(p, count / ELEMENTS_PER_RED); + + *iterator_p = iterator; + *count_p += count; + + if (is_nil(iterator)) { + return 1; } - if (vec_p != small_vec) - erts_free(ERTS_ALC_T_TMP, (void *) vec_p); - BIF_RET(res); + + return 0; } -BIF_RETTYPE ebif_minusminus_2(BIF_ALIST_2) -{ - return subtract(BIF_P, BIF_ARG_1, BIF_ARG_2); +static int subtract_enter_naive_lhs(Process *p, ErtsSubtractContext *context) { + Eterm iterator; + int i = 0; + + context->stage = SUBTRACT_STAGE_NAIVE_LHS; + + context->iterator = context->rhs_original; + context->result = NIL; + + iterator = context->lhs_original; + + while (is_list(iterator)) { + const Eterm *cell = list_val(iterator); + + ASSERT(i < SUBTRACT_LHS_THRESHOLD); + + context->u.lhs_elements[i++] = CAR(cell); + iterator = CDR(cell); + } + + ASSERT(i == context->lhs_remaining); + + return subtract_continue(p, context); } -BIF_RETTYPE subtract_2(BIF_ALIST_2) -{ - return subtract(BIF_P, BIF_ARG_1, BIF_ARG_2); +static int subtract_naive_lhs(Process *p, ErtsSubtractContext *context) { + const Sint CHECKS_PER_RED = 16; + Sint checks, budget; + + budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p); + checks = 0; + + while (checks < budget && is_list(context->iterator)) { + const Eterm *cell; + Eterm value, next; + int found_at; + + cell = list_val(context->iterator); + + value = CAR(cell); + next = CDR(cell); + + for (found_at = 0; found_at < context->lhs_remaining; found_at++) { + if (EQ(value, context->u.lhs_elements[found_at])) { + /* We shift the array one step down as we have to preserve + * order. + * + * Note that we can't exit early as that would suppress errors + * in the right-hand side (this runs prior to determining the + * length of RHS). 
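+ *
+ * An editor's example (not part of the patch): [1] -- [1|foo] must
+ * still raise badarg; bailing out as soon as lhs_remaining reaches
+ * zero would instead return [] and mask the improper RHS.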
*/ + + context->lhs_remaining--; + sys_memmove(&context->u.lhs_elements[found_at], + &context->u.lhs_elements[found_at + 1], + (context->lhs_remaining - found_at) * sizeof(Eterm)); + break; + } + } + + checks += MAX(1, context->lhs_remaining); + context->iterator = next; + } + + BUMP_REDS(p, MIN(checks, budget) / CHECKS_PER_RED); + + if (is_list(context->iterator)) { + return 0; + } else if (!is_nil(context->iterator)) { + return -1; + } + + if (context->lhs_remaining > 0) { + Eterm *hp; + int i; + + hp = HAlloc(p, context->lhs_remaining * 2); + + for (i = context->lhs_remaining - 1; i >= 0; i--) { + Eterm value = context->u.lhs_elements[i]; + + context->result = CONS(hp, value, context->result); + hp += 2; + } + } + + ASSERT(context->lhs_remaining > 0 || context->result == NIL); + + return 1; } +static int subtract_enter_naive_rhs(Process *p, ErtsSubtractContext *context) { + Eterm iterator; + int i = 0; + + context->stage = SUBTRACT_STAGE_NAIVE_RHS; + + context->iterator = context->lhs_original; + context->result_cdr = &context->result; + context->result = NIL; + + iterator = context->rhs_original; + + while (is_list(iterator)) { + const Eterm *cell = list_val(iterator); + + ASSERT(i < SUBTRACT_RHS_THRESHOLD); + + context->u.rhs_elements[i++] = CAR(cell); + iterator = CDR(cell); + } + + ASSERT(i == context->rhs_remaining); + + return subtract_continue(p, context); +} + +static int subtract_naive_rhs(Process *p, ErtsSubtractContext *context) { + const Sint CHECKS_PER_RED = 16; + Sint checks, budget; + + budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p); + checks = 0; + +#ifdef DEBUG + budget = budget / 10 + 1; +#endif + + while (checks < budget && is_list(context->iterator)) { + const Eterm *cell; + Eterm value, next; + int found_at; + + cell = list_val(context->iterator); + value = CAR(cell); + next = CDR(cell); + + for (found_at = context->rhs_remaining - 1; found_at >= 0; found_at--) { + if (EQ(value, context->u.rhs_elements[found_at])) { + break; + } + } + + if (found_at < 0) { + /* Destructively add the value to the result. This is safe + * since the GC is disabled and the unfinished term is never + * leaked to the outside world. */ + Eterm *hp = HAllocX(p, 2, context->lhs_remaining * 2); + + *context->result_cdr = make_list(hp); + context->result_cdr = &CDR(hp); + + CAR(hp) = value; + } else if (found_at >= 0) { + Eterm swap; + + if (context->rhs_remaining-- == 1) { + /* We've run out of items to remove, so the rest of the + * result will be equal to the remainder of the input. We know + * that LHS is well-formed as any errors would've been reported + * during length determination. */ + *context->result_cdr = next; + + BUMP_REDS(p, MIN(budget, checks) / CHECKS_PER_RED); + + return 1; + } + + swap = context->u.rhs_elements[context->rhs_remaining]; + context->u.rhs_elements[found_at] = swap; + } + + checks += context->rhs_remaining; + context->iterator = next; + context->lhs_remaining--; + } + + /* The result only has to be terminated when returning it to the user, but + * we're doing it when trapping as well to prevent headaches when + * debugging. 
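+ *
+ * (An editor's aside on the technique, not part of the patch:
+ * result_cdr is a tail pointer. It starts out as &context->result, and
+ * every appended cell does *result_cdr = cons followed by
+ * result_cdr = &CDR(cons), so appending stays O(1) and finishing the
+ * list is the single write *result_cdr = NIL below.)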
*/ + *context->result_cdr = NIL; + + BUMP_REDS(p, MIN(budget, checks) / CHECKS_PER_RED); + + if (is_list(context->iterator)) { + ASSERT(context->lhs_remaining > 0 && context->rhs_remaining > 0); + return 0; + } + + return 1; +} + +static int subtract_enter_set_build(Process *p, ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_SET_BUILD; + + context->u.rhs_set.alloc_start = + erts_alloc(ERTS_ALC_T_LIST_TRAP, + context->rhs_remaining * sizeof(subtract_tree_t)); + + context->u.rhs_set.alloc = context->u.rhs_set.alloc_start; + context->u.rhs_set.tree = NULL; + + context->iterator = context->rhs_original; + + return subtract_continue(p, context); +} + +static int subtract_set_build(Process *p, ErtsSubtractContext *context) { + const static Sint INSERTIONS_PER_RED = 16; + Sint budget, insertions; + + budget = INSERTIONS_PER_RED * ERTS_BIF_REDS_LEFT(p); + insertions = 0; + +#ifdef DEBUG + budget = budget / 10 + 1; +#endif + + while (insertions < budget && is_list(context->iterator)) { + subtract_tree_t *existing_node, *new_node; + const Eterm *cell; + Eterm value, next; + + cell = list_val(context->iterator); + value = CAR(cell); + next = CDR(cell); + + new_node = context->u.rhs_set.alloc; + new_node->key = value; + new_node->count = 1; + + existing_node = subtract_rbt_lookup_insert(&context->u.rhs_set.tree, + new_node); + + if (existing_node != NULL) { + existing_node->count++; + } else { + context->u.rhs_set.alloc++; + } + + context->iterator = next; + insertions++; + } + + BUMP_REDS(p, insertions / INSERTIONS_PER_RED); + + ASSERT(is_list(context->iterator) || is_nil(context->iterator)); + ASSERT(context->u.rhs_set.tree != NULL); + + return is_nil(context->iterator); +} + +static int subtract_enter_set_finish(Process *p, ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_SET_FINISH; + + context->result_cdr = &context->result; + context->result = NIL; + + context->iterator = context->lhs_original; + + return subtract_continue(p, context); +} + +static int subtract_set_finish(Process *p, ErtsSubtractContext *context) { + const Sint CHECKS_PER_RED = 8; + Sint checks, budget; + + budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p); + checks = 0; + +#ifdef DEBUG + budget = budget / 10 + 1; +#endif + + while (checks < budget && is_list(context->iterator)) { + subtract_tree_t *node; + const Eterm *cell; + Eterm value, next; + + cell = list_val(context->iterator); + value = CAR(cell); + next = CDR(cell); + + ASSERT(context->rhs_remaining > 0); + + node = subtract_rbt_lookup(context->u.rhs_set.tree, value); + + if (node == NULL) { + Eterm *hp = HAllocX(p, 2, context->lhs_remaining * 2); + + *context->result_cdr = make_list(hp); + context->result_cdr = &CDR(hp); + + CAR(hp) = value; + } else { + if (context->rhs_remaining-- == 1) { + *context->result_cdr = next; + + BUMP_REDS(p, checks / CHECKS_PER_RED); + + return 1; + } + + if (node->count-- == 1) { + subtract_rbt_delete(&context->u.rhs_set.tree, node); + } + } + + context->iterator = next; + context->lhs_remaining--; + checks++; + } + + *context->result_cdr = NIL; + + BUMP_REDS(p, checks / CHECKS_PER_RED); + + if (is_list(context->iterator)) { + ASSERT(context->lhs_remaining > 0 && context->rhs_remaining > 0); + return 0; + } + + return 1; +} + +static int subtract_continue(Process *p, ErtsSubtractContext *context) { + switch (context->stage) { + case SUBTRACT_STAGE_START: { + return subtract_enter_len_lhs(p, context); + } + + case SUBTRACT_STAGE_LEN_LHS: { + int res = subtract_get_length(p, + &context->iterator, + 
&context->lhs_remaining); + + if (res != 1) { + return res; + } + + if (context->lhs_remaining <= SUBTRACT_LHS_THRESHOLD) { + return subtract_enter_naive_lhs(p, context); + } + + return subtract_enter_len_rhs(p, context); + } + + case SUBTRACT_STAGE_NAIVE_LHS: { + return subtract_naive_lhs(p, context); + } + + case SUBTRACT_STAGE_LEN_RHS: { + int res = subtract_get_length(p, + &context->iterator, + &context->rhs_remaining); + + if (res != 1) { + return res; + } + + /* We've walked through both lists fully now so we no longer need + * to check for errors past this point. */ + + if (context->rhs_remaining <= SUBTRACT_RHS_THRESHOLD) { + return subtract_enter_naive_rhs(p, context); + } + + return subtract_enter_set_build(p, context); + } + + case SUBTRACT_STAGE_NAIVE_RHS: { + return subtract_naive_rhs(p, context); + } + + case SUBTRACT_STAGE_SET_BUILD: { + int res = subtract_set_build(p, context); + + if (res != 1) { + return res; + } + + return subtract_enter_set_finish(p, context); + } + + case SUBTRACT_STAGE_SET_FINISH: { + return subtract_set_finish(p, context); + } + + default: + ERTS_ASSERT(!"unreachable"); + } +} + +static int subtract_start(Process *p, Eterm lhs, Eterm rhs, + ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_START; + + context->lhs_original = lhs; + context->rhs_original = rhs; + + return subtract_continue(p, context); +} + +/* erlang:'--'/2 */ +static Eterm subtract(Export *bif_entry, BIF_ALIST_2) { + Eterm lhs = BIF_ARG_1, rhs = BIF_ARG_2; + + if ((is_list(lhs) || is_nil(lhs)) && (is_list(rhs) || is_nil(rhs))) { + /* We start with the context on the stack in the hopes that we won't + * have to trap. */ + ErtsSubtractContext context; + int res; + + res = subtract_start(BIF_P, lhs, rhs, &context); + + if (res == 0) { + Eterm state_mref; + + state_mref = subtract_create_trap_state(BIF_P, &context); + erts_set_gc_state(BIF_P, 0); + + BIF_TRAP2(bif_entry, BIF_P, state_mref, NIL); + } + + subtract_ctx_dtor(&context); + + if (res < 0) { + BIF_ERROR(BIF_P, BADARG); + } + + BIF_RET(context.result); + } else if (is_internal_magic_ref(lhs)) { + ErtsSubtractContext *context; + int (*dtor)(Binary*); + Binary *magic_bin; + + int res; + + magic_bin = erts_magic_ref2bin(lhs); + dtor = ERTS_MAGIC_BIN_DESTRUCTOR(magic_bin); + + if (dtor != subtract_ctx_bin_dtor) { + BIF_ERROR(BIF_P, BADARG); + } + + ASSERT(BIF_P->flags & F_DISABLE_GC); + ASSERT(rhs == NIL); + + context = ERTS_MAGIC_BIN_DATA(magic_bin); + res = subtract_continue(BIF_P, context); + + if (res == 0) { + BIF_TRAP2(bif_entry, BIF_P, lhs, NIL); + } + + erts_set_gc_state(BIF_P, 1); + + if (res < 0) { + ERTS_BIF_ERROR_TRAPPED2(BIF_P, BADARG, bif_entry, + context->lhs_original, + context->rhs_original); + } + + BIF_RET(context->result); + } + + ASSERT(!(BIF_P->flags & F_DISABLE_GC)); + + BIF_ERROR(BIF_P, BADARG); +} + +BIF_RETTYPE ebif_minusminus_2(BIF_ALIST_2) { + return subtract(bif_export[BIF_ebif_minusminus_2], BIF_CALL_ARGS); +} + +BIF_RETTYPE subtract_2(BIF_ALIST_2) { + return subtract(bif_export[BIF_subtract_2], BIF_CALL_ARGS); +} + + BIF_RETTYPE lists_member_2(BIF_ALIST_2) { Eterm term; Eterm list; Eterm item; int non_immed_key; - int max_iter = 10 * CONTEXT_REDS; + int reds_left = ERTS_BIF_REDS_LEFT(BIF_P); + int max_iter = 16 * reds_left; if (is_nil(BIF_ARG_2)) { BIF_RET(am_false); @@ -267,14 +890,15 @@ BIF_RETTYPE lists_member_2(BIF_ALIST_2) } item = CAR(list_val(list)); if ((item == term) || (non_immed_key && eq(item, term))) { - BIF_RET2(am_true, CONTEXT_REDS - max_iter/10); + BIF_RET2(am_true, 
reds_left - max_iter/16); } list = CDR(list_val(list)); } if (is_not_nil(list)) { + BUMP_REDS(BIF_P, reds_left - max_iter/16); BIF_ERROR(BIF_P, BADARG); } - BIF_RET2(am_false, CONTEXT_REDS - max_iter/10); + BIF_RET2(am_false, reds_left - max_iter/16); } static BIF_RETTYPE lists_reverse_alloc(Process *c_p, @@ -283,7 +907,7 @@ static BIF_RETTYPE lists_reverse_alloc(Process *c_p, { static const Uint CELLS_PER_RED = 40; - Eterm *heap_top, *heap_end; + Eterm *alloc_top, *alloc_end; Uint cells_left, max_cells; Eterm list, tail; Eterm lookahead; @@ -305,18 +929,18 @@ static BIF_RETTYPE lists_reverse_alloc(Process *c_p, BIF_ERROR(c_p, BADARG); } - heap_top = HAlloc(c_p, 2 * (max_cells - cells_left)); - heap_end = heap_top + 2 * (max_cells - cells_left); + alloc_top = HAlloc(c_p, 2 * (max_cells - cells_left)); + alloc_end = alloc_top + 2 * (max_cells - cells_left); - while (heap_top < heap_end) { + while (alloc_top < alloc_end) { Eterm *pair = list_val(list); - tail = CONS(heap_top, CAR(pair), tail); + tail = CONS(alloc_top, CAR(pair), tail); list = CDR(pair); ASSERT(is_list(list) || is_nil(list)); - heap_top += 2; + alloc_top += 2; } if (is_nil(list)) { @@ -333,7 +957,7 @@ static BIF_RETTYPE lists_reverse_onheap(Process *c_p, { static const Uint CELLS_PER_RED = 60; - Eterm *heap_top, *heap_end; + Eterm *alloc_start, *alloc_top, *alloc_end; Uint cells_left, max_cells; Eterm list, tail; @@ -343,21 +967,27 @@ static BIF_RETTYPE lists_reverse_onheap(Process *c_p, cells_left = max_cells = CELLS_PER_RED * (1 + ERTS_BIF_REDS_LEFT(c_p)); ASSERT(HEAP_LIMIT(c_p) >= HEAP_TOP(c_p) + 2); - heap_end = HEAP_LIMIT(c_p) - 2; - heap_top = HEAP_TOP(c_p); + alloc_start = HEAP_TOP(c_p); + alloc_end = HEAP_LIMIT(c_p) - 2; + alloc_top = alloc_start; + + /* Don't process more cells than we have reductions for. */ + alloc_end = MIN(alloc_top + (cells_left * 2), alloc_end); - while (heap_top < heap_end && is_list(list)) { + while (alloc_top < alloc_end && is_list(list)) { Eterm *pair = list_val(list); - tail = CONS(heap_top, CAR(pair), tail); + tail = CONS(alloc_top, CAR(pair), tail); list = CDR(pair); - heap_top += 2; + alloc_top += 2; } - cells_left -= (heap_top - heap_end) / 2; + cells_left -= (alloc_top - alloc_start) / 2; + HEAP_TOP(c_p) = alloc_top; + + ASSERT(cells_left >= 0 && cells_left <= max_cells); BUMP_REDS(c_p, (max_cells - cells_left) / CELLS_PER_RED); - HEAP_TOP(c_p) = heap_top; if (is_nil(list)) { BIF_RET(tail); diff --git a/erts/emulator/beam/erl_bif_persistent.c b/erts/emulator/beam/erl_bif_persistent.c new file mode 100644 index 0000000000..9dca768a18 --- /dev/null +++ b/erts/emulator/beam/erl_bif_persistent.c @@ -0,0 +1,983 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2018. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + */ + +/* + * Purpose: Implement persistent term storage. 
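+ *
+ * A usage sketch (an editor's note, not part of the patch; the
+ * Erlang-level names come from the bif.tab entries in this same
+ * commit):
+ *
+ *   persistent_term:put(Key, Term)  %% copies Term into its own literal area
+ *   persistent_term:get(Key)        %% returns the term with no copying
+ *   persistent_term:erase(Key)      %% schedules the literal area for release
+ *
+ * Reads are cheap because stored terms are tagged as literals; put/2
+ * and erase/1 are expensive because the hash table is rebuilt and old
+ * literal areas must be copied out of any process still referring to
+ * them before they can be freed.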
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "sys.h" +#include "erl_vm.h" +#include "global.h" +#include "erl_process.h" +#include "error.h" +#include "erl_driver.h" +#include "bif.h" +#include "erl_map.h" +#include "erl_binary.h" + +/* + * The limit for the number of persistent terms before + * a warning is issued. + */ + +#define WARNING_LIMIT 20000 +#define XSTR(s) STR(s) +#define STR(s) #s + +/* + * Parameters for the hash table. + */ +#define INITIAL_SIZE 8 +#define LOAD_FACTOR ((Uint)50) +#define MUST_GROW(t) (((Uint)100) * t->num_entries >= LOAD_FACTOR * t->allocated) +#define MUST_SHRINK(t) (((Uint)200) * t->num_entries <= LOAD_FACTOR * t->allocated && \ + t->allocated > INITIAL_SIZE) + +typedef struct hash_table { + Uint allocated; + Uint num_entries; + Uint mask; + Uint first_to_delete; + Uint num_to_delete; + erts_atomic_t refc; + struct hash_table* delete_next; + ErtsThrPrgrLaterOp thr_prog_op; + Eterm term[1]; +} HashTable; + +typedef struct trap_data { + HashTable* table; + Uint idx; + Uint remaining; + Uint memory; /* Used by info/0 to count used memory */ +} TrapData; + +/* + * Declarations of local functions. + */ + +static HashTable* create_initial_table(void); +static Uint lookup(HashTable* hash_table, Eterm key); +static HashTable* copy_table(HashTable* old_table, Uint new_size, int rehash); +static HashTable* tmp_table_copy(HashTable* old_table); +static int try_seize_update_permission(Process* c_p); +static void release_update_permission(int release_updater); +static void table_updater(void* table); +static void table_deleter(void* hash_table); +static void dec_table_refc(Process* c_p, HashTable* old_table); +static void delete_table(Process* c_p, HashTable* table); +static void mark_for_deletion(HashTable* hash_table, Uint entry_index); +static ErtsLiteralArea* term_to_area(Eterm tuple); +static void suspend_updater(Process* c_p); +static Eterm do_get_all(Process* c_p, TrapData* trap_data, Eterm res); +static Eterm do_info(Process* c_p, TrapData* trap_data); +static void append_to_delete_queue(HashTable* table); +static HashTable* next_to_delete(void); +static Eterm alloc_trap_data(Process* c_p); +static int cleanup_trap_data(Binary *bp); + +/* + * Traps + */ + +static Export persistent_term_get_all_export; +static BIF_RETTYPE persistent_term_get_all_trap(BIF_ALIST_2); +static Export persistent_term_info_export; +static BIF_RETTYPE persistent_term_info_trap(BIF_ALIST_1); + +/* + * Pointer to the current hash table. + */ + +static erts_atomic_t the_hash_table; + +/* + * Queue of processes waiting to update the hash table. + */ + +struct update_queue_item { + Process *p; + struct update_queue_item* next; +}; + +static erts_mtx_t update_table_permission_mtx; +static struct update_queue_item* update_queue = NULL; +static Process* updater_process = NULL; + +/* Protected by update_table_permission_mtx */ +static ErtsThrPrgrLaterOp thr_prog_op; +static int issued_warning = 0; + +/* + * Queue of hash tables to be deleted. + */ + +static erts_mtx_t delete_queue_mtx; +static HashTable* delete_queue_head = NULL; +static HashTable** delete_queue_tail = &delete_queue_head; + +/* + * The following variables are only used during crash dumping. They + * are intialized by erts_init_persistent_dumping(). + */ + +ErtsLiteralArea** erts_persistent_areas; +Uint erts_num_persistent_areas; + +void erts_init_bif_persistent_term(void) +{ + HashTable* hash_table; + + /* + * Initialize the mutex protecting updates. 
+ */ + + erts_mtx_init(&update_table_permission_mtx, + "update_persistent_term_permission", + NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC | + ERTS_LOCK_FLAGS_CATEGORY_GENERIC); + + /* + * Initialize delete queue. + */ + + erts_mtx_init(&delete_queue_mtx, + "persistent_term_delete_permission", + NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC | + ERTS_LOCK_FLAGS_CATEGORY_GENERIC); + + /* + * Allocate a small initial hash table. + */ + + hash_table = create_initial_table(); + erts_atomic_init_nob(&the_hash_table, (erts_aint_t)hash_table); + + /* + * Initialize export entry for traps + */ + + erts_init_trap_export(&persistent_term_get_all_export, + am_persistent_term, am_get_all_trap, 2, + &persistent_term_get_all_trap); + erts_init_trap_export(&persistent_term_info_export, + am_persistent_term, am_info_trap, 1, + &persistent_term_info_trap); +} + +BIF_RETTYPE persistent_term_put_2(BIF_ALIST_2) +{ + Eterm key; + Eterm term; + Eterm heap[3]; + Eterm tuple; + HashTable* hash_table; + Uint term_size; + Uint lit_area_size; + ErlOffHeap code_off_heap; + ErtsLiteralArea* literal_area; + erts_shcopy_t info; + Eterm* ptr; + Uint entry_index; + + if (!try_seize_update_permission(BIF_P)) { + ERTS_BIF_YIELD2(bif_export[BIF_persistent_term_put_2], + BIF_P, BIF_ARG_1, BIF_ARG_2); + } + + hash_table = (HashTable *) erts_atomic_read_nob(&the_hash_table); + + key = BIF_ARG_1; + term = BIF_ARG_2; + + entry_index = lookup(hash_table, key); + + heap[0] = make_arityval(2); + heap[1] = key; + heap[2] = term; + tuple = make_tuple(heap); + + if (is_nil(hash_table->term[entry_index])) { + Uint size = hash_table->allocated; + if (MUST_GROW(hash_table)) { + size *= 2; + } + hash_table = copy_table(hash_table, size, 0); + entry_index = lookup(hash_table, key); + hash_table->num_entries++; + } else { + Eterm tuple = hash_table->term[entry_index]; + Eterm old_term; + + ASSERT(is_tuple_arity(tuple, 2)); + old_term = boxed_val(tuple)[2]; + if (EQ(term, old_term)) { + /* Same value. No need to update anything. */ + release_update_permission(0); + BIF_RET(am_ok); + } else { + /* Mark the old term for deletion. */ + mark_for_deletion(hash_table, entry_index); + hash_table = copy_table(hash_table, hash_table->allocated, 0); + } + } + + /* + * Preserve internal sharing in the term by using the + * sharing-preserving functions. However, literals must + * be copied in case the module holding them are unloaded. + */ + INITIALIZE_SHCOPY(info); + info.copy_literals = 1; + term_size = copy_shared_calculate(tuple, &info); + ERTS_INIT_OFF_HEAP(&code_off_heap); + lit_area_size = ERTS_LITERAL_AREA_ALLOC_SIZE(term_size); + literal_area = erts_alloc(ERTS_ALC_T_LITERAL, lit_area_size); + ptr = &literal_area->start[0]; + literal_area->end = ptr + term_size; + tuple = copy_shared_perform(tuple, term_size, &info, &ptr, &code_off_heap); + ASSERT(tuple_val(tuple) == literal_area->start); + literal_area->off_heap = code_off_heap.first; + DESTROY_SHCOPY(info); + erts_set_literal_tag(&tuple, literal_area->start, term_size); + hash_table->term[entry_index] = tuple; + + erts_schedule_thr_prgr_later_op(table_updater, hash_table, &thr_prog_op); + suspend_updater(BIF_P); + + /* + * Issue a warning once if the warning limit has been exceeded. 
+ */
+
+ if (hash_table->num_entries > WARNING_LIMIT && issued_warning == 0) {
+ static char w[] =
+ "More than " XSTR(WARNING_LIMIT) " persistent terms "
+ "have been created.\n"
+ "It is recommended to avoid creating an excessive number of\n"
+ "persistent terms, as creation and deletion of persistent terms\n"
+ "will be slower as the number of persistent terms increases.\n";
+ issued_warning = 1;
+ erts_send_warning_to_logger_str(BIF_P->group_leader, w);
+ }
+
+ ERTS_BIF_YIELD_RETURN(BIF_P, am_ok);
+}
+
+BIF_RETTYPE persistent_term_get_0(BIF_ALIST_0)
+{
+ HashTable* hash_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+ TrapData* trap_data;
+ Eterm res = NIL;
+ Eterm magic_ref;
+ Binary* mbp;
+
+ magic_ref = alloc_trap_data(BIF_P);
+ mbp = erts_magic_ref2bin(magic_ref);
+ trap_data = ERTS_MAGIC_BIN_DATA(mbp);
+ trap_data->table = hash_table;
+ trap_data->idx = 0;
+ trap_data->remaining = hash_table->num_entries;
+ res = do_get_all(BIF_P, trap_data, res);
+ if (trap_data->remaining == 0) {
+ BUMP_REDS(BIF_P, hash_table->num_entries);
+ trap_data->table = NULL; /* Prevent refc decrement */
+ BIF_RET(res);
+ } else {
+ /*
+ * Increment the ref counter to prevent an update operation (by put/2
+ * or erase/1) from deleting this hash table.
+ */
+ erts_atomic_inc_nob(&hash_table->refc);
+ BUMP_ALL_REDS(BIF_P);
+ BIF_TRAP2(&persistent_term_get_all_export, BIF_P, magic_ref, res);
+ }
+}
+
+BIF_RETTYPE persistent_term_get_1(BIF_ALIST_1)
+{
+ Eterm key = BIF_ARG_1;
+ HashTable* hash_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+ Uint entry_index;
+ Eterm term;
+
+ entry_index = lookup(hash_table, key);
+ term = hash_table->term[entry_index];
+ if (is_boxed(term)) {
+ ASSERT(is_tuple_arity(term, 2));
+ BIF_RET(tuple_val(term)[2]);
+ }
+ BIF_ERROR(BIF_P, BADARG);
+}
+
+BIF_RETTYPE persistent_term_erase_1(BIF_ALIST_1)
+{
+ Eterm key = BIF_ARG_1;
+ HashTable* old_table;
+ HashTable* new_table;
+ Uint entry_index;
+ Eterm old_term;
+
+ if (!try_seize_update_permission(BIF_P)) {
+ ERTS_BIF_YIELD1(bif_export[BIF_persistent_term_erase_1],
+ BIF_P, BIF_ARG_1);
+ }
+
+ old_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+ entry_index = lookup(old_table, key);
+ old_term = old_table->term[entry_index];
+ if (is_boxed(old_term)) {
+ Uint new_size;
+ HashTable* tmp_table;
+
+ /*
+ * Since we don't use any delete markers, we must rehash
+ * the table when deleting terms to ensure that all terms
+ * can still be reached if there are hash collisions.
+ * We can't rehash in place and it would not be safe to modify
+ * the old table yet, so we will first need a new
+ * temporary table copy of the same size as the old one.
+ */
+
+ ASSERT(is_tuple_arity(old_term, 2));
+ tmp_table = tmp_table_copy(old_table);
+
+ /*
+ * Delete the term from the temporary table. Then copy the
+ * temporary table to a new table, rehashing the entries
+ * while copying.
+ */
+
+ tmp_table->term[entry_index] = NIL;
+ tmp_table->num_entries--;
+ new_size = tmp_table->allocated;
+ if (MUST_SHRINK(tmp_table)) {
+ new_size /= 2;
+ }
+ new_table = copy_table(tmp_table, new_size, 1);
+ erts_free(ERTS_ALC_T_TMP, tmp_table);
+
+ mark_for_deletion(old_table, entry_index);
+ erts_schedule_thr_prgr_later_op(table_updater, new_table, &thr_prog_op);
+ suspend_updater(BIF_P);
+ ERTS_BIF_YIELD_RETURN(BIF_P, am_true);
+ }
+
+ /*
+ * Key is not present. Nothing to do.
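 *
 * An editorial note on why the rehash above is needed: consider open
 * addressing with two keys a and b that hash to the same slot i, so
 * that a sits in slot i and b in slot i+1. If erasing a merely wrote
 * NIL into slot i, a later lookup(b) would probe slot i, see NIL and
 * stop before ever reaching b, since the probe loop in lookup() only
 * continues past boxed (occupied) slots. Rehashing into a fresh
 * table keeps every surviving key reachable without delete markers.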
+ */
+
+ ASSERT(is_nil(old_term));
+ release_update_permission(0);
+ BIF_RET(am_false);
+}
+
+BIF_RETTYPE erts_internal_erase_persistent_terms_0(BIF_ALIST_0)
+{
+ HashTable* old_table;
+ HashTable* new_table;
+
+ if (!try_seize_update_permission(BIF_P)) {
+ ERTS_BIF_YIELD0(bif_export[BIF_erts_internal_erase_persistent_terms_0],
+ BIF_P);
+ }
+ old_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+ old_table->first_to_delete = 0;
+ old_table->num_to_delete = old_table->allocated;
+ new_table = create_initial_table();
+ erts_schedule_thr_prgr_later_op(table_updater, new_table, &thr_prog_op);
+ suspend_updater(BIF_P);
+ ERTS_BIF_YIELD_RETURN(BIF_P, am_true);
+}
+
+BIF_RETTYPE persistent_term_info_0(BIF_ALIST_0)
+{
+ HashTable* hash_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+ TrapData* trap_data;
+ Eterm res = NIL;
+ Eterm magic_ref;
+ Binary* mbp;
+
+ magic_ref = alloc_trap_data(BIF_P);
+ mbp = erts_magic_ref2bin(magic_ref);
+ trap_data = ERTS_MAGIC_BIN_DATA(mbp);
+ trap_data->table = hash_table;
+ trap_data->idx = 0;
+ trap_data->remaining = hash_table->num_entries;
+ trap_data->memory = 0;
+ res = do_info(BIF_P, trap_data);
+ if (trap_data->remaining == 0) {
+ BUMP_REDS(BIF_P, hash_table->num_entries);
+ trap_data->table = NULL; /* Prevent refc decrement */
+ BIF_RET(res);
+ } else {
+ /*
+ * Increment the ref counter to prevent an update operation (by put/2
+ * or erase/1) from deleting this hash table.
+ */
+ erts_atomic_inc_nob(&hash_table->refc);
+ BUMP_ALL_REDS(BIF_P);
+ BIF_TRAP2(&persistent_term_info_export, BIF_P, magic_ref, res);
+ }
+}
+
+Uint
+erts_persistent_term_count(void)
+{
+ HashTable* hash_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+ return hash_table->num_entries;
+}
+
+void
+erts_init_persistent_dumping(void)
+{
+ HashTable* hash_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+ ErtsLiteralArea** area_p;
+ Uint i;
+
+ /*
+ * Overwrite the array of Eterms in the current hash table
+ * with pointers to literal areas.
+ */
+
+ erts_persistent_areas = (ErtsLiteralArea **) hash_table->term;
+ erts_num_persistent_areas = hash_table->num_entries;
+ area_p = erts_persistent_areas;
+ for (i = 0; i < hash_table->allocated; i++) {
+ Eterm term = hash_table->term[i];
+
+ if (is_boxed(term)) {
+ *area_p++ = term_to_area(term);
+ }
+ }
+}
+
+/*
+ * Local functions.
+ */
+
+static HashTable*
+create_initial_table(void)
+{
+ HashTable* hash_table;
+ int i;
+
+ hash_table = (HashTable *) erts_alloc(ERTS_ALC_T_PERSISTENT_TERM,
+ sizeof(HashTable)+sizeof(Eterm) *
+ (INITIAL_SIZE-1));
+ hash_table->allocated = INITIAL_SIZE;
+ hash_table->num_entries = 0;
+ hash_table->mask = INITIAL_SIZE-1;
+ hash_table->first_to_delete = 0;
+ hash_table->num_to_delete = 0;
+ erts_atomic_init_nob(&hash_table->refc, (erts_aint_t)1);
+ for (i = 0; i < INITIAL_SIZE; i++) {
+ hash_table->term[i] = NIL;
+ }
+ return hash_table;
+}
+
+static BIF_RETTYPE
+persistent_term_get_all_trap(BIF_ALIST_2)
+{
+ TrapData* trap_data;
+ Eterm res = BIF_ARG_2;
+ Uint bump_reds;
+ Binary* mbp;
+
+ ASSERT(is_list(BIF_ARG_2));
+ mbp = erts_magic_ref2bin(BIF_ARG_1);
+ trap_data = ERTS_MAGIC_BIN_DATA(mbp);
+ bump_reds = trap_data->remaining;
+ res = do_get_all(BIF_P, trap_data, res);
+ ASSERT(is_list(res));
+ if (trap_data->remaining > 0) {
+ BUMP_ALL_REDS(BIF_P);
+ BIF_TRAP2(&persistent_term_get_all_export, BIF_P, BIF_ARG_1, res);
+ } else {
+ /*
+ * Decrement ref count (and possibly delete the hash table
+ * and associated literal area).
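 *
 * An editorial summary of the reference-count protocol used by the
 * trapping BIFs in this file:
 *
 *     get/0, info/0:        refc++ before trapping, so the
 *                           snapshotted table must survive
 *     get_all/info traps:   refc-- here, when the last chunk
 *                           has been delivered
 *     cleanup_trap_data():  refc-- if the process dies mid-trap
 *
 * Hence a table referenced by an in-progress get/0 or info/0 can
 * never be freed by a concurrent put/2 or erase/1 update.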
+ */ + dec_table_refc(BIF_P, trap_data->table); + trap_data->table = NULL; /* Prevent refc decrement */ + BUMP_REDS(BIF_P, bump_reds); + BIF_RET(res); + } +} + +static Eterm +do_get_all(Process* c_p, TrapData* trap_data, Eterm res) +{ + HashTable* hash_table; + Uint remaining; + Uint idx; + Uint max_iter; + Uint i; + Eterm* hp; + Uint heap_size; + struct copy_term { + Uint key_size; + Eterm* tuple_ptr; + } *copy_data; + + hash_table = trap_data->table; + idx = trap_data->idx; +#if defined(DEBUG) || defined(VALGRIND) + max_iter = 50; +#else + max_iter = ERTS_BIF_REDS_LEFT(c_p); +#endif + remaining = trap_data->remaining < max_iter ? + trap_data->remaining : max_iter; + trap_data->remaining -= remaining; + + copy_data = (struct copy_term *) erts_alloc(ERTS_ALC_T_TMP, + remaining * + sizeof(struct copy_term)); + i = 0; + heap_size = (2 + 3) * remaining; + while (remaining != 0) { + Eterm term = hash_table->term[idx]; + if (is_tuple(term)) { + Uint key_size; + Eterm* tup_val; + + ASSERT(is_tuple_arity(term, 2)); + tup_val = tuple_val(term); + key_size = size_object(tup_val[1]); + copy_data[i].key_size = key_size; + copy_data[i].tuple_ptr = tup_val; + heap_size += key_size; + i++; + remaining--; + } + idx++; + } + trap_data->idx = idx; + + hp = HAlloc(c_p, heap_size); + remaining = i; + for (i = 0; i < remaining; i++) { + Eterm* tuple_ptr; + Uint key_size; + Eterm key; + Eterm tup; + + tuple_ptr = copy_data[i].tuple_ptr; + key_size = copy_data[i].key_size; + key = copy_struct(tuple_ptr[1], key_size, &hp, &c_p->off_heap); + tup = TUPLE2(hp, key, tuple_ptr[2]); + hp += 3; + res = CONS(hp, tup, res); + hp += 2; + } + erts_free(ERTS_ALC_T_TMP, copy_data); + return res; +} + +static BIF_RETTYPE +persistent_term_info_trap(BIF_ALIST_1) +{ + TrapData* trap_data = (TrapData *) BIF_ARG_1; + Eterm res; + Uint bump_reds; + Binary* mbp; + + mbp = erts_magic_ref2bin(BIF_ARG_1); + trap_data = ERTS_MAGIC_BIN_DATA(mbp); + bump_reds = trap_data->remaining; + res = do_info(BIF_P, trap_data); + if (trap_data->remaining > 0) { + ASSERT(res == am_ok); + BUMP_ALL_REDS(BIF_P); + BIF_TRAP1(&persistent_term_info_export, BIF_P, BIF_ARG_1); + } else { + /* + * Decrement ref count (and possibly delete the hash table + * and associated literal area). + */ + dec_table_refc(BIF_P, trap_data->table); + trap_data->table = NULL; /* Prevent refc decrement */ + BUMP_REDS(BIF_P, bump_reds); + ASSERT(is_map(res)); + BIF_RET(res); + } +} + +#define DECL_AM(S) Eterm AM_ ## S = am_atom_put(#S, sizeof(#S) - 1) + +static Eterm +do_info(Process* c_p, TrapData* trap_data) +{ + HashTable* hash_table; + Uint remaining; + Uint idx; + Uint max_iter; + + hash_table = trap_data->table; + idx = trap_data->idx; +#if defined(DEBUG) || defined(VALGRIND) + max_iter = 50; +#else + max_iter = ERTS_BIF_REDS_LEFT(c_p); +#endif + remaining = trap_data->remaining < max_iter ? 
trap_data->remaining : max_iter; + trap_data->remaining -= remaining; + while (remaining != 0) { + if (is_boxed(hash_table->term[idx])) { + ErtsLiteralArea* area; + area = term_to_area(hash_table->term[idx]); + trap_data->memory += sizeof(ErtsLiteralArea) + + sizeof(Eterm) * (area->end - area->start - 1); + remaining--; + } + idx++; + } + trap_data->idx = idx; + if (trap_data->remaining > 0) { + return am_ok; /* Dummy return value */ + } else { + Eterm* hp; + Eterm count_term; + Eterm memory_term; + Eterm res; + Uint memory; + Uint hsz = MAP_SZ(2); + + memory = sizeof(HashTable) + (trap_data->table->allocated-1) * + sizeof(Eterm) + trap_data->memory; + (void) erts_bld_uint(NULL, &hsz, hash_table->num_entries); + (void) erts_bld_uint(NULL, &hsz, memory); + hp = HAlloc(c_p, hsz); + count_term = erts_bld_uint(&hp, NULL, hash_table->num_entries); + memory_term = erts_bld_uint(&hp, NULL, memory); + res = MAP2(hp, am_count, count_term, am_memory, memory_term); + return res; + } +} + +#undef DECL_AM + +static Eterm +alloc_trap_data(Process* c_p) +{ + Binary* mbp = erts_create_magic_binary(sizeof(TrapData), + cleanup_trap_data); + Eterm* hp; + + hp = HAlloc(c_p, ERTS_MAGIC_REF_THING_SIZE); + return erts_mk_magic_ref(&hp, &MSO(c_p), mbp); +} + +static int +cleanup_trap_data(Binary *bp) +{ + TrapData* trap_data = ERTS_MAGIC_BIN_DATA(bp); + + if (trap_data->table) { + /* + * The process has been killed and is now exiting. + * Decrement the reference counter for the table. + */ + dec_table_refc(NULL, trap_data->table); + } + return 1; +} + +static Uint +lookup(HashTable* hash_table, Eterm key) +{ + Uint mask = hash_table->mask; + Eterm* table = hash_table->term; + Uint32 idx = make_internal_hash(key, 0); + Eterm term; + + do { + idx++; + term = table[idx & mask]; + } while (is_boxed(term) && !EQ(key, (tuple_val(term))[1])); + return idx & mask; +} + +static HashTable* +tmp_table_copy(HashTable* old_table) +{ + Uint size = old_table->allocated; + HashTable* tmp_table; + Uint i; + + tmp_table = (HashTable *) erts_alloc(ERTS_ALC_T_TMP, + sizeof(HashTable) + + sizeof(Eterm) * (size-1)); + *tmp_table = *old_table; + for (i = 0; i < size; i++) { + tmp_table->term[i] = old_table->term[i]; + } + return tmp_table; +} + +static HashTable* +copy_table(HashTable* old_table, Uint new_size, int rehash) +{ + HashTable* new_table; + Uint old_size = old_table->allocated; + Uint i; + + new_table = (HashTable *) erts_alloc(ERTS_ALC_T_PERSISTENT_TERM, + sizeof(HashTable) + + sizeof(Eterm) * (new_size-1)); + if (old_table->allocated == new_size && !rehash) { + /* + * Same size and no key deleted. Make an exact copy of the table. + */ + *new_table = *old_table; + for (i = 0; i < new_size; i++) { + new_table->term[i] = old_table->term[i]; + } + } else { + /* + * The size of the table has changed or an element has been + * deleted. Must rehash, by inserting all old terms into the + * new (empty) table. 
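 *
 * An editorial note on the resize thresholds: with LOAD_FACTOR == 50,
 * MUST_GROW fires when 100*num_entries >= 50*allocated, i.e. at 50%
 * occupancy, and MUST_SHRINK when 200*num_entries <= 50*allocated,
 * i.e. at 25% occupancy (and never below INITIAL_SIZE). For
 * allocated == 16 that means: grow to 32 slots at 8 entries, shrink
 * back to 8 slots only when down to 4 entries. The factor-of-two gap
 * provides hysteresis, so alternating put/erase around a threshold
 * does not resize on every call.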
+ */
+ new_table->allocated = new_size;
+ new_table->num_entries = old_table->num_entries;
+ new_table->mask = new_size - 1;
+ for (i = 0; i < new_size; i++) {
+ new_table->term[i] = NIL;
+ }
+ for (i = 0; i < old_size; i++) {
+ if (is_tuple(old_table->term[i])) {
+ Eterm key = tuple_val(old_table->term[i])[1];
+ Uint entry_index = lookup(new_table, key);
+ ASSERT(is_nil(new_table->term[entry_index]));
+ new_table->term[entry_index] = old_table->term[i];
+ }
+ }
+ }
+ new_table->first_to_delete = 0;
+ new_table->num_to_delete = 0;
+ erts_atomic_init_nob(&new_table->refc, (erts_aint_t)1);
+ return new_table;
+}
+
+static void
+mark_for_deletion(HashTable* hash_table, Uint entry_index)
+{
+ hash_table->first_to_delete = entry_index;
+ hash_table->num_to_delete = 1;
+}
+
+static ErtsLiteralArea*
+term_to_area(Eterm tuple)
+{
+ ASSERT(is_tuple_arity(tuple, 2));
+ return (ErtsLiteralArea *) (((char *) tuple_val(tuple)) -
+ offsetof(ErtsLiteralArea, start));
+}
+
+static void
+table_updater(void* data)
+{
+ HashTable* old_table;
+ HashTable* new_table;
+
+ old_table = (HashTable *) erts_atomic_read_nob(&the_hash_table);
+ new_table = (HashTable *) data;
+ ASSERT(new_table->num_to_delete == 0);
+ erts_atomic_set_nob(&the_hash_table, (erts_aint_t)new_table);
+ append_to_delete_queue(old_table);
+ erts_schedule_thr_prgr_later_op(table_deleter,
+ old_table,
+ &old_table->thr_prog_op);
+ release_update_permission(1);
+}
+
+static void
+table_deleter(void* data)
+{
+ HashTable* old_table = (HashTable *) data;
+
+ dec_table_refc(NULL, old_table);
+}
+
+static void
+dec_table_refc(Process* c_p, HashTable* old_table)
+{
+ erts_aint_t refc = erts_atomic_dec_read_nob(&old_table->refc);
+
+ if (refc == 0) {
+ HashTable* to_delete;
+
+ while ((to_delete = next_to_delete()) != NULL) {
+ delete_table(c_p, to_delete);
+ }
+ }
+}
+
+static void
+delete_table(Process* c_p, HashTable* table)
+{
+ Uint idx = table->first_to_delete;
+ Uint n = table->num_to_delete;
+
+ /*
+ * There are no longer any references to this hash table.
+ *
+ * Any literals marked for deletion can be queued for
+ * release, and the table itself can be deallocated.
+ */
+
+#ifdef DEBUG
+ if (n == 1) {
+ ASSERT(is_tuple_arity(table->term[idx], 2));
+ }
+#endif
+
+ while (n > 0) {
+ Eterm term = table->term[idx];
+
+ if (is_tuple_arity(term, 2)) {
+ if (is_immed(tuple_val(term)[2])) {
+ erts_release_literal_area(term_to_area(term));
+ } else {
+ erts_queue_release_literals(c_p, term_to_area(term));
+ }
+ }
+ idx++, n--;
+ }
+ erts_free(ERTS_ALC_T_PERSISTENT_TERM, table);
+}
+
+/*
+ * Caller *must* yield if this function returns 0.
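 *
 * Every updating BIF in this file wraps this function in the same
 * pattern (shown here in outline, argument lists elided):
 *
 *     if (!try_seize_update_permission(BIF_P)) {
 *         ERTS_BIF_YIELD1(bif_export[...], BIF_P, ...);
 *     }
 *     ...build the replacement table...
 *     erts_schedule_thr_prgr_later_op(table_updater, new_table, &thr_prog_op);
 *     suspend_updater(BIF_P);
 *     ERTS_BIF_YIELD_RETURN(BIF_P, ...);
 *
 * Yielding on failure is mandatory because the process has already
 * been suspended and queued below; the scheduler must regain control
 * so the process sleeps until release_update_permission() resumes it.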
+ */ + +static int +try_seize_update_permission(Process* c_p) +{ + int success; + + ASSERT(!erts_thr_progress_is_blocking()); /* to avoid deadlock */ + ASSERT(c_p != NULL); + + erts_mtx_lock(&update_table_permission_mtx); + ASSERT(updater_process != c_p); + success = (updater_process == NULL); + if (success) { + updater_process = c_p; + } else { + struct update_queue_item* qitem; + qitem = erts_alloc(ERTS_ALC_T_PERSISTENT_LOCK_Q, sizeof(*qitem)); + qitem->p = c_p; + erts_proc_inc_refc(c_p); + qitem->next = update_queue; + update_queue = qitem; + erts_suspend(c_p, ERTS_PROC_LOCK_MAIN, NULL); + } + erts_mtx_unlock(&update_table_permission_mtx); + return success; +} + +static void +release_update_permission(int release_updater) +{ + erts_mtx_lock(&update_table_permission_mtx); + ASSERT(updater_process != NULL); + + if (release_updater) { + erts_proc_lock(updater_process, ERTS_PROC_LOCK_STATUS); + if (!ERTS_PROC_IS_EXITING(updater_process)) { + erts_resume(updater_process, ERTS_PROC_LOCK_STATUS); + } + erts_proc_unlock(updater_process, ERTS_PROC_LOCK_STATUS); + } + updater_process = NULL; + + while (update_queue != NULL) { /* Unleash the entire herd */ + struct update_queue_item* qitem = update_queue; + erts_proc_lock(qitem->p, ERTS_PROC_LOCK_STATUS); + if (!ERTS_PROC_IS_EXITING(qitem->p)) { + erts_resume(qitem->p, ERTS_PROC_LOCK_STATUS); + } + erts_proc_unlock(qitem->p, ERTS_PROC_LOCK_STATUS); + update_queue = qitem->next; + erts_proc_dec_refc(qitem->p); + erts_free(ERTS_ALC_T_PERSISTENT_LOCK_Q, qitem); + } + erts_mtx_unlock(&update_table_permission_mtx); +} + +static void +suspend_updater(Process* c_p) +{ +#ifdef DEBUG + ASSERT(c_p != NULL); + erts_mtx_lock(&update_table_permission_mtx); + ASSERT(updater_process == c_p); + erts_mtx_unlock(&update_table_permission_mtx); +#endif + erts_suspend(c_p, ERTS_PROC_LOCK_MAIN, NULL); +} + +static void +append_to_delete_queue(HashTable* table) +{ + erts_mtx_lock(&delete_queue_mtx); + table->delete_next = NULL; + *delete_queue_tail = table; + delete_queue_tail = &table->delete_next; + erts_mtx_unlock(&delete_queue_mtx); +} + +static HashTable* +next_to_delete(void) +{ + HashTable* table; + + erts_mtx_lock(&delete_queue_mtx); + table = delete_queue_head; + if (table) { + if (erts_atomic_read_nob(&table->refc)) { + /* + * This hash table is still referenced. Hash tables + * must be deleted in order, so we return a NULL + * pointer. + */ + table = NULL; + } else { + /* + * Remove the first hash table from the queue. + */ + delete_queue_head = table->delete_next; + if (delete_queue_head == NULL) { + delete_queue_tail = &delete_queue_head; + } + } + } + erts_mtx_unlock(&delete_queue_mtx); + return table; +} diff --git a/erts/emulator/beam/erl_bif_unique.h b/erts/emulator/beam/erl_bif_unique.h index 40b70667c0..944788c67c 100644 --- a/erts/emulator/beam/erl_bif_unique.h +++ b/erts/emulator/beam/erl_bif_unique.h @@ -242,11 +242,11 @@ erts_internal_ref_number_cmp(Uint32 num1[ERTS_REF_NUMBERS], Uint32 num2[ERTS_REF_NUMBERS]) { if (num1[2] != num2[2]) - return (int) ((Sint64) num1[2] - (Sint64) num2[2]); + return num1[2] > num2[2] ? 1 : -1; if (num1[1] != num2[1]) - return (int) ((Sint64) num1[1] - (Sint64) num2[1]); + return num1[1] > num2[1] ? 1 : -1; if (num1[0] != num2[0]) - return (int) ((Sint64) num1[0] - (Sint64) num2[0]); + return num1[0] > num2[0] ? 
1 : -1; return 0; } diff --git a/erts/emulator/beam/erl_db_tree.c b/erts/emulator/beam/erl_db_tree.c index 788718ab09..45e4be2426 100644 --- a/erts/emulator/beam/erl_db_tree.c +++ b/erts/emulator/beam/erl_db_tree.c @@ -1860,22 +1860,14 @@ static int db_select_replace_tree(Process *p, DbTable *tbl, Eterm tid, sc.mp = mpi.mp; - stack = get_static_stack(tb); if (!mpi.got_partial && mpi.some_limitation && CMP_EQ(mpi.least,mpi.most)) { - TreeDbTerm* term = *(mpi.save_term); doit_select_replace(tb,mpi.save_term,&sc,0 /* dummy */); - if (stack != NULL) { - if (TOP_NODE(stack) == term) - // throw away potentially invalid reference - REPLACE_TOP_NODE(stack, *(mpi.save_term)); - release_stack(tb, stack); - } + reset_static_stack(tb); /* may refer replaced term */ RET_TO_BIF(erts_make_integer(sc.replaced,p),DB_ERROR_NONE); } - if (stack == NULL) - stack = get_any_stack(tb); + stack = get_any_stack(tb); if (mpi.some_limitation) { if ((this = find_next_from_pb_key(tb, stack, mpi.most)) != NULL) { diff --git a/erts/emulator/beam/erl_dirty_bif.tab b/erts/emulator/beam/erl_dirty_bif.tab index 086275fbe5..20299ff604 100644 --- a/erts/emulator/beam/erl_dirty_bif.tab +++ b/erts/emulator/beam/erl_dirty_bif.tab @@ -59,8 +59,6 @@ dirty-cpu erts_debug:lcnt_clear/0 dirty-cpu-test erlang:'++'/2 dirty-cpu-test erlang:append/2 -dirty-cpu-test erlang:'--'/2 -dirty-cpu-test erlang:subtract/2 dirty-cpu-test erlang:iolist_size/1 dirty-cpu-test erlang:make_tuple/2 dirty-cpu-test erlang:make_tuple/3 diff --git a/erts/emulator/beam/erl_gc.c b/erts/emulator/beam/erl_gc.c index 4a80c00416..3a50b294d1 100644 --- a/erts/emulator/beam/erl_gc.c +++ b/erts/emulator/beam/erl_gc.c @@ -681,7 +681,7 @@ garbage_collect(Process* p, ErlHeapFragment *live_hf_end, ErtsMonotonicTime start_time; ErtsSchedulerData *esdp = erts_proc_sched_data(p); erts_aint32_t state; - ERTS_MSACC_PUSH_STATE_M(); + ERTS_MSACC_PUSH_STATE(); #ifdef USE_VM_PROBES DTRACE_CHARBUF(pidbuf, DTRACE_TERM_BUF_SIZE); #endif @@ -711,7 +711,7 @@ garbage_collect(Process* p, ErlHeapFragment *live_hf_end, else if (p->live_hf_end != ERTS_INVALID_HFRAG_PTR) live_hf_end = p->live_hf_end; - ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_GC); + ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_GC); erts_atomic32_read_bor_nob(&p->state, ERTS_PSFLG_GC); if (erts_system_monitor_long_gc != 0) @@ -759,7 +759,7 @@ garbage_collect(Process* p, ErlHeapFragment *live_hf_end, gc_trace_end_tag = am_gc_minor_end; } else { do_major_collection: - ERTS_MSACC_SET_STATE_CACHED_M_X(ERTS_MSACC_STATE_GC_FULL); + ERTS_MSACC_SET_STATE_CACHED_X(ERTS_MSACC_STATE_GC_FULL); if (IS_TRACED_FL(p, F_TRACE_GC)) { trace_gc(p, am_gc_major_start, need, THE_NON_VALUE); } @@ -770,7 +770,7 @@ do_major_collection: p->flags &= ~(F_DIRTY_MAJOR_GC|F_DIRTY_MINOR_GC); DTRACE2(gc_major_end, pidbuf, reclaimed_now); gc_trace_end_tag = am_gc_major_end; - ERTS_MSACC_SET_STATE_CACHED_M_X(ERTS_MSACC_STATE_GC); + ERTS_MSACC_SET_STATE_CACHED_X(ERTS_MSACC_STATE_GC); } reset_active_writer(p); @@ -800,7 +800,7 @@ do_major_collection: /* We have to make sure that we have space for need on the heap */ res = delay_garbage_collection(p, live_hf_end, need, fcalls); - ERTS_MSACC_POP_STATE_M(); + ERTS_MSACC_POP_STATE(); return res; } @@ -843,7 +843,7 @@ do_major_collection: FLAGS(p) &= ~(F_FORCE_GC|F_HIBERNATED); p->live_hf_end = ERTS_INVALID_HFRAG_PTR; - ERTS_MSACC_POP_STATE_M(); + ERTS_MSACC_POP_STATE(); #ifdef CHECK_FOR_HOLES /* @@ -1133,9 +1133,28 @@ erts_garbage_collect_literals(Process* p, Eterm* literals, reds = (Sint64) garbage_collect(p, 
ERTS_INVALID_HFRAG_PTR, 0, p->arg_reg, p->arity, fcalls, ygen_usage); + if (ERTS_PROC_IS_EXITING(p)) { + return 0; + } ASSERT(!(p->flags & (F_DIRTY_MAJOR_GC|F_DIRTY_MINOR_GC))); + if (MAX_HEAP_SIZE_GET(p)) { + Uint new_heap_size; + Uint old_heap_size; + Uint total_heap_size; + + new_heap_size = HEAP_END(p) - HEAP_START(p); + old_heap_size = erts_next_heap_size(lit_size, 0); + total_heap_size = new_heap_size + old_heap_size; + if (MAX_HEAP_SIZE_GET(p) < total_heap_size && + reached_max_heap_size(p, total_heap_size, + new_heap_size, old_heap_size)) { + erts_set_self_exiting(p, am_killed); + return 0; + } + } + /* * Set GC state. */ @@ -2440,7 +2459,7 @@ erts_copy_one_frag(Eterm** hpp, ErlOffHeap* off_heap, *hpp = hp; for (i = 0; i < nrefs; i++) { - if (is_not_immed(refs[i])) + if (is_not_immed(refs[i]) && !erts_is_literal(refs[i],ptr_val(refs[i]))) refs[i] = offset_ptr(refs[i], offs); } bp->off_heap.first = NULL; diff --git a/erts/emulator/beam/erl_hl_timer.c b/erts/emulator/beam/erl_hl_timer.c index 6ec6f8065e..ef7a55fa38 100644 --- a/erts/emulator/beam/erl_hl_timer.c +++ b/erts/emulator/beam/erl_hl_timer.c @@ -3041,15 +3041,23 @@ erts_set_port_timer(Port *c_prt, Sint64 tmo) check_canceled_queue(esdp, esdp->timer_service); - timeout_pos = get_timeout_pos(erts_get_monotonic_time(esdp), tmo); - - create_timer = (tmo < ERTS_TIMER_WHEEL_MSEC - ? create_tw_timer - : create_hl_timer); - tmr = (void *) create_timer(esdp, timeout_pos, 0, ERTS_TMR_PORT, - (void *) c_prt, c_prt->common.id, - THE_NON_VALUE, NULL, NULL, NULL); - erts_atomic_set_relb(&c_prt->common.timer, (erts_aint_t) tmr); + if (tmo == 0) { + erts_atomic_set_relb(&c_prt->common.timer, ERTS_PTMR_TIMEDOUT); + erts_port_task_schedule(c_prt->common.id, + &c_prt->timeout_task, + ERTS_PORT_TASK_TIMEOUT); + } else { + + timeout_pos = get_timeout_pos(erts_get_monotonic_time(esdp), tmo); + + create_timer = (tmo < ERTS_TIMER_WHEEL_MSEC + ? create_tw_timer + : create_hl_timer); + tmr = (void *) create_timer(esdp, timeout_pos, 0, ERTS_TMR_PORT, + (void *) c_prt, c_prt->common.id, + THE_NON_VALUE, NULL, NULL, NULL); + erts_atomic_set_relb(&c_prt->common.timer, (erts_aint_t) tmr); + } } void diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 57c6c10c7f..99e788c718 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -354,6 +354,7 @@ erl_init(int ncpu, erts_init_bif(); erts_init_bif_chksum(); erts_init_bif_binary(); + erts_init_bif_persistent_term(); erts_init_bif_re(); erts_init_unicode(); /* after RE to get access to PCRE unicode */ erts_init_external(); @@ -2358,8 +2359,8 @@ system_cleanup(int flush_async) * The exiting thread might be waiting for * us to block; need to update status... */ - erts_thr_progress_active(NULL, 0); - erts_thr_progress_prepare_wait(NULL); + erts_thr_progress_active(erts_thr_prgr_data(NULL), 0); + erts_thr_progress_prepare_wait(erts_thr_prgr_data(NULL)); } /* Wait forever... 
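 * (Editorial note: erts_thr_prgr_data(NULL) above fetches the calling
 * thread's own ErtsThrPrgrData from thread-specific data; this commit
 * threads that handle through explicitly instead of re-deriving it
 * from an ErtsSchedulerData pointer on every call. The blocking
 * pattern itself is unchanged: mark inactive, prepare_wait, then
 * block, so global thread progress is not stalled by this thread.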
 */
 while (1)
diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c
index 463ae898a3..1416c5f96c 100644
--- a/erts/emulator/beam/erl_lock_check.c
+++ b/erts/emulator/beam/erl_lock_check.c
@@ -97,6 +97,8 @@ static erts_lc_lock_order_t erts_lock_order[] = {
 { "proc_btm", "pid" },
 { "dist_entry", "address" },
 { "dist_entry_links", "address" },
+ { "update_persistent_term_permission", NULL },
+ { "persistent_term_delete_permission", NULL },
 { "code_write_permission", NULL },
 { "purge_state", NULL },
 { "proc_status", "pid" },
diff --git a/erts/emulator/beam/erl_monitor_link.h b/erts/emulator/beam/erl_monitor_link.h
index 9ff8aa509a..ed7bf7d54a 100644
--- a/erts/emulator/beam/erl_monitor_link.h
+++ b/erts/emulator/beam/erl_monitor_link.h
@@ -1387,11 +1387,14 @@ ERTS_GLB_INLINE void
 erts_monitor_release(ErtsMonitor *mon)
 {
 ErtsMonitorData *mdp = erts_monitor_to_data(mon);
- ERTS_ML_ASSERT(!(mon->flags & ERTS_ML_FLG_IN_TABLE));
 ERTS_ML_ASSERT(erts_atomic32_read_nob(&mdp->refc) > 0);
- if (erts_atomic32_dec_read_nob(&mdp->refc) == 0)
+ if (erts_atomic32_dec_read_nob(&mdp->refc) == 0) {
+ ERTS_ML_ASSERT(!(mdp->origin.flags & ERTS_ML_FLG_IN_TABLE));
+ ERTS_ML_ASSERT(!(mdp->target.flags & ERTS_ML_FLG_IN_TABLE));
+
 erts_monitor_destroy__(mdp);
+ }
 }
 
 ERTS_GLB_INLINE void
@@ -1399,12 +1402,14 @@ erts_monitor_release_both(ErtsMonitorData *mdp)
 {
 ERTS_ML_ASSERT((mdp->origin.flags & ERTS_ML_FLGS_SAME)
 == (mdp->target.flags & ERTS_ML_FLGS_SAME));
- ERTS_ML_ASSERT(!(mdp->origin.flags & ERTS_ML_FLG_IN_TABLE));
- ERTS_ML_ASSERT(!(mdp->target.flags & ERTS_ML_FLG_IN_TABLE));
 ERTS_ML_ASSERT(erts_atomic32_read_nob(&mdp->refc) >= 2);
- if (erts_atomic32_add_read_nob(&mdp->refc, (erts_aint32_t) -2) == 0)
+ if (erts_atomic32_add_read_nob(&mdp->refc, (erts_aint32_t) -2) == 0) {
+ ERTS_ML_ASSERT(!(mdp->origin.flags & ERTS_ML_FLG_IN_TABLE));
+ ERTS_ML_ASSERT(!(mdp->target.flags & ERTS_ML_FLG_IN_TABLE));
+
 erts_monitor_destroy__(mdp);
+ }
 }
 
 ERTS_GLB_INLINE int
diff --git a/erts/emulator/beam/erl_node_tables.c b/erts/emulator/beam/erl_node_tables.c
index f4dc60941a..18ed782ae3 100644
--- a/erts/emulator/beam/erl_node_tables.c
+++ b/erts/emulator/beam/erl_node_tables.c
@@ -421,8 +421,25 @@ static void schedule_delete_dist_entry(DistEntry* dep)
 *
 * Note that timeouts do not guarantee thread progress.
 */
- erts_schedule_thr_prgr_later_op(start_timer_delete_dist_entry,
- dep, &dep->later_op);
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ if (esdp && !ERTS_SCHEDULER_IS_DIRTY(esdp)) {
+ erts_schedule_thr_prgr_later_op(start_timer_delete_dist_entry,
+ dep, &dep->later_op);
+ } else {
+ /*
+ * Since OTP 20, it's possible that the destructor is executed on
+ * a dirty scheduler. Aux work cannot be done on a dirty
+ * scheduler, and scheduling any aux work on a dirty scheduler
+ * makes the scheduler loop infinitely.
+ * To avoid this, make a spot jump: schedule this function again
+ * on the first normal scheduler. It is guaranteed to always be
+ * online. Since this is a rare event, it should not pose a big
+ * utilisation hit.
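 *
 * On re-entry the bounce terminates: scheduler 1 is a normal
 * scheduler, so erts_get_scheduler_data() then returns a non-dirty
 * esdp and the branch above schedules the later op as usual:
 *
 *     dirty scheduler -> erts_schedule_misc_aux_work(1, fn, dep)
 *     scheduler 1     -> schedule_delete_dist_entry(dep)
 *                     -> erts_schedule_thr_prgr_later_op(...)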
+ */ + erts_schedule_misc_aux_work(1, + (void (*)(void *))schedule_delete_dist_entry, + (void *) dep); + } } static void diff --git a/erts/emulator/beam/erl_port.h b/erts/emulator/beam/erl_port.h index 2be0a5bf74..25976d38cc 100644 --- a/erts/emulator/beam/erl_port.h +++ b/erts/emulator/beam/erl_port.h @@ -334,6 +334,8 @@ Eterm erts_request_io_bytes(Process *c_p); #define ERTS_PORT_SFLG_INVALID ((Uint32) (1 << 11)) /* Last port to terminate halts the emulator */ #define ERTS_PORT_SFLG_HALT ((Uint32) (1 << 12)) +/* Check if the event in ready_input should be cleaned */ +#define ERTS_PORT_SFLG_CHECK_FD_CLEANUP ((Uint32) (1 << 13)) #ifdef DEBUG /* Only debug: make sure all flags aren't cleared unintentionally */ #define ERTS_PORT_SFLG_PORT_DEBUG ((Uint32) (1 << 31)) diff --git a/erts/emulator/beam/erl_port_task.c b/erts/emulator/beam/erl_port_task.c index 4928d80f27..c8f2e88127 100644 --- a/erts/emulator/beam/erl_port_task.c +++ b/erts/emulator/beam/erl_port_task.c @@ -97,6 +97,9 @@ static void chk_task_queues(Port *pp, ErtsPortTask *execq, int processing_busy_q typedef union { struct { /* I/O tasks */ ErlDrvEvent event; +#if ERTS_POLL_USE_SCHEDULER_POLLING + int is_scheduler_event; +#endif } io; struct { ErtsProc2PortSigCallback callback; @@ -141,6 +144,9 @@ struct ErtsPortTaskBusyCallerTable_ { ErtsPortTaskBusyCaller pre_alloc_busy_caller; }; +#if ERTS_POLL_USE_SCHEDULER_POLLING +erts_atomic_t erts_port_task_outstanding_io_tasks; +#endif static void begin_port_cleanup(Port *pp, ErtsPortTask **execq, @@ -578,13 +584,26 @@ reset_handle(ErtsPortTask *ptp) } static ERTS_INLINE void -reset_executed_io_task_handle(ErtsPortTask *ptp) +reset_executed_io_task_handle(Port *prt, ErtsPortTask *ptp) { if (ptp->u.alive.handle) { ASSERT(ptp == handle2task(ptp->u.alive.handle)); - /* The port task handle is reset inside task_executed */ - erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle, - reset_port_task_handle); +#if ERTS_POLL_USE_SCHEDULER_POLLING + if (ptp->u.alive.td.io.is_scheduler_event) { + if ((erts_atomic32_read_nob(&prt->state) & ERTS_PORT_SFLG_CHECK_FD_CLEANUP)) { + erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle, + reset_port_task_handle); + erts_atomic32_read_band_nob(&prt->state, ~ERTS_PORT_SFLG_CHECK_FD_CLEANUP); + } else { + reset_port_task_handle(ptp->u.alive.handle); + } + } else +#endif + { + /* The port task handle is reset inside task_executed */ + erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle, + reset_port_task_handle); + } } } @@ -1307,6 +1326,22 @@ erts_port_task_abort(ErtsPortTaskHandle *pthp) res = - 1; /* Task already aborted, executing, or executed */ else { reset_port_task_handle(pthp); + +#if ERTS_POLL_USE_SCHEDULER_POLLING + switch (ptp->type) { + case ERTS_PORT_TASK_INPUT: + case ERTS_PORT_TASK_OUTPUT: + if (ptp->u.alive.td.io.is_scheduler_event) { + ASSERT(erts_atomic_read_nob( + &erts_port_task_outstanding_io_tasks) > 0); + erts_atomic_dec_relb(&erts_port_task_outstanding_io_tasks); + } + break; + default: + break; + } +#endif + res = 0; } } @@ -1442,7 +1477,14 @@ erts_port_task_schedule(Eterm id, va_list argp; va_start(argp, type); ptp->u.alive.td.io.event = va_arg(argp, ErlDrvEvent); +#if ERTS_POLL_USE_SCHEDULER_POLLING + ptp->u.alive.td.io.is_scheduler_event = va_arg(argp, int); +#endif va_end(argp); +#if ERTS_POLL_USE_SCHEDULER_POLLING + if (ptp->u.alive.td.io.is_scheduler_event) + erts_atomic_inc_relb(&erts_port_task_outstanding_io_tasks); +#endif break; } case ERTS_PORT_TASK_PROC_SIG: { @@ -1621,12 +1663,14 @@ 
erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) int processing_busy_q; int vreds = 0; int reds = 0; - erts_aint_t io_tasks_executed = 0; int fpe_was_unmasked; erts_aint32_t state; int active; Uint64 start_time = 0; ErtsSchedulerData *esdp = runq->scheduler; +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_aint_t io_tasks_executed = 0; +#endif ERTS_MSACC_PUSH_STATE_M(); ERTS_LC_ASSERT(erts_lc_runq_is_locked(runq)); @@ -1722,8 +1766,11 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) for input and output */ (*pp->drv_ptr->ready_input)((ErlDrvData) pp->drv_data, ptp->u.alive.td.io.event); - reset_executed_io_task_handle(ptp); - io_tasks_executed++; + reset_executed_io_task_handle(pp, ptp); +#if ERTS_POLL_USE_SCHEDULER_POLLING + if (ptp->u.alive.td.io.is_scheduler_event) + io_tasks_executed++; +#endif break; case ERTS_PORT_TASK_OUTPUT: reds = ERTS_PORT_REDS_OUTPUT; @@ -1732,8 +1779,11 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) LTTNG_DRIVER(driver_ready_output, pp); (*pp->drv_ptr->ready_output)((ErlDrvData) pp->drv_data, ptp->u.alive.td.io.event); - reset_executed_io_task_handle(ptp); - io_tasks_executed++; + reset_executed_io_task_handle(pp, ptp); +#if ERTS_POLL_USE_SCHEDULER_POLLING + if (ptp->u.alive.td.io.is_scheduler_event) + io_tasks_executed++; +#endif break; case ERTS_PORT_TASK_PROC_SIG: { ErtsProc2PortSigData *sigdp = &ptp->u.alive.td.psig.data; @@ -1799,6 +1849,15 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) erts_unblock_fpe(fpe_was_unmasked); ERTS_MSACC_POP_STATE_M(); +#if ERTS_POLL_USE_SCHEDULER_POLLING + if (io_tasks_executed) { + ASSERT(erts_atomic_read_nob(&erts_port_task_outstanding_io_tasks) + >= io_tasks_executed); + erts_atomic_add_relb(&erts_port_task_outstanding_io_tasks, + -1*io_tasks_executed); + } +#endif + ASSERT(runq == erts_get_runq_port(pp)); active = finalize_exec(pp, &execq, processing_busy_q); @@ -2086,6 +2145,10 @@ erts_dequeue_port(ErtsRunQueue *rq) void erts_port_task_init(void) { +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_atomic_init_nob(&erts_port_task_outstanding_io_tasks, + (erts_aint_t) 0); +#endif init_port_task_alloc(erts_no_schedulers + erts_no_poll_threads + 1); /* aux_thread */ init_busy_caller_table_alloc(); diff --git a/erts/emulator/beam/erl_port_task.h b/erts/emulator/beam/erl_port_task.h index ae78a7d8a3..ca5183b305 100644 --- a/erts/emulator/beam/erl_port_task.h +++ b/erts/emulator/beam/erl_port_task.h @@ -38,6 +38,8 @@ typedef erts_atomic_t ErtsPortTaskHandle; #ifndef ERL_PORT_TASK_H__ #define ERL_PORT_TASK_H__ +#include "erl_poll.h" + #undef ERTS_INCLUDE_SCHEDULER_INTERNALS #if (defined(ERL_PROCESS_C__) \ || defined(ERL_PORT_TASK_C__) \ @@ -54,8 +56,8 @@ typedef erts_atomic_t ErtsPortTaskHandle; #define ERTS_PT_FLG_BAD_OUTPUT (1 << 4) typedef enum { - ERTS_PORT_TASK_INPUT, - ERTS_PORT_TASK_OUTPUT, + ERTS_PORT_TASK_INPUT = 0, + ERTS_PORT_TASK_OUTPUT = 1, ERTS_PORT_TASK_TIMEOUT, ERTS_PORT_TASK_DIST_CMD, ERTS_PORT_TASK_PROC_SIG @@ -134,6 +136,12 @@ ERTS_GLB_INLINE void erts_port_task_sched_unlock(ErtsPortTaskSched *ptsp); ERTS_GLB_INLINE int erts_port_task_sched_lock_is_locked(ErtsPortTaskSched *ptsp); ERTS_GLB_INLINE void erts_port_task_sched_enter_exiting_state(ErtsPortTaskSched *ptsp); +#if defined(ERTS_INCLUDE_SCHEDULER_INTERNALS) && ERTS_POLL_USE_SCHEDULER_POLLING +ERTS_GLB_INLINE int erts_port_task_have_outstanding_io_tasks(void); +/* NOTE: Do not access any of the exported variables directly */ +extern erts_atomic_t erts_port_task_outstanding_io_tasks; +#endif + 
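/*
 * An editorial sketch of how the counter above is kept consistent
 * (all call sites are in erl_port_task.c and erl_process.c in this
 * same diff):
 *
 *     erts_port_task_schedule()   inc_relb when a scheduler-polled
 *                                 I/O task is enqueued
 *     erts_port_task_abort()      dec_relb if such a task is aborted
 *                                 before it runs
 *     erts_port_task_execute()    add_relb(-n) after executing n
 *                                 such tasks
 *     prepare_for_sys_schedule()  read_acqb, via the inline below,
 *                                 before sleeping in erts_check_io()
 *
 * The release/acquire pairing ensures that a scheduler observing the
 * counter at zero also observes the effects of the completed tasks.
 */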
#if ERTS_GLB_INLINE_INCL_FUNC_DEF ERTS_GLB_INLINE void @@ -211,6 +219,15 @@ erts_port_task_sched_enter_exiting_state(ErtsPortTaskSched *ptsp) erts_atomic32_read_bor_nob(&ptsp->flags, ERTS_PTS_FLG_EXITING); } +#if defined(ERTS_INCLUDE_SCHEDULER_INTERNALS) && ERTS_POLL_USE_SCHEDULER_POLLING +ERTS_GLB_INLINE int +erts_port_task_have_outstanding_io_tasks(void) +{ + return (erts_atomic_read_acqb(&erts_port_task_outstanding_io_tasks) + != 0); +} +#endif + #endif #ifdef ERTS_INCLUDE_SCHEDULER_INTERNALS diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index 0f7f1598fd..2427d87f66 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -174,7 +174,6 @@ ErtsLcPSDLocks erts_psd_required_locks[ERTS_PSD_SIZE]; typedef struct { int aux_work; int tse; - int sys_schedule; } ErtsBusyWaitParams; static ErtsBusyWaitParams sched_busy_wait_params[ERTS_SCHED_TYPE_LAST + 1]; @@ -344,6 +343,9 @@ erts_sched_stat_t erts_sched_stat; static erts_tsd_key_t ERTS_WRITE_UNLIKELY(sched_data_key); +#if ERTS_POLL_USE_SCHEDULER_POLLING +static erts_atomic32_t doing_sys_schedule; +#endif static erts_atomic32_t no_empty_run_queues; long erts_runq_supervision_interval = 0; static ethr_event runq_supervision_event; @@ -1646,7 +1648,7 @@ haw_thr_prgr_wakeup(ErtsAuxWorkData *awdp, ErtsThrPrgrVal val) awdp->latest_wakeup = val; haw_chk_later_cleanup_op_wakeup(awdp, val); } - erts_thr_progress_wakeup(awdp->esdp, val); + erts_thr_progress_wakeup(erts_thr_prgr_data(awdp->esdp), val); } } @@ -1656,7 +1658,7 @@ haw_thr_prgr_soft_wakeup(ErtsAuxWorkData *awdp, ErtsThrPrgrVal val) if (erts_thr_progress_cmp(val, awdp->latest_wakeup) > 0) { awdp->latest_wakeup = val; haw_chk_later_cleanup_op_wakeup(awdp, val); - erts_thr_progress_wakeup(awdp->esdp, val); + erts_thr_progress_wakeup(erts_thr_prgr_data(awdp->esdp), val); } } @@ -1670,7 +1672,7 @@ haw_thr_prgr_later_cleanup_op_wakeup(ErtsAuxWorkData *awdp, ErtsThrPrgrVal val, else { awdp->latest_wakeup = val; awdp->later_op.size = thr_prgr_later_cleanup_op_threshold; - erts_thr_progress_wakeup(awdp->esdp, val); + erts_thr_progress_wakeup(erts_thr_prgr_data(awdp->esdp), val); } } } @@ -3066,6 +3068,7 @@ aux_thread(void *unused) ErtsSchedulerSleepInfo *ssi = ERTS_SCHED_SLEEP_INFO_IX(-1); erts_aint32_t aux_work; ErtsThrPrgrCallbacks callbacks; + ErtsThrPrgrData *tpd; int thr_prgr_active = 1; ERTS_MSACC_DECLARE_CACHE(); @@ -3087,12 +3090,16 @@ aux_thread(void *unused) callbacks.wait = thr_prgr_wait; callbacks.finalize_wait = thr_prgr_fin_wait; - erts_thr_progress_register_managed_thread(NULL, &callbacks, 1); + tpd = erts_thr_progress_register_managed_thread(NULL, &callbacks, 1); init_aux_work_data(awdp, NULL, NULL); awdp->ssi = ssi; #if ERTS_POLL_USE_FALLBACK - ssi->psi = erts_create_pollset_thread(-1); +#if ERTS_POLL_USE_SCHEDULER_POLLING + ssi->psi = erts_create_pollset_thread(-2, tpd); +#else + ssi->psi = erts_create_pollset_thread(-1, tpd); +#endif #endif sched_prep_spin_wait(ssi); @@ -3105,11 +3112,11 @@ aux_thread(void *unused) aux_work = erts_atomic32_read_acqb(&ssi->aux_work); if (aux_work) { if (!thr_prgr_active) - erts_thr_progress_active(NULL, thr_prgr_active = 1); + erts_thr_progress_active(tpd, thr_prgr_active = 1); aux_work = handle_aux_work(awdp, aux_work, 1); ERTS_MSACC_UPDATE_CACHE(); - if (aux_work && erts_thr_progress_update(NULL)) - erts_thr_progress_leader_update(NULL); + if (aux_work && erts_thr_progress_update(tpd)) + erts_thr_progress_leader_update(tpd); } if (!aux_work) { @@ -3120,7 +3127,7 @@ aux_thread(void 
*unused)
 #endif
 if (thr_prgr_active)
- erts_thr_progress_active(NULL, thr_prgr_active = 0);
+ erts_thr_progress_active(tpd, thr_prgr_active = 0);
#if ERTS_POLL_USE_FALLBACK
@@ -3132,11 +3139,11 @@ aux_thread(void *unused)
 if (flgs & ERTS_SSI_FLG_SLEEPING) {
 ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING);
 ASSERT(flgs & ERTS_SSI_FLG_WAITING);
- erts_check_io(ssi->psi);
+ erts_check_io(ssi->psi, ERTS_POLL_INF_TIMEOUT);
 }
 }
#else
- erts_thr_progress_prepare_wait(NULL);
+ erts_thr_progress_prepare_wait(tpd);
 flgs = sched_spin_wait(ssi, 0);
@@ -3153,7 +3160,7 @@ aux_thread(void *unused)
 ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_OTHER);
 }
 }
- erts_thr_progress_finalize_wait(NULL);
+ erts_thr_progress_finalize_wait(tpd);
#endif
 }
@@ -3171,7 +3178,8 @@ poll_thread(void *arg)
 erts_aint32_t aux_work;
 ErtsThrPrgrCallbacks callbacks;
 int thr_prgr_active = 1;
- struct erts_poll_thread *psi = erts_create_pollset_thread(id);
+ struct erts_poll_thread *psi;
+ ErtsThrPrgrData *tpd;
 ERTS_MSACC_DECLARE_CACHE();
#ifdef ERTS_ENABLE_LOCK_CHECK
@@ -3192,9 +3200,12 @@ poll_thread(void *arg)
 callbacks.wait = thr_prgr_wait;
 callbacks.finalize_wait = thr_prgr_fin_wait;
- erts_thr_progress_register_managed_thread(NULL, &callbacks, 0);
+ tpd = erts_thr_progress_register_managed_thread(NULL, &callbacks, 0);
 init_aux_work_data(awdp, NULL, NULL);
 awdp->ssi = ssi;
+
+ psi = erts_create_pollset_thread(id, tpd);
+ ssi->psi = psi;
 sched_prep_spin_wait(ssi);
@@ -3207,16 +3218,16 @@ poll_thread(void *arg)
 aux_work = erts_atomic32_read_acqb(&ssi->aux_work);
 if (aux_work) {
 if (!thr_prgr_active)
- erts_thr_progress_active(NULL, thr_prgr_active = 1);
+ erts_thr_progress_active(tpd, thr_prgr_active = 1);
 aux_work = handle_aux_work(awdp, aux_work, 1);
 ERTS_MSACC_UPDATE_CACHE();
- if (aux_work && erts_thr_progress_update(NULL))
- erts_thr_progress_leader_update(NULL);
+ if (aux_work && erts_thr_progress_update(tpd))
+ erts_thr_progress_leader_update(tpd);
 }
 if (!aux_work) {
 if (thr_prgr_active)
- erts_thr_progress_active(NULL, thr_prgr_active = 0);
+ erts_thr_progress_active(tpd, thr_prgr_active = 0);
 flgs = sched_spin_wait(ssi, 0);
@@ -3226,7 +3237,7 @@ poll_thread(void *arg)
 if (flgs & ERTS_SSI_FLG_SLEEPING) {
 ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING);
 ASSERT(flgs & ERTS_SSI_FLG_WAITING);
- erts_check_io(psi);
+ erts_check_io(psi, ERTS_POLL_INF_TIMEOUT);
 }
 }
 }
@@ -3236,6 +3247,59 @@ poll_thread(void *arg)
 return NULL;
}
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+static ERTS_INLINE void
+clear_sys_scheduling(void)
+{
+ erts_atomic32_set_mb(&doing_sys_schedule, 0);
+}
+
+static ERTS_INLINE int
+try_set_sys_scheduling(void)
+{
+ return 0 == erts_atomic32_cmpxchg_acqb(&doing_sys_schedule, 1, 0);
+}
+
+
+static ERTS_INLINE int
+prepare_for_sys_schedule(void)
+{
+ while (!erts_port_task_have_outstanding_io_tasks()
+ && try_set_sys_scheduling()) {
+ if (!erts_port_task_have_outstanding_io_tasks())
+ return 1;
+ clear_sys_scheduling();
+ }
+ return 0;
+}
+
+static void
+check_io_timer(void *null)
+{
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ if (prepare_for_sys_schedule()) {
+ erts_check_io(esdp->ssi->psi, ERTS_POLL_NO_TIMEOUT);
+ clear_sys_scheduling();
+ }
+
+ /* The timer is cleared if this scheduler's run-queue became empty
+ or if the CHECKIO flag was cleared. The CHECKIO flag is cleared
+ when check_balance assigns another scheduler to be the poller in
+ the overload scenario.
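
 Concretely, the test below re-arms the timer only when

 (flags & (ERTS_RUNQ_FLG_OUT_OF_WORK|ERTS_RUNQ_FLG_CHECKIO))
 == ERTS_RUNQ_FLG_CHECKIO

 i.e. when CHECKIO is still set and OUT_OF_WORK is clear: the queue
 still has work and this scheduler is still the designated poller.
 In every other combination the timer lapses and CHECKIO is unset.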
*/ + if ((ERTS_RUNQ_FLGS_GET_NOB(esdp->run_queue) & (ERTS_RUNQ_FLG_OUT_OF_WORK|ERTS_RUNQ_FLG_CHECKIO)) + == ERTS_RUNQ_FLG_CHECKIO) { + erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT, + check_io_timer, NULL); + } else { + ERTS_RUNQ_FLGS_UNSET(esdp->run_queue, ERTS_RUNQ_FLG_CHECKIO); + } +} + +#else +#define clear_sys_scheduling() +#define prepare_for_sys_schedule() 0 +#endif + static void scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) { @@ -3286,13 +3350,13 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) aux_work = erts_atomic32_read_acqb(&ssi->aux_work); if (aux_work && !ERTS_SCHEDULER_IS_DIRTY(esdp)) { if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } aux_work = handle_aux_work(&esdp->aux_work_data, aux_work, 1); ERTS_MSACC_UPDATE_CACHE(); - if (aux_work && erts_thr_progress_update(esdp)) - erts_thr_progress_leader_update(esdp); + if (aux_work && erts_thr_progress_update(erts_thr_prgr_data(esdp))) + erts_thr_progress_leader_update(erts_thr_prgr_data(esdp)); } if (aux_work) { @@ -3301,7 +3365,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) current_time = erts_get_monotonic_time(esdp); if (current_time >= erts_next_timeout_time(esdp->next_tmo_ref)) { if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } erts_bump_timers(esdp->timer_wheel, current_time); @@ -3321,19 +3385,36 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) } if (do_timeout) { if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } } - else { + else if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && prepare_for_sys_schedule()) { + /* We sleep in check_io, only for normal schedulers */ + if (thr_prgr_active) { + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0); + sched_wall_time_change(esdp, 0); + } + flgs = sched_spin_wait(ssi, 0); + if (flgs & ERTS_SSI_FLG_SLEEPING) { + ASSERT(flgs & ERTS_SSI_FLG_WAITING); + flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING); + if (flgs & ERTS_SSI_FLG_SLEEPING) { + ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING); + ASSERT(flgs & ERTS_SSI_FLG_WAITING); + erts_check_io(ssi->psi, timeout_time); + current_time = erts_get_monotonic_time(esdp); + } + } + clear_sys_scheduling(); + } else { if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { if (thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 0); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0); sched_wall_time_change(esdp, 0); } - erts_thr_progress_prepare_wait(esdp); + erts_thr_progress_prepare_wait(erts_thr_prgr_data(esdp)); } - flgs = sched_spin_wait(ssi, spincount); if (flgs & ERTS_SSI_FLG_SLEEPING) { ASSERT(flgs & ERTS_SSI_FLG_WAITING); @@ -3363,7 +3444,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) } } if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) - erts_thr_progress_finalize_wait(esdp); + erts_thr_progress_finalize_wait(erts_thr_prgr_data(esdp)); } if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && current_time >= timeout_time) erts_bump_timers(esdp->timer_wheel, current_time); @@ -3392,7 +3473,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) if 
(ERTS_SCHEDULER_IS_DIRTY(esdp))
 dirty_sched_wall_time_change(esdp, working = 1);
 else if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1);
 sched_wall_time_change(esdp, 1);
 }
@@ -4580,6 +4661,15 @@ check_balance(ErtsRunQueue *c_rq)
 if (blnc_no_rqs == 1) {
 c_rq->check_balance_reds = INT_MAX;
 erts_atomic32_set_nob(&balance_info.checking_balance, 0);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ c_rq->check_balance_reds = ERTS_RUNQ_CALL_CHECK_BALANCE_REDS;
+ if ((ERTS_RUNQ_FLGS_GET_NOB(c_rq) & (ERTS_RUNQ_FLG_OUT_OF_WORK|ERTS_RUNQ_FLG_CHECKIO))
+ == 0) {
+ ERTS_RUNQ_FLGS_SET(c_rq, ERTS_RUNQ_FLG_CHECKIO);
+ erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT, check_io_timer, NULL);
+ }
+ ERTS_RUNQ_FLGS_UNSET(c_rq, ERTS_RUNQ_FLGS_MIGRATION_INFO);
+#endif
 return;
 }
@@ -5099,6 +5189,19 @@ erts_fprintf(stderr, "--------------------------------\n");
 /* Publish new migration paths... */
 erts_atomic_set_wb(&erts_migration_paths, (erts_aint_t) new_mpaths);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (full_scheds == current_active) {
+ ERTS_ASSERT(full_scheds <= current_active);
+ /* All active schedulers ran at full load; we need to do active polling,
+ so we set up a timer that does active polling */
+ if (!(ERTS_RUNQ_FLGS_GET_NOB(c_rq) & ERTS_RUNQ_FLG_CHECKIO)) {
+ /* Active polling is not running, start it */
+ erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT, check_io_timer, NULL);
+ }
+ run_queue_info[c_rq->ix].flags |= ERTS_RUNQ_FLG_CHECKIO;
+ }
+#endif
+
 /* Reset balance statistics in all online queues */
 for (qix = 0; qix < blnc_no_rqs; qix++) {
 Uint32 flags = run_queue_info[qix].flags;
@@ -5108,6 +5211,8 @@ erts_fprintf(stderr, "--------------------------------\n");
 ASSERT(!(flags & ERTS_RUNQ_FLG_OUT_OF_WORK));
 if (rq->waiting)
 flags |= ERTS_RUNQ_FLG_OUT_OF_WORK;
+ if (rq != c_rq)
+ flags &= ~ERTS_RUNQ_FLG_CHECKIO;
 rq->full_reds_history_sum
 = run_queue_info[qix].full_reds_history_sum;
@@ -5117,8 +5222,7 @@ erts_fprintf(stderr, "--------------------------------\n");
 ERTS_DBG_CHK_FULL_REDS_HISTORY(rq);
 rq->out_of_work_count = 0;
- (void) ERTS_RUNQ_FLGS_READ_BSET(rq, ERTS_RUNQ_FLGS_MIGRATION_INFO, flags);
-
+ (void) ERTS_RUNQ_FLGS_READ_BSET(rq, ERTS_RUNQ_FLGS_MIGRATION_INFO|ERTS_RUNQ_FLG_CHECKIO, flags);
 rq->max_len = erts_atomic32_read_dirty(&rq->len);
 for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) {
 ErtsRunQueueInfo *rqi;
@@ -5557,7 +5661,6 @@ erts_sched_set_busy_wait_threshold(ErtsSchedType sched_type, char *str)
 return EINVAL;
 }
- params->sys_schedule = sys_sched;
 params->tse = sys_sched * ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT;
 params->aux_work = sys_sched * aux_work_fact;
@@ -5768,6 +5871,9 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online, int no_poll_th
 size_runqs = sizeof(ErtsAlignedRunQueue) * tot_rqs;
 erts_aligned_run_queues =
 erts_alloc_permanent_cache_aligned(ERTS_ALC_T_RUNQS, size_runqs);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_atomic32_init_nob(&doing_sys_schedule, 0);
+#endif
 erts_atomic32_init_nob(&no_empty_run_queues, 0);
 erts_no_run_queues = n;
@@ -7565,7 +7671,8 @@ suspend_scheduler(ErtsSchedulerData *esdp)
 if (aux_work|evacuate) {
 if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp),
+ thr_prgr_active = 1);
 sched_wall_time_change(esdp, 1);
 }
 if (aux_work)
@@ -7573,8 +7680,8 @@ suspend_scheduler(ErtsSchedulerData *esdp)
 aux_work,
 1);
- if (aux_work &&
erts_thr_progress_update(esdp)) - erts_thr_progress_leader_update(esdp); + if (aux_work && erts_thr_progress_update(erts_thr_prgr_data(esdp))) + erts_thr_progress_leader_update(erts_thr_prgr_data(esdp)); if (evacuate) { erts_runq_lock(esdp->run_queue); evacuate_run_queue(esdp->run_queue, &sbp); @@ -7593,18 +7700,18 @@ suspend_scheduler(ErtsSchedulerData *esdp) if (!aux_work && current_time < timeout_time) { /* go to sleep... */ if (thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 0); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0); sched_wall_time_change(esdp, 0); } - erts_thr_progress_prepare_wait(NULL); + erts_thr_progress_prepare_wait(erts_thr_prgr_data(NULL)); suspend_normal_scheduler_sleep(esdp); - erts_thr_progress_finalize_wait(NULL); + erts_thr_progress_finalize_wait(erts_thr_prgr_data(NULL)); current_time = erts_get_monotonic_time(esdp); } if (current_time >= timeout_time) { if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } erts_bump_timers(esdp->timer_wheel, current_time); @@ -7661,7 +7768,7 @@ suspend_scheduler(ErtsSchedulerData *esdp) profile_scheduler(make_small(esdp->no), am_active); if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } } @@ -8296,6 +8403,11 @@ sched_thread_func(void *vesdp) erts_msacc_init_thread("scheduler", no, 1); erts_thr_progress_register_managed_thread(esdp, &callbacks, 0); + +#if ERTS_POLL_USE_SCHEDULER_POLLING + esdp->ssi->psi = erts_create_pollset_thread(-1, NULL); +#endif + erts_alloc_register_scheduler(vesdp); #ifdef ERTS_ENABLE_LOCK_CHECK { @@ -9313,12 +9425,12 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls) } } - leader_update = erts_thr_progress_update(esdp); + leader_update = erts_thr_progress_update(erts_thr_prgr_data(esdp)); aux_work = erts_atomic32_read_acqb(&esdp->ssi->aux_work); if (aux_work | leader_update) { erts_runq_unlock(rq); if (leader_update) - erts_thr_progress_leader_update(esdp); + erts_thr_progress_leader_update(erts_thr_prgr_data(esdp)); if (aux_work) handle_aux_work(&esdp->aux_work_data, aux_work, 0); erts_runq_lock(rq); diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h index 8d20ccdf90..a1b029adbe 100644 --- a/erts/emulator/beam/erl_process.h +++ b/erts/emulator/beam/erl_process.h @@ -173,8 +173,10 @@ extern int erts_dio_sched_thread_suggested_stack_size; (((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 9)) #define ERTS_RUNQ_FLG_HALTING \ (((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 10)) +#define ERTS_RUNQ_FLG_CHECKIO \ + (((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 11)) -#define ERTS_RUNQ_FLG_MAX (ERTS_RUNQ_FLG_BASE2 + 11) +#define ERTS_RUNQ_FLG_MAX (ERTS_RUNQ_FLG_BASE2 + 12) #define ERTS_RUNQ_FLGS_MIGRATION_QMASKS \ (ERTS_RUNQ_FLGS_EMIGRATE_QMASK \ diff --git a/erts/emulator/beam/erl_process_dump.c b/erts/emulator/beam/erl_process_dump.c index 243db4c734..ac5054ea10 100644 --- a/erts/emulator/beam/erl_process_dump.c +++ b/erts/emulator/beam/erl_process_dump.c @@ -58,6 +58,7 @@ static void dump_externally(fmtfn_t to, void *to_arg, Eterm term); static void mark_literal(Eterm* ptr); static void init_literal_areas(void); static void dump_literals(fmtfn_t to, void *to_arg); +static void dump_persistent_terms(fmtfn_t to, void *to_arg); static void dump_module_literals(fmtfn_t 
to, void *to_arg, ErtsLiteralArea* lit_area); @@ -74,6 +75,7 @@ erts_deep_process_dump(fmtfn_t to, void *to_arg) all_binaries = NULL; init_literal_areas(); + erts_init_persistent_dumping(); for (i = 0; i < max; i++) { Process *p = erts_pix2proc(i); @@ -93,6 +95,7 @@ erts_deep_process_dump(fmtfn_t to, void *to_arg) } } + dump_persistent_terms(to, to_arg); dump_literals(to, to_arg); dump_binaries(to, to_arg, all_binaries); } @@ -775,6 +778,9 @@ init_literal_areas(void) qsort(lit_areas, num_lit_areas, sizeof(ErtsLiteralArea *), compare_areas); + qsort(erts_persistent_areas, erts_num_persistent_areas, + sizeof(ErtsLiteralArea *), compare_areas); + erts_runlock_old_code(code_ix); } @@ -796,6 +802,13 @@ static void mark_literal(Eterm* ptr) ap = bsearch(ptr, lit_areas, num_lit_areas, sizeof(ErtsLiteralArea*), search_areas); + if (ap == 0) { + ap = bsearch(ptr, erts_persistent_areas, + erts_num_persistent_areas, + sizeof(ErtsLiteralArea*), + search_areas); + } + /* * If the literal was created by native code, this search will not @@ -807,12 +820,12 @@ static void mark_literal(Eterm* ptr) } } - static void dump_literals(fmtfn_t to, void *to_arg) { ErtsCodeIndex code_ix; int i; + Uint idx; code_ix = erts_active_code_ix(); erts_rlock_old_code(code_ix); @@ -825,6 +838,28 @@ dump_literals(fmtfn_t to, void *to_arg) } erts_runlock_old_code(code_ix); + + for (idx = 0; idx < erts_num_persistent_areas; idx++) { + dump_module_literals(to, to_arg, erts_persistent_areas[idx]); + } +} + +static void +dump_persistent_terms(fmtfn_t to, void *to_arg) +{ + Uint idx; + + erts_print(to, to_arg, "=persistent_terms\n"); + + for (idx = 0; idx < erts_num_persistent_areas; idx++) { + ErtsLiteralArea* ap = erts_persistent_areas[idx]; + Eterm tuple = make_tuple(ap->start); + Eterm* tup_val = tuple_val(tuple); + + dump_element(to, to_arg, tup_val[1]); + erts_putc(to, to_arg, '|'); + dump_element_nl(to, to_arg, tup_val[2]); + } } static void @@ -963,7 +998,8 @@ dump_module_literals(fmtfn_t to, void *to_arg, ErtsLiteralArea* lit_area) } erts_putc(to, to_arg, '\n'); } - } else if (is_export_header(w)) { + } else { + /* Dump everything else in the external format */ dump_externally(to, to_arg, term); erts_putc(to, to_arg, '\n'); } diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.h b/erts/emulator/beam/erl_sched_spec_pre_alloc.h index b119c59ab3..74cc966cbe 100644 --- a/erts/emulator/beam/erl_sched_spec_pre_alloc.h +++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.h @@ -188,6 +188,7 @@ erts_sspa_alloc(erts_sspa_data_t *data, int cix) erts_sspa_chunk_t *chnk; erts_sspa_chunk_header_t *chdr; erts_sspa_blk_t *res; + ERTS_MSACC_PUSH_AND_SET_STATE_M_X(ERTS_MSACC_STATE_ALLOC); chnk = erts_sspa_cix2chunk(data, cix); chdr = &chnk->aligned.header; @@ -201,11 +202,15 @@ erts_sspa_alloc(erts_sspa_data_t *data, int cix) chdr->local.last = NULL; ERTS_SSPA_DBG_CHK_LCL(chdr); } - if (chdr->local.cnt <= chdr->local.lim) - return (char *) erts_sspa_process_remote_frees(chdr, res); + if (chdr->local.cnt <= chdr->local.lim) { + res = erts_sspa_process_remote_frees(chdr, res); + ERTS_MSACC_POP_STATE_M_X(); + return (char*) res; + } else if (chdr->head.no_thr_progress_check < ERTS_SSPA_FORCE_THR_CHECK_PROGRESS) chdr->head.no_thr_progress_check++; ASSERT(res); + ERTS_MSACC_POP_STATE_M_X(); return (char *) res; } diff --git a/erts/emulator/beam/erl_thr_progress.c b/erts/emulator/beam/erl_thr_progress.c index aa08eb40ec..bac437efe9 100644 --- a/erts/emulator/beam/erl_thr_progress.c +++ b/erts/emulator/beam/erl_thr_progress.c @@ -508,6 +508,10 @@ 
init_wakeup_request_array(ErtsThrPrgrVal *w) } } +ErtsThrPrgrData *erts_thr_progress_data(void) { + return erts_tsd_get(erts_thr_prgr_data_key__); +} + void erts_thr_progress_register_unmanaged_thread(ErtsThrPrgrCallbacks *callbacks) { @@ -551,7 +555,7 @@ erts_thr_progress_register_unmanaged_thread(ErtsThrPrgrCallbacks *callbacks) } -void +ErtsThrPrgrData * erts_thr_progress_register_managed_thread(ErtsSchedulerData *esdp, ErtsThrPrgrCallbacks *callbacks, int pref_wakeup) @@ -630,6 +634,7 @@ erts_thr_progress_register_managed_thread(ErtsSchedulerData *esdp, wakeup_managed(id); } callbacks->finalize_wait(callbacks->arg); + return tpd; } static ERTS_INLINE int @@ -796,7 +801,7 @@ leader_update(ErtsThrPrgrData *tpd) == ERTS_THR_PRGR_LFLG_NO_LEADER)) && got_sched_wakeups()) { /* Someone need to make progress */ - wakeup_managed(0); + wakeup_managed(tpd->id); } } } @@ -849,23 +854,22 @@ update(ErtsThrPrgrData *tpd) } int -erts_thr_progress_update(ErtsSchedulerData *esdp) +erts_thr_progress_update(ErtsThrPrgrData *tpd) { - return update(thr_prgr_data(esdp)); + return update(tpd); } int -erts_thr_progress_leader_update(ErtsSchedulerData *esdp) +erts_thr_progress_leader_update(ErtsThrPrgrData *tpd) { - return leader_update(thr_prgr_data(esdp)); + return leader_update(tpd); } void -erts_thr_progress_prepare_wait(ErtsSchedulerData *esdp) +erts_thr_progress_prepare_wait(ErtsThrPrgrData *tpd) { erts_aint32_t lflgs; - ErtsThrPrgrData *tpd = thr_prgr_data(esdp); #ifdef ERTS_ENABLE_LOCK_CHECK erts_lc_check_exact(NULL, 0); @@ -884,14 +888,13 @@ erts_thr_progress_prepare_wait(ErtsSchedulerData *esdp) == ERTS_THR_PRGR_LFLG_NO_LEADER && got_sched_wakeups()) { /* Someone need to make progress */ - wakeup_managed(0); + wakeup_managed(tpd->id); } } void -erts_thr_progress_finalize_wait(ErtsSchedulerData *esdp) +erts_thr_progress_finalize_wait(ErtsThrPrgrData *tpd) { - ErtsThrPrgrData *tpd = thr_prgr_data(esdp); ErtsThrPrgrVal current, val; #ifdef ERTS_ENABLE_LOCK_CHECK @@ -921,9 +924,8 @@ erts_thr_progress_finalize_wait(ErtsSchedulerData *esdp) } void -erts_thr_progress_active(ErtsSchedulerData *esdp, int on) +erts_thr_progress_active(ErtsThrPrgrData *tpd, int on) { - ErtsThrPrgrData *tpd = thr_prgr_data(esdp); #ifdef ERTS_ENABLE_LOCK_CHECK erts_lc_check_exact(NULL, 0); @@ -973,7 +975,7 @@ unmanaged_continue(ErtsThrPrgrDelayHandle handle) == (ERTS_THR_PRGR_LFLG_NO_LEADER|ERTS_THR_PRGR_LFLG_WAITING_UM) && got_sched_wakeups()) { /* Others waiting for us... 
*/ - wakeup_managed(0); + wakeup_managed(1); } } } @@ -1182,10 +1184,10 @@ request_wakeup_unmanaged(ErtsThrPrgrData *tpd, ErtsThrPrgrVal value) } void -erts_thr_progress_wakeup(ErtsSchedulerData *esdp, +erts_thr_progress_wakeup(ErtsThrPrgrData *tpd, ErtsThrPrgrVal value) { - ErtsThrPrgrData *tpd = thr_prgr_data(esdp); + ASSERT(!tpd->is_temporary); if (tpd->is_managed) request_wakeup_managed(tpd, value); diff --git a/erts/emulator/beam/erl_thr_progress.h b/erts/emulator/beam/erl_thr_progress.h index 8329995b24..00a9e61407 100644 --- a/erts/emulator/beam/erl_thr_progress.h +++ b/erts/emulator/beam/erl_thr_progress.h @@ -123,22 +123,24 @@ extern ErtsThrPrgr erts_thr_prgr__; void erts_thr_progress_pre_init(void); void erts_thr_progress_init(int no_schedulers, int managed, int unmanaged); -void erts_thr_progress_register_managed_thread(ErtsSchedulerData *esdp, - ErtsThrPrgrCallbacks *, - int); +ErtsThrPrgrData *erts_thr_progress_register_managed_thread( + ErtsSchedulerData *esdp, ErtsThrPrgrCallbacks *, int); void erts_thr_progress_register_unmanaged_thread(ErtsThrPrgrCallbacks *); -void erts_thr_progress_active(ErtsSchedulerData *esdp, int on); -void erts_thr_progress_wakeup(ErtsSchedulerData *esdp, +void erts_thr_progress_active(ErtsThrPrgrData *, int on); +void erts_thr_progress_wakeup(ErtsThrPrgrData *, ErtsThrPrgrVal value); -int erts_thr_progress_update(ErtsSchedulerData *esdp); -int erts_thr_progress_leader_update(ErtsSchedulerData *esdp); -void erts_thr_progress_prepare_wait(ErtsSchedulerData *esdp); -void erts_thr_progress_finalize_wait(ErtsSchedulerData *esdp); +int erts_thr_progress_update(ErtsThrPrgrData *); +int erts_thr_progress_leader_update(ErtsThrPrgrData *); +void erts_thr_progress_prepare_wait(ErtsThrPrgrData *); +void erts_thr_progress_finalize_wait(ErtsThrPrgrData *); ErtsThrPrgrDelayHandle erts_thr_progress_unmanaged_delay__(void); void erts_thr_progress_unmanaged_continue__(int umrefc_ix); +ErtsThrPrgrData *erts_thr_progress_data(void); void erts_thr_progress_dbg_print_state(void); +ERTS_GLB_INLINE ErtsThrPrgrData *erts_thr_prgr_data(ErtsSchedulerData *esdp); + ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_prgr_read_nob__(ERTS_THR_PRGR_ATOMIC *atmc); ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_prgr_read_acqb__(ERTS_THR_PRGR_ATOMIC *atmc); ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_prgr_read_mb__(ERTS_THR_PRGR_ATOMIC *atmc); @@ -161,6 +163,15 @@ ERTS_GLB_INLINE int erts_thr_progress_has_reached(ErtsThrPrgrVal val); #if ERTS_GLB_INLINE_INCL_FUNC_DEF +ERTS_GLB_INLINE ErtsThrPrgrData * +erts_thr_prgr_data(ErtsSchedulerData *esdp) { + if (esdp) { + return &esdp->thr_progress_data; + } else { + return erts_thr_progress_data(); + } +} + ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_prgr_read_nob__(ERTS_THR_PRGR_ATOMIC *atmc) { diff --git a/erts/emulator/beam/erl_trace.c b/erts/emulator/beam/erl_trace.c index 53a020e7a5..2350d4c02f 100644 --- a/erts/emulator/beam/erl_trace.c +++ b/erts/emulator/beam/erl_trace.c @@ -2177,6 +2177,7 @@ sys_msg_dispatcher_func(void *unused) { ErtsThrPrgrCallbacks callbacks; ErtsSysMsgQ *local_sys_message_queue = NULL; + ErtsThrPrgrData *tpd; int wait = 0; #ifdef ERTS_ENABLE_LOCK_CHECK @@ -2189,7 +2190,7 @@ sys_msg_dispatcher_func(void *unused) callbacks.wait = sys_msg_dispatcher_wait; callbacks.finalize_wait = sys_msg_dispatcher_fin_wait; - erts_thr_progress_register_managed_thread(NULL, &callbacks, 0); + tpd = erts_thr_progress_register_managed_thread(NULL, &callbacks, 0); while (1) { int end_wait = 0; @@ -2210,8 +2211,8 @@ sys_msg_dispatcher_func(void *unused) if 
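The dispatcher below is a template for how any non-scheduler managed thread uses the reworked interface: register once, keep the returned ErtsThrPrgrData pointer, and pass it to every subsequent progress call instead of a NULL ErtsSchedulerData pointer. Code that only holds an ErtsSchedulerData pointer can still recover its data through the erts_thr_prgr_data() helper above, which falls back to thread-specific data when esdp is NULL. A minimal sketch of that life cycle, with the callback setup and the actual work elided:

    /* Sketch only; the callbacks and the work loop are application-specific. */
    static void *managed_thread_main(void *arg)
    {
        ErtsThrPrgrCallbacks callbacks;
        ErtsThrPrgrData *tpd;

        /* ... fill in callbacks.arg/.wakeup/.prepare_wait/.wait/.finalize_wait ... */
        tpd = erts_thr_progress_register_managed_thread(NULL, &callbacks, 0);

        for (;;) {
            if (erts_thr_progress_update(tpd))
                erts_thr_progress_leader_update(tpd);

            if (/* no work queued */ 0) {
                erts_thr_progress_active(tpd, 0);
                erts_thr_progress_prepare_wait(tpd);
                /* ... block on the thread's own condition variable ... */
                erts_thr_progress_finalize_wait(tpd);
                erts_thr_progress_active(tpd, 1);
            }
            /* ... perform work ... */
        }
        return NULL;
    }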
(!sys_message_queue) { erts_mtx_unlock(&smq_mtx); end_wait = 1; - erts_thr_progress_active(NULL, 0); - erts_thr_progress_prepare_wait(NULL); + erts_thr_progress_active(tpd, 0); + erts_thr_progress_prepare_wait(tpd); erts_mtx_lock(&smq_mtx); } @@ -2225,8 +2226,8 @@ sys_msg_dispatcher_func(void *unused) erts_mtx_unlock(&smq_mtx); if (end_wait) { - erts_thr_progress_finalize_wait(NULL); - erts_thr_progress_active(NULL, 1); + erts_thr_progress_finalize_wait(tpd); + erts_thr_progress_active(tpd, 1); } /* Send trace messages ... */ @@ -2239,8 +2240,8 @@ sys_msg_dispatcher_func(void *unused) Process *proc = NULL; Port *port = NULL; - if (erts_thr_progress_update(NULL)) - erts_thr_progress_leader_update(NULL); + if (erts_thr_progress_update(tpd)) + erts_thr_progress_leader_update(tpd); #ifdef DEBUG_PRINTOUTS print_msg_type(smqp); diff --git a/erts/emulator/beam/erl_utils.h b/erts/emulator/beam/erl_utils.h index b3bfa69052..880febba8b 100644 --- a/erts/emulator/beam/erl_utils.h +++ b/erts/emulator/beam/erl_utils.h @@ -22,6 +22,7 @@ #define ERL_UTILS_H__ #include "sys.h" +#include "atom.h" #include "erl_printf.h" struct process; @@ -112,10 +113,12 @@ int eq(Eterm, Eterm); #define EQ(x,y) (((x) == (y)) || (is_not_both_immed((x),(y)) && eq((x),(y)))) -int erts_cmp_atoms(Eterm a, Eterm b); -Sint erts_cmp(Eterm, Eterm, int, int); -Sint erts_cmp_compound(Eterm, Eterm, int, int); +ERTS_GLB_INLINE Sint erts_cmp(Eterm, Eterm, int, int); +ERTS_GLB_INLINE int erts_cmp_atoms(Eterm a, Eterm b); + Sint cmp(Eterm a, Eterm b); +Sint erts_cmp_compound(Eterm, Eterm, int, int); + #define CMP(A,B) erts_cmp(A,B,0,0) #define CMP_TERM(A,B) erts_cmp(A,B,1,0) #define CMP_EQ_ONLY(A,B) erts_cmp(A,B,0,1) @@ -150,4 +153,56 @@ Sint cmp(Eterm a, Eterm b); if (erts_cmp_compound(X,Y,0,EqOnly) Op 0) { Action; }; \ } +#define erts_float_comp(x,y) (((x)<(y)) ? -1 : (((x)==(y)) ? 
0 : 1)) + +#if ERTS_GLB_INLINE_INCL_FUNC_DEF + +ERTS_GLB_INLINE int erts_cmp_atoms(Eterm a, Eterm b) { + Atom *aa = atom_tab(atom_val(a)); + Atom *bb = atom_tab(atom_val(b)); + + byte *name_a, *name_b; + int len_a, len_b, diff; + + diff = aa->ord0 - bb->ord0; + + if (diff != 0) { + return diff; + } + + name_a = &aa->name[3]; + name_b = &bb->name[3]; + len_a = aa->len-3; + len_b = bb->len-3; + + if (len_a > 0 && len_b > 0) { + diff = sys_memcmp(name_a, name_b, MIN(len_a, len_b)); + + if (diff != 0) { + return diff; + } + } + + return len_a - len_b; +} + +ERTS_GLB_INLINE Sint erts_cmp(Eterm a, Eterm b, int exact, int eq_only) { + if (is_atom(a) && is_atom(b)) { + return erts_cmp_atoms(a, b); + } else if (is_both_small(a, b)) { + return (signed_val(a) - signed_val(b)); + } else if (is_float(a) && is_float(b)) { + FloatDef af, bf; + + GET_DOUBLE(a, af); + GET_DOUBLE(b, bf); + + return erts_float_comp(af.fd, bf.fd); + } + + return erts_cmp_compound(a,b,exact,eq_only); +} + +#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */ + #endif diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c index 621ba108ba..9a66e491f3 100644 --- a/erts/emulator/beam/external.c +++ b/erts/emulator/beam/external.c @@ -1953,7 +1953,8 @@ static Eterm erts_term_to_binary_int(Process* p, Eterm Term, int level, Uint fla #define RETURN_STATE() \ do { \ - hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE+3); \ + static const int TUPLE2_SIZE = 2 + 1; \ + hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE + TUPLE2_SIZE); \ c_term = erts_mk_magic_ref(&hp, &MSO(p), context_b); \ res = TUPLE2(hp, Term, c_term); \ BUMP_ALL_REDS(p); \ diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index 21ae205237..0631404599 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -906,6 +906,8 @@ typedef struct ErtsLiteralArea_ { Eterm start[1]; /* beginning of area */ } ErtsLiteralArea; +void erts_queue_release_literals(Process *c_p, ErtsLiteralArea* literals); + #define ERTS_LITERAL_AREA_ALLOC_SIZE(N) \ (sizeof(ErtsLiteralArea) + sizeof(Eterm)*((N) - 1)) @@ -1001,6 +1003,7 @@ typedef struct { Uint literal_size; Eterm *lit_purge_ptr; Uint lit_purge_sz; + int copy_literals; } erts_shcopy_t; #define INITIALIZE_SHCOPY(info) \ @@ -1010,6 +1013,7 @@ typedef struct { info.bitstore_start = info.bitstore_default; \ info.shtable_start = info.shtable_default; \ info.literal_size = 0; \ + info.copy_literals = 0; \ if (larea__) { \ info.lit_purge_ptr = &larea__->start[0]; \ info.lit_purge_sz = larea__->end - info.lit_purge_ptr; \ @@ -1238,6 +1242,13 @@ Sint erts_re_set_loop_limit(Sint limit); void erts_init_bif_binary(void); Sint erts_binary_set_loop_limit(Sint limit); +/* erl_bif_persistent.c */ +void erts_init_bif_persistent_term(void); +Uint erts_persistent_term_count(void); +void erts_init_persistent_dumping(void); +extern ErtsLiteralArea** erts_persistent_areas; +extern Uint erts_num_persistent_areas; + /* external.c */ void erts_init_external(void); diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h index bb22548587..869a575cb4 100644 --- a/erts/emulator/beam/sys.h +++ b/erts/emulator/beam/sys.h @@ -325,6 +325,7 @@ typedef long Sint erts_align_attribute(sizeof(long)); #define UWORD_CONSTANT(Const) Const##UL #define ERTS_UWORD_MAX ULONG_MAX #define ERTS_SWORD_MAX LONG_MAX +#define ERTS_SWORD_MIN LONG_MIN #define ERTS_SIZEOF_ETERM SIZEOF_LONG #define ErtsStrToSint strtol #elif SIZEOF_VOID_P == SIZEOF_INT @@ -335,6 +336,7 @@ typedef int Sint erts_align_attribute(sizeof(int)); #define 
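Moving erts_cmp_atoms() and erts_cmp() into the header makes the common comparison paths inlinable at every call site. The atom fast path leans on the precomputed ord0 field: it encodes the first three bytes of the name (plus seven bits of the fourth) as a positive integer, so most comparisons finish with a single subtraction, and the memcmp only ever sees the tail starting at byte 3. A toy reconstruction of that packing, assuming names shorter than four bytes are zero padded:

    /* Toy ord0; illustrative only. The shifts keep the result within 31
     * bits, so the sign of (ord0_a - ord0_b) is a valid comparison result. */
    static int toy_ord0(const unsigned char *name, int len)
    {
        unsigned char c[4] = {0, 0, 0, 0};
        int i;

        for (i = 0; i < 4 && i < len; i++)
            c[i] = name[i];
        return (c[0] << 23) + (c[1] << 15) + (c[2] << 7) + (c[3] >> 1);
    }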
UWORD_CONSTANT(Const) Const##U #define ERTS_UWORD_MAX UINT_MAX #define ERTS_SWORD_MAX INT_MAX +#define ERTS_SWORD_MIN INT_MIN #define ERTS_SIZEOF_ETERM SIZEOF_INT #define ErtsStrToSint strtol #elif SIZEOF_VOID_P == SIZEOF_LONG_LONG @@ -345,6 +347,7 @@ typedef long long Sint erts_align_attribute(sizeof(long long)); #define UWORD_CONSTANT(Const) Const##ULL #define ERTS_UWORD_MAX ULLONG_MAX #define ERTS_SWORD_MAX LLONG_MAX +#define ERTS_SWORD_MIN LLONG_MIN #define ERTS_SIZEOF_ETERM SIZEOF_LONG_LONG #if defined(__WIN32__) #define ErtsStrToSint _strtoi64 diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c index 08f8ca9788..d81bd89a48 100644 --- a/erts/emulator/beam/utils.c +++ b/erts/emulator/beam/utils.c @@ -2615,27 +2615,6 @@ not_equal: } -/* - * Lexically compare two strings of bytes (string s1 length l1 and s2 l2). - * - * s1 < s2 return -1 - * s1 = s2 return 0 - * s1 > s2 return +1 - */ -static int cmpbytes(byte *s1, int l1, byte *s2, int l2) -{ - int i; - i = 0; - while((i < l1) && (i < l2)) { - if (s1[i] < s2[i]) return(-1); - if (s1[i] > s2[i]) return(1); - i++; - } - if (l1 < l2) return(-1); - if (l1 > l2) return(1); - return(0); -} - /* * Compare objects. @@ -2649,20 +2628,6 @@ static int cmpbytes(byte *s1, int l1, byte *s2, int l2) * */ - -#define float_comp(x,y) (((x)<(y)) ? -1 : (((x)==(y)) ? 0 : 1)) - -int erts_cmp_atoms(Eterm a, Eterm b) -{ - Atom *aa = atom_tab(atom_val(a)); - Atom *bb = atom_tab(atom_val(b)); - int diff = aa->ord0 - bb->ord0; - if (diff) - return diff; - return cmpbytes(aa->name+3, aa->len-3, - bb->name+3, bb->len-3); -} - /* cmp(Eterm a, Eterm b) * For compatibility with HiPE - arith-based compare. */ @@ -2673,22 +2638,6 @@ Sint cmp(Eterm a, Eterm b) Sint erts_cmp_compound(Eterm a, Eterm b, int exact, int eq_only); -Sint erts_cmp(Eterm a, Eterm b, int exact, int eq_only) -{ - if (is_atom(a) && is_atom(b)) { - return erts_cmp_atoms(a, b); - } else if (is_both_small(a, b)) { - return (signed_val(a) - signed_val(b)); - } else if (is_float(a) && is_float(b)) { - FloatDef af, bf; - GET_DOUBLE(a, af); - GET_DOUBLE(b, bf); - return float_comp(af.fd, bf.fd); - } - return erts_cmp_compound(a,b,exact,eq_only); -} - - /* erts_cmp(Eterm a, Eterm b, int exact) * exact = 1 -> term-based compare * exact = 0 -> arith-based compare @@ -2985,7 +2934,7 @@ tailrecur_ne: GET_DOUBLE(a, af); GET_DOUBLE(b, bf); - ON_CMP_GOTO(float_comp(af.fd, bf.fd)); + ON_CMP_GOTO(erts_float_comp(af.fd, bf.fd)); } case (_TAG_HEADER_POS_BIG >> _TAG_PRIMARY_SIZE): case (_TAG_HEADER_NEG_BIG >> _TAG_PRIMARY_SIZE): @@ -3022,10 +2971,7 @@ tailrecur_ne: ErlFunThing* f2 = (ErlFunThing *) fun_val(b); Sint diff; - diff = cmpbytes(atom_tab(atom_val(f1->fe->module))->name, - atom_tab(atom_val(f1->fe->module))->len, - atom_tab(atom_val(f2->fe->module))->name, - atom_tab(atom_val(f2->fe->module))->len); + diff = erts_cmp_atoms((f1->fe)->module, (f2->fe)->module); if (diff != 0) { RETURN_NEQ(diff); } @@ -3219,7 +3165,7 @@ tailrecur_ne: if (f2.fd < MAX_LOSSLESS_FLOAT && f2.fd > MIN_LOSSLESS_FLOAT) { /* Float is within the no loss limit */ f1.fd = signed_val(aw); - j = float_comp(f1.fd, f2.fd); + j = erts_float_comp(f1.fd, f2.fd); } #if ERTS_SIZEOF_ETERM == 8 else if (f2.fd > (double) (MAX_SMALL + 1)) { @@ -3266,7 +3212,7 @@ tailrecur_ne: if (big_to_double(aw, &f1.fd) < 0) { j = big_sign(aw) ? 
-1 : 1; } else { - j = float_comp(f1.fd, f2.fd); + j = erts_float_comp(f1.fd, f2.fd); } } else { big = double_to_big(f2.fd, big_buf, sizeof(big_buf)/sizeof(Eterm)); @@ -3282,7 +3228,7 @@ tailrecur_ne: if (f1.fd < MAX_LOSSLESS_FLOAT && f1.fd > MIN_LOSSLESS_FLOAT) { /* Float is within the no loss limit */ f2.fd = signed_val(bw); - j = float_comp(f1.fd, f2.fd); + j = erts_float_comp(f1.fd, f2.fd); } #if ERTS_SIZEOF_ETERM == 8 else if (f1.fd > (double) (MAX_SMALL + 1)) { diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index 7f20477363..47eb5df7dd 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -38,6 +38,7 @@ #include <ctype.h> #include <sys/types.h> #include <errno.h> +#include <stdint.h> #define IDENTITY(c) c #define STRINGIFY_1(b) IDENTITY(#b) @@ -812,6 +813,7 @@ static size_t my_strnlen(const char *s, size_t maxlen) #define INET_OPT_PKTOPTIONS 45 /* IP(V6)_PKTOPTIONS get ancillary data */ #define INET_OPT_TTL 46 /* IP_TTL */ #define INET_OPT_RECVTTL 47 /* IP_RECVTTL ancillary data */ +#define TCP_OPT_NOPUSH 48 /* super-Nagle, aka TCP_CORK */ /* SCTP options: a separate range, from 100: */ #define SCTP_OPT_RTOINFO 100 #define SCTP_OPT_ASSOCINFO 101 @@ -954,6 +956,13 @@ static size_t my_strnlen(const char *s, size_t maxlen) #endif #endif +typedef struct _tcp_descriptor tcp_descriptor; + +#if defined(TCP_CORK) +#define INET_TCP_NOPUSH TCP_CORK +#elif defined(TCP_NOPUSH) && !defined(__DARWIN__) +#define INET_TCP_NOPUSH TCP_NOPUSH +#endif #define BIN_REALLOC_MARGIN(x) ((x)/4) /* 25% */ @@ -1003,16 +1012,19 @@ typedef struct _multi_timer_data { struct _multi_timer_data *prev; } MultiTimerData; -static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port, - ErlDrvTermData caller, unsigned timeout, - void (*timeout_fun)(ErlDrvData drv_data, - ErlDrvTermData caller)); -static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port, +static MultiTimerData *add_multi_timer(tcp_descriptor *desc, ErlDrvPort port, + ErlDrvTermData caller, unsigned timeout, + void (*timeout_fun)(ErlDrvData drv_data, + ErlDrvTermData caller)); +static void fire_multi_timers(tcp_descriptor *desc, ErlDrvPort port, ErlDrvData data); -static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTimerData *p); +static void remove_multi_timer(tcp_descriptor *desc, ErlDrvPort port, MultiTimerData *p); +static void cancel_multi_timer(tcp_descriptor *desc, ErlDrvPort port, + void (*timeout_fun)(ErlDrvData drv_data, + ErlDrvTermData caller)); static void tcp_inet_multi_timeout(ErlDrvData e, ErlDrvTermData caller); -static void clean_multi_timers(MultiTimerData **first, ErlDrvPort port); +static void clean_multi_timers(tcp_descriptor *desc, ErlDrvPort port); typedef struct { int id; /* id used to identify reply */ @@ -1271,7 +1283,7 @@ static struct erl_drv_entry sctp_inet_driver_entry = }; #endif -typedef struct { +struct _tcp_descriptor { inet_descriptor inet; /* common data structure (DON'T MOVE) */ int high; /* high watermark */ int low; /* low watermark */ @@ -1287,7 +1299,8 @@ typedef struct { int http_state; /* 0 = response|request 1=headers fields */ inet_async_multi_op *multi_first;/* NULL == no multi-accept-queue, op is in ordinary queue */ inet_async_multi_op *multi_last; - MultiTimerData *mtd; /* Timer structures for multiple accept */ + MultiTimerData *mtd; /* Timer structures for multiple accept */ + MultiTimerData *mtd_cache; /* A cache for timer allocations */ #ifdef 
HAVE_SENDFILE struct { ErlDrvSizeT ioq_skip; /* The number of bytes in the queue at the time @@ -1303,7 +1316,7 @@ typedef struct { Uint64 length; } sendfile; #endif -} tcp_descriptor; +}; /* send function */ static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len); @@ -1313,7 +1326,10 @@ static int tcp_deliver(tcp_descriptor* desc, int len); static int tcp_shutdown_error(tcp_descriptor* desc, int err); +#ifdef HAVE_SENDFILE static int tcp_inet_sendfile(tcp_descriptor* desc); +static int tcp_sendfile_aborted(tcp_descriptor* desc, int socket_error); +#endif static int tcp_inet_output(tcp_descriptor* desc, HANDLE event); static int tcp_inet_input(tcp_descriptor* desc, HANDLE event); @@ -5178,6 +5194,71 @@ static int hwaddr_libdlpi_lookup(const char *ifnm, } #endif +#ifdef HAVE_GETIFADDRS +/* Returns 0 for success and errno() for failure */ +static int call_getifaddrs(inet_descriptor* desc_p, struct ifaddrs **ifa_pp) +{ + int result, save_errno; +#ifdef HAVE_SETNS + int current_ns; + + current_ns = 0; + if (desc_p->netns != NULL) { + int new_ns; + /* Temporarily change network namespace for this thread + * over the getifaddrs() call + */ + current_ns = open("/proc/self/ns/net", O_RDONLY); + if (current_ns == INVALID_SOCKET) + return sock_errno(); + new_ns = open(desc_p->netns, O_RDONLY); + if (new_ns == INVALID_SOCKET) { + save_errno = sock_errno(); + while (close(current_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + return save_errno; + } + if (setns(new_ns, CLONE_NEWNET) != 0) { + save_errno = sock_errno(); + while (close(new_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + while (close(current_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + return save_errno; + } + else { + while (close(new_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + } + } +#endif + save_errno = 0; + result = getifaddrs(ifa_pp); + if (result < 0) + save_errno = sock_errno(); +#ifdef HAVE_SETNS + if (desc_p->netns != NULL) { + /* Restore network namespace */ + if (setns(current_ns, CLONE_NEWNET) != 0) { + /* XXX Failed to restore network namespace. + * What to do? Tidy up and return an error... + * Note that the thread now might still be in the set namespace. + * Can this even happen? Should the emulator be aborted? 
+ */ + if (result >= 0) { + /* We got a result but have to waste it */ + save_errno = sock_errno(); + freeifaddrs(*ifa_pp); + } + } + while (close(current_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + } +#endif + return save_errno; +} +#endif /* #ifdef HAVE_GETIFADDRS */ + /* FIXME: temporary hack */ #ifndef IFHWADDRLEN #define IFHWADDRLEN 6 @@ -5255,8 +5336,8 @@ static ErlDrvSSizeT inet_ctl_ifget(inet_descriptor* desc, struct sockaddr_dl *sdlp; int found = 0; - if (getifaddrs(&ifa) == -1) - goto error; + if (call_getifaddrs(desc, &ifa) != 0) + goto error; for (ifp = ifa; ifp; ifp = ifp->ifa_next) { if ((ifp->ifa_addr->sa_family == AF_LINK) && @@ -5974,6 +6055,7 @@ static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p, ErlDrvSizeT buf_size; char *buf_p; char *buf_alloc_p; + int save_errno; buf_size = GETIFADDRS_BUFSZ; buf_alloc_p = ALLOC(GETIFADDRS_BUFSZ); @@ -6008,9 +6090,9 @@ static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p, } \ } while (0) - if (getifaddrs(&ifa_p) < 0) { - return ctl_error(sock_errno(), rbuf_pp, rsize); - } + if ((save_errno = call_getifaddrs(desc_p, &ifa_p)) != 0) + return ctl_error(save_errno, rbuf_pp, rsize); + ifa_free_p = ifa_p; *buf_p++ = INET_REP_OK; for (; ifa_p; ifa_p = ifa_p->ifa_next) { @@ -6532,6 +6614,19 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) (long)desc->port, desc->s, ival)); break; + case TCP_OPT_NOPUSH: +#if defined(INET_TCP_NOPUSH) + proto = IPPROTO_TCP; + type = INET_TCP_NOPUSH; + DEBUGF(("inet_set_opts(%ld): s=%d, t=%d TCP_NOPUSH=%d\r\n", + (long)desc->port, desc->s, type, ival)); + break; +#else + /* inet_fill_opts always returns a value for this option, + * so we need to ignore it if not implemented, just in case */ + continue; +#endif + #if defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP) case UDP_OPT_MULTICAST_TTL: @@ -7693,6 +7788,16 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, proto = IPPROTO_TCP; type = TCP_NODELAY; break; + case TCP_OPT_NOPUSH: +#if defined(INET_TCP_NOPUSH) + proto = IPPROTO_TCP; + type = INET_TCP_NOPUSH; + break; +#else + *ptr++ = opt; + put_int32(0, ptr); + continue; +#endif #if defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP) case UDP_OPT_MULTICAST_TTL: @@ -7839,8 +7944,8 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, * cmsg options and values */ PLACE_FOR(1+4, ptr); - *ptr = opt; - arg_ptr = ptr+1; /* Where to put total length */ + *ptr++ = opt; + arg_ptr = ptr; /* Where to put total length */ arg_sz = 0; /* Total length */ for (cmsg_top = (struct cmsghdr*)(cmsgbuf.buf + cmsg_sz), cmsg = (struct cmsghdr*)cmsgbuf.buf; @@ -7852,7 +7957,6 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, PLACE_FOR(1+4, ptr); \ *ptr++ = OPT; \ put_cmsg_int32(cmsg, ptr); \ - ptr += 4; \ arg_sz += 1+4; \ continue; \ } @@ -7866,7 +7970,6 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, PUT_CMSG_INT32(IPPROTO_IP, IP_TTL, INET_OPT_TTL); #endif /* BSD uses the RECV* names in CMSG fields */ - } #if defined(IPPROTO_IP) && defined(IP_RECVTOS) PUT_CMSG_INT32(IPPROTO_IP, IP_RECVTOS, INET_OPT_TOS); #endif @@ -7877,6 +7980,7 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, PUT_CMSG_INT32(IPPROTO_IP, IP_RECVTTL, INET_OPT_TTL); #endif #undef PUT_CMSG_INT32 + } put_int32(arg_sz, arg_ptr); /* Put total length */ continue; } @@ -9677,6 +9781,7 @@ static ErlDrvData prep_tcp_inet_start(ErlDrvPort port, char* args) desc->tcp_add_flags = 0; desc->http_state = 0; desc->mtd = NULL; + desc->mtd_cache = NULL; desc->multi_first = 
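The new TCP_OPT_NOPUSH option is a thin veneer over the platform cork mechanism: TCP_CORK on Linux, TCP_NOPUSH on the BSDs, and nothing on Darwin, where the driver's guard suggests TCP_NOPUSH is not trusted. Stripped of driver plumbing, the underlying socket call looks like the sketch below, which mirrors the INET_TCP_NOPUSH guards above (outside this codebase __APPLE__ would be the usual test rather than __DARWIN__):

    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <sys/socket.h>

    /* Enable/disable output coalescing on a connected TCP socket. */
    static int set_nopush(int fd, int on)
    {
    #if defined(TCP_CORK)
        return setsockopt(fd, IPPROTO_TCP, TCP_CORK, &on, sizeof(on));
    #elif defined(TCP_NOPUSH) && !defined(__DARWIN__)
        return setsockopt(fd, IPPROTO_TCP, TCP_NOPUSH, &on, sizeof(on));
    #else
        (void)fd; (void)on;
        return 0;   /* option silently ignored, as in inet_set_opts() */
    #endif
    }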
desc->multi_last = NULL; DEBUGF(("tcp_inet_start(%ld) }\r\n", (long)port)); return (ErlDrvData) desc; @@ -9780,15 +9885,14 @@ static void tcp_close_check(tcp_descriptor* desc) driver_demonitor_process(desc->inet.port, &monitor); send_async_error(desc->inet.dport, id, caller, am_closed); } - clean_multi_timers(&(desc->mtd), desc->inet.port); } - else if (desc->inet.state == INET_STATE_CONNECTING) { async_error_am(INETP(desc), am_closed); } else if (desc->inet.state == INET_STATE_CONNECTED) { async_error_am_all(INETP(desc), am_closed); } + clean_multi_timers(desc, desc->inet.port); } /* @@ -9831,6 +9935,15 @@ static void tcp_desc_close(tcp_descriptor* desc) erl_inet_close(INETP(desc)); } +static void tcp_inet_recv_timeout(ErlDrvData e, ErlDrvTermData dummy) +{ + tcp_descriptor* desc = (tcp_descriptor*)e; + ASSERT(!desc->inet.active); + sock_select(INETP(desc),(FD_READ|FD_CLOSE),0); + desc->i_remain = 0; + async_error_am(INETP(desc), am_timeout); +} + /* TCP requests from Erlang */ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf, ErlDrvSizeT len, @@ -10001,12 +10114,12 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, if (time_left <= 0) { time_left = 1; } - omtd = add_multi_timer(&(desc->mtd), desc->inet.port, ocaller, + omtd = add_multi_timer(desc, desc->inet.port, ocaller, time_left, &tcp_inet_multi_timeout); } enq_old_multi_op(desc, oid, oreq, ocaller, omtd, &omonitor); if (timeout != INET_INFINITY) { - mtd = add_multi_timer(&(desc->mtd), desc->inet.port, caller, + mtd = add_multi_timer(desc, desc->inet.port, caller, timeout, &tcp_inet_multi_timeout); } enq_multi_op(desc, tbuf, INET_REQ_ACCEPT, caller, mtd, &monitor); @@ -10021,7 +10134,7 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, return ctl_xerror("noproc", rbuf, rsize); } if (timeout != INET_INFINITY) { - mtd = add_multi_timer(&(desc->mtd), desc->inet.port, caller, + mtd = add_multi_timer(desc, desc->inet.port, caller, timeout, &tcp_inet_multi_timeout); } enq_multi_op(desc, tbuf, INET_REQ_ACCEPT, caller, mtd, &monitor); @@ -10118,7 +10231,8 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, async_error_am(INETP(desc), am_timeout); else { if (timeout != INET_INFINITY) - driver_set_timer(desc->inet.port, timeout); + add_multi_timer(desc, INETP(desc)->port, 0, + timeout, &tcp_inet_recv_timeout); if (!INETP(desc)->is_ignored) sock_select(INETP(desc),(FD_READ|FD_CLOSE),1); else @@ -10205,12 +10319,11 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, desc->tcp_add_flags |= TCP_ADDF_SENDFILE; /* See if we can finish sending without selecting & rescheduling. 
*/ - tcp_inet_sendfile(desc); - - if(desc->sendfile.length > 0) { - sock_select(INETP(desc), FD_WRITE, 1); + if (tcp_inet_sendfile(desc) == 0) { + if(desc->sendfile.length > 0) { + sock_select(INETP(desc), FD_WRITE, 1); + } } - return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); #else return ctl_error(ENOTSUP, rbuf, rsize); @@ -10224,12 +10337,27 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, } +static void tcp_inet_send_timeout(ErlDrvData e, ErlDrvTermData dummy) +{ + tcp_descriptor* desc = (tcp_descriptor*)e; + ASSERT(IS_BUSY(INETP(desc))); + ASSERT(desc->busy_on_send); + desc->inet.caller = desc->inet.busy_caller; + desc->inet.state &= ~INET_F_BUSY; + desc->busy_on_send = 0; + set_busy_port(desc->inet.port, 0); + inet_reply_error_am(INETP(desc), am_timeout); + if (desc->send_timeout_close) { + tcp_desc_close(desc); + } +} + /* ** tcp_inet_timeout: ** called when timer expire: ** TCP socket may be: ** -** a) receiving -- deselect +** a) receiving -- send timeout ** b) connecting -- close socket ** c) accepting -- reset listener ** @@ -10243,26 +10371,9 @@ static void tcp_inet_timeout(ErlDrvData e) DEBUGF(("tcp_inet_timeout(%ld) {s=%d\r\n", (long)desc->inet.port, desc->inet.s)); if ((state & INET_F_MULTI_CLIENT)) { /* Multi-client always means multi-timers */ - fire_multi_timers(&(desc->mtd), desc->inet.port, e); + fire_multi_timers(desc, desc->inet.port, e); } else if ((state & INET_STATE_CONNECTED) == INET_STATE_CONNECTED) { - if (desc->busy_on_send) { - ASSERT(IS_BUSY(INETP(desc))); - desc->inet.caller = desc->inet.busy_caller; - desc->inet.state &= ~INET_F_BUSY; - desc->busy_on_send = 0; - set_busy_port(desc->inet.port, 0); - inet_reply_error_am(INETP(desc), am_timeout); - if (desc->send_timeout_close) { - tcp_desc_close(desc); - } - } - else { - /* assume recv timeout */ - ASSERT(!desc->inet.active); - sock_select(INETP(desc),(FD_READ|FD_CLOSE),0); - desc->i_remain = 0; - async_error_am(INETP(desc), am_timeout); - } + fire_multi_timers(desc, desc->inet.port, e); } else if ((state & INET_STATE_CONNECTING) == INET_STATE_CONNECTING) { /* assume connect timeout */ @@ -10392,7 +10503,7 @@ static void tcp_inet_process_exit(ErlDrvData e, ErlDrvMonitor *monitorp) return; } if (timeout != NULL) { - remove_multi_timer(&(desc->mtd), desc->inet.port, timeout); + remove_multi_timer(desc, desc->inet.port, timeout); } if (desc->multi_first == NULL) { sock_select(INETP(desc),FD_ACCEPT,0); @@ -10423,6 +10534,7 @@ static int tcp_recv_closed(tcp_descriptor* desc) #ifdef DEBUG long port = (long) desc->inet.port; /* Used after driver_exit() */ #endif + int blocking_send = 0; DEBUGF(("tcp_recv_closed(%ld): s=%d, in %s, line %d\r\n", port, desc->inet.s, __FILE__, __LINE__)); if (IS_BUSY(INETP(desc))) { @@ -10430,7 +10542,7 @@ static int tcp_recv_closed(tcp_descriptor* desc) desc->inet.caller = desc->inet.busy_caller; tcp_clear_output(desc); if (desc->busy_on_send) { - driver_cancel_timer(desc->inet.port); + cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout); desc->busy_on_send = 0; DEBUGF(("tcp_recv_closed(%ld): busy on send\r\n", port)); } @@ -10438,16 +10550,25 @@ static int tcp_recv_closed(tcp_descriptor* desc) set_busy_port(desc->inet.port, 0); inet_reply_error_am(INETP(desc), am_closed); DEBUGF(("tcp_recv_closed(%ld): busy reply 'closed'\r\n", port)); - } else { + blocking_send = 1; + } +#ifdef HAVE_SENDFILE + if (desc->tcp_add_flags & TCP_ADDF_SENDFILE) { + tcp_sendfile_aborted(desc, ENOTCONN); + blocking_send = 1; + } +#endif + if (!blocking_send) { /* No 
blocking send op to reply to right now. * If next op is a send, make sure it returns {error,closed} * rather than {error,enotconn}. */ desc->tcp_add_flags |= TCP_ADDF_DELAYED_CLOSE_SEND; } + if (!desc->inet.active) { - /* We must cancel any timer here ! */ - driver_cancel_timer(desc->inet.port); + /* We must cancel any timer here ! */ + clean_multi_timers(desc, INETP(desc)->port); /* passive mode do not terminate port ! */ tcp_clear_input(desc); if (desc->inet.exitf) { @@ -10482,16 +10603,21 @@ static int tcp_recv_error(tcp_descriptor* desc, int err) desc->inet.caller = desc->inet.busy_caller; tcp_clear_output(desc); if (desc->busy_on_send) { - driver_cancel_timer(desc->inet.port); + cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout); desc->busy_on_send = 0; } desc->inet.state &= ~INET_F_BUSY; set_busy_port(desc->inet.port, 0); inet_reply_error_am(INETP(desc), am_closed); } +#ifdef HAVE_SENDFILE + if (desc->tcp_add_flags & TCP_ADDF_SENDFILE) { + tcp_sendfile_aborted(desc, err); + } +#endif if (!desc->inet.active) { /* We must cancel any timer here ! */ - driver_cancel_timer(desc->inet.port); + clean_multi_timers(desc, INETP(desc)->port); tcp_clear_input(desc); if (desc->inet.exitf) { tcp_desc_close(desc); @@ -10596,13 +10722,13 @@ static int tcp_deliver(tcp_descriptor* desc, int len) if (len == 0) { /* empty buffer or waiting for more input */ if ((desc->i_buf == NULL) || (desc->i_remain > 0)) - return count; + return 0; if ((n = tcp_remain(desc, &len)) != 0) { if (n < 0) /* packet error */ return n; if (len > 0) /* more data pending */ desc->i_remain = len; - return count; + return 0; } } @@ -10654,9 +10780,7 @@ static int tcp_deliver(tcp_descriptor* desc, int len) len = 0; if (!desc->inet.active) { - if (!desc->busy_on_send) { - driver_cancel_timer(desc->inet.port); - } + cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_recv_timeout); sock_select(INETP(desc),(FD_READ|FD_CLOSE),0); if (desc->i_buf != NULL) tcp_restart_input(desc); @@ -10682,7 +10806,7 @@ static int tcp_recv(tcp_descriptor* desc, int request_len) int len; int nread; - if (desc->i_buf == NULL) { /* allocte a read buffer */ + if (desc->i_buf == NULL) { /* allocate a read buffer */ int sz = (request_len > 0) ? 
request_len : desc->inet.bufsz; if ((desc->i_buf = alloc_buffer(sz)) == NULL) @@ -10755,10 +10879,11 @@ static int tcp_recv(tcp_descriptor* desc, int request_len) return tcp_deliver(desc, desc->i_ptr - desc->i_ptr_start); } else { - if ((nread = tcp_remain(desc, &len)) < 0) + nread = tcp_remain(desc, &len); + if (nread < 0) return tcp_recv_error(desc, EMSGSIZE); else if (nread == 0) - return tcp_deliver(desc, len); + return tcp_deliver(desc, len); else if (len > 0) desc->i_remain = len; /* set remain */ } @@ -11077,7 +11202,7 @@ static int tcp_inet_input(tcp_descriptor* desc, HANDLE event) } if (timeout != NULL) { - remove_multi_timer(&(desc->mtd), desc->inet.port, timeout); + remove_multi_timer(desc, desc->inet.port, timeout); } driver_demonitor_process(desc->inet.port, &monitor); @@ -11136,8 +11261,8 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err) if (IS_BUSY(INETP(desc))) { desc->inet.caller = desc->inet.busy_caller; if (desc->busy_on_send) { - driver_cancel_timer(desc->inet.port); - desc->busy_on_send = 0; + cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout); + desc->busy_on_send = 0; } desc->inet.state &= ~INET_F_BUSY; set_busy_port(desc->inet.port, 0); @@ -11152,27 +11277,31 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err) DEBUGF(("driver_failure_eof(%ld) in %s, line %d\r\n", (long)desc->inet.port, __FILE__, __LINE__)); if (desc->inet.active) { + ErlDrvTermData err_atom; if (show_econnreset) { tcp_error_message(desc, err); - tcp_closed_message(desc); - inet_reply_error(INETP(desc), err); + err_atom = error_atom(err); } else { - tcp_closed_message(desc); - inet_reply_error_am(INETP(desc), am_closed); + err_atom = am_closed; } + tcp_closed_message(desc); + if (!(desc->tcp_add_flags & TCP_ADDF_SENDFILE)) + inet_reply_error_am(INETP(desc), err_atom); + if (desc->inet.exitf) driver_exit(desc->inet.port, 0); else tcp_desc_close(desc); } else { tcp_close_check(desc); - tcp_desc_close(desc); if (desc->inet.caller) { - if (show_econnreset) - inet_reply_error(INETP(desc), err); - else - inet_reply_error_am(INETP(desc), am_closed); + if (!(desc->tcp_add_flags & TCP_ADDF_SENDFILE)) { + if (show_econnreset) + inet_reply_error(INETP(desc), err); + else + inet_reply_error_am(INETP(desc), am_closed); + } } else { /* No blocking send op to reply to right now. 
@@ -11181,6 +11310,7 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err) */ desc->tcp_add_flags |= TCP_ADDF_DELAYED_CLOSE_SEND; } + tcp_desc_close(desc); /* * Make sure that the next receive operation gets an {error,closed} @@ -11237,6 +11367,12 @@ static int tcp_shutdown_error(tcp_descriptor* desc, int err) return tcp_send_or_shutdown_error(desc, err); } +static void tcp_inet_delay_send(ErlDrvData data, ErlDrvTermData dummy) +{ + tcp_descriptor *desc = (tcp_descriptor*)data; + (void)tcp_inet_output(desc, INETP(desc)->s); +} + /* ** Send non-blocking vector data */ @@ -11289,7 +11425,9 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev) set_busy_port(desc->inet.port, 1); if (desc->send_timeout != INET_INFINITY) { desc->busy_on_send = 1; - driver_set_timer(desc->inet.port, desc->send_timeout); + add_multi_timer(desc, INETP(desc)->port, + 0 /* arg */, desc->send_timeout /* timeout */, + &tcp_inet_send_timeout); } return 1; } @@ -11304,7 +11442,10 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev) INETP(desc)->is_ignored |= INET_IGNORE_WRITE; n = 0; } else if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND) { - n = 0; + driver_enqv(ix, ev, 0); + add_multi_timer(desc, INETP(desc)->port, 0, + 0, &tcp_inet_delay_send); + return 0; } else if (IS_SOCKET_ERROR(sock_sendv(desc->inet.s, ev->iov, vsize, &n, 0))) { if ((sock_errno() != ERRNO_BLOCK) && (sock_errno() != EINTR)) { @@ -11387,7 +11528,9 @@ static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len) set_busy_port(desc->inet.port, 1); if (desc->send_timeout != INET_INFINITY) { desc->busy_on_send = 1; - driver_set_timer(desc->inet.port, desc->send_timeout); + add_multi_timer(desc, INETP(desc)->port, + 0 /* arg */, desc->send_timeout /* timeout */, + &tcp_inet_send_timeout); } return 1; } @@ -11491,7 +11634,8 @@ static int tcp_sendfile_completed(tcp_descriptor* desc) { /* if we have a timer then cancel and send ok to client */ if (desc->busy_on_send) { - driver_cancel_timer(desc->inet.port); + cancel_multi_timer(desc, INETP(desc)->port, + &tcp_inet_send_timeout); desc->busy_on_send = 0; } @@ -11693,8 +11837,8 @@ socket_error: { DEBUGF(("tcp_inet_sendfile(%ld): send errno = %d (errno %d)\r\n", (long)desc->inet.port, socket_errno, errno)); - result = tcp_send_error(desc, socket_errno); tcp_sendfile_aborted(desc, socket_errno); + result = tcp_send_error(desc, socket_errno); goto done; } @@ -11798,6 +11942,12 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event) #ifdef __WIN32__ desc->inet.send_would_block = 1; #endif + /* If DELAY_SEND is set ready_output may have + been called without doing select so we do + a select in order to get into the correct + state */ + if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND) + sock_select(INETP(desc), FD_WRITE, 1); goto done; } else if (n == 0) { /* Workaround for redhat/CentOS 6.3 returning 0 when sending packets with @@ -11823,7 +11973,7 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event) set_busy_port(desc->inet.port, 0); /* if we have a timer then cancel and send ok to client */ if (desc->busy_on_send) { - driver_cancel_timer(desc->inet.port); + cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout); desc->busy_on_send = 0; } inet_reply_ok(INETP(desc)); @@ -12635,7 +12785,7 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event) udesc->i_buf = NULL; if (!desc->active) { async_error(desc, err); - driver_cancel_timer(desc->port); + driver_cancel_timer(desc->port); sock_select(desc,FD_READ,0); } else { @@ -12724,7 
+12874,7 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event) return count; count++; if (!desc->active) { - driver_cancel_timer(desc->port); /* possibly cancel */ + driver_cancel_timer(desc->port); sock_select(desc,FD_READ,0); return count; /* passive mode (read one packet only) */ } @@ -12803,55 +12953,69 @@ make_noninheritable_handle(SOCKET s) * Multi-timers */ -static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port, +static void fire_multi_timers(tcp_descriptor *desc, ErlDrvPort port, ErlDrvData data) { ErlDrvTime next_timeout; - if (!*first) { + MultiTimerData *curr = desc->mtd; + if (!curr) { ASSERT(0); return; } #ifdef DEBUG { ErlDrvTime chk = erl_drv_monotonic_time(ERL_DRV_MSEC); - ASSERT(chk >= (*first)->when); + ASSERT(chk >= curr->when); } #endif do { - MultiTimerData *save = *first; - *first = save->next; + MultiTimerData *save = curr; + (*(save->timeout_function))(data,save->caller); - FREE(save); - if (*first == NULL) { + + curr = curr->next; + + if (desc->mtd_cache == NULL) + desc->mtd_cache = save; + else + FREE(save); + + if (curr == NULL) { + desc->mtd = NULL; return; } - (*first)->prev = NULL; - next_timeout = (*first)->when - erl_drv_monotonic_time(ERL_DRV_MSEC); + curr->prev = NULL; + next_timeout = curr->when - erl_drv_monotonic_time(ERL_DRV_MSEC); } while (next_timeout <= 0); + desc->mtd = curr; driver_set_timer(port, (unsigned long) next_timeout); } -static void clean_multi_timers(MultiTimerData **first, ErlDrvPort port) +static void clean_multi_timers(tcp_descriptor *desc, ErlDrvPort port) { - MultiTimerData *p; - if (*first) { + if (desc->mtd) { driver_cancel_timer(port); } - while (*first) { - p = *first; - *first = p->next; - FREE(p); + while (desc->mtd) { + MultiTimerData *p = desc->mtd; + desc->mtd = p->next; + FREE(p); + } + desc->mtd = NULL; + if (desc->mtd_cache) { + FREE(desc->mtd_cache); + desc->mtd_cache = NULL; } } -static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTimerData *p) +static void remove_multi_timer(tcp_descriptor *desc, ErlDrvPort port, MultiTimerData *p) { if (p->prev != NULL) { p->prev->next = p->next; } else { driver_cancel_timer(port); - *first = p->next; - if (*first) { - ErlDrvTime ntmo = (*first)->when - erl_drv_monotonic_time(ERL_DRV_MSEC); + desc->mtd = p->next; + if (desc->mtd) { + ErlDrvTime ntmo = desc->mtd->when - erl_drv_monotonic_time(ERL_DRV_MSEC); if (ntmo < 0) ntmo = 0; driver_set_timer(port, (unsigned long) ntmo); @@ -12860,36 +13024,67 @@ static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTim if (p->next != NULL) { p->next->prev = p->prev; } - FREE(p); + if (desc->mtd_cache == NULL) + desc->mtd_cache = p; + else + FREE(p); } -static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port, +/* Cancel a timer based on the timeout_fun */ +static void cancel_multi_timer(tcp_descriptor *desc, ErlDrvPort port, + void (*timeout_fun)(ErlDrvData drv_data, + ErlDrvTermData caller)) +{ + MultiTimerData *timer = desc->mtd; + while(timer && timer->timeout_function != timeout_fun) { + timer = timer->next; + } + if (timer) { + remove_multi_timer(desc, port, timer); + } +} + +static MultiTimerData *add_multi_timer(tcp_descriptor *desc, ErlDrvPort port, ErlDrvTermData caller, unsigned timeout, void (*timeout_fun)(ErlDrvData drv_data, ErlDrvTermData caller)) { MultiTimerData *mtd, *p, *s; - mtd = ALLOC(sizeof(MultiTimerData)); - mtd->when = erl_drv_monotonic_time(ERL_DRV_MSEC) + ((ErlDrvTime) timeout) + 1; + + /* Use cached timer if available 
*/ + if (desc->mtd_cache != NULL) { + mtd = desc->mtd_cache; + desc->mtd_cache = NULL; + } else + mtd = ALLOC(sizeof(MultiTimerData)); + + if (timeout) + mtd->when = erl_drv_monotonic_time(ERL_DRV_MSEC) + ((ErlDrvTime) timeout); + else + mtd->when = INT64_MIN; /* Don't have to get the time for 0 msec timeouts */ + mtd->timeout_function = timeout_fun; mtd->caller = caller; mtd->next = mtd->prev = NULL; - for(p = *first,s = NULL; p != NULL; s = p, p = p->next) { + + /* Find correct slot in timer linked list */ + for(p = desc->mtd,s = NULL; p != NULL; s = p, p = p->next) { if (p->when >= mtd->when) { break; } } + /* Insert in linked list */ if (!p) { if (!s) { - *first = mtd; + desc->mtd = mtd; } else { s->next = mtd; mtd->prev = s; } } else { if (!s) { - *first = mtd; + desc->mtd = mtd; } else { s->next = mtd; mtd->prev = s; @@ -12897,10 +13092,8 @@ static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port, mtd->next = p; p->prev = mtd; } + /* Possibly set new timer */ if (!s) { - if (mtd->next) { - driver_cancel_timer(port); - } driver_set_timer(port,timeout); } return mtd; diff --git a/erts/emulator/nifs/common/prim_file_nif.c b/erts/emulator/nifs/common/prim_file_nif.c index a05d50b333..5fff55b467 100644 --- a/erts/emulator/nifs/common/prim_file_nif.c +++ b/erts/emulator/nifs/common/prim_file_nif.c @@ -38,6 +38,9 @@ static void unload(ErlNifEnv *env, void* priv_data); static ErlNifResourceType *efile_resource_type; +static ERL_NIF_TERM am_erts_prim_file; +static ERL_NIF_TERM am_close; + static ERL_NIF_TERM am_ok; static ERL_NIF_TERM am_error; static ERL_NIF_TERM am_continue; @@ -96,11 +99,14 @@ static ERL_NIF_TERM set_cwd_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM arg static ERL_NIF_TERM read_file_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM close_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); + +/* Internal ops */ +static ERL_NIF_TERM delayed_close_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM get_handle_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM altname_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); -static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); - /* All file handle operations are passed through a wrapper that handles state * transitions, marking it as busy during the course of the operation, and * closing on completion if the owner died in the middle of an operation. @@ -128,7 +134,11 @@ static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[] * * CLOSE_PENDING -> * CLOSED (file_handle_wrapper) - */ + * + * Should the owner of a file die, we can't close it immediately as that could + * potentially block a normal scheduler. When entering the CLOSED state from + * owner_death_callback, we will instead send a message to the erts_prim_file + * process that will then close the file through delayed_close_nif. */ typedef ERL_NIF_TERM (*file_op_impl_t)(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); @@ -142,7 +152,6 @@ static ERL_NIF_TERM file_handle_wrapper(file_op_impl_t operation, ErlNifEnv *env return file_handle_wrapper( name ## _impl , env, argc, argv); \ } -WRAP_FILE_HANDLE_EXPORT(close_nif) WRAP_FILE_HANDLE_EXPORT(read_nif) WRAP_FILE_HANDLE_EXPORT(write_nif) WRAP_FILE_HANDLE_EXPORT(pread_nif) @@ -193,18 +202,27 @@ static ErlNifFunc nif_funcs[] = { /* Internal ops. 
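Replacing driver_set_timer() with the multi-timer machinery means every socket can juggle several concurrent timeouts (send, recv, accept, delayed send) over a single port timer, at the cost of one heap node per pending timeout; zero-length timeouts get a deadline of INT64_MIN so they sort first and fire without reading the monotonic clock. The new mtd_cache softens the allocation cost: since a socket typically re-arms one timer at a time, a single cached node absorbs nearly all of the ALLOC/FREE traffic. The pattern in isolation, with hypothetical names:

    #include <stdlib.h>

    /* Single-slot free cache, as used for MultiTimerData above (sketch). */
    struct tnode { struct tnode *next; /* ... deadline, callback ... */ };

    static struct tnode *tnode_cache = NULL;

    static struct tnode *tnode_alloc(void)
    {
        struct tnode *n = tnode_cache;
        if (n != NULL)
            tnode_cache = NULL;     /* take the cached node */
        else
            n = malloc(sizeof(*n));
        return n;
    }

    static void tnode_free(struct tnode *n)
    {
        if (tnode_cache == NULL)
            tnode_cache = n;        /* keep exactly one node for reuse */
        else
            free(n);
    }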
*/ {"get_handle_nif", 1, get_handle_nif}, + {"delayed_close_nif", 1, delayed_close_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, {"altname_nif", 1, altname_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, }; ERL_NIF_INIT(prim_file, nif_funcs, load, NULL, upgrade, unload) +static ErlNifPid erts_prim_file_pid; + static void owner_death_callback(ErlNifEnv* env, void* obj, ErlNifPid* pid, ErlNifMonitor* mon); -static void gc_callback(ErlNifEnv *env, void* data); -static int load(ErlNifEnv *env, void** priv_data, ERL_NIF_TERM load_info) +static int load(ErlNifEnv *env, void** priv_data, ERL_NIF_TERM prim_file_pid) { ErlNifResourceTypeInit callbacks; + if(!enif_get_local_pid(env, prim_file_pid, &erts_prim_file_pid)) { + ASSERT(!"bad pid passed to prim_file_nif"); + } + + am_erts_prim_file = enif_make_atom(env, "erts_prim_file"); + am_close = enif_make_atom(env, "close"); + am_ok = enif_make_atom(env, "ok"); am_error = enif_make_atom(env, "error"); am_continue = enif_make_atom(env, "continue"); @@ -239,7 +257,7 @@ static int load(ErlNifEnv *env, void** priv_data, ERL_NIF_TERM load_info) am_eof = enif_make_atom(env, "eof"); callbacks.down = owner_death_callback; - callbacks.dtor = gc_callback; + callbacks.dtor = NULL; callbacks.stop = NULL; efile_resource_type = enif_open_resource_type_x(env, "efile", &callbacks, @@ -305,8 +323,10 @@ static ERL_NIF_TERM file_handle_wrapper(file_op_impl_t operation, ErlNifEnv *env /* This is the only point where a change from CLOSE_PENDING is * possible, and we're running synchronously, so we can't race with * anything else here. */ + posix_errno_t ignored; + erts_atomic32_set_acqb(&d->state, EFILE_STATE_CLOSED); - efile_close(d); + efile_close(d, &ignored); } } else { /* CLOSE_PENDING should be impossible at this point since it requires @@ -319,6 +339,24 @@ static ERL_NIF_TERM file_handle_wrapper(file_op_impl_t operation, ErlNifEnv *env return result; } +/* This is a special close operation used by the erts_prim_file process for + * cleaning up orphaned files. It differs from the ordinary close_nif in that + * it only works for files that have already entered the CLOSED state. */ +static ERL_NIF_TERM delayed_close_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + posix_errno_t ignored; + efile_data_t *d; + + ASSERT(argc == 1); + if(!get_file_data(env, argv[0], &d)) { + return enif_make_badarg(env); + } + + ASSERT(erts_atomic32_read_acqb(&d->state) == EFILE_STATE_CLOSED); + efile_close(d, &ignored); + + return am_ok; +} + static void owner_death_callback(ErlNifEnv* env, void* obj, ErlNifPid* pid, ErlNifMonitor* mon) { efile_data_t *d = (efile_data_t*)obj; @@ -334,8 +372,24 @@ static void owner_death_callback(ErlNifEnv* env, void* obj, ErlNifPid* pid, ErlN switch(previous_state) { case EFILE_STATE_IDLE: - efile_close(d); - return; + { + /* We cannot close the file here as that could block a normal + * scheduler, so we tell erts_prim_file to do it for us. + * + * This can in turn become a bottleneck (especially in cases + * like NFS failure), but it's less problematic than blocking + * thread progress. 
*/ + ERL_NIF_TERM message, file_ref; + + file_ref = enif_make_resource(env, d); + message = enif_make_tuple2(env, am_close, file_ref); + + if(!enif_send(env, &erts_prim_file_pid, NULL, message)) { + ERTS_INTERNAL_ERROR("Failed to defer prim_file close."); + } + + return; + } case EFILE_STATE_CLOSE_PENDING: case EFILE_STATE_CLOSED: /* We're either already closed or managed to mark ourselves for @@ -352,24 +406,6 @@ static void owner_death_callback(ErlNifEnv* env, void* obj, ErlNifPid* pid, ErlN } } -static void gc_callback(ErlNifEnv *env, void* data) { - efile_data_t *d = (efile_data_t*)data; - - enum efile_state_t previous_state; - - (void)env; - - previous_state = erts_atomic32_cmpxchg_acqb(&d->state, - EFILE_STATE_CLOSED, EFILE_STATE_IDLE); - - ASSERT(previous_state != EFILE_STATE_CLOSE_PENDING && - previous_state != EFILE_STATE_BUSY); - - if(previous_state == EFILE_STATE_IDLE) { - efile_close(d); - } -} - static ERL_NIF_TERM efile_filetype_to_atom(enum efile_filetype_t type) { switch(type) { case EFILE_FILETYPE_DEVICE: return am_device; @@ -454,40 +490,62 @@ static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[] return posix_error_to_tuple(env, posix_errno); } - result = enif_make_resource(env, d); - enif_release_resource(d); - enif_self(env, &controlling_process); if(enif_monitor_process(env, d, &controlling_process, &d->monitor)) { + /* We need to close the file manually as we haven't registered a + * destructor. */ + posix_errno_t ignored; + + erts_atomic32_set_acqb(&d->state, EFILE_STATE_CLOSED); + efile_close(d, &ignored); + return posix_error_to_tuple(env, EINVAL); } + /* Note that we do not call enif_release_resource at this point. While it's + * normally safe to leave resource management to the GC, efile_close is a + * blocking operation which must not be done in the GC callback, and we + * can't defer it as the resource is gone as soon as it returns. + * + * We instead keep the resource alive until efile_close is called, after + * which it's safe to leave things to the GC. If the controlling process + * were to die before the user had a chance to close their file, the above + * monitor will tell the erts_prim_file process to close it for them. */ + result = enif_make_resource(env, d); + return enif_make_tuple2(env, am_ok, result); } -static ERL_NIF_TERM close_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { +static ERL_NIF_TERM close_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { enum efile_state_t previous_state; + efile_data_t *d; - if(argc != 0) { + ASSERT(argc == 1); + if(!get_file_data(env, argv[0], &d)) { return enif_make_badarg(env); } previous_state = erts_atomic32_cmpxchg_acqb(&d->state, - EFILE_STATE_CLOSED, EFILE_STATE_BUSY); + EFILE_STATE_CLOSED, EFILE_STATE_IDLE); - ASSERT(previous_state == EFILE_STATE_CLOSE_PENDING || - previous_state == EFILE_STATE_BUSY); + if(previous_state == EFILE_STATE_IDLE) { + posix_errno_t error; - if(previous_state == EFILE_STATE_BUSY) { enif_demonitor_process(env, d, &d->monitor); - if(!efile_close(d)) { - return posix_error_to_tuple(env, d->posix_errno); + if(!efile_close(d, &error)) { + return posix_error_to_tuple(env, error); } - } - return am_ok; + return am_ok; + } else { + /* CLOSE_PENDING should be impossible at this point since it requires + * a transition from BUSY; the only valid state here is CLOSED. 
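Closing a file descriptor can block indefinitely (NFS being the classic case), so it must only happen on a dirty I/O scheduler; that is the thread running close_nif and delayed_close_nif. The owner-death monitor, which fires on a normal scheduler, instead ships the resource to the erts_prim_file process. The hand-off pattern in isolation, with hypothetical names (helper_pid, am_do_close):

    #include <erl_nif.h>

    /* Sketch of a monitor-down callback that defers blocking cleanup by
     * sending the resource to a helper process instead of closing inline. */
    static ErlNifPid helper_pid;        /* registered at load time */
    static ERL_NIF_TERM am_do_close;    /* made at load time */

    static void on_owner_down(ErlNifEnv *env, void *obj,
                              ErlNifPid *pid, ErlNifMonitor *mon)
    {
        /* enif_make_resource() takes a new reference, keeping obj alive
         * while the message (and any copy of it) exists. */
        ERL_NIF_TERM file_ref = enif_make_resource(env, obj);
        ERL_NIF_TERM msg = enif_make_tuple2(env, am_do_close, file_ref);

        if (!enif_send(env, &helper_pid, NULL, msg)) {
            /* helper is gone; at this point the fd can only leak */
        }
        (void)pid; (void)mon;
    }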
*/ + ASSERT(previous_state == EFILE_STATE_CLOSED); + + return posix_error_to_tuple(env, EINVAL); + } } static ERL_NIF_TERM read_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { @@ -514,6 +572,7 @@ static ERL_NIF_TERM read_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, con ASSERT(bytes_read <= block_size); if(bytes_read < 0) { + enif_release_binary(&result); return posix_error_to_tuple(env, d->posix_errno); } else if(bytes_read == 0) { enif_release_binary(&result); @@ -576,6 +635,7 @@ static ERL_NIF_TERM pread_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, co bytes_read = efile_preadv(d, offset, read_vec, 1); if(bytes_read < 0) { + enif_release_binary(&result); return posix_error_to_tuple(env, d->posix_errno); } else if(bytes_read == 0) { enif_release_binary(&result); @@ -802,6 +862,7 @@ static ERL_NIF_TERM ipread_s32bu_p32bu_nif_impl(efile_data_t *d, ErlNifEnv *env, bytes_read = efile_preadv(d, payload_offset, read_vec, 1); if(bytes_read < 0) { + enif_release_binary(&payload); return posix_error_to_tuple(env, d->posix_errno); } else if(bytes_read == 0) { enif_release_binary(&payload); @@ -872,7 +933,7 @@ static ERL_NIF_TERM set_permissions_nif(ErlNifEnv *env, int argc, const ERL_NIF_ posix_errno_t posix_errno; efile_path_t path; - Uint32 permissions; + unsigned int permissions; if(argc != 2 || !enif_get_uint(env, argv[1], &permissions)) { return enif_make_badarg(env); @@ -891,7 +952,7 @@ static ERL_NIF_TERM set_owner_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM a posix_errno_t posix_errno; efile_path_t path; - Sint32 uid, gid; + int uid, gid; if(argc != 3 || !enif_get_int(env, argv[1], &uid) || !enif_get_int(env, argv[2], &gid)) { @@ -1187,7 +1248,7 @@ static posix_errno_t read_file(efile_data_t *d, size_t size, ErlNifBinary *resul } static ERL_NIF_TERM read_file_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { - posix_errno_t posix_errno; + posix_errno_t posix_errno, ignored; efile_fileinfo_t info = {0}; efile_path_t path; @@ -1208,7 +1269,9 @@ static ERL_NIF_TERM read_file_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM a } posix_errno = read_file(d, info.size, &result); - enif_release_resource(d); + + erts_atomic32_set_acqb(&d->state, EFILE_STATE_CLOSED); + efile_close(d, &ignored); if(posix_errno) { return posix_error_to_tuple(env, posix_errno); diff --git a/erts/emulator/nifs/common/prim_file_nif.h b/erts/emulator/nifs/common/prim_file_nif.h index 099c06c48c..b2e30c59dd 100644 --- a/erts/emulator/nifs/common/prim_file_nif.h +++ b/erts/emulator/nifs/common/prim_file_nif.h @@ -159,8 +159,11 @@ posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes, ErlNifResourceType *nif_type, efile_data_t **d); /** @brief Closes a file. The file must have entered the CLOSED state prior to - * calling this to prevent double close. */ -int efile_close(efile_data_t *d); + * calling this to prevent double close. + * + * Note that the file is completely invalid after this point, so the error code + * is provided in \c error rather than d->posix_errno. 
*/ +int efile_close(efile_data_t *d, posix_errno_t *error); /* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */ diff --git a/erts/emulator/nifs/unix/unix_prim_file.c b/erts/emulator/nifs/unix/unix_prim_file.c index dea73db18a..169b193993 100644 --- a/erts/emulator/nifs/unix/unix_prim_file.c +++ b/erts/emulator/nifs/unix/unix_prim_file.c @@ -202,21 +202,24 @@ posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes, return errno; } -int efile_close(efile_data_t *d) { +int efile_close(efile_data_t *d, posix_errno_t *error) { efile_unix_t *u = (efile_unix_t*)d; int fd; + ASSERT(enif_thread_type() == ERL_NIF_THR_DIRTY_IO_SCHEDULER); ASSERT(erts_atomic32_read_nob(&d->state) == EFILE_STATE_CLOSED); ASSERT(u->fd != -1); fd = u->fd; u->fd = -1; + enif_release_resource(d); + /* close(2) either always closes (*BSD, Linux) or leaves the fd in an * undefined state (POSIX 2008, Solaris), so we must not retry on EINTR. */ if(close(fd) < 0) { - u->common.posix_errno = errno; + *error = errno; return 0; } diff --git a/erts/emulator/nifs/win32/win_prim_file.c b/erts/emulator/nifs/win32/win_prim_file.c index f7fae3c637..d0aa70542f 100644 --- a/erts/emulator/nifs/win32/win_prim_file.c +++ b/erts/emulator/nifs/win32/win_prim_file.c @@ -33,16 +33,32 @@ #define FILE_SHARE_FLAGS (FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE) -#define LP_PREFIX L"\\\\?\\" -#define LP_PREFIX_SIZE (sizeof(LP_PREFIX) - sizeof(WCHAR)) +/* Long paths can either be in the file (?) or the device (.) namespace. UNC + * paths are always in the file namespace. */ +#define LP_FILE_PREFIX L"\\\\?\\" +#define LP_DEV_PREFIX L"\\\\.\\" +#define LP_UNC_PREFIX (LP_FILE_PREFIX L"UNC\\") + +#define LP_PREFIX_SIZE (sizeof(LP_FILE_PREFIX) - sizeof(WCHAR)) #define LP_PREFIX_LENGTH (LP_PREFIX_SIZE / sizeof(WCHAR)) +#define LP_UNC_PREFIX_SIZE (sizeof(LP_UNC_PREFIX) - sizeof(WCHAR)) +#define LP_UNC_PREFIX_LENGTH (LP_UNC_PREFIX_SIZE / sizeof(WCHAR)) + +#define IS_LONG_PATH(length, data) \ + ((length) >= LP_PREFIX_LENGTH && \ + (!sys_memcmp((data), LP_FILE_PREFIX, LP_PREFIX_SIZE) || \ + !sys_memcmp((data), LP_DEV_PREFIX, LP_PREFIX_SIZE))) + +#define IS_LONG_UNC_PATH(length, data) \ + ((length) >= LP_UNC_PREFIX_LENGTH && \ + !sys_memcmp((data), LP_UNC_PREFIX, LP_UNC_PREFIX_SIZE)) + #define PATH_LENGTH(path) (path->size / sizeof(WCHAR) - 1) #define ASSERT_PATH_FORMAT(path) \ do { \ - ASSERT(PATH_LENGTH(path) >= 4 && \ - !memcmp(path->data, LP_PREFIX, LP_PREFIX_SIZE)); \ + ASSERT(IS_LONG_PATH(PATH_LENGTH(path), (path)->data)); \ ASSERT(PATH_LENGTH(path) == wcslen((WCHAR*)path->data)); \ } while(0) @@ -106,7 +122,7 @@ static posix_errno_t get_full_path(ErlNifEnv *env, WCHAR *input, efile_path_t *r return ENOENT; } - maximum_length += LP_PREFIX_LENGTH; + maximum_length += MAX(LP_PREFIX_LENGTH, LP_UNC_PREFIX_LENGTH); if(!enif_alloc_binary(maximum_length * sizeof(WCHAR), result)) { return ENOMEM; @@ -115,18 +131,28 @@ static posix_errno_t get_full_path(ErlNifEnv *env, WCHAR *input, efile_path_t *r actual_length = GetFullPathNameW(input, maximum_length, (WCHAR*)result->data, NULL); if(actual_length < maximum_length) { - int has_long_path_prefix; + int is_long_path, maybe_unc_path; WCHAR *path_start; - /* Make sure we have a long-path prefix; GetFullPathNameW only adds one - * if the path is relative. 
*/ - has_long_path_prefix = actual_length >= LP_PREFIX_LENGTH && - !sys_memcmp(result->data, LP_PREFIX, LP_PREFIX_SIZE); - - if(!has_long_path_prefix) { + /* The APIs we use have varying path length limits and sometimes + * behave differently when given a long-path prefix, so it's simplest + * to always use long paths. */ + + is_long_path = IS_LONG_PATH(actual_length, result->data); + maybe_unc_path = !sys_memcmp(result->data, L"\\\\", sizeof(WCHAR) * 2); + + if(maybe_unc_path && !is_long_path) { + /* \\localhost\c$\gurka -> \\?\UNC\localhost\c$\gurka */ + sys_memmove(result->data + LP_UNC_PREFIX_SIZE, + &((WCHAR*)result->data)[2], + (actual_length - 1) * sizeof(WCHAR)); + sys_memcpy(result->data, LP_UNC_PREFIX, LP_UNC_PREFIX_SIZE); + actual_length += LP_UNC_PREFIX_LENGTH; + } else if(!is_long_path) { + /* C:\gurka -> \\?\C:\gurka */ sys_memmove(result->data + LP_PREFIX_SIZE, result->data, (actual_length + 1) * sizeof(WCHAR)); - sys_memcpy(result->data, LP_PREFIX, LP_PREFIX_SIZE); + sys_memcpy(result->data, LP_FILE_PREFIX, LP_PREFIX_SIZE); actual_length += LP_PREFIX_LENGTH; } @@ -200,13 +226,19 @@ static int normalize_path_result(ErlNifBinary *path) { ASSERT(length < path->size / sizeof(WCHAR)); /* Get rid of the long-path prefix, if present. */ - if(length >= LP_PREFIX_LENGTH) { - if(!sys_memcmp(path_start, LP_PREFIX, LP_PREFIX_SIZE)) { - length -= LP_PREFIX_LENGTH; - sys_memmove(path_start, &path_start[LP_PREFIX_LENGTH], - length * sizeof(WCHAR)); - } + if(IS_LONG_UNC_PATH(length, path_start)) { + /* The first two characters (\\) are the same for both long and short + * UNC paths. */ + sys_memmove(&path_start[2], &path_start[LP_UNC_PREFIX_LENGTH], + (length - LP_UNC_PREFIX_LENGTH) * sizeof(WCHAR)); + + length -= LP_UNC_PREFIX_LENGTH - 2; + } else if(IS_LONG_PATH(length, path_start)) { + length -= LP_PREFIX_LENGTH; + + sys_memmove(path_start, &path_start[LP_PREFIX_LENGTH], + length * sizeof(WCHAR)); } path_end = &path_start[length]; @@ -318,49 +350,55 @@ static int has_same_mount_point(const efile_path_t *path_a, const efile_path_t * /* Mirrors the PathIsRootW function of the shell API, but doesn't choke on * paths longer than MAX_PATH. */ static int is_path_root(const efile_path_t *path) { - const WCHAR *path_start, *path_end; + const WCHAR *path_start, *path_end, *path_iterator; int length; ASSERT_PATH_FORMAT(path); - path_start = (WCHAR*)path->data + LP_PREFIX_LENGTH; - length = PATH_LENGTH(path) - LP_PREFIX_LENGTH; - - path_end = &path_start[length]; + if(!IS_LONG_UNC_PATH(PATH_LENGTH(path), path->data)) { + path_start = (WCHAR*)path->data + LP_PREFIX_LENGTH; + length = PATH_LENGTH(path) - LP_PREFIX_LENGTH; - if(length == 1) { /* A single \ refers to the root of the current working directory. */ - return IS_SLASH(path_start[0]); - } else if(length == 3 && iswalpha(path_start[0]) && path_start[1] == L':') { - /* Drive letter. */ - return IS_SLASH(path_start[2]); - } else if(length >= 4) { - /* Check whether we're a UNC root, eg. \\server, \\server\share */ - const WCHAR *path_iterator; + if(length == 1) { + return IS_SLASH(path_start[0]); + } - if(!IS_SLASH(path_start[0]) || !IS_SLASH(path_start[1])) { - return 0; + /* Drive letter. */ + if(length == 3 && iswalpha(path_start[0]) && path_start[1] == L':') { + return IS_SLASH(path_start[2]); } - path_iterator = path_start + 2; + return 0; + } - /* Slide to the slash between the server and share names, if present. 
 */
-    while(path_iterator < path_end && !IS_SLASH(*path_iterator)) {
-        path_iterator++;
-    }
+    /* Check whether we're a UNC root, e.g. \\server, \\server\share */
 
-        /* Slide past the end of the string, stopping at the first slash we
-         * encounter. */
-        do {
-            path_iterator++;
-        } while(path_iterator < path_end && !IS_SLASH(*path_iterator));
+    path_start = (WCHAR*)path->data + LP_UNC_PREFIX_LENGTH;
+    length = PATH_LENGTH(path) - LP_UNC_PREFIX_LENGTH;
 
-        /* If we're past the end of the string and it didnt't end with a slash,
-         * then we're a root path. */
-        return path_iterator >= path_end && !IS_SLASH(path_start[length - 1]);
+    path_end = &path_start[length];
+    path_iterator = path_start;
+
+    /* Server name must be at least one character. */
+    if(length <= 1) {
+        return 0;
     }
 
-    return 0;
+    /* Slide to the slash between the server and share names, if present. */
+    while(path_iterator < path_end && !IS_SLASH(*path_iterator)) {
+        path_iterator++;
+    }
+
+    /* Slide past the end of the string, stopping at the first slash we
+     * encounter. */
+    do {
+        path_iterator++;
+    } while(path_iterator < path_end && !IS_SLASH(*path_iterator));
+
+    /* If we're past the end of the string and it didn't end with a slash,
+     * then we're a root path. */
+    return path_iterator >= path_end && !IS_SLASH(path_start[length - 1]);
 }
 
 posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes,
@@ -428,18 +466,21 @@ posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes,
     }
 }
 
-int efile_close(efile_data_t *d) {
+int efile_close(efile_data_t *d, posix_errno_t *error) {
     efile_win_t *w = (efile_win_t*)d;
     HANDLE handle;
 
+    ASSERT(enif_thread_type() == ERL_NIF_THR_DIRTY_IO_SCHEDULER);
     ASSERT(erts_atomic32_read_nob(&d->state) == EFILE_STATE_CLOSED);
     ASSERT(w->handle != INVALID_HANDLE_VALUE);
 
     handle = w->handle;
     w->handle = INVALID_HANDLE_VALUE;
 
+    enif_release_resource(d);
+
     if(!CloseHandle(handle)) {
-        w->common.posix_errno = windows_to_posix_errno(GetLastError());
+        *error = windows_to_posix_errno(GetLastError());
         return 0;
     }
 
@@ -687,7 +728,7 @@ static int is_name_surrogate(const efile_path_t *path) {
 
     if(handle != INVALID_HANDLE_VALUE) {
         REPARSE_GUID_DATA_BUFFER reparse_buffer;
-        LPDWORD unused_length;
+        DWORD unused_length;
         BOOL success;
 
         success = DeviceIoControl(handle,
@@ -1248,11 +1289,22 @@ posix_errno_t efile_set_cwd(const efile_path_t *path) {
 
     /* We have to use _wchdir since that's the only function that updates the
      * per-drive working directory, but it naively assumes that all paths
-     * starting with \\ are UNC paths, so we have to skip the \\?\-prefix. */
-    path_start = (WCHAR*)path->data + LP_PREFIX_LENGTH;
+     * starting with \\ are UNC paths, so we have to skip the long-path prefix.
+     *
+     * _wchdir doesn't handle long-prefixed UNC paths either so we hand those
+     * to SetCurrentDirectoryW instead. The per-drive working directory is
+     * irrelevant for such paths anyway. */
 
-    if(_wchdir(path_start)) {
-        return windows_to_posix_errno(GetLastError());
+    if(!IS_LONG_UNC_PATH(PATH_LENGTH(path), path->data)) {
+        path_start = (WCHAR*)path->data + LP_PREFIX_LENGTH;
+
+        if(_wchdir(path_start)) {
+            return windows_to_posix_errno(GetLastError());
+        }
+    } else {
+        if(!SetCurrentDirectoryW((WCHAR*)path->data)) {
+            return windows_to_posix_errno(GetLastError());
+        }
     }
 
     return 0;
@@ -1333,7 +1385,7 @@ posix_errno_t efile_altname(ErlNifEnv *env, const efile_path_t *path, ERL_NIF_TE
     int name_length;
 
     /* Reject path wildcards.
*/ - if(wcspbrk(&((const WCHAR*)path->data)[4], L"?*")) { + if(wcspbrk(&((const WCHAR*)path->data)[LP_PREFIX_LENGTH], L"?*")) { return ENOENT; } diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c index 9f115706dc..c681fa481f 100644 --- a/erts/emulator/sys/common/erl_check_io.c +++ b/erts/emulator/sys/common/erl_check_io.c @@ -46,11 +46,11 @@ #if 0 #define DEBUG_PRINT(FMT, ...) erts_printf(FMT "\r\n", ##__VA_ARGS__) #define DEBUG_PRINT_FD(FMT, STATE, ...) \ - DEBUG_PRINT("%d: " FMT " (ev=%s, ac=%s, flg=%d)", \ + DEBUG_PRINT("%d: " FMT " (ev=%s, ac=%s, flg=%s)", \ (STATE) ? (STATE)->fd : (ErtsSysFdType)-1, ##__VA_ARGS__, \ ev2str((STATE) ? (STATE)->events : ERTS_POLL_EV_NONE), \ ev2str((STATE) ? (STATE)->active_events : ERTS_POLL_EV_NONE), \ - (STATE) ? (STATE)->flags : ERTS_EV_FLAG_CLEAR) + (STATE) ? flag2str((STATE)->flags) : ERTS_EV_FLAG_CLEAR) #define DEBUG_PRINT_MODE #else #define DEBUG_PRINT(...) @@ -76,22 +76,40 @@ typedef enum { typedef enum { ERTS_EV_FLAG_CLEAR = 0, ERTS_EV_FLAG_USED = 1, /* ERL_DRV_USE has been turned on */ -#ifdef ERTS_ENABLE_KERNEL_POLL - ERTS_EV_FLAG_FALLBACK = 2, /* Set when kernel poll rejected fd +#if ERTS_POLL_USE_SCHEDULER_POLLING + ERTS_EV_FLAG_SCHEDULER = 2, /* Set when the fd has been migrated + to scheduler pollset */ + ERTS_EV_FLAG_IN_SCHEDULER = 4, /* Set when the fd is currently in + scheduler pollset */ +#else + ERTS_EV_FLAG_SCHEDULER = ERTS_EV_FLAG_CLEAR, + ERTS_EV_FLAG_IN_SCHEDULER = ERTS_EV_FLAG_CLEAR, +#endif +#ifdef ERTS_POLL_USE_FALLBACK + ERTS_EV_FLAG_FALLBACK = 8, /* Set when kernel poll rejected fd and it was put in the nkp version */ #else ERTS_EV_FLAG_FALLBACK = ERTS_EV_FLAG_CLEAR, #endif /* Combinations */ - ERTS_EV_FLAG_USED_FALLBACK = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_FALLBACK + ERTS_EV_FLAG_USED_FALLBACK = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_FALLBACK, + ERTS_EV_FLAG_USED_SCHEDULER = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_SCHEDULER, + ERTS_EV_FLAG_USED_IN_SCHEDULER = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_SCHEDULER | ERTS_EV_FLAG_IN_SCHEDULER, + ERTS_EV_FLAG_UNUSED_SCHEDULER = ERTS_EV_FLAG_SCHEDULER, + ERTS_EV_FLAG_UNUSED_IN_SCHEDULER = ERTS_EV_FLAG_SCHEDULER | ERTS_EV_FLAG_IN_SCHEDULER } EventStateFlags; #define flag2str(flags) \ ((flags) == ERTS_EV_FLAG_CLEAR ? "CLEAR" : \ ((flags) == ERTS_EV_FLAG_USED ? "USED" : \ ((flags) == ERTS_EV_FLAG_FALLBACK ? "FLBK" : \ - ((flags) == ERTS_EV_FLAG_USED_FALLBACK ? "USED|FLBK" : "ERROR")))) + ((flags) == ERTS_EV_FLAG_USED_FALLBACK ? "USED|FLBK" : \ + ((flags) == ERTS_EV_FLAG_USED_SCHEDULER ? "USED|SCHD" : \ + ((flags) == ERTS_EV_FLAG_UNUSED_SCHEDULER ? "SCHD" : \ + ((flags) == ERTS_EV_FLAG_USED_IN_SCHEDULER ? "USED|IN_SCHD" : \ + ((flags) == ERTS_EV_FLAG_UNUSED_IN_SCHEDULER ? "IN_SCHD" : \ + "ERROR")))))))) /* How many events that can be handled at once by one erts_poll_wait call */ #define ERTS_CHECK_IO_POLL_RES_LEN 512 @@ -105,6 +123,7 @@ typedef struct erts_poll_thread { ErtsPollSet *ps; ErtsPollResFd *pollres; + ErtsThrPrgrData *tpd; int pollres_len; } ErtsPollThread; @@ -112,10 +131,13 @@ typedef struct erts_poll_thread * Which pollset to use is determined by hashing the fd. 
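
A minimal sketch of the fd-to-pollset mapping the comment above describes. The modulo hash is an assumption made for illustration; the real get_pollset() may pick differently:

    #include <stdio.h>

    /* Stand-ins for ErtsPollSet and erts_no_pollsets. */
    typedef struct { int id; } PollSet;

    #define NO_POLLSETS 4
    static PollSet pollsets[NO_POLLSETS];

    /* Spread fds evenly over the pollsets; cheap, and deterministic so a
     * given fd always lands in the same set. */
    static PollSet *get_pollset_sketch(int fd) {
        return &pollsets[fd % NO_POLLSETS];
    }

    int main(void) {
        printf("fd 13 -> pollset %d\n",
               (int)(get_pollset_sketch(13) - pollsets));  /* pollset 1 */
        return 0;
    }
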
*/ static ErtsPollSet **pollsetv; +static ErtsPollThread *psiv; #if ERTS_POLL_USE_FALLBACK static ErtsPollSet *flbk_pollset; #endif -static ErtsPollThread *psiv; +#if ERTS_POLL_USE_SCHEDULER_POLLING +static ErtsPollSet *sched_pollset; +#endif typedef struct { #ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS @@ -130,10 +152,12 @@ typedef struct { ErtsResource* resource; /* ERTS_EV_TYPE_STOP_NIF */ } stop; } driver; - ErtsPollEvents events; /* The events that have been selected upon */ + ErtsPollEvents events; /* The events that have been selected upon */ ErtsPollEvents active_events; /* The events currently active in the pollset */ EventStateType type; EventStateFlags flags; + int count; /* Number of times this fd has triggered + without being deselected. */ } ErtsDrvEventState; struct drv_ev_state_shared { @@ -370,12 +394,22 @@ get_pollset(ErtsSysFdType fd) #if ERTS_POLL_USE_FALLBACK static ERTS_INLINE ErtsPollSet * -get_fallback(void) +get_fallback_pollset(void) { return flbk_pollset; } #endif +static ERTS_INLINE ErtsPollSet * +get_scheduler_pollset(ErtsSysFdType fd) +{ +#if ERTS_POLL_USE_SCHEDULER_POLLING + return sched_pollset; +#else + return get_pollset(fd); +#endif +} + /* * Place a fd within a pollset. This will automatically use * the fallback ps if needed. @@ -391,18 +425,27 @@ erts_io_control_wakeup(ErtsDrvEventState *state, ErtsPollOp op, ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd))); if (!(flags & ERTS_EV_FLAG_FALLBACK)) { - res = erts_poll_control(get_pollset(fd), fd, op, pe, wake_poller); + + if (op == ERTS_POLL_OP_DEL && (flags & ERTS_EV_FLAG_SCHEDULER)) { + erts_poll_control(get_scheduler_pollset(fd), fd, op, pe, wake_poller); + flags &= ~ERTS_EV_FLAG_IN_SCHEDULER; + } + if (!(flags & ERTS_EV_FLAG_IN_SCHEDULER) || (pe & ERTS_POLL_EV_OUT)) { + res = erts_poll_control(get_pollset(fd), fd, op, pe, wake_poller); + } else { + res = erts_poll_control(get_scheduler_pollset(fd), fd, op, pe, wake_poller); + } #if ERTS_POLL_USE_FALLBACK if (op == ERTS_POLL_OP_ADD && res == ERTS_POLL_EV_NVAL) { /* When an add fails with NVAL, the poll/kevent operation could not put that fd in the pollset, so we instead put it into a fallback pollset */ state->flags |= ERTS_EV_FLAG_FALLBACK; - res = erts_poll_control_flbk(get_fallback(), fd, op, pe, wake_poller); + res = erts_poll_control_flbk(get_fallback_pollset(), fd, op, pe, wake_poller); } } else { ASSERT(op != ERTS_POLL_OP_ADD); - res = erts_poll_control_flbk(get_fallback(), fd, op, pe, wake_poller); + res = erts_poll_control_flbk(get_fallback_pollset(), fd, op, pe, wake_poller); #endif } @@ -425,59 +468,77 @@ erts_io_notify_port_task_executed(ErtsPortTaskType type, ErtsIoTask *itp = ErtsContainerStruct(pthp, ErtsIoTask, task); ErtsSysFdType fd = itp->fd; erts_mtx_t *mtx = fd_mtx(fd); - int active_events; + ErtsPollOp op = ERTS_POLL_OP_MOD; + int active_events, new_events = 0; ErtsDrvEventState *state; ErtsDrvSelectDataState *free_select = NULL; ErtsNifSelectDataState *free_nif = NULL; + ERTS_MSACC_PUSH_AND_SET_STATE_M_X(ERTS_MSACC_STATE_CHECK_IO); + erts_mtx_lock(mtx); state = get_drv_ev_state(fd); + reset_handle(pthp); + active_events = state->active_events; - switch (type) { - case ERTS_PORT_TASK_INPUT: + if (!(state->flags & ERTS_EV_FLAG_IN_SCHEDULER) || type == ERTS_PORT_TASK_OUTPUT) { + switch (type) { + case ERTS_PORT_TASK_INPUT: + + DEBUG_PRINT_FD("executed ready_input", state); + + ASSERT(!(state->active_events & ERTS_POLL_EV_IN)); + if (state->events & ERTS_POLL_EV_IN) { + active_events |= ERTS_POLL_EV_IN; + if (state->count > 10 && 
ERTS_POLL_USE_SCHEDULER_POLLING) { + if (!(state->flags & ERTS_EV_FLAG_SCHEDULER)) + op = ERTS_POLL_OP_ADD; + state->flags |= ERTS_EV_FLAG_IN_SCHEDULER|ERTS_EV_FLAG_SCHEDULER; + new_events = ERTS_POLL_EV_IN; + DEBUG_PRINT_FD("moving to scheduler ps", state); + } else + new_events = active_events; + if (!(state->flags & ERTS_EV_FLAG_FALLBACK) && ERTS_POLL_USE_SCHEDULER_POLLING) + state->count++; + } + break; + case ERTS_PORT_TASK_OUTPUT: - DEBUG_PRINT_FD("executed ready_input", state); + DEBUG_PRINT_FD("executed ready_output", state); - ASSERT(!(state->active_events & ERTS_POLL_EV_IN)); - if (state->events & ERTS_POLL_EV_IN) - active_events |= ERTS_POLL_EV_IN; - break; - case ERTS_PORT_TASK_OUTPUT: + ASSERT(!(state->active_events & ERTS_POLL_EV_OUT)); + if (state->events & ERTS_POLL_EV_OUT) { + active_events |= ERTS_POLL_EV_OUT; + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER && active_events & ERTS_POLL_EV_IN) + new_events = ERTS_POLL_EV_OUT; + else + new_events = active_events; + } + break; + default: + erts_exit(ERTS_ABORT_EXIT, "Invalid IO port task type"); + break; + } - DEBUG_PRINT_FD("executed ready_output", state); + if (state->active_events != active_events && new_events) { + state->active_events = active_events; + new_events = erts_io_control(state, op, new_events); + } - ASSERT(!(state->active_events & ERTS_POLL_EV_OUT)); - if (state->events & ERTS_POLL_EV_OUT) - active_events |= ERTS_POLL_EV_OUT; - break; - default: - erts_exit(ERTS_ABORT_EXIT, "Invalid IO port task type"); - break; + /* We were unable to re-insert the fd into the pollset, signal the callback. */ + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + if (state->active_events & ERTS_POLL_EV_IN) + iready(state->driver.select->inport, state); + if (state->active_events & ERTS_POLL_EV_OUT) + oready(state->driver.select->outport, state); + state->active_events = 0; + } } - reset_handle(pthp); - - if (active_events) { - /* This is not needed if active_events has not changed */ - if (state->active_events != active_events) { - ErtsPollEvents new_events; - state->active_events = active_events; - new_events = erts_io_control(state, ERTS_POLL_OP_MOD, active_events); - - /* We were unable to re-insert the fd into the pollset, signal the callback. 
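
Reduced to its core, the migration policy above reads as follows (a simplified sketch with invented names; the threshold mirrors the state->count > 10 test in the hunk): an input fd that keeps triggering without ever being deselected is handed over to the pollset that the schedulers themselves poll.

    #include <stdbool.h>

    enum { MIGRATE_THRESHOLD = 10 };

    struct fd_state {
        int  count;          /* input triggers since the last deselect */
        bool in_scheduler;   /* already polled by the schedulers?      */
    };

    /* Called when an input task for the fd has been executed. Returns
     * true when the fd should move to the scheduler pollset. */
    static bool should_migrate(struct fd_state *s) {
        if (s->count > MIGRATE_THRESHOLD && !s->in_scheduler) {
            s->in_scheduler = true;
            return true;
        }
        s->count++;
        return false;
    }

Since driver_select() resets the counter whenever ERL_DRV_READ is deselected, only descriptors that stay continuously busy ever migrate.
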
*/ - if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { - if (active_events & ERTS_POLL_EV_IN) - iready(state->driver.select->inport, state); - if (active_events & ERTS_POLL_EV_OUT) - oready(state->driver.select->outport, state); - state->active_events = 0; - } - } - } else { + if (!active_events) check_fd_cleanup(state, &free_select, &free_nif); - } erts_mtx_unlock(mtx); @@ -485,6 +546,8 @@ erts_io_notify_port_task_executed(ErtsPortTaskType type, free_drv_select_data(free_select); if (free_nif) free_nif_select_data(free_nif); + + ERTS_MSACC_POP_STATE_M_X(); } static ERTS_INLINE void @@ -755,11 +818,22 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on) if (old_events == 0 && !(state->flags & ERTS_EV_FLAG_USED)) { ctl_op = ERTS_POLL_OP_ADD; } + new_events = state->active_events; + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) + new_events &= ~ERTS_POLL_EV_IN; } else { ctl_events &= old_events; state->events &= ~ctl_events; state->active_events &= ~ctl_events; + new_events = state->active_events; + + if (ctl_events & ERTS_POLL_EV_IN) { + state->count = 0; + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) { + new_events = 0; + } + } if (!state->events) { if (!(state->flags & ERTS_EV_FLAG_USED) || mode & ERL_DRV_USE) @@ -770,7 +844,7 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on) if (ctl_events || ctl_op == ERTS_POLL_OP_DEL) { new_events = erts_io_control_wakeup(state, ctl_op, - state->active_events, + new_events, &wake_poller); ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL || state->type == ERTS_EV_TYPE_NONE); @@ -802,6 +876,7 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on) if (ctl_events & ERTS_POLL_EV_IN) { abort_tasks(state, ERL_DRV_READ); state->driver.select->inport = NIL; + state->flags &= ~ERTS_EV_FLAG_IN_SCHEDULER; } if (ctl_events & ERTS_POLL_EV_OUT) { abort_tasks(state, ERL_DRV_WRITE); @@ -810,6 +885,8 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on) if (state->events == 0) { if ((mode & ERL_DRV_USE) || !(state->flags & ERTS_EV_FLAG_USED)) { state->type = ERTS_EV_TYPE_NONE; + if (state->flags & ERTS_EV_FLAG_SCHEDULER) + erts_atomic32_read_bor_nob(&prt->state, ERTS_PORT_SFLG_CHECK_FD_CLEANUP); state->flags = 0; } /*else keep it, as fd will probably be selected upon again */ @@ -1426,7 +1503,8 @@ iready(Eterm id, ErtsDrvEventState *state) if (erts_port_task_schedule(id, &iotask->task, ERTS_PORT_TASK_INPUT, - (ErlDrvEvent) state->fd) != 0) { + (ErlDrvEvent) state->fd, + state->flags & ERTS_EV_FLAG_IN_SCHEDULER) != 0) { stale_drv_select(id, state, ERL_DRV_READ); } else { DEBUG_PRINT_FD("schedule ready_input(%T, %d)", @@ -1444,7 +1522,8 @@ oready(Eterm id, ErtsDrvEventState *state) if (erts_port_task_schedule(id, &iotask->task, ERTS_PORT_TASK_OUTPUT, - (ErlDrvEvent) state->fd) != 0) { + (ErlDrvEvent) state->fd, + 0) != 0) { stale_drv_select(id, state, ERL_DRV_WRITE); } else { DEBUG_PRINT_FD("schedule ready_output(%T, %d)", state, id, state->fd); @@ -1506,7 +1585,7 @@ erts_check_io_interrupt(ErtsPollThread *psi, int set) { if (psi) { #if ERTS_POLL_USE_FALLBACK - if (psi->ps == get_fallback()) { + if (psi->ps == get_fallback_pollset()) { erts_poll_interrupt_flbk(psi->ps, set); return; } @@ -1516,12 +1595,13 @@ erts_check_io_interrupt(ErtsPollThread *psi, int set) } ErtsPollThread * -erts_create_pollset_thread(int id) { +erts_create_pollset_thread(int id, ErtsThrPrgrData *tpd) { + psiv[id].tpd = tpd; return psiv+id; } void -erts_check_io(ErtsPollThread *psi) +erts_check_io(ErtsPollThread *psi, ErtsMonotonicTime timeout_time) 
{ int pollres_len; int poll_ret, i; @@ -1536,14 +1616,14 @@ erts_check_io(ErtsPollThread *psi) pollres_len = psi->pollres_len; #if ERTS_POLL_USE_FALLBACK - if (psi->ps == get_fallback()) { + if (psi->ps == get_fallback_pollset()) { - poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len); + poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len, psi->tpd, timeout_time); } else #endif { - poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len); + poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len, psi->tpd, timeout_time); } #ifdef ERTS_ENABLE_LOCK_CHECK @@ -1579,7 +1659,12 @@ erts_check_io(ErtsPollThread *psi) ErtsNifSelectDataState *free_nif = NULL; ErtsSysFdType fd = (ErtsSysFdType) ERTS_POLL_RES_GET_FD(&psi->pollres[i]); ErtsDrvEventState *state; - ErtsPollEvents revents; + ErtsPollEvents revents = ERTS_POLL_RES_GET_EVTS(&psi->pollres[i]); + + /* The fd will be set to -1 if a pollset internal fd was triggered + that was determined to be too expensive to remove from the result. + */ + if (fd == -1) continue; erts_mtx_lock(fd_mtx(fd)); @@ -1590,8 +1675,6 @@ erts_check_io(ErtsPollThread *psi) continue; } - revents = ERTS_POLL_RES_GET_EVTS(&psi->pollres[i]); - DEBUG_PRINT_FD("triggered %s", state, ev2str(revents)); if (revents & ERTS_POLL_EV_ERR) { @@ -1603,25 +1686,39 @@ erts_check_io(ErtsPollThread *psi) */ revents = state->active_events; state->active_events = 0; + + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) { + erts_io_control(state, ERTS_POLL_OP_MOD, 0); + state->flags &= ~ERTS_EV_FLAG_IN_SCHEDULER; + } } else { /* Disregard any events that are not active at the moment, for instance this could happen if the driver/nif does select/deselect in rapid succession. */ revents &= state->active_events | ERTS_POLL_EV_NVAL; - state->active_events &= ~revents; - /* Reactivate the poll op if there are still active events */ - if (state->active_events) { - ErtsPollEvents new_events; - DEBUG_PRINT_FD("re-enable %s", state, ev2str(state->active_events)); + if (psi->ps != get_scheduler_pollset(fd) || !ERTS_POLL_USE_SCHEDULER_POLLING) { + ErtsPollEvents reactive_events; + state->active_events &= ~revents; - new_events = erts_io_control(state, ERTS_POLL_OP_MOD, state->active_events); + reactive_events = state->active_events; - /* Unable to re-enable the fd, signal all callbacks */ - if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { - revents |= state->active_events; - state->active_events = 0; + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) + reactive_events &= ~ERTS_POLL_EV_IN; + + /* Reactivate the poll op if there are still active events */ + if (reactive_events) { + ErtsPollEvents new_events; + DEBUG_PRINT_FD("re-enable %s", state, ev2str(reactive_events)); + + new_events = erts_io_control(state, ERTS_POLL_OP_MOD, reactive_events); + + /* Unable to re-enable the fd, signal all callbacks */ + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + revents |= reactive_events; + state->active_events &= ~reactive_events; + } } } } @@ -1697,7 +1794,7 @@ erts_check_io(ErtsPollThread *psi) case ERTS_EV_TYPE_STOP_USE: { #if ERTS_POLL_USE_FALLBACK - ASSERT(psi->ps == get_fallback()); + ASSERT(psi->ps == get_fallback_pollset()); #endif drv_ptr = state->driver.stop.drv_ptr; state->type = ERTS_EV_TYPE_NONE; @@ -2035,12 +2132,17 @@ erts_init_check_io(int *argc, char **argv) for (j=0; j < erts_no_pollsets; j++) pollsetv[j] = erts_poll_create_pollset(j); -#if ERTS_POLL_USE_FALLBACK - flbk_pollset = erts_poll_create_pollset_flbk(-1); + no_poll_threads = 
erts_no_poll_threads; + + j = -1; + +#if ERTS_POLL_USE_SCHEDULER_POLLING + sched_pollset = erts_poll_create_pollset(j--); + no_poll_threads++; #endif - no_poll_threads = erts_no_poll_threads; #if ERTS_POLL_USE_FALLBACK + flbk_pollset = erts_poll_create_pollset_flbk(j--); no_poll_threads++; #endif @@ -2050,7 +2152,15 @@ erts_init_check_io(int *argc, char **argv) psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN; psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET, sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN); - psiv[0].ps = get_fallback(); + psiv[0].ps = get_fallback_pollset(); + psiv++; +#endif + +#if ERTS_POLL_USE_SCHEDULER_POLLING + psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN; + psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET, + sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN); + psiv[0].ps = get_scheduler_pollset(0); psiv++; #endif @@ -2107,7 +2217,12 @@ erts_check_io_size(void) int i; #if ERTS_POLL_USE_FALLBACK - erts_poll_info(get_fallback(), &pi); + erts_poll_info(get_fallback_pollset(), &pi); + res += pi.memory_size; +#endif + +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_poll_info(get_scheduler_pollset(0), &pi); res += pi.memory_size; #endif @@ -2139,13 +2254,21 @@ erts_check_io_info(void *proc) Uint sz, *szp, *hp, **hpp; ErtsPollInfo *piv; Sint i, j = 0, len; - int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK; + int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK + ERTS_POLL_USE_SCHEDULER_POLLING; ERTS_CT_ASSERT(ERTS_POLL_USE_FALLBACK == 0 || ERTS_POLL_USE_FALLBACK == 1); + ERTS_CT_ASSERT(ERTS_POLL_USE_SCHEDULER_POLLING == 0 || ERTS_POLL_USE_SCHEDULER_POLLING == 1); piv = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollInfo) * no_pollsets); #if ERTS_POLL_USE_FALLBACK - erts_poll_info_flbk(get_fallback(), &piv[0]); + erts_poll_info_flbk(get_fallback_pollset(), &piv[0]); + piv[0].poll_threads = 1; + piv[0].active_fds = 0; + piv++; +#endif + +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_poll_info(get_scheduler_pollset(0), &piv[0]); piv[0].poll_threads = 1; piv[0].active_fds = 0; piv++; @@ -2199,6 +2322,7 @@ erts_check_io_info(void *proc) sz = 0; piv -= ERTS_POLL_USE_FALLBACK; + piv -= ERTS_POLL_USE_SCHEDULER_POLLING; bld_it: @@ -2303,15 +2427,7 @@ print_events(erts_dsprintf_buf_t *dsbufp, ErtsPollEvents ev) static ERTS_INLINE void print_flags(erts_dsprintf_buf_t *dsbufp, EventStateFlags f) { - const char* delim = ""; - if(f & ERTS_EV_FLAG_USED) { - erts_dsprintf(dsbufp, "%s","USED"); - delim = "|"; - } - if(f & ERTS_EV_FLAG_FALLBACK) { - erts_dsprintf(dsbufp, "%s%s", delim, "FLBK"); - delim = "|"; - } + erts_dsprintf(dsbufp, "%s", flag2str(f)); } #ifdef DEBUG_PRINT_MODE @@ -2653,13 +2769,26 @@ erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip) #if ERTS_POLL_USE_FALLBACK erts_dsprintf(dsbufp, "--- fds in flbk pollset ---------------------------------\n"); - erts_poll_get_selected_events_flbk(get_fallback(), counters.epep, + erts_poll_get_selected_events_flbk(get_fallback_pollset(), counters.epep, drv_ev_state.max_fds); for (fd = 0; fd < len; fd++) { if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK) doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp); } #endif +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_dsprintf(dsbufp, "--- fds in scheduler pollset ----------------------------\n"); + erts_poll_get_selected_events(get_scheduler_pollset(0), counters.epep, + drv_ev_state.max_fds); + for (fd = 0; fd < len; fd++) { + if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_SCHEDULER) { + if (drv_ev_state.v[fd].events && drv_ev_state.v[fd].events != ERTS_POLL_EV_NONE) 
+ counters.epep[fd] &= ~ERTS_POLL_EV_OUT; + doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp); + } + } +#endif + erts_dsprintf(dsbufp, "--- fds in pollset --------------------------------------\n"); for (i = 0; i < erts_no_pollsets; i++) { @@ -2668,8 +2797,15 @@ erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip) drv_ev_state.max_fds); for (fd = 0; fd < len; fd++) { if (!(drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK) - && get_pollset_id(fd) == i) + && get_pollset_id(fd) == i) { + if (counters.epep[fd] != ERTS_POLL_EV_NONE && + drv_ev_state.v[fd].flags & ERTS_EV_FLAG_IN_SCHEDULER) { + /* We add the in flag if it is enabled in the scheduler pollset + and get_selected_events works on the platform */ + counters.epep[fd] |= ERTS_POLL_EV_IN; + } doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp); + } } } for (fd = len ; fd < drv_ev_state.max_fds; fd++) { @@ -2716,7 +2852,7 @@ void erts_lcnt_update_cio_locks(int enable) { #endif #if ERTS_POLL_USE_FALLBACK - erts_lcnt_enable_pollset_lock_count_flbk(get_fallback(), enable); + erts_lcnt_enable_pollset_lock_count_flbk(get_fallback_pollset(), enable); #endif for (i = 0; i < erts_no_pollsets; i++) diff --git a/erts/emulator/sys/common/erl_check_io.h b/erts/emulator/sys/common/erl_check_io.h index 443ef1264c..31182be5ec 100644 --- a/erts/emulator/sys/common/erl_check_io.h +++ b/erts/emulator/sys/common/erl_check_io.h @@ -68,7 +68,7 @@ int erts_check_io_max_files(void); * * @param pt the poll thread structure to use. */ -void erts_check_io(struct erts_poll_thread *pt); +void erts_check_io(struct erts_poll_thread *pt, ErtsMonotonicTime timeout_time); /** * Initialize the check io framework. This function will parse the arguments * and delete any entries that it is interested in. @@ -90,8 +90,11 @@ void erts_check_io_interrupt(struct erts_poll_thread *pt, int set); /** * Create a new poll thread structure that is associated with the number no. * It is the callers responsibility that no is unique. + * + * @param no the id of the pollset thread, -2 = aux thread, -1 = scheduler + * @param tpd the thread progress data of the pollset thread */ -struct erts_poll_thread* erts_create_pollset_thread(int no); +struct erts_poll_thread* erts_create_pollset_thread(int no, ErtsThrPrgrData *tpd); #ifdef ERTS_ENABLE_LOCK_COUNT /** * Toggle lock counting on all check io locks @@ -126,16 +129,6 @@ extern int erts_no_poll_threads; #include "erl_poll.h" #include "erl_port_task.h" -#ifdef __WIN32__ -/* - * Current erts_poll implementation for Windows cannot handle - * active events in the set of events polled. 
- */ -# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1 -#else -# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1 -#endif - typedef struct { Eterm inport; Eterm outport; @@ -147,10 +140,6 @@ struct erts_nif_select_event { Eterm pid; Eterm immed; Uint32 refn[ERTS_REF_NUMBERS]; - Sint32 ddeselect_cnt; /* 0: No delayed deselect in progress - * 1: Do deselect before next poll - * >1: Countdown of ignored events - */ }; typedef struct { diff --git a/erts/emulator/sys/common/erl_poll.c b/erts/emulator/sys/common/erl_poll.c index b4d1575ee5..51d50933ff 100644 --- a/erts/emulator/sys/common/erl_poll.c +++ b/erts/emulator/sys/common/erl_poll.c @@ -75,6 +75,7 @@ # define WANT_NONBLOCKING #endif +#include "erl_thr_progress.h" #include "erl_poll.h" #if ERTS_POLL_USE_KQUEUE # include <sys/types.h> @@ -95,7 +96,6 @@ # include <limits.h> # endif #endif -#include "erl_thr_progress.h" #include "erl_driver.h" #include "erl_alloc.h" #include "erl_msacc.h" @@ -121,7 +121,8 @@ /* Define to print info about modifications done to each fd */ #define DEBUG_PRINT_FD(FMT, PS, FD, ...) DEBUG_PRINT("%d: " FMT, PS, FD, ##__VA_ARGS__) /* Define to print entry and exit from erts_poll_wait (can be very spammy) */ -//#define DEBUG_PRINT_WAIT(FMT, PS, ...) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__) +// #define DEBUG_PRINT_WAIT(FMT, PS, ...) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__) +// #define DEBUG_PRINT_WAIT(FMT, PS, ...) do { if ((PS)->id != -1) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__); } while(0) #else #define ERTS_POLL_DEBUG_PRINT 0 @@ -200,7 +201,7 @@ int ERTS_SELECT(int nfds, ERTS_fd_set *readfds, ERTS_fd_set *writefds, #define ERTS_POLL_USE_CONCURRENT_UPDATE (ERTS_POLL_USE_EPOLL || ERTS_POLL_USE_KQUEUE) -#define ERTS_POLL_USE_WAKEUP_PIPE (!ERTS_POLL_USE_CONCURRENT_UPDATE) +#define ERTS_POLL_USE_WAKEUP(ps) (!ERTS_POLL_USE_CONCURRENT_UPDATE || (ps)->id < 0) #if !ERTS_POLL_USE_CONCURRENT_UPDATE @@ -269,6 +270,7 @@ struct ERTS_POLL_EXPORT(erts_pollset) { #if ERTS_POLL_USE_KERNEL_POLL int kp_fd; + int oneshot; #endif /* ERTS_POLL_USE_KERNEL_POLL */ #if ERTS_POLL_USE_POLL @@ -295,12 +297,16 @@ struct ERTS_POLL_EXPORT(erts_pollset) { ErtsPollSetUpdateRequestsBlock *curr_upd_req_block; erts_atomic32_t have_update_requests; erts_mtx_t mtx; - erts_atomic32_t wakeup_state; +#else + int do_wakeup; #endif -#if ERTS_POLL_USE_WAKEUP_PIPE - int wake_fds[2]; +#if ERTS_POLL_USE_TIMERFD + int timer_fd; #endif + ErtsMonotonicTime timeout_time; + erts_atomic32_t wakeup_state; + int wake_fds[2]; }; void erts_silence_warn_unused_result(long unused); @@ -365,63 +371,47 @@ static void print_misc_debug_info(void); uint32_t epoll_events(int kp_fd, int fd); #endif - #define ERTS_POLL_NOT_WOKEN 0 #define ERTS_POLL_WOKEN -1 #define ERTS_POLL_WOKEN_INTR 1 -#if !ERTS_POLL_USE_CONCURRENT_UPDATE static ERTS_INLINE void reset_wakeup_state(ErtsPollSet *ps) { erts_atomic32_set_mb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN); } -#endif static ERTS_INLINE int is_woken(ErtsPollSet *ps) { -#if !ERTS_POLL_USE_CONCURRENT_UPDATE return erts_atomic32_read_acqb(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN; -#else - return 0; -#endif } static ERTS_INLINE int is_interrupted_reset(ErtsPollSet *ps) { -#if !ERTS_POLL_USE_CONCURRENT_UPDATE return (erts_atomic32_xchg_acqb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN) == ERTS_POLL_WOKEN_INTR); -#else - return 0; -#endif } static ERTS_INLINE void woke_up(ErtsPollSet *ps) { -#if !ERTS_POLL_USE_CONCURRENT_UPDATE erts_aint32_t wakeup_state = erts_atomic32_read_acqb(&ps->wakeup_state); if (wakeup_state == ERTS_POLL_NOT_WOKEN) (void) 
erts_atomic32_cmpxchg_nob(&ps->wakeup_state, ERTS_POLL_WOKEN, ERTS_POLL_NOT_WOKEN); ASSERT(erts_atomic32_read_nob(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN); -#endif } /* * --- Wakeup pipe ----------------------------------------------------------- */ -#if ERTS_POLL_USE_WAKEUP_PIPE - static ERTS_INLINE void wake_poller(ErtsPollSet *ps, int interrupted) { -#if !ERTS_POLL_USE_CONCURRENT_UPDATE int wake; erts_aint32_t wakeup_state; if (!interrupted) @@ -434,9 +424,9 @@ wake_poller(ErtsPollSet *ps, int interrupted) wake = wakeup_state == ERTS_POLL_NOT_WOKEN; if (wake) -#endif { ssize_t res; + DEBUG_PRINT_WAIT("wake_poller(%d)", ps, interrupted); if (ps->wake_fds[1] < 0) return; /* Not initialized yet */ do { @@ -474,10 +464,8 @@ cleanup_wakeup_pipe(ErtsPollSet *ps) fd, erl_errno_id(errno), errno); } -#if !ERTS_POLL_USE_CONCURRENT_UPDATE if (intr) erts_atomic32_set_nob(&ps->wakeup_state, ERTS_POLL_WOKEN_INTR); -#endif } static void @@ -513,7 +501,67 @@ create_wakeup_pipe(ErtsPollSet *ps) ps->wake_fds[1] = wake_fds[1]; } +/* + * --- timer fd ----------------------------------------------------------- + */ + +#if ERTS_POLL_USE_TIMERFD + +/* We use the timerfd when using epoll_wait to get high accuracy + timeouts, i.e. we want to sleep with < ms accuracy. */ + +static void +create_timerfd(ErtsPollSet *ps) +{ + int do_wake = 0; + int timer_fd = timerfd_create(CLOCK_MONOTONIC,0); + ERTS_POLL_EXPORT(erts_poll_control)(ps, + timer_fd, + ERTS_POLL_OP_ADD, + ERTS_POLL_EV_IN, + &do_wake); + if (ps->internal_fd_limit <= timer_fd) + ps->internal_fd_limit = timer_fd + 1; + ps->timer_fd = timer_fd; +} + +static ERTS_INLINE void +timerfd_set(ErtsPollSet *ps, struct itimerspec *its) +{ +#ifdef DEBUG + struct itimerspec old_its; + int res; + res = timerfd_settime(ps->timer_fd, 0, its, &old_its); + ASSERT(res == 0); + ASSERT(old_its.it_interval.tv_sec == 0 && + old_its.it_interval.tv_nsec == 0 && + old_its.it_value.tv_sec == 0 && + old_its.it_value.tv_nsec == 0); + +#else + timerfd_settime(ps->timer_fd, 0, its, NULL); #endif +} + +static ERTS_INLINE int +timerfd_clear(ErtsPollSet *ps, ErtsPollResFd pr[], int res, int max_res) { + + struct itimerspec its; + /* we always have to clear the timer */ + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 0; + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 0; + timerfd_settime(ps->timer_fd, 0, &its, NULL); + + /* only timeout fd triggered */ + if (res == 1 && pr[0].data.fd == ps->timer_fd) + return 0; + + return res; +} + +#endif /* ERTS_POLL_USE_TIMERFD */ /* * --- Poll set update requests ---------------------------------------------- @@ -691,9 +739,12 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events) struct epoll_event epe_templ; struct epoll_event epe; - epe_templ.events = ERTS_POLL_EV_E2N(events) | EPOLLONESHOT; + epe_templ.events = ERTS_POLL_EV_E2N(events); epe_templ.data.fd = fd; + if (ps->oneshot) + epe_templ.events |= EPOLLONESHOT; + #ifdef VALGRIND /* Silence invalid valgrind warning ... */ memset((void *) &epe.data, 0, sizeof(epoll_data_t)); @@ -802,6 +853,7 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events) int res = 0, len = 0; struct kevent evts[2]; struct timespec ts = {0, 0}; + uint32_t oneshot = 0; if (op == ERTS_POLL_OP_ADD) { /* This is a hack to make the "noshell" option work; kqueue can poll @@ -840,6 +892,9 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events) man page), but it seems to be the way it works... 
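
A free-standing illustration of the timerfd technique introduced above (Linux only, error handling elided): arm a CLOCK_MONOTONIC timerfd with a sub-millisecond timeout, then block in epoll_wait() with an infinite timeout; the timerfd becoming readable is what wakes the call.

    #include <sys/epoll.h>
    #include <sys/timerfd.h>
    #include <stdio.h>

    int main(void) {
        int ep = epoll_create1(0);
        int tfd = timerfd_create(CLOCK_MONOTONIC, 0);
        struct epoll_event ev = { .events = EPOLLIN, .data.fd = tfd };
        struct itimerspec its = {0};
        struct epoll_event res[8];

        epoll_ctl(ep, EPOLL_CTL_ADD, tfd, &ev);

        /* 250 us: finer than the millisecond granularity of the plain
         * timeout argument of epoll_wait(). */
        its.it_value.tv_nsec = 250 * 1000;
        timerfd_settime(tfd, 0, &its, NULL);

        printf("woken, n=%d\n", epoll_wait(ep, res, 8, -1));
        return 0;
    }
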
 */
+
+    if (ps->oneshot)
+        oneshot = EV_DISPATCH;
+
     if (op == ERTS_POLL_OP_DEL) {
         erts_atomic_dec_nob(&ps->no_of_user_fds);
         /* We could probably skip this delete, do we want to? */
@@ -849,27 +904,29 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
             uint32_t flags;
             erts_atomic_inc_nob(&ps->no_of_user_fds);
 
-            flags = EV_ADD|EV_DISPATCH;
+            flags = EV_ADD|oneshot;
             flags |= ((events & ERTS_POLL_EV_IN) ? 0 : EV_DISABLE);
             ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN);
 
-            flags = EV_ADD|EV_DISPATCH;
+            flags = EV_ADD|oneshot;
             flags |= ((events & ERTS_POLL_EV_OUT) ? 0 : EV_DISABLE);
             ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT);
         } else {
             uint32_t flags;
             ASSERT(op == ERTS_POLL_OP_MOD);
 
-            flags = EV_DISPATCH;
+            flags = oneshot;
             flags |= (events & ERTS_POLL_EV_IN) ? EV_ENABLE : EV_DISABLE;
             ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN);
 
-            flags = EV_DISPATCH;
+            flags = oneshot;
             flags |= (events & ERTS_POLL_EV_OUT) ? EV_ENABLE : EV_DISABLE;
             ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT);
         }
 #else
-        uint32_t flags = EV_ADD|EV_ONESHOT;
+        uint32_t flags = EV_ADD;
+
+        if (ps->oneshot) flags |= EV_ONESHOT;
 
         if (op == ERTS_POLL_OP_DEL) {
             erts_atomic_dec_nob(&ps->no_of_user_fds);
@@ -903,14 +960,16 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
     keventbp += sprintf(keventbp, "kevent(%d, {",ps->kp_fd);
     for (i = 0; i < len; i++) {
         const char *flags = "UNKNOWN";
-        if (evts[i].flags == EV_DELETE) flags = "EV_DELETE";
+        if (evts[i].flags == (EV_DELETE)) flags = "EV_DELETE";
         if (evts[i].flags == (EV_ADD|EV_ONESHOT)) flags = "EV_ADD|EV_ONESHOT";
+        if (evts[i].flags == (EV_ADD)) flags = "EV_ADD";
 #ifdef EV_DISPATCH
         if (evts[i].flags == (EV_ADD|EV_DISPATCH)) flags = "EV_ADD|EV_DISPATCH";
         if (evts[i].flags == (EV_ADD|EV_DISABLE)) flags = "EV_ADD|EV_DISABLE";
         if (evts[i].flags == (EV_ENABLE|EV_DISPATCH)) flags = "EV_ENABLE|EV_DISPATCH";
-        if (evts[i].flags == EV_DISABLE) flags = "EV_DISABLE";
+        if (evts[i].flags == (EV_ENABLE)) flags = "EV_ENABLE";
+        if (evts[i].flags == (EV_DISABLE)) flags = "EV_DISABLE";
         if (evts[i].flags == (EV_DISABLE|EV_DISPATCH)) flags = "EV_DISABLE|EV_DISABLE";
 #endif
 
     keventbp += sprintf(keventbp, "%s{%lu, %s, %s}",i > 0 ? ", " : "",
@@ -1273,11 +1333,15 @@ poll_control(ErtsPollSet *ps, int fd, ErtsPollOp op,
 	    goto done;
 	}
 #endif
-#if ERTS_POLL_USE_WAKEUP_PIPE
 	if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1]) {
 	    new_events = ERTS_POLL_EV_NVAL;
 	    goto done;
 	}
+#if ERTS_POLL_USE_TIMERFD
+        if (fd == ps->timer_fd) {
+            new_events = ERTS_POLL_EV_NVAL;
+            goto done;
+        }
 #endif
     }
 
@@ -1333,11 +1397,8 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps,
 
     ERTS_POLLSET_UNLOCK(ps);
 
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
-    if (*do_wake) {
+    if (*do_wake)
 	wake_poller(ps, 0);
-    }
-#endif
 
     return res;
 }
@@ -1351,52 +1412,61 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps,
 
 static ERTS_INLINE int
 ERTS_POLL_EXPORT(save_result)(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res,
                               int chk_fds_res, int ebadf)
 {
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE || ERTS_POLL_DEBUG_PRINT || ERTS_POLL_USE_WAKEUP_PIPE
     int n = chk_fds_res < max_res ?
chk_fds_res : max_res, i; int res = n; -#if ERTS_POLL_USE_WAKEUP_PIPE int wake_fd = ps->wake_fds[0]; -#endif - for (i = 0; i < n; i++) { - int fd = ERTS_POLL_RES_GET_FD(&pr[i]); -#ifdef DEBUG_PRINT_MODE - ErtsPollEvents evts = ERTS_POLL_RES_GET_EVTS(pr+i); -#endif + if (ERTS_POLL_USE_WAKEUP(ps) || ERTS_POLL_DEBUG_PRINT || ERTS_POLL_USE_TIMERFD) { + + for (i = 0; i < n; i++) { + int fd = ERTS_POLL_RES_GET_FD(&pr[i]); +#if ERTS_POLL_DEBUG_PRINT + ErtsPollEvents evts = ERTS_POLL_RES_GET_EVTS(pr+i); - DEBUG_PRINT_FD("trig %s (%s)", ps, fd, - ev2str(evts), + if (fd != wake_fd +#if ERTS_POLL_USE_TIMERFD + && fd != ps->timer_fd +#endif + ) + DEBUG_PRINT_FD("trig %s (%s)", ps, fd, + ev2str(evts), #if ERTS_POLL_USE_KQUEUE - "kqueue" + "kqueue" #elif ERTS_POLL_USE_EPOLL - "epoll" + "epoll" #else - "/dev/poll" + "/dev/poll" +#endif + ); #endif - ); -#if ERTS_POLL_USE_WAKEUP_PIPE - if (fd == wake_fd) { - cleanup_wakeup_pipe(ps); - ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE); - if (n == 1) - return 0; - } + if (ERTS_POLL_USE_WAKEUP(ps) && fd == wake_fd) { + cleanup_wakeup_pipe(ps); + ERTS_POLL_RES_SET_FD(&pr[i], -1); + ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE); + res--; + } +#if ERTS_POLL_USE_TIMERFD + else if (fd == ps->timer_fd) { + ERTS_POLL_RES_SET_FD(&pr[i], -1); + ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE); + res--; + } #endif #if !ERTS_POLL_USE_CONCURRENT_UPDATE - else { - /* Reset the events to emulate ONESHOT semantics */ - ps->fds_status[fd].events = 0; - enqueue_update_request(ps, fd); - } + else { + /* Reset the events to emulate ONESHOT semantics */ + ps->fds_status[fd].events = 0; + enqueue_update_request(ps, fd); + } #endif + } } - return res; -#else - ASSERT(chk_fds_res <= max_res); - return chk_fds_res; -#endif + if (res == 0) + return res; + else + return n; } #else /* !ERTS_POLL_USE_KERNEL_POLL */ @@ -1577,19 +1647,168 @@ ERTS_POLL_EXPORT(save_result)(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, #endif /* !ERTS_POLL_USE_KERNEL_POLL */ +static ERTS_INLINE ErtsMonotonicTime +get_timeout(ErtsPollSet *ps, + int resolution, + ErtsMonotonicTime timeout_time) +{ + ErtsMonotonicTime timeout; + + if (timeout_time == ERTS_POLL_NO_TIMEOUT) { + timeout = 0; + } + else if (timeout_time == ERTS_POLL_INF_TIMEOUT) { + timeout = -1; + } + else { + ErtsMonotonicTime diff_time, current_time; + current_time = erts_get_monotonic_time(NULL); + diff_time = timeout_time - current_time; + if (diff_time <= 0) { + timeout = 0; + } + else { + switch (resolution) { + case 1000: + /* Round up to nearest even milli second */ + timeout = ERTS_MONOTONIC_TO_MSEC(diff_time - 1) + 1; + if (timeout > (ErtsMonotonicTime) INT_MAX) + timeout = (ErtsMonotonicTime) INT_MAX; + timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000); + break; + case 1000000: + /* Round up to nearest even micro second */ + timeout = ERTS_MONOTONIC_TO_USEC(diff_time - 1) + 1; + timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000*1000); + break; + case 1000000000: + /* Round up to nearest even nano second */ + timeout = ERTS_MONOTONIC_TO_NSEC(diff_time - 1) + 1; + timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000*1000*1000); + break; + default: + ERTS_INTERNAL_ERROR("Invalid resolution"); + timeout = 0; + break; + } + } + } + return timeout; +} + +#if ERTS_POLL_USE_SELECT + +static ERTS_INLINE int +get_timeout_timeval(ErtsPollSet *ps, + SysTimeval *tvp, + ErtsMonotonicTime timeout_time) +{ + ErtsMonotonicTime timeout = get_timeout(ps, + 1000*1000, + timeout_time); + + if (!timeout) { + tvp->tv_sec = 0; + tvp->tv_usec = 0; + + return 
0; + } + else if (timeout == -1) { + return -1; + } + else { + ErtsMonotonicTime sec = timeout/(1000*1000); + tvp->tv_sec = sec; + tvp->tv_usec = timeout - sec*(1000*1000); + + ASSERT(tvp->tv_sec >= 0); + ASSERT(tvp->tv_usec >= 0); + ASSERT(tvp->tv_usec < 1000*1000); + + return 1; + } + +} + +#endif + +#if ERTS_POLL_USE_KQUEUE || (ERTS_POLL_USE_POLL && defined(HAVE_PPOLL)) || ERTS_POLL_USE_TIMERFD + +static ERTS_INLINE int +get_timeout_timespec(ErtsPollSet *ps, + struct timespec *tsp, + ErtsMonotonicTime timeout_time) +{ + ErtsMonotonicTime timeout = get_timeout(ps, + 1000*1000*1000, + timeout_time); + + if (!timeout) { + tsp->tv_sec = 0; + tsp->tv_nsec = 0; + return 0; + } + else if (timeout == -1) { + return -1; + } + else { + ErtsMonotonicTime sec = timeout/(1000*1000*1000); + tsp->tv_sec = sec; + tsp->tv_nsec = timeout - sec*(1000*1000*1000); + + ASSERT(tsp->tv_sec >= 0); + ASSERT(tsp->tv_nsec >= 0); + ASSERT(tsp->tv_nsec < 1000*1000*1000); + + return 1; + } +} + +#endif + +#if ERTS_POLL_USE_TIMERFD + +static ERTS_INLINE int +get_timeout_itimerspec(ErtsPollSet *ps, + struct itimerspec *itsp, + ErtsMonotonicTime timeout_time) +{ + + itsp->it_interval.tv_sec = 0; + itsp->it_interval.tv_nsec = 0; + + return get_timeout_timespec(ps, &itsp->it_value, timeout_time); +} + +#endif + static ERTS_INLINE int -check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res) +check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, ErtsMonotonicTime timeout_time) { int res; - int timeout = do_wait ? -1 : 0; - DEBUG_PRINT_WAIT("Entering check_fd_events(), do_wait=%d", ps, do_wait); + int timeout; + DEBUG_PRINT_WAIT("Entering check_fd_events(), timeout=%d", ps, timeout_time); { #if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */ +#if ERTS_POLL_USE_TIMERFD + struct itimerspec its; + timeout = get_timeout_itimerspec(ps, &its, timeout_time); + if (timeout > 0) { + timerfd_set(ps, &its); + res = epoll_wait(ps->kp_fd, pr, max_res, -1); + res = timerfd_clear(ps, pr, res, max_res); + } else { + res = epoll_wait(ps->kp_fd, pr, max_res, timeout); + } +#else /* !ERTS_POLL_USE_TIMERFD */ + timeout = (int) get_timeout(ps, 1000, timeout_time); res = epoll_wait(ps->kp_fd, pr, max_res, timeout); - +#endif /* !ERTS_POLL_USE_TIMERFD */ #elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */ - struct timespec ts = {0, 0}; - struct timespec *tsp = timeout ? NULL : &ts; + struct timespec ts; + struct timespec *tsp; + timeout = get_timeout_timespec(ps, &ts, timeout_time); + tsp = timeout < 0 ? NULL : &ts; res = kevent(ps->kp_fd, NULL, 0, pr, max_res, tsp); #elif ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */ /* @@ -1601,16 +1820,22 @@ check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res) int nfds = (int) erts_atomic_read_nob(&ps->no_of_user_fds) + 1 /* wakeup pipe */; poll_res.dp_nfds = nfds < max_res ? 
nfds : max_res;
     poll_res.dp_fds = pr;
-    poll_res.dp_timeout = timeout;
+    poll_res.dp_timeout = (int) get_timeout(ps, 1000, timeout_time);
     res = ioctl(ps->kp_fd, DP_POLL, &poll_res);
-
+#elif ERTS_POLL_USE_POLL && defined(HAVE_PPOLL) /* --- ppoll ---------------- */
+    struct timespec ts;
+    struct timespec *tsp = &ts;
+    timeout = get_timeout_timespec(ps, &ts, timeout_time);
+    if (timeout < 0) tsp = NULL;
+    res = ppoll(ps->poll_fds, ps->no_poll_fds, tsp, NULL);
 #elif ERTS_POLL_USE_POLL        /* --- poll --------------------------------- */
-
+    timeout = (int) get_timeout(ps, 1000, timeout_time);
     res = poll(ps->poll_fds, ps->no_poll_fds, timeout);
-
 #elif ERTS_POLL_USE_SELECT      /* --- select ------------------------------ */
-    SysTimeval tv = {0, 0};
-    SysTimeval *tvp = timeout ? NULL : &tv;
+    SysTimeval tv;
+    SysTimeval *tvp;
+    timeout = get_timeout_timeval(ps, &tv, timeout_time);
+    tvp = timeout < 0 ? NULL : &tv;
 
     ERTS_FD_COPY(&ps->input_fds, &ps->res_input_fds);
     ERTS_FD_COPY(&ps->output_fds, &ps->res_output_fds);
@@ -1629,7 +1854,9 @@ check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res)
 
 int
 ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
                                  ErtsPollResFd pr[],
-                                 int *len)
+                                 int *len,
+                                 ErtsThrPrgrData *tpd,
+                                 ErtsMonotonicTime timeout_time)
 {
     int res, no_fds, used_fds = 0;
     int ebadf = 0;
@@ -1654,61 +1881,65 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
     }
 #endif
 
-    do_wait = !is_woken(ps) && used_fds == 0;
+    do_wait = !is_woken(ps) && used_fds == 0 && timeout_time != ERTS_POLL_NO_TIMEOUT;
 
     DEBUG_PRINT_WAIT("Entering %s(), do_wait=%d", ps, __FUNCTION__, do_wait);
 
     if (do_wait) {
-        erts_thr_progress_prepare_wait(NULL);
+        tpd = tpd ? tpd : erts_thr_prgr_data(NULL);
+        erts_thr_progress_prepare_wait(tpd);
         ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP);
-    }
+    } else
+        timeout_time = ERTS_POLL_NO_TIMEOUT;
 
     while (1) {
-        res = check_fd_events(ps, pr + used_fds, do_wait, no_fds - used_fds);
+        res = check_fd_events(ps, pr + used_fds, no_fds - used_fds, timeout_time);
+        if (res != 0)
+            break;
+        if (timeout_time == ERTS_POLL_NO_TIMEOUT)
+            break;
+        if (erts_get_monotonic_time(NULL) >= timeout_time)
+            break;
+    }
 
 #if !ERTS_POLL_USE_CONCURRENT_UPDATE
-        if (res < 0
-            && errno == EBADF
-            && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
-            /*
-             * This may have happened because another thread deselected
-             * a fd in our poll set and then closed it, i.e. the driver
-             * behaved correctly. We wan't to avoid looking for a bad
-             * fd, that may even not exist anymore. Therefore, handle
-             * update requests and try again. This behaviour should only
-             * happen when using SELECT as the polling mechanism.
-             */
-            ERTS_POLLSET_LOCK(ps);
-            used_fds += handle_update_requests(ps, pr + used_fds, no_fds - used_fds);
-            if (used_fds == no_fds) {
-                *len = used_fds;
-                ERTS_POLLSET_UNLOCK(ps);
-                return 0;
-            }
-            res = check_fd_events(ps, pr + used_fds, 0, no_fds - used_fds);
-            /* Keep the lock over the non-blocking poll in order to not
-               get any nasty races happening. */
+    if (res < 0
+        && errno == EBADF
+        && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
+        /*
+         * This may have happened because another thread deselected
+         * a fd in our poll set and then closed it, i.e. the driver
+         * behaved correctly. We want to avoid looking for a bad
+         * fd, that may even not exist anymore. Therefore, handle
+         * update requests and try again. This behaviour should only
+         * happen when using SELECT as the polling mechanism.
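
The (diff_time - 1) + 1 pattern that get_timeout() uses above is a round-up conversion: convert one unit short with truncating arithmetic, then add one unit back. A worked sketch, assuming for illustration that the native time unit is nanoseconds:

    #include <stdint.h>
    #include <stdio.h>

    /* Round a positive nanosecond count up to whole milliseconds the
     * same way get_timeout() does. */
    static int64_t ns_to_ms_roundup(int64_t ns) {
        return (ns - 1) / 1000000 + 1;
    }

    int main(void) {
        printf("%lld\n", (long long)ns_to_ms_roundup(1));       /* 1 */
        printf("%lld\n", (long long)ns_to_ms_roundup(1000000)); /* 1 */
        printf("%lld\n", (long long)ns_to_ms_roundup(1000001)); /* 2 */
        return 0;
    }

Rounding up matters here: rounding down could wake the waiter just before timeout_time, and the retry loop above would then spin until the deadline actually passes.
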
+ */ + ERTS_POLLSET_LOCK(ps); + used_fds += handle_update_requests(ps, pr + used_fds, no_fds - used_fds); + if (used_fds == no_fds) { + *len = used_fds; ERTS_POLLSET_UNLOCK(ps); - if (res == 0) { - errno = EAGAIN; - res = -1; - } + return 0; + } + res = check_fd_events(ps, pr + used_fds, no_fds - used_fds, ERTS_POLL_NO_TIMEOUT); + /* Keep the lock over the non-blocking poll in order to not + get any nasty races happening. */ + ERTS_POLLSET_UNLOCK(ps); + if (res == 0) { + errno = EAGAIN; + res = -1; } -#endif - - if (res != 0) - break; - if (!do_wait) - break; } +#endif if (do_wait) { - erts_thr_progress_finalize_wait(NULL); + erts_thr_progress_finalize_wait(tpd); ERTS_MSACC_UPDATE_CACHE(); ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_CHECK_IO); } - woke_up(ps); + if (ERTS_POLL_USE_WAKEUP(ps)) + woke_up(ps); if (res < 0) { #if ERTS_POLL_USE_SELECT @@ -1719,11 +1950,16 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps, #endif res = errno; } - else { + else if (res == 0) { + res = used_fds == 0 ? ETIMEDOUT : 0; +#ifdef HARD_DEBUG + check_poll_result(pr, used_fds); +#endif + *len = used_fds; + } else { #if ERTS_POLL_USE_SELECT save_results: #endif - ps_locked = 1; ERTS_POLLSET_LOCK(ps); @@ -1753,12 +1989,13 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps, void ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet *ps, int set) { -#if !ERTS_POLL_USE_CONCURRENT_UPDATE - if (!set) - reset_wakeup_state(ps); - else - wake_poller(ps, 1); -#endif + DEBUG_PRINT_WAIT("poll_interrupt(%d)", ps, set); + if (ERTS_POLL_USE_WAKEUP(ps)) { + if (!set) + reset_wakeup_state(ps); + else + wake_poller(ps, 1); + } } int @@ -1874,10 +2111,20 @@ ERTS_POLL_EXPORT(erts_poll_create_pollset)(int id) if (ps->internal_fd_limit <= kp_fd) ps->internal_fd_limit = kp_fd + 1; ps->kp_fd = kp_fd; + if (ps->id == -1) + ps->oneshot = 0; + else + ps->oneshot = 1; #endif -#if !ERTS_POLL_USE_CONCURRENT_UPDATE + erts_atomic32_init_nob(&ps->wakeup_state, (erts_aint32_t) 0); create_wakeup_pipe(ps); + +#if ERTS_POLL_USE_TIMERFD + create_timerfd(ps); +#endif + +#if !ERTS_POLL_USE_CONCURRENT_UPDATE handle_update_requests(ps, NULL, 0); cleanup_wakeup_pipe(ps); #endif @@ -1992,9 +2239,7 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet *ps, ErtsPollInfo *pip) pip->memory_size = size; pip->poll_set_size = (int) erts_atomic_read_nob(&ps->no_of_user_fds); -#if !ERTS_POLL_USE_CONCURRENT_UPDATE pip->poll_set_size++; /* Wakeup pipe */ -#endif pip->lazy_updates = #if !ERTS_POLL_USE_CONCURRENT_UPDATE @@ -2177,6 +2422,12 @@ ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet *ps, ASSERT(0); return; } + if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1]) + continue; +#if ERTS_POLL_USE_TIMERFD + if (fd == ps->timer_fd) + continue; +#endif data &= 0xFFFFFFFF; ASSERT(fd == data); /* Events are the events that are being monitored, which of course include diff --git a/erts/emulator/sys/common/erl_poll.h b/erts/emulator/sys/common/erl_poll.h index e1cea7eb8b..d40dabc529 100644 --- a/erts/emulator/sys/common/erl_poll.h +++ b/erts/emulator/sys/common/erl_poll.h @@ -51,6 +51,7 @@ #include "sys.h" #define ERTS_POLL_NO_TIMEOUT ERTS_MONOTONIC_TIME_MIN +#define ERTS_POLL_INF_TIMEOUT ERTS_MONOTONIC_TIME_MAX #ifdef ERTS_ENABLE_KERNEL_POLL # undef ERTS_ENABLE_KERNEL_POLL @@ -130,6 +131,9 @@ #endif #define ERTS_POLL_USE_FALLBACK (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL) +#define ERTS_POLL_USE_SCHEDULER_POLLING (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL) +#define ERTS_POLL_SCHEDULER_POLLING_TIMEOUT 10 +#define ERTS_POLL_USE_TIMERFD 0 typedef Uint32 
ErtsPollEvents;
@@ -156,6 +160,14 @@ typedef enum {
 
 #include <sys/epoll.h>
 
+#if ERTS_POLL_USE_EPOLL
+#ifdef HAVE_SYS_TIMERFD_H
+#include <sys/timerfd.h>
+#undef ERTS_POLL_USE_TIMERFD
+#define ERTS_POLL_USE_TIMERFD 1
+#endif
+#endif
+
 #define ERTS_POLL_EV_E2N(EV) \
   ((uint32_t) (EV))
 #define ERTS_POLL_EV_N2E(EV) \
@@ -276,7 +288,7 @@ typedef struct _ErtsPollResFd {
 
 #endif
 
-#define ERTS_POLL_EV_NONE (UINT_MAX & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT|ERTS_POLL_EV_NVAL|ERTS_POLL_EV_ERR))
+#define ERTS_POLL_EV_NONE ERTS_POLL_EV_N2E((UINT_MAX & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT|ERTS_POLL_EV_NVAL|ERTS_POLL_EV_ERR)))
 
 #define ev2str(ev) \
     (((ev) == 0 || (ev) == ERTS_POLL_EV_NONE) ? "NONE" : \
diff --git a/erts/emulator/sys/common/erl_poll_api.h b/erts/emulator/sys/common/erl_poll_api.h
index 1170a549b9..f3a91e54f7 100644
--- a/erts/emulator/sys/common/erl_poll_api.h
+++ b/erts/emulator/sys/common/erl_poll_api.h
@@ -72,11 +72,15 @@ ErtsPollEvents ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps,
 * @param res an array of fd results that the ready fds are put in.
 * @param[in] length the length of the res array
 * @param[out] length the number of ready events returned in res
+ * @param tpd the thread progress data to note sleep state in
+ * @param timeout_time the time, in native time units, to wake up at
 * @return 0 on success, else the ERRNO of the error that happened.
 */
int ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
                                     ErtsPollResFd res[],
-                                     int *length);
+                                     int *length,
+                                     ErtsThrPrgrData *tpd,
+                                     ErtsMonotonicTime timeout_time);
/**
 * Interrupt the thread waiting in the pollset. This function should be called
 * with set = 0 before any thread calls erts_poll_wait in order to clear any
diff --git a/erts/emulator/sys/unix/sys_uds.c b/erts/emulator/sys/unix/sys_uds.c
index 39a4866065..c9f73622ba 100644
--- a/erts/emulator/sys/unix/sys_uds.c
+++ b/erts/emulator/sys/unix/sys_uds.c
@@ -88,8 +88,9 @@ sys_uds_readv(int fd, struct iovec *iov, size_t iov_len,
 
     if((msg.msg_flags & MSG_CTRUNC) == MSG_CTRUNC) {
         /* We assume that we have given enough space for any header
-           that are sent to us. So the only remaining reason to get
-           this flag set is if the caller has run out of file descriptors.
+           that is sent to us. So the only remaining reasons to get
+           this flag set are that the caller has run out of file descriptors
+           or that an SELinux policy prunes the response (e.g. O_APPEND on STDERR).
         */
         errno = EMFILE;
         return -1;
diff --git a/erts/emulator/sys/win32/erl_poll.c b/erts/emulator/sys/win32/erl_poll.c
index 39bb4d515e..3843a27a6e 100644
--- a/erts/emulator/sys/win32/erl_poll.c
+++ b/erts/emulator/sys/win32/erl_poll.c
@@ -1017,10 +1017,12 @@ ErtsPollEvents erts_poll_control(ErtsPollSet *ps,
 
 int erts_poll_wait(ErtsPollSet *ps,
                    ErtsPollResFd pr[],
-                   int *len)
+                   int *len,
+                   ErtsThrPrgrData *tpd,
+                   Sint64 timeout_in)
 {
     int no_fds;
-    DWORD timeout = INFINITE;
+    DWORD timeout = timeout_in == -1 ?
INFINITE : timeout_in; EventData* ev; int res = 0; int num = 0; @@ -1056,10 +1058,10 @@ int erts_poll_wait(ErtsPollSet *ps, HARDDEBUGF(("Start waiting %d [%d]",num_h, (int) timeout)); ERTS_POLLSET_UNLOCK(ps); - erts_thr_progress_prepare_wait(NULL); + erts_thr_progress_prepare_wait(tpd); ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP); handle = WaitForMultipleObjects(num_h, harr, FALSE, timeout); - erts_thr_progress_finalize_wait(NULL); + erts_thr_progress_finalize_wait(tpd); ERTS_MSACC_POP_STATE(); ERTS_POLLSET_LOCK(ps); HARDDEBUGF(("Stop waiting %d [%d]",num_h, (int) timeout)); diff --git a/erts/emulator/test/Makefile b/erts/emulator/test/Makefile index bf00de2204..6a064ec8d4 100644 --- a/erts/emulator/test/Makefile +++ b/erts/emulator/test/Makefile @@ -33,6 +33,7 @@ MODULES= \ after_SUITE \ alloc_SUITE \ async_ports_SUITE \ + atomics_SUITE \ beam_SUITE \ beam_literals_SUITE \ bif_SUITE \ @@ -50,6 +51,7 @@ MODULES= \ call_trace_SUITE \ code_SUITE \ code_parallel_load_SUITE \ + counters_SUITE \ crypto_SUITE \ ddll_SUITE \ decode_packet_SUITE \ @@ -92,6 +94,7 @@ MODULES= \ port_SUITE \ port_bif_SUITE \ prim_eval_SUITE \ + persistent_term_SUITE \ process_SUITE \ pseudoknot_SUITE \ receive_SUITE \ diff --git a/erts/emulator/test/atomics_SUITE.erl b/erts/emulator/test/atomics_SUITE.erl new file mode 100644 index 0000000000..a5407c42ee --- /dev/null +++ b/erts/emulator/test/atomics_SUITE.erl @@ -0,0 +1,150 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(atomics_SUITE). + +-include_lib("common_test/include/ct.hrl"). + +-compile(export_all). + +suite() -> [{ct_hooks,[ts_install_cth]}]. + +all() -> + [signed, unsigned, bad, signed_limits, unsigned_limits]. + +signed(Config) when is_list(Config) -> + Size = 10, + Ref = atomics:new(Size,[]), + #{size:=Size, memory:=Memory} = atomics:info(Ref), + {_,true} = {Memory, Memory > Size*8}, + {_,true} = {Memory, Memory < Size*max_atomic_sz() + 100}, + [signed_do(Ref, Ix) || Ix <- lists:seq(1, Size)], + ok. + +signed_do(Ref, Ix) -> + 0 = atomics:get(Ref, Ix), + ok = atomics:put(Ref, Ix, 3), + ok = atomics:add(Ref, Ix, 14), + 17 = atomics:get(Ref, Ix), + 20 = atomics:add_get(Ref, Ix, 3), + -3 = atomics:add_get(Ref, Ix, -23), + 17 = atomics:add_get(Ref, Ix, 20), + ok = atomics:sub(Ref, Ix, 4), + 13 = atomics:get(Ref, Ix), + -7 = atomics:sub_get(Ref, Ix, 20), + 3 = atomics:sub_get(Ref, Ix, -10), + 3 = atomics:exchange(Ref, Ix, 666), + ok = atomics:compare_exchange(Ref, Ix, 666, 777), + 777 = atomics:compare_exchange(Ref, Ix, 666, -666), + ok. + +unsigned(Config) when is_list(Config) -> + Size = 10, + Ref = atomics:new(Size,[{signed, false}]), + #{size:=Size, memory:=Memory} = atomics:info(Ref), + true = Memory > Size*8, + true = Memory < Size*max_atomic_sz() + 100, + [unsigned_do(Ref, Ix) || Ix <- lists:seq(1, Size)], + ok. 
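
The new atomics module is a thin wrapper around native 64-bit atomic operations (the suite's max_atomic_sz/0 probes exactly that). A C11 sketch, illustrative only, of the add/exchange/compare-exchange pattern that signed_do/2 exercises; note how a failed compare-exchange reports the actual value, just as atomics:compare_exchange/4 does:

    #include <stdatomic.h>
    #include <stdint.h>
    #include <assert.h>

    int main(void) {
        _Atomic int64_t a = 0;
        int64_t expected;

        atomic_store(&a, 3);
        atomic_fetch_add(&a, 14);
        assert(atomic_load(&a) == 17);

        assert(atomic_exchange(&a, 666) == 17);

        expected = 666;
        assert(atomic_compare_exchange_strong(&a, &expected, 777));

        expected = 666;  /* stale expectation: current value is 777 */
        assert(!atomic_compare_exchange_strong(&a, &expected, -666));
        assert(expected == 777);
        return 0;
    }
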
+ +unsigned_do(Ref, Ix) -> + 0 = atomics:get(Ref, Ix), + ok = atomics:put(Ref, Ix, 3), + ok = atomics:add(Ref, Ix, 14), + 17 = atomics:get(Ref, Ix), + 20 = atomics:add_get(Ref, Ix, 3), + ok = atomics:sub(Ref, Ix, 7), + 13 = atomics:get(Ref, Ix), + 3 = atomics:sub_get(Ref, Ix, 10), + 3 = atomics:exchange(Ref, Ix, 666), + ok = atomics:compare_exchange(Ref, Ix, 666, 777), + 777 = atomics:compare_exchange(Ref, Ix, 666, 888), + ok. + +bad(Config) when is_list(Config) -> + {'EXIT',{badarg,_}} = (catch atomics:new(0,[])), + {'EXIT',{badarg,_}} = (catch atomics:new(10,[bad])), + {'EXIT',{badarg,_}} = (catch atomics:new(10,[{signed,bad}])), + {'EXIT',{badarg,_}} = (catch atomics:new(10,[{signed,true}, bad])), + {'EXIT',{badarg,_}} = (catch atomics:new(10,[{signed,false} | bad])), + Ref = atomics:new(10,[]), + {'EXIT',{badarg,_}} = (catch atomics:get(1742, 7)), + {'EXIT',{badarg,_}} = (catch atomics:get(make_ref(), 7)), + {'EXIT',{badarg,_}} = (catch atomics:get(Ref, -1)), + {'EXIT',{badarg,_}} = (catch atomics:get(Ref, 0)), + {'EXIT',{badarg,_}} = (catch atomics:get(Ref, 11)), + {'EXIT',{badarg,_}} = (catch atomics:get(Ref, 7.0)), + ok. + + +signed_limits(Config) when is_list(Config) -> + Bits = 64, + Max = (1 bsl (Bits-1)) - 1, + Min = -(1 bsl (Bits-1)), + + Ref = atomics:new(1,[{signed, true}]), + #{max:=Max, min:=Min} = atomics:info(Ref), + 0 = atomics:get(Ref, 1), + ok = atomics:add(Ref, 1, Max), + Min = atomics:add_get(Ref, 1, 1), + Max = atomics:sub_get(Ref, 1, 1), + + IncrMax = (Max bsl 1) bor 1, + ok = atomics:put(Ref, 1, 0), + ok = atomics:add(Ref, 1, IncrMax), + -1 = atomics:get(Ref, 1), + {'EXIT',{badarg,_}} = (catch atomics:add(Ref, 1, IncrMax+1)), + {'EXIT',{badarg,_}} = (catch atomics:add(Ref, 1, Min-1)), + + ok. + +unsigned_limits(Config) when is_list(Config) -> + Bits = 64, + Max = (1 bsl Bits) - 1, + Min = 0, + + Ref = atomics:new(1,[{signed,false}]), + #{max:=Max, min:=Min} = atomics:info(Ref), + 0 = atomics:get(Ref, 1), + ok = atomics:add(Ref, 1, Max), + Min = atomics:add_get(Ref, 1, 1), + Max = atomics:sub_get(Ref, 1, 1), + + atomics:put(Ref, 1, Max), + io:format("Max=~p~n", [atomics:get(Ref, 1)]), + + {'EXIT',{badarg,_}} = (catch atomics:add(Ref, 1, Max+1)), + IncrMin = -(1 bsl (Bits-1)), + ok = atomics:put(Ref, 1, -IncrMin), + ok = atomics:add(Ref, 1, IncrMin), + 0 = atomics:get(Ref, 1), + {'EXIT',{badarg,_}} = (catch atomics:add(Ref, 1, IncrMin-1)), + + ok. + +max_atomic_sz() -> + case erlang:system_info({wordsize, external}) of + 4 -> 16; + 8 -> + EI = erlang:system_info(ethread_info), + case lists:keyfind("64-bit native atomics", 1, EI) of + {_, "no", _} -> 16; + _ -> 8 + end + end. diff --git a/erts/emulator/test/big_SUITE.erl b/erts/emulator/test/big_SUITE.erl index 0a42b09903..3b9b9e5989 100644 --- a/erts/emulator/test/big_SUITE.erl +++ b/erts/emulator/test/big_SUITE.erl @@ -24,7 +24,7 @@ -export([t_div/1, eq_28/1, eq_32/1, eq_big/1, eq_math/1, big_literals/1, borders/1, negative/1, big_float_1/1, big_float_2/1, - bxor_2pow/1, + bxor_2pow/1, band_2pow/1, shift_limit_1/1, powmod/1, system_limit/1, toobig/1, otp_6692/1]). %% Internal exports. @@ -43,7 +43,7 @@ suite() -> all() -> [t_div, eq_28, eq_32, eq_big, eq_math, big_literals, borders, negative, {group, big_float}, shift_limit_1, - bxor_2pow, + bxor_2pow, band_2pow, powmod, system_limit, toobig, otp_6692]. groups() -> @@ -168,7 +168,11 @@ eval({op,_,Op,A0,B0}, LFH) -> Res = eval_op(Op, A, B), erlang:garbage_collect(), Res; -eval({integer,_,I}, _) -> I; +eval({integer,_,I}, _) -> + %% "Parasitic" ("symbiotic"?) 
test of squaring all numbers
+    %% found in the test data.
+    test_squaring(I),
+    I;
 eval({call,_,{atom,_,Local},Args0}, LFH) ->
     Args = eval_list(Args0, LFH),
     LFH(Local, Args). @@ -192,6 +196,18 @@ eval_op('bxor', A, B) -> A bxor B;
 eval_op('bsl', A, B) -> A bsl B;
 eval_op('bsr', A, B) -> A bsr B.
+test_squaring(I) ->
+    %% Multiplying an integer by itself is specially optimized, so we
+    %% should take special care to test squaring.  The optimization
+    %% will kick in when the two operands have the same address.
+    Sqr = I * I,
+
+    %% This expression will be multiplied in the usual way, because
+    %% the two operands for '*' are stored at different addresses.
+    Sqr = I * ((I + id(1)) - id(1)),
+
+    ok.
+
 %% Built in test functions
 fac(0) -> 1; @@ -456,3 +472,38 @@ my_bxor(A, B, N, Acc0) ->
 	     false -> Acc0 bor (1 bsl N)
 	 end,
     my_bxor(A bsr 1, B bsr 1, N+1, Acc1).
+
+
+%% ERL-804
+band_2pow(_Config) ->
+    IL = lists:seq(8*3, 8*16, 4),
+    JL = lists:seq(0, 64),
+    [band_2pow_1((1 bsl I), (1 bsl J))
+     || I <- IL, J <- JL],
+    ok.
+
+band_2pow_1(A, B) ->
+    for(-1,1, fun(Ad) ->
+                      for(-1,1, fun(Bd) ->
+                                        band_2pow_2(A+Ad, B+Bd),
+                                        band_2pow_2(-A+Ad, B+Bd),
+                                        band_2pow_2(A+Ad, -B+Bd),
+                                        band_2pow_2(-A+Ad, -B+Bd)
+                                end)
+              end).
+
+band_2pow_2(A, B) ->
+    Correct = my_band(A, B),
+    case A band B of
+        Correct -> ok;
+        Wrong ->
+            io:format("~.16# band ~.16#\n", [A,B]),
+            io:format("Expected ~.16#\n", [Correct]),
+            io:format("Got ~.16#\n", [Wrong]),
+            ct:fail({failed, 'band'})
+
+    end.
+
+%% Implement band without band
+my_band(A, B) ->
+    bnot ((bnot A) bor (bnot B)). diff --git a/erts/emulator/test/code_SUITE.erl b/erts/emulator/test/code_SUITE.erl index 9c6dc3ff83..0444ba4f89 100644 --- a/erts/emulator/test/code_SUITE.erl +++ b/erts/emulator/test/code_SUITE.erl @@ -28,7 +28,7 @@ fake_literals/1, false_dependency/1,coverage/1,fun_confusion/1,
 	 t_copy_literals/1, t_copy_literals_frags/1,
-         erl_544/1]).
+         erl_544/1, max_heap_size/1]).
 -define(line_trace, 1).
 -include_lib("common_test/include/ct.hrl"). @@ -43,7 +43,7 @@ all() ->
      constant_pools, constant_refc_binaries, fake_literals,
      false_dependency, coverage, fun_confusion,
      t_copy_literals, t_copy_literals_frags,
-     erl_544].
+     erl_544, max_heap_size].
 init_per_suite(Config) ->
     erts_debug:set_internal_state(available_internal_state, true), @@ -968,6 +968,39 @@ erl_544(Config) when is_list(Config) ->
             {skipped, "Only run when native file name encoding is utf8"}
     end.
+%% Test that the copying of literals to a process during purging of
+%% literals will cause the process to be killed if the max heap size
+%% is exceeded.
+max_heap_size(_Config) ->
+    Mod = ?FUNCTION_NAME,
+    Value = [I || I <- lists:seq(1, 5000)],
+    Code = gen_lit(Mod, [{term,Value}]),
+    {module,Mod} = erlang:load_module(Mod, Code),
+    SpawnOpts = [monitor,
+                 {max_heap_size,
+                  #{size=>1024,
+                    kill=>true,
+                    error_logger=>true}}],
+    {Pid,Ref} = spawn_opt(fun() ->
+                                  max_heap_size_proc(Mod)
+                          end, SpawnOpts),
+    receive
+        {'DOWN',Ref,process,Pid,Reason} ->
+            killed = Reason;
+        Other ->
+            ct:fail({unexpected_message,Other})
+    after 10000 ->
+            ct:fail({process_did_not_die, Pid, erlang:process_info(Pid)})
+    end.
+
+max_heap_size_proc(Mod) ->
+    Value = Mod:term(),
+    code:delete(Mod),
+    code:purge(Mod),
+    receive
+        _ -> Value
+    end.
+
 %% Utilities.
make_sub_binary(Bin) when is_binary(Bin) -> diff --git a/erts/emulator/test/counters_SUITE.erl b/erts/emulator/test/counters_SUITE.erl new file mode 100644 index 0000000000..b3f0358c1e --- /dev/null +++ b/erts/emulator/test/counters_SUITE.erl @@ -0,0 +1,234 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(counters_SUITE). + +-include_lib("common_test/include/ct.hrl"). + +-export([suite/0, all/0]). +-export([basic/1, bad/1, limits/1, indep/1, write_concurrency/1]). + +suite() -> [{ct_hooks,[ts_install_cth]}]. + +all() -> + [basic, bad, limits, indep, write_concurrency]. + +basic(Config) when is_list(Config) -> + Size = 10, + [begin + Ref = counters:new(Size,[Type]), + #{size:=Size, memory:=Memory} = counters:info(Ref), + check_memory(Type, Memory, Size), + [basic_do(Ref, Ix) || Ix <- lists:seq(1, Size)] + end + || Type <- [atomics, write_concurrency]], + ok. + +basic_do(Ref, Ix) -> + 0 = counters:get(Ref, Ix), + ok = counters:add(Ref, Ix, 3), + 3 = counters:get(Ref, Ix), + ok = counters:add(Ref, Ix, 14), + 17 = counters:get(Ref, Ix), + ok = counters:add(Ref, Ix, -20), + -3 = counters:get(Ref, Ix), + ok = counters:add(Ref, Ix, 100), + 97 = counters:get(Ref, Ix), + ok = counters:sub(Ref, Ix, 20), + 77 = counters:get(Ref, Ix), + ok = counters:sub(Ref, Ix, -10), + 87 = counters:get(Ref, Ix), + ok = counters:put(Ref, Ix, 0), + 0 = counters:get(Ref, Ix), + ok = counters:put(Ref, Ix, 123), + 123 = counters:get(Ref, Ix), + ok = counters:put(Ref, Ix, -321), + -321 = counters:get(Ref, Ix), + ok. + +check_memory(atomics, Memory, Size) -> + {_,true} = {Memory, Memory > Size*8}, + {_,true} = {Memory, Memory < Size*max_atomic_sz() + 100}; +check_memory(write_concurrency, Memory, Size) -> + NWords = erlang:system_info(schedulers) + 1, + {_,true} = {Memory, Memory > NWords*Size*8}, + {_,true} = {Memory, Memory < NWords*(Size+7)*max_atomic_sz() + 100}. + +max_atomic_sz() -> + case erlang:system_info({wordsize, external}) of + 4 -> 16; + 8 -> + EI = erlang:system_info(ethread_info), + case lists:keyfind("64-bit native atomics", 1, EI) of + {_, "no", _} -> 16; + _ -> 8 + end + end. + +bad(Config) when is_list(Config) -> + {'EXIT',{badarg,_}} = (catch counters:new(0,[])), + {'EXIT',{badarg,_}} = (catch counters:new(10,[bad])), + {'EXIT',{badarg,_}} = (catch counters:new(10,[atomic, bad])), + {'EXIT',{badarg,_}} = (catch counters:new(10,[write_concurrency | bad])), + Ref = counters:new(10,[]), + {'EXIT',{badarg,_}} = (catch counters:get(1742, 7)), + {'EXIT',{badarg,_}} = (catch counters:get(make_ref(), 7)), + {'EXIT',{badarg,_}} = (catch counters:get(Ref, -1)), + {'EXIT',{badarg,_}} = (catch counters:get(Ref, 0)), + {'EXIT',{badarg,_}} = (catch counters:get(Ref, 11)), + {'EXIT',{badarg,_}} = (catch counters:get(Ref, 7.0)), + ok. + + +limits(Config) when is_list(Config) -> + limits_do(counters:new(1,[atomics])), + limits_do(counters:new(1,[write_concurrency])), + ok. 
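%% (Illustrative sketch, not part of the commit: the basic counters API
%% exercised by this suite. The values are made up; [atomics] and
%% [write_concurrency] behave the same functionally, with
%% write_concurrency trading memory for less write contention.)
%%
%%     Ref = counters:new(2, [write_concurrency]),
%%     ok  = counters:add(Ref, 1, 5),
%%     ok  = counters:sub(Ref, 1, 2),
%%     3   = counters:get(Ref, 1).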
+
+limits_do(Ref) ->
+    Bits = 64,
+    Max = (1 bsl (Bits-1)) - 1,
+    Min = -(1 bsl (Bits-1)),
+
+    0 = counters:get(Ref, 1),
+    ok = counters:put(Ref, 1, Max),
+    Max = counters:get(Ref, 1),
+    ok = counters:add(Ref, 1, 1),
+    Min = counters:get(Ref, 1),
+    ok = counters:sub(Ref, 1, 1),
+    Max = counters:get(Ref, 1),
+    ok = counters:put(Ref, 1, Min),
+    Min = counters:get(Ref, 1),
+
+    IncrMax = (Max bsl 1) bor 1,
+    ok = counters:put(Ref, 1, 0),
+    ok = counters:add(Ref, 1, IncrMax),
+    -1 = counters:get(Ref, 1),
+    {'EXIT',{badarg,_}} = (catch counters:add(Ref, 1, IncrMax+1)),
+    {'EXIT',{badarg,_}} = (catch counters:add(Ref, 1, Min-1)),
+    {'EXIT',{badarg,_}} = (catch counters:put(Ref, 1, Max+1)),
+    {'EXIT',{badarg,_}} = (catch counters:put(Ref, 1, Min-1)),
+    ok.
+
+
+%% Verify that independent workers, using different counters
+%% within the same array, do not interfere with each other.
+indep(Config) when is_list(Config) ->
+    NScheds = erlang:system_info(schedulers),
+    Ref = counters:new(NScheds,[write_concurrency]),
+    Rounds = 100,
+    Papa = self(),
+    Pids = [spawn_opt(fun () ->
+                              Val = I*197,
+                              counters:put(Ref, I, Val),
+                              indep_looper(Rounds, Ref, I, Val),
+                              Papa ! {self(), done}
+                      end,
+                      [link, {scheduler, I}])
+            || I <- lists:seq(1, NScheds)],
+    [receive {P,done} -> ok end || P <- Pids],
+    ok.
+
+indep_looper(0, _, _ , _) ->
+    ok;
+indep_looper(N, Ref, I, Val0) ->
+    %%io:format("Val0 = ~p\n", [Val0]),
+    Val0 = counters:get(Ref, I),
+    Val1 = indep_adder(Ref, I, Val0),
+    indep_subber(Ref, I, Val1),
+    Val2 = N*7 + I,
+    counters:put(Ref, I, Val2),
+    indep_looper(N-1, Ref, I, Val2).
+
+indep_adder(Ref, I, Val) when Val < (1 bsl 62) ->
+    %%io:format("adder Val = ~p\n", [Val]),
+    Incr = abs(Val div 2) + I + 984735,
+    counters:add(Ref, I, Incr),
+    Res = Val + Incr,
+    Res = counters:get(Ref, I),
+    indep_adder(Ref, I, Res);
+indep_adder(_Ref, _I, Val) ->
+    Val.
+
+indep_subber(Ref, I, Val) when Val > -(1 bsl 62) ->
+    %%io:format("subber Val = ~p\n", [Val]),
+    Decr = (abs(Val div 2) + I + 725634),
+    counters:sub(Ref, I, Decr),
+    Res = Val - Decr,
+    Res = counters:get(Ref, I),
+    indep_subber(Ref, I, Res);
+indep_subber(_Ref, _I, Val) ->
+    Val.
+
+
+
+%% Verify write_concurrency yields correct results.
+write_concurrency(Config) when is_list(Config) ->
+    rand:seed(exs1024s),
+    io:format("*** SEED: ~p ***\n", [rand:export_seed()]),
+    NScheds = erlang:system_info(schedulers),
+    Size = 100,
+    Ref = counters:new(Size,[write_concurrency]),
+    Rounds = 1000,
+    Papa = self(),
+    Pids = [spawn_opt(fun Worker() ->
+                              receive
+                                  {go, Ix, Incr} ->
+                                      wc_looper(Rounds, Ref, Ix, Incr),
+                                      Papa ! {self(), done, Rounds*Incr},
+                                      Worker();
+                                  stop ->
+                                      ok
+                              end
+                      end,
+                      [link, {scheduler, N}])
+            || N <- lists:seq(1, NScheds)],
+    [begin
+         Base = rand_log64(),
+         counters:put(Ref, Index, Base),
+         SendList = [{P,{go, Index, rand_log64()}} || P <- Pids],
+         [P ! Msg || {P,Msg} <- SendList],
+         Added = lists:sum([receive {P,done,Contrib} -> Contrib end || P <- Pids]),
+         Result = mask_sint64(Base+Added),
+         {_,Result} = {Result, counters:get(Ref, Index)}
+     end
+     || Index <- lists:seq(1, Size)],
+
+    [begin unlink(P), P ! stop end || P <- Pids],
+    ok.
+
+wc_looper(0, _, _, _) ->
+    ok;
+wc_looper(N, Ref, Ix, Incr) ->
+    counters:add(Ref, Ix, Incr),
+    wc_looper(N-1, Ref, Ix, Incr).
+
+mask_sint64(X) ->
+    SMask = 1 bsl 63,
+    UMask = SMask - 1,
+    (X band UMask) - (X band SMask).
+
+%% A random signed 64-bit integer
+%% with a uniformly distributed number of significant bits.
+rand_log64() -> + Uint = round(math:pow(2, rand:uniform()*63)), + case rand:uniform(2) of + 1 -> -Uint; + 2 -> Uint + end. diff --git a/erts/emulator/test/driver_SUITE.erl b/erts/emulator/test/driver_SUITE.erl index 6f5d639d04..bd62708aa7 100644 --- a/erts/emulator/test/driver_SUITE.erl +++ b/erts/emulator/test/driver_SUITE.erl @@ -1754,7 +1754,7 @@ smp_select0(Config) -> ProcFun = fun()-> io:format("Worker ~p starting\n",[self()]), Port = open_port({spawn, DrvName}, []), smp_select_loop(Port, 100000), - sleep(1000), % wait for driver to handle pending events + smp_select_done(Port), true = erlang:port_close(Port), Master ! {ok,self()}, io:format("Worker ~p finished\n",[self()]) @@ -1784,6 +1784,21 @@ smp_select_loop(Port, N) -> smp_select_loop(Port, N-1) end. +smp_select_done(Port) -> + case erlang:port_control(Port, ?CHKIO_SMP_SELECT, "done") of + "wait" -> + receive + {Port, done} -> + ok + after 10*1000 -> + %% Seems we have a lost ready_input event. + %% Go ahead anyway, port will crash VM when closed. + ok + end; + + "ok" -> ok + end. + smp_select_wait([], _) -> ok; smp_select_wait(Pids, TimeoutMsg) -> diff --git a/erts/emulator/test/driver_SUITE_data/chkio_drv.c b/erts/emulator/test/driver_SUITE_data/chkio_drv.c index ee8f28e8b1..b9ee155b4b 100644 --- a/erts/emulator/test/driver_SUITE_data/chkio_drv.c +++ b/erts/emulator/test/driver_SUITE_data/chkio_drv.c @@ -90,7 +90,7 @@ typedef struct chkio_smp_select { int next_read; int next_write; int first_write; - enum {Closed, Opened, Selected, Waiting} state; + enum {Closed, Opened, Selected, Waiting, WaitingDone} state; int wasSelected; unsigned rand_state; }ChkioSmpSelect; @@ -292,18 +292,20 @@ stop_steal_aux(ChkioDrvData *cddp) static void free_smp_select(ChkioSmpSelect* pip, ErlDrvPort port) { switch (pip->state) { + case WaitingDone: case Waiting: { int word; - fprintf(stderr, "Closing pipe in state Waiting. Event lost?\n"); + fprintf(stderr, "Closing pipe in state Waiting*. 
Event lost?\r\n"); for (;;) { int bytes = read(pip->read_fd, &word, sizeof(word)); if (bytes != sizeof(word)) { if (bytes != 0) { - fprintf(stderr, "Failed to read from pipe, bytes=%d, errno=%d\n", bytes, errno); + fprintf(stderr, "Failed to read from pipe, bytes=%d, errno=%d\r\n", + bytes, errno); } break; } - fprintf(stderr, "Read from pipe: %d\n", word); + fprintf(stderr, "Read from pipe: %d\r\n", word); } abort(); } @@ -318,6 +320,8 @@ static void free_smp_select(ChkioSmpSelect* pip, ErlDrvPort port) close(pip->write_fd); pip->state = Closed; break; + case Closed: + break; } driver_free(pip); } @@ -383,6 +387,9 @@ chkio_drv_start(ErlDrvPort port, char *command) cddp->id = driver_mk_port(port); cddp->test = CHKIO_STOP; cddp->test_data = NULL; + + drv_use_singleton.fd_stop_select = -2; /* disable stop_select asserts */ + return (ErlDrvData) cddp; #endif } @@ -526,7 +533,7 @@ chkio_drv_ready_input(ErlDrvData drv_data, ErlDrvEvent event) printf("Read event on uninitiated pipe %d\n", fd); abort(); } - if (pip->state != Selected && pip->state != Waiting) { + if (pip->state != Selected && pip->state != Waiting && pip->state != WaitingDone) { printf("Read event on pipe in strange state %d\n", pip->state); abort(); } @@ -536,9 +543,9 @@ chkio_drv_ready_input(ErlDrvData drv_data, ErlDrvEvent event) inPipe = (pip->next_write - pip->next_read); if (inPipe == 0) { bytes = read(pip->read_fd, &word, sizeof(word)); - printf("Unexpected empty pipe, expected %u -> %u, bytes=%d, word=%d, written=%d\n", - pip->next_read, pip->next_write-1, bytes, word, - (pip->next_write - pip->first_write)); + printf("Unexpected empty pipe: ptr=%p, fds=%d->%d, read bytes=%d, word=%d, written=%d\n", + pip, pip->write_fd, pip->read_fd, + bytes, word, (pip->next_write - pip->first_write)); /*abort(); Allow unexpected events as it's been seen to be triggered by epoll on Linux. 
Most of the time the unwanted events are filtered by @@ -564,7 +571,20 @@ chkio_drv_ready_input(ErlDrvData drv_data, ErlDrvEvent event)
 	    TRACEF(("Read %d from fd=%d\n", word, fd));
 	    pip->next_read++;
 	}
-	pip->state = Selected; /* not Waiting anymore */
+        if (pip->state == WaitingDone) {
+            if (pip->next_write == pip->next_read) {
+                /* All data read, send {Port, done} */
+                ErlDrvTermData spec[] = {ERL_DRV_PORT, driver_mk_port(cddp->port),
+                                         ERL_DRV_ATOM, driver_mk_atom("done"),
+                                         ERL_DRV_TUPLE, 2};
+                erl_drv_output_term(driver_mk_port(cddp->port),
+                                    spec, sizeof(spec) / sizeof(spec[0]));
+                pip->state = Selected;
+            }
+        }
+        else {
+            pip->state = Selected; /* not Waiting anymore */
+        }
 	break;
     }
     case CHKIO_DRV_USE: @@ -962,6 +982,16 @@ chkio_drv_control(ErlDrvData drv_data,
     }
     case CHKIO_SMP_SELECT: {
 	ChkioSmpSelect* pip = (ChkioSmpSelect*) cddp->test_data;
+        if (len == 4 && memcmp(buf, "done", 4) == 0) {
+            if (pip && pip->state == Waiting) {
+                pip->state = WaitingDone;
+                res_str = "wait";
+            }
+            else
+                res_str = "ok";
+            res_len = -1;
+            break;
+        }
 	if (pip == NULL) {
 	    erl_drv_mutex_lock(smp_pipes_mtx);
 	    if (smp_pipes) { @@ -1014,7 +1044,6 @@ chkio_drv_control(ErlDrvData drv_data,
 	    if (pip->wasSelected && (op & 1)) {
 		TRACEF(("%T: Close pipe [%d->%d]\n", cddp->id, pip->write_fd,
 			pip->read_fd));
-                drv_use_singleton.fd_stop_select = -2; /* disable stop_select asserts */
		if (driver_select(cddp->port, (ErlDrvEvent)(ErlDrvSInt)pip->read_fd,
				  DO_READ|ERL_DRV_USE, 0)
		    || close(pip->write_fd)) { diff --git a/erts/emulator/test/persistent_term_SUITE.erl b/erts/emulator/test/persistent_term_SUITE.erl new file mode 100644 index 0000000000..58cd3276b0 --- /dev/null +++ b/erts/emulator/test/persistent_term_SUITE.erl @@ -0,0 +1,614 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2017. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-module(persistent_term_SUITE).
+-include_lib("common_test/include/ct.hrl").
+
+-export([all/0,suite/0,
+         basic/1,purging/1,sharing/1,get_trapping/1,
+         info/1,info_trapping/1,killed_while_trapping/1,
+         off_heap_values/1,keys/1,collisions/1,
+         init_restart/1]).
+
+%%
+-export([test_init_restart_cmd/1]).
+
+suite() ->
+    [{ct_hooks,[ts_install_cth]},
+     {timetrap,{minutes,10}}].
+
+all() ->
+    [basic,purging,sharing,get_trapping,info,info_trapping,
+     killed_while_trapping,off_heap_values,keys,collisions,
+     init_restart].
+
+basic(_Config) ->
+    Chk = chk(),
+    N = 777,
+    Seq = lists:seq(1, N),
+    par(2, N, Seq),
+    seq(3, Seq),
+    seq(3, Seq),                                %Same values.
+    _ = [begin
+             Key = {?MODULE,{key,I}},
+             true = persistent_term:erase(Key),
+             false = persistent_term:erase(Key),
+             {'EXIT',{badarg,_}} = (catch persistent_term:get(Key))
+         end || I <- Seq],
+    [] = [P || {{?MODULE,_},_}=P <- persistent_term:get()],
+    chk(Chk).
+ +par(C, N, Seq) -> + _ = [spawn_link(fun() -> + ok = persistent_term:put({?MODULE,{key,I}}, + {value,C*I}) + end) || I <- Seq], + Result = wait(N), + _ = [begin + Double = C*I, + {{?MODULE,{key,I}},{value,Double}} = Res + end || {I,Res} <- lists:zip(Seq, Result)], + ok. + +seq(C, Seq) -> + _ = [ok = persistent_term:put({?MODULE,{key,I}}, {value,C*I}) || + I <- Seq], + All = persistent_term:get(), + All = [P || {{?MODULE,_},_}=P <- persistent_term:get()], + All = [{Key,persistent_term:get(Key)} || {Key,_} <- All], + Result = lists:sort(All), + _ = [begin + Double = C*I, + {{?MODULE,{key,I}},{value,Double}} = Res + end || {I,Res} <- lists:zip(Seq, Result)], + ok. + +wait(N) -> + All = [P || {{?MODULE,_},_}=P <- persistent_term:get()], + case length(All) of + N -> + All = [{Key,persistent_term:get(Key)} || {Key,_} <- All], + lists:sort(All); + _ -> + receive after 10 -> ok end, + wait(N) + end. + +%% Make sure that terms that have been erased are copied into all +%% processes that still hold a pointer to them. + +purging(_Config) -> + Chk = chk(), + do_purging(fun(K) -> persistent_term:put(K, {?MODULE,new}) end, + replaced), + do_purging(fun persistent_term:erase/1, erased), + chk(Chk). + +do_purging(Eraser, Type) -> + Parent = self(), + Key = {?MODULE,?FUNCTION_NAME}, + ok = persistent_term:put(Key, {term,[<<"abc",0:777/unit:8>>]}), + Ps0 = [spawn_monitor(fun() -> purging_tester(Parent, Key) end) || + _ <- lists:seq(1, 50)], + Ps = maps:from_list(Ps0), + purging_recv(gotten, Ps), + Eraser(Key), + _ = [P ! {Parent,Type} || P <- maps:keys(Ps)], + purging_wait(Ps). + +purging_recv(Tag, Ps) when map_size(Ps) > 0 -> + receive + {Pid,Tag} -> + true = is_map_key(Pid, Ps), + purging_recv(Tag, maps:remove(Pid, Ps)) + end; +purging_recv(_, _) -> ok. + +purging_wait(Ps) when map_size(Ps) > 0 -> + receive + {'DOWN',Ref,process,Pid,Reason} -> + normal = Reason, + Ref = map_get(Pid, Ps), + purging_wait(maps:remove(Pid, Ps)) + end; +purging_wait(_) -> ok. + +purging_tester(Parent, Key) -> + Term = persistent_term:get(Key), + purging_check_term(Term), + 0 = erts_debug:size_shared(Term), + Parent ! {self(),gotten}, + receive + {Parent,erased} -> + {'EXIT',{badarg,_}} = (catch persistent_term:get(Key)), + purging_tester_1(Term); + {Parent,replaced} -> + {?MODULE,new} = persistent_term:get(Key), + purging_tester_1(Term) + end. + +%% Wait for the term to be copied into this process. +purging_tester_1(Term) -> + purging_check_term(Term), + receive after 1 -> ok end, + case erts_debug:size_shared(Term) of + 0 -> + purging_tester_1(Term); + Size -> + %% The term has been copied into this process. + purging_check_term(Term), + Size = erts_debug:size(Term) + end. + +purging_check_term({term,[<<"abc",0:777/unit:8>>]}) -> + ok. + +%% Test that sharing is preserved when storing terms. + +sharing(_Config) -> + Chk = chk(), + Depth = 10, + Size = 2*Depth, + Shared = lists:foldl(fun(_, A) -> [A|A] end, + [], lists:seq(1, Depth)), + Size = erts_debug:size(Shared), + Key = {?MODULE,?FUNCTION_NAME}, + ok = persistent_term:put(Key, Shared), + SharedStored = persistent_term:get(Key), + Size = erts_debug:size(SharedStored), + 0 = erts_debug:size_shared(SharedStored), + + {Pid,Ref} = spawn_monitor(fun() -> + Term = persistent_term:get(Key), + Size = erts_debug:size(Term), + 0 = erts_debug:size_shared(Term), + true = Term =:= SharedStored + end), + receive + {'DOWN',Ref,process,Pid,normal} -> + true = persistent_term:erase(Key), + Size = erts_debug:size(SharedStored), + chk(Chk) + end. 
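%% (Illustrative note, not part of the commit: sharing/1 above relies on
%% erts_debug:size/1 being sharing-aware. A term built as [A|A] stores A
%% once, so size/1 grows linearly with the nesting depth even though the
%% flattened term grows exponentially, and put/2 followed by get/1 is
%% expected to preserve exactly that size:)
%%
%%     Shared = lists:foldl(fun(_, A) -> [A|A] end, [], lists:seq(1, 10)),
%%     20 = erts_debug:size(Shared).   % 2 words per cons cell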
+
+%% Test trapping of persistent_term:get/0.
+
+get_trapping(_Config) ->
+    Chk = chk(),
+
+    %% Assume that the get/0 traps after 4000 iterations
+    %% in a non-debug emulator.
+    N = case test_server:timetrap_scale_factor() of
+            1 -> 10000;
+            _ -> 1000
+        end,
+    spawn_link(fun() -> get_trapping_create(N) end),
+    All = do_get_trapping(N, []),
+    N = get_trapping_check_result(lists:sort(All), 1),
+    erlang:garbage_collect(),
+    get_trapping_erase(N),
+    chk(Chk).
+
+do_get_trapping(N, Prev) ->
+    case persistent_term:get() of
+        Prev when length(Prev) >= N ->
+            All = [P || {{?MODULE,{get_trapping,_}},_}=P <- Prev],
+            case length(All) of
+                N -> All;
+                _ -> do_get_trapping(N, Prev)
+            end;
+        New ->
+            receive after 1 -> ok end,
+            do_get_trapping(N, New)
+    end.
+
+get_trapping_create(0) ->
+    ok;
+get_trapping_create(N) ->
+    ok = persistent_term:put({?MODULE,{get_trapping,N}}, N),
+    get_trapping_create(N-1).
+
+get_trapping_check_result([{{?MODULE,{get_trapping,N}},N}|T], N) ->
+    get_trapping_check_result(T, N+1);
+get_trapping_check_result([], N) -> N-1.
+
+get_trapping_erase(0) ->
+    ok;
+get_trapping_erase(N) ->
+    true = persistent_term:erase({?MODULE,{get_trapping,N}}),
+    get_trapping_erase(N-1).
+
+%% Test retrieving information about persistent terms.
+
+info(_Config) ->
+    Chk = chk(),
+
+    %% White box test of info/0.
+    N = 100,
+    try
+        Overhead = info_literal_area_overhead(),
+        io:format("Overhead = ~p\n", [Overhead]),
+        info_wb(N, Overhead, info_info())
+    after
+        _ = [_ = persistent_term:erase({?MODULE,I}) ||
+                I <- lists:seq(1, N)]
+    end,
+
+    chk(Chk).
+
+%% White box test of persistent_term:info/0. We take into account
+%% that there might already exist persistent terms (created by the
+%% OTP standard libraries), but we assume that they are not
+%% changed during the execution of this test case.
+
+info_wb(0, _, _) ->
+    ok;
+info_wb(N, Overhead, {BaseCount,BaseMemory}) ->
+    Key = {?MODULE,N},
+    Value = lists:seq(1, N),
+    ok = persistent_term:put(Key, Value),
+
+    %% Calculate the extra memory needed for this term.
+    WordSize = erlang:system_info(wordsize),
+    ExtraMemory = Overhead + 2 * N * WordSize,
+
+    %% Call persistent_term:info/0.
+    {Count,Memory} = info_info(),
+
+    %% There should be one more persistent term.
+    Count = BaseCount + 1,
+
+    %% Verify that the amount of memory is correct.
+    case BaseMemory + ExtraMemory of
+        Memory ->
+            %% Exactly right. The size of the hash table was not changed.
+            ok;
+        Expected ->
+            %% The size of the hash table has been doubled to avoid filling
+            %% the table to more than 50 percent. The previous number
+            %% of entries must have been exactly half the size of the
+            %% hash table. The expected number of extra words added by
+            %% the resizing will be twice that number.
+            ExtraWords = BaseCount * 2,
+            true = ExtraWords * WordSize =:= (Memory - Expected)
+    end,
+    info_wb(N-1, Overhead, {Count,Memory}).
+
+info_info() ->
+    #{count:=Count,memory:=Memory} = persistent_term:info(),
+    true = is_integer(Count) andalso Count >= 0,
+    true = is_integer(Memory) andalso Memory >= 0,
+    {Count,Memory}.
+
+%% Calculate the number of extra bytes needed for storing each term in
+%% the literal area, assuming that the key is a tuple of size 2 with
+%% immediate elements. The calculated number is the size of the
+%% ErtsLiteralArea struct excluding the storage for the literal term
+%% itself.
+
+info_literal_area_overhead() ->
+    Key1 = {?MODULE,1},
+    Key2 = {?MODULE,2},
+    #{memory:=Mem0} = persistent_term:info(),
+    ok = persistent_term:put(Key1, literal),
+    #{memory:=Mem1} = persistent_term:info(),
+    ok = persistent_term:put(Key2, literal),
+    #{memory:=Mem2} = persistent_term:info(),
+    true = persistent_term:erase(Key1),
+    true = persistent_term:erase(Key2),
+
+    %% The size of the hash table may have doubled when inserting
+    %% one of the keys. To avoid counting the change in the hash
+    %% table size, take the smaller size increase.
+    min(Mem2-Mem1, Mem1-Mem0).
+
+%% Test trapping of persistent_term:info/0.
+
+info_trapping(_Config) ->
+    Chk = chk(),
+
+    %% Assume that the info/0 traps after 4000 iterations
+    %% in a non-debug emulator.
+    N = case test_server:timetrap_scale_factor() of
+            1 -> 10000;
+            _ -> 1000
+        end,
+    spawn_link(fun() -> info_trapping_create(N) end),
+    All = do_info_trapping(N, 0),
+    N = info_trapping_check_result(lists:sort(All), 1),
+    erlang:garbage_collect(),
+    info_trapping_erase(N),
+    chk(Chk).
+
+do_info_trapping(N, PrevMem) ->
+    case info_info() of
+        {N,Mem} ->
+            true = Mem >= PrevMem,
+            All = [P || {{?MODULE,{info_trapping,_}},_}=P <- persistent_term:get()],
+            case length(All) of
+                N -> All;
+                _ -> do_info_trapping(N, PrevMem)
+            end;
+        {_,Mem} ->
+            true = Mem >= PrevMem,
+            receive after 1 -> ok end,
+            do_info_trapping(N, Mem)
+    end.
+
+info_trapping_create(0) ->
+    ok;
+info_trapping_create(N) ->
+    ok = persistent_term:put({?MODULE,{info_trapping,N}}, N),
+    info_trapping_create(N-1).
+
+info_trapping_check_result([{{?MODULE,{info_trapping,N}},N}|T], N) ->
+    info_trapping_check_result(T, N+1);
+info_trapping_check_result([], N) -> N-1.
+
+info_trapping_erase(0) ->
+    ok;
+info_trapping_erase(N) ->
+    true = persistent_term:erase({?MODULE,{info_trapping,N}}),
+    info_trapping_erase(N-1).
+
+%% Test that hash tables are deallocated if a process running
+%% persistent_term:get/0 is killed.
+
+killed_while_trapping(_Config) ->
+    Chk = chk(),
+    N = case test_server:timetrap_scale_factor() of
+            1 -> 20000;
+            _ -> 2000
+        end,
+    kwt_put(N),
+    kwt_spawn(10),
+    kwt_erase(N),
+    chk(Chk).
+
+kwt_put(0) ->
+    ok;
+kwt_put(N) ->
+    ok = persistent_term:put({?MODULE,{kwt,N}}, N),
+    kwt_put(N-1).
+
+kwt_spawn(0) ->
+    ok;
+kwt_spawn(N) ->
+    Pids = [spawn(fun kwt_getter/0) || _ <- lists:seq(1, 20)],
+    erlang:yield(),
+    _ = [exit(Pid, kill) || Pid <- Pids],
+    kwt_spawn(N-1).
+
+kwt_getter() ->
+    _ = persistent_term:get(),
+    kwt_getter().
+
+kwt_erase(0) ->
+    ok;
+kwt_erase(N) ->
+    true = persistent_term:erase({?MODULE,{kwt,N}}),
+    kwt_erase(N-1).
+
+%% Test storing off heap values (such as ref-counted binaries).
+
+off_heap_values(_Config) ->
+    Chk = chk(),
+    Key = {?MODULE,?FUNCTION_NAME},
+    Val = {a,list_to_binary(lists:seq(0, 255)),make_ref(),fun() -> ok end},
+    ok = persistent_term:put(Key, Val),
+    FetchedVal = persistent_term:get(Key),
+    Val = FetchedVal,
+    true = persistent_term:erase(Key),
+    off_heap_values_wait(FetchedVal, Val),
+    chk(Chk).
+
+off_heap_values_wait(FetchedVal, Val) ->
+    case erts_debug:size_shared(FetchedVal) of
+        0 ->
+            Val = FetchedVal,
+            ok;
+        _ ->
+            erlang:yield(),
+            off_heap_values_wait(FetchedVal, Val)
+    end.
+
+%% Test some more data types as keys. Use the module name as a key
+%% to minimize the risk of collision with any key used
+%% by the OTP libraries.
+
+keys(_Config) ->
+    Chk = chk(),
+    do_key(?MODULE),
+    do_key([?MODULE]),
+    do_key(?MODULE_STRING),
+    do_key(list_to_binary(?MODULE_STRING)),
+    chk(Chk).
+ +do_key(Key) -> + Val = term_to_binary(Key), + ok = persistent_term:put(Key, Val), + StoredVal = persistent_term:get(Key), + Val = StoredVal, + true = persistent_term:erase(Key). + +%% Create persistent terms with keys that are known to collide. +%% Delete them in random order, making sure that all others +%% terms can still be found. + +collisions(_Config) -> + Chk = chk(), + + %% Create persistent terms with random keys. + Keys = lists:flatten(colliding_keys()), + Kvs = [{K,rand:uniform(1000)} || K <- Keys], + _ = [ok = persistent_term:put(K, V) || {K,V} <- Kvs], + _ = [V = persistent_term:get(K) || {K,V} <- Kvs], + + %% Now delete the persistent terms in random order. + collisions_delete(lists:keysort(2, Kvs)), + + chk(Chk). + +collisions_delete([{Key,Val}|Kvs]) -> + Val = persistent_term:get(Key), + true = persistent_term:erase(Key), + true = lists:sort(persistent_term:get()) =:= lists:sort(Kvs), + _ = [V = persistent_term:get(K) || {K,V} <- Kvs], + collisions_delete(Kvs); +collisions_delete([]) -> + ok. + +colliding_keys() -> + %% Collisions found by Jesper L. Andersen for breaking maps. + L = [[764492191,2361333849], + [49527266765044,90940896816021,20062927283041,267080852079651], + [249858369443708,206247021789428,20287304470696,25847120931175], + [10645228898670,224705626119556,267405565521452,258214397180678], + [264783762221048,166955943492306,98802957003141,102012488332476], + [69425677456944,177142907243411,137138950917722,228865047699598], + [116031213307147,29203342183358,37406949328742,255198080174323], + [200358182338308,235207156008390,120922906095920,116215987197289], + [58728890318426,68877471005069,176496507286088,221041411345780], + [91094120814795,50665258299931,256093108116737,19777509566621], + [74646746200247,98350487270564,154448261001199,39881047281135], + [23408943649483,164410325820923,248161749770122,274558342231648], + [169531547115055,213630535746863,235098262267796,200508473898303], + [235098564415817,85039146398174,51721575960328,173069189684390], + [176136386396069,155368359051606,147817099696487,265419485459634], + [137542881551462,40028925519736,70525669519846,63445773516557], + [173854695142814,114282444507812,149945832627054,99605565798831], + [177686773562184,127158716984798,132495543008547], + [227073396444896,139667311071766,158915951283562], + [26212438434289,94902985796531,198145776057315], + [266279278943923,58550737262493,74297973216378], + [32373606512065,131854353044428,184642643042326], + [34335377662439,85341895822066,273492717750246]], + + %% Verify that the keys still collide (this will fail if the + %% internal hash function has been changed). + erts_debug:set_internal_state(available_internal_state, true), + try + case erlang:system_info(wordsize) of + 8 -> + verify_colliding_keys(L); + 4 -> + %% Not guaranteed to collide on a 32-bit system. + ok + end + after + erts_debug:set_internal_state(available_internal_state, false) + end, + + L. + +verify_colliding_keys([[K|Ks]|Gs]) -> + Hash = internal_hash(K), + [Hash] = lists:usort([internal_hash(Key) || Key <- Ks]), + verify_colliding_keys(Gs); +verify_colliding_keys([]) -> + ok. + +internal_hash(Term) -> + erts_debug:get_internal_state({internal_hash,Term}). + +%% Test that all persistent terms are erased by init:restart/0. 
+
+init_restart(_Config) ->
+    File = "command_file",
+    ok = file:write_file(File, term_to_binary(restart)),
+    {ok,[[Erl]]} = init:get_argument(progname),
+    ModPath = filename:dirname(code:which(?MODULE)),
+    Cmd = Erl ++ " -pa " ++ ModPath ++ " -noshell "
+        "-run " ++ ?MODULE_STRING ++ " test_init_restart_cmd " ++
+        File,
+    io:format("~s\n", [Cmd]),
+    Expected = "12ok",
+    case os:cmd(Cmd) of
+        Expected ->
+            ok;
+        Actual ->
+            io:format("Expected: ~s\n", [Expected]),
+            io:format("Actual:   ~s\n", [Actual]),
+            ct:fail(unexpected_output)
+    end.
+
+test_init_restart_cmd([File]) ->
+    try
+        do_test_init_restart_cmd(File)
+    catch
+        C:R ->
+            io:format("\n~p ~p\n", [C,R]),
+            halt()
+    end,
+    receive
+        _ -> ok
+    end.
+
+do_test_init_restart_cmd(File) ->
+    {ok,Bin} = file:read_file(File),
+    Seq = lists:seq(1, 50),
+    case binary_to_term(Bin) of
+        restart ->
+            _ = [persistent_term:put({?MODULE,I}, {value,I}) ||
+                    I <- Seq],
+            ok = file:write_file(File, term_to_binary(was_restarted)),
+            io:put_chars("1"),
+            init:restart(),
+            receive
+                _ -> ok
+            end;
+        was_restarted ->
+            io:put_chars("2"),
+            ok = file:delete(File),
+            _ = [begin
+                     Key = {?MODULE,I},
+                     {'EXIT',{badarg,_}} = (catch persistent_term:get(Key))
+                 end || I <- Seq],
+            io:put_chars("ok"),
+            init:stop()
+    end.
+
+%% Check that there is the same number of persistent terms before
+%% and after each test case.
+
+chk() ->
+    persistent_term:info().
+
+chk(Chk) ->
+    Chk = persistent_term:info(),
+    Key = {?MODULE,?FUNCTION_NAME},
+    ok = persistent_term:put(Key, {term,Chk}),
+    Term = persistent_term:get(Key),
+    true = persistent_term:erase(Key),
+    chk_not_stuck(Term),
+    ok.
+
+chk_not_stuck(Term) ->
+    %% Hash tables to be deleted are put onto a queue.
+    %% Make sure that the queue isn't stuck by a table with
+    %% a non-zero ref count.
+
+    case erts_debug:size_shared(Term) of
+        0 ->
+            erlang:yield(),
+            chk_not_stuck(Term);
+        _ ->
+            ok
+    end.
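A minimal usage sketch (not part of the commit) of the persistent_term API that the suite above exercises; the {myapp,config} key and its value are made up for illustration:

    ok = persistent_term:put({myapp,config}, #{level => info}),
    #{level := info} = persistent_term:get({myapp,config}),
    #{count := _, memory := _} = persistent_term:info(),
    true = persistent_term:erase({myapp,config}).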
diff --git a/erts/emulator/test/scheduler_SUITE.erl b/erts/emulator/test/scheduler_SUITE.erl index f04efb9003..2e0dfa42f3 100644 --- a/erts/emulator/test/scheduler_SUITE.erl +++ b/erts/emulator/test/scheduler_SUITE.erl @@ -1450,26 +1450,29 @@ poll_threads(Config) when is_list(Config) -> {Conc, PollType, KP} = get_ioconfig(Config), {Sched, SchedOnln, _} = get_sstate(Config, ""), - [1, 1] = get_ionum(Config,"+IOt 2 +IOp 2"), - [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 5"), - - [1, 1] = get_ionum(Config, "+S 2 +IOPt 100 +IOPp 100"), - if Conc -> - [5] = get_ionum(Config,"+IOt 5 +IOp 1"), - [3, 2] = get_ionum(Config,"+IOt 5 +IOp 2"), - [2, 2, 2, 2, 2] = get_ionum(Config,"+IOt 10 +IOPp 50"), + [1, 1, 1] = get_ionum(Config,"+IOt 2 +IOp 2"), + [1, 1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 5"), + [1, 1, 1] = get_ionum(Config, "+S 2 +IOPt 100 +IOPp 100"), - [2] = get_ionum(Config, "+S 2 +IOPt 100"), - [4] = get_ionum(Config, "+S 4 +IOPt 100"), - [4] = get_ionum(Config, "+S 4:2 +IOPt 100"), - [4, 4] = get_ionum(Config, "+S 8 +IOPt 100 +IOPp 25"), + [5, 1] = get_ionum(Config,"+IOt 5 +IOp 1"), + [3, 2, 1] = get_ionum(Config,"+IOt 5 +IOp 2"), + [2, 2, 2, 2, 2, 1] = get_ionum(Config,"+IOt 10 +IOPp 50"), + + [2, 1] = get_ionum(Config, "+S 2 +IOPt 100"), + [4, 1] = get_ionum(Config, "+S 4 +IOPt 100"), + [4, 1] = get_ionum(Config, "+S 4:2 +IOPt 100"), + [4, 4, 1] = get_ionum(Config, "+S 8 +IOPt 100 +IOPp 25"), fail = get_ionum(Config, "+IOt 1 +IOp 2"), ok; not Conc -> + [1, 1] = get_ionum(Config,"+IOt 2 +IOp 2"), + [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 5"), + [1, 1] = get_ionum(Config, "+S 2 +IOPt 100 +IOPp 100"), + [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 1"), [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 2"), [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 10 +IOPp 50"), diff --git a/erts/emulator/test/signal_SUITE.erl b/erts/emulator/test/signal_SUITE.erl index fab2f45f28..4e6baa9e0e 100644 --- a/erts/emulator/test/signal_SUITE.erl +++ b/erts/emulator/test/signal_SUITE.erl @@ -85,7 +85,7 @@ xm_sig_order_proc() -> receive may_not_reach -> exit(bad_signal_order); may_reach -> ok - after 0 -> ok + after 0 -> erlang:yield() end, xm_sig_order_proc(). |