diff options
Diffstat (limited to 'erts/emulator')
-rw-r--r-- | erts/emulator/beam/erl_alloc.types | 1 | ||||
-rw-r--r-- | erts/emulator/beam/erl_alloc_util.c | 129 | ||||
-rw-r--r-- | erts/emulator/beam/erl_lock_check.c | 3 | ||||
-rw-r--r-- | erts/emulator/beam/erl_process.c | 92 | ||||
-rw-r--r-- | erts/emulator/beam/erl_process.h | 38 | ||||
-rw-r--r-- | erts/emulator/beam/erl_process_lock.c | 168 | ||||
-rw-r--r-- | erts/emulator/beam/erl_process_lock.h | 4 | ||||
-rw-r--r-- | erts/emulator/beam/erl_sched_spec_pre_alloc.c | 113 | ||||
-rw-r--r-- | erts/emulator/beam/erl_sched_spec_pre_alloc.h | 5 |
9 files changed, 308 insertions, 245 deletions
diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types index d4ef9cc553..4aa8fa82fb 100644 --- a/erts/emulator/beam/erl_alloc.types +++ b/erts/emulator/beam/erl_alloc.types @@ -291,7 +291,6 @@ type PORT_LOCK STANDARD SYSTEM port_lock type DRIVER_LOCK STANDARD SYSTEM driver_lock type XPORTS_LIST SHORT_LIVED SYSTEM extra_port_list type PROC_LCK_WTR LONG_LIVED SYSTEM proc_lock_waiter -type PROC_LCK_QS LONG_LIVED SYSTEM proc_lock_queues type RUNQ_BLNS LONG_LIVED SYSTEM run_queue_balancing type THR_PRGR_IDATA LONG_LIVED SYSTEM thr_prgr_internal_data type THR_PRGR_DATA LONG_LIVED SYSTEM thr_prgr_data diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c index a0abd1c405..97ba306a79 100644 --- a/erts/emulator/beam/erl_alloc_util.c +++ b/erts/emulator/beam/erl_alloc_util.c @@ -829,46 +829,83 @@ init_dd_queue(ErtsAllctrDDQueue_t *ddq) ddq->head.used_marker = 1; } -static ERTS_INLINE erts_aint_t -ddq_managed_thread_enqueue(ErtsAllctrDDQueue_t *ddq, void *ptr) +static ERTS_INLINE int +ddq_managed_thread_enqueue(ErtsAllctrDDQueue_t *ddq, void *ptr, int cinit) { - erts_aint_t ilast, itmp; - ErtsAllctrDDBlock_t *this = ptr; + erts_aint_t itmp; + ErtsAllctrDDBlock_t *enq, *this = ptr; erts_atomic_init_nob(&this->atmc_next, ERTS_AINT_NULL); - /* Enqueue at end of list... 
*/ - ilast = erts_atomic_read_nob(&ddq->tail.data.last); - while (1) { - ErtsAllctrDDBlock_t *last = (ErtsAllctrDDBlock_t *) ilast; - itmp = erts_atomic_cmpxchg_mb(&last->atmc_next, - (erts_aint_t) this, - ERTS_AINT_NULL); - if (itmp == ERTS_AINT_NULL) - break; - ilast = itmp; + enq = (ErtsAllctrDDBlock_t *) erts_atomic_read_nob(&ddq->tail.data.last); + itmp = erts_atomic_cmpxchg_relb(&enq->atmc_next, + (erts_aint_t) this, + ERTS_AINT_NULL); + if (itmp == ERTS_AINT_NULL) { + /* We are required to move last pointer */ +#ifdef DEBUG + ASSERT(ERTS_AINT_NULL == erts_atomic_read_nob(&this->atmc_next)); + ASSERT(((erts_aint_t) enq) + == erts_atomic_xchg_relb(&ddq->tail.data.last, + (erts_aint_t) this)); +#else + erts_atomic_set_relb(&ddq->tail.data.last, (erts_aint_t) this); +#endif + return 1; } + else { + /* + * We *need* to insert element somewhere in between the + * last element we read earlier and the actual last element. + */ + int i = cinit; - /* Move last pointer forward... */ - while (1) { - if (erts_atomic_read_rb(&this->atmc_next) != ERTS_AINT_NULL) { - /* Someone else will move it forward */ - return erts_atomic_read_rb(&ddq->tail.data.last); + while (1) { + erts_aint_t itmp2; + erts_atomic_set_nob(&this->atmc_next, itmp); + itmp2 = erts_atomic_cmpxchg_relb(&enq->atmc_next, + (erts_aint_t) this, + itmp); + if (itmp == itmp2) + return 0; /* inserted this */ + if ((i & 1) == 0) + itmp = itmp2; + else { + enq = (ErtsAllctrDDBlock_t *) itmp2; + itmp = erts_atomic_read_acqb(&enq->atmc_next); + ASSERT(itmp != ERTS_AINT_NULL); + } + i++; } - itmp = erts_atomic_cmpxchg_mb(&ddq->tail.data.last, - (erts_aint_t) this, - ilast); - if (ilast == itmp) - return (erts_aint_t) this; - ilast = itmp; } } +static ERTS_INLINE erts_aint_t +check_insert_marker(ErtsAllctrDDQueue_t *ddq, erts_aint_t ilast) +{ + if (!ddq->head.used_marker + && ddq->head.unref_end == (ErtsAllctrDDBlock_t *) ilast) { + erts_aint_t itmp; + ErtsAllctrDDBlock_t *last = (ErtsAllctrDDBlock_t *) ilast; + + 
erts_atomic_init_nob(&ddq->tail.data.marker.atmc_next, ERTS_AINT_NULL); + itmp = erts_atomic_cmpxchg_relb(&last->atmc_next, + (erts_aint_t) &ddq->tail.data.marker, + ERTS_AINT_NULL); + if (itmp == ERTS_AINT_NULL) { + ilast = (erts_aint_t) &ddq->tail.data.marker; + ddq->head.used_marker = !0; + erts_atomic_set_relb(&ddq->tail.data.last, ilast); + } + } + return ilast; +} + static ERTS_INLINE int -ddq_enqueue(ErtsAlcType_t type, ErtsAllctrDDQueue_t *ddq, void *ptr) +ddq_enqueue(ErtsAlcType_t type, ErtsAllctrDDQueue_t *ddq, void *ptr, int cinit) { - erts_aint_t ilast; + int last_elem; int um_refc_ix = 0; int managed_thread = erts_thr_progress_is_managed_thread(); if (!managed_thread) { @@ -884,11 +921,11 @@ ddq_enqueue(ErtsAlcType_t type, ErtsAllctrDDQueue_t *ddq, void *ptr) } } - ilast = ddq_managed_thread_enqueue(ddq, ptr); + last_elem = ddq_managed_thread_enqueue(ddq, ptr, cinit); if (!managed_thread) erts_atomic_dec_relb(&ddq->tail.data.um_refc[um_refc_ix]); - return ilast == (erts_aint_t) ptr; + return last_elem; } static ERTS_INLINE void * @@ -934,20 +971,16 @@ ddq_check_incoming(ErtsAllctrDDQueue_t *ddq) int um_refc_ix; ddq->head.next.thr_progress_reached = 1; um_refc_ix = ddq->head.next.um_refc_ix; - if (erts_atomic_read_acqb(&ddq->tail.data.um_refc[um_refc_ix]) == 0) { + if (erts_atomic_read_nob(&ddq->tail.data.um_refc[um_refc_ix]) == 0) { /* Move unreferenced end pointer forward... 
*/ + ETHR_MEMBAR(ETHR_LoadLoad|ETHR_LoadStore); + ddq->head.unref_end = ddq->head.next.unref_end; - if (!ddq->head.used_marker - && ddq->head.unref_end == (ErtsAllctrDDBlock_t *) ilast) { - ddq->head.used_marker = 1; - ilast = ddq_managed_thread_enqueue(ddq, &ddq->tail.data.marker); - } + ilast = check_insert_marker(ddq, ilast); - if (ddq->head.unref_end == (ErtsAllctrDDBlock_t *) ilast) - ERTS_THR_MEMORY_BARRIER; - else { + if (ddq->head.unref_end != (ErtsAllctrDDBlock_t *) ilast) { ddq->head.next.unref_end = (ErtsAllctrDDBlock_t *) ilast; ddq->head.next.thr_progress = erts_thr_progress_later(NULL); erts_atomic32_set_relb(&ddq->tail.data.um_refc_ix, @@ -1092,12 +1125,15 @@ handle_delayed_dealloc(Allctr_t *allctr, } static ERTS_INLINE void -enqueue_dealloc_other_instance(ErtsAlcType_t type, Allctr_t *allctr, void *ptr) +enqueue_dealloc_other_instance(ErtsAlcType_t type, + Allctr_t *allctr, + void *ptr, + int cinit) { if (allctr->fix) ((UWord *) ptr)[ERTS_ALCU_DD_FIX_TYPE_OFFS] = (UWord) type; - if (ddq_enqueue(type, &allctr->dd.q, ptr)) + if (ddq_enqueue(type, &allctr->dd.q, ptr, cinit)) erts_alloc_notify_delayed_dealloc(allctr->ix); } @@ -3613,7 +3649,11 @@ erts_alcu_free_thr_pref(ErtsAlcType_t type, void *extra, void *p) get_pref_allctr(extra, &pref_allctr); ptr = get_used_allctr(extra, p, &used_allctr, NULL); if (pref_allctr != used_allctr) - enqueue_dealloc_other_instance(type, used_allctr, ptr); + enqueue_dealloc_other_instance(type, + used_allctr, + ptr, + (used_allctr->dd.ix + - pref_allctr->dd.ix)); else { if (used_allctr->thread_safe) erts_mtx_lock(&used_allctr->mutex); @@ -3988,7 +4028,11 @@ realloc_thr_pref(ErtsAlcType_t type, void *extra, void *p, Uint size, sys_memcpy(res, p, cpy_size); if (!force_move || used_allctr != pref_allctr) - enqueue_dealloc_other_instance(type, used_allctr, ptr); + enqueue_dealloc_other_instance(type, + used_allctr, + ptr, + (used_allctr->dd.ix + - pref_allctr->dd.ix)); else { do_erts_alcu_free(type, used_allctr, ptr); 
ASSERT(pref_allctr == used_allctr); @@ -4179,6 +4223,7 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) allctr->dd.use = 1; init_dd_queue(&allctr->dd.q); + allctr->dd.ix = init->ix; } else #endif diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c index a0f744be9d..b545ec07c0 100644 --- a/erts/emulator/beam/erl_lock_check.c +++ b/erts/emulator/beam/erl_lock_check.c @@ -175,9 +175,6 @@ static erts_lc_lock_order_t erts_lock_order[] = { { "sched_stat", NULL }, #endif { "async_init_mtx", NULL }, -#ifdef ERTS_SMP - { "proc_lck_qs_alloc", NULL }, -#endif #ifdef __WIN32__ #ifdef DEBUG { "save_ops_lock", NULL }, diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index d2fa111b80..0fa2def5af 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -359,6 +359,7 @@ dbg_chk_aux_work_val(erts_aint32_t value) valid |= ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN; #endif #ifdef ERTS_SMP + valid |= ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP; valid |= ERTS_SSI_AUX_WORK_MISC_THR_PRGR; valid |= ERTS_SSI_AUX_WORK_DD; valid |= ERTS_SSI_AUX_WORK_DD_THR_PRGR; @@ -930,6 +931,45 @@ haw_thr_prgr_current_check_progress(ErtsAuxWorkData *awdp) } } +static ERTS_INLINE erts_aint32_t +handle_delayed_aux_work_wakeup(ErtsAuxWorkData *awdp, erts_aint32_t aux_work) +{ + int jix, max_jix; + unset_aux_work_flags(awdp->ssi, ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP); + + ERTS_THR_MEMORY_BARRIER; + + max_jix = awdp->delayed_wakeup.jix; + awdp->delayed_wakeup.jix = -1; + for (jix = 0; jix <= max_jix; jix++) { + int sched = awdp->delayed_wakeup.job[jix].sched; + erts_aint32_t aux_work = awdp->delayed_wakeup.job[jix].aux_work; + + ASSERT(awdp->delayed_wakeup.sched2jix[sched] == jix); + awdp->delayed_wakeup.sched2jix[sched] = -1; + set_aux_work_flags_wakeup_nob(ERTS_SCHED_SLEEP_INFO_IX(sched-1), + aux_work); + } + return aux_work & ~ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP; +} + +static ERTS_INLINE void 
+schedule_aux_work_wakeup(ErtsAuxWorkData *awdp, int sched, erts_aint32_t aux_work) +{ + int jix = awdp->delayed_wakeup.sched2jix[sched]; + if (jix >= 0) { + ASSERT(awdp->delayed_wakeup.job[jix].sched == sched); + awdp->delayed_wakeup.job[jix].aux_work |= aux_work; + } + else { + jix = ++awdp->delayed_wakeup.jix; + awdp->delayed_wakeup.sched2jix[sched] = jix; + awdp->delayed_wakeup.job[jix].sched = sched; + awdp->delayed_wakeup.job[jix].aux_work = aux_work; + } + set_aux_work_flags_wakeup_nob(awdp->ssi, ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP); +} + #endif typedef struct erts_misc_aux_work_t_ erts_misc_aux_work_t; @@ -1186,8 +1226,14 @@ handle_fix_alloc(ErtsAuxWorkData *awdp, erts_aint32_t aux_work) void erts_alloc_notify_delayed_dealloc(int ix) { - set_aux_work_flags_wakeup_nob(ERTS_SCHED_SLEEP_INFO_IX(ix-1), - ERTS_SSI_AUX_WORK_DD); + ErtsSchedulerData *esdp = erts_get_scheduler_data(); + if (esdp) + schedule_aux_work_wakeup(&esdp->aux_work_data, + ix, + ERTS_SSI_AUX_WORK_DD); + else + set_aux_work_flags_wakeup_relb(ERTS_SCHED_SLEEP_INFO_IX(ix-1), + ERTS_SSI_AUX_WORK_DD); } static ERTS_INLINE erts_aint32_t @@ -1485,6 +1531,8 @@ handle_aux_work(ErtsAuxWorkData *awdp, erts_aint32_t orig_aux_work, int waiting) * eachother. Most frequent first. 
*/ #ifdef ERTS_SMP + HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP, + handle_delayed_aux_work_wakeup); HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_DD, handle_delayed_dealloc); /* DD must be before DD_THR_PRGR */ @@ -1963,7 +2011,7 @@ thr_prgr_fin_wait(void *vssi) | ERTS_SSI_FLG_TSE_SLEEPING)); } -static void init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp); +static void init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, char *dawwp); static void * aux_thread(void *unused) @@ -1983,7 +2031,7 @@ aux_thread(void *unused) callbacks.finalize_wait = thr_prgr_fin_wait; erts_thr_progress_register_managed_thread(NULL, &callbacks, 1); - init_aux_work_data(awdp, NULL); + init_aux_work_data(awdp, NULL, NULL); awdp->ssi = ssi; sched_prep_spin_wait(ssi); @@ -3850,7 +3898,7 @@ erts_sched_set_busy_wait_threshold(char *str) return 0; } static void -init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp) +init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, char *dawwp) { awdp->sched_id = esdp ? 
(int) esdp->no : 0; awdp->esdp = esdp; @@ -3868,12 +3916,32 @@ init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp) #endif awdp->async_ready.queue = NULL; #endif +#ifdef ERTS_SMP + if (!dawwp) { + awdp->delayed_wakeup.job = NULL; + awdp->delayed_wakeup.sched2jix = NULL; + awdp->delayed_wakeup.jix = -1; + } + else { + int i; + awdp->delayed_wakeup.job = (ErtsDelayedAuxWorkWakeupJob *) dawwp; + dawwp += sizeof(ErtsDelayedAuxWorkWakeupJob)*(erts_no_schedulers+1); + awdp->delayed_wakeup.sched2jix = (int *) dawwp; + awdp->delayed_wakeup.jix = -1; + for (i = 0; i <= erts_no_schedulers; i++) + awdp->delayed_wakeup.sched2jix[i] = -1; + } +#endif } void erts_init_scheduling(int no_schedulers, int no_schedulers_online) { int ix, n, no_ssi; + char *daww_ptr; +#ifdef ERTS_SMP + size_t daww_sz; +#endif init_misc_op_list_alloc(); @@ -4006,6 +4074,15 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online) /* Create and initialize scheduler specific data */ +#ifdef ERTS_SMP + daww_sz = ERTS_ALC_CACHE_LINE_ALIGN_SIZE((sizeof(ErtsDelayedAuxWorkWakeupJob) + + sizeof(int))*(n+1)); + daww_ptr = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_SCHDLR_DATA, + daww_sz*n); +#else + daww_ptr = NULL; +#endif + erts_aligned_scheduler_data = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_SCHDLR_DATA, n*sizeof(ErtsAlignedSchedulerData)); @@ -4040,7 +4117,10 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online) esdp->run_queue = ERTS_RUNQ_IX(ix); esdp->run_queue->scheduler = esdp; - init_aux_work_data(&esdp->aux_work_data, esdp); + init_aux_work_data(&esdp->aux_work_data, esdp, daww_ptr); +#ifdef ERTS_SMP + daww_ptr += daww_sz; +#endif init_sched_wall_time(&esdp->sched_wall_time); } diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h index 5b79c40d93..9e7a5a5c74 100644 --- a/erts/emulator/beam/erl_process.h +++ b/erts/emulator/beam/erl_process.h @@ -253,18 +253,19 @@ typedef enum { * eachother. Most frequent - lowest bit number. 
*/ -#define ERTS_SSI_AUX_WORK_DD (((erts_aint32_t) 1) << 0) -#define ERTS_SSI_AUX_WORK_DD_THR_PRGR (((erts_aint32_t) 1) << 1) -#define ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC (((erts_aint32_t) 1) << 2) -#define ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM (((erts_aint32_t) 1) << 3) -#define ERTS_SSI_AUX_WORK_ASYNC_READY (((erts_aint32_t) 1) << 4) -#define ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN (((erts_aint32_t) 1) << 5) -#define ERTS_SSI_AUX_WORK_MISC_THR_PRGR (((erts_aint32_t) 1) << 6) -#define ERTS_SSI_AUX_WORK_MISC (((erts_aint32_t) 1) << 7) -#define ERTS_SSI_AUX_WORK_CHECK_CHILDREN (((erts_aint32_t) 1) << 8) -#define ERTS_SSI_AUX_WORK_SET_TMO (((erts_aint32_t) 1) << 9) -#define ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK (((erts_aint32_t) 1) << 10) -#define ERTS_SSI_AUX_WORK_REAP_PORTS (((erts_aint32_t) 1) << 11) +#define ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP (((erts_aint32_t) 1) << 0) +#define ERTS_SSI_AUX_WORK_DD (((erts_aint32_t) 1) << 1) +#define ERTS_SSI_AUX_WORK_DD_THR_PRGR (((erts_aint32_t) 1) << 2) +#define ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC (((erts_aint32_t) 1) << 3) +#define ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM (((erts_aint32_t) 1) << 4) +#define ERTS_SSI_AUX_WORK_ASYNC_READY (((erts_aint32_t) 1) << 5) +#define ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN (((erts_aint32_t) 1) << 6) +#define ERTS_SSI_AUX_WORK_MISC_THR_PRGR (((erts_aint32_t) 1) << 7) +#define ERTS_SSI_AUX_WORK_MISC (((erts_aint32_t) 1) << 8) +#define ERTS_SSI_AUX_WORK_CHECK_CHILDREN (((erts_aint32_t) 1) << 9) +#define ERTS_SSI_AUX_WORK_SET_TMO (((erts_aint32_t) 1) << 10) +#define ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK (((erts_aint32_t) 1) << 11) +#define ERTS_SSI_AUX_WORK_REAP_PORTS (((erts_aint32_t) 1) << 12) typedef struct ErtsSchedulerSleepInfo_ ErtsSchedulerSleepInfo; @@ -403,6 +404,11 @@ typedef struct { } ErtsSchedWallTime; typedef struct { + int sched; + erts_aint32_t aux_work; +} ErtsDelayedAuxWorkWakeupJob; + +typedef struct { int sched_id; ErtsSchedulerData *esdp; ErtsSchedulerSleepInfo *ssi; @@ -431,6 +437,13 
@@ typedef struct { void *queue; } async_ready; #endif +#ifdef ERTS_SMP + struct { + int *sched2jix; + int jix; + ErtsDelayedAuxWorkWakeupJob *job; + } delayed_wakeup; +#endif } ErtsAuxWorkData; struct ErtsSchedulerData_ { @@ -464,7 +477,6 @@ struct ErtsSchedulerData_ { int virtual_reds; int cpu_id; /* >= 0 when bound */ ErtsAuxWorkData aux_work_data; - ErtsAtomCacheMap atom_cache_map; ErtsSchedAllocData alloc_data; diff --git a/erts/emulator/beam/erl_process_lock.c b/erts/emulator/beam/erl_process_lock.c index b3b4601a31..34d591df40 100644 --- a/erts/emulator/beam/erl_process_lock.c +++ b/erts/emulator/beam/erl_process_lock.c @@ -90,16 +90,6 @@ static void check_queue(erts_proc_lock_t *lck); #error "The size of the 'uflgs' field of the erts_tse_t type is too small" #endif -struct erts_proc_lock_queues_t_ { - erts_proc_lock_queues_t *next; - erts_tse_t *queue[ERTS_PROC_LOCK_MAX_BIT+1]; -}; - -static erts_proc_lock_queues_t zeroqs = {0}; - -static erts_smp_spinlock_t qs_lock; -static erts_proc_lock_queues_t *queue_free_list; - #ifdef ERTS_ENABLE_LOCK_CHECK static struct { Sint16 proc_lock_main; @@ -120,7 +110,6 @@ void erts_init_proc_lock(int cpus) { int i; - erts_smp_spinlock_init(&qs_lock, "proc_lck_qs_alloc"); for (i = 0; i < ERTS_NO_OF_PIX_LOCKS; i++) { #ifdef ERTS_ENABLE_LOCK_COUNT erts_mtx_init_x(&erts_pix_locks[i].u.mtx, @@ -129,7 +118,6 @@ erts_init_proc_lock(int cpus) erts_mtx_init(&erts_pix_locks[i].u.mtx, "pix_lock"); #endif } - queue_free_list = NULL; erts_thr_install_exit_handler(cleanup_tse); #ifdef ERTS_ENABLE_LOCK_CHECK lc_id.proc_lock_main = erts_lc_get_lock_order_id("proc_main"); @@ -156,16 +144,7 @@ erts_init_proc_lock(int cpus) } #ifdef ERTS_ENABLE_LOCK_CHECK -static void -check_unused_tse(erts_tse_t *wtr) -{ - int i; - erts_proc_lock_queues_t *queues = wtr->udata; - ERTS_LC_ASSERT(wtr->uflgs == 0); - for (i = 0; i <= ERTS_PROC_LOCK_MAX_BIT; i++) - ERTS_LC_ASSERT(!queues->queue[i]); -} -#define CHECK_UNUSED_TSE(W) check_unused_tse((W)) +#define 
CHECK_UNUSED_TSE(W) ERTS_LC_ASSERT((W)->uflgs == 0) #else #define CHECK_UNUSED_TSE(W) #endif @@ -174,56 +153,21 @@ static ERTS_INLINE erts_tse_t * tse_fetch(erts_pix_lock_t *pix_lock) { erts_tse_t *tse = erts_tse_fetch(); - if (!tse->udata) { - erts_proc_lock_queues_t *qs; -#if ERTS_PROC_LOCK_SPINLOCK_IMPL && !ERTS_PROC_LOCK_ATOMIC_IMPL - if (pix_lock) - erts_pix_unlock(pix_lock); -#endif - erts_smp_spin_lock(&qs_lock); - qs = queue_free_list; - if (qs) { - queue_free_list = queue_free_list->next; - erts_smp_spin_unlock(&qs_lock); - } - else { - erts_smp_spin_unlock(&qs_lock); - qs = erts_alloc(ERTS_ALC_T_PROC_LCK_QS, - sizeof(erts_proc_lock_queues_t)); - sys_memcpy((void *) qs, - (void *) &zeroqs, - sizeof(erts_proc_lock_queues_t)); - } - tse->udata = qs; -#if ERTS_PROC_LOCK_SPINLOCK_IMPL && !ERTS_PROC_LOCK_ATOMIC_IMPL - if (pix_lock) - erts_pix_lock(pix_lock); -#endif - } tse->uflgs = 0; return tse; } static ERTS_INLINE void -tse_return(erts_tse_t *tse, int force_free_q) +tse_return(erts_tse_t *tse) { CHECK_UNUSED_TSE(tse); - if (force_free_q || erts_tse_is_tmp(tse)) { - erts_proc_lock_queues_t *qs = tse->udata; - ASSERT(qs); - erts_smp_spin_lock(&qs_lock); - qs->next = queue_free_list; - queue_free_list = qs; - erts_smp_spin_unlock(&qs_lock); - tse->udata = NULL; - } erts_tse_return(tse); } void erts_proc_lock_prepare_proc_lock_waiter(void) { - tse_return(tse_fetch(NULL), 0); + tse_return(tse_fetch(NULL)); } @@ -231,55 +175,49 @@ static void cleanup_tse(void) { erts_tse_t *tse = erts_tse_fetch(); - if (tse) { - if (tse->udata) - tse_return(tse, 1); - else - erts_tse_return(tse); - } + if (tse) + erts_tse_return(tse); } /* * Waiters are queued in a circular double linked list; - * where qs->queue[lock_ix] is the first waiter in queue, and - * qs->queue[lock_ix]->prev is the last waiter in queue. + * where lck->queue[lock_ix] is the first waiter in queue, and + * lck->queue[lock_ix]->prev is the last waiter in queue. 
*/ static ERTS_INLINE void -enqueue_waiter(erts_proc_lock_queues_t *qs, - int ix, - erts_tse_t *wtr) +enqueue_waiter(erts_proc_lock_t *lck, int ix, erts_tse_t *wtr) { - if (!qs->queue[ix]) { - qs->queue[ix] = wtr; + if (!lck->queue[ix]) { + lck->queue[ix] = wtr; wtr->next = wtr; wtr->prev = wtr; } else { - ERTS_LC_ASSERT(qs->queue[ix]->next && qs->queue[ix]->prev); - wtr->next = qs->queue[ix]; - wtr->prev = qs->queue[ix]->prev; + ERTS_LC_ASSERT(lck->queue[ix]->next && lck->queue[ix]->prev); + wtr->next = lck->queue[ix]; + wtr->prev = lck->queue[ix]->prev; wtr->prev->next = wtr; - qs->queue[ix]->prev = wtr; + lck->queue[ix]->prev = wtr; } } static erts_tse_t * -dequeue_waiter(erts_proc_lock_queues_t *qs, int ix) +dequeue_waiter(erts_proc_lock_t *lck, int ix) { - erts_tse_t *wtr = qs->queue[ix]; - ERTS_LC_ASSERT(qs->queue[ix]); + erts_tse_t *wtr = lck->queue[ix]; + ERTS_LC_ASSERT(lck->queue[ix]); if (wtr->next == wtr) { - ERTS_LC_ASSERT(qs->queue[ix]->prev == wtr); - qs->queue[ix] = NULL; + ERTS_LC_ASSERT(lck->queue[ix]->prev == wtr); + lck->queue[ix] = NULL; } else { ERTS_LC_ASSERT(wtr->next != wtr); ERTS_LC_ASSERT(wtr->prev != wtr); wtr->next->prev = wtr->prev; wtr->prev->next = wtr->next; - qs->queue[ix] = wtr->next; + lck->queue[ix] = wtr->next; } return wtr; } @@ -300,19 +238,18 @@ try_aquire(erts_proc_lock_t *lck, erts_tse_t *wtr) ErtsProcLocks locks = wtr->uflgs; int lock_no; - ERTS_LC_ASSERT(lck->queues); ERTS_LC_ASSERT(got_locks != locks); for (lock_no = 0; lock_no <= ERTS_PROC_LOCK_MAX_BIT; lock_no++) { ErtsProcLocks lock = ((ErtsProcLocks) 1) << lock_no; if (locks & lock) { ErtsProcLocks wflg, old_lflgs; - if (lck->queues->queue[lock_no]) { + if (lck->queue[lock_no]) { /* Others already waiting */ enqueue: ERTS_LC_ASSERT(ERTS_PROC_LOCK_FLGS_READ_(lck) & (lock << ERTS_PROC_LOCK_WAITER_SHIFT)); - enqueue_waiter(lck->queues, lock_no, wtr); + enqueue_waiter(lck, lock_no, wtr); break; } wflg = lock << ERTS_PROC_LOCK_WAITER_SHIFT; @@ -364,7 +301,6 @@ 
transfer_locks(Process *p, for (lock_no = 0; tlocks && lock_no <= ERTS_PROC_LOCK_MAX_BIT; lock_no++) { ErtsProcLocks lock = ((ErtsProcLocks) 1) << lock_no; if (tlocks & lock) { - erts_proc_lock_queues_t *qs = p->lock.queues; /* Transfer lock */ #ifdef ERTS_ENABLE_LOCK_CHECK tlocks &= ~lock; @@ -372,9 +308,9 @@ transfer_locks(Process *p, ERTS_LC_ASSERT(ERTS_PROC_LOCK_FLGS_READ_(&p->lock) & (lock << ERTS_PROC_LOCK_WAITER_SHIFT)); transferred++; - wtr = dequeue_waiter(qs, lock_no); + wtr = dequeue_waiter(&p->lock, lock_no); ERTS_LC_ASSERT(wtr); - if (!qs->queue[lock_no]) + if (!p->lock.queue[lock_no]) unset_waiter |= lock; ERTS_LC_ASSERT(wtr->uflgs & lock); wtr->uflgs &= ~lock; @@ -463,7 +399,6 @@ wait_for_locks(Process *p, { erts_pix_lock_t *pix_lock = pixlck ? pixlck : ERTS_PID2PIXLOCK(p->id); erts_tse_t *wtr; - erts_proc_lock_queues_t *qs; /* Acquire a waiter object on which this thread can wait. */ wtr = tse_fetch(pix_lock); @@ -479,18 +414,6 @@ wait_for_locks(Process *p, ERTS_LC_ASSERT(erts_lc_pix_lock_is_locked(pix_lock)); - qs = wtr->udata; - ASSERT(qs); - /* Provide the process with waiter queues, if it doesn't have one. */ - if (!p->lock.queues) { - qs->next = NULL; - p->lock.queues = qs; - } - else { - qs->next = p->lock.queues->next; - p->lock.queues->next = qs; - } - #ifdef ERTS_PROC_LOCK_HARD_DEBUG check_queue(&p->lock); #endif @@ -504,7 +427,9 @@ wait_for_locks(Process *p, check_queue(&p->lock); #endif - if (wtr->uflgs) { + if (wtr->uflgs == 0) + erts_pix_unlock(pix_lock); + else { /* We didn't get them all; need to wait... */ ASSERT((wtr->uflgs & ~ERTS_PROC_LOCKS_ALL) == 0); @@ -529,28 +454,12 @@ wait_for_locks(Process *p, } while (res != 0); } - erts_pix_lock(pix_lock); - ASSERT(wtr->uflgs == 0); } - /* Recover some queues to store in the waiter. 
*/ - ERTS_LC_ASSERT(p->lock.queues); - if (p->lock.queues->next) { - qs = p->lock.queues->next; - p->lock.queues->next = qs->next; - } - else { - qs = p->lock.queues; - p->lock.queues = NULL; - } - wtr->udata = qs; - - erts_pix_unlock(pix_lock); - ERTS_LC_ASSERT(locks == (ERTS_PROC_LOCK_FLGS_READ_(&p->lock) & locks)); - tse_return(wtr, 0); + tse_return(wtr); } /* @@ -971,6 +880,7 @@ erts_pid2proc_safelock(Process *c_p, void erts_proc_lock_init(Process *p) { + int i; /* We always start with all locks locked */ #if ERTS_PROC_LOCK_ATOMIC_IMPL erts_smp_atomic32_init_nob(&p->lock.flags, @@ -978,7 +888,8 @@ erts_proc_lock_init(Process *p) #else p->lock.flags = ERTS_PROC_LOCKS_ALL; #endif - p->lock.queues = NULL; + for (i = 0; i <= ERTS_PROC_LOCK_MAX_BIT; i++) + p->lock.queue[i] = NULL; p->lock.refc = 1; #ifdef ERTS_ENABLE_LOCK_COUNT erts_lcnt_proc_lock_init(p); @@ -990,11 +901,8 @@ erts_proc_lock_init(Process *p) erts_proc_lc_trylock(p, ERTS_PROC_LOCKS_ALL, 1); #endif #ifdef ERTS_PROC_LOCK_DEBUG - { - int i; - for (i = 0; i <= ERTS_PROC_LOCK_MAX_BIT; i++) - erts_smp_atomic32_init_nob(&p->lock.locked[i], (erts_aint32_t) 1); - } + for (i = 0; i <= ERTS_PROC_LOCK_MAX_BIT; i++) + erts_smp_atomic32_init_nob(&p->lock.locked[i], (erts_aint32_t) 1); #endif } @@ -1463,21 +1371,21 @@ check_queue(erts_proc_lock_t *lck) if (lflgs & wtr) { int n; erts_tse_t *wtr; - ERTS_LC_ASSERT(lck->queues && lck->queues->queue[lock_no]); - wtr = lck->queues->queue[lock_no]; + ERTS_LC_ASSERT(lck->queue[lock_no]); + wtr = lck->queue[lock_no]; n = 0; do { wtr = wtr->next; n++; - } while (wtr != lck->queues->queue[lock_no]); + } while (wtr != lck->queue[lock_no]); do { wtr = wtr->prev; n--; - } while (wtr != lck->queues->queue[lock_no]); + } while (wtr != lck->queue[lock_no]); ERTS_LC_ASSERT(n == 0); } else { - ERTS_LC_ASSERT(!lck->queues || !lck->queues->queue[lock_no]); + ERTS_LC_ASSERT(!lck->queue[lock_no]); } } } diff --git a/erts/emulator/beam/erl_process_lock.h 
b/erts/emulator/beam/erl_process_lock.h index 413c45480c..290084d8ca 100644 --- a/erts/emulator/beam/erl_process_lock.h +++ b/erts/emulator/beam/erl_process_lock.h @@ -56,15 +56,13 @@ typedef erts_aint32_t ErtsProcLocks; -typedef struct erts_proc_lock_queues_t_ erts_proc_lock_queues_t; - typedef struct erts_proc_lock_t_ { #if ERTS_PROC_LOCK_ATOMIC_IMPL erts_smp_atomic32_t flags; #else ErtsProcLocks flags; #endif - erts_proc_lock_queues_t *queues; + erts_tse_t *queue[ERTS_PROC_LOCK_MAX_BIT+1]; Sint32 refc; #ifdef ERTS_PROC_LOCK_DEBUG erts_smp_atomic32_t locked[ERTS_PROC_LOCK_MAX_BIT+1]; diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.c b/erts/emulator/beam/erl_sched_spec_pre_alloc.c index 37b186abd9..a490aec734 100644 --- a/erts/emulator/beam/erl_sched_spec_pre_alloc.c +++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.c @@ -116,54 +116,84 @@ erts_sspa_create(size_t blk_sz, int pa_size) return data; } -static ERTS_INLINE erts_aint_t +static ERTS_INLINE void enqueue_remote_managed_thread(erts_sspa_chunk_header_t *chdr, erts_sspa_blk_t *this, - int want_last) + int cinit) { - erts_aint_t ilast, itmp; + erts_aint_t itmp; + erts_sspa_blk_t *enq; erts_atomic_init_nob(&this->next_atmc, ERTS_AINT_NULL); - /* Enqueue at end of list... 
*/ - ilast = erts_atomic_read_nob(&chdr->tail.data.last); - while (1) { - erts_sspa_blk_t *last = (erts_sspa_blk_t *) ilast; - itmp = erts_atomic_cmpxchg_mb(&last->next_atmc, - (erts_aint_t) this, - ERTS_AINT_NULL); - if (itmp == ERTS_AINT_NULL) - break; - ilast = itmp; + enq = (erts_sspa_blk_t *) erts_atomic_read_nob(&chdr->tail.data.last); + itmp = erts_atomic_cmpxchg_relb(&enq->next_atmc, + (erts_aint_t) this, + ERTS_AINT_NULL); + if (itmp == ERTS_AINT_NULL) { + /* We are required to move last pointer */ +#ifdef DEBUG + ASSERT(ERTS_AINT_NULL == erts_atomic_read_nob(&this->next_atmc)); + ASSERT(((erts_aint_t) enq) + == erts_atomic_xchg_relb(&chdr->tail.data.last, + (erts_aint_t) this)); +#else + erts_atomic_set_relb(&chdr->tail.data.last, (erts_aint_t) this); +#endif } + else { + /* + * We *need* to insert element somewhere in between the + * last element we read earlier and the actual last element. + */ + int i = cinit; - /* Move last pointer forward... */ - while (1) { - erts_aint_t itmp; - if (want_last) { - if (erts_atomic_read_rb(&this->next_atmc) != ERTS_AINT_NULL) { - /* Someone else will move it forward */ - return erts_atomic_read_nob(&chdr->tail.data.last); + while (1) { + erts_aint_t itmp2; + erts_atomic_set_nob(&this->next_atmc, itmp); + itmp2 = erts_atomic_cmpxchg_relb(&enq->next_atmc, + (erts_aint_t) this, + itmp); + if (itmp == itmp2) + break; /* inserted this */ + if ((i & 1) == 0) + itmp = itmp2; + else { + enq = (erts_sspa_blk_t *) itmp2; /* advance to enq's observed successor; the stale expected value may be the tail */ + itmp = erts_atomic_read_acqb(&enq->next_atmc); + ASSERT(itmp != ERTS_AINT_NULL); } + i++; } - else { - if (erts_atomic_read_nob(&this->next_atmc) != ERTS_AINT_NULL) { - /* Someone else will move it forward */ - return ERTS_AINT_NULL; - } + } +} + +static ERTS_INLINE erts_aint_t +check_insert_marker(erts_sspa_chunk_header_t *chdr, erts_aint_t ilast) +{ + if (!chdr->head.used_marker + && chdr->head.unref_end == (erts_sspa_blk_t *) ilast) { + erts_aint_t itmp; + erts_sspa_blk_t *last = (erts_sspa_blk_t *) ilast; 
+ + erts_atomic_init_nob(&chdr->tail.data.marker.next_atmc, ERTS_AINT_NULL); + itmp = erts_atomic_cmpxchg_relb(&last->next_atmc, + (erts_aint_t) &chdr->tail.data.marker, + ERTS_AINT_NULL); + if (itmp == ERTS_AINT_NULL) { + ilast = (erts_aint_t) &chdr->tail.data.marker; + chdr->head.used_marker = !0; + erts_atomic_set_relb(&chdr->tail.data.last, ilast); } - itmp = erts_atomic_cmpxchg_mb(&chdr->tail.data.last, - (erts_aint_t) this, - ilast); - if (ilast == itmp) - return want_last ? (erts_aint_t) this : ERTS_AINT_NULL; - ilast = itmp; } + return ilast; } void -erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr, erts_sspa_blk_t *blk) +erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr, + erts_sspa_blk_t *blk, + int cinit) { int um_refc_ix = 0; int managed_thread = erts_thr_progress_is_managed_thread(); @@ -180,7 +210,7 @@ erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr, erts_sspa_blk_t *blk) } } - (void) enqueue_remote_managed_thread(chdr, blk, 0); + enqueue_remote_managed_thread(chdr, blk, cinit); if (!managed_thread) erts_atomic_dec_relb(&chdr->tail.data.um_refc[um_refc_ix]); @@ -208,24 +238,17 @@ fetch_remote(erts_sspa_chunk_header_t *chdr, int max) int um_refc_ix; chdr->head.next.thr_progress_reached = 1; um_refc_ix = chdr->head.next.um_refc_ix; - if (erts_atomic_read_acqb(&chdr->tail.data.um_refc[um_refc_ix]) == 0) { + if (erts_atomic_read_nob(&chdr->tail.data.um_refc[um_refc_ix]) == 0) { + + ETHR_MEMBAR(ETHR_LoadLoad|ETHR_LoadStore); /* Move unreferenced end pointer forward... 
*/ chdr->head.unref_end = chdr->head.next.unref_end; - if (!chdr->head.used_marker - && chdr->head.unref_end == (erts_sspa_blk_t *) ilast) { - /* Need to equeue marker */ - chdr->head.used_marker = 1; - ilast = enqueue_remote_managed_thread(chdr, - &chdr->tail.data.marker, - 1); - } + ilast = check_insert_marker(chdr, ilast); - if (chdr->head.unref_end == (erts_sspa_blk_t *) ilast) - ERTS_THR_MEMORY_BARRIER; - else { + if (chdr->head.unref_end != (erts_sspa_blk_t *) ilast) { chdr->head.next.unref_end = (erts_sspa_blk_t *) ilast; chdr->head.next.thr_progress = erts_thr_progress_later(NULL); erts_atomic32_set_relb(&chdr->tail.data.um_refc_ix, diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.h b/erts/emulator/beam/erl_sched_spec_pre_alloc.h index d36066c399..bccb1aba7a 100644 --- a/erts/emulator/beam/erl_sched_spec_pre_alloc.h +++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.h @@ -142,7 +142,8 @@ check_local_list(erts_sspa_chunk_header_t *chdr) erts_sspa_data_t *erts_sspa_create(size_t blk_sz, int pa_size); void erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr, - erts_sspa_blk_t *blk); + erts_sspa_blk_t *blk, + int cinit); erts_sspa_blk_t *erts_sspa_process_remote_frees(erts_sspa_chunk_header_t *chdr, erts_sspa_blk_t *old_res); @@ -216,7 +217,7 @@ erts_sspa_free(erts_sspa_data_t *data, int cix, char *cblk) chdr = &chnk->aligned.header; if (chnk_cix != cix) { /* Remote chunk */ - erts_sspa_remote_free(chdr, blk); + erts_sspa_remote_free(chdr, blk, chnk_cix - cix); } else { /* Local chunk */ |