From 146d7cb4821a9c2c864b91c78de2e58f79918c63 Mon Sep 17 00:00:00 2001 From: Rickard Green Date: Thu, 19 Jul 2012 22:54:09 +0200 Subject: Improve the enqueue operation of delayed dealloc The enqueue operation have been re-written to behave better during heavy contention by spreading writes over multiple locations. This enqueue operation also take advantage of the delayed aux work wake up functionality and can by this omit one memory barrier. --- erts/emulator/beam/erl_alloc_util.c | 129 +++++++++++++++++--------- erts/emulator/beam/erl_sched_spec_pre_alloc.c | 113 +++++++++++++--------- erts/emulator/beam/erl_sched_spec_pre_alloc.h | 5 +- 3 files changed, 158 insertions(+), 89 deletions(-) (limited to 'erts/emulator') diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c index a0abd1c405..97ba306a79 100644 --- a/erts/emulator/beam/erl_alloc_util.c +++ b/erts/emulator/beam/erl_alloc_util.c @@ -829,46 +829,83 @@ init_dd_queue(ErtsAllctrDDQueue_t *ddq) ddq->head.used_marker = 1; } -static ERTS_INLINE erts_aint_t -ddq_managed_thread_enqueue(ErtsAllctrDDQueue_t *ddq, void *ptr) +static ERTS_INLINE int +ddq_managed_thread_enqueue(ErtsAllctrDDQueue_t *ddq, void *ptr, int cinit) { - erts_aint_t ilast, itmp; - ErtsAllctrDDBlock_t *this = ptr; + erts_aint_t itmp; + ErtsAllctrDDBlock_t *enq, *this = ptr; erts_atomic_init_nob(&this->atmc_next, ERTS_AINT_NULL); - /* Enqueue at end of list... */ - ilast = erts_atomic_read_nob(&ddq->tail.data.last); - while (1) { - ErtsAllctrDDBlock_t *last = (ErtsAllctrDDBlock_t *) ilast; - itmp = erts_atomic_cmpxchg_mb(&last->atmc_next, - (erts_aint_t) this, - ERTS_AINT_NULL); - if (itmp == ERTS_AINT_NULL) - break; - ilast = itmp; + enq = (ErtsAllctrDDBlock_t *) erts_atomic_read_nob(&ddq->tail.data.last); + itmp = erts_atomic_cmpxchg_relb(&enq->atmc_next, + (erts_aint_t) this, + ERTS_AINT_NULL); + if (itmp == ERTS_AINT_NULL) { + /* We are required to move last pointer */ +#ifdef DEBUG + ASSERT(ERTS_AINT_NULL == erts_atomic_read_nob(&this->atmc_next)); + ASSERT(((erts_aint_t) enq) + == erts_atomic_xchg_relb(&ddq->tail.data.last, + (erts_aint_t) this)); +#else + erts_atomic_set_relb(&ddq->tail.data.last, (erts_aint_t) this); +#endif + return 1; } + else { + /* + * We *need* to insert element somewhere in between the + * last element we read earlier and the actual last element. + */ + int i = cinit; - /* Move last pointer forward... */ - while (1) { - if (erts_atomic_read_rb(&this->atmc_next) != ERTS_AINT_NULL) { - /* Someone else will move it forward */ - return erts_atomic_read_rb(&ddq->tail.data.last); + while (1) { + erts_aint_t itmp2; + erts_atomic_set_nob(&this->atmc_next, itmp); + itmp2 = erts_atomic_cmpxchg_relb(&enq->atmc_next, + (erts_aint_t) this, + itmp); + if (itmp == itmp2) + return 0; /* inserted this */ + if ((i & 1) == 0) + itmp = itmp2; + else { + enq = (ErtsAllctrDDBlock_t *) itmp2; + itmp = erts_atomic_read_acqb(&enq->atmc_next); + ASSERT(itmp != ERTS_AINT_NULL); + } + i++; } - itmp = erts_atomic_cmpxchg_mb(&ddq->tail.data.last, - (erts_aint_t) this, - ilast); - if (ilast == itmp) - return (erts_aint_t) this; - ilast = itmp; } } +static ERTS_INLINE erts_aint_t +check_insert_marker(ErtsAllctrDDQueue_t *ddq, erts_aint_t ilast) +{ + if (!ddq->head.used_marker + && ddq->head.unref_end == (ErtsAllctrDDBlock_t *) ilast) { + erts_aint_t itmp; + ErtsAllctrDDBlock_t *last = (ErtsAllctrDDBlock_t *) ilast; + + erts_atomic_init_nob(&ddq->tail.data.marker.atmc_next, ERTS_AINT_NULL); + itmp = erts_atomic_cmpxchg_relb(&last->atmc_next, + (erts_aint_t) &ddq->tail.data.marker, + ERTS_AINT_NULL); + if (itmp == ERTS_AINT_NULL) { + ilast = (erts_aint_t) &ddq->tail.data.marker; + ddq->head.used_marker = !0; + erts_atomic_set_relb(&ddq->tail.data.last, ilast); + } + } + return ilast; +} + static ERTS_INLINE int -ddq_enqueue(ErtsAlcType_t type, ErtsAllctrDDQueue_t *ddq, void *ptr) +ddq_enqueue(ErtsAlcType_t type, ErtsAllctrDDQueue_t *ddq, void *ptr, int cinit) { - erts_aint_t ilast; + int last_elem; int um_refc_ix = 0; int managed_thread = erts_thr_progress_is_managed_thread(); if (!managed_thread) { @@ -884,11 +921,11 @@ ddq_enqueue(ErtsAlcType_t type, ErtsAllctrDDQueue_t *ddq, void *ptr) } } - ilast = ddq_managed_thread_enqueue(ddq, ptr); + last_elem = ddq_managed_thread_enqueue(ddq, ptr, cinit); if (!managed_thread) erts_atomic_dec_relb(&ddq->tail.data.um_refc[um_refc_ix]); - return ilast == (erts_aint_t) ptr; + return last_elem; } static ERTS_INLINE void * @@ -934,20 +971,16 @@ ddq_check_incoming(ErtsAllctrDDQueue_t *ddq) int um_refc_ix; ddq->head.next.thr_progress_reached = 1; um_refc_ix = ddq->head.next.um_refc_ix; - if (erts_atomic_read_acqb(&ddq->tail.data.um_refc[um_refc_ix]) == 0) { + if (erts_atomic_read_nob(&ddq->tail.data.um_refc[um_refc_ix]) == 0) { /* Move unreferenced end pointer forward... */ + ETHR_MEMBAR(ETHR_LoadLoad|ETHR_LoadStore); + ddq->head.unref_end = ddq->head.next.unref_end; - if (!ddq->head.used_marker - && ddq->head.unref_end == (ErtsAllctrDDBlock_t *) ilast) { - ddq->head.used_marker = 1; - ilast = ddq_managed_thread_enqueue(ddq, &ddq->tail.data.marker); - } + ilast = check_insert_marker(ddq, ilast); - if (ddq->head.unref_end == (ErtsAllctrDDBlock_t *) ilast) - ERTS_THR_MEMORY_BARRIER; - else { + if (ddq->head.unref_end != (ErtsAllctrDDBlock_t *) ilast) { ddq->head.next.unref_end = (ErtsAllctrDDBlock_t *) ilast; ddq->head.next.thr_progress = erts_thr_progress_later(NULL); erts_atomic32_set_relb(&ddq->tail.data.um_refc_ix, @@ -1092,12 +1125,15 @@ handle_delayed_dealloc(Allctr_t *allctr, } static ERTS_INLINE void -enqueue_dealloc_other_instance(ErtsAlcType_t type, Allctr_t *allctr, void *ptr) +enqueue_dealloc_other_instance(ErtsAlcType_t type, + Allctr_t *allctr, + void *ptr, + int cinit) { if (allctr->fix) ((UWord *) ptr)[ERTS_ALCU_DD_FIX_TYPE_OFFS] = (UWord) type; - if (ddq_enqueue(type, &allctr->dd.q, ptr)) + if (ddq_enqueue(type, &allctr->dd.q, ptr, cinit)) erts_alloc_notify_delayed_dealloc(allctr->ix); } @@ -3613,7 +3649,11 @@ erts_alcu_free_thr_pref(ErtsAlcType_t type, void *extra, void *p) get_pref_allctr(extra, &pref_allctr); ptr = get_used_allctr(extra, p, &used_allctr, NULL); if (pref_allctr != used_allctr) - enqueue_dealloc_other_instance(type, used_allctr, ptr); + enqueue_dealloc_other_instance(type, + used_allctr, + ptr, + (used_allctr->dd.ix + - pref_allctr->dd.ix)); else { if (used_allctr->thread_safe) erts_mtx_lock(&used_allctr->mutex); @@ -3988,7 +4028,11 @@ realloc_thr_pref(ErtsAlcType_t type, void *extra, void *p, Uint size, sys_memcpy(res, p, cpy_size); if (!force_move || used_allctr != pref_allctr) - enqueue_dealloc_other_instance(type, used_allctr, ptr); + enqueue_dealloc_other_instance(type, + used_allctr, + ptr, + (used_allctr->dd.ix + - pref_allctr->dd.ix)); else { do_erts_alcu_free(type, used_allctr, ptr); ASSERT(pref_allctr == used_allctr); @@ -4179,6 +4223,7 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) allctr->dd.use = 1; init_dd_queue(&allctr->dd.q); + allctr->dd.ix = init->ix; } else #endif diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.c b/erts/emulator/beam/erl_sched_spec_pre_alloc.c index 37b186abd9..a490aec734 100644 --- a/erts/emulator/beam/erl_sched_spec_pre_alloc.c +++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.c @@ -116,54 +116,84 @@ erts_sspa_create(size_t blk_sz, int pa_size) return data; } -static ERTS_INLINE erts_aint_t +static ERTS_INLINE void enqueue_remote_managed_thread(erts_sspa_chunk_header_t *chdr, erts_sspa_blk_t *this, - int want_last) + int cinit) { - erts_aint_t ilast, itmp; + erts_aint_t itmp; + erts_sspa_blk_t *enq; erts_atomic_init_nob(&this->next_atmc, ERTS_AINT_NULL); - /* Enqueue at end of list... */ - ilast = erts_atomic_read_nob(&chdr->tail.data.last); - while (1) { - erts_sspa_blk_t *last = (erts_sspa_blk_t *) ilast; - itmp = erts_atomic_cmpxchg_mb(&last->next_atmc, - (erts_aint_t) this, - ERTS_AINT_NULL); - if (itmp == ERTS_AINT_NULL) - break; - ilast = itmp; + enq = (erts_sspa_blk_t *) erts_atomic_read_nob(&chdr->tail.data.last); + itmp = erts_atomic_cmpxchg_relb(&enq->next_atmc, + (erts_aint_t) this, + ERTS_AINT_NULL); + if (itmp == ERTS_AINT_NULL) { + /* We are required to move last pointer */ +#ifdef DEBUG + ASSERT(ERTS_AINT_NULL == erts_atomic_read_nob(&this->next_atmc)); + ASSERT(((erts_aint_t) enq) + == erts_atomic_xchg_relb(&chdr->tail.data.last, + (erts_aint_t) this)); +#else + erts_atomic_set_relb(&chdr->tail.data.last, (erts_aint_t) this); +#endif } + else { + /* + * We *need* to insert element somewhere in between the + * last element we read earlier and the actual last element. + */ + int i = cinit; - /* Move last pointer forward... */ - while (1) { - erts_aint_t itmp; - if (want_last) { - if (erts_atomic_read_rb(&this->next_atmc) != ERTS_AINT_NULL) { - /* Someone else will move it forward */ - return erts_atomic_read_nob(&chdr->tail.data.last); + while (1) { + erts_aint_t itmp2; + erts_atomic_set_nob(&this->next_atmc, itmp); + itmp2 = erts_atomic_cmpxchg_relb(&enq->next_atmc, + (erts_aint_t) this, + itmp); + if (itmp == itmp2) + break; /* inserted this */ + if ((i & 1) == 0) + itmp = itmp2; + else { + enq = (erts_sspa_blk_t *) itmp; + itmp = erts_atomic_read_acqb(&enq->next_atmc); + ASSERT(itmp != ERTS_AINT_NULL); } + i++; } - else { - if (erts_atomic_read_nob(&this->next_atmc) != ERTS_AINT_NULL) { - /* Someone else will move it forward */ - return ERTS_AINT_NULL; - } + } +} + +static ERTS_INLINE erts_aint_t +check_insert_marker(erts_sspa_chunk_header_t *chdr, erts_aint_t ilast) +{ + if (!chdr->head.used_marker + && chdr->head.unref_end == (erts_sspa_blk_t *) ilast) { + erts_aint_t itmp; + erts_sspa_blk_t *last = (erts_sspa_blk_t *) ilast; + + erts_atomic_init_nob(&chdr->tail.data.marker.next_atmc, ERTS_AINT_NULL); + itmp = erts_atomic_cmpxchg_relb(&last->next_atmc, + (erts_aint_t) &chdr->tail.data.marker, + ERTS_AINT_NULL); + if (itmp == ERTS_AINT_NULL) { + ilast = (erts_aint_t) &chdr->tail.data.marker; + chdr->head.used_marker = !0; + erts_atomic_set_relb(&chdr->tail.data.last, ilast); } - itmp = erts_atomic_cmpxchg_mb(&chdr->tail.data.last, - (erts_aint_t) this, - ilast); - if (ilast == itmp) - return want_last ? (erts_aint_t) this : ERTS_AINT_NULL; - ilast = itmp; } + return ilast; } void -erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr, erts_sspa_blk_t *blk) +erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr, + erts_sspa_blk_t *blk, + int cinit) { int um_refc_ix = 0; int managed_thread = erts_thr_progress_is_managed_thread(); @@ -180,7 +210,7 @@ erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr, erts_sspa_blk_t *blk) } } - (void) enqueue_remote_managed_thread(chdr, blk, 0); + enqueue_remote_managed_thread(chdr, blk, cinit); if (!managed_thread) erts_atomic_dec_relb(&chdr->tail.data.um_refc[um_refc_ix]); @@ -208,24 +238,17 @@ fetch_remote(erts_sspa_chunk_header_t *chdr, int max) int um_refc_ix; chdr->head.next.thr_progress_reached = 1; um_refc_ix = chdr->head.next.um_refc_ix; - if (erts_atomic_read_acqb(&chdr->tail.data.um_refc[um_refc_ix]) == 0) { + if (erts_atomic_read_nob(&chdr->tail.data.um_refc[um_refc_ix]) == 0) { + + ETHR_MEMBAR(ETHR_LoadLoad|ETHR_LoadStore); /* Move unreferenced end pointer forward... */ chdr->head.unref_end = chdr->head.next.unref_end; - if (!chdr->head.used_marker - && chdr->head.unref_end == (erts_sspa_blk_t *) ilast) { - /* Need to equeue marker */ - chdr->head.used_marker = 1; - ilast = enqueue_remote_managed_thread(chdr, - &chdr->tail.data.marker, - 1); - } + ilast = check_insert_marker(chdr, ilast); - if (chdr->head.unref_end == (erts_sspa_blk_t *) ilast) - ERTS_THR_MEMORY_BARRIER; - else { + if (chdr->head.unref_end != (erts_sspa_blk_t *) ilast) { chdr->head.next.unref_end = (erts_sspa_blk_t *) ilast; chdr->head.next.thr_progress = erts_thr_progress_later(NULL); erts_atomic32_set_relb(&chdr->tail.data.um_refc_ix, diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.h b/erts/emulator/beam/erl_sched_spec_pre_alloc.h index d36066c399..bccb1aba7a 100644 --- a/erts/emulator/beam/erl_sched_spec_pre_alloc.h +++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.h @@ -142,7 +142,8 @@ check_local_list(erts_sspa_chunk_header_t *chdr) erts_sspa_data_t *erts_sspa_create(size_t blk_sz, int pa_size); void erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr, - erts_sspa_blk_t *blk); + erts_sspa_blk_t *blk, + int cinit); erts_sspa_blk_t *erts_sspa_process_remote_frees(erts_sspa_chunk_header_t *chdr, erts_sspa_blk_t *old_res); @@ -216,7 +217,7 @@ erts_sspa_free(erts_sspa_data_t *data, int cix, char *cblk) chdr = &chnk->aligned.header; if (chnk_cix != cix) { /* Remote chunk */ - erts_sspa_remote_free(chdr, blk); + erts_sspa_remote_free(chdr, blk, chnk_cix - cix); } else { /* Local chunk */ -- cgit v1.2.3