diff options
author | Rickard Green <[email protected]> | 2012-07-31 21:58:48 +0200 |
---|---|---|
committer | Rickard Green <[email protected]> | 2012-07-31 21:58:48 +0200 |
commit | a67d0be885529844b3c14102604a38f556e70ecc (patch) | |
tree | 0a5a01ca22d7d2d1c7ac904ba6abbcb8979d1296 /erts/emulator/beam | |
parent | 75aa1216d65756e4a93098b481bbe0a468230b7a (diff) | |
parent | 020f7cb45b6a3e5ff8af55cc58c7fd0fa0b447c7 (diff) | |
download | otp-a67d0be885529844b3c14102604a38f556e70ecc.tar.gz otp-a67d0be885529844b3c14102604a38f556e70ecc.tar.bz2 otp-a67d0be885529844b3c14102604a38f556e70ecc.zip |
Merge branch 'maint'
* maint:
Improve the enqueue operation of delayed dealloc
Implement delayed aux work wake up
Conflicts:
erts/emulator/beam/erl_alloc_util.c
erts/emulator/beam/erl_process.h
Diffstat (limited to 'erts/emulator/beam')
-rw-r--r-- | erts/emulator/beam/erl_alloc_util.c | 130 | ||||
-rw-r--r-- | erts/emulator/beam/erl_process.c | 92 | ||||
-rw-r--r-- | erts/emulator/beam/erl_process.h | 42 | ||||
-rw-r--r-- | erts/emulator/beam/erl_sched_spec_pre_alloc.c | 113 | ||||
-rw-r--r-- | erts/emulator/beam/erl_sched_spec_pre_alloc.h | 5 |
5 files changed, 268 insertions, 114 deletions
diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c index 9a011e2adc..97ba306a79 100644 --- a/erts/emulator/beam/erl_alloc_util.c +++ b/erts/emulator/beam/erl_alloc_util.c @@ -829,49 +829,83 @@ init_dd_queue(ErtsAllctrDDQueue_t *ddq) ddq->head.used_marker = 1; } -static ERTS_INLINE erts_aint_t -ddq_managed_thread_enqueue(ErtsAllctrDDQueue_t *ddq, void *ptr) +static ERTS_INLINE int +ddq_managed_thread_enqueue(ErtsAllctrDDQueue_t *ddq, void *ptr, int cinit) { - erts_aint_t first_ilast, ilast, itmp; - ErtsAllctrDDBlock_t *this = ptr; + erts_aint_t itmp; + ErtsAllctrDDBlock_t *enq, *this = ptr; erts_atomic_init_nob(&this->atmc_next, ERTS_AINT_NULL); - /* Enqueue at end of list... */ - first_ilast = ilast = erts_atomic_read_nob(&ddq->tail.data.last); - while (1) { - ErtsAllctrDDBlock_t *last = (ErtsAllctrDDBlock_t *) ilast; - itmp = erts_atomic_cmpxchg_mb(&last->atmc_next, - (erts_aint_t) this, - ERTS_AINT_NULL); - if (itmp == ERTS_AINT_NULL) - break; - ilast = itmp; + enq = (ErtsAllctrDDBlock_t *) erts_atomic_read_nob(&ddq->tail.data.last); + itmp = erts_atomic_cmpxchg_relb(&enq->atmc_next, + (erts_aint_t) this, + ERTS_AINT_NULL); + if (itmp == ERTS_AINT_NULL) { + /* We are required to move last pointer */ +#ifdef DEBUG + ASSERT(ERTS_AINT_NULL == erts_atomic_read_nob(&this->atmc_next)); + ASSERT(((erts_aint_t) enq) + == erts_atomic_xchg_relb(&ddq->tail.data.last, + (erts_aint_t) this)); +#else + erts_atomic_set_relb(&ddq->tail.data.last, (erts_aint_t) this); +#endif + return 1; } + else { + /* + * We *need* to insert element somewhere in between the + * last element we read earlier and the actual last element. + */ + int i = cinit; - /* Move last pointer forward... */ - while (1) { - if (erts_atomic_read_rb(&this->atmc_next) != ERTS_AINT_NULL) { - ilast = erts_atomic_read_rb(&ddq->tail.data.last); - if (first_ilast != ilast) { - /* Someone else will move it forward */ - return ilast; + while (1) { + erts_aint_t itmp2; + erts_atomic_set_nob(&this->atmc_next, itmp); + itmp2 = erts_atomic_cmpxchg_relb(&enq->atmc_next, + (erts_aint_t) this, + itmp); + if (itmp == itmp2) + return 0; /* inserted this */ + if ((i & 1) == 0) + itmp = itmp2; + else { + enq = (ErtsAllctrDDBlock_t *) itmp2; + itmp = erts_atomic_read_acqb(&enq->atmc_next); + ASSERT(itmp != ERTS_AINT_NULL); } + i++; } - itmp = erts_atomic_cmpxchg_mb(&ddq->tail.data.last, - (erts_aint_t) this, - ilast); - if (ilast == itmp) - return (erts_aint_t) this; - ilast = itmp; } } +static ERTS_INLINE erts_aint_t +check_insert_marker(ErtsAllctrDDQueue_t *ddq, erts_aint_t ilast) +{ + if (!ddq->head.used_marker + && ddq->head.unref_end == (ErtsAllctrDDBlock_t *) ilast) { + erts_aint_t itmp; + ErtsAllctrDDBlock_t *last = (ErtsAllctrDDBlock_t *) ilast; + + erts_atomic_init_nob(&ddq->tail.data.marker.atmc_next, ERTS_AINT_NULL); + itmp = erts_atomic_cmpxchg_relb(&last->atmc_next, + (erts_aint_t) &ddq->tail.data.marker, + ERTS_AINT_NULL); + if (itmp == ERTS_AINT_NULL) { + ilast = (erts_aint_t) &ddq->tail.data.marker; + ddq->head.used_marker = !0; + erts_atomic_set_relb(&ddq->tail.data.last, ilast); + } + } + return ilast; +} + static ERTS_INLINE int -ddq_enqueue(ErtsAlcType_t type, ErtsAllctrDDQueue_t *ddq, void *ptr) +ddq_enqueue(ErtsAlcType_t type, ErtsAllctrDDQueue_t *ddq, void *ptr, int cinit) { - erts_aint_t ilast; + int last_elem; int um_refc_ix = 0; int managed_thread = erts_thr_progress_is_managed_thread(); if (!managed_thread) { @@ -887,11 +921,11 @@ ddq_enqueue(ErtsAlcType_t type, ErtsAllctrDDQueue_t *ddq, void *ptr) } } - ilast = ddq_managed_thread_enqueue(ddq, ptr); + last_elem = ddq_managed_thread_enqueue(ddq, ptr, cinit); if (!managed_thread) erts_atomic_dec_relb(&ddq->tail.data.um_refc[um_refc_ix]); - return ilast == (erts_aint_t) ptr; + return last_elem; } static ERTS_INLINE void * @@ -937,20 +971,16 @@ ddq_check_incoming(ErtsAllctrDDQueue_t *ddq) int um_refc_ix; ddq->head.next.thr_progress_reached = 1; um_refc_ix = ddq->head.next.um_refc_ix; - if (erts_atomic_read_acqb(&ddq->tail.data.um_refc[um_refc_ix]) == 0) { + if (erts_atomic_read_nob(&ddq->tail.data.um_refc[um_refc_ix]) == 0) { /* Move unreferenced end pointer forward... */ + ETHR_MEMBAR(ETHR_LoadLoad|ETHR_LoadStore); + ddq->head.unref_end = ddq->head.next.unref_end; - if (!ddq->head.used_marker - && ddq->head.unref_end == (ErtsAllctrDDBlock_t *) ilast) { - ddq->head.used_marker = 1; - ilast = ddq_managed_thread_enqueue(ddq, &ddq->tail.data.marker); - } + ilast = check_insert_marker(ddq, ilast); - if (ddq->head.unref_end == (ErtsAllctrDDBlock_t *) ilast) - ERTS_THR_MEMORY_BARRIER; - else { + if (ddq->head.unref_end != (ErtsAllctrDDBlock_t *) ilast) { ddq->head.next.unref_end = (ErtsAllctrDDBlock_t *) ilast; ddq->head.next.thr_progress = erts_thr_progress_later(NULL); erts_atomic32_set_relb(&ddq->tail.data.um_refc_ix, @@ -1095,12 +1125,15 @@ handle_delayed_dealloc(Allctr_t *allctr, } static ERTS_INLINE void -enqueue_dealloc_other_instance(ErtsAlcType_t type, Allctr_t *allctr, void *ptr) +enqueue_dealloc_other_instance(ErtsAlcType_t type, + Allctr_t *allctr, + void *ptr, + int cinit) { if (allctr->fix) ((UWord *) ptr)[ERTS_ALCU_DD_FIX_TYPE_OFFS] = (UWord) type; - if (ddq_enqueue(type, &allctr->dd.q, ptr)) + if (ddq_enqueue(type, &allctr->dd.q, ptr, cinit)) erts_alloc_notify_delayed_dealloc(allctr->ix); } @@ -3616,7 +3649,11 @@ erts_alcu_free_thr_pref(ErtsAlcType_t type, void *extra, void *p) get_pref_allctr(extra, &pref_allctr); ptr = get_used_allctr(extra, p, &used_allctr, NULL); if (pref_allctr != used_allctr) - enqueue_dealloc_other_instance(type, used_allctr, ptr); + enqueue_dealloc_other_instance(type, + used_allctr, + ptr, + (used_allctr->dd.ix + - pref_allctr->dd.ix)); else { if (used_allctr->thread_safe) erts_mtx_lock(&used_allctr->mutex); @@ -3991,7 +4028,11 @@ realloc_thr_pref(ErtsAlcType_t type, void *extra, void *p, Uint size, sys_memcpy(res, p, cpy_size); if (!force_move || used_allctr != pref_allctr) - enqueue_dealloc_other_instance(type, used_allctr, ptr); + enqueue_dealloc_other_instance(type, + used_allctr, + ptr, + (used_allctr->dd.ix + - pref_allctr->dd.ix)); else { do_erts_alcu_free(type, used_allctr, ptr); ASSERT(pref_allctr == used_allctr); @@ -4182,6 +4223,7 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) allctr->dd.use = 1; init_dd_queue(&allctr->dd.q); + allctr->dd.ix = init->ix; } else #endif diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index e62556ce72..afd2ddc69d 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -518,6 +518,7 @@ dbg_chk_aux_work_val(erts_aint32_t value) valid |= ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN; #endif #ifdef ERTS_SMP + valid |= ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP; valid |= ERTS_SSI_AUX_WORK_MISC_THR_PRGR; valid |= ERTS_SSI_AUX_WORK_DD; valid |= ERTS_SSI_AUX_WORK_DD_THR_PRGR; @@ -1177,6 +1178,45 @@ haw_thr_prgr_current_check_progress(ErtsAuxWorkData *awdp) } } +static ERTS_INLINE erts_aint32_t +handle_delayed_aux_work_wakeup(ErtsAuxWorkData *awdp, erts_aint32_t aux_work) +{ + int jix, max_jix; + unset_aux_work_flags(awdp->ssi, ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP); + + ERTS_THR_MEMORY_BARRIER; + + max_jix = awdp->delayed_wakeup.jix; + awdp->delayed_wakeup.jix = -1; + for (jix = 0; jix <= max_jix; jix++) { + int sched = awdp->delayed_wakeup.job[jix].sched; + erts_aint32_t aux_work = awdp->delayed_wakeup.job[jix].aux_work; + + ASSERT(awdp->delayed_wakeup.sched2jix[sched] == jix); + awdp->delayed_wakeup.sched2jix[sched] = -1; + set_aux_work_flags_wakeup_nob(ERTS_SCHED_SLEEP_INFO_IX(sched-1), + aux_work); + } + return aux_work & ~ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP; +} + +static ERTS_INLINE void +schedule_aux_work_wakeup(ErtsAuxWorkData *awdp, int sched, erts_aint32_t aux_work) +{ + int jix = awdp->delayed_wakeup.sched2jix[sched]; + if (jix >= 0) { + ASSERT(awdp->delayed_wakeup.job[jix].sched == sched); + awdp->delayed_wakeup.job[jix].aux_work |= aux_work; + } + else { + jix = ++awdp->delayed_wakeup.jix; + awdp->delayed_wakeup.sched2jix[sched] = jix; + awdp->delayed_wakeup.job[jix].sched = sched; + awdp->delayed_wakeup.job[jix].aux_work = aux_work; + } + set_aux_work_flags_wakeup_nob(awdp->ssi, ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP); +} + #endif typedef struct erts_misc_aux_work_t_ erts_misc_aux_work_t; @@ -1520,8 +1560,14 @@ handle_fix_alloc(ErtsAuxWorkData *awdp, erts_aint32_t aux_work) void erts_alloc_notify_delayed_dealloc(int ix) { - set_aux_work_flags_wakeup_nob(ERTS_SCHED_SLEEP_INFO_IX(ix-1), - ERTS_SSI_AUX_WORK_DD); + ErtsSchedulerData *esdp = erts_get_scheduler_data(); + if (esdp) + schedule_aux_work_wakeup(&esdp->aux_work_data, + ix, + ERTS_SSI_AUX_WORK_DD); + else + set_aux_work_flags_wakeup_relb(ERTS_SCHED_SLEEP_INFO_IX(ix-1), + ERTS_SSI_AUX_WORK_DD); } static ERTS_INLINE erts_aint32_t @@ -1819,6 +1865,8 @@ handle_aux_work(ErtsAuxWorkData *awdp, erts_aint32_t orig_aux_work, int waiting) * eachother. Most frequent first. */ #ifdef ERTS_SMP + HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP, + handle_delayed_aux_work_wakeup); HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_DD, handle_delayed_dealloc); /* DD must be before DD_THR_PRGR */ @@ -2305,7 +2353,7 @@ thr_prgr_fin_wait(void *vssi) | ERTS_SSI_FLG_TSE_SLEEPING)); } -static void init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp); +static void init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, char *dawwp); static void * aux_thread(void *unused) @@ -2325,7 +2373,7 @@ aux_thread(void *unused) callbacks.finalize_wait = thr_prgr_fin_wait; erts_thr_progress_register_managed_thread(NULL, &callbacks, 1); - init_aux_work_data(awdp, NULL); + init_aux_work_data(awdp, NULL, NULL); awdp->ssi = ssi; sched_prep_spin_wait(ssi); @@ -4538,7 +4586,7 @@ erts_sched_set_busy_wait_threshold(char *str) return 0; } static void -init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp) +init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, char *dawwp) { awdp->sched_id = esdp ? (int) esdp->no : 0; awdp->esdp = esdp; @@ -4556,12 +4604,32 @@ init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp) #endif awdp->async_ready.queue = NULL; #endif +#ifdef ERTS_SMP + if (!dawwp) { + awdp->delayed_wakeup.job = NULL; + awdp->delayed_wakeup.sched2jix = NULL; + awdp->delayed_wakeup.jix = -1; + } + else { + int i; + awdp->delayed_wakeup.job = (ErtsDelayedAuxWorkWakeupJob *) dawwp; + dawwp += sizeof(ErtsDelayedAuxWorkWakeupJob)*(erts_no_schedulers+1); + awdp->delayed_wakeup.sched2jix = (int *) dawwp; + awdp->delayed_wakeup.jix = -1; + for (i = 0; i <= erts_no_schedulers; i++) + awdp->delayed_wakeup.sched2jix[i] = -1; + } +#endif } void erts_init_scheduling(int no_schedulers, int no_schedulers_online) { int ix, n, no_ssi; + char *daww_ptr; +#ifdef ERTS_SMP + size_t daww_sz; +#endif init_misc_op_list_alloc(); @@ -4684,6 +4752,15 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online) /* Create and initialize scheduler specific data */ +#ifdef ERTS_SMP + daww_sz = ERTS_ALC_CACHE_LINE_ALIGN_SIZE((sizeof(ErtsDelayedAuxWorkWakeupJob) + + sizeof(int))*(n+1)); + daww_ptr = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_SCHDLR_DATA, + daww_sz*n); +#else + daww_ptr = NULL; +#endif + erts_aligned_scheduler_data = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_SCHDLR_DATA, n*sizeof(ErtsAlignedSchedulerData)); @@ -4718,7 +4795,10 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online) esdp->run_queue = ERTS_RUNQ_IX(ix); esdp->run_queue->scheduler = esdp; - init_aux_work_data(&esdp->aux_work_data, esdp); + init_aux_work_data(&esdp->aux_work_data, esdp, daww_ptr); +#ifdef ERTS_SMP + daww_ptr += daww_sz; +#endif init_sched_wall_time(&esdp->sched_wall_time); } diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h index 93e71681da..b8b137f413 100644 --- a/erts/emulator/beam/erl_process.h +++ b/erts/emulator/beam/erl_process.h @@ -270,20 +270,21 @@ typedef enum { * eachother. Most frequent - lowest bit number. */ -#define ERTS_SSI_AUX_WORK_DD (((erts_aint32_t) 1) << 0) -#define ERTS_SSI_AUX_WORK_DD_THR_PRGR (((erts_aint32_t) 1) << 1) -#define ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC (((erts_aint32_t) 1) << 2) -#define ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM (((erts_aint32_t) 1) << 3) -#define ERTS_SSI_AUX_WORK_ASYNC_READY (((erts_aint32_t) 1) << 4) -#define ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN (((erts_aint32_t) 1) << 5) -#define ERTS_SSI_AUX_WORK_MISC_THR_PRGR (((erts_aint32_t) 1) << 6) -#define ERTS_SSI_AUX_WORK_MISC (((erts_aint32_t) 1) << 7) -#define ERTS_SSI_AUX_WORK_CHECK_CHILDREN (((erts_aint32_t) 1) << 8) -#define ERTS_SSI_AUX_WORK_SET_TMO (((erts_aint32_t) 1) << 9) -#define ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK (((erts_aint32_t) 1) << 10) -#define ERTS_SSI_AUX_WORK_CODE_IX_ACTIVATION (((erts_aint32_t) 1) << 11) -#define ERTS_SSI_AUX_WORK_REAP_PORTS (((erts_aint32_t) 1) << 12) -#define ERTS_SSI_AUX_WORK_FINISH_BP (((erts_aint32_t) 1) << 13) +#define ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP (((erts_aint32_t) 1) << 0) +#define ERTS_SSI_AUX_WORK_DD (((erts_aint32_t) 1) << 1) +#define ERTS_SSI_AUX_WORK_DD_THR_PRGR (((erts_aint32_t) 1) << 2) +#define ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC (((erts_aint32_t) 1) << 3) +#define ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM (((erts_aint32_t) 1) << 4) +#define ERTS_SSI_AUX_WORK_ASYNC_READY (((erts_aint32_t) 1) << 5) +#define ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN (((erts_aint32_t) 1) << 6) +#define ERTS_SSI_AUX_WORK_MISC_THR_PRGR (((erts_aint32_t) 1) << 7) +#define ERTS_SSI_AUX_WORK_MISC (((erts_aint32_t) 1) << 8) +#define ERTS_SSI_AUX_WORK_CHECK_CHILDREN (((erts_aint32_t) 1) << 9) +#define ERTS_SSI_AUX_WORK_SET_TMO (((erts_aint32_t) 1) << 10) +#define ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK (((erts_aint32_t) 1) << 11) +#define ERTS_SSI_AUX_WORK_CODE_IX_ACTIVATION (((erts_aint32_t) 1) << 12) +#define ERTS_SSI_AUX_WORK_REAP_PORTS (((erts_aint32_t) 1) << 13) +#define ERTS_SSI_AUX_WORK_FINISH_BP (((erts_aint32_t) 1) << 14) typedef struct ErtsSchedulerSleepInfo_ ErtsSchedulerSleepInfo; @@ -439,6 +440,11 @@ typedef struct { } ErtsSchedWallTime; typedef struct { + int sched; + erts_aint32_t aux_work; +} ErtsDelayedAuxWorkWakeupJob; + +typedef struct { int sched_id; ErtsSchedulerData *esdp; ErtsSchedulerSleepInfo *ssi; @@ -472,12 +478,15 @@ typedef struct { Process* code_stager; ErtsThrPrgrVal thr_prgr; } code_ix_activation; -#endif -#ifdef ERTS_SMP struct { Process* stager; ErtsThrPrgrVal thr_prgr; } bp_ix_activation; + struct { + int *sched2jix; + int jix; + ErtsDelayedAuxWorkWakeupJob *job; + } delayed_wakeup; #endif } ErtsAuxWorkData; @@ -512,7 +521,6 @@ struct ErtsSchedulerData_ { int virtual_reds; int cpu_id; /* >= 0 when bound */ ErtsAuxWorkData aux_work_data; - ErtsAtomCacheMap atom_cache_map; ErtsSchedAllocData alloc_data; diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.c b/erts/emulator/beam/erl_sched_spec_pre_alloc.c index 37b186abd9..a490aec734 100644 --- a/erts/emulator/beam/erl_sched_spec_pre_alloc.c +++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.c @@ -116,54 +116,84 @@ erts_sspa_create(size_t blk_sz, int pa_size) return data; } -static ERTS_INLINE erts_aint_t +static ERTS_INLINE void enqueue_remote_managed_thread(erts_sspa_chunk_header_t *chdr, erts_sspa_blk_t *this, - int want_last) + int cinit) { - erts_aint_t ilast, itmp; + erts_aint_t itmp; + erts_sspa_blk_t *enq; erts_atomic_init_nob(&this->next_atmc, ERTS_AINT_NULL); - /* Enqueue at end of list... */ - ilast = erts_atomic_read_nob(&chdr->tail.data.last); - while (1) { - erts_sspa_blk_t *last = (erts_sspa_blk_t *) ilast; - itmp = erts_atomic_cmpxchg_mb(&last->next_atmc, - (erts_aint_t) this, - ERTS_AINT_NULL); - if (itmp == ERTS_AINT_NULL) - break; - ilast = itmp; + enq = (erts_sspa_blk_t *) erts_atomic_read_nob(&chdr->tail.data.last); + itmp = erts_atomic_cmpxchg_relb(&enq->next_atmc, + (erts_aint_t) this, + ERTS_AINT_NULL); + if (itmp == ERTS_AINT_NULL) { + /* We are required to move last pointer */ +#ifdef DEBUG + ASSERT(ERTS_AINT_NULL == erts_atomic_read_nob(&this->next_atmc)); + ASSERT(((erts_aint_t) enq) + == erts_atomic_xchg_relb(&chdr->tail.data.last, + (erts_aint_t) this)); +#else + erts_atomic_set_relb(&chdr->tail.data.last, (erts_aint_t) this); +#endif } + else { + /* + * We *need* to insert element somewhere in between the + * last element we read earlier and the actual last element. + */ + int i = cinit; - /* Move last pointer forward... */ - while (1) { - erts_aint_t itmp; - if (want_last) { - if (erts_atomic_read_rb(&this->next_atmc) != ERTS_AINT_NULL) { - /* Someone else will move it forward */ - return erts_atomic_read_nob(&chdr->tail.data.last); + while (1) { + erts_aint_t itmp2; + erts_atomic_set_nob(&this->next_atmc, itmp); + itmp2 = erts_atomic_cmpxchg_relb(&enq->next_atmc, + (erts_aint_t) this, + itmp); + if (itmp == itmp2) + break; /* inserted this */ + if ((i & 1) == 0) + itmp = itmp2; + else { + enq = (erts_sspa_blk_t *) itmp; + itmp = erts_atomic_read_acqb(&enq->next_atmc); + ASSERT(itmp != ERTS_AINT_NULL); } + i++; } - else { - if (erts_atomic_read_nob(&this->next_atmc) != ERTS_AINT_NULL) { - /* Someone else will move it forward */ - return ERTS_AINT_NULL; - } + } +} + +static ERTS_INLINE erts_aint_t +check_insert_marker(erts_sspa_chunk_header_t *chdr, erts_aint_t ilast) +{ + if (!chdr->head.used_marker + && chdr->head.unref_end == (erts_sspa_blk_t *) ilast) { + erts_aint_t itmp; + erts_sspa_blk_t *last = (erts_sspa_blk_t *) ilast; + + erts_atomic_init_nob(&chdr->tail.data.marker.next_atmc, ERTS_AINT_NULL); + itmp = erts_atomic_cmpxchg_relb(&last->next_atmc, + (erts_aint_t) &chdr->tail.data.marker, + ERTS_AINT_NULL); + if (itmp == ERTS_AINT_NULL) { + ilast = (erts_aint_t) &chdr->tail.data.marker; + chdr->head.used_marker = !0; + erts_atomic_set_relb(&chdr->tail.data.last, ilast); } - itmp = erts_atomic_cmpxchg_mb(&chdr->tail.data.last, - (erts_aint_t) this, - ilast); - if (ilast == itmp) - return want_last ? (erts_aint_t) this : ERTS_AINT_NULL; - ilast = itmp; } + return ilast; } void -erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr, erts_sspa_blk_t *blk) +erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr, + erts_sspa_blk_t *blk, + int cinit) { int um_refc_ix = 0; int managed_thread = erts_thr_progress_is_managed_thread(); @@ -180,7 +210,7 @@ erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr, erts_sspa_blk_t *blk) } } - (void) enqueue_remote_managed_thread(chdr, blk, 0); + enqueue_remote_managed_thread(chdr, blk, cinit); if (!managed_thread) erts_atomic_dec_relb(&chdr->tail.data.um_refc[um_refc_ix]); @@ -208,24 +238,17 @@ fetch_remote(erts_sspa_chunk_header_t *chdr, int max) int um_refc_ix; chdr->head.next.thr_progress_reached = 1; um_refc_ix = chdr->head.next.um_refc_ix; - if (erts_atomic_read_acqb(&chdr->tail.data.um_refc[um_refc_ix]) == 0) { + if (erts_atomic_read_nob(&chdr->tail.data.um_refc[um_refc_ix]) == 0) { + + ETHR_MEMBAR(ETHR_LoadLoad|ETHR_LoadStore); /* Move unreferenced end pointer forward... */ chdr->head.unref_end = chdr->head.next.unref_end; - if (!chdr->head.used_marker - && chdr->head.unref_end == (erts_sspa_blk_t *) ilast) { - /* Need to equeue marker */ - chdr->head.used_marker = 1; - ilast = enqueue_remote_managed_thread(chdr, - &chdr->tail.data.marker, - 1); - } + ilast = check_insert_marker(chdr, ilast); - if (chdr->head.unref_end == (erts_sspa_blk_t *) ilast) - ERTS_THR_MEMORY_BARRIER; - else { + if (chdr->head.unref_end != (erts_sspa_blk_t *) ilast) { chdr->head.next.unref_end = (erts_sspa_blk_t *) ilast; chdr->head.next.thr_progress = erts_thr_progress_later(NULL); erts_atomic32_set_relb(&chdr->tail.data.um_refc_ix, diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.h b/erts/emulator/beam/erl_sched_spec_pre_alloc.h index d36066c399..bccb1aba7a 100644 --- a/erts/emulator/beam/erl_sched_spec_pre_alloc.h +++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.h @@ -142,7 +142,8 @@ check_local_list(erts_sspa_chunk_header_t *chdr) erts_sspa_data_t *erts_sspa_create(size_t blk_sz, int pa_size); void erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr, - erts_sspa_blk_t *blk); + erts_sspa_blk_t *blk, + int cinit); erts_sspa_blk_t *erts_sspa_process_remote_frees(erts_sspa_chunk_header_t *chdr, erts_sspa_blk_t *old_res); @@ -216,7 +217,7 @@ erts_sspa_free(erts_sspa_data_t *data, int cix, char *cblk) chdr = &chnk->aligned.header; if (chnk_cix != cix) { /* Remote chunk */ - erts_sspa_remote_free(chdr, blk); + erts_sspa_remote_free(chdr, blk, chnk_cix - cix); } else { /* Local chunk */ |