about | summary | refs | log | tree | commit | diff | stats
path: root/erts/emulator/beam
diff options
context:
space:
mode:
author: Rickard Green <[email protected]> 2012-07-31 21:58:48 +0200
committer: Rickard Green <[email protected]> 2012-07-31 21:58:48 +0200
commit: a67d0be885529844b3c14102604a38f556e70ecc (patch)
tree: 0a5a01ca22d7d2d1c7ac904ba6abbcb8979d1296 /erts/emulator/beam
parent: 75aa1216d65756e4a93098b481bbe0a468230b7a (diff)
parent: 020f7cb45b6a3e5ff8af55cc58c7fd0fa0b447c7 (diff)
download: otp-a67d0be885529844b3c14102604a38f556e70ecc.tar.gz
          otp-a67d0be885529844b3c14102604a38f556e70ecc.tar.bz2
          otp-a67d0be885529844b3c14102604a38f556e70ecc.zip
Merge branch 'maint'
* maint:
  Improve the enqueue operation of delayed dealloc
  Implement delayed aux work wake up

Conflicts:
	erts/emulator/beam/erl_alloc_util.c
	erts/emulator/beam/erl_process.h
Diffstat (limited to 'erts/emulator/beam')
-rw-r--r-- erts/emulator/beam/erl_alloc_util.c 130
-rw-r--r-- erts/emulator/beam/erl_process.c 92
-rw-r--r-- erts/emulator/beam/erl_process.h 42
-rw-r--r-- erts/emulator/beam/erl_sched_spec_pre_alloc.c 113
-rw-r--r-- erts/emulator/beam/erl_sched_spec_pre_alloc.h 5
5 files changed, 268 insertions, 114 deletions
diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c
index 9a011e2adc..97ba306a79 100644
--- a/erts/emulator/beam/erl_alloc_util.c
+++ b/erts/emulator/beam/erl_alloc_util.c
@@ -829,49 +829,83 @@ init_dd_queue(ErtsAllctrDDQueue_t *ddq)
ddq->head.used_marker = 1;
}
-static ERTS_INLINE erts_aint_t
-ddq_managed_thread_enqueue(ErtsAllctrDDQueue_t *ddq, void *ptr)
+static ERTS_INLINE int
+ddq_managed_thread_enqueue(ErtsAllctrDDQueue_t *ddq, void *ptr, int cinit)
{
- erts_aint_t first_ilast, ilast, itmp;
- ErtsAllctrDDBlock_t *this = ptr;
+ erts_aint_t itmp;
+ ErtsAllctrDDBlock_t *enq, *this = ptr;
erts_atomic_init_nob(&this->atmc_next, ERTS_AINT_NULL);
-
/* Enqueue at end of list... */
- first_ilast = ilast = erts_atomic_read_nob(&ddq->tail.data.last);
- while (1) {
- ErtsAllctrDDBlock_t *last = (ErtsAllctrDDBlock_t *) ilast;
- itmp = erts_atomic_cmpxchg_mb(&last->atmc_next,
- (erts_aint_t) this,
- ERTS_AINT_NULL);
- if (itmp == ERTS_AINT_NULL)
- break;
- ilast = itmp;
+ enq = (ErtsAllctrDDBlock_t *) erts_atomic_read_nob(&ddq->tail.data.last);
+ itmp = erts_atomic_cmpxchg_relb(&enq->atmc_next,
+ (erts_aint_t) this,
+ ERTS_AINT_NULL);
+ if (itmp == ERTS_AINT_NULL) {
+ /* We are required to move last pointer */
+#ifdef DEBUG
+ ASSERT(ERTS_AINT_NULL == erts_atomic_read_nob(&this->atmc_next));
+ ASSERT(((erts_aint_t) enq)
+ == erts_atomic_xchg_relb(&ddq->tail.data.last,
+ (erts_aint_t) this));
+#else
+ erts_atomic_set_relb(&ddq->tail.data.last, (erts_aint_t) this);
+#endif
+ return 1;
}
+ else {
+ /*
+ * We *need* to insert element somewhere in between the
+ * last element we read earlier and the actual last element.
+ */
+ int i = cinit;
- /* Move last pointer forward... */
- while (1) {
- if (erts_atomic_read_rb(&this->atmc_next) != ERTS_AINT_NULL) {
- ilast = erts_atomic_read_rb(&ddq->tail.data.last);
- if (first_ilast != ilast) {
- /* Someone else will move it forward */
- return ilast;
+ while (1) {
+ erts_aint_t itmp2;
+ erts_atomic_set_nob(&this->atmc_next, itmp);
+ itmp2 = erts_atomic_cmpxchg_relb(&enq->atmc_next,
+ (erts_aint_t) this,
+ itmp);
+ if (itmp == itmp2)
+ return 0; /* inserted this */
+ if ((i & 1) == 0)
+ itmp = itmp2;
+ else {
+ enq = (ErtsAllctrDDBlock_t *) itmp2;
+ itmp = erts_atomic_read_acqb(&enq->atmc_next);
+ ASSERT(itmp != ERTS_AINT_NULL);
}
+ i++;
}
- itmp = erts_atomic_cmpxchg_mb(&ddq->tail.data.last,
- (erts_aint_t) this,
- ilast);
- if (ilast == itmp)
- return (erts_aint_t) this;
- ilast = itmp;
}
}
+static ERTS_INLINE erts_aint_t
+check_insert_marker(ErtsAllctrDDQueue_t *ddq, erts_aint_t ilast)
+{
+ if (!ddq->head.used_marker
+ && ddq->head.unref_end == (ErtsAllctrDDBlock_t *) ilast) {
+ erts_aint_t itmp;
+ ErtsAllctrDDBlock_t *last = (ErtsAllctrDDBlock_t *) ilast;
+
+ erts_atomic_init_nob(&ddq->tail.data.marker.atmc_next, ERTS_AINT_NULL);
+ itmp = erts_atomic_cmpxchg_relb(&last->atmc_next,
+ (erts_aint_t) &ddq->tail.data.marker,
+ ERTS_AINT_NULL);
+ if (itmp == ERTS_AINT_NULL) {
+ ilast = (erts_aint_t) &ddq->tail.data.marker;
+ ddq->head.used_marker = !0;
+ erts_atomic_set_relb(&ddq->tail.data.last, ilast);
+ }
+ }
+ return ilast;
+}
+
static ERTS_INLINE int
-ddq_enqueue(ErtsAlcType_t type, ErtsAllctrDDQueue_t *ddq, void *ptr)
+ddq_enqueue(ErtsAlcType_t type, ErtsAllctrDDQueue_t *ddq, void *ptr, int cinit)
{
- erts_aint_t ilast;
+ int last_elem;
int um_refc_ix = 0;
int managed_thread = erts_thr_progress_is_managed_thread();
if (!managed_thread) {
@@ -887,11 +921,11 @@ ddq_enqueue(ErtsAlcType_t type, ErtsAllctrDDQueue_t *ddq, void *ptr)
}
}
- ilast = ddq_managed_thread_enqueue(ddq, ptr);
+ last_elem = ddq_managed_thread_enqueue(ddq, ptr, cinit);
if (!managed_thread)
erts_atomic_dec_relb(&ddq->tail.data.um_refc[um_refc_ix]);
- return ilast == (erts_aint_t) ptr;
+ return last_elem;
}
static ERTS_INLINE void *
@@ -937,20 +971,16 @@ ddq_check_incoming(ErtsAllctrDDQueue_t *ddq)
int um_refc_ix;
ddq->head.next.thr_progress_reached = 1;
um_refc_ix = ddq->head.next.um_refc_ix;
- if (erts_atomic_read_acqb(&ddq->tail.data.um_refc[um_refc_ix]) == 0) {
+ if (erts_atomic_read_nob(&ddq->tail.data.um_refc[um_refc_ix]) == 0) {
/* Move unreferenced end pointer forward... */
+ ETHR_MEMBAR(ETHR_LoadLoad|ETHR_LoadStore);
+
ddq->head.unref_end = ddq->head.next.unref_end;
- if (!ddq->head.used_marker
- && ddq->head.unref_end == (ErtsAllctrDDBlock_t *) ilast) {
- ddq->head.used_marker = 1;
- ilast = ddq_managed_thread_enqueue(ddq, &ddq->tail.data.marker);
- }
+ ilast = check_insert_marker(ddq, ilast);
- if (ddq->head.unref_end == (ErtsAllctrDDBlock_t *) ilast)
- ERTS_THR_MEMORY_BARRIER;
- else {
+ if (ddq->head.unref_end != (ErtsAllctrDDBlock_t *) ilast) {
ddq->head.next.unref_end = (ErtsAllctrDDBlock_t *) ilast;
ddq->head.next.thr_progress = erts_thr_progress_later(NULL);
erts_atomic32_set_relb(&ddq->tail.data.um_refc_ix,
@@ -1095,12 +1125,15 @@ handle_delayed_dealloc(Allctr_t *allctr,
}
static ERTS_INLINE void
-enqueue_dealloc_other_instance(ErtsAlcType_t type, Allctr_t *allctr, void *ptr)
+enqueue_dealloc_other_instance(ErtsAlcType_t type,
+ Allctr_t *allctr,
+ void *ptr,
+ int cinit)
{
if (allctr->fix)
((UWord *) ptr)[ERTS_ALCU_DD_FIX_TYPE_OFFS] = (UWord) type;
- if (ddq_enqueue(type, &allctr->dd.q, ptr))
+ if (ddq_enqueue(type, &allctr->dd.q, ptr, cinit))
erts_alloc_notify_delayed_dealloc(allctr->ix);
}
@@ -3616,7 +3649,11 @@ erts_alcu_free_thr_pref(ErtsAlcType_t type, void *extra, void *p)
get_pref_allctr(extra, &pref_allctr);
ptr = get_used_allctr(extra, p, &used_allctr, NULL);
if (pref_allctr != used_allctr)
- enqueue_dealloc_other_instance(type, used_allctr, ptr);
+ enqueue_dealloc_other_instance(type,
+ used_allctr,
+ ptr,
+ (used_allctr->dd.ix
+ - pref_allctr->dd.ix));
else {
if (used_allctr->thread_safe)
erts_mtx_lock(&used_allctr->mutex);
@@ -3991,7 +4028,11 @@ realloc_thr_pref(ErtsAlcType_t type, void *extra, void *p, Uint size,
sys_memcpy(res, p, cpy_size);
if (!force_move || used_allctr != pref_allctr)
- enqueue_dealloc_other_instance(type, used_allctr, ptr);
+ enqueue_dealloc_other_instance(type,
+ used_allctr,
+ ptr,
+ (used_allctr->dd.ix
+ - pref_allctr->dd.ix));
else {
do_erts_alcu_free(type, used_allctr, ptr);
ASSERT(pref_allctr == used_allctr);
@@ -4182,6 +4223,7 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init)
allctr->dd.use = 1;
init_dd_queue(&allctr->dd.q);
+ allctr->dd.ix = init->ix;
}
else
#endif
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index e62556ce72..afd2ddc69d 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -518,6 +518,7 @@ dbg_chk_aux_work_val(erts_aint32_t value)
valid |= ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN;
#endif
#ifdef ERTS_SMP
+ valid |= ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP;
valid |= ERTS_SSI_AUX_WORK_MISC_THR_PRGR;
valid |= ERTS_SSI_AUX_WORK_DD;
valid |= ERTS_SSI_AUX_WORK_DD_THR_PRGR;
@@ -1177,6 +1178,45 @@ haw_thr_prgr_current_check_progress(ErtsAuxWorkData *awdp)
}
}
+static ERTS_INLINE erts_aint32_t
+handle_delayed_aux_work_wakeup(ErtsAuxWorkData *awdp, erts_aint32_t aux_work)
+{
+ int jix, max_jix;
+ unset_aux_work_flags(awdp->ssi, ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP);
+
+ ERTS_THR_MEMORY_BARRIER;
+
+ max_jix = awdp->delayed_wakeup.jix;
+ awdp->delayed_wakeup.jix = -1;
+ for (jix = 0; jix <= max_jix; jix++) {
+ int sched = awdp->delayed_wakeup.job[jix].sched;
+ erts_aint32_t aux_work = awdp->delayed_wakeup.job[jix].aux_work;
+
+ ASSERT(awdp->delayed_wakeup.sched2jix[sched] == jix);
+ awdp->delayed_wakeup.sched2jix[sched] = -1;
+ set_aux_work_flags_wakeup_nob(ERTS_SCHED_SLEEP_INFO_IX(sched-1),
+ aux_work);
+ }
+ return aux_work & ~ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP;
+}
+
+static ERTS_INLINE void
+schedule_aux_work_wakeup(ErtsAuxWorkData *awdp, int sched, erts_aint32_t aux_work)
+{
+ int jix = awdp->delayed_wakeup.sched2jix[sched];
+ if (jix >= 0) {
+ ASSERT(awdp->delayed_wakeup.job[jix].sched == sched);
+ awdp->delayed_wakeup.job[jix].aux_work |= aux_work;
+ }
+ else {
+ jix = ++awdp->delayed_wakeup.jix;
+ awdp->delayed_wakeup.sched2jix[sched] = jix;
+ awdp->delayed_wakeup.job[jix].sched = sched;
+ awdp->delayed_wakeup.job[jix].aux_work = aux_work;
+ }
+ set_aux_work_flags_wakeup_nob(awdp->ssi, ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP);
+}
+
#endif
typedef struct erts_misc_aux_work_t_ erts_misc_aux_work_t;
@@ -1520,8 +1560,14 @@ handle_fix_alloc(ErtsAuxWorkData *awdp, erts_aint32_t aux_work)
void
erts_alloc_notify_delayed_dealloc(int ix)
{
- set_aux_work_flags_wakeup_nob(ERTS_SCHED_SLEEP_INFO_IX(ix-1),
- ERTS_SSI_AUX_WORK_DD);
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ if (esdp)
+ schedule_aux_work_wakeup(&esdp->aux_work_data,
+ ix,
+ ERTS_SSI_AUX_WORK_DD);
+ else
+ set_aux_work_flags_wakeup_relb(ERTS_SCHED_SLEEP_INFO_IX(ix-1),
+ ERTS_SSI_AUX_WORK_DD);
}
static ERTS_INLINE erts_aint32_t
@@ -1819,6 +1865,8 @@ handle_aux_work(ErtsAuxWorkData *awdp, erts_aint32_t orig_aux_work, int waiting)
* eachother. Most frequent first.
*/
#ifdef ERTS_SMP
+ HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP,
+ handle_delayed_aux_work_wakeup);
HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_DD,
handle_delayed_dealloc);
/* DD must be before DD_THR_PRGR */
@@ -2305,7 +2353,7 @@ thr_prgr_fin_wait(void *vssi)
| ERTS_SSI_FLG_TSE_SLEEPING));
}
-static void init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp);
+static void init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, char *dawwp);
static void *
aux_thread(void *unused)
@@ -2325,7 +2373,7 @@ aux_thread(void *unused)
callbacks.finalize_wait = thr_prgr_fin_wait;
erts_thr_progress_register_managed_thread(NULL, &callbacks, 1);
- init_aux_work_data(awdp, NULL);
+ init_aux_work_data(awdp, NULL, NULL);
awdp->ssi = ssi;
sched_prep_spin_wait(ssi);
@@ -4538,7 +4586,7 @@ erts_sched_set_busy_wait_threshold(char *str)
return 0;
}
static void
-init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp)
+init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, char *dawwp)
{
awdp->sched_id = esdp ? (int) esdp->no : 0;
awdp->esdp = esdp;
@@ -4556,12 +4604,32 @@ init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp)
#endif
awdp->async_ready.queue = NULL;
#endif
+#ifdef ERTS_SMP
+ if (!dawwp) {
+ awdp->delayed_wakeup.job = NULL;
+ awdp->delayed_wakeup.sched2jix = NULL;
+ awdp->delayed_wakeup.jix = -1;
+ }
+ else {
+ int i;
+ awdp->delayed_wakeup.job = (ErtsDelayedAuxWorkWakeupJob *) dawwp;
+ dawwp += sizeof(ErtsDelayedAuxWorkWakeupJob)*(erts_no_schedulers+1);
+ awdp->delayed_wakeup.sched2jix = (int *) dawwp;
+ awdp->delayed_wakeup.jix = -1;
+ for (i = 0; i <= erts_no_schedulers; i++)
+ awdp->delayed_wakeup.sched2jix[i] = -1;
+ }
+#endif
}
void
erts_init_scheduling(int no_schedulers, int no_schedulers_online)
{
int ix, n, no_ssi;
+ char *daww_ptr;
+#ifdef ERTS_SMP
+ size_t daww_sz;
+#endif
init_misc_op_list_alloc();
@@ -4684,6 +4752,15 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online)
/* Create and initialize scheduler specific data */
+#ifdef ERTS_SMP
+ daww_sz = ERTS_ALC_CACHE_LINE_ALIGN_SIZE((sizeof(ErtsDelayedAuxWorkWakeupJob)
+ + sizeof(int))*(n+1));
+ daww_ptr = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_SCHDLR_DATA,
+ daww_sz*n);
+#else
+ daww_ptr = NULL;
+#endif
+
erts_aligned_scheduler_data =
erts_alloc_permanent_cache_aligned(ERTS_ALC_T_SCHDLR_DATA,
n*sizeof(ErtsAlignedSchedulerData));
@@ -4718,7 +4795,10 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online)
esdp->run_queue = ERTS_RUNQ_IX(ix);
esdp->run_queue->scheduler = esdp;
- init_aux_work_data(&esdp->aux_work_data, esdp);
+ init_aux_work_data(&esdp->aux_work_data, esdp, daww_ptr);
+#ifdef ERTS_SMP
+ daww_ptr += daww_sz;
+#endif
init_sched_wall_time(&esdp->sched_wall_time);
}
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index 93e71681da..b8b137f413 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -270,20 +270,21 @@ typedef enum {
* eachother. Most frequent - lowest bit number.
*/
-#define ERTS_SSI_AUX_WORK_DD (((erts_aint32_t) 1) << 0)
-#define ERTS_SSI_AUX_WORK_DD_THR_PRGR (((erts_aint32_t) 1) << 1)
-#define ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC (((erts_aint32_t) 1) << 2)
-#define ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM (((erts_aint32_t) 1) << 3)
-#define ERTS_SSI_AUX_WORK_ASYNC_READY (((erts_aint32_t) 1) << 4)
-#define ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN (((erts_aint32_t) 1) << 5)
-#define ERTS_SSI_AUX_WORK_MISC_THR_PRGR (((erts_aint32_t) 1) << 6)
-#define ERTS_SSI_AUX_WORK_MISC (((erts_aint32_t) 1) << 7)
-#define ERTS_SSI_AUX_WORK_CHECK_CHILDREN (((erts_aint32_t) 1) << 8)
-#define ERTS_SSI_AUX_WORK_SET_TMO (((erts_aint32_t) 1) << 9)
-#define ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK (((erts_aint32_t) 1) << 10)
-#define ERTS_SSI_AUX_WORK_CODE_IX_ACTIVATION (((erts_aint32_t) 1) << 11)
-#define ERTS_SSI_AUX_WORK_REAP_PORTS (((erts_aint32_t) 1) << 12)
-#define ERTS_SSI_AUX_WORK_FINISH_BP (((erts_aint32_t) 1) << 13)
+#define ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP (((erts_aint32_t) 1) << 0)
+#define ERTS_SSI_AUX_WORK_DD (((erts_aint32_t) 1) << 1)
+#define ERTS_SSI_AUX_WORK_DD_THR_PRGR (((erts_aint32_t) 1) << 2)
+#define ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC (((erts_aint32_t) 1) << 3)
+#define ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM (((erts_aint32_t) 1) << 4)
+#define ERTS_SSI_AUX_WORK_ASYNC_READY (((erts_aint32_t) 1) << 5)
+#define ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN (((erts_aint32_t) 1) << 6)
+#define ERTS_SSI_AUX_WORK_MISC_THR_PRGR (((erts_aint32_t) 1) << 7)
+#define ERTS_SSI_AUX_WORK_MISC (((erts_aint32_t) 1) << 8)
+#define ERTS_SSI_AUX_WORK_CHECK_CHILDREN (((erts_aint32_t) 1) << 9)
+#define ERTS_SSI_AUX_WORK_SET_TMO (((erts_aint32_t) 1) << 10)
+#define ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK (((erts_aint32_t) 1) << 11)
+#define ERTS_SSI_AUX_WORK_CODE_IX_ACTIVATION (((erts_aint32_t) 1) << 12)
+#define ERTS_SSI_AUX_WORK_REAP_PORTS (((erts_aint32_t) 1) << 13)
+#define ERTS_SSI_AUX_WORK_FINISH_BP (((erts_aint32_t) 1) << 14)
typedef struct ErtsSchedulerSleepInfo_ ErtsSchedulerSleepInfo;
@@ -439,6 +440,11 @@ typedef struct {
} ErtsSchedWallTime;
typedef struct {
+ int sched;
+ erts_aint32_t aux_work;
+} ErtsDelayedAuxWorkWakeupJob;
+
+typedef struct {
int sched_id;
ErtsSchedulerData *esdp;
ErtsSchedulerSleepInfo *ssi;
@@ -472,12 +478,15 @@ typedef struct {
Process* code_stager;
ErtsThrPrgrVal thr_prgr;
} code_ix_activation;
-#endif
-#ifdef ERTS_SMP
struct {
Process* stager;
ErtsThrPrgrVal thr_prgr;
} bp_ix_activation;
+ struct {
+ int *sched2jix;
+ int jix;
+ ErtsDelayedAuxWorkWakeupJob *job;
+ } delayed_wakeup;
#endif
} ErtsAuxWorkData;
@@ -512,7 +521,6 @@ struct ErtsSchedulerData_ {
int virtual_reds;
int cpu_id; /* >= 0 when bound */
ErtsAuxWorkData aux_work_data;
-
ErtsAtomCacheMap atom_cache_map;
ErtsSchedAllocData alloc_data;
diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.c b/erts/emulator/beam/erl_sched_spec_pre_alloc.c
index 37b186abd9..a490aec734 100644
--- a/erts/emulator/beam/erl_sched_spec_pre_alloc.c
+++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.c
@@ -116,54 +116,84 @@ erts_sspa_create(size_t blk_sz, int pa_size)
return data;
}
-static ERTS_INLINE erts_aint_t
+static ERTS_INLINE void
enqueue_remote_managed_thread(erts_sspa_chunk_header_t *chdr,
erts_sspa_blk_t *this,
- int want_last)
+ int cinit)
{
- erts_aint_t ilast, itmp;
+ erts_aint_t itmp;
+ erts_sspa_blk_t *enq;
erts_atomic_init_nob(&this->next_atmc, ERTS_AINT_NULL);
-
/* Enqueue at end of list... */
- ilast = erts_atomic_read_nob(&chdr->tail.data.last);
- while (1) {
- erts_sspa_blk_t *last = (erts_sspa_blk_t *) ilast;
- itmp = erts_atomic_cmpxchg_mb(&last->next_atmc,
- (erts_aint_t) this,
- ERTS_AINT_NULL);
- if (itmp == ERTS_AINT_NULL)
- break;
- ilast = itmp;
+ enq = (erts_sspa_blk_t *) erts_atomic_read_nob(&chdr->tail.data.last);
+ itmp = erts_atomic_cmpxchg_relb(&enq->next_atmc,
+ (erts_aint_t) this,
+ ERTS_AINT_NULL);
+ if (itmp == ERTS_AINT_NULL) {
+ /* We are required to move last pointer */
+#ifdef DEBUG
+ ASSERT(ERTS_AINT_NULL == erts_atomic_read_nob(&this->next_atmc));
+ ASSERT(((erts_aint_t) enq)
+ == erts_atomic_xchg_relb(&chdr->tail.data.last,
+ (erts_aint_t) this));
+#else
+ erts_atomic_set_relb(&chdr->tail.data.last, (erts_aint_t) this);
+#endif
}
+ else {
+ /*
+ * We *need* to insert element somewhere in between the
+ * last element we read earlier and the actual last element.
+ */
+ int i = cinit;
- /* Move last pointer forward... */
- while (1) {
- erts_aint_t itmp;
- if (want_last) {
- if (erts_atomic_read_rb(&this->next_atmc) != ERTS_AINT_NULL) {
- /* Someone else will move it forward */
- return erts_atomic_read_nob(&chdr->tail.data.last);
+ while (1) {
+ erts_aint_t itmp2;
+ erts_atomic_set_nob(&this->next_atmc, itmp);
+ itmp2 = erts_atomic_cmpxchg_relb(&enq->next_atmc,
+ (erts_aint_t) this,
+ itmp);
+ if (itmp == itmp2)
+ break; /* inserted this */
+ if ((i & 1) == 0)
+ itmp = itmp2;
+ else {
+ enq = (erts_sspa_blk_t *) itmp;
+ itmp = erts_atomic_read_acqb(&enq->next_atmc);
+ ASSERT(itmp != ERTS_AINT_NULL);
}
+ i++;
}
- else {
- if (erts_atomic_read_nob(&this->next_atmc) != ERTS_AINT_NULL) {
- /* Someone else will move it forward */
- return ERTS_AINT_NULL;
- }
+ }
+}
+
+static ERTS_INLINE erts_aint_t
+check_insert_marker(erts_sspa_chunk_header_t *chdr, erts_aint_t ilast)
+{
+ if (!chdr->head.used_marker
+ && chdr->head.unref_end == (erts_sspa_blk_t *) ilast) {
+ erts_aint_t itmp;
+ erts_sspa_blk_t *last = (erts_sspa_blk_t *) ilast;
+
+ erts_atomic_init_nob(&chdr->tail.data.marker.next_atmc, ERTS_AINT_NULL);
+ itmp = erts_atomic_cmpxchg_relb(&last->next_atmc,
+ (erts_aint_t) &chdr->tail.data.marker,
+ ERTS_AINT_NULL);
+ if (itmp == ERTS_AINT_NULL) {
+ ilast = (erts_aint_t) &chdr->tail.data.marker;
+ chdr->head.used_marker = !0;
+ erts_atomic_set_relb(&chdr->tail.data.last, ilast);
}
- itmp = erts_atomic_cmpxchg_mb(&chdr->tail.data.last,
- (erts_aint_t) this,
- ilast);
- if (ilast == itmp)
- return want_last ? (erts_aint_t) this : ERTS_AINT_NULL;
- ilast = itmp;
}
+ return ilast;
}
void
-erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr, erts_sspa_blk_t *blk)
+erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr,
+ erts_sspa_blk_t *blk,
+ int cinit)
{
int um_refc_ix = 0;
int managed_thread = erts_thr_progress_is_managed_thread();
@@ -180,7 +210,7 @@ erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr, erts_sspa_blk_t *blk)
}
}
- (void) enqueue_remote_managed_thread(chdr, blk, 0);
+ enqueue_remote_managed_thread(chdr, blk, cinit);
if (!managed_thread)
erts_atomic_dec_relb(&chdr->tail.data.um_refc[um_refc_ix]);
@@ -208,24 +238,17 @@ fetch_remote(erts_sspa_chunk_header_t *chdr, int max)
int um_refc_ix;
chdr->head.next.thr_progress_reached = 1;
um_refc_ix = chdr->head.next.um_refc_ix;
- if (erts_atomic_read_acqb(&chdr->tail.data.um_refc[um_refc_ix]) == 0) {
+ if (erts_atomic_read_nob(&chdr->tail.data.um_refc[um_refc_ix]) == 0) {
+
+ ETHR_MEMBAR(ETHR_LoadLoad|ETHR_LoadStore);
/* Move unreferenced end pointer forward... */
chdr->head.unref_end = chdr->head.next.unref_end;
- if (!chdr->head.used_marker
- && chdr->head.unref_end == (erts_sspa_blk_t *) ilast) {
- /* Need to equeue marker */
- chdr->head.used_marker = 1;
- ilast = enqueue_remote_managed_thread(chdr,
- &chdr->tail.data.marker,
- 1);
- }
+ ilast = check_insert_marker(chdr, ilast);
- if (chdr->head.unref_end == (erts_sspa_blk_t *) ilast)
- ERTS_THR_MEMORY_BARRIER;
- else {
+ if (chdr->head.unref_end != (erts_sspa_blk_t *) ilast) {
chdr->head.next.unref_end = (erts_sspa_blk_t *) ilast;
chdr->head.next.thr_progress = erts_thr_progress_later(NULL);
erts_atomic32_set_relb(&chdr->tail.data.um_refc_ix,
diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.h b/erts/emulator/beam/erl_sched_spec_pre_alloc.h
index d36066c399..bccb1aba7a 100644
--- a/erts/emulator/beam/erl_sched_spec_pre_alloc.h
+++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.h
@@ -142,7 +142,8 @@ check_local_list(erts_sspa_chunk_header_t *chdr)
erts_sspa_data_t *erts_sspa_create(size_t blk_sz,
int pa_size);
void erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr,
- erts_sspa_blk_t *blk);
+ erts_sspa_blk_t *blk,
+ int cinit);
erts_sspa_blk_t *erts_sspa_process_remote_frees(erts_sspa_chunk_header_t *chdr,
erts_sspa_blk_t *old_res);
@@ -216,7 +217,7 @@ erts_sspa_free(erts_sspa_data_t *data, int cix, char *cblk)
chdr = &chnk->aligned.header;
if (chnk_cix != cix) {
/* Remote chunk */
- erts_sspa_remote_free(chdr, blk);
+ erts_sspa_remote_free(chdr, blk, chnk_cix - cix);
}
else {
/* Local chunk */