aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator
diff options
context:
space:
mode:
Diffstat (limited to 'erts/emulator')
-rw-r--r--erts/emulator/Makefile.in5
-rw-r--r--erts/emulator/beam/dtrace-wrapper.h2
-rw-r--r--erts/emulator/beam/erl_alloc.types1
-rw-r--r--erts/emulator/beam/erl_alloc_util.c129
-rw-r--r--erts/emulator/beam/erl_lock_check.c3
-rw-r--r--erts/emulator/beam/erl_message.c4
-rw-r--r--erts/emulator/beam/erl_process.c92
-rw-r--r--erts/emulator/beam/erl_process.h38
-rw-r--r--erts/emulator/beam/erl_process_lock.c168
-rw-r--r--erts/emulator/beam/erl_process_lock.h4
-rw-r--r--erts/emulator/beam/erl_sched_spec_pre_alloc.c113
-rw-r--r--erts/emulator/beam/erl_sched_spec_pre_alloc.h5
-rw-r--r--erts/emulator/drivers/common/efile_drv.c2
-rw-r--r--erts/emulator/drivers/unix/unix_efile.c8
-rw-r--r--erts/emulator/hipe/hipe_arm.c89
-rw-r--r--erts/emulator/hipe/hipe_arm_bifs.m418
16 files changed, 353 insertions, 328 deletions
diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in
index 04069b517a..985ef72517 100644
--- a/erts/emulator/Makefile.in
+++ b/erts/emulator/Makefile.in
@@ -924,6 +924,11 @@ $(TTF_DIR)/hipe_arm_bifs.S: hipe/hipe_arm_bifs.m4 hipe/hipe_arm_asm.m4 \
$(OBJDIR)/hipe_arm_bifs.o: $(TTF_DIR)/hipe_arm_bifs.S \
$(TTF_DIR)/hipe_literals.h
+# Use -fomit-frame-pointer to work around gcc (v4.5.2) bug causing
+# "error: r7 cannot be used in asm here" for DEBUG build.
+$(OBJDIR)/hipe_arm.o: hipe/hipe_arm.c
+ $(CC) $(subst O2,O3, $(CFLAGS)) -fomit-frame-pointer $(INCLUDES) -c $< -o $@
+
# end of HiPE section
########################################
diff --git a/erts/emulator/beam/dtrace-wrapper.h b/erts/emulator/beam/dtrace-wrapper.h
index 1aeb7f9221..6ec0c91e21 100644
--- a/erts/emulator/beam/dtrace-wrapper.h
+++ b/erts/emulator/beam/dtrace-wrapper.h
@@ -42,6 +42,8 @@
#define DTRACE_CHARBUF(name, size) \
char name##_BUFFER[size], *name = name##_BUFFER
+#define DTRACE_CHARBUF_NAME(name) name##_BUFFER
+
#if defined(USE_DYNAMIC_TRACE) && defined(USE_VM_PROBES)
#include "erlang_dtrace.h"
diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types
index d4ef9cc553..4aa8fa82fb 100644
--- a/erts/emulator/beam/erl_alloc.types
+++ b/erts/emulator/beam/erl_alloc.types
@@ -291,7 +291,6 @@ type PORT_LOCK STANDARD SYSTEM port_lock
type DRIVER_LOCK STANDARD SYSTEM driver_lock
type XPORTS_LIST SHORT_LIVED SYSTEM extra_port_list
type PROC_LCK_WTR LONG_LIVED SYSTEM proc_lock_waiter
-type PROC_LCK_QS LONG_LIVED SYSTEM proc_lock_queues
type RUNQ_BLNS LONG_LIVED SYSTEM run_queue_balancing
type THR_PRGR_IDATA LONG_LIVED SYSTEM thr_prgr_internal_data
type THR_PRGR_DATA LONG_LIVED SYSTEM thr_prgr_data
diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c
index a0abd1c405..97ba306a79 100644
--- a/erts/emulator/beam/erl_alloc_util.c
+++ b/erts/emulator/beam/erl_alloc_util.c
@@ -829,46 +829,83 @@ init_dd_queue(ErtsAllctrDDQueue_t *ddq)
ddq->head.used_marker = 1;
}
-static ERTS_INLINE erts_aint_t
-ddq_managed_thread_enqueue(ErtsAllctrDDQueue_t *ddq, void *ptr)
+static ERTS_INLINE int
+ddq_managed_thread_enqueue(ErtsAllctrDDQueue_t *ddq, void *ptr, int cinit)
{
- erts_aint_t ilast, itmp;
- ErtsAllctrDDBlock_t *this = ptr;
+ erts_aint_t itmp;
+ ErtsAllctrDDBlock_t *enq, *this = ptr;
erts_atomic_init_nob(&this->atmc_next, ERTS_AINT_NULL);
-
/* Enqueue at end of list... */
- ilast = erts_atomic_read_nob(&ddq->tail.data.last);
- while (1) {
- ErtsAllctrDDBlock_t *last = (ErtsAllctrDDBlock_t *) ilast;
- itmp = erts_atomic_cmpxchg_mb(&last->atmc_next,
- (erts_aint_t) this,
- ERTS_AINT_NULL);
- if (itmp == ERTS_AINT_NULL)
- break;
- ilast = itmp;
+ enq = (ErtsAllctrDDBlock_t *) erts_atomic_read_nob(&ddq->tail.data.last);
+ itmp = erts_atomic_cmpxchg_relb(&enq->atmc_next,
+ (erts_aint_t) this,
+ ERTS_AINT_NULL);
+ if (itmp == ERTS_AINT_NULL) {
+ /* We are required to move last pointer */
+#ifdef DEBUG
+ ASSERT(ERTS_AINT_NULL == erts_atomic_read_nob(&this->atmc_next));
+ ASSERT(((erts_aint_t) enq)
+ == erts_atomic_xchg_relb(&ddq->tail.data.last,
+ (erts_aint_t) this));
+#else
+ erts_atomic_set_relb(&ddq->tail.data.last, (erts_aint_t) this);
+#endif
+ return 1;
}
+ else {
+ /*
+ * We *need* to insert element somewhere in between the
+ * last element we read earlier and the actual last element.
+ */
+ int i = cinit;
- /* Move last pointer forward... */
- while (1) {
- if (erts_atomic_read_rb(&this->atmc_next) != ERTS_AINT_NULL) {
- /* Someone else will move it forward */
- return erts_atomic_read_rb(&ddq->tail.data.last);
+ while (1) {
+ erts_aint_t itmp2;
+ erts_atomic_set_nob(&this->atmc_next, itmp);
+ itmp2 = erts_atomic_cmpxchg_relb(&enq->atmc_next,
+ (erts_aint_t) this,
+ itmp);
+ if (itmp == itmp2)
+ return 0; /* inserted this */
+ if ((i & 1) == 0)
+ itmp = itmp2;
+ else {
+ enq = (ErtsAllctrDDBlock_t *) itmp2;
+ itmp = erts_atomic_read_acqb(&enq->atmc_next);
+ ASSERT(itmp != ERTS_AINT_NULL);
+ }
+ i++;
}
- itmp = erts_atomic_cmpxchg_mb(&ddq->tail.data.last,
- (erts_aint_t) this,
- ilast);
- if (ilast == itmp)
- return (erts_aint_t) this;
- ilast = itmp;
}
}
+static ERTS_INLINE erts_aint_t
+check_insert_marker(ErtsAllctrDDQueue_t *ddq, erts_aint_t ilast)
+{
+ if (!ddq->head.used_marker
+ && ddq->head.unref_end == (ErtsAllctrDDBlock_t *) ilast) {
+ erts_aint_t itmp;
+ ErtsAllctrDDBlock_t *last = (ErtsAllctrDDBlock_t *) ilast;
+
+ erts_atomic_init_nob(&ddq->tail.data.marker.atmc_next, ERTS_AINT_NULL);
+ itmp = erts_atomic_cmpxchg_relb(&last->atmc_next,
+ (erts_aint_t) &ddq->tail.data.marker,
+ ERTS_AINT_NULL);
+ if (itmp == ERTS_AINT_NULL) {
+ ilast = (erts_aint_t) &ddq->tail.data.marker;
+ ddq->head.used_marker = !0;
+ erts_atomic_set_relb(&ddq->tail.data.last, ilast);
+ }
+ }
+ return ilast;
+}
+
static ERTS_INLINE int
-ddq_enqueue(ErtsAlcType_t type, ErtsAllctrDDQueue_t *ddq, void *ptr)
+ddq_enqueue(ErtsAlcType_t type, ErtsAllctrDDQueue_t *ddq, void *ptr, int cinit)
{
- erts_aint_t ilast;
+ int last_elem;
int um_refc_ix = 0;
int managed_thread = erts_thr_progress_is_managed_thread();
if (!managed_thread) {
@@ -884,11 +921,11 @@ ddq_enqueue(ErtsAlcType_t type, ErtsAllctrDDQueue_t *ddq, void *ptr)
}
}
- ilast = ddq_managed_thread_enqueue(ddq, ptr);
+ last_elem = ddq_managed_thread_enqueue(ddq, ptr, cinit);
if (!managed_thread)
erts_atomic_dec_relb(&ddq->tail.data.um_refc[um_refc_ix]);
- return ilast == (erts_aint_t) ptr;
+ return last_elem;
}
static ERTS_INLINE void *
@@ -934,20 +971,16 @@ ddq_check_incoming(ErtsAllctrDDQueue_t *ddq)
int um_refc_ix;
ddq->head.next.thr_progress_reached = 1;
um_refc_ix = ddq->head.next.um_refc_ix;
- if (erts_atomic_read_acqb(&ddq->tail.data.um_refc[um_refc_ix]) == 0) {
+ if (erts_atomic_read_nob(&ddq->tail.data.um_refc[um_refc_ix]) == 0) {
/* Move unreferenced end pointer forward... */
+ ETHR_MEMBAR(ETHR_LoadLoad|ETHR_LoadStore);
+
ddq->head.unref_end = ddq->head.next.unref_end;
- if (!ddq->head.used_marker
- && ddq->head.unref_end == (ErtsAllctrDDBlock_t *) ilast) {
- ddq->head.used_marker = 1;
- ilast = ddq_managed_thread_enqueue(ddq, &ddq->tail.data.marker);
- }
+ ilast = check_insert_marker(ddq, ilast);
- if (ddq->head.unref_end == (ErtsAllctrDDBlock_t *) ilast)
- ERTS_THR_MEMORY_BARRIER;
- else {
+ if (ddq->head.unref_end != (ErtsAllctrDDBlock_t *) ilast) {
ddq->head.next.unref_end = (ErtsAllctrDDBlock_t *) ilast;
ddq->head.next.thr_progress = erts_thr_progress_later(NULL);
erts_atomic32_set_relb(&ddq->tail.data.um_refc_ix,
@@ -1092,12 +1125,15 @@ handle_delayed_dealloc(Allctr_t *allctr,
}
static ERTS_INLINE void
-enqueue_dealloc_other_instance(ErtsAlcType_t type, Allctr_t *allctr, void *ptr)
+enqueue_dealloc_other_instance(ErtsAlcType_t type,
+ Allctr_t *allctr,
+ void *ptr,
+ int cinit)
{
if (allctr->fix)
((UWord *) ptr)[ERTS_ALCU_DD_FIX_TYPE_OFFS] = (UWord) type;
- if (ddq_enqueue(type, &allctr->dd.q, ptr))
+ if (ddq_enqueue(type, &allctr->dd.q, ptr, cinit))
erts_alloc_notify_delayed_dealloc(allctr->ix);
}
@@ -3613,7 +3649,11 @@ erts_alcu_free_thr_pref(ErtsAlcType_t type, void *extra, void *p)
get_pref_allctr(extra, &pref_allctr);
ptr = get_used_allctr(extra, p, &used_allctr, NULL);
if (pref_allctr != used_allctr)
- enqueue_dealloc_other_instance(type, used_allctr, ptr);
+ enqueue_dealloc_other_instance(type,
+ used_allctr,
+ ptr,
+ (used_allctr->dd.ix
+ - pref_allctr->dd.ix));
else {
if (used_allctr->thread_safe)
erts_mtx_lock(&used_allctr->mutex);
@@ -3988,7 +4028,11 @@ realloc_thr_pref(ErtsAlcType_t type, void *extra, void *p, Uint size,
sys_memcpy(res, p, cpy_size);
if (!force_move || used_allctr != pref_allctr)
- enqueue_dealloc_other_instance(type, used_allctr, ptr);
+ enqueue_dealloc_other_instance(type,
+ used_allctr,
+ ptr,
+ (used_allctr->dd.ix
+ - pref_allctr->dd.ix));
else {
do_erts_alcu_free(type, used_allctr, ptr);
ASSERT(pref_allctr == used_allctr);
@@ -4179,6 +4223,7 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init)
allctr->dd.use = 1;
init_dd_queue(&allctr->dd.q);
+ allctr->dd.ix = init->ix;
}
else
#endif
diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c
index a0f744be9d..b545ec07c0 100644
--- a/erts/emulator/beam/erl_lock_check.c
+++ b/erts/emulator/beam/erl_lock_check.c
@@ -175,9 +175,6 @@ static erts_lc_lock_order_t erts_lock_order[] = {
{ "sched_stat", NULL },
#endif
{ "async_init_mtx", NULL },
-#ifdef ERTS_SMP
- { "proc_lck_qs_alloc", NULL },
-#endif
#ifdef __WIN32__
#ifdef DEBUG
{ "save_ops_lock", NULL },
diff --git a/erts/emulator/beam/erl_message.c b/erts/emulator/beam/erl_message.c
index 499a2aac53..919567ab27 100644
--- a/erts/emulator/beam/erl_message.c
+++ b/erts/emulator/beam/erl_message.c
@@ -902,8 +902,8 @@ erts_send_message(Process* sender,
#ifdef USE_VM_PROBES
*sender_name = *receiver_name = '\0';
if (DTRACE_ENABLED(message_send)) {
- erts_snprintf(sender_name, sizeof(sender_name), "%T", sender->id);
- erts_snprintf(receiver_name, sizeof(receiver_name), "%T", receiver->id);
+ erts_snprintf(sender_name, sizeof(DTRACE_CHARBUF_NAME(sender_name)), "%T", sender->id);
+ erts_snprintf(receiver_name, sizeof(DTRACE_CHARBUF_NAME(receiver_name)), "%T", receiver->id);
}
#endif
if (SEQ_TRACE_TOKEN(sender) != NIL && !(flags & ERTS_SND_FLG_NO_SEQ_TRACE)) {
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index d2fa111b80..0fa2def5af 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -359,6 +359,7 @@ dbg_chk_aux_work_val(erts_aint32_t value)
valid |= ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN;
#endif
#ifdef ERTS_SMP
+ valid |= ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP;
valid |= ERTS_SSI_AUX_WORK_MISC_THR_PRGR;
valid |= ERTS_SSI_AUX_WORK_DD;
valid |= ERTS_SSI_AUX_WORK_DD_THR_PRGR;
@@ -930,6 +931,45 @@ haw_thr_prgr_current_check_progress(ErtsAuxWorkData *awdp)
}
}
+static ERTS_INLINE erts_aint32_t
+handle_delayed_aux_work_wakeup(ErtsAuxWorkData *awdp, erts_aint32_t aux_work)
+{
+ int jix, max_jix;
+ unset_aux_work_flags(awdp->ssi, ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP);
+
+ ERTS_THR_MEMORY_BARRIER;
+
+ max_jix = awdp->delayed_wakeup.jix;
+ awdp->delayed_wakeup.jix = -1;
+ for (jix = 0; jix <= max_jix; jix++) {
+ int sched = awdp->delayed_wakeup.job[jix].sched;
+ erts_aint32_t aux_work = awdp->delayed_wakeup.job[jix].aux_work;
+
+ ASSERT(awdp->delayed_wakeup.sched2jix[sched] == jix);
+ awdp->delayed_wakeup.sched2jix[sched] = -1;
+ set_aux_work_flags_wakeup_nob(ERTS_SCHED_SLEEP_INFO_IX(sched-1),
+ aux_work);
+ }
+ return aux_work & ~ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP;
+}
+
+static ERTS_INLINE void
+schedule_aux_work_wakeup(ErtsAuxWorkData *awdp, int sched, erts_aint32_t aux_work)
+{
+ int jix = awdp->delayed_wakeup.sched2jix[sched];
+ if (jix >= 0) {
+ ASSERT(awdp->delayed_wakeup.job[jix].sched == sched);
+ awdp->delayed_wakeup.job[jix].aux_work |= aux_work;
+ }
+ else {
+ jix = ++awdp->delayed_wakeup.jix;
+ awdp->delayed_wakeup.sched2jix[sched] = jix;
+ awdp->delayed_wakeup.job[jix].sched = sched;
+ awdp->delayed_wakeup.job[jix].aux_work = aux_work;
+ }
+ set_aux_work_flags_wakeup_nob(awdp->ssi, ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP);
+}
+
#endif
typedef struct erts_misc_aux_work_t_ erts_misc_aux_work_t;
@@ -1186,8 +1226,14 @@ handle_fix_alloc(ErtsAuxWorkData *awdp, erts_aint32_t aux_work)
void
erts_alloc_notify_delayed_dealloc(int ix)
{
- set_aux_work_flags_wakeup_nob(ERTS_SCHED_SLEEP_INFO_IX(ix-1),
- ERTS_SSI_AUX_WORK_DD);
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ if (esdp)
+ schedule_aux_work_wakeup(&esdp->aux_work_data,
+ ix,
+ ERTS_SSI_AUX_WORK_DD);
+ else
+ set_aux_work_flags_wakeup_relb(ERTS_SCHED_SLEEP_INFO_IX(ix-1),
+ ERTS_SSI_AUX_WORK_DD);
}
static ERTS_INLINE erts_aint32_t
@@ -1485,6 +1531,8 @@ handle_aux_work(ErtsAuxWorkData *awdp, erts_aint32_t orig_aux_work, int waiting)
* eachother. Most frequent first.
*/
#ifdef ERTS_SMP
+ HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP,
+ handle_delayed_aux_work_wakeup);
HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_DD,
handle_delayed_dealloc);
/* DD must be before DD_THR_PRGR */
@@ -1963,7 +2011,7 @@ thr_prgr_fin_wait(void *vssi)
| ERTS_SSI_FLG_TSE_SLEEPING));
}
-static void init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp);
+static void init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, char *dawwp);
static void *
aux_thread(void *unused)
@@ -1983,7 +2031,7 @@ aux_thread(void *unused)
callbacks.finalize_wait = thr_prgr_fin_wait;
erts_thr_progress_register_managed_thread(NULL, &callbacks, 1);
- init_aux_work_data(awdp, NULL);
+ init_aux_work_data(awdp, NULL, NULL);
awdp->ssi = ssi;
sched_prep_spin_wait(ssi);
@@ -3850,7 +3898,7 @@ erts_sched_set_busy_wait_threshold(char *str)
return 0;
}
static void
-init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp)
+init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, char *dawwp)
{
awdp->sched_id = esdp ? (int) esdp->no : 0;
awdp->esdp = esdp;
@@ -3868,12 +3916,32 @@ init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp)
#endif
awdp->async_ready.queue = NULL;
#endif
+#ifdef ERTS_SMP
+ if (!dawwp) {
+ awdp->delayed_wakeup.job = NULL;
+ awdp->delayed_wakeup.sched2jix = NULL;
+ awdp->delayed_wakeup.jix = -1;
+ }
+ else {
+ int i;
+ awdp->delayed_wakeup.job = (ErtsDelayedAuxWorkWakeupJob *) dawwp;
+ dawwp += sizeof(ErtsDelayedAuxWorkWakeupJob)*(erts_no_schedulers+1);
+ awdp->delayed_wakeup.sched2jix = (int *) dawwp;
+ awdp->delayed_wakeup.jix = -1;
+ for (i = 0; i <= erts_no_schedulers; i++)
+ awdp->delayed_wakeup.sched2jix[i] = -1;
+ }
+#endif
}
void
erts_init_scheduling(int no_schedulers, int no_schedulers_online)
{
int ix, n, no_ssi;
+ char *daww_ptr;
+#ifdef ERTS_SMP
+ size_t daww_sz;
+#endif
init_misc_op_list_alloc();
@@ -4006,6 +4074,15 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online)
/* Create and initialize scheduler specific data */
+#ifdef ERTS_SMP
+ daww_sz = ERTS_ALC_CACHE_LINE_ALIGN_SIZE((sizeof(ErtsDelayedAuxWorkWakeupJob)
+ + sizeof(int))*(n+1));
+ daww_ptr = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_SCHDLR_DATA,
+ daww_sz*n);
+#else
+ daww_ptr = NULL;
+#endif
+
erts_aligned_scheduler_data =
erts_alloc_permanent_cache_aligned(ERTS_ALC_T_SCHDLR_DATA,
n*sizeof(ErtsAlignedSchedulerData));
@@ -4040,7 +4117,10 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online)
esdp->run_queue = ERTS_RUNQ_IX(ix);
esdp->run_queue->scheduler = esdp;
- init_aux_work_data(&esdp->aux_work_data, esdp);
+ init_aux_work_data(&esdp->aux_work_data, esdp, daww_ptr);
+#ifdef ERTS_SMP
+ daww_ptr += daww_sz;
+#endif
init_sched_wall_time(&esdp->sched_wall_time);
}
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index 5b79c40d93..9e7a5a5c74 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -253,18 +253,19 @@ typedef enum {
* eachother. Most frequent - lowest bit number.
*/
-#define ERTS_SSI_AUX_WORK_DD (((erts_aint32_t) 1) << 0)
-#define ERTS_SSI_AUX_WORK_DD_THR_PRGR (((erts_aint32_t) 1) << 1)
-#define ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC (((erts_aint32_t) 1) << 2)
-#define ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM (((erts_aint32_t) 1) << 3)
-#define ERTS_SSI_AUX_WORK_ASYNC_READY (((erts_aint32_t) 1) << 4)
-#define ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN (((erts_aint32_t) 1) << 5)
-#define ERTS_SSI_AUX_WORK_MISC_THR_PRGR (((erts_aint32_t) 1) << 6)
-#define ERTS_SSI_AUX_WORK_MISC (((erts_aint32_t) 1) << 7)
-#define ERTS_SSI_AUX_WORK_CHECK_CHILDREN (((erts_aint32_t) 1) << 8)
-#define ERTS_SSI_AUX_WORK_SET_TMO (((erts_aint32_t) 1) << 9)
-#define ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK (((erts_aint32_t) 1) << 10)
-#define ERTS_SSI_AUX_WORK_REAP_PORTS (((erts_aint32_t) 1) << 11)
+#define ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP (((erts_aint32_t) 1) << 0)
+#define ERTS_SSI_AUX_WORK_DD (((erts_aint32_t) 1) << 1)
+#define ERTS_SSI_AUX_WORK_DD_THR_PRGR (((erts_aint32_t) 1) << 2)
+#define ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC (((erts_aint32_t) 1) << 3)
+#define ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM (((erts_aint32_t) 1) << 4)
+#define ERTS_SSI_AUX_WORK_ASYNC_READY (((erts_aint32_t) 1) << 5)
+#define ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN (((erts_aint32_t) 1) << 6)
+#define ERTS_SSI_AUX_WORK_MISC_THR_PRGR (((erts_aint32_t) 1) << 7)
+#define ERTS_SSI_AUX_WORK_MISC (((erts_aint32_t) 1) << 8)
+#define ERTS_SSI_AUX_WORK_CHECK_CHILDREN (((erts_aint32_t) 1) << 9)
+#define ERTS_SSI_AUX_WORK_SET_TMO (((erts_aint32_t) 1) << 10)
+#define ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK (((erts_aint32_t) 1) << 11)
+#define ERTS_SSI_AUX_WORK_REAP_PORTS (((erts_aint32_t) 1) << 12)
typedef struct ErtsSchedulerSleepInfo_ ErtsSchedulerSleepInfo;
@@ -403,6 +404,11 @@ typedef struct {
} ErtsSchedWallTime;
typedef struct {
+ int sched;
+ erts_aint32_t aux_work;
+} ErtsDelayedAuxWorkWakeupJob;
+
+typedef struct {
int sched_id;
ErtsSchedulerData *esdp;
ErtsSchedulerSleepInfo *ssi;
@@ -431,6 +437,13 @@ typedef struct {
void *queue;
} async_ready;
#endif
+#ifdef ERTS_SMP
+ struct {
+ int *sched2jix;
+ int jix;
+ ErtsDelayedAuxWorkWakeupJob *job;
+ } delayed_wakeup;
+#endif
} ErtsAuxWorkData;
struct ErtsSchedulerData_ {
@@ -464,7 +477,6 @@ struct ErtsSchedulerData_ {
int virtual_reds;
int cpu_id; /* >= 0 when bound */
ErtsAuxWorkData aux_work_data;
-
ErtsAtomCacheMap atom_cache_map;
ErtsSchedAllocData alloc_data;
diff --git a/erts/emulator/beam/erl_process_lock.c b/erts/emulator/beam/erl_process_lock.c
index b3b4601a31..34d591df40 100644
--- a/erts/emulator/beam/erl_process_lock.c
+++ b/erts/emulator/beam/erl_process_lock.c
@@ -90,16 +90,6 @@ static void check_queue(erts_proc_lock_t *lck);
#error "The size of the 'uflgs' field of the erts_tse_t type is too small"
#endif
-struct erts_proc_lock_queues_t_ {
- erts_proc_lock_queues_t *next;
- erts_tse_t *queue[ERTS_PROC_LOCK_MAX_BIT+1];
-};
-
-static erts_proc_lock_queues_t zeroqs = {0};
-
-static erts_smp_spinlock_t qs_lock;
-static erts_proc_lock_queues_t *queue_free_list;
-
#ifdef ERTS_ENABLE_LOCK_CHECK
static struct {
Sint16 proc_lock_main;
@@ -120,7 +110,6 @@ void
erts_init_proc_lock(int cpus)
{
int i;
- erts_smp_spinlock_init(&qs_lock, "proc_lck_qs_alloc");
for (i = 0; i < ERTS_NO_OF_PIX_LOCKS; i++) {
#ifdef ERTS_ENABLE_LOCK_COUNT
erts_mtx_init_x(&erts_pix_locks[i].u.mtx,
@@ -129,7 +118,6 @@ erts_init_proc_lock(int cpus)
erts_mtx_init(&erts_pix_locks[i].u.mtx, "pix_lock");
#endif
}
- queue_free_list = NULL;
erts_thr_install_exit_handler(cleanup_tse);
#ifdef ERTS_ENABLE_LOCK_CHECK
lc_id.proc_lock_main = erts_lc_get_lock_order_id("proc_main");
@@ -156,16 +144,7 @@ erts_init_proc_lock(int cpus)
}
#ifdef ERTS_ENABLE_LOCK_CHECK
-static void
-check_unused_tse(erts_tse_t *wtr)
-{
- int i;
- erts_proc_lock_queues_t *queues = wtr->udata;
- ERTS_LC_ASSERT(wtr->uflgs == 0);
- for (i = 0; i <= ERTS_PROC_LOCK_MAX_BIT; i++)
- ERTS_LC_ASSERT(!queues->queue[i]);
-}
-#define CHECK_UNUSED_TSE(W) check_unused_tse((W))
+#define CHECK_UNUSED_TSE(W) ERTS_LC_ASSERT((W)->uflgs == 0)
#else
#define CHECK_UNUSED_TSE(W)
#endif
@@ -174,56 +153,21 @@ static ERTS_INLINE erts_tse_t *
tse_fetch(erts_pix_lock_t *pix_lock)
{
erts_tse_t *tse = erts_tse_fetch();
- if (!tse->udata) {
- erts_proc_lock_queues_t *qs;
-#if ERTS_PROC_LOCK_SPINLOCK_IMPL && !ERTS_PROC_LOCK_ATOMIC_IMPL
- if (pix_lock)
- erts_pix_unlock(pix_lock);
-#endif
- erts_smp_spin_lock(&qs_lock);
- qs = queue_free_list;
- if (qs) {
- queue_free_list = queue_free_list->next;
- erts_smp_spin_unlock(&qs_lock);
- }
- else {
- erts_smp_spin_unlock(&qs_lock);
- qs = erts_alloc(ERTS_ALC_T_PROC_LCK_QS,
- sizeof(erts_proc_lock_queues_t));
- sys_memcpy((void *) qs,
- (void *) &zeroqs,
- sizeof(erts_proc_lock_queues_t));
- }
- tse->udata = qs;
-#if ERTS_PROC_LOCK_SPINLOCK_IMPL && !ERTS_PROC_LOCK_ATOMIC_IMPL
- if (pix_lock)
- erts_pix_lock(pix_lock);
-#endif
- }
tse->uflgs = 0;
return tse;
}
static ERTS_INLINE void
-tse_return(erts_tse_t *tse, int force_free_q)
+tse_return(erts_tse_t *tse)
{
CHECK_UNUSED_TSE(tse);
- if (force_free_q || erts_tse_is_tmp(tse)) {
- erts_proc_lock_queues_t *qs = tse->udata;
- ASSERT(qs);
- erts_smp_spin_lock(&qs_lock);
- qs->next = queue_free_list;
- queue_free_list = qs;
- erts_smp_spin_unlock(&qs_lock);
- tse->udata = NULL;
- }
erts_tse_return(tse);
}
void
erts_proc_lock_prepare_proc_lock_waiter(void)
{
- tse_return(tse_fetch(NULL), 0);
+ tse_return(tse_fetch(NULL));
}
@@ -231,55 +175,49 @@ static void
cleanup_tse(void)
{
erts_tse_t *tse = erts_tse_fetch();
- if (tse) {
- if (tse->udata)
- tse_return(tse, 1);
- else
- erts_tse_return(tse);
- }
+ if (tse)
+ erts_tse_return(tse);
}
/*
* Waiters are queued in a circular double linked list;
- * where qs->queue[lock_ix] is the first waiter in queue, and
- * qs->queue[lock_ix]->prev is the last waiter in queue.
+ * where lck->queue[lock_ix] is the first waiter in queue, and
+ * lck->queue[lock_ix]->prev is the last waiter in queue.
*/
static ERTS_INLINE void
-enqueue_waiter(erts_proc_lock_queues_t *qs,
- int ix,
- erts_tse_t *wtr)
+enqueue_waiter(erts_proc_lock_t *lck, int ix, erts_tse_t *wtr)
{
- if (!qs->queue[ix]) {
- qs->queue[ix] = wtr;
+ if (!lck->queue[ix]) {
+ lck->queue[ix] = wtr;
wtr->next = wtr;
wtr->prev = wtr;
}
else {
- ERTS_LC_ASSERT(qs->queue[ix]->next && qs->queue[ix]->prev);
- wtr->next = qs->queue[ix];
- wtr->prev = qs->queue[ix]->prev;
+ ERTS_LC_ASSERT(lck->queue[ix]->next && lck->queue[ix]->prev);
+ wtr->next = lck->queue[ix];
+ wtr->prev = lck->queue[ix]->prev;
wtr->prev->next = wtr;
- qs->queue[ix]->prev = wtr;
+ lck->queue[ix]->prev = wtr;
}
}
static erts_tse_t *
-dequeue_waiter(erts_proc_lock_queues_t *qs, int ix)
+dequeue_waiter(erts_proc_lock_t *lck, int ix)
{
- erts_tse_t *wtr = qs->queue[ix];
- ERTS_LC_ASSERT(qs->queue[ix]);
+ erts_tse_t *wtr = lck->queue[ix];
+ ERTS_LC_ASSERT(lck->queue[ix]);
if (wtr->next == wtr) {
- ERTS_LC_ASSERT(qs->queue[ix]->prev == wtr);
- qs->queue[ix] = NULL;
+ ERTS_LC_ASSERT(lck->queue[ix]->prev == wtr);
+ lck->queue[ix] = NULL;
}
else {
ERTS_LC_ASSERT(wtr->next != wtr);
ERTS_LC_ASSERT(wtr->prev != wtr);
wtr->next->prev = wtr->prev;
wtr->prev->next = wtr->next;
- qs->queue[ix] = wtr->next;
+ lck->queue[ix] = wtr->next;
}
return wtr;
}
@@ -300,19 +238,18 @@ try_aquire(erts_proc_lock_t *lck, erts_tse_t *wtr)
ErtsProcLocks locks = wtr->uflgs;
int lock_no;
- ERTS_LC_ASSERT(lck->queues);
ERTS_LC_ASSERT(got_locks != locks);
for (lock_no = 0; lock_no <= ERTS_PROC_LOCK_MAX_BIT; lock_no++) {
ErtsProcLocks lock = ((ErtsProcLocks) 1) << lock_no;
if (locks & lock) {
ErtsProcLocks wflg, old_lflgs;
- if (lck->queues->queue[lock_no]) {
+ if (lck->queue[lock_no]) {
/* Others already waiting */
enqueue:
ERTS_LC_ASSERT(ERTS_PROC_LOCK_FLGS_READ_(lck)
& (lock << ERTS_PROC_LOCK_WAITER_SHIFT));
- enqueue_waiter(lck->queues, lock_no, wtr);
+ enqueue_waiter(lck, lock_no, wtr);
break;
}
wflg = lock << ERTS_PROC_LOCK_WAITER_SHIFT;
@@ -364,7 +301,6 @@ transfer_locks(Process *p,
for (lock_no = 0; tlocks && lock_no <= ERTS_PROC_LOCK_MAX_BIT; lock_no++) {
ErtsProcLocks lock = ((ErtsProcLocks) 1) << lock_no;
if (tlocks & lock) {
- erts_proc_lock_queues_t *qs = p->lock.queues;
/* Transfer lock */
#ifdef ERTS_ENABLE_LOCK_CHECK
tlocks &= ~lock;
@@ -372,9 +308,9 @@ transfer_locks(Process *p,
ERTS_LC_ASSERT(ERTS_PROC_LOCK_FLGS_READ_(&p->lock)
& (lock << ERTS_PROC_LOCK_WAITER_SHIFT));
transferred++;
- wtr = dequeue_waiter(qs, lock_no);
+ wtr = dequeue_waiter(&p->lock, lock_no);
ERTS_LC_ASSERT(wtr);
- if (!qs->queue[lock_no])
+ if (!p->lock.queue[lock_no])
unset_waiter |= lock;
ERTS_LC_ASSERT(wtr->uflgs & lock);
wtr->uflgs &= ~lock;
@@ -463,7 +399,6 @@ wait_for_locks(Process *p,
{
erts_pix_lock_t *pix_lock = pixlck ? pixlck : ERTS_PID2PIXLOCK(p->id);
erts_tse_t *wtr;
- erts_proc_lock_queues_t *qs;
/* Acquire a waiter object on which this thread can wait. */
wtr = tse_fetch(pix_lock);
@@ -479,18 +414,6 @@ wait_for_locks(Process *p,
ERTS_LC_ASSERT(erts_lc_pix_lock_is_locked(pix_lock));
- qs = wtr->udata;
- ASSERT(qs);
- /* Provide the process with waiter queues, if it doesn't have one. */
- if (!p->lock.queues) {
- qs->next = NULL;
- p->lock.queues = qs;
- }
- else {
- qs->next = p->lock.queues->next;
- p->lock.queues->next = qs;
- }
-
#ifdef ERTS_PROC_LOCK_HARD_DEBUG
check_queue(&p->lock);
#endif
@@ -504,7 +427,9 @@ wait_for_locks(Process *p,
check_queue(&p->lock);
#endif
- if (wtr->uflgs) {
+ if (wtr->uflgs == 0)
+ erts_pix_unlock(pix_lock);
+ else {
/* We didn't get them all; need to wait... */
ASSERT((wtr->uflgs & ~ERTS_PROC_LOCKS_ALL) == 0);
@@ -529,28 +454,12 @@ wait_for_locks(Process *p,
} while (res != 0);
}
- erts_pix_lock(pix_lock);
-
ASSERT(wtr->uflgs == 0);
}
- /* Recover some queues to store in the waiter. */
- ERTS_LC_ASSERT(p->lock.queues);
- if (p->lock.queues->next) {
- qs = p->lock.queues->next;
- p->lock.queues->next = qs->next;
- }
- else {
- qs = p->lock.queues;
- p->lock.queues = NULL;
- }
- wtr->udata = qs;
-
- erts_pix_unlock(pix_lock);
-
ERTS_LC_ASSERT(locks == (ERTS_PROC_LOCK_FLGS_READ_(&p->lock) & locks));
- tse_return(wtr, 0);
+ tse_return(wtr);
}
/*
@@ -971,6 +880,7 @@ erts_pid2proc_safelock(Process *c_p,
void
erts_proc_lock_init(Process *p)
{
+ int i;
/* We always start with all locks locked */
#if ERTS_PROC_LOCK_ATOMIC_IMPL
erts_smp_atomic32_init_nob(&p->lock.flags,
@@ -978,7 +888,8 @@ erts_proc_lock_init(Process *p)
#else
p->lock.flags = ERTS_PROC_LOCKS_ALL;
#endif
- p->lock.queues = NULL;
+ for (i = 0; i <= ERTS_PROC_LOCK_MAX_BIT; i++)
+ p->lock.queue[i] = NULL;
p->lock.refc = 1;
#ifdef ERTS_ENABLE_LOCK_COUNT
erts_lcnt_proc_lock_init(p);
@@ -990,11 +901,8 @@ erts_proc_lock_init(Process *p)
erts_proc_lc_trylock(p, ERTS_PROC_LOCKS_ALL, 1);
#endif
#ifdef ERTS_PROC_LOCK_DEBUG
- {
- int i;
- for (i = 0; i <= ERTS_PROC_LOCK_MAX_BIT; i++)
- erts_smp_atomic32_init_nob(&p->lock.locked[i], (erts_aint32_t) 1);
- }
+ for (i = 0; i <= ERTS_PROC_LOCK_MAX_BIT; i++)
+ erts_smp_atomic32_init_nob(&p->lock.locked[i], (erts_aint32_t) 1);
#endif
}
@@ -1463,21 +1371,21 @@ check_queue(erts_proc_lock_t *lck)
if (lflgs & wtr) {
int n;
erts_tse_t *wtr;
- ERTS_LC_ASSERT(lck->queues && lck->queues->queue[lock_no]);
- wtr = lck->queues->queue[lock_no];
+ ERTS_LC_ASSERT(lck->queue[lock_no]);
+ wtr = lck->queue[lock_no];
n = 0;
do {
wtr = wtr->next;
n++;
- } while (wtr != lck->queues->queue[lock_no]);
+ } while (wtr != lck->queue[lock_no]);
do {
wtr = wtr->prev;
n--;
- } while (wtr != lck->queues->queue[lock_no]);
+ } while (wtr != lck->queue[lock_no]);
ERTS_LC_ASSERT(n == 0);
}
else {
- ERTS_LC_ASSERT(!lck->queues || !lck->queues->queue[lock_no]);
+ ERTS_LC_ASSERT(!lck->queue[lock_no]);
}
}
}
diff --git a/erts/emulator/beam/erl_process_lock.h b/erts/emulator/beam/erl_process_lock.h
index 413c45480c..290084d8ca 100644
--- a/erts/emulator/beam/erl_process_lock.h
+++ b/erts/emulator/beam/erl_process_lock.h
@@ -56,15 +56,13 @@
typedef erts_aint32_t ErtsProcLocks;
-typedef struct erts_proc_lock_queues_t_ erts_proc_lock_queues_t;
-
typedef struct erts_proc_lock_t_ {
#if ERTS_PROC_LOCK_ATOMIC_IMPL
erts_smp_atomic32_t flags;
#else
ErtsProcLocks flags;
#endif
- erts_proc_lock_queues_t *queues;
+ erts_tse_t *queue[ERTS_PROC_LOCK_MAX_BIT+1];
Sint32 refc;
#ifdef ERTS_PROC_LOCK_DEBUG
erts_smp_atomic32_t locked[ERTS_PROC_LOCK_MAX_BIT+1];
diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.c b/erts/emulator/beam/erl_sched_spec_pre_alloc.c
index 37b186abd9..a490aec734 100644
--- a/erts/emulator/beam/erl_sched_spec_pre_alloc.c
+++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.c
@@ -116,54 +116,84 @@ erts_sspa_create(size_t blk_sz, int pa_size)
return data;
}
-static ERTS_INLINE erts_aint_t
+static ERTS_INLINE void
enqueue_remote_managed_thread(erts_sspa_chunk_header_t *chdr,
erts_sspa_blk_t *this,
- int want_last)
+ int cinit)
{
- erts_aint_t ilast, itmp;
+ erts_aint_t itmp;
+ erts_sspa_blk_t *enq;
erts_atomic_init_nob(&this->next_atmc, ERTS_AINT_NULL);
-
/* Enqueue at end of list... */
- ilast = erts_atomic_read_nob(&chdr->tail.data.last);
- while (1) {
- erts_sspa_blk_t *last = (erts_sspa_blk_t *) ilast;
- itmp = erts_atomic_cmpxchg_mb(&last->next_atmc,
- (erts_aint_t) this,
- ERTS_AINT_NULL);
- if (itmp == ERTS_AINT_NULL)
- break;
- ilast = itmp;
+ enq = (erts_sspa_blk_t *) erts_atomic_read_nob(&chdr->tail.data.last);
+ itmp = erts_atomic_cmpxchg_relb(&enq->next_atmc,
+ (erts_aint_t) this,
+ ERTS_AINT_NULL);
+ if (itmp == ERTS_AINT_NULL) {
+ /* We are required to move last pointer */
+#ifdef DEBUG
+ ASSERT(ERTS_AINT_NULL == erts_atomic_read_nob(&this->next_atmc));
+ ASSERT(((erts_aint_t) enq)
+ == erts_atomic_xchg_relb(&chdr->tail.data.last,
+ (erts_aint_t) this));
+#else
+ erts_atomic_set_relb(&chdr->tail.data.last, (erts_aint_t) this);
+#endif
}
+ else {
+ /*
+ * We *need* to insert element somewhere in between the
+ * last element we read earlier and the actual last element.
+ */
+ int i = cinit;
- /* Move last pointer forward... */
- while (1) {
- erts_aint_t itmp;
- if (want_last) {
- if (erts_atomic_read_rb(&this->next_atmc) != ERTS_AINT_NULL) {
- /* Someone else will move it forward */
- return erts_atomic_read_nob(&chdr->tail.data.last);
+ while (1) {
+ erts_aint_t itmp2;
+ erts_atomic_set_nob(&this->next_atmc, itmp);
+ itmp2 = erts_atomic_cmpxchg_relb(&enq->next_atmc,
+ (erts_aint_t) this,
+ itmp);
+ if (itmp == itmp2)
+ break; /* inserted this */
+ if ((i & 1) == 0)
+ itmp = itmp2;
+ else {
+ enq = (erts_sspa_blk_t *) itmp;
+ itmp = erts_atomic_read_acqb(&enq->next_atmc);
+ ASSERT(itmp != ERTS_AINT_NULL);
}
+ i++;
}
- else {
- if (erts_atomic_read_nob(&this->next_atmc) != ERTS_AINT_NULL) {
- /* Someone else will move it forward */
- return ERTS_AINT_NULL;
- }
+ }
+}
+
+static ERTS_INLINE erts_aint_t
+check_insert_marker(erts_sspa_chunk_header_t *chdr, erts_aint_t ilast)
+{
+ if (!chdr->head.used_marker
+ && chdr->head.unref_end == (erts_sspa_blk_t *) ilast) {
+ erts_aint_t itmp;
+ erts_sspa_blk_t *last = (erts_sspa_blk_t *) ilast;
+
+ erts_atomic_init_nob(&chdr->tail.data.marker.next_atmc, ERTS_AINT_NULL);
+ itmp = erts_atomic_cmpxchg_relb(&last->next_atmc,
+ (erts_aint_t) &chdr->tail.data.marker,
+ ERTS_AINT_NULL);
+ if (itmp == ERTS_AINT_NULL) {
+ ilast = (erts_aint_t) &chdr->tail.data.marker;
+ chdr->head.used_marker = !0;
+ erts_atomic_set_relb(&chdr->tail.data.last, ilast);
}
- itmp = erts_atomic_cmpxchg_mb(&chdr->tail.data.last,
- (erts_aint_t) this,
- ilast);
- if (ilast == itmp)
- return want_last ? (erts_aint_t) this : ERTS_AINT_NULL;
- ilast = itmp;
}
+ return ilast;
}
void
-erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr, erts_sspa_blk_t *blk)
+erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr,
+ erts_sspa_blk_t *blk,
+ int cinit)
{
int um_refc_ix = 0;
int managed_thread = erts_thr_progress_is_managed_thread();
@@ -180,7 +210,7 @@ erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr, erts_sspa_blk_t *blk)
}
}
- (void) enqueue_remote_managed_thread(chdr, blk, 0);
+ enqueue_remote_managed_thread(chdr, blk, cinit);
if (!managed_thread)
erts_atomic_dec_relb(&chdr->tail.data.um_refc[um_refc_ix]);
@@ -208,24 +238,17 @@ fetch_remote(erts_sspa_chunk_header_t *chdr, int max)
int um_refc_ix;
chdr->head.next.thr_progress_reached = 1;
um_refc_ix = chdr->head.next.um_refc_ix;
- if (erts_atomic_read_acqb(&chdr->tail.data.um_refc[um_refc_ix]) == 0) {
+ if (erts_atomic_read_nob(&chdr->tail.data.um_refc[um_refc_ix]) == 0) {
+
+ ETHR_MEMBAR(ETHR_LoadLoad|ETHR_LoadStore);
/* Move unreferenced end pointer forward... */
chdr->head.unref_end = chdr->head.next.unref_end;
- if (!chdr->head.used_marker
- && chdr->head.unref_end == (erts_sspa_blk_t *) ilast) {
- /* Need to equeue marker */
- chdr->head.used_marker = 1;
- ilast = enqueue_remote_managed_thread(chdr,
- &chdr->tail.data.marker,
- 1);
- }
+ ilast = check_insert_marker(chdr, ilast);
- if (chdr->head.unref_end == (erts_sspa_blk_t *) ilast)
- ERTS_THR_MEMORY_BARRIER;
- else {
+ if (chdr->head.unref_end != (erts_sspa_blk_t *) ilast) {
chdr->head.next.unref_end = (erts_sspa_blk_t *) ilast;
chdr->head.next.thr_progress = erts_thr_progress_later(NULL);
erts_atomic32_set_relb(&chdr->tail.data.um_refc_ix,
diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.h b/erts/emulator/beam/erl_sched_spec_pre_alloc.h
index d36066c399..bccb1aba7a 100644
--- a/erts/emulator/beam/erl_sched_spec_pre_alloc.h
+++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.h
@@ -142,7 +142,8 @@ check_local_list(erts_sspa_chunk_header_t *chdr)
erts_sspa_data_t *erts_sspa_create(size_t blk_sz,
int pa_size);
void erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr,
- erts_sspa_blk_t *blk);
+ erts_sspa_blk_t *blk,
+ int cinit);
erts_sspa_blk_t *erts_sspa_process_remote_frees(erts_sspa_chunk_header_t *chdr,
erts_sspa_blk_t *old_res);
@@ -216,7 +217,7 @@ erts_sspa_free(erts_sspa_data_t *data, int cix, char *cblk)
chdr = &chnk->aligned.header;
if (chnk_cix != cix) {
/* Remote chunk */
- erts_sspa_remote_free(chdr, blk);
+ erts_sspa_remote_free(chdr, blk, chnk_cix - cix);
}
else {
/* Local chunk */
diff --git a/erts/emulator/drivers/common/efile_drv.c b/erts/emulator/drivers/common/efile_drv.c
index 603d1d47b6..347247ee7b 100644
--- a/erts/emulator/drivers/common/efile_drv.c
+++ b/erts/emulator/drivers/common/efile_drv.c
@@ -522,7 +522,7 @@ static void *ef_safe_alloc(Uint s)
static void *ef_safe_realloc(void *op, Uint s)
{
void *p = EF_REALLOC(op, s);
- if (!p) erl_exit(1, "efile drv: Can't reallocate %d bytes of memory\n", s);
+ if (!p) erl_exit(1, "efile drv: Can't reallocate %lu bytes of memory\n", (unsigned long)s);
return p;
}
diff --git a/erts/emulator/drivers/unix/unix_efile.c b/erts/emulator/drivers/unix/unix_efile.c
index ad112f7590..b250bac4dc 100644
--- a/erts/emulator/drivers/unix/unix_efile.c
+++ b/erts/emulator/drivers/unix/unix_efile.c
@@ -107,8 +107,8 @@ static void *ef_safe_alloc(Uint s)
{
void *p = EF_ALLOC(s);
if (!p) erl_exit(1,
- "unix efile drv: Can't allocate %d bytes of memory\n",
- s);
+ "unix efile drv: Can't allocate %lu bytes of memory\n",
+ (unsigned long)s);
return p;
}
@@ -118,8 +118,8 @@ static void *ef_safe_realloc(void *op, Uint s)
{
void *p = EF_REALLOC(op, s);
if (!p) erl_exit(1,
- "unix efile drv: Can't reallocate %d bytes of memory\n",
- s);
+ "unix efile drv: Can't reallocate %lu bytes of memory\n",
+ (unsigned long)s);
return p;
}
diff --git a/erts/emulator/hipe/hipe_arm.c b/erts/emulator/hipe/hipe_arm.c
index e20a8a7969..651d0e3a75 100644
--- a/erts/emulator/hipe/hipe_arm.c
+++ b/erts/emulator/hipe/hipe_arm.c
@@ -181,11 +181,9 @@ void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *
curseg.base = base;
curseg.code_pos = base;
curseg.tramp_pos = (unsigned int*)((char*)base + SEGMENT_NRBYTES);
-#if defined(__arm__)
curseg.tramp_pos -= 2;
curseg.tramp_pos[0] = 0xE51FF004; /* ldr pc, [pc,#-4] */
curseg.tramp_pos[1] = (unsigned int)&nbif_callemu;
-#endif
address = try_alloc(nrwords, nrcallees, callees, trampvec);
if (!address) {
@@ -214,11 +212,9 @@ static unsigned int *alloc_stub(Uint nrwords, unsigned int **tramp_callemu)
curseg.base = base;
curseg.code_pos = base;
curseg.tramp_pos = (unsigned int*)((char*)base + SEGMENT_NRBYTES);
-#if defined(__arm__)
curseg.tramp_pos -= 2;
curseg.tramp_pos[0] = 0xE51FF004; /* ldr pc, [pc,#-4] */
curseg.tramp_pos[1] = (unsigned int)&nbif_callemu;
-#endif
address = try_alloc(nrwords, 0, NIL, NULL);
if (!address) {
@@ -269,10 +265,8 @@ int hipe_patch_insn(void *address, Uint32 value, Eterm type)
void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity)
{
unsigned int *code;
-#if defined(__arm__)
unsigned int *tramp_callemu;
int callemu_offset;
-#endif
/*
* Native code calls BEAM via a stub looking as follows:
@@ -288,13 +282,6 @@ void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity)
* (Trampolines are allowed to modify r12, but they don't.)
*/
-#if !defined(__arm__)
- /* verify that 'ba' can reach nbif_callemu */
- if ((unsigned long)&nbif_callemu & ~0x01FFFFFCUL)
- abort();
-#endif
-
-#if defined(__arm__)
code = alloc_stub(4, &tramp_callemu);
callemu_offset = ((int)&nbif_callemu - ((int)&code[2] + 8)) >> 2;
if (!(callemu_offset >= -0x00800000 && callemu_offset <= 0x007FFFFF)) {
@@ -302,11 +289,7 @@ void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity)
if (!(callemu_offset >= -0x00800000 && callemu_offset <= 0x007FFFFF))
abort();
}
-#else
- code = alloc_stub(4, &trampoline);
-#endif
-#if defined(__arm__)
/* mov r0, #beamArity */
code[0] = 0xE3A00000 | (beamArity & 0xFF);
/* ldr r8, [pc,#0] // beamAddress */
@@ -315,16 +298,6 @@ void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity)
code[2] = 0xEA000000 | (callemu_offset & 0x00FFFFFF);
/* .long beamAddress */
code[3] = (unsigned int)beamAddress;
-#else
- /* addi r12,0,beamAddress@l */
- code[0] = 0x39800000 | ((unsigned long)beamAddress & 0xFFFF);
- /* addi r0,0,beamArity */
- code[1] = 0x38000000 | (beamArity & 0x7FFF);
- /* addis r12,r12,beamAddress@ha */
- code[2] = 0x3D8C0000 | at_ha((unsigned long)beamAddress);
- /* ba nbif_callemu */
- code[3] = 0x48000002 | (unsigned long)&nbif_callemu;
-#endif
hipe_flush_icache_range(code, 4*sizeof(int));
@@ -334,60 +307,32 @@ void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity)
static void patch_b(Uint32 *address, Sint32 offset, Uint32 AA)
{
Uint32 oldI = *address;
-#if defined(__arm__)
Uint32 newI = (oldI & 0xFF000000) | (offset & 0x00FFFFFF);
-#else
- Uint32 newI = (oldI & 0xFC000001) | ((offset & 0x00FFFFFF) << 2) | (AA & 2);
-#endif
*address = newI;
hipe_flush_icache_word(address);
}
int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline)
{
-#if !defined(__arm__)
- if ((Uint32)destAddress == ((Uint32)destAddress & 0x01FFFFFC)) {
- /* The destination is in the [0,32MB[ range.
- We can reach it with a ba/bla instruction.
- This is the typical case for BIFs and primops.
- It's also common for trap-to-BEAM stubs (on ppc32). */
- patch_b((Uint32*)callAddress, (Uint32)destAddress >> 2, 2);
+ Sint32 destOffset = ((Sint32)destAddress - ((Sint32)callAddress+8)) >> 2;
+ if (destOffset >= -0x800000 && destOffset <= 0x7FFFFF) {
+ /* The destination is within a [-32MB,+32MB[ range from us.
+ We can reach it with a b/bl instruction.
+ This is typical for nearby Erlang code. */
+ patch_b((Uint32*)callAddress, destOffset, 0);
} else {
-#endif
-#if defined(__arm__)
- Sint32 destOffset = ((Sint32)destAddress - ((Sint32)callAddress+8)) >> 2;
-#else
- Sint32 destOffset = ((Sint32)destAddress - (Sint32)callAddress) >> 2;
-#endif
- if (destOffset >= -0x800000 && destOffset <= 0x7FFFFF) {
- /* The destination is within a [-32MB,+32MB[ range from us.
- We can reach it with a b/bl instruction.
- This is typical for nearby Erlang code. */
- patch_b((Uint32*)callAddress, destOffset, 0);
- } else {
- /* The destination is too distant for b/bl/ba/bla.
- Must do a b/bl to the trampoline. */
-#if defined(__arm__)
- Sint32 trampOffset = ((Sint32)trampoline - ((Sint32)callAddress+8)) >> 2;
-#else
- Sint32 trampOffset = ((Sint32)trampoline - (Sint32)callAddress) >> 2;
-#endif
- if (trampOffset >= -0x800000 && trampOffset <= 0x7FFFFF) {
- /* Update the trampoline's address computation.
- (May be redundant, but we can't tell.) */
-#if defined(__arm__)
- patch_imm32((Uint32*)trampoline+1, (Uint32)destAddress);
-#else
- patch_li((Uint32*)trampoline, (Uint32)destAddress);
-#endif
- /* Update this call site. */
- patch_b((Uint32*)callAddress, trampOffset, 0);
- } else
- return -1;
- }
-#if !defined(__arm__)
+ /* The destination is too distant for b/bl.
+ Must do a b/bl to the trampoline. */
+ Sint32 trampOffset = ((Sint32)trampoline - ((Sint32)callAddress+8)) >> 2;
+ if (trampOffset >= -0x800000 && trampOffset <= 0x7FFFFF) {
+ /* Update the trampoline's address computation.
+ (May be redundant, but we can't tell.) */
+ patch_imm32((Uint32*)trampoline+1, (Uint32)destAddress);
+ /* Update this call site. */
+ patch_b((Uint32*)callAddress, trampOffset, 0);
+ } else
+ return -1;
}
-#endif
return 0;
}
diff --git a/erts/emulator/hipe/hipe_arm_bifs.m4 b/erts/emulator/hipe/hipe_arm_bifs.m4
index e0c6f09796..17c013f1fb 100644
--- a/erts/emulator/hipe/hipe_arm_bifs.m4
+++ b/erts/emulator/hipe/hipe_arm_bifs.m4
@@ -27,7 +27,7 @@ include(`hipe/hipe_arm_asm.m4')
.p2align 2
`#if defined(ERTS_ENABLE_LOCK_CHECK) && defined(ERTS_SMP)
-# define CALL_BIF(F) mov r14, #F; str r14, [r0, #P_BIF_CALLEE]; bl hipe_debug_bif_wrapper
+# define CALL_BIF(F) ldr r14, =F; str r14, [r0, #P_BIF_CALLEE]; bl hipe_debug_bif_wrapper
#else
# define CALL_BIF(F) bl F
#endif'
@@ -67,6 +67,7 @@ $1:
RESTORE_CONTEXT_BIF
beq nbif_1_simple_exception
NBIF_RET(1)
+ .ltorg /* needed by LDR in debug version of `CALL_BIF' */
.size $1, .-$1
.type $1, %function
#endif')
@@ -95,6 +96,7 @@ $1:
RESTORE_CONTEXT_BIF
beq nbif_2_simple_exception
NBIF_RET(2)
+ .ltorg
.size $1, .-$1
.type $1, %function
#endif')
@@ -125,6 +127,7 @@ $1:
RESTORE_CONTEXT_BIF
beq nbif_3_simple_exception
NBIF_RET(3)
+ .ltorg
.size $1, .-$1
.type $1, %function
#endif')
@@ -149,6 +152,7 @@ $1:
RESTORE_CONTEXT_BIF
beq nbif_0_simple_exception
NBIF_RET(0)
+ .ltorg
.size $1, .-$1
.type $1, %function
#endif')
@@ -173,7 +177,8 @@ $1:
/* Save caller-save registers and call the C function. */
SAVE_CONTEXT_GC
- bl $2
+ /* ignore empty BIF__ARGS */
+ CALL_BIF($2)
TEST_GOT_MBUF(0)
/* Restore registers. */
@@ -195,7 +200,9 @@ $1:
/* Save caller-save registers and call the C function. */
SAVE_CONTEXT_GC
- bl $2
+ str r1, [r0, #P_ARG0] /* Store BIF__ARGS in def_arg_reg[] */
+ add r1, r0, #P_ARG0
+ CALL_BIF($2)
TEST_GOT_MBUF(1)
/* Restore registers. Check for exception. */
@@ -220,7 +227,10 @@ $1:
/* Save caller-save registers and call the C function. */
SAVE_CONTEXT_GC
- bl $2
+ str r1, [r0, #P_ARG0] /* Store BIF__ARGS in def_arg_reg[] */
+ str r2, [r0, #P_ARG1]
+ add r1, r0, #P_ARG0
+ CALL_BIF($2)
TEST_GOT_MBUF(2)
/* Restore registers. Check for exception. */