author | Erlang/OTP <[email protected]> | 2009-11-20 14:54:40 +0000
committer | Erlang/OTP <[email protected]> | 2009-11-20 14:54:40 +0000
commit | 84adefa331c4159d432d22840663c38f155cd4c1 (patch)
tree | bff9a9c66adda4df2106dfd0e5c053ab182a12bd /erts/emulator/beam/erl_process.c
download | otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz, otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2, otp-84adefa331c4159d432d22840663c38f155cd4c1.zip
The R13B03 release. (tag: OTP_R13B03)
Diffstat (limited to 'erts/emulator/beam/erl_process.c')
-rw-r--r-- | erts/emulator/beam/erl_process.c | 9469
1 file changed, 9469 insertions, 0 deletions
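A recurring idiom in the diff below is over-allocating an array by one element and rounding the base pointer up to the next cache-line boundary, so that each per-scheduler structure (`erts_aligned_run_queues`, `erts_aligned_scheduler_data`) gets its own cache line and schedulers do not false-share. A minimal standalone sketch of that idiom, assuming a 64-byte line; the names here are illustrative, not ERTS APIs:

```c
#include <stdint.h>
#include <stdlib.h>

/* Assumed 64-byte cache line; ERTS computes its own ERTS_CACHE_LINE_SIZE. */
#define CACHE_LINE_SIZE 64
#define CACHE_LINE_MASK ((uintptr_t) (CACHE_LINE_SIZE - 1))

/* One element padded out to a whole cache line, in the spirit of
 * ErtsAlignedRunQueue and ErtsAlignedSchedulerData in the diff. */
typedef struct {
    long counter;
    char align[CACHE_LINE_SIZE - sizeof(long)];
} padded_slot;

/* Allocate n slots plus one spare, then bump a misaligned base up to
 * the next cache-line boundary; the spare slot guarantees n aligned
 * slots still fit. The raw malloc() pointer is dropped, so the block
 * is never freed; ERTS keeps such blocks for the lifetime of the VM. */
static padded_slot *alloc_cache_aligned(size_t n)
{
    padded_slot *p = malloc(sizeof(padded_slot) * (n + 1));
    if (p && (((uintptr_t) p) & CACHE_LINE_MASK) != 0)
        p = (padded_slot *) ((((uintptr_t) p) & ~CACHE_LINE_MASK)
                             + CACHE_LINE_SIZE);
    return p;
}
```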
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c new file mode 100644 index 0000000000..9960172366 --- /dev/null +++ b/erts/emulator/beam/erl_process.c @@ -0,0 +1,9469 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 1996-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +#define ERL_PROCESS_C__ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include <stddef.h> /* offsetof() */ +#include <ctype.h> +#include "sys.h" +#include "erl_vm.h" +#include "global.h" +#include "erl_process.h" +#include "erl_nmgc.h" +#include "error.h" +#include "bif.h" +#include "erl_db.h" +#include "dist.h" +#include "beam_catches.h" +#include "erl_instrument.h" +#include "erl_threads.h" +#include "erl_binary.h" + +#define ERTS_RUNQ_CHECK_BALANCE_REDS_PER_SCHED (2000*CONTEXT_REDS) +#define ERTS_RUNQ_CALL_CHECK_BALANCE_REDS \ + (ERTS_RUNQ_CHECK_BALANCE_REDS_PER_SCHED/2) + +#define ERTS_PROC_MIN_CONTEXT_SWITCH_REDS_COST (CONTEXT_REDS/10) + +#define ERTS_SCHED_SLEEP_SPINCOUNT 10000 + +#define ERTS_WAKEUP_OTHER_LIMIT (100*CONTEXT_REDS/2) +#define ERTS_WAKEUP_OTHER_DEC 10 +#define ERTS_WAKEUP_OTHER_FIXED_INC (CONTEXT_REDS/10) + +#define ERTS_MAX_CPU_TOPOLOGY_ID ((int) 0xffff) + +#if 0 || defined(DEBUG) +#define ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA +#endif + +#if defined(DEBUG) && 0 +#define HARDDEBUG +#else +#undef HARDDEBUG +#endif + +#ifdef HARDDEBUG +#define HARDDEBUG_RUNQS +#endif + +#ifdef HIPE +#include "hipe_mode_switch.h" /* for hipe_init_process() */ +#include "hipe_signal.h" /* for hipe_thread_signal_init() */ +#endif + +#ifdef ERTS_ENABLE_LOCK_COUNT +#include "erl_lock_count.h" +#endif + +#define MAX_BIT (1 << PRIORITY_MAX) +#define HIGH_BIT (1 << PRIORITY_HIGH) +#define NORMAL_BIT (1 << PRIORITY_NORMAL) +#define LOW_BIT (1 << PRIORITY_LOW) + +#define ERTS_MAYBE_SAVE_TERMINATING_PROCESS(P) \ +do { \ + ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&proc_tab_mtx)); \ + if (saved_term_procs.end) \ + save_terminating_process((P)); \ +} while (0) + +#define ERTS_EMPTY_RUNQ(RQ) \ + ((RQ)->len == 0 && (RQ)->misc.start == NULL) + +extern Eterm beam_apply[]; +extern Eterm beam_exit[]; +extern Eterm beam_continue_exit[]; + +static Sint p_last; +static Sint p_next; +static Sint p_serial; +static Uint p_serial_mask; +static Uint p_serial_shift; + +Uint erts_no_schedulers; +Uint erts_max_processes = ERTS_DEFAULT_MAX_PROCESSES; +Uint erts_process_tab_index_mask; + +int erts_sched_thread_suggested_stack_size = -1; + +#ifdef ERTS_ENABLE_LOCK_CHECK +ErtsLcPSDLocks erts_psd_required_locks[ERTS_PSD_SIZE]; +#endif + +#ifdef ERTS_SMP + +int erts_disable_proc_not_running_opt; + +#define ERTS_SCHED_CHANGING_ONLINE 1 +#define ERTS_SCHED_CHANGING_MULTI_SCHED 2 + +static struct { + erts_smp_mtx_t mtx; + erts_smp_cnd_t cnd; + int changing; + int online; + int curr_online; + int wait_curr_online; + erts_smp_atomic_t active; + struct { + erts_smp_atomic_t ongoing; + long wait_active; + ErtsProcList *procs; + } 
msb; /* Multi Scheduling Block */ +} schdlr_sspnd; + +static struct { + erts_smp_mtx_t update_mtx; + erts_smp_atomic_t active_runqs; + int last_active_runqs; + erts_smp_atomic_t used_runqs; + int forced_check_balance; + erts_smp_atomic_t checking_balance; + int halftime; + int full_reds_history_index; + struct { + int active_runqs; + int reds; + int max_len; + } prev_rise; + Uint n; +} balance_info; + +#define ERTS_BLNCE_SAVE_RISE(ACTIVE, MAX_LEN, REDS) \ +do { \ + balance_info.prev_rise.active_runqs = (ACTIVE); \ + balance_info.prev_rise.max_len = (MAX_LEN); \ + balance_info.prev_rise.reds = (REDS); \ +} while (0) + +#endif + +/* + * Cpu topology hierarchy. + */ +#define ERTS_TOPOLOGY_NODE 0 +#define ERTS_TOPOLOGY_PROCESSOR 1 +#define ERTS_TOPOLOGY_PROCESSOR_NODE 2 +#define ERTS_TOPOLOGY_CORE 3 +#define ERTS_TOPOLOGY_THREAD 4 +#define ERTS_TOPOLOGY_LOGICAL 5 + +#define ERTS_TOPOLOGY_MAX_DEPTH 6 + +typedef struct { + int bind_id; + int bound_id; +} ErtsCpuBindData; + +static ErtsCpuBindData *scheduler2cpu_map; +erts_smp_rwmtx_t erts_cpu_bind_rwmtx; + +typedef enum { + ERTS_CPU_BIND_SPREAD, + ERTS_CPU_BIND_PROCESSOR_SPREAD, + ERTS_CPU_BIND_THREAD_SPREAD, + ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD, + ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD, + ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD, + ERTS_CPU_BIND_NO_SPREAD, + ERTS_CPU_BIND_NONE +} ErtsCpuBindOrder; + +ErtsCpuBindOrder cpu_bind_order; + +static erts_cpu_topology_t *user_cpudata; +static int user_cpudata_size; +static erts_cpu_topology_t *system_cpudata; +static int system_cpudata_size; + +erts_sched_stat_t erts_sched_stat; + +ErtsRunQueue *erts_common_run_queue; + +#ifdef USE_THREADS +static erts_tsd_key_t sched_data_key; +#endif + +static erts_smp_mtx_t proc_tab_mtx; + +static erts_smp_atomic_t function_calls; + +#ifdef ERTS_SMP +static erts_smp_atomic_t doing_sys_schedule; +static erts_smp_atomic_t no_empty_run_queues; +#else /* !ERTS_SMP */ +ErtsSchedulerData *erts_scheduler_data; +#endif + +ErtsAlignedRunQueue *erts_aligned_run_queues; +Uint erts_no_run_queues; + +typedef struct { + ErtsSchedulerData esd; + char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsSchedulerData))]; +} ErtsAlignedSchedulerData; + +ErtsAlignedSchedulerData *erts_aligned_scheduler_data; + +#ifndef BM_COUNTERS +static int processes_busy; +#endif + +Process** process_tab; +static Uint last_reductions; +static Uint last_exact_reductions; +Uint erts_default_process_flags; +Eterm erts_system_monitor; +Eterm erts_system_monitor_msg_queue_len; +Eterm erts_system_monitor_long_gc; +Eterm erts_system_monitor_large_heap; +struct erts_system_monitor_flags_t erts_system_monitor_flags; + +/* system performance monitor */ +Eterm erts_system_profile; +struct erts_system_profile_flags_t erts_system_profile_flags; + +#ifdef HYBRID +Uint erts_num_active_procs; +Process** erts_active_procs; +#endif + +static erts_smp_atomic_t process_count; + +typedef struct ErtsTermProcElement_ ErtsTermProcElement; +struct ErtsTermProcElement_ { + ErtsTermProcElement *next; + ErtsTermProcElement *prev; + int ix; + union { + struct { + Eterm pid; + SysTimeval spawned; + SysTimeval exited; + } process; + struct { + SysTimeval time; + } bif_invocation; + } u; +}; + +static struct { + ErtsTermProcElement *start; + ErtsTermProcElement *end; +} saved_term_procs; + +ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(misc_op_list, + ErtsMiscOpList, + 10, + ERTS_ALC_T_MISC_OP_LIST) + +ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(proclist, + ErtsProcList, + 200, + ERTS_ALC_T_PROC_LIST) + +#define ERTS_RUNQ_IX(IX) 
(&erts_aligned_run_queues[(IX)].runq) +#define ERTS_SCHEDULER_IX(IX) (&erts_aligned_scheduler_data[(IX)].esd) + +#define ERTS_FOREACH_RUNQ(RQVAR, DO) \ +do { \ + ErtsRunQueue *RQVAR; \ + int ix__; \ + for (ix__ = 0; ix__ < erts_no_run_queues; ix__++) { \ + RQVAR = ERTS_RUNQ_IX(ix__); \ + erts_smp_runq_lock(RQVAR); \ + { DO; } \ + erts_smp_runq_unlock(RQVAR); \ + } \ +} while (0) + +#define ERTS_FOREACH_OP_RUNQ(RQVAR, DO) \ +do { \ + ErtsRunQueue *RQVAR; \ + int ix__; \ + ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(&schdlr_sspnd.mtx)); \ + for (ix__ = 0; ix__ < schdlr_sspnd.online; ix__++) { \ + RQVAR = ERTS_RUNQ_IX(ix__); \ + erts_smp_runq_lock(RQVAR); \ + { DO; } \ + erts_smp_runq_unlock(RQVAR); \ + } \ +} while (0) + +#define ERTS_ATOMIC_FOREACH_RUNQ_X(RQVAR, DO, DOX) \ +do { \ + ErtsRunQueue *RQVAR; \ + int ix__; \ + for (ix__ = 0; ix__ < erts_no_run_queues; ix__++) { \ + RQVAR = ERTS_RUNQ_IX(ix__); \ + erts_smp_runq_lock(RQVAR); \ + { DO; } \ + } \ + { DOX; } \ + for (ix__ = 0; ix__ < erts_no_run_queues; ix__++) \ + erts_smp_runq_unlock(ERTS_RUNQ_IX(ix__)); \ +} while (0) + +#define ERTS_ATOMIC_FOREACH_RUNQ(RQVAR, DO) \ + ERTS_ATOMIC_FOREACH_RUNQ_X(RQVAR, DO, ) +/* + * Local functions. + */ + +static void init_processes_bif(void); +static void save_terminating_process(Process *p); +static void exec_misc_ops(ErtsRunQueue *); +static void print_function_from_pc(int to, void *to_arg, Eterm* x); +static int stack_element_dump(int to, void *to_arg, Process* p, Eterm* sp, + int yreg); +#ifdef ERTS_SMP +static void handle_pending_exiters(ErtsProcList *); + +static void cpu_bind_order_sort(erts_cpu_topology_t *cpudata, + int size, + ErtsCpuBindOrder bind_order, + int mk_seq); +static void signal_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size); + +#endif + +static void early_cpu_bind_init(void); +static void late_cpu_bind_init(void); + +#if defined(ERTS_SMP) && defined(ERTS_ENABLE_LOCK_CHECK) +int +erts_smp_lc_runq_is_locked(ErtsRunQueue *runq) +{ + return erts_smp_lc_mtx_is_locked(&runq->mtx); +} +#endif + +void +erts_pre_init_process(void) +{ +#ifdef USE_THREADS + erts_tsd_key_create(&sched_data_key); +#endif + +#ifdef ERTS_ENABLE_LOCK_CHECK + { + int ix; + + erts_psd_required_locks[ERTS_PSD_ERROR_HANDLER].get_locks + = ERTS_PSD_ERROR_HANDLER_BUF_GET_LOCKS; + erts_psd_required_locks[ERTS_PSD_ERROR_HANDLER].set_locks + = ERTS_PSD_ERROR_HANDLER_BUF_SET_LOCKS; + + erts_psd_required_locks[ERTS_PSD_SAVED_CALLS_BUF].get_locks + = ERTS_PSD_SAVED_CALLS_BUF_GET_LOCKS; + erts_psd_required_locks[ERTS_PSD_SAVED_CALLS_BUF].set_locks + = ERTS_PSD_SAVED_CALLS_BUF_SET_LOCKS; + + erts_psd_required_locks[ERTS_PSD_SCHED_ID].get_locks + = ERTS_PSD_SCHED_ID_GET_LOCKS; + erts_psd_required_locks[ERTS_PSD_SCHED_ID].set_locks + = ERTS_PSD_SCHED_ID_SET_LOCKS; + + erts_psd_required_locks[ERTS_PSD_DIST_ENTRY].get_locks + = ERTS_PSD_DIST_ENTRY_GET_LOCKS; + erts_psd_required_locks[ERTS_PSD_DIST_ENTRY].set_locks + = ERTS_PSD_DIST_ENTRY_SET_LOCKS; + + /* Check that we have locks for all entries */ + for (ix = 0; ix < ERTS_PSD_SIZE; ix++) { + ERTS_SMP_LC_ASSERT(erts_psd_required_locks[ix].get_locks); + ERTS_SMP_LC_ASSERT(erts_psd_required_locks[ix].set_locks); + } + } +#endif +} + +/* initialize the scheduler */ +void +erts_init_process(void) +{ + Uint proc_bits = ERTS_PROC_BITS; + +#ifdef ERTS_SMP + erts_disable_proc_not_running_opt = 0; + erts_init_proc_lock(); +#endif + + init_proclist_alloc(); + + erts_smp_atomic_init(&process_count, 0); + + if (erts_use_r9_pids_ports) { + proc_bits = ERTS_R9_PROC_BITS; +
ASSERT(erts_max_processes <= (1 << ERTS_R9_PROC_BITS)); + } + + process_tab = (Process**) erts_alloc(ERTS_ALC_T_PROC_TABLE, + erts_max_processes*sizeof(Process*)); + sys_memzero(process_tab, erts_max_processes * sizeof(Process*)); +#ifdef HYBRID + erts_active_procs = (Process**) + erts_alloc(ERTS_ALC_T_ACTIVE_PROCS, + erts_max_processes * sizeof(Process*)); + erts_num_active_procs = 0; +#endif + + erts_smp_mtx_init(&proc_tab_mtx, "proc_tab"); + p_last = -1; + p_next = 0; + p_serial = 0; + + p_serial_shift = erts_fit_in_bits(erts_max_processes - 1); + p_serial_mask = ((~(~((Uint) 0) << proc_bits)) >> p_serial_shift); + erts_process_tab_index_mask = ~(~((Uint) 0) << p_serial_shift); +#ifndef BM_COUNTERS + processes_busy = 0; +#endif + last_reductions = 0; + last_exact_reductions = 0; + erts_default_process_flags = 0; +} + +void +erts_late_init_process(void) +{ + int ix; + init_processes_bif(); + + erts_smp_spinlock_init(&erts_sched_stat.lock, "sched_stat"); + for (ix = 0; ix < ERTS_NO_PRIO_LEVELS; ix++) { + Eterm atom; + char *atom_str; + switch (ix) { + case PRIORITY_MAX: + atom_str = "process_max"; + break; + case PRIORITY_HIGH: + atom_str = "process_high"; + break; + case PRIORITY_NORMAL: + atom_str = "process_normal"; + break; + case PRIORITY_LOW: + atom_str = "process_low"; + break; + case ERTS_PORT_PRIO_LEVEL: + atom_str = "port"; + break; + default: + atom_str = "bad_prio"; + ASSERT(!"bad prio"); + break; + } + atom = am_atom_put(atom_str, sys_strlen(atom_str)); + erts_sched_stat.prio[ix].name = atom; + erts_sched_stat.prio[ix].total_executed = 0; + erts_sched_stat.prio[ix].executed = 0; + erts_sched_stat.prio[ix].total_migrated = 0; + erts_sched_stat.prio[ix].migrated = 0; + } + +} + +static ERTS_INLINE ErtsProcList * +proclist_create(Process *p) +{ + ErtsProcList *plp = proclist_alloc(); + plp->pid = p->id; + plp->started = p->started; + return plp; +} + +static ERTS_INLINE void +proclist_destroy(ErtsProcList *plp) +{ + proclist_free(plp); +} + +static ERTS_INLINE int +proclist_same(ErtsProcList *plp, Process *p) +{ + return (plp->pid == p->id + && erts_cmp_timeval(&plp->started, &p->started) == 0); +} + +ErtsProcList * +erts_proclist_create(Process *p) +{ + return proclist_create(p); +} + +void +erts_proclist_destroy(ErtsProcList *plp) +{ + proclist_destroy(plp); +} + +int +erts_proclist_same(ErtsProcList *plp, Process *p) +{ + return proclist_same(plp, p); +} + +void * +erts_psd_set_init(Process *p, ErtsProcLocks plocks, int ix, void *data) +{ + void *old; + ErtsProcLocks xplocks; + int refc = 0; + ErtsPSD *psd = erts_alloc(ERTS_ALC_T_PSD, sizeof(ErtsPSD)); + int i; + for (i = 0; i < ERTS_PSD_SIZE; i++) + psd->data[i] = NULL; + + ERTS_SMP_LC_ASSERT(plocks); + ERTS_SMP_LC_ASSERT(plocks == erts_proc_lc_my_proc_locks(p)); + + xplocks = ERTS_PROC_LOCKS_ALL; + xplocks &= ~plocks; + if (xplocks && erts_smp_proc_trylock(p, xplocks) == EBUSY) { + if (xplocks & ERTS_PROC_LOCK_MAIN) { + erts_smp_proc_inc_refc(p); + erts_smp_proc_unlock(p, plocks); + erts_smp_proc_lock(p, ERTS_PROC_LOCKS_ALL); + refc = 1; + } + else { + if (plocks & ERTS_PROC_LOCKS_ALL_MINOR) + erts_smp_proc_unlock(p, plocks & ERTS_PROC_LOCKS_ALL_MINOR); + erts_smp_proc_lock(p, ERTS_PROC_LOCKS_ALL_MINOR); + } + } + if (!p->psd) + p->psd = psd; + if (xplocks) + erts_smp_proc_unlock(p, xplocks); + if (refc) + erts_smp_proc_dec_refc(p); + ASSERT(p->psd); + if (p->psd != psd) + erts_free(ERTS_ALC_T_PSD, psd); + old = p->psd->data[ix]; + p->psd->data[ix] = data; + ERTS_SMP_LC_ASSERT(plocks == erts_proc_lc_my_proc_locks(p)); + 
return old; +} + +#ifdef ERTS_SMP + +static void +prepare_for_block(void *vrq) +{ + erts_smp_runq_unlock((ErtsRunQueue *) vrq); +} + +static void +resume_after_block(void *vrq) +{ + erts_smp_runq_lock((ErtsRunQueue *) vrq); +} + +#endif + +static ERTS_INLINE void +sched_waiting_sys(Uint no, ErtsRunQueue *rq) +{ + ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + ASSERT(rq->waiting >= 0); + rq->flags |= (ERTS_RUNQ_FLG_OUT_OF_WORK + | ERTS_RUNQ_FLG_HALFTIME_OUT_OF_WORK); + rq->waiting++; + rq->waiting *= -1; + rq->woken = 0; + if (erts_system_profile_flags.scheduler) + profile_scheduler(make_small(no), am_inactive); +} + +static ERTS_INLINE void +sched_active_sys(Uint no, ErtsRunQueue *rq) +{ + ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + ASSERT(rq->waiting < 0); + rq->waiting *= -1; + rq->waiting--; + if (erts_system_profile_flags.scheduler) + profile_scheduler(make_small(no), am_active); +} + +Uint +erts_active_schedulers(void) +{ + /* RRRRRRRRR */ + + Uint as = erts_no_schedulers; + + ERTS_ATOMIC_FOREACH_RUNQ(rq, as -= abs(rq->waiting)); + + ASSERT(as >= 0); + return as; +} + +#ifdef ERTS_SMP + +static ERTS_INLINE void +sched_waiting(Uint no, ErtsRunQueue *rq) +{ + ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + rq->flags |= (ERTS_RUNQ_FLG_OUT_OF_WORK + | ERTS_RUNQ_FLG_HALFTIME_OUT_OF_WORK); + if (rq->waiting < 0) + rq->waiting--; + else + rq->waiting++; + rq->woken = 0; + if (erts_system_profile_flags.scheduler) + profile_scheduler(make_small(no), am_inactive); +} + +static ERTS_INLINE void +sched_active(Uint no, ErtsRunQueue *rq) +{ + ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + if (rq->waiting < 0) + rq->waiting++; + else + rq->waiting--; + if (erts_system_profile_flags.scheduler) + profile_scheduler(make_small(no), am_active); +} + +static int ERTS_INLINE +ongoing_multi_scheduling_block(void) +{ + return erts_smp_atomic_read(&schdlr_sspnd.msb.ongoing) != 0; +} + +static ERTS_INLINE void +empty_runq(ErtsRunQueue *rq) +{ + long oifls = erts_smp_atomic_band(&rq->info_flags, ~ERTS_RUNQ_IFLG_NONEMPTY); + if (oifls & ERTS_RUNQ_IFLG_NONEMPTY) { +#ifdef DEBUG + long empty = erts_smp_atomic_read(&no_empty_run_queues); + ASSERT(0 <= empty && empty < erts_no_run_queues); +#endif + erts_smp_atomic_inc(&no_empty_run_queues); + } +} + +static ERTS_INLINE void +non_empty_runq(ErtsRunQueue *rq) +{ + long oifls = erts_smp_atomic_bor(&rq->info_flags, ERTS_RUNQ_IFLG_NONEMPTY); + if (!(oifls & ERTS_RUNQ_IFLG_NONEMPTY)) { +#ifdef DEBUG + long empty = erts_smp_atomic_read(&no_empty_run_queues); + ASSERT(0 < empty && empty <= erts_no_run_queues); +#endif + erts_smp_atomic_dec(&no_empty_run_queues); + } +} + +static ERTS_INLINE int +sched_spin_wake(ErtsRunQueue *rq) +{ +#if ERTS_SCHED_SLEEP_SPINCOUNT == 0 + return 0; +#else + long val; + ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + + val = erts_smp_atomic_read(&rq->spin_waiter); + ASSERT(val >= 0); + if (val != 0) { + erts_smp_atomic_inc(&rq->spin_wake); + return 1; + } + return 0; +#endif +} + +static ERTS_INLINE int +sched_spin_wake_all(ErtsRunQueue *rq) +{ +#if ERTS_SCHED_SLEEP_SPINCOUNT == 0 + return 0; +#else + long val; + ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + + val = erts_smp_atomic_read(&rq->spin_waiter); + ASSERT(val >= 0); + if (val != 0) + erts_smp_atomic_add(&rq->spin_wake, val); + return val; +#endif +} + +static void +sched_sys_wait(Uint no, ErtsRunQueue *rq) +{ + long dt; +#if ERTS_SCHED_SLEEP_SPINCOUNT != 0 + int val; + int spincount = ERTS_SCHED_SLEEP_SPINCOUNT; + 
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + +#endif + + sched_waiting_sys(no, rq); + +#if ERTS_SCHED_SLEEP_SPINCOUNT != 0 + erts_smp_atomic_inc(&rq->spin_waiter); + erts_smp_runq_unlock(rq); + + erl_sys_schedule(1); /* Might give us something to do */ + + dt = do_time_read_and_reset(); + if (dt) bump_timer(dt); + + while (spincount-- > 0) { + val = erts_smp_atomic_read(&rq->spin_wake); + ASSERT(val >= 0); + if (val != 0) { + erts_smp_runq_lock(rq); + val = erts_smp_atomic_read(&rq->spin_wake); + ASSERT(val >= 0); + if (val != 0) + goto woken; + if (spincount == 0) + goto sleep; + erts_smp_runq_unlock(rq); + } + } + + erts_smp_runq_lock(rq); + val = erts_smp_atomic_read(&rq->spin_wake); + ASSERT(val >= 0); + if (val != 0) { + woken: + erts_smp_atomic_dec(&rq->spin_wake); + ASSERT(erts_smp_atomic_read(&rq->spin_wake) >= 0); + erts_smp_atomic_dec(&rq->spin_waiter); + ASSERT(erts_smp_atomic_read(&rq->spin_waiter) >= 0); + } + else { + sleep: + erts_smp_atomic_dec(&rq->spin_waiter); + ASSERT(erts_smp_atomic_read(&rq->spin_waiter) >= 0); + /* + * If we got new I/O tasks we aren't allowed to + * sleep in erl_sys_schedule(). + */ + if (!erts_port_task_have_outstanding_io_tasks()) { +#endif + + erts_sys_schedule_interrupt(0); + erts_smp_runq_unlock(rq); + + erl_sys_schedule(0); + + dt = do_time_read_and_reset(); + if (dt) bump_timer(dt); + + erts_smp_runq_lock(rq); + +#if ERTS_SCHED_SLEEP_SPINCOUNT != 0 + } + } +#endif + + sched_active_sys(no, rq); +} + +static void +sched_cnd_wait(Uint no, ErtsRunQueue *rq) +{ +#if ERTS_SCHED_SLEEP_SPINCOUNT != 0 + int val; + int spincount = ERTS_SCHED_SLEEP_SPINCOUNT; + ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); +#endif + + sched_waiting(no, rq); + erts_smp_activity_begin(ERTS_ACTIVITY_WAIT, + prepare_for_block, + resume_after_block, + (void *) rq); + +#if ERTS_SCHED_SLEEP_SPINCOUNT == 0 + erts_smp_cnd_wait(&rq->cnd, &rq->mtx); +#else + erts_smp_atomic_inc(&rq->spin_waiter); + erts_smp_mtx_unlock(&rq->mtx); + + while (spincount-- > 0) { + val = erts_smp_atomic_read(&rq->spin_wake); + ASSERT(val >= 0); + if (val != 0) { + erts_smp_mtx_lock(&rq->mtx); + val = erts_smp_atomic_read(&rq->spin_wake); + ASSERT(val >= 0); + if (val != 0) + goto woken; + if (spincount == 0) + goto sleep; + erts_smp_mtx_unlock(&rq->mtx); + } + } + + erts_smp_mtx_lock(&rq->mtx); + val = erts_smp_atomic_read(&rq->spin_wake); + ASSERT(val >= 0); + if (val == 0) { + sleep: + erts_smp_atomic_dec(&rq->spin_waiter); + ASSERT(erts_smp_atomic_read(&rq->spin_waiter) >= 0); + erts_smp_cnd_wait(&rq->cnd, &rq->mtx); + } + else { + woken: + erts_smp_atomic_dec(&rq->spin_wake); + ASSERT(erts_smp_atomic_read(&rq->spin_wake) >= 0); + erts_smp_atomic_dec(&rq->spin_waiter); + ASSERT(erts_smp_atomic_read(&rq->spin_waiter) >= 0); + } +#endif + + erts_smp_activity_end(ERTS_ACTIVITY_WAIT, + prepare_for_block, + resume_after_block, + (void *) rq); + + sched_active(no, rq); +} + +static void +wake_one_scheduler(void) +{ + ASSERT(erts_common_run_queue); + ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(erts_common_run_queue)); + if (erts_common_run_queue->waiting) { + if (!sched_spin_wake(erts_common_run_queue)) { + if (erts_common_run_queue->waiting == -1) /* One scheduler waiting + and doing so in + sys_schedule */ + erts_sys_schedule_interrupt(1); + else + erts_smp_cnd_signal(&erts_common_run_queue->cnd); + } + } +} + +static void +wake_scheduler(ErtsRunQueue *rq, int incq) +{ + ASSERT(!erts_common_run_queue); + ASSERT(-1 <= rq->waiting && rq->waiting <= 1); + 
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + if (rq->waiting && !rq->woken) { + if (!sched_spin_wake(rq)) { + if (rq->waiting < 0) + erts_sys_schedule_interrupt(1); + else + erts_smp_cnd_signal(&rq->cnd); + } + rq->woken = 1; + if (incq) + non_empty_runq(rq); + } +} + +static void +wake_all_schedulers(void) +{ + if (erts_common_run_queue) { + erts_smp_runq_lock(erts_common_run_queue); + if (erts_common_run_queue->waiting) { + if (erts_common_run_queue->waiting < 0) + erts_sys_schedule_interrupt(1); + sched_spin_wake_all(erts_common_run_queue); + erts_smp_cnd_broadcast(&erts_common_run_queue->cnd); + } + erts_smp_runq_unlock(erts_common_run_queue); + } + else { + int ix; + for (ix = 0; ix < erts_no_run_queues; ix++) { + ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); + erts_smp_runq_lock(rq); + wake_scheduler(rq, 0); + erts_smp_runq_unlock(rq); + } + } +} + +static ERTS_INLINE int +chk_wake_sched(ErtsRunQueue *crq, int ix, int activate) +{ + long iflgs; + ErtsRunQueue *wrq; + if (crq->ix == ix) + return 0; + wrq = ERTS_RUNQ_IX(ix); + iflgs = erts_smp_atomic_read(&wrq->info_flags); + if (!(iflgs & (ERTS_RUNQ_IFLG_SUSPENDED|ERTS_RUNQ_IFLG_NONEMPTY))) { + erts_smp_xrunq_lock(crq, wrq); + if (activate) { + if (ix == erts_smp_atomic_cmpxchg(&balance_info.active_runqs, ix+1, ix)) { + wrq->flags &= ~ERTS_RUNQ_FLG_INACTIVE; + } + } + wake_scheduler(wrq, 0); + erts_smp_xrunq_unlock(crq, wrq); + return 1; + } + return 0; +} + +static void +wake_scheduler_on_empty_runq(ErtsRunQueue *crq) +{ + int ix = crq->ix; + int stop_ix = ix; + int active_ix = erts_smp_atomic_read(&balance_info.active_runqs); + int balance_ix = erts_smp_atomic_read(&balance_info.used_runqs); + + if (active_ix > balance_ix) + active_ix = balance_ix; + + if (ix >= active_ix) + stop_ix = ix = active_ix; + + /* Try to wake a scheduler on an active run queue */ + while (1) { + ix--; + if (ix < 0) { + if (active_ix == stop_ix) + break; + ix = active_ix - 1; + } + if (ix == stop_ix) + break; + if (chk_wake_sched(crq, ix, 0)) + return; + } + + if (active_ix < balance_ix) { + /* Try to activate a new run queue and wake its scheduler */ + (void) chk_wake_sched(crq, active_ix, 1); + } +} + +#endif /* ERTS_SMP */ + +static ERTS_INLINE void +smp_notify_inc_runq(ErtsRunQueue *runq) +{ +#ifdef ERTS_SMP + if (erts_common_run_queue) + wake_one_scheduler(); + else + wake_scheduler(runq, 1); +#endif +} + +void +erts_smp_notify_inc_runq__(ErtsRunQueue *runq) +{ + smp_notify_inc_runq(runq); +} + +#ifdef ERTS_SMP + +ErtsRunQueue * +erts_prepare_emigrate(ErtsRunQueue *c_rq, ErtsRunQueueInfo *c_rqi, int prio) +{ + ASSERT(ERTS_CHK_RUNQ_FLG_EMIGRATE(c_rq->flags, prio)); + ASSERT(ERTS_CHK_RUNQ_FLG_EVACUATE(c_rq->flags, prio) + || c_rqi->len >= c_rqi->migrate.limit.this); + + while (1) { + ErtsRunQueue *n_rq = c_rqi->migrate.runq; + ERTS_DBG_VERIFY_VALID_RUNQP(n_rq); + erts_smp_xrunq_lock(c_rq, n_rq); + + /* + * erts_smp_xrunq_lock() may release lock on c_rq! We have + * to check that we still want to emigrate and emigrate + * to the same run queue as before. + */ + + if (ERTS_CHK_RUNQ_FLG_EMIGRATE(c_rq->flags, prio)) { + Uint32 force = (ERTS_CHK_RUNQ_FLG_EVACUATE(c_rq->flags, prio) + | (c_rq->flags & ERTS_RUNQ_FLG_INACTIVE)); + if (force || c_rqi->len > c_rqi->migrate.limit.this) { + ErtsRunQueueInfo *n_rqi; + /* We still want to emigrate */ + + if (n_rq != c_rqi->migrate.runq) { + /* Ahh... run queue changed; need to do it all over again... 
*/ + erts_smp_runq_unlock(n_rq); + continue; + } + else { + + if (prio == ERTS_PORT_PRIO_LEVEL) + n_rqi = &n_rq->ports.info; + else + n_rqi = &n_rq->procs.prio_info[prio]; + + if (force || (n_rqi->len < c_rqi->migrate.limit.other)) { + /* emigrate ... */ + return n_rq; + } + } + } + } + + ASSERT(n_rq != c_rq); + erts_smp_runq_unlock(n_rq); + if (!(c_rq->flags & ERTS_RUNQ_FLG_INACTIVE)) { + /* No more emigrations to this runq */ + ERTS_UNSET_RUNQ_FLG_EMIGRATE(c_rq->flags, prio); + ERTS_DBG_SET_INVALID_RUNQP(c_rqi->migrate.runq, 0x3); + } + + return NULL; + } +} + +static void +immigrate(ErtsRunQueue *rq) +{ + int prio; + + ASSERT(rq->flags & ERTS_RUNQ_FLGS_IMMIGRATE_QMASK); + + for (prio = 0; prio < ERTS_NO_PRIO_LEVELS; prio++) { + if (ERTS_CHK_RUNQ_FLG_IMMIGRATE(rq->flags, prio)) { + ErtsRunQueueInfo *rqi = (prio == ERTS_PORT_PRIO_LEVEL + ? &rq->ports.info + : &rq->procs.prio_info[prio]); + ErtsRunQueue *from_rq = rqi->migrate.runq; + int rq_locked, from_rq_locked; + + ERTS_DBG_VERIFY_VALID_RUNQP(from_rq); + + rq_locked = 1; + from_rq_locked = 1; + erts_smp_xrunq_lock(rq, from_rq); + /* + * erts_smp_xrunq_lock() may release lock on rq! We have + * to check that we still want to immigrate from the same + * run queue as before. + */ + if (ERTS_CHK_RUNQ_FLG_IMMIGRATE(rq->flags, prio) + && from_rq == rqi->migrate.runq) { + ErtsRunQueueInfo *from_rqi = (prio == ERTS_PORT_PRIO_LEVEL + ? &from_rq->ports.info + : &from_rq->procs.prio_info[prio]); + if ((ERTS_CHK_RUNQ_FLG_EVACUATE(rq->flags, prio) + && ERTS_CHK_RUNQ_FLG_EVACUATE(from_rq->flags, prio) + && from_rqi->len) + || (from_rqi->len > rqi->migrate.limit.other + && rqi->len < rqi->migrate.limit.this)) { + if (prio == ERTS_PORT_PRIO_LEVEL) { + Port *prt = from_rq->ports.start; + if (prt) { + int prt_locked = 0; + (void) erts_port_migrate(prt, &prt_locked, + from_rq, &from_rq_locked, + rq, &rq_locked); + if (prt_locked) + erts_smp_port_unlock(prt); + } + } + else { + Process *proc; + ErtsRunPrioQueue *from_rpq; + from_rpq = (prio == PRIORITY_LOW + ? &from_rq->procs.prio[PRIORITY_NORMAL] + : &from_rq->procs.prio[prio]); + for (proc = from_rpq->first; proc; proc = proc->next) + if (proc->prio == prio && !proc->bound_runq) + break; + if (proc) { + ErtsProcLocks proc_locks = 0; + (void) erts_proc_migrate(proc, &proc_locks, + from_rq, &from_rq_locked, + rq, &rq_locked); + if (proc_locks) + erts_smp_proc_unlock(proc, proc_locks); + } + } + } + else { + ERTS_UNSET_RUNQ_FLG_IMMIGRATE(rq->flags, prio); + ERTS_DBG_SET_INVALID_RUNQP(rqi->migrate.runq, 0x1); + } + } + if (from_rq_locked) + erts_smp_runq_unlock(from_rq); + if (!rq_locked) + erts_smp_runq_lock(rq); + } + } +} + +static void +evacuate_run_queue(ErtsRunQueue *evac_rq, ErtsRunQueue *rq) +{ + Port *prt; + int prio; + int prt_locked = 0; + int rq_locked = 0; + int evac_rq_locked = 1; + + erts_smp_runq_lock(evac_rq); + + evac_rq->flags &= ~ERTS_RUNQ_FLGS_IMMIGRATE_QMASK; + evac_rq->flags |= (ERTS_RUNQ_FLGS_EMIGRATE_QMASK + | ERTS_RUNQ_FLGS_EVACUATE_QMASK + | ERTS_RUNQ_FLG_SUSPENDED); + + erts_smp_atomic_bor(&evac_rq->info_flags, ERTS_RUNQ_IFLG_SUSPENDED); + + /* + * Need to set up evacuation paths first since we + * may release the run queue lock on evac_rq + * when evacuating. 
+ */ + evac_rq->misc.evac_runq = rq; + evac_rq->ports.info.migrate.runq = rq; + for (prio = 0; prio < ERTS_NO_PROC_PRIO_LEVELS; prio++) + evac_rq->procs.prio_info[prio].migrate.runq = rq; + + /* Evacuate scheduled misc ops */ + + if (evac_rq->misc.start) { + rq_locked = 1; + erts_smp_xrunq_lock(evac_rq, rq); + if (rq->misc.end) + rq->misc.end->next = evac_rq->misc.start; + else + rq->misc.start = evac_rq->misc.start; + rq->misc.end = evac_rq->misc.end; + evac_rq->misc.start = NULL; + evac_rq->misc.end = NULL; + } + + /* Evacuate scheduled ports */ + prt = evac_rq->ports.start; + while (prt) { + (void) erts_port_migrate(prt, &prt_locked, + evac_rq, &evac_rq_locked, + rq, &rq_locked); + if (prt_locked) + erts_smp_port_unlock(prt); + if (!evac_rq_locked) { + evac_rq_locked = 1; + erts_smp_runq_lock(evac_rq); + } + prt = evac_rq->ports.start; + } + + /* Evacuate scheduled processes */ + for (prio = 0; prio < ERTS_NO_PROC_PRIO_LEVELS; prio++) { + Process *proc; + + switch (prio) { + case PRIORITY_MAX: + case PRIORITY_HIGH: + case PRIORITY_NORMAL: + proc = evac_rq->procs.prio[prio].first; + while (proc) { + ErtsProcLocks proc_locks = 0; + + /* Bound processes are stuck... */ + while (proc->bound_runq) { + proc = proc->next; + if (!proc) + goto end_of_proc; + } + + (void) erts_proc_migrate(proc, &proc_locks, + evac_rq, &evac_rq_locked, + rq, &rq_locked); + if (proc_locks) + erts_smp_proc_unlock(proc, proc_locks); + if (!evac_rq_locked) { + erts_smp_runq_lock(evac_rq); + evac_rq_locked = 1; + } + + proc = evac_rq->procs.prio[prio].first; + } + + end_of_proc: + +#ifdef DEBUG + for (proc = evac_rq->procs.prio[prio].first; + proc; + proc = proc->next) { + ASSERT(proc->bound_runq); + } +#endif + break; + case PRIORITY_LOW: + break; + default: + ASSERT(!"Invalid process priority"); + break; + } + } + + if (rq_locked) + erts_smp_runq_unlock(rq); + + if (!evac_rq_locked) + erts_smp_runq_lock(evac_rq); + wake_scheduler(evac_rq, 0); + erts_smp_runq_unlock(evac_rq); +} + +static int +try_steal_task_from_victim(ErtsRunQueue *rq, int *rq_lockedp, ErtsRunQueue *vrq) +{ + Process *proc; + int vrq_locked; + + if (*rq_lockedp) + erts_smp_xrunq_lock(rq, vrq); + else + erts_smp_runq_lock(vrq); + vrq_locked = 1; + + ERTS_SMP_LC_CHK_RUNQ_LOCK(rq, *rq_lockedp); + ERTS_SMP_LC_CHK_RUNQ_LOCK(vrq, vrq_locked); + + /* + * Check for a runnable process to steal... 
+ */ + + switch (vrq->flags & ERTS_RUNQ_FLGS_PROCS_QMASK) { + case MAX_BIT: + case MAX_BIT|HIGH_BIT: + case MAX_BIT|NORMAL_BIT: + case MAX_BIT|LOW_BIT: + case MAX_BIT|HIGH_BIT|NORMAL_BIT: + case MAX_BIT|HIGH_BIT|LOW_BIT: + case MAX_BIT|NORMAL_BIT|LOW_BIT: + case MAX_BIT|HIGH_BIT|NORMAL_BIT|LOW_BIT: + for (proc = vrq->procs.prio[PRIORITY_MAX].last; + proc; + proc = proc->prev) { + if (!proc->bound_runq) + break; + } + if (proc) + break; + case HIGH_BIT: + case HIGH_BIT|NORMAL_BIT: + case HIGH_BIT|LOW_BIT: + case HIGH_BIT|NORMAL_BIT|LOW_BIT: + for (proc = vrq->procs.prio[PRIORITY_HIGH].last; + proc; + proc = proc->prev) { + if (!proc->bound_runq) + break; + } + if (proc) + break; + case NORMAL_BIT: + case LOW_BIT: + case NORMAL_BIT|LOW_BIT: + for (proc = vrq->procs.prio[PRIORITY_NORMAL].last; + proc; + proc = proc->prev) { + if (!proc->bound_runq) + break; + } + if (proc) + break; + case 0: + proc = NULL; + break; + default: + ASSERT(!"Invalid queue mask"); + proc = NULL; + break; + } + + if (proc) { + ErtsProcLocks proc_locks = 0; + int res; + ErtsMigrateResult mres; + mres = erts_proc_migrate(proc, &proc_locks, + vrq, &vrq_locked, + rq, rq_lockedp); + if (proc_locks) + erts_smp_proc_unlock(proc, proc_locks); + res = !0; + switch (mres) { + case ERTS_MIGRATE_FAILED_RUNQ_SUSPENDED: + res = 0; + case ERTS_MIGRATE_SUCCESS: + if (vrq_locked) + erts_smp_runq_unlock(vrq); + return res; + default: /* Other failures */ + break; + } + } + + ERTS_SMP_LC_CHK_RUNQ_LOCK(rq, *rq_lockedp); + ERTS_SMP_LC_CHK_RUNQ_LOCK(vrq, vrq_locked); + + if (!vrq_locked) { + if (*rq_lockedp) + erts_smp_xrunq_lock(rq, vrq); + else + erts_smp_runq_lock(vrq); + vrq_locked = 1; + } + + ERTS_SMP_LC_CHK_RUNQ_LOCK(rq, *rq_lockedp); + ERTS_SMP_LC_CHK_RUNQ_LOCK(vrq, vrq_locked); + + /* + * Check for a runnable port to steal... + */ + + if (vrq->ports.info.len) { + Port *prt = vrq->ports.end; + int prt_locked = 0; + int res; + ErtsMigrateResult mres; + + mres = erts_port_migrate(prt, &prt_locked, + vrq, &vrq_locked, + rq, rq_lockedp); + if (prt_locked) + erts_smp_port_unlock(prt); + res = !0; + switch (mres) { + case ERTS_MIGRATE_FAILED_RUNQ_SUSPENDED: + res = 0; + case ERTS_MIGRATE_SUCCESS: + if (vrq_locked) + erts_smp_runq_unlock(vrq); + return res; + default: /* Other failures */ + break; + } + } + + if (vrq_locked) + erts_smp_runq_unlock(vrq); + + return 0; +} + + +static ERTS_INLINE int +check_possible_steal_victim(ErtsRunQueue *rq, int *rq_lockedp, int vix) +{ + ErtsRunQueue *vrq = ERTS_RUNQ_IX(vix); + long iflgs = erts_smp_atomic_read(&vrq->info_flags); + if (iflgs & ERTS_RUNQ_IFLG_NONEMPTY) + return try_steal_task_from_victim(rq, rq_lockedp, vrq); + else + return 0; +} + + +static int +try_steal_task(ErtsRunQueue *rq) +{ + int res, rq_locked, vix, active_rqs, blnc_rqs; + + if (erts_common_run_queue) + return 0; + + /* + * We are not allowed to steal jobs to this run queue + * if it is suspended. Note that it might get suspended + * at any time when we don't have the lock on the run + * queue. + */ + if (rq->flags & ERTS_RUNQ_FLG_SUSPENDED) + return 0; + + res = 0; + rq_locked = 1; + + ERTS_SMP_LC_CHK_RUNQ_LOCK(rq, rq_locked); + + active_rqs = erts_smp_atomic_read(&balance_info.active_runqs); + blnc_rqs = erts_smp_atomic_read(&balance_info.used_runqs); + + if (active_rqs > blnc_rqs) + active_rqs = blnc_rqs; + + if (rq->ix < active_rqs) { + + /* First try to steal from an inactive run queue... 
*/ + if (active_rqs < blnc_rqs) { + int no = blnc_rqs - active_rqs; + int stop_ix = vix = active_rqs + rq->ix % no; + while (erts_smp_atomic_read(&no_empty_run_queues) < blnc_rqs) { + res = check_possible_steal_victim(rq, &rq_locked, vix); + if (res) + goto done; + vix++; + if (vix >= blnc_rqs) + vix = active_rqs; + if (vix == stop_ix) + break; + } + } + + vix = rq->ix; + + /* ... then try to steal a job from another active queue... */ + while (erts_smp_atomic_read(&no_empty_run_queues) < blnc_rqs) { + vix++; + if (vix >= active_rqs) + vix = 0; + if (vix == rq->ix) + break; + + res = check_possible_steal_victim(rq, &rq_locked, vix); + if (res) + goto done; + } + + } + + done: + + if (!rq_locked) + erts_smp_runq_lock(rq); + + if (!res) + res = !ERTS_EMPTY_RUNQ(rq); + + return res; +} + +#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN +void +erts_smp_notify_check_children_needed(void) +{ + int i; + for (i = 0; i < erts_no_schedulers; i++) { + erts_smp_runq_lock(ERTS_SCHEDULER_IX(i)->run_queue); + ERTS_SCHEDULER_IX(i)->check_children = 1; + if (!erts_common_run_queue) + wake_scheduler(ERTS_SCHEDULER_IX(i)->run_queue, 0); + erts_smp_runq_unlock(ERTS_SCHEDULER_IX(i)->run_queue); + } + if (ongoing_multi_scheduling_block()) { + /* Also blocked schedulers need to check children */ + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + for (i = 0; i < erts_no_schedulers; i++) + ERTS_SCHEDULER_IX(i)->blocked_check_children = 1; + erts_smp_cnd_broadcast(&schdlr_sspnd.cnd); + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + } + if (erts_common_run_queue) + wake_all_schedulers(); +} +#endif + +/* Run queue balancing */ + +typedef struct { + Uint32 flags; + struct { + int max_len; + int avail; + int reds; + int migration_limit; + int emigrate_to; + int immigrate_from; + } prio[ERTS_NO_PRIO_LEVELS]; + int reds; + int full_reds; + int full_reds_history_sum; + int full_reds_history_change; + int oowc; + int max_len; +} ErtsRunQueueBalance; +static ErtsRunQueueBalance *run_queue_info; + +typedef struct { + int qix; + int len; +} ErtsRunQueueCompare; +static ErtsRunQueueCompare *run_queue_compare; + +static int +rqc_len_cmp(const void *x, const void *y) +{ + return ((ErtsRunQueueCompare *) x)->len - ((ErtsRunQueueCompare *) y)->len; +} + +#define ERTS_PERCENT(X, Y) \ + ((Y) == 0 \ + ? ((X) == 0 ? 
100 : INT_MAX) \ + : ((100*(X))/(Y))) + +#define ERTS_UPDATE_FULL_REDS(QIX, LAST_REDS) \ +do { \ + run_queue_info[(QIX)].full_reds \ + = run_queue_info[(QIX)].full_reds_history_sum; \ + run_queue_info[(QIX)].full_reds += (LAST_REDS); \ + run_queue_info[(QIX)].full_reds \ + >>= ERTS_FULL_REDS_HISTORY_AVG_SHFT; \ + run_queue_info[(QIX)].full_reds_history_sum \ + -= run_queue_info[(QIX)].full_reds_history_change; \ + run_queue_info[(QIX)].full_reds_history_sum += (LAST_REDS); \ + run_queue_info[(QIX)].full_reds_history_change = (LAST_REDS); \ +} while (0) + +#define ERTS_DBG_CHK_FULL_REDS_HISTORY(RQ) \ +do { \ + int sum__ = 0; \ + int rix__; \ + for (rix__ = 0; rix__ < ERTS_FULL_REDS_HISTORY_SIZE; rix__++) \ + sum__ += (RQ)->full_reds_history[rix__]; \ + ASSERT(sum__ == (RQ)->full_reds_history_sum); \ +} while (0); + +static void +check_balance(ErtsRunQueue *c_rq) +{ + ErtsRunQueueBalance avg = {0}; + Sint64 scheds_reds, full_scheds_reds; + int forced, active, current_active, oowc, half_full_scheds, full_scheds, + mmax_len, blnc_no_rqs, qix, pix, freds_hist_ix; + + if (erts_smp_atomic_xchg(&balance_info.checking_balance, 1)) { + c_rq->check_balance_reds = INT_MAX; + return; + } + + blnc_no_rqs = (int) erts_smp_atomic_read(&balance_info.used_runqs); + if (blnc_no_rqs == 1) { + c_rq->check_balance_reds = INT_MAX; + erts_smp_atomic_set(&balance_info.checking_balance, 0); + return; + } + + erts_smp_runq_unlock(c_rq); + + if (balance_info.halftime) { + balance_info.halftime = 0; + erts_smp_atomic_set(&balance_info.checking_balance, 0); + ERTS_FOREACH_RUNQ(rq, + { + if (rq->waiting) + rq->flags |= ERTS_RUNQ_FLG_HALFTIME_OUT_OF_WORK; + else + rq->flags &= ~ERTS_RUNQ_FLG_HALFTIME_OUT_OF_WORK; + rq->check_balance_reds = ERTS_RUNQ_CALL_CHECK_BALANCE_REDS; + }); + + erts_smp_runq_lock(c_rq); + return; + } + + /* + * check_balance() is never called in more threads + * than one at a time, i.e., we will normally never + * get any conflicts on the balance_info.update_mtx. + * However, when blocking multi scheduling (which performance + * critical applications do *not* do) migration information + * is manipulated. Such updates of the migration information + * might clash with balancing. 
+ */ + erts_smp_mtx_lock(&balance_info.update_mtx); + + forced = balance_info.forced_check_balance; + balance_info.forced_check_balance = 0; + + blnc_no_rqs = (int) erts_smp_atomic_read(&balance_info.used_runqs); + if (blnc_no_rqs == 1) { + erts_smp_mtx_unlock(&balance_info.update_mtx); + erts_smp_runq_lock(c_rq); + c_rq->check_balance_reds = INT_MAX; + erts_smp_atomic_set(&balance_info.checking_balance, 0); + return; + } + + freds_hist_ix = balance_info.full_reds_history_index; + balance_info.full_reds_history_index++; + if (balance_info.full_reds_history_index >= ERTS_FULL_REDS_HISTORY_SIZE) + balance_info.full_reds_history_index = 0; + + current_active = erts_smp_atomic_read(&balance_info.active_runqs); + + /* Read balance information for all run queues */ + for (qix = 0; qix < blnc_no_rqs; qix++) { + ErtsRunQueue *rq = ERTS_RUNQ_IX(qix); + erts_smp_runq_lock(rq); + + run_queue_info[qix].flags = rq->flags; + for (pix = 0; pix < ERTS_NO_PROC_PRIO_LEVELS; pix++) { + run_queue_info[qix].prio[pix].max_len + = rq->procs.prio_info[pix].max_len; + run_queue_info[qix].prio[pix].reds + = rq->procs.prio_info[pix].reds; + } + run_queue_info[qix].prio[ERTS_PORT_PRIO_LEVEL].max_len + = rq->ports.info.max_len; + run_queue_info[qix].prio[ERTS_PORT_PRIO_LEVEL].reds + = rq->ports.info.reds; + + run_queue_info[qix].full_reds_history_sum + = rq->full_reds_history_sum; + run_queue_info[qix].full_reds_history_change + = rq->full_reds_history[freds_hist_ix]; + + run_queue_info[qix].oowc = rq->out_of_work_count; + run_queue_info[qix].max_len = rq->max_len; + rq->check_balance_reds = INT_MAX; + + erts_smp_runq_unlock(rq); + } + + full_scheds = 0; + half_full_scheds = 0; + full_scheds_reds = 0; + scheds_reds = 0; + oowc = 0; + mmax_len = 0; + + /* Calculate availability for each priority in each run queues */ + for (qix = 0; qix < blnc_no_rqs; qix++) { + int treds = 0; + + if (run_queue_info[qix].flags & ERTS_RUNQ_FLG_OUT_OF_WORK) { + for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) { + run_queue_info[qix].prio[pix].avail = 100; + treds += run_queue_info[qix].prio[pix].reds; + } + if (!(run_queue_info[qix].flags & ERTS_RUNQ_FLG_HALFTIME_OUT_OF_WORK)) + half_full_scheds++; + ERTS_UPDATE_FULL_REDS(qix, ERTS_RUNQ_CHECK_BALANCE_REDS_PER_SCHED); + } + else { + ASSERT(!(run_queue_info[qix].flags & ERTS_RUNQ_FLG_HALFTIME_OUT_OF_WORK)); + for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) + treds += run_queue_info[qix].prio[pix].reds; + if (treds == 0) { + for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) + run_queue_info[qix].prio[pix].avail = 0; + } + else { + int xreds = 0; + int procreds = treds; + procreds -= run_queue_info[qix].prio[ERTS_PORT_PRIO_LEVEL].reds; + + for (pix = 0; pix < ERTS_NO_PROC_PRIO_LEVELS; pix++) { + int av; + + if (xreds == 0) + av = 100; + else if (procreds == xreds) + av = 0; + else { + av = (100*(procreds - xreds)) / procreds; + if (av == 0) + av = 1; + } + run_queue_info[qix].prio[pix].avail = av; + if (pix < PRIORITY_NORMAL) /* ie., max or high */ + xreds += run_queue_info[qix].prio[pix].reds; + } + run_queue_info[qix].prio[ERTS_PORT_PRIO_LEVEL].avail = 100; + } + ERTS_UPDATE_FULL_REDS(qix, treds); + full_scheds_reds += run_queue_info[qix].full_reds; + full_scheds++; + half_full_scheds++; + } + run_queue_info[qix].reds = treds; + scheds_reds += treds; + oowc += run_queue_info[qix].oowc; + if (mmax_len < run_queue_info[qix].max_len) + mmax_len = run_queue_info[qix].max_len; + } + + if (!forced && half_full_scheds != blnc_no_rqs) { + int min = 1; + if (min < half_full_scheds) + min = 
half_full_scheds; + if (full_scheds) { + active = (scheds_reds - 1)/ERTS_RUNQ_CHECK_BALANCE_REDS_PER_SCHED+1; + } + else { + active = balance_info.last_active_runqs - 1; + } + + if (balance_info.last_active_runqs < current_active) { + ERTS_BLNCE_SAVE_RISE(current_active, mmax_len, scheds_reds); + active = current_active; + } + else if (active < balance_info.prev_rise.active_runqs) { + if (ERTS_PERCENT(mmax_len, + balance_info.prev_rise.max_len) >= 90 + && ERTS_PERCENT(scheds_reds, + balance_info.prev_rise.reds) >= 90) { + active = balance_info.prev_rise.active_runqs; + } + } + + if (active < min) + active = min; + else if (active > blnc_no_rqs) + active = blnc_no_rqs; + + if (active == blnc_no_rqs) + goto all_active; + + for (qix = 0; qix < active; qix++) { + run_queue_info[qix].flags = 0; + for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) { + run_queue_info[qix].prio[pix].emigrate_to = -1; + run_queue_info[qix].prio[pix].immigrate_from = -1; + run_queue_info[qix].prio[pix].migration_limit = 0; + } + } + for (qix = active; qix < blnc_no_rqs; qix++) { + run_queue_info[qix].flags = ERTS_RUNQ_FLG_INACTIVE; + for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) { + int tix = qix % active; + ERTS_SET_RUNQ_FLG_EMIGRATE(run_queue_info[qix].flags, pix); + run_queue_info[qix].prio[pix].emigrate_to = tix; + run_queue_info[qix].prio[pix].immigrate_from = -1; + run_queue_info[qix].prio[pix].migration_limit = 0; + } + } + } + else { + if (balance_info.last_active_runqs < current_active) + ERTS_BLNCE_SAVE_RISE(current_active, mmax_len, scheds_reds); + all_active: + + active = blnc_no_rqs; + + for (qix = 0; qix < blnc_no_rqs; qix++) { + + if (full_scheds_reds > 0) { + /* Calculate availability compared to other schedulers */ + if (!(run_queue_info[qix].flags & ERTS_RUNQ_FLG_OUT_OF_WORK)) { + Sint64 tmp = ((Sint64) run_queue_info[qix].full_reds + * (Sint64) full_scheds); + for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) { + Sint64 avail = run_queue_info[qix].prio[pix].avail; + avail = (avail*tmp)/full_scheds_reds; + ASSERT(avail >= 0); + run_queue_info[qix].prio[pix].avail = (int) avail; + } + } + } + + /* Calculate average max length */ + for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) { + run_queue_info[qix].prio[pix].emigrate_to = -1; + run_queue_info[qix].prio[pix].immigrate_from = -1; + avg.prio[pix].max_len += run_queue_info[qix].prio[pix].max_len; + avg.prio[pix].avail += run_queue_info[qix].prio[pix].avail; + } + + } + + for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) { + int max_len = avg.prio[pix].max_len; + if (max_len != 0) { + int avail = avg.prio[pix].avail; + if (avail != 0) { + max_len = ((100*max_len - 1) / avail) + 1; + avg.prio[pix].max_len = max_len; + ASSERT(max_len >= 0); + } + } + } + + /* Calculate migration limits for all priority queues in all + run queues */ + for (qix = 0; qix < blnc_no_rqs; qix++) { + run_queue_info[qix].flags = 0; /* Reset for later use... 
*/ + for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) { + int limit; + if (avg.prio[pix].max_len == 0 + || run_queue_info[qix].prio[pix].avail == 0) + limit = 0; + else + limit = (((avg.prio[pix].max_len + * run_queue_info[qix].prio[pix].avail) - 1) + / 100 + 1); + run_queue_info[qix].prio[pix].migration_limit = limit; + } + } + + /* Setup migration paths for all priorities */ + for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) { + int low = 0, high = 0; + for (qix = 0; qix < blnc_no_rqs; qix++) { + int len_diff = run_queue_info[qix].prio[pix].max_len; + len_diff -= run_queue_info[qix].prio[pix].migration_limit; +#ifdef DBG_PRINT +if (pix == 2) erts_fprintf(stderr, "%d ", len_diff); +#endif + run_queue_compare[qix].qix = qix; + run_queue_compare[qix].len = len_diff; + if (len_diff != 0) { + if (len_diff < 0) + low++; + else + high++; + } + } +#ifdef DBG_PRINT +if (pix == 2) erts_fprintf(stderr, "\n"); +#endif + if (low && high) { + int from_qix; + int to_qix; + int eof = 0; + int eot = 0; + int tix = 0; + int fix = blnc_no_rqs-1; + qsort(run_queue_compare, + blnc_no_rqs, + sizeof(ErtsRunQueueCompare), + rqc_len_cmp); + + while (1) { + if (run_queue_compare[fix].len <= 0) + eof = 1; + if (run_queue_compare[tix].len >= 0) + eot = 1; + if (eof || eot) + break; + from_qix = run_queue_compare[fix].qix; + to_qix = run_queue_compare[tix].qix; + if (run_queue_info[from_qix].prio[pix].avail == 0) { + ERTS_SET_RUNQ_FLG_EVACUATE(run_queue_info[from_qix].flags, + pix); + ERTS_SET_RUNQ_FLG_EVACUATE(run_queue_info[to_qix].flags, + pix); + } + ERTS_SET_RUNQ_FLG_EMIGRATE(run_queue_info[from_qix].flags, pix); + ERTS_SET_RUNQ_FLG_IMMIGRATE(run_queue_info[to_qix].flags, pix); + run_queue_info[from_qix].prio[pix].emigrate_to = to_qix; + run_queue_info[to_qix].prio[pix].immigrate_from = from_qix; + tix++; + fix--; + +#ifdef DBG_PRINT +if (pix == 2) erts_fprintf(stderr, "%d >--> %d\n", from_qix, to_qix); +#endif + } + + if (!eot && eof) { + if (fix < blnc_no_rqs-1) + fix++; + + if (run_queue_compare[fix].len > 0) { + int fix2 = -1; + while (tix < fix) { + if (run_queue_compare[tix].len >= 0) + break; + if (fix2 < fix) + fix2 = blnc_no_rqs-1; + from_qix = run_queue_compare[fix2].qix; + to_qix = run_queue_compare[tix].qix; + ASSERT(to_qix != from_qix); + if (run_queue_info[from_qix].prio[pix].avail == 0) + ERTS_SET_RUNQ_FLG_EVACUATE(run_queue_info[to_qix].flags, + pix); + ERTS_SET_RUNQ_FLG_IMMIGRATE(run_queue_info[to_qix].flags, pix); + run_queue_info[to_qix].prio[pix].immigrate_from = from_qix; + tix++; + fix2--; +#ifdef DBG_PRINT +if (pix == 2) erts_fprintf(stderr, "%d --> %d\n", from_qix, to_qix); +#endif + } + } + } + else if (!eof && eot) { + if (tix > 0) + tix--; + if (run_queue_compare[tix].len < 0) { + int tix2 = 0; + while (tix < fix) { + if (run_queue_compare[fix].len <= 0) + break; + if (tix2 > tix) + tix2 = 0; + from_qix = run_queue_compare[fix].qix; + to_qix = run_queue_compare[tix2].qix; + ASSERT(to_qix != from_qix); + if (run_queue_info[from_qix].prio[pix].avail == 0) + ERTS_SET_RUNQ_FLG_EVACUATE(run_queue_info[from_qix].flags, + pix); + ERTS_SET_RUNQ_FLG_EMIGRATE(run_queue_info[from_qix].flags, pix); + run_queue_info[from_qix].prio[pix].emigrate_to = to_qix; + fix--; + tix2++; +#ifdef DBG_PRINT +if (pix == 2) erts_fprintf(stderr, "%d >-- %d\n", from_qix, to_qix); +#endif + + } + } + } + } + } + +#ifdef DBG_PRINT +erts_fprintf(stderr, "--------------------------------\n"); +#endif + } + + balance_info.last_active_runqs = active; + erts_smp_atomic_set(&balance_info.active_runqs, active); + + 
balance_info.halftime = 1; + erts_smp_atomic_set(&balance_info.checking_balance, 0); + + /* Write migration paths and reset balance statistics in all queues */ + for (qix = 0; qix < blnc_no_rqs; qix++) { + int mqix; + Uint32 flags; + ErtsRunQueue *rq = ERTS_RUNQ_IX(qix); + ErtsRunQueueInfo *rqi; + flags = run_queue_info[qix].flags; + erts_smp_runq_lock(rq); + flags |= (rq->flags & ~ERTS_RUNQ_FLGS_MIGRATION_INFO); + ASSERT(!(flags & ERTS_RUNQ_FLG_OUT_OF_WORK)); + if (rq->waiting) + flags |= ERTS_RUNQ_FLG_OUT_OF_WORK; + + rq->full_reds_history_sum + = run_queue_info[qix].full_reds_history_sum; + rq->full_reds_history[freds_hist_ix] + = run_queue_info[qix].full_reds_history_change; + + ERTS_DBG_CHK_FULL_REDS_HISTORY(rq); + + rq->out_of_work_count = 0; + rq->flags = flags; + rq->max_len = rq->len; + for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) { + rqi = (pix == ERTS_PORT_PRIO_LEVEL + ? &rq->ports.info + : &rq->procs.prio_info[pix]); + rqi->max_len = rqi->len; + rqi->reds = 0; + if (!(ERTS_CHK_RUNQ_FLG_EMIGRATE(flags, pix) + | ERTS_CHK_RUNQ_FLG_IMMIGRATE(flags, pix))) { + ASSERT(run_queue_info[qix].prio[pix].immigrate_from < 0); + ASSERT(run_queue_info[qix].prio[pix].emigrate_to < 0); +#ifdef DEBUG + rqi->migrate.limit.this = -1; + rqi->migrate.limit.other = -1; + ERTS_DBG_SET_INVALID_RUNQP(rqi->migrate.runq, 0x2); +#endif + + } + else if (ERTS_CHK_RUNQ_FLG_EMIGRATE(flags, pix)) { + ASSERT(!ERTS_CHK_RUNQ_FLG_IMMIGRATE(flags, pix)); + ASSERT(run_queue_info[qix].prio[pix].immigrate_from < 0); + ASSERT(run_queue_info[qix].prio[pix].emigrate_to >= 0); + + mqix = run_queue_info[qix].prio[pix].emigrate_to; + rqi->migrate.limit.this + = run_queue_info[qix].prio[pix].migration_limit; + rqi->migrate.limit.other + = run_queue_info[mqix].prio[pix].migration_limit; + rqi->migrate.runq = ERTS_RUNQ_IX(mqix); + } + else { + ASSERT(ERTS_CHK_RUNQ_FLG_IMMIGRATE(flags, pix)); + ASSERT(run_queue_info[qix].prio[pix].emigrate_to < 0); + ASSERT(run_queue_info[qix].prio[pix].immigrate_from >= 0); + + mqix = run_queue_info[qix].prio[pix].immigrate_from; + rqi->migrate.limit.this + = run_queue_info[qix].prio[pix].migration_limit; + rqi->migrate.limit.other + = run_queue_info[mqix].prio[pix].migration_limit; + rqi->migrate.runq = ERTS_RUNQ_IX(mqix); + } + } + + rq->check_balance_reds = ERTS_RUNQ_CALL_CHECK_BALANCE_REDS; + erts_smp_runq_unlock(rq); + } + + balance_info.n++; + erts_smp_mtx_unlock(&balance_info.update_mtx); + + erts_smp_runq_lock(c_rq); +} + +#endif /* #ifdef ERTS_SMP */ + +Uint +erts_debug_nbalance(void) +{ +#ifdef ERTS_SMP + Uint n; + erts_smp_mtx_lock(&balance_info.update_mtx); + n = balance_info.n; + erts_smp_mtx_unlock(&balance_info.update_mtx); + return n; +#else + return 0; +#endif +} + +void +erts_early_init_scheduling(void) +{ + early_cpu_bind_init(); +} + +void +erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online) +{ + int ix, n; + +#ifndef ERTS_SMP + mrq = 0; +#endif + + init_misc_op_list_alloc(); + + ASSERT(no_schedulers_online <= no_schedulers); + ASSERT(no_schedulers_online >= 1); + ASSERT(no_schedulers >= 1); + + /* Create and initialize run queues */ + + n = (int) (mrq ? 
no_schedulers : 1); + + erts_aligned_run_queues = erts_alloc(ERTS_ALC_T_RUNQS, + (sizeof(ErtsAlignedRunQueue)*(n+1))); + if ((((Uint) erts_aligned_run_queues) & ERTS_CACHE_LINE_MASK) != 0) + erts_aligned_run_queues = ((ErtsAlignedRunQueue *) + ((((Uint) erts_aligned_run_queues) + & ~ERTS_CACHE_LINE_MASK) + + ERTS_CACHE_LINE_SIZE)); + +#ifdef ERTS_SMP + erts_smp_atomic_init(&no_empty_run_queues, 0); +#endif + + for (ix = 0; ix < n; ix++) { + int pix, rix; + ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); + + rq->ix = ix; + erts_smp_atomic_init(&rq->info_flags, ERTS_RUNQ_IFLG_NONEMPTY); + + erts_smp_mtx_init(&rq->mtx, "run_queue"); + erts_smp_cnd_init(&rq->cnd); + + erts_smp_atomic_init(&rq->spin_waiter, 0); + erts_smp_atomic_init(&rq->spin_wake, 0); + + rq->waiting = 0; + rq->woken = 0; + rq->flags = !mrq ? ERTS_RUNQ_FLG_SHARED_RUNQ : 0; + rq->check_balance_reds = ERTS_RUNQ_CALL_CHECK_BALANCE_REDS; + rq->full_reds_history_sum = 0; + for (rix = 0; rix < ERTS_FULL_REDS_HISTORY_SIZE; rix++) { + rq->full_reds_history_sum += ERTS_RUNQ_CHECK_BALANCE_REDS_PER_SCHED; + rq->full_reds_history[rix] = ERTS_RUNQ_CHECK_BALANCE_REDS_PER_SCHED; + } + rq->out_of_work_count = 0; + rq->max_len = 0; + rq->len = 0; + rq->wakeup_other = 0; + rq->wakeup_other_reds = 0; + + rq->procs.len = 0; + rq->procs.pending_exiters = NULL; + rq->procs.context_switches = 0; + rq->procs.reductions = 0; + + for (pix = 0; pix < ERTS_NO_PROC_PRIO_LEVELS; pix++) { + rq->procs.prio_info[pix].len = 0; + rq->procs.prio_info[pix].max_len = 0; + rq->procs.prio_info[pix].reds = 0; + rq->procs.prio_info[pix].migrate.limit.this = 0; + rq->procs.prio_info[pix].migrate.limit.other = 0; + ERTS_DBG_SET_INVALID_RUNQP(rq->procs.prio_info[pix].migrate.runq, + 0x0); + if (pix < ERTS_NO_PROC_PRIO_LEVELS - 1) { + rq->procs.prio[pix].first = NULL; + rq->procs.prio[pix].last = NULL; + } + } + + rq->misc.start = NULL; + rq->misc.end = NULL; + rq->misc.evac_runq = NULL; + + rq->ports.info.len = 0; + rq->ports.info.max_len = 0; + rq->ports.info.reds = 0; + rq->ports.info.migrate.limit.this = 0; + rq->ports.info.migrate.limit.other = 0; + rq->ports.info.migrate.runq = NULL; + rq->ports.start = NULL; + rq->ports.end = NULL; + } + + erts_common_run_queue = !mrq ?
ERTS_RUNQ_IX(0) : NULL; + erts_no_run_queues = n; + +#ifdef ERTS_SMP + + if (erts_no_run_queues != 1) { + run_queue_info = erts_alloc(ERTS_ALC_T_RUNQ_BLNS, + (sizeof(ErtsRunQueueBalance) + * erts_no_run_queues)); + run_queue_compare = erts_alloc(ERTS_ALC_T_RUNQ_BLNS, + (sizeof(ErtsRunQueueCompare) + * erts_no_run_queues)); + } + +#endif + + /* Create and initialize scheduler specific data */ + + n = (int) no_schedulers; + erts_aligned_scheduler_data = erts_alloc(ERTS_ALC_T_SCHDLR_DATA, + (sizeof(ErtsAlignedSchedulerData) + *(n+1))); + if ((((Uint) erts_aligned_scheduler_data) & ERTS_CACHE_LINE_MASK) != 0) + erts_aligned_scheduler_data = ((ErtsAlignedSchedulerData *) + ((((Uint) erts_aligned_scheduler_data) + & ~ERTS_CACHE_LINE_MASK) + + ERTS_CACHE_LINE_SIZE)); + for (ix = 0; ix < n; ix++) { + ErtsSchedulerData *esdp = ERTS_SCHEDULER_IX(ix); +#ifdef ERTS_SMP + erts_bits_init_state(&esdp->erl_bits_state); + esdp->match_pseudo_process = NULL; + esdp->free_process = NULL; +#endif + esdp->no = (Uint) ix+1; + esdp->current_process = NULL; + esdp->current_port = NULL; + + esdp->virtual_reds = 0; + esdp->cpu_id = -1; + + erts_init_atom_cache_map(&esdp->atom_cache_map); + + if (erts_common_run_queue) { + esdp->run_queue = erts_common_run_queue; + esdp->run_queue->scheduler = NULL; + } + else { + esdp->run_queue = ERTS_RUNQ_IX(ix); + esdp->run_queue->scheduler = esdp; + } + +#ifdef ERTS_SMP +#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN + esdp->check_children = 0; + esdp->blocked_check_children = 0; +#endif + erts_smp_atomic_init(&esdp->suspended, 0); + erts_smp_atomic_init(&esdp->chk_cpu_bind, 0); +#endif + } + +#ifdef ERTS_SMP + erts_smp_mtx_init(&schdlr_sspnd.mtx, "schdlr_sspnd"); + erts_smp_cnd_init(&schdlr_sspnd.cnd); + + schdlr_sspnd.changing = 0; + schdlr_sspnd.online = no_schedulers_online; + schdlr_sspnd.curr_online = no_schedulers; + erts_smp_atomic_init(&schdlr_sspnd.msb.ongoing, 0); + erts_smp_atomic_init(&schdlr_sspnd.active, no_schedulers); + schdlr_sspnd.msb.procs = NULL; + erts_smp_atomic_set(&balance_info.used_runqs, + erts_common_run_queue ? 1 : no_schedulers_online); + erts_smp_atomic_init(&balance_info.active_runqs, no_schedulers); + balance_info.last_active_runqs = no_schedulers; + erts_smp_mtx_init(&balance_info.update_mtx, "migration_info_update"); + balance_info.forced_check_balance = 0; + balance_info.halftime = 1; + balance_info.full_reds_history_index = 0; + erts_smp_atomic_init(&balance_info.checking_balance, 0); + balance_info.prev_rise.active_runqs = 0; + balance_info.prev_rise.max_len = 0; + balance_info.prev_rise.reds = 0; + balance_info.n = 0; + + if (no_schedulers_online < no_schedulers) { + if (erts_common_run_queue) { + for (ix = no_schedulers_online; ix < no_schedulers; ix++) + erts_smp_atomic_set(&(ERTS_SCHEDULER_IX(ix)->suspended), 1); + } + else { + for (ix = no_schedulers_online; ix < erts_no_run_queues; ix++) + evacuate_run_queue(ERTS_RUNQ_IX(ix), + ERTS_RUNQ_IX(ix % no_schedulers_online)); + } + } + + schdlr_sspnd.wait_curr_online = no_schedulers_online; + schdlr_sspnd.curr_online *= 2; /* Boot strapping...
*/ + schdlr_sspnd.changing = ERTS_SCHED_CHANGING_ONLINE; + + erts_smp_atomic_init(&doing_sys_schedule, 0); + +#else /* !ERTS_SMP */ + { + ErtsSchedulerData *esdp; + esdp = ERTS_SCHEDULER_IX(0); + erts_scheduler_data = esdp; +#ifdef USE_THREADS + erts_tsd_set(sched_data_key, (void *) esdp); +#endif + } + erts_no_schedulers = 1; +#endif + + erts_smp_atomic_init(&function_calls, 0); + + /* init port tasks */ + erts_port_task_init(); + + late_cpu_bind_init(); +} + +ErtsRunQueue * +erts_schedid2runq(Uint id) +{ + int ix; + if (erts_common_run_queue) + return erts_common_run_queue; + ix = (int) id - 1; + ASSERT(0 <= ix && ix < erts_no_run_queues); + return ERTS_RUNQ_IX(ix); +} + +#ifdef USE_THREADS + +ErtsSchedulerData * +erts_get_scheduler_data(void) +{ + return (ErtsSchedulerData *) erts_tsd_get(sched_data_key); +} + +#endif + +static int remove_proc_from_runq(ErtsRunQueue *rq, Process *p, int to_inactive); + +static ERTS_INLINE void +suspend_process(ErtsRunQueue *rq, Process *p) +{ + ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p)); + ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + p->rcount++; /* count number of suspend */ +#ifdef ERTS_SMP + ASSERT(!(p->runq_flags & ERTS_PROC_RUNQ_FLG_RUNNING) + || p == erts_get_current_process()); + ASSERT(p->status != P_RUNNING + || p->runq_flags & ERTS_PROC_RUNQ_FLG_RUNNING); + if (p->status_flags & ERTS_PROC_SFLG_PENDADD2SCHEDQ) + goto runable; +#endif + switch(p->status) { + case P_SUSPENDED: + break; + case P_RUNABLE: +#ifdef ERTS_SMP + runable: + if (!ERTS_PROC_PENDING_EXIT(p)) +#endif + remove_proc_from_runq(rq, p, 1); + /* else: + * leave process in schedq so it will discover the pending exit + */ + p->rstatus = P_RUNABLE; /* wakeup as runnable */ + break; + case P_RUNNING: + p->rstatus = P_RUNABLE; /* wakeup as runnable */ + break; + case P_WAITING: + p->rstatus = P_WAITING; /* wakeup as waiting */ + break; + case P_EXITING: + return; /* ignore this */ + case P_GARBING: + case P_FREE: + erl_exit(1, "bad state in suspend_process()\n"); + } + + if ((erts_system_profile_flags.runnable_procs) && (p->rcount == 1) && (p->status != P_WAITING)) { + profile_runnable_proc(p, am_inactive); + } + + p->status = P_SUSPENDED; + +} + +static ERTS_INLINE void +resume_process(Process *p) +{ + Uint32 *statusp; + ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p)); + switch (p->status) { + case P_SUSPENDED: + statusp = &p->status; + break; + case P_GARBING: + if (p->gcstatus == P_SUSPENDED) { + statusp = &p->gcstatus; + break; + } + /* Fall through */ + default: + return; + } + + ASSERT(p->rcount > 0); + + if (--p->rcount > 0) /* multiple suspend */ + return; + switch(p->rstatus) { + case P_RUNABLE: + *statusp = P_WAITING; /* make erts_add_to_runq work */ + erts_add_to_runq(p); + break; + case P_WAITING: + *statusp = P_WAITING; + break; + default: + erl_exit(1, "bad state in resume_process()\n"); + } + p->rstatus = P_FREE; +} + +#ifdef ERTS_SMP + +static void +susp_sched_prep_block(void *unused) +{ + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); +} + +static void +susp_sched_resume_block(void *unused) +{ + erts_smp_mtx_lock(&schdlr_sspnd.mtx); +} + +static void +suspend_scheduler(ErtsSchedulerData *esdp) +{ + long no = (long) esdp->no; + ErtsRunQueue *rq = esdp->run_queue; + long active_schedulers; + int curr_online = 1; + int wake = 0; + + /* + * Schedulers may be suspended in two different ways: + * - A scheduler may be suspended since it is not online. 
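+     *     (the online count is controlled at runtime by
+     *     erts_set_schedulers_online() below)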
+ * All schedulers with scheduler ids greater than + * schdlr_sspnd.online are suspended. + * - Multi scheduling is blocked. All schedulers except the + * scheduler with scheduler id 1 are suspended. + * + * Regardless of why a scheduler is suspended, it ends up here. + */ + + ASSERT(no != 1); + + erts_smp_runq_unlock(esdp->run_queue); + + /* Unbind from cpu */ + erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx); + if (scheduler2cpu_map[esdp->no].bound_id >= 0 + && erts_unbind_from_cpu(erts_cpuinfo) == 0) { + esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1; + } + erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx); + + if (erts_system_profile_flags.scheduler) + profile_scheduler(make_small(esdp->no), am_inactive); + + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + + active_schedulers = erts_smp_atomic_dectest(&schdlr_sspnd.active); + ASSERT(active_schedulers >= 1); + if (schdlr_sspnd.changing == ERTS_SCHED_CHANGING_MULTI_SCHED) { + if (active_schedulers == schdlr_sspnd.msb.wait_active) + wake = 1; + if (active_schedulers == 1) + schdlr_sspnd.changing = 0; + } + + while (1) { + +#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN + int check_children; + erts_smp_runq_lock(esdp->run_queue); + check_children = esdp->check_children; + esdp->check_children = 0; + erts_smp_runq_unlock(esdp->run_queue); + if (check_children) { + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + erts_check_children(); + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + } +#endif + + if (schdlr_sspnd.changing == ERTS_SCHED_CHANGING_ONLINE) { + int changed = 0; + if (no > schdlr_sspnd.online && curr_online) { + schdlr_sspnd.curr_online--; + curr_online = 0; + changed = 1; + } + else if (no <= schdlr_sspnd.online && !curr_online) { + schdlr_sspnd.curr_online++; + curr_online = 1; + changed = 1; + } + if (changed + && schdlr_sspnd.curr_online == schdlr_sspnd.wait_curr_online) + wake = 1; + if (schdlr_sspnd.online == schdlr_sspnd.curr_online) + schdlr_sspnd.changing = 0; + } + + if (wake) { + erts_smp_cnd_broadcast(&schdlr_sspnd.cnd); + wake = 0; + } + + + if (!(rq->flags & (ERTS_RUNQ_FLG_SHARED_RUNQ|ERTS_RUNQ_FLG_SUSPENDED))) + break; + if ((rq->flags & ERTS_RUNQ_FLG_SHARED_RUNQ) + && !erts_smp_atomic_read(&esdp->suspended)) + break; + + erts_smp_activity_begin(ERTS_ACTIVITY_WAIT, + susp_sched_prep_block, + susp_sched_resume_block, + NULL); + while (1) { + +#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN + if (esdp->blocked_check_children) + break; +#endif + + erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); + + if (schdlr_sspnd.changing == ERTS_SCHED_CHANGING_ONLINE) + break; + + if (!(rq->flags & (ERTS_RUNQ_FLG_SHARED_RUNQ + | ERTS_RUNQ_FLG_SUSPENDED))) + break; + if ((rq->flags & ERTS_RUNQ_FLG_SHARED_RUNQ) + && !erts_smp_atomic_read(&esdp->suspended)) + break; + } + +#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN + esdp->blocked_check_children = 0; +#endif + + erts_smp_activity_end(ERTS_ACTIVITY_WAIT, + susp_sched_prep_block, + susp_sched_resume_block, + NULL); + } + + erts_smp_atomic_inc(&schdlr_sspnd.active); + + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + + if (erts_system_profile_flags.scheduler) + profile_scheduler(make_small(esdp->no), am_active); + + erts_smp_runq_lock(esdp->run_queue); + non_empty_runq(esdp->run_queue); + + /* Make sure we check if we should bind to a cpu or not... 
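bindings may have changed while we were suspended; check_cpu_bind() does the actual (un)binding 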
*/ + if (rq->flags & ERTS_RUNQ_FLG_SHARED_RUNQ) + erts_smp_atomic_set(&esdp->chk_cpu_bind, 1); + else + rq->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND; +} + +#define ERTS_RUNQ_RESET_SUSPEND_INFO(RQ, DBG_ID) \ +do { \ + int pix__; \ + (RQ)->misc.evac_runq = NULL; \ + (RQ)->ports.info.migrate.runq = NULL; \ + (RQ)->flags &= ~(ERTS_RUNQ_FLGS_IMMIGRATE_QMASK \ + | ERTS_RUNQ_FLGS_EMIGRATE_QMASK \ + | ERTS_RUNQ_FLGS_EVACUATE_QMASK \ + | ERTS_RUNQ_FLG_SUSPENDED); \ + (RQ)->flags |= (ERTS_RUNQ_FLG_OUT_OF_WORK \ + | ERTS_RUNQ_FLG_HALFTIME_OUT_OF_WORK); \ + (RQ)->check_balance_reds = ERTS_RUNQ_CALL_CHECK_BALANCE_REDS; \ + erts_smp_atomic_band(&(RQ)->info_flags, ~ERTS_RUNQ_IFLG_SUSPENDED); \ + for (pix__ = 0; pix__ < ERTS_NO_PROC_PRIO_LEVELS; pix__++) { \ + (RQ)->procs.prio_info[pix__].max_len = 0; \ + (RQ)->procs.prio_info[pix__].reds = 0; \ + ERTS_DBG_SET_INVALID_RUNQP((RQ)->procs.prio_info[pix__].migrate.runq,\ + (DBG_ID)); \ + } \ + (RQ)->ports.info.max_len = 0; \ + (RQ)->ports.info.reds = 0; \ +} while (0) + +#define ERTS_RUNQ_RESET_MIGRATION_PATHS__(RQ) \ +do { \ + ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked((RQ))); \ + (RQ)->misc.evac_runq = NULL; \ + (RQ)->ports.info.migrate.runq = NULL; \ + (RQ)->flags &= ~(ERTS_RUNQ_FLGS_IMMIGRATE_QMASK \ + | ERTS_RUNQ_FLGS_EMIGRATE_QMASK \ + | ERTS_RUNQ_FLGS_EVACUATE_QMASK); \ +} while (0) + +#ifdef DEBUG +#define ERTS_RUNQ_RESET_MIGRATION_PATHS(RQ, DBG_ID) \ +do { \ + int pix__; \ + ERTS_RUNQ_RESET_MIGRATION_PATHS__((RQ)); \ + for (pix__ = 0; pix__ < ERTS_NO_PROC_PRIO_LEVELS; pix__++) \ + ERTS_DBG_SET_INVALID_RUNQP((RQ)->procs.prio_info[pix__].migrate.runq,\ + (DBG_ID)); \ +} while (0) +#else +#define ERTS_RUNQ_RESET_MIGRATION_PATHS(RQ, DBG_ID) \ + ERTS_RUNQ_RESET_MIGRATION_PATHS__((RQ)) +#endif + +ErtsSchedSuspendResult +erts_schedulers_state(Uint *total, + Uint *online, + Uint *active, + int yield_allowed) +{ + int res; + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + if (yield_allowed && schdlr_sspnd.changing) + res = ERTS_SCHDLR_SSPND_YIELD_RESTART; + else { + *active = *online = schdlr_sspnd.online; + if (ongoing_multi_scheduling_block()) + *active = 1; + res = ERTS_SCHDLR_SSPND_DONE; + } + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + *total = erts_no_schedulers; + return res; +} + +ErtsSchedSuspendResult +erts_set_schedulers_online(Process *p, + ErtsProcLocks plocks, + Sint new_no, + Sint *old_no) +{ + int ix, res, no, have_unlocked_plocks; + + if (new_no < 1 || erts_no_schedulers < new_no) + return ERTS_SCHDLR_SSPND_EINVAL; + + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + + have_unlocked_plocks = 0; + no = (int) new_no; + + if (schdlr_sspnd.changing) { + res = ERTS_SCHDLR_SSPND_YIELD_RESTART; + } + else { + int online = *old_no = schdlr_sspnd.online; + if (no == schdlr_sspnd.online) { + res = ERTS_SCHDLR_SSPND_DONE; + } + else { + schdlr_sspnd.changing = ERTS_SCHED_CHANGING_ONLINE; + schdlr_sspnd.online = no; + if (no > online) { + int ix; + schdlr_sspnd.wait_curr_online = no; + if (ongoing_multi_scheduling_block()) + /* No schedulers to resume */; + else if (erts_common_run_queue) { + for (ix = online; ix < no; ix++) + erts_smp_atomic_set(&ERTS_SCHEDULER_IX(ix)->suspended, + 0); + } + else { + if (plocks) { + have_unlocked_plocks = 1; + erts_smp_proc_unlock(p, plocks); + } + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + erts_smp_mtx_lock(&balance_info.update_mtx); + for (ix = online; ix < no; ix++) { + ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); + erts_smp_runq_lock(rq); + ERTS_RUNQ_RESET_SUSPEND_INFO(rq, 0x5); + erts_smp_runq_unlock(rq); + } + /* + * Spread evacuation paths 
among all online
+		     * run queues.
+		     */
+		    for (ix = no; ix < erts_no_run_queues; ix++) {
+			ErtsRunQueue *from_rq = ERTS_RUNQ_IX(ix);
+			ErtsRunQueue *to_rq = ERTS_RUNQ_IX(ix % no);
+			evacuate_run_queue(from_rq, to_rq);
+		    }
+		    erts_smp_atomic_set(&balance_info.used_runqs, no);
+		    erts_smp_mtx_unlock(&balance_info.update_mtx);
+		    erts_smp_mtx_lock(&schdlr_sspnd.mtx);
+		}
+		erts_smp_cnd_broadcast(&schdlr_sspnd.cnd);
+		res = ERTS_SCHDLR_SSPND_DONE;
+	    }
+	    else /* if (no < online) */ {
+		if (p->scheduler_data->no <= no) {
+		    res = ERTS_SCHDLR_SSPND_DONE;
+		    schdlr_sspnd.wait_curr_online = no;
+		}
+		else {
+		    /*
+		     * Yield! Current process needs to migrate
+		     * before bif returns.
+		     */
+		    res = ERTS_SCHDLR_SSPND_YIELD_DONE;
+		    schdlr_sspnd.wait_curr_online = no+1;
+		}
+
+		if (ongoing_multi_scheduling_block())
+		    erts_smp_cnd_broadcast(&schdlr_sspnd.cnd);
+		else if (erts_common_run_queue) {
+		    for (ix = no; ix < online; ix++)
+			erts_smp_atomic_set(&ERTS_SCHEDULER_IX(ix)->suspended,
+					    1);
+		    wake_all_schedulers();
+		}
+		else {
+		    if (plocks) {
+			have_unlocked_plocks = 1;
+			erts_smp_proc_unlock(p, plocks);
+		    }
+		    erts_smp_mtx_unlock(&schdlr_sspnd.mtx);
+		    erts_smp_mtx_lock(&balance_info.update_mtx);
+
+		    for (ix = 0; ix < online; ix++) {
+			ErtsRunQueue *rq = ERTS_RUNQ_IX(ix);
+			erts_smp_runq_lock(rq);
+			ERTS_RUNQ_RESET_MIGRATION_PATHS(rq, 0x6);
+			erts_smp_runq_unlock(rq);
+		    }
+		    /*
+		     * Evacuation order is important! Newly suspended
+		     * run queues have to be evacuated last.
+		     */
+		    for (ix = erts_no_run_queues-1; ix >= no; ix--)
+			evacuate_run_queue(ERTS_RUNQ_IX(ix),
+					   ERTS_RUNQ_IX(ix % no));
+		    erts_smp_atomic_set(&balance_info.used_runqs, no);
+		    erts_smp_mtx_unlock(&balance_info.update_mtx);
+		    erts_smp_mtx_lock(&schdlr_sspnd.mtx);
+		    ERTS_FOREACH_OP_RUNQ(rq, wake_scheduler(rq, 0));
+		}
+	    }
+
+	    erts_smp_activity_begin(ERTS_ACTIVITY_WAIT,
+				    susp_sched_prep_block,
+				    susp_sched_resume_block,
+				    NULL);
+	    while (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online)
+		erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx);
+	    erts_smp_activity_end(ERTS_ACTIVITY_WAIT,
+				  susp_sched_prep_block,
+				  susp_sched_resume_block,
+				  NULL);
+	}
+    }
+
+    erts_smp_mtx_unlock(&schdlr_sspnd.mtx);
+    if (have_unlocked_plocks)
+	erts_smp_proc_lock(p, plocks);
+
+    return res;
+}
+
+ErtsSchedSuspendResult
+erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all)
+{
+    int ix, res, have_unlocked_plocks = 0;
+    ErtsProcList *plp;
+
+    erts_smp_mtx_lock(&schdlr_sspnd.mtx);
+    if (on) {
+	if (schdlr_sspnd.changing) {
+	    res = ERTS_SCHDLR_SSPND_YIELD_RESTART; /* Yield */
+	}
+	else if (erts_is_multi_scheduling_blocked()) {
+	    plp = proclist_create(p);
+	    plp->next = schdlr_sspnd.msb.procs;
+	    schdlr_sspnd.msb.procs = plp;
+	    p->flags |= F_HAVE_BLCKD_MSCHED;
+	    ASSERT(erts_smp_atomic_read(&schdlr_sspnd.active) == 1);
+	    ASSERT(p->scheduler_data->no == 1);
+	    res = 1;
+	}
+	else {
+	    p->flags |= F_HAVE_BLCKD_MSCHED;
+	    if (plocks) {
+		have_unlocked_plocks = 1;
+		erts_smp_proc_unlock(p, plocks);
+	    }
+	    erts_smp_atomic_set(&schdlr_sspnd.msb.ongoing, 1);
+	    if (schdlr_sspnd.online == 1) {
+		res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED;
+		ASSERT(erts_smp_atomic_read(&schdlr_sspnd.active) == 1);
+		ASSERT(p->scheduler_data->no == 1);
+	    }
+	    else {
+		schdlr_sspnd.changing = ERTS_SCHED_CHANGING_MULTI_SCHED;
+		if (p->scheduler_data->no == 1) {
+		    res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED;
+		    schdlr_sspnd.msb.wait_active = 1;
+		}
+		else {
+		    /*
+		     * Yield! Current process needs to migrate
+		     * before bif returns.
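+		     * It is running on a scheduler other than scheduler 1;
+		     * that scheduler is about to be suspended, so the
+		     * process must first be picked up by scheduler 1.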
+ */ + res = ERTS_SCHDLR_SSPND_YIELD_DONE_MSCHED_BLOCKED; + schdlr_sspnd.msb.wait_active = 2; + } + if (erts_common_run_queue) { + for (ix = 1; ix < schdlr_sspnd.online; ix++) + erts_smp_atomic_set(&ERTS_SCHEDULER_IX(ix)->suspended, 1); + wake_all_schedulers(); + } + else { + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + erts_smp_mtx_lock(&balance_info.update_mtx); + erts_smp_atomic_set(&balance_info.used_runqs, 1); + for (ix = 0; ix < schdlr_sspnd.online; ix++) { + ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); + erts_smp_runq_lock(rq); + ERTS_RUNQ_RESET_MIGRATION_PATHS(rq, 0x7); + erts_smp_runq_unlock(rq); + } + /* + * Evacuate all activities in all other run queues + * into the first run queue. Note order is important, + * online run queues has to be evacuated last. + */ + for (ix = erts_no_run_queues-1; ix >= 1; ix--) + evacuate_run_queue(ERTS_RUNQ_IX(ix), ERTS_RUNQ_IX(0)); + erts_smp_mtx_unlock(&balance_info.update_mtx); + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + } + erts_smp_activity_begin(ERTS_ACTIVITY_WAIT, + susp_sched_prep_block, + susp_sched_resume_block, + NULL); + while (erts_smp_atomic_read(&schdlr_sspnd.active) + != schdlr_sspnd.msb.wait_active) + erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); + erts_smp_activity_end(ERTS_ACTIVITY_WAIT, + susp_sched_prep_block, + susp_sched_resume_block, + NULL); + } + plp = proclist_create(p); + plp->next = schdlr_sspnd.msb.procs; + schdlr_sspnd.msb.procs = plp; +#ifdef DEBUG + ERTS_FOREACH_RUNQ(srq, + { + if (srq != ERTS_RUNQ_IX(0)) { + ASSERT(ERTS_EMPTY_RUNQ(srq)); + ASSERT(srq->flags & ERTS_RUNQ_FLG_SUSPENDED); + } + }); +#endif + ASSERT(p->scheduler_data); + } + } + else if (!ongoing_multi_scheduling_block()) { + ASSERT(!schdlr_sspnd.msb.procs); + res = ERTS_SCHDLR_SSPND_DONE; + } + else { + if (p->flags & F_HAVE_BLCKD_MSCHED) { + ErtsProcList **plpp = &schdlr_sspnd.msb.procs; + plp = schdlr_sspnd.msb.procs; + + while (plp) { + if (!proclist_same(plp, p)){ + plpp = &plp->next; + plp = plp->next; + } + else { + *plpp = plp->next; + proclist_destroy(plp); + if (!all) + break; + plp = *plpp; + } + } + } + if (schdlr_sspnd.msb.procs) + res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED; + else { + schdlr_sspnd.changing = ERTS_SCHED_CHANGING_MULTI_SCHED; +#ifdef DEBUG + ERTS_FOREACH_RUNQ(rq, + { + if (rq != p->scheduler_data->run_queue) { + if (!ERTS_EMPTY_RUNQ(rq)) { + Process *rp; + int pix; + ASSERT(rq->ports.info.len == 0); + for (pix = 0; pix < ERTS_NO_PROC_PRIO_LEVELS; pix++) { + for (rp = rq->procs.prio[pix].first; + rp; + rp = rp->next) { + ASSERT(rp->bound_runq); + } + } + } + + ASSERT(rq->flags & ERTS_RUNQ_FLG_SUSPENDED); + } + }); +#endif + p->flags &= ~F_HAVE_BLCKD_MSCHED; + erts_smp_atomic_set(&schdlr_sspnd.msb.ongoing, 0); + if (schdlr_sspnd.online == 1) + /* No schedulers to resume */; + else if (erts_common_run_queue) { + for (ix = 1; ix < schdlr_sspnd.online; ix++) + erts_smp_atomic_set(&ERTS_SCHEDULER_IX(ix)->suspended, 0); + wake_all_schedulers(); + } + else { + int online = schdlr_sspnd.online; + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + if (plocks) { + have_unlocked_plocks = 1; + erts_smp_proc_unlock(p, plocks); + } + erts_smp_mtx_lock(&balance_info.update_mtx); + + /* Resume all online run queues */ + for (ix = 1; ix < online; ix++) { + ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); + erts_smp_runq_lock(rq); + ERTS_RUNQ_RESET_SUSPEND_INFO(rq, 0x4); + erts_smp_runq_unlock(rq); + } + + /* Spread evacuation paths among all online run queues */ + for (ix = online; ix < erts_no_run_queues; ix++) + evacuate_run_queue(ERTS_RUNQ_IX(ix), + 
ERTS_RUNQ_IX(ix % online)); + + erts_smp_atomic_set(&balance_info.used_runqs, online); + /* Make sure that we balance soon... */ + balance_info.forced_check_balance = 1; + erts_smp_runq_lock(ERTS_RUNQ_IX(0)); + ERTS_RUNQ_IX(0)->check_balance_reds = 0; + erts_smp_runq_unlock(ERTS_RUNQ_IX(0)); + erts_smp_mtx_unlock(&balance_info.update_mtx); + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + } + erts_smp_cnd_broadcast(&schdlr_sspnd.cnd); + schdlr_sspnd.changing = 0; + res = ERTS_SCHDLR_SSPND_DONE; + } + } + + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + if (have_unlocked_plocks) + erts_smp_proc_lock(p, plocks); + return res; +} + +#ifdef DEBUG +void +erts_dbg_multi_scheduling_return_trap(Process *p, Eterm return_value) +{ + if (return_value == am_blocked) { + long active = erts_smp_atomic_read(&schdlr_sspnd.active); + ASSERT(1 <= active && active <= 2); + ASSERT(ERTS_PROC_GET_SCHDATA(p)->no == 1); + } +} +#endif + +int +erts_is_multi_scheduling_blocked(void) +{ + return (erts_smp_atomic_read(&schdlr_sspnd.msb.ongoing) + && erts_smp_atomic_read(&schdlr_sspnd.active) == 1); +} + +Eterm +erts_multi_scheduling_blockers(Process *p) +{ + Eterm res = NIL; + + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + if (erts_is_multi_scheduling_blocked()) { + Eterm *hp, *hp_end; + ErtsProcList *plp1, *plp2; + Uint max_size; + ASSERT(schdlr_sspnd.msb.procs); + for (max_size = 0, plp1 = schdlr_sspnd.msb.procs; + plp1; + plp1 = plp1->next) { + max_size += 2; + } + ASSERT(max_size); + hp = HAlloc(p, max_size); + hp_end = hp + max_size; + for (plp1 = schdlr_sspnd.msb.procs; plp1; plp1 = plp1->next) { + for (plp2 = schdlr_sspnd.msb.procs; + plp2->pid != plp1->pid; + plp2 = plp2->next); + if (plp2 == plp1) { + res = CONS(hp, plp1->pid, res); + hp += 2; + } + /* else: already in result list */ + } + HRelease(p, hp_end, hp); + } + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + return res; +} + +static void * +sched_thread_func(void *vesdp) +{ +#ifdef ERTS_ENABLE_LOCK_CHECK + { + char buf[31]; + Uint no = ((ErtsSchedulerData *) vesdp)->no; + erts_snprintf(&buf[0], 31, "scheduler %bpu", no); + erts_lc_set_thread_name(&buf[0]); + } +#endif + erts_alloc_reg_scheduler_id(((ErtsSchedulerData *) vesdp)->no); + erts_tsd_set(sched_data_key, vesdp); +#ifdef ERTS_SMP + erts_proc_lock_prepare_proc_lock_waiter(); +#endif + erts_register_blockable_thread(); +#ifdef HIPE + hipe_thread_signal_init(); +#endif + erts_thread_init_float(); + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + + ASSERT(schdlr_sspnd.changing == ERTS_SCHED_CHANGING_ONLINE); + + schdlr_sspnd.curr_online--; + + if (((ErtsSchedulerData *) vesdp)->no != 1) { + if (schdlr_sspnd.online == schdlr_sspnd.curr_online) { + schdlr_sspnd.changing = 0; + erts_smp_cnd_broadcast(&schdlr_sspnd.cnd); + } + } + else if (schdlr_sspnd.curr_online == schdlr_sspnd.wait_curr_online) + schdlr_sspnd.changing = 0; + else { + erts_smp_activity_begin(ERTS_ACTIVITY_WAIT, + susp_sched_prep_block, + susp_sched_resume_block, + NULL); + while (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online) + erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); + erts_smp_activity_end(ERTS_ACTIVITY_WAIT, + susp_sched_prep_block, + susp_sched_resume_block, + NULL); + ASSERT(!schdlr_sspnd.changing); + } + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + + process_main(); + /* No schedulers should *ever* terminate */ + erl_exit(ERTS_ABORT_EXIT, "Scheduler thread number %bpu terminated\n", + ((ErtsSchedulerData *) vesdp)->no); + return NULL; +} + +void +erts_start_schedulers(void) +{ + int res = 0; + Uint actual = 0; + Uint wanted = 
erts_no_schedulers; + Uint wanted_no_schedulers = erts_no_schedulers; + ethr_thr_opts opts = ETHR_THR_OPTS_DEFAULT_INITER; + + opts.detached = 1; + opts.suggested_stack_size = erts_sched_thread_suggested_stack_size; + + if (wanted < 1) + wanted = 1; + if (wanted > ERTS_MAX_NO_OF_SCHEDULERS) { + wanted = ERTS_MAX_NO_OF_SCHEDULERS; + res = ENOTSUP; + } + + erts_block_system(0); + + while (actual < wanted) { + ErtsSchedulerData *esdp = ERTS_SCHEDULER_IX(actual); + actual++; + ASSERT(actual == esdp->no); +#ifdef ERTS_ENABLE_LOCK_COUNT + res = erts_lcnt_thr_create(&esdp->tid,sched_thread_func,(void*)esdp,&opts); +#else + res = ethr_thr_create(&esdp->tid,sched_thread_func,(void*)esdp,&opts); +#endif + if (res != 0) { + actual--; + break; + } + } + + erts_no_schedulers = actual; + erts_release_system(); + + if (actual < 1) + erl_exit(1, + "Failed to create any scheduler-threads: %s (%d)\n", + erl_errno_id(res), + res); + if (res != 0) { + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); + ASSERT(actual != wanted_no_schedulers); + erts_dsprintf(dsbufp, + "Failed to create %bpu scheduler-threads (%s:%d); " + "only %bpu scheduler-thread%s created.\n", + wanted_no_schedulers, erl_errno_id(res), res, + actual, actual == 1 ? " was" : "s were"); + erts_send_error_to_logger_nogl(dsbufp); + } +} + +#endif /* ERTS_SMP */ + +static int +int_cmp(const void *vx, const void *vy) +{ + return *((int *) vx) - *((int *) vy); +} + +static int +cpu_spread_order_cmp(const void *vx, const void *vy) +{ + erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; + erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; + + if (x->thread != y->thread) + return x->thread - y->thread; + if (x->core != y->core) + return x->core - y->core; + if (x->processor_node != y->processor_node) + return x->processor_node - y->processor_node; + if (x->processor != y->processor) + return x->processor - y->processor; + if (x->node != y->node) + return x->node - y->node; + return 0; +} + +static int +cpu_processor_spread_order_cmp(const void *vx, const void *vy) +{ + erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; + erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; + + if (x->thread != y->thread) + return x->thread - y->thread; + if (x->processor_node != y->processor_node) + return x->processor_node - y->processor_node; + if (x->core != y->core) + return x->core - y->core; + if (x->node != y->node) + return x->node - y->node; + if (x->processor != y->processor) + return x->processor - y->processor; + return 0; +} + +static int +cpu_thread_spread_order_cmp(const void *vx, const void *vy) +{ + erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; + erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; + + if (x->thread != y->thread) + return x->thread - y->thread; + if (x->node != y->node) + return x->node - y->node; + if (x->processor != y->processor) + return x->processor - y->processor; + if (x->processor_node != y->processor_node) + return x->processor_node - y->processor_node; + if (x->core != y->core) + return x->core - y->core; + return 0; +} + +static int +cpu_thread_no_node_processor_spread_order_cmp(const void *vx, const void *vy) +{ + erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; + erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; + + if (x->thread != y->thread) + return x->thread - y->thread; + if (x->node != y->node) + return x->node - y->node; + if (x->core != y->core) + return x->core - y->core; + if (x->processor != y->processor) + return x->processor - y->processor; + return 0; +} + +static int 
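+/* As above, but with node promoted to the most significant sort key. */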
+cpu_no_node_processor_spread_order_cmp(const void *vx, const void *vy) +{ + erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; + erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; + + if (x->node != y->node) + return x->node - y->node; + if (x->thread != y->thread) + return x->thread - y->thread; + if (x->core != y->core) + return x->core - y->core; + if (x->processor != y->processor) + return x->processor - y->processor; + return 0; +} + +static int +cpu_no_node_thread_spread_order_cmp(const void *vx, const void *vy) +{ + erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; + erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; + + if (x->node != y->node) + return x->node - y->node; + if (x->thread != y->thread) + return x->thread - y->thread; + if (x->processor != y->processor) + return x->processor - y->processor; + if (x->core != y->core) + return x->core - y->core; + return 0; +} + +static int +cpu_no_spread_order_cmp(const void *vx, const void *vy) +{ + erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; + erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; + + if (x->node != y->node) + return x->node - y->node; + if (x->processor != y->processor) + return x->processor - y->processor; + if (x->processor_node != y->processor_node) + return x->processor_node - y->processor_node; + if (x->core != y->core) + return x->core - y->core; + if (x->thread != y->thread) + return x->thread - y->thread; + return 0; +} + +static ERTS_INLINE void +make_cpudata_id_seq(erts_cpu_topology_t *cpudata, int size, int no_node) +{ + int ix; + int node = -1; + int processor = -1; + int processor_node = -1; + int processor_node_node = -1; + int core = -1; + int thread = -1; + int old_node = -1; + int old_processor = -1; + int old_processor_node = -1; + int old_core = -1; + int old_thread = -1; + + for (ix = 0; ix < size; ix++) { + if (!no_node || cpudata[ix].node >= 0) { + if (old_node == cpudata[ix].node) + cpudata[ix].node = node; + else { + old_node = cpudata[ix].node; + old_processor = processor = -1; + if (!no_node) + old_processor_node = processor_node = -1; + old_core = core = -1; + old_thread = thread = -1; + if (no_node || cpudata[ix].node >= 0) + cpudata[ix].node = ++node; + } + } + if (old_processor == cpudata[ix].processor) + cpudata[ix].processor = processor; + else { + old_processor = cpudata[ix].processor; + if (!no_node) + processor_node_node = old_processor_node = processor_node = -1; + old_core = core = -1; + old_thread = thread = -1; + cpudata[ix].processor = ++processor; + } + if (no_node && cpudata[ix].processor_node < 0) + old_processor_node = -1; + else { + if (old_processor_node == cpudata[ix].processor_node) { + if (no_node) + cpudata[ix].node = cpudata[ix].processor_node = node; + else { + if (processor_node_node >= 0) + cpudata[ix].node = processor_node_node; + cpudata[ix].processor_node = processor_node; + } + } + else { + old_processor_node = cpudata[ix].processor_node; + old_core = core = -1; + old_thread = thread = -1; + if (no_node) + cpudata[ix].node = cpudata[ix].processor_node = ++node; + else { + cpudata[ix].node = processor_node_node = ++node; + cpudata[ix].processor_node = ++processor_node; + } + } + } + if (!no_node && cpudata[ix].processor_node < 0) + cpudata[ix].processor_node = 0; + if (old_core == cpudata[ix].core) + cpudata[ix].core = core; + else { + old_core = cpudata[ix].core; + old_thread = thread = -1; + cpudata[ix].core = ++core; + } + if (old_thread == cpudata[ix].thread) + cpudata[ix].thread = thread; + else + old_thread = cpudata[ix].thread = 
++thread; + } +} + +static void +cpu_bind_order_sort(erts_cpu_topology_t *cpudata, + int size, + ErtsCpuBindOrder bind_order, + int mk_seq) +{ + if (size > 1) { + int no_node = 0; + int (*cmp_func)(const void *, const void *); + switch (bind_order) { + case ERTS_CPU_BIND_SPREAD: + cmp_func = cpu_spread_order_cmp; + break; + case ERTS_CPU_BIND_PROCESSOR_SPREAD: + cmp_func = cpu_processor_spread_order_cmp; + break; + case ERTS_CPU_BIND_THREAD_SPREAD: + cmp_func = cpu_thread_spread_order_cmp; + break; + case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD: + no_node = 1; + cmp_func = cpu_thread_no_node_processor_spread_order_cmp; + break; + case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD: + no_node = 1; + cmp_func = cpu_no_node_processor_spread_order_cmp; + break; + case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD: + no_node = 1; + cmp_func = cpu_no_node_thread_spread_order_cmp; + break; + case ERTS_CPU_BIND_NO_SPREAD: + cmp_func = cpu_no_spread_order_cmp; + break; + default: + cmp_func = NULL; + erl_exit(ERTS_ABORT_EXIT, + "Bad cpu bind type: %d\n", + (int) cpu_bind_order); + break; + } + + if (mk_seq) + make_cpudata_id_seq(cpudata, size, no_node); + + qsort(cpudata, size, sizeof(erts_cpu_topology_t), cmp_func); + } +} + +static int +processor_order_cmp(const void *vx, const void *vy) +{ + erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; + erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; + + if (x->processor != y->processor) + return x->processor - y->processor; + if (x->node != y->node) + return x->node - y->node; + if (x->processor_node != y->processor_node) + return x->processor_node - y->processor_node; + if (x->core != y->core) + return x->core - y->core; + if (x->thread != y->thread) + return x->thread - y->thread; + return 0; +} + +static void +check_cpu_bind(ErtsSchedulerData *esdp) +{ + int res; + int cpu_id; + erts_smp_runq_unlock(esdp->run_queue); + erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx); + cpu_id = scheduler2cpu_map[esdp->no].bind_id; + if (cpu_id >= 0 && cpu_id != scheduler2cpu_map[esdp->no].bound_id) { + res = erts_bind_to_cpu(erts_cpuinfo, cpu_id); + if (res == 0) + esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = cpu_id; + else { + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); + erts_dsprintf(dsbufp, "Scheduler %d failed to bind to cpu %d: %s\n", + (int) esdp->no, cpu_id, erl_errno_id(-res)); + erts_send_error_to_logger_nogl(dsbufp); + if (scheduler2cpu_map[esdp->no].bound_id >= 0) + goto unbind; + } + } + else if (cpu_id < 0 && scheduler2cpu_map[esdp->no].bound_id >= 0) { + unbind: + /* Get rid of old binding */ + res = erts_unbind_from_cpu(erts_cpuinfo); + if (res == 0) + esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1; + else { + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); + erts_dsprintf(dsbufp, "Scheduler %d failed to unbind from cpu %d: %s\n", + (int) esdp->no, cpu_id, erl_errno_id(-res)); + erts_send_error_to_logger_nogl(dsbufp); + } + } + erts_smp_runq_lock(esdp->run_queue); +#ifdef ERTS_SMP + if (erts_common_run_queue) + erts_smp_atomic_set(&esdp->chk_cpu_bind, 0); + else { + esdp->run_queue->flags &= ~ERTS_RUNQ_FLG_CHK_CPU_BIND; + } +#endif + erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx); + +} + +static void +signal_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size) +{ + int s_ix = 1; + int cpu_ix; + + if (cpu_bind_order != ERTS_CPU_BIND_NONE) { + + cpu_bind_order_sort(cpudata, size, cpu_bind_order, 1); + + for (cpu_ix = 0; cpu_ix < size && cpu_ix < erts_no_schedulers; cpu_ix++) + if 
(erts_is_cpu_available(erts_cpuinfo, cpudata[cpu_ix].logical)) + scheduler2cpu_map[s_ix++].bind_id = cpudata[cpu_ix].logical; + } + + if (s_ix <= erts_no_schedulers) + for (; s_ix <= erts_no_schedulers; s_ix++) + scheduler2cpu_map[s_ix].bind_id = -1; + +#ifdef ERTS_SMP + if (erts_common_run_queue) { + for (s_ix = 0; s_ix < erts_no_schedulers; s_ix++) + erts_smp_atomic_set(&ERTS_SCHEDULER_IX(s_ix)->chk_cpu_bind, 1); + wake_all_schedulers(); + } + else { + ERTS_FOREACH_RUNQ(rq, + { + rq->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND; + wake_scheduler(rq, 0); + }); + } +#else + check_cpu_bind(erts_get_scheduler_data()); +#endif +} + +int +erts_init_scheduler_bind_type(char *how) +{ + if (erts_bind_to_cpu(erts_cpuinfo, -1) == -ENOTSUP) + return ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED; + + if (!system_cpudata && !user_cpudata) + return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY; + + if (sys_strcmp(how, "s") == 0) + cpu_bind_order = ERTS_CPU_BIND_SPREAD; + else if (sys_strcmp(how, "ps") == 0) + cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD; + else if (sys_strcmp(how, "ts") == 0) + cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD; + else if (sys_strcmp(how, "db") == 0 + || sys_strcmp(how, "tnnps") == 0) + cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD; + else if (sys_strcmp(how, "nnps") == 0) + cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD; + else if (sys_strcmp(how, "nnts") == 0) + cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD; + else if (sys_strcmp(how, "ns") == 0) + cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD; + else if (sys_strcmp(how, "u") == 0) + cpu_bind_order = ERTS_CPU_BIND_NONE; + else + return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_BAD_TYPE; + + return ERTS_INIT_SCHED_BIND_TYPE_SUCCESS; +} + +typedef struct { + int *id; + int used; + int size; +} ErtsCpuTopIdSeq; + +typedef struct { + ErtsCpuTopIdSeq logical; + ErtsCpuTopIdSeq thread; + ErtsCpuTopIdSeq core; + ErtsCpuTopIdSeq processor_node; + ErtsCpuTopIdSeq processor; + ErtsCpuTopIdSeq node; +} ErtsCpuTopEntry; + +static void +init_cpu_top_entry(ErtsCpuTopEntry *cte) +{ + int size = 10; + cte->logical.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, + sizeof(int)*size); + cte->logical.size = size; + cte->thread.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, + sizeof(int)*size); + cte->thread.size = size; + cte->core.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, + sizeof(int)*size); + cte->core.size = size; + cte->processor_node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, + sizeof(int)*size); + cte->processor_node.size = size; + cte->processor.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, + sizeof(int)*size); + cte->processor.size = size; + cte->node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, + sizeof(int)*size); + cte->node.size = size; +} + +static void +destroy_cpu_top_entry(ErtsCpuTopEntry *cte) +{ + erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->logical.id); + erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->thread.id); + erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->core.id); + erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor_node.id); + erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor.id); + erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->node.id); +} + +static int +get_cput_value_or_range(int *v, int *vr, char **str) +{ + long l; + char *c = *str; + errno = 0; + if (!isdigit((unsigned char)*c)) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID; + l = strtol(c, &c, 10); + if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID; + *v = (int) l; + if (*c == '-') { + c++; + if (!isdigit((unsigned char)*c)) + return 
ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; + l = strtol(c, &c, 10); + if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; + *vr = (int) l; + } + *str = c; + return ERTS_INIT_CPU_TOPOLOGY_OK; +} + +static int +get_cput_id_seq(ErtsCpuTopIdSeq *idseq, char **str) +{ + int ix = 0; + int need_size = 0; + char *c = *str; + + while (1) { + int res; + int val; + int nids; + int val_range = -1; + res = get_cput_value_or_range(&val, &val_range, &c); + if (res != ERTS_INIT_CPU_TOPOLOGY_OK) + return res; + if (val_range < 0 || val_range == val) + nids = 1; + else { + if (val_range > val) + nids = val_range - val + 1; + else + nids = val - val_range + 1; + } + need_size += nids; + if (need_size > idseq->size) { + idseq->size = need_size + 10; + idseq->id = erts_realloc(ERTS_ALC_T_TMP_CPU_IDS, + idseq->id, + sizeof(int)*idseq->size); + } + if (nids == 1) + idseq->id[ix++] = val; + else if (val_range > val) { + for (; val <= val_range; val++) + idseq->id[ix++] = val; + } + else { + for (; val >= val_range; val--) + idseq->id[ix++] = val; + } + if (*c != ',') + break; + c++; + } + *str = c; + idseq->used = ix; + return ERTS_INIT_CPU_TOPOLOGY_OK; +} + +static int +get_cput_entry(ErtsCpuTopEntry *cput, char **str) +{ + int h; + char *c = *str; + + cput->logical.used = 0; + cput->thread.id[0] = 0; + cput->thread.used = 1; + cput->core.id[0] = 0; + cput->core.used = 1; + cput->processor_node.id[0] = -1; + cput->processor_node.used = 1; + cput->processor.id[0] = 0; + cput->processor.used = 1; + cput->node.id[0] = -1; + cput->node.used = 1; + + h = ERTS_TOPOLOGY_MAX_DEPTH; + while (*c != ':' && *c != '\0') { + int res; + ErtsCpuTopIdSeq *idseqp; + switch (*c++) { + case 'L': + if (h <= ERTS_TOPOLOGY_LOGICAL) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; + idseqp = &cput->logical; + h = ERTS_TOPOLOGY_LOGICAL; + break; + case 't': + case 'T': + if (h <= ERTS_TOPOLOGY_THREAD) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; + idseqp = &cput->thread; + h = ERTS_TOPOLOGY_THREAD; + break; + case 'c': + case 'C': + if (h <= ERTS_TOPOLOGY_CORE) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; + idseqp = &cput->core; + h = ERTS_TOPOLOGY_CORE; + break; + case 'p': + case 'P': + if (h <= ERTS_TOPOLOGY_PROCESSOR) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; + idseqp = &cput->processor; + h = ERTS_TOPOLOGY_PROCESSOR; + break; + case 'n': + case 'N': + if (h <= ERTS_TOPOLOGY_PROCESSOR) { + do_node: + if (h <= ERTS_TOPOLOGY_NODE) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; + idseqp = &cput->node; + h = ERTS_TOPOLOGY_NODE; + } + else { + int p_node = 0; + char *p_chk = c; + while (*p_chk != '\0' && *p_chk != ':') { + if (*p_chk == 'p' || *p_chk == 'P') { + p_node = 1; + break; + } + p_chk++; + } + if (!p_node) + goto do_node; + if (h <= ERTS_TOPOLOGY_PROCESSOR_NODE) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; + idseqp = &cput->processor_node; + h = ERTS_TOPOLOGY_PROCESSOR_NODE; + } + break; + default: + return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE; + } + res = get_cput_id_seq(idseqp, &c); + if (res != ERTS_INIT_CPU_TOPOLOGY_OK) + return res; + } + + if (cput->logical.used < 1) + return ERTS_INIT_CPU_TOPOLOGY_MISSING_LID; + + if (*c == ':') { + c++; + } + + if (cput->thread.used != 1 + && cput->thread.used != cput->logical.used) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; + if (cput->core.used != 1 + && cput->core.used != cput->logical.used) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; + if (cput->processor_node.used != 1 + && 
cput->processor_node.used != cput->logical.used) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; + if (cput->processor.used != 1 + && cput->processor.used != cput->logical.used) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; + if (cput->node.used != 1 + && cput->node.used != cput->logical.used) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; + + *str = c; + return ERTS_INIT_CPU_TOPOLOGY_OK; +} + +static int +verify_topology(erts_cpu_topology_t *cpudata, int size) +{ + if (size > 0) { + int *logical; + int node, processor, no_nodes, i; + + /* Verify logical ids */ + logical = erts_alloc(ERTS_ALC_T_TMP, sizeof(int)*size); + + for (i = 0; i < user_cpudata_size; i++) + logical[i] = user_cpudata[i].logical; + + qsort(logical, user_cpudata_size, sizeof(int), int_cmp); + for (i = 0; i < user_cpudata_size-1; i++) { + if (logical[i] == logical[i+1]) { + erts_free(ERTS_ALC_T_TMP, logical); + return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS; + } + } + + erts_free(ERTS_ALC_T_TMP, logical); + + qsort(cpudata, size, sizeof(erts_cpu_topology_t), processor_order_cmp); + + /* Verify unique entities */ + + for (i = 1; i < user_cpudata_size; i++) { + if (user_cpudata[i-1].processor == user_cpudata[i].processor + && user_cpudata[i-1].node == user_cpudata[i].node + && (user_cpudata[i-1].processor_node + == user_cpudata[i].processor_node) + && user_cpudata[i-1].core == user_cpudata[i].core + && user_cpudata[i-1].thread == user_cpudata[i].thread) { + return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES; + } + } + + /* Verify numa nodes */ + node = cpudata[0].node; + processor = cpudata[0].processor; + no_nodes = cpudata[0].node < 0 && cpudata[0].processor_node < 0; + for (i = 1; i < size; i++) { + if (no_nodes) { + if (cpudata[i].node >= 0 || cpudata[i].processor_node >= 0) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES; + } + else { + if (cpudata[i].processor == processor && cpudata[i].node != node) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES; + node = cpudata[i].node; + processor = cpudata[i].processor; + if (node >= 0 && cpudata[i].processor_node >= 0) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES; + if (node < 0 && cpudata[i].processor_node < 0) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES; + } + } + } + + return ERTS_INIT_CPU_TOPOLOGY_OK; +} + +int +erts_init_cpu_topology(char *topology_str) +{ + ErtsCpuTopEntry cput; + int need_size; + char *c; + int ix; + int error = ERTS_INIT_CPU_TOPOLOGY_OK; + + if (user_cpudata) + erts_free(ERTS_ALC_T_CPUDATA, user_cpudata); + user_cpudata_size = 10; + + user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA, + (sizeof(erts_cpu_topology_t) + * user_cpudata_size)); + + init_cpu_top_entry(&cput); + + ix = 0; + need_size = 0; + + c = topology_str; + if (*c == '\0') { + error = ERTS_INIT_CPU_TOPOLOGY_MISSING; + goto fail; + } + do { + int r; + error = get_cput_entry(&cput, &c); + if (error != ERTS_INIT_CPU_TOPOLOGY_OK) + goto fail; + need_size += cput.logical.used; + if (user_cpudata_size < need_size) { + user_cpudata_size = need_size + 10; + user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA, + user_cpudata, + (sizeof(erts_cpu_topology_t) + * user_cpudata_size)); + } + + ASSERT(cput.thread.used == 1 + || cput.thread.used == cput.logical.used); + ASSERT(cput.core.used == 1 + || cput.core.used == cput.logical.used); + ASSERT(cput.processor_node.used == 1 + || cput.processor_node.used == cput.logical.used); + ASSERT(cput.processor.used == 1 + || cput.processor.used == cput.logical.used); + ASSERT(cput.node.used == 1 + || cput.node.used == cput.logical.used); + + for (r = 0; r < 
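/* one cpudata entry per logical id in this entry */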
cput.logical.used; r++) { + user_cpudata[ix].logical = cput.logical.id[r]; + user_cpudata[ix].thread = + cput.thread.id[cput.thread.used == 1 ? 0 : r]; + user_cpudata[ix].core = + cput.core.id[cput.core.used == 1 ? 0 : r]; + user_cpudata[ix].processor_node = + cput.processor_node.id[cput.processor_node.used == 1 ? 0 : r]; + user_cpudata[ix].processor = + cput.processor.id[cput.processor.used == 1 ? 0 : r]; + user_cpudata[ix].node = + cput.node.id[cput.node.used == 1 ? 0 : r]; + ix++; + } + } while (*c != '\0'); + + if (user_cpudata_size != ix) { + user_cpudata_size = ix; + user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA, + user_cpudata, + (sizeof(erts_cpu_topology_t) + * user_cpudata_size)); + } + + error = verify_topology(user_cpudata, user_cpudata_size); + if (error == ERTS_INIT_CPU_TOPOLOGY_OK) { + destroy_cpu_top_entry(&cput); + return ERTS_INIT_CPU_TOPOLOGY_OK; + } + + fail: + if (user_cpudata) + erts_free(ERTS_ALC_T_CPUDATA, user_cpudata); + user_cpudata_size = 0; + destroy_cpu_top_entry(&cput); + return error; +} + +#define ERTS_GET_CPU_TOPOLOGY_ERROR -1 +#define ERTS_GET_USED_CPU_TOPOLOGY 0 +#define ERTS_GET_DETECTED_CPU_TOPOLOGY 1 +#define ERTS_GET_DEFINED_CPU_TOPOLOGY 2 + +static Eterm get_cpu_topology_term(Process *c_p, int type); + +Eterm +erts_set_cpu_topology(Process *c_p, Eterm term) +{ + erts_cpu_topology_t *cpudata = NULL; + int cpudata_size = 0; + Eterm res; + + erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx); + res = get_cpu_topology_term(c_p, ERTS_GET_USED_CPU_TOPOLOGY); + if (term == am_undefined) { + if (user_cpudata) + erts_free(ERTS_ALC_T_CPUDATA, user_cpudata); + user_cpudata = NULL; + user_cpudata_size = 0; + + if (cpu_bind_order != ERTS_CPU_BIND_NONE && system_cpudata) { + cpudata_size = system_cpudata_size; + cpudata = erts_alloc(ERTS_ALC_T_TMP, + (sizeof(erts_cpu_topology_t) + * cpudata_size)); + + sys_memcpy((void *) cpudata, + (void *) system_cpudata, + sizeof(erts_cpu_topology_t)*cpudata_size); + } + } + else if (is_not_list(term)) { + error: + res = THE_NON_VALUE; + goto done; + } + else { + Eterm list = term; + int ix = 0; + + cpudata_size = 100; + cpudata = erts_alloc(ERTS_ALC_T_TMP, + (sizeof(erts_cpu_topology_t) + * cpudata_size)); + + while (is_list(list)) { + Eterm *lp = list_val(list); + Eterm cpu = CAR(lp); + Eterm* tp; + Sint id; + + if (is_not_tuple(cpu)) + goto error; + + tp = tuple_val(cpu); + + if (arityval(tp[0]) != 7 || tp[1] != am_cpu) + goto error; + + if (ix >= cpudata_size) { + cpudata_size += 100; + cpudata = erts_realloc(ERTS_ALC_T_TMP, + cpudata, + (sizeof(erts_cpu_topology_t) + * cpudata_size)); + } + + id = signed_val(tp[2]); + if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) + goto error; + cpudata[ix].node = (int) id; + + id = signed_val(tp[3]); + if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) + goto error; + cpudata[ix].processor = (int) id; + + id = signed_val(tp[4]); + if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) + goto error; + cpudata[ix].processor_node = (int) id; + + id = signed_val(tp[5]); + if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) + goto error; + cpudata[ix].core = (int) id; + + id = signed_val(tp[6]); + if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) + goto error; + cpudata[ix].thread = (int) id; + + id = signed_val(tp[7]); + if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) + goto error; + cpudata[ix].logical = (int) id; + + list = CDR(lp); + ix++; + } + + if (is_not_nil(list)) + goto error; + + cpudata_size = ix; + + if (ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(cpudata, cpudata_size)) + goto error; + + if (user_cpudata_size 
!= cpudata_size) { + if (user_cpudata) + erts_free(ERTS_ALC_T_CPUDATA, user_cpudata); + user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA, + sizeof(erts_cpu_topology_t)*cpudata_size); + user_cpudata_size = cpudata_size; + } + + sys_memcpy((void *) user_cpudata, + (void *) cpudata, + sizeof(erts_cpu_topology_t)*cpudata_size); + } + + signal_schedulers_bind_change(cpudata, cpudata_size); + + done: + erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx); + + if (cpudata) + erts_free(ERTS_ALC_T_TMP, cpudata); + + return res; +} + +static Eterm +bound_schedulers_term(ErtsCpuBindOrder order) +{ + switch (order) { + case ERTS_CPU_BIND_SPREAD: { + ERTS_DECL_AM(spread); + return AM_spread; + } + case ERTS_CPU_BIND_PROCESSOR_SPREAD: { + ERTS_DECL_AM(processor_spread); + return AM_processor_spread; + } + case ERTS_CPU_BIND_THREAD_SPREAD: { + ERTS_DECL_AM(thread_spread); + return AM_thread_spread; + } + case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD: { + ERTS_DECL_AM(thread_no_node_processor_spread); + return AM_thread_no_node_processor_spread; + } + case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD: { + ERTS_DECL_AM(no_node_processor_spread); + return AM_no_node_processor_spread; + } + case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD: { + ERTS_DECL_AM(no_node_thread_spread); + return AM_no_node_thread_spread; + } + case ERTS_CPU_BIND_NO_SPREAD: { + ERTS_DECL_AM(no_spread); + return AM_no_spread; + } + case ERTS_CPU_BIND_NONE: { + ERTS_DECL_AM(unbound); + return AM_unbound; + } + default: + ASSERT(0); + return THE_NON_VALUE; + } +} + +Eterm +erts_bound_schedulers_term(Process *c_p) +{ + ErtsCpuBindOrder order; + erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx); + order = cpu_bind_order; + erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx); + return bound_schedulers_term(order); +} + +static void +create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata, int *cpudata_size) +{ + if (user_cpudata) { + *cpudata_size = user_cpudata_size; + *cpudata = erts_alloc(ERTS_ALC_T_TMP, + (sizeof(erts_cpu_topology_t) + * (*cpudata_size))); + sys_memcpy((void *) *cpudata, + (void *) user_cpudata, + sizeof(erts_cpu_topology_t)*(*cpudata_size)); + } + else if (system_cpudata) { + *cpudata_size = system_cpudata_size; + *cpudata = erts_alloc(ERTS_ALC_T_TMP, + (sizeof(erts_cpu_topology_t) + * (*cpudata_size))); + sys_memcpy((void *) *cpudata, + (void *) system_cpudata, + sizeof(erts_cpu_topology_t)*(*cpudata_size)); + } + else { + *cpudata = NULL; + *cpudata_size = 0; + } +} + +static void +destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata) +{ + if (cpudata) + erts_free(ERTS_ALC_T_TMP, cpudata); +} + +Eterm +erts_bind_schedulers(Process *c_p, Eterm how) +{ + Eterm res; + erts_cpu_topology_t *cpudata; + int cpudata_size; + ErtsCpuBindOrder old_cpu_bind_order; + + erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx); + + if (erts_bind_to_cpu(erts_cpuinfo, -1) == -ENOTSUP) { + ERTS_BIF_PREP_ERROR(res, c_p, EXC_NOTSUP); + } + else { + + old_cpu_bind_order = cpu_bind_order; + + if (ERTS_IS_ATOM_STR("spread", how)) + cpu_bind_order = ERTS_CPU_BIND_SPREAD; + else if (ERTS_IS_ATOM_STR("processor_spread", how)) + cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD; + else if (ERTS_IS_ATOM_STR("thread_spread", how)) + cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD; + else if (ERTS_IS_ATOM_STR("default_bind", how) + || ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how)) + cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD; + else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how)) + cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD; + else if 
(ERTS_IS_ATOM_STR("no_node_thread_spread", how)) + cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD; + else if (ERTS_IS_ATOM_STR("no_spread", how)) + cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD; + else if (ERTS_IS_ATOM_STR("unbound", how)) + cpu_bind_order = ERTS_CPU_BIND_NONE; + else { + cpu_bind_order = old_cpu_bind_order; + ERTS_BIF_PREP_ERROR(res, c_p, BADARG); + goto done; + } + + create_tmp_cpu_topology_copy(&cpudata, &cpudata_size); + + if (!cpudata) { + cpu_bind_order = old_cpu_bind_order; + ERTS_BIF_PREP_ERROR(res, c_p, BADARG); + goto done; + } + + signal_schedulers_bind_change(cpudata, cpudata_size); + + destroy_tmp_cpu_topology_copy(cpudata); + + res = bound_schedulers_term(old_cpu_bind_order); + } + + done: + + erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx); + + return res; +} + +Eterm +erts_fake_scheduler_bindings(Process *p, Eterm how) +{ + ErtsCpuBindOrder fake_cpu_bind_order; + erts_cpu_topology_t *cpudata; + int cpudata_size; + Eterm res; + + if (ERTS_IS_ATOM_STR("spread", how)) + fake_cpu_bind_order = ERTS_CPU_BIND_SPREAD; + else if (ERTS_IS_ATOM_STR("processor_spread", how)) + fake_cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD; + else if (ERTS_IS_ATOM_STR("thread_spread", how)) + fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD; + else if (ERTS_IS_ATOM_STR("default_bind", how) + || ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how)) + fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD; + else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how)) + fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD; + else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how)) + fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD; + else if (ERTS_IS_ATOM_STR("no_spread", how)) + fake_cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD; + else if (ERTS_IS_ATOM_STR("unbound", how)) + fake_cpu_bind_order = ERTS_CPU_BIND_NONE; + else { + ERTS_BIF_PREP_ERROR(res, p, BADARG); + return res; + } + + erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx); + create_tmp_cpu_topology_copy(&cpudata, &cpudata_size); + erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx); + + if (!cpudata || fake_cpu_bind_order == ERTS_CPU_BIND_NONE) + ERTS_BIF_PREP_RET(res, am_false); + else { + int i; + Eterm *hp; + + cpu_bind_order_sort(cpudata, cpudata_size, fake_cpu_bind_order, 1); + +#ifdef ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA + + erts_fprintf(stderr, "node: "); + for (i = 0; i < cpudata_size; i++) + erts_fprintf(stderr, " %2d", cpudata[i].node); + erts_fprintf(stderr, "\n"); + erts_fprintf(stderr, "processor: "); + for (i = 0; i < cpudata_size; i++) + erts_fprintf(stderr, " %2d", cpudata[i].processor); + erts_fprintf(stderr, "\n"); + if (fake_cpu_bind_order != ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD + && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD + && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD) { + erts_fprintf(stderr, "processor_node:"); + for (i = 0; i < cpudata_size; i++) + erts_fprintf(stderr, " %2d", cpudata[i].processor_node); + erts_fprintf(stderr, "\n"); + } + erts_fprintf(stderr, "core: "); + for (i = 0; i < cpudata_size; i++) + erts_fprintf(stderr, " %2d", cpudata[i].core); + erts_fprintf(stderr, "\n"); + erts_fprintf(stderr, "thread: "); + for (i = 0; i < cpudata_size; i++) + erts_fprintf(stderr, " %2d", cpudata[i].thread); + erts_fprintf(stderr, "\n"); + erts_fprintf(stderr, "logical: "); + for (i = 0; i < cpudata_size; i++) + erts_fprintf(stderr, " %2d", cpudata[i].logical); + erts_fprintf(stderr, "\n"); +#endif + + hp = HAlloc(p, 
cpudata_size+1); + ERTS_BIF_PREP_RET(res, make_tuple(hp)); + *hp++ = make_arityval((Uint) cpudata_size); + for (i = 0; i < cpudata_size; i++) + *hp++ = make_small((Uint) cpudata[i].logical); + } + + destroy_tmp_cpu_topology_copy(cpudata); + + return res; +} + +Eterm +erts_get_schedulers_binds(Process *c_p) +{ + int ix; + ERTS_DECL_AM(unbound); + Eterm *hp = HAlloc(c_p, erts_no_schedulers+1); + Eterm res = make_tuple(hp); + + *(hp++) = make_arityval(erts_no_schedulers); + erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx); + for (ix = 1; ix <= erts_no_schedulers; ix++) + *(hp++) = (scheduler2cpu_map[ix].bound_id >= 0 + ? make_small(scheduler2cpu_map[ix].bound_id) + : AM_unbound); + erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx); + return res; +} + +static Eterm +bld_topology_term(Eterm **hpp, + Uint *hszp, + erts_cpu_topology_t *cpudata, + int size) +{ + Eterm res = NIL; + int i; + + if (size == 0) + return am_undefined; + + for (i = size-1; i >= 0; i--) { + res = erts_bld_cons(hpp, + hszp, + erts_bld_tuple(hpp, + hszp, + 7, + am_cpu, + make_small(cpudata[i].node), + make_small(cpudata[i].processor), + make_small(cpudata[i].processor_node), + make_small(cpudata[i].core), + make_small(cpudata[i].thread), + make_small(cpudata[i].logical)), + res); + } + return res; +} + +static Eterm +get_cpu_topology_term(Process *c_p, int type) +{ +#ifdef DEBUG + Eterm *hp_end; +#endif + Eterm *hp; + Uint hsz; + Eterm res = THE_NON_VALUE; + erts_cpu_topology_t *cpudata = NULL; + int size = 0; + + switch (type) { + case ERTS_GET_USED_CPU_TOPOLOGY: + if (user_cpudata) + goto defined; + else + goto detected; + case ERTS_GET_DETECTED_CPU_TOPOLOGY: + detected: + if (!system_cpudata) + res = am_undefined; + else { + size = system_cpudata_size; + cpudata = erts_alloc(ERTS_ALC_T_TMP, + (sizeof(erts_cpu_topology_t) + * size)); + sys_memcpy((void *) cpudata, + (void *) system_cpudata, + sizeof(erts_cpu_topology_t)*size); + } + break; + case ERTS_GET_DEFINED_CPU_TOPOLOGY: + defined: + if (!user_cpudata) + res = am_undefined; + else { + size = user_cpudata_size; + cpudata = user_cpudata; + } + break; + default: + erl_exit(ERTS_ABORT_EXIT, "Bad cpu topology type: %d\n", type); + break; + } + + if (res == am_undefined) { + ASSERT(!cpudata); + return res; + } + + hsz = 0; + + bld_topology_term(NULL, &hsz, + cpudata, size); + + hp = HAlloc(c_p, hsz); + +#ifdef DEBUG + hp_end = hp + hsz; +#endif + + res = bld_topology_term(&hp, NULL, + cpudata, size); + + ASSERT(hp_end == hp); + + if (cpudata && cpudata != system_cpudata && cpudata != user_cpudata) + erts_free(ERTS_ALC_T_TMP, cpudata); + + return res; +} + +Eterm +erts_get_cpu_topology_term(Process *c_p, Eterm which) +{ + Eterm res; + int type; + erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx); + if (ERTS_IS_ATOM_STR("used", which)) + type = ERTS_GET_USED_CPU_TOPOLOGY; + else if (ERTS_IS_ATOM_STR("detected", which)) + type = ERTS_GET_DETECTED_CPU_TOPOLOGY; + else if (ERTS_IS_ATOM_STR("defined", which)) + type = ERTS_GET_DEFINED_CPU_TOPOLOGY; + else + type = ERTS_GET_CPU_TOPOLOGY_ERROR; + if (type == ERTS_GET_CPU_TOPOLOGY_ERROR) + res = THE_NON_VALUE; + else + res = get_cpu_topology_term(c_p, type); + erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx); + return res; +} + +static void +early_cpu_bind_init(void) +{ + user_cpudata = NULL; + user_cpudata_size = 0; + + system_cpudata_size = erts_get_cpu_topology_size(erts_cpuinfo); + system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA, + (sizeof(erts_cpu_topology_t) + * system_cpudata_size)); + + cpu_bind_order = ERTS_CPU_BIND_NONE; + + if 
(!erts_get_cpu_topology(erts_cpuinfo, system_cpudata) + || ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(system_cpudata, + system_cpudata_size)) { + erts_free(ERTS_ALC_T_CPUDATA, system_cpudata); + system_cpudata = NULL; + system_cpudata_size = 0; + } +} + +static void +late_cpu_bind_init(void) +{ + int ix; + + erts_smp_rwmtx_init(&erts_cpu_bind_rwmtx, "cpu_bind"); + + scheduler2cpu_map = erts_alloc(ERTS_ALC_T_CPUDATA, + (sizeof(ErtsCpuBindData) + * (erts_no_schedulers+1))); + for (ix = 1; ix <= erts_no_schedulers; ix++) { + scheduler2cpu_map[ix].bind_id = -1; + scheduler2cpu_map[ix].bound_id = -1; + } + + if (cpu_bind_order != ERTS_CPU_BIND_NONE) { + erts_cpu_topology_t *cpudata; + int cpudata_size; + create_tmp_cpu_topology_copy(&cpudata, &cpudata_size); + ASSERT(cpudata); + signal_schedulers_bind_change(cpudata, cpudata_size); + destroy_tmp_cpu_topology_copy(cpudata); + } +} + +#ifdef ERTS_SMP + +static void +add_pend_suspend(Process *suspendee, + Eterm originator_pid, + void (*handle_func)(Process *, + ErtsProcLocks, + int, + Eterm)) +{ + ErtsPendingSuspend *psp = erts_alloc(ERTS_ALC_T_PEND_SUSPEND, + sizeof(ErtsPendingSuspend)); + psp->next = NULL; +#ifdef DEBUG +#ifdef ARCH_64 + psp->end = (ErtsPendingSuspend *) 0xdeaddeaddeaddead; +#else + psp->end = (ErtsPendingSuspend *) 0xdeaddead; +#endif +#endif + psp->pid = originator_pid; + psp->handle_func = handle_func; + + if (suspendee->pending_suspenders) + suspendee->pending_suspenders->end->next = psp; + else + suspendee->pending_suspenders = psp; + suspendee->pending_suspenders->end = psp; +} + +static void +handle_pending_suspend(Process *p, ErtsProcLocks p_locks) +{ + ErtsPendingSuspend *psp; + int is_alive = !ERTS_PROC_IS_EXITING(p); + + ERTS_SMP_LC_ASSERT(p_locks & ERTS_PROC_LOCK_STATUS); + + /* + * New pending suspenders might appear while we are processing + * (since we may release the status lock on p while processing). 
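+     * That is why the outer loop below re-reads p->pending_suspenders
+     * until it stays empty.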
+ */ + while (p->pending_suspenders) { + psp = p->pending_suspenders; + p->pending_suspenders = NULL; + while (psp) { + ErtsPendingSuspend *free_psp; + (*psp->handle_func)(p, p_locks, is_alive, psp->pid); + free_psp = psp; + psp = psp->next; + erts_free(ERTS_ALC_T_PEND_SUSPEND, (void *) free_psp); + } + } + +} + +static ERTS_INLINE void +cancel_suspend_of_suspendee(Process *p, ErtsProcLocks p_locks) +{ + if (is_not_nil(p->suspendee)) { + Process *rp; + if (!(p_locks & ERTS_PROC_LOCK_STATUS)) + erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS); + rp = erts_pid2proc(p, p_locks|ERTS_PROC_LOCK_STATUS, + p->suspendee, ERTS_PROC_LOCK_STATUS); + if (rp) { + erts_resume(rp, ERTS_PROC_LOCK_STATUS); + erts_smp_proc_unlock(rp, ERTS_PROC_LOCK_STATUS); + } + if (!(p_locks & ERTS_PROC_LOCK_STATUS)) + erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + p->suspendee = NIL; + } +} + +static void +handle_pend_sync_suspend(Process *suspendee, + ErtsProcLocks suspendee_locks, + int suspendee_alive, + Eterm suspender_pid) +{ + Process *suspender; + + ERTS_SMP_LC_ASSERT(suspendee_locks & ERTS_PROC_LOCK_STATUS); + + suspender = erts_pid2proc(suspendee, + suspendee_locks, + suspender_pid, + ERTS_PROC_LOCK_STATUS); + if (suspender) { + ASSERT(is_nil(suspender->suspendee)); + if (suspendee_alive) { + ErtsRunQueue *rq = erts_get_runq_proc(suspendee); + erts_smp_runq_lock(rq); + suspend_process(rq, suspendee); + erts_smp_runq_unlock(rq); + suspender->suspendee = suspendee->id; + } + /* suspender is suspended waiting for suspendee to suspend; + resume suspender */ + resume_process(suspender); + erts_smp_proc_unlock(suspender, ERTS_PROC_LOCK_STATUS); + } +} + +/* + * Like erts_pid2proc() but: + * + * * At least ERTS_PROC_LOCK_MAIN have to be held on c_p. + * * At least ERTS_PROC_LOCK_MAIN have to be taken on pid. + * * It also waits for proc to be in a state != running and garbing. + * * If ERTS_PROC_LOCK_BUSY is returned, the calling process has to + * yield (ERTS_BIF_YIELD[0-3]()). c_p might in this case have been + * suspended. + */ + + +Process * +erts_pid2proc_not_running(Process *c_p, ErtsProcLocks c_p_locks, + Eterm pid, ErtsProcLocks pid_locks) +{ + Process *rp; + int unlock_c_p_status; + + ERTS_SMP_LC_ASSERT(c_p_locks == erts_proc_lc_my_proc_locks(c_p)); + + ERTS_SMP_LC_ASSERT(c_p_locks & ERTS_PROC_LOCK_MAIN); + ERTS_SMP_LC_ASSERT(pid_locks & (ERTS_PROC_LOCK_MAIN|ERTS_PROC_LOCK_STATUS)); + + if (c_p->id == pid) + return erts_pid2proc(c_p, c_p_locks, pid, pid_locks); + + if (c_p_locks & ERTS_PROC_LOCK_STATUS) + unlock_c_p_status = 0; + else { + unlock_c_p_status = 1; + erts_smp_proc_lock(c_p, ERTS_PROC_LOCK_STATUS); + } + + if (c_p->suspendee == pid) { + /* Process previously suspended by c_p (below)... */ + ErtsProcLocks rp_locks = pid_locks|ERTS_PROC_LOCK_STATUS; + rp = erts_pid2proc(c_p, c_p_locks|ERTS_PROC_LOCK_STATUS, pid, rp_locks); + c_p->suspendee = NIL; + ASSERT(c_p->flags & F_P2PNR_RESCHED); + c_p->flags &= ~F_P2PNR_RESCHED; + if (rp) + resume_process(rp); + } + else { + ErtsRunQueue *cp_rq, *rp_rq; + + rp = erts_pid2proc(c_p, c_p_locks|ERTS_PROC_LOCK_STATUS, + pid, ERTS_PROC_LOCK_STATUS); + + if (!rp) { + c_p->flags &= ~F_P2PNR_RESCHED; + goto done; + } + + ASSERT(!(c_p->flags & F_P2PNR_RESCHED)); + + cp_rq = erts_get_runq_proc(c_p); + rp_rq = erts_get_runq_proc(rp); + erts_smp_runqs_lock(cp_rq, rp_rq); + if (rp->runq_flags & ERTS_PROC_RUNQ_FLG_RUNNING) { + running: + /* Phiu... */ + + /* + * If we got pending suspenders and suspend ourselves waiting + * to suspend another process we might deadlock. 
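+	     * (e.g. two processes simultaneously trying to suspend
+	     * each other; both would then wait forever).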
+ * In this case we have to yield, be suspended by + * someone else and then do it all over again. + */ + if (!c_p->pending_suspenders) { + /* Mark rp pending for suspend by c_p */ + add_pend_suspend(rp, c_p->id, handle_pend_sync_suspend); + ASSERT(is_nil(c_p->suspendee)); + + /* Suspend c_p; when rp is suspended c_p will be resumed. */ + suspend_process(cp_rq, c_p); + c_p->flags |= F_P2PNR_RESCHED; + } + /* Yield (caller is assumed to yield immediately in bif). */ + erts_smp_proc_unlock(rp, ERTS_PROC_LOCK_STATUS); + rp = ERTS_PROC_LOCK_BUSY; + } + else { + ErtsProcLocks need_locks = pid_locks & ~ERTS_PROC_LOCK_STATUS; + if (need_locks && erts_smp_proc_trylock(rp, need_locks) == EBUSY) { + erts_smp_runqs_unlock(cp_rq, rp_rq); + erts_smp_proc_unlock(rp, ERTS_PROC_LOCK_STATUS); + rp = erts_pid2proc(c_p, c_p_locks|ERTS_PROC_LOCK_STATUS, + pid, pid_locks|ERTS_PROC_LOCK_STATUS); + if (!rp) + goto done; + /* run-queues may have changed */ + cp_rq = erts_get_runq_proc(c_p); + rp_rq = erts_get_runq_proc(rp); + erts_smp_runqs_lock(cp_rq, rp_rq); + if (rp->runq_flags & ERTS_PROC_RUNQ_FLG_RUNNING) { + /* Ahh... */ + erts_smp_proc_unlock(rp, + pid_locks & ~ERTS_PROC_LOCK_STATUS); + goto running; + } + } + + /* rp is not running and we got the locks we want... */ + } + erts_smp_runqs_unlock(cp_rq, rp_rq); + } + + done: + if (rp && rp != ERTS_PROC_LOCK_BUSY && !(pid_locks & ERTS_PROC_LOCK_STATUS)) + erts_smp_proc_unlock(rp, ERTS_PROC_LOCK_STATUS); + if (unlock_c_p_status) + erts_smp_proc_unlock(c_p, ERTS_PROC_LOCK_STATUS); + return rp; +} + +/* + * erts_pid2proc_nropt() is normally the same as + * erts_pid2proc_not_running(). However it is only + * to be used when 'not running' is a pure optimization, + * not a requirement. + */ + +Process * +erts_pid2proc_nropt(Process *c_p, ErtsProcLocks c_p_locks, + Eterm pid, ErtsProcLocks pid_locks) +{ + if (erts_disable_proc_not_running_opt) + return erts_pid2proc(c_p, c_p_locks, pid, pid_locks); + else + return erts_pid2proc_not_running(c_p, c_p_locks, pid, pid_locks); +} + +static ERTS_INLINE void +do_bif_suspend_process(ErtsSuspendMonitor *smon, + Process *suspendee, + ErtsRunQueue *locked_runq) +{ + ASSERT(suspendee); + ASSERT(!suspendee->is_exiting); + ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS + & erts_proc_lc_my_proc_locks(suspendee)); + if (smon) { + if (!smon->active) { + ErtsRunQueue *rq; + + if (locked_runq) + rq = locked_runq; + else { + rq = erts_get_runq_proc(suspendee); + erts_smp_runq_lock(rq); + } + + suspend_process(rq, suspendee); + + if (!locked_runq) + erts_smp_runq_unlock(rq); + } + smon->active += smon->pending; + ASSERT(smon->active); + smon->pending = 0; + } + +} + +static void +handle_pend_bif_sync_suspend(Process *suspendee, + ErtsProcLocks suspendee_locks, + int suspendee_alive, + Eterm suspender_pid) +{ + Process *suspender; + + ERTS_SMP_LC_ASSERT(suspendee_locks & ERTS_PROC_LOCK_STATUS); + + suspender = erts_pid2proc(suspendee, + suspendee_locks, + suspender_pid, + ERTS_PROC_LOCK_LINK|ERTS_PROC_LOCK_STATUS); + if (suspender) { + ASSERT(is_nil(suspender->suspendee)); + if (!suspendee_alive) + erts_delete_suspend_monitor(&suspender->suspend_monitors, + suspendee->id); + else { + ErtsSuspendMonitor *smon; + smon = erts_lookup_suspend_monitor(suspender->suspend_monitors, + suspendee->id); + do_bif_suspend_process(smon, suspendee, NULL); + suspender->suspendee = suspendee->id; + } + /* suspender is suspended waiting for suspendee to suspend; + resume suspender */ + resume_process(suspender); + erts_smp_proc_unlock(suspender, + 
ERTS_PROC_LOCK_LINK|ERTS_PROC_LOCK_STATUS); + } +} + +static void +handle_pend_bif_async_suspend(Process *suspendee, + ErtsProcLocks suspendee_locks, + int suspendee_alive, + Eterm suspender_pid) +{ + + Process *suspender; + + ERTS_SMP_LC_ASSERT(suspendee_locks & ERTS_PROC_LOCK_STATUS); + + suspender = erts_pid2proc(suspendee, + suspendee_locks, + suspender_pid, + ERTS_PROC_LOCK_LINK); + if (suspender) { + ASSERT(is_nil(suspender->suspendee)); + if (!suspendee_alive) + erts_delete_suspend_monitor(&suspender->suspend_monitors, + suspendee->id); + else { + ErtsSuspendMonitor *smon; + smon = erts_lookup_suspend_monitor(suspender->suspend_monitors, + suspendee->id); + do_bif_suspend_process(smon, suspendee, NULL); + } + erts_smp_proc_unlock(suspender, ERTS_PROC_LOCK_LINK); + } +} + +#endif /* ERTS_SMP */ + +/* + * The erlang:suspend_process/2 BIF + */ + +BIF_RETTYPE +suspend_process_2(BIF_ALIST_2) +{ + Eterm res; + Process* suspendee = NULL; + ErtsSuspendMonitor *smon; + ErtsProcLocks xlocks = (ErtsProcLocks) 0; + + /* Options and default values: */ + int asynchronous = 0; + int unless_suspending = 0; + + + if (BIF_P->id == BIF_ARG_1) + goto badarg; /* We are not allowed to suspend ourselves */ + + if (is_not_nil(BIF_ARG_2)) { + /* Parse option list */ + Eterm arg = BIF_ARG_2; + + while (is_list(arg)) { + Eterm *lp = list_val(arg); + arg = CAR(lp); + switch (arg) { + case am_unless_suspending: + unless_suspending = 1; + break; + case am_asynchronous: + asynchronous = 1; + break; + default: + goto badarg; + } + arg = CDR(lp); + } + if (is_not_nil(arg)) + goto badarg; + } + + xlocks = ERTS_PROC_LOCK_LINK | (asynchronous + ? (ErtsProcLocks) 0 + : ERTS_PROC_LOCK_STATUS); + + erts_smp_proc_lock(BIF_P, xlocks); + + suspendee = erts_pid2proc(BIF_P, + ERTS_PROC_LOCK_MAIN|xlocks, + BIF_ARG_1, + ERTS_PROC_LOCK_STATUS); + if (!suspendee) + goto no_suspendee; + + smon = erts_add_or_lookup_suspend_monitor(&BIF_P->suspend_monitors, + BIF_ARG_1); +#ifndef ERTS_SMP /* no ERTS_SMP */ + + /* This is really a piece of cake without SMP support... */ + if (!smon->active) { + suspend_process(erts_common_run_queue, suspendee); + smon->active++; + res = am_true; + } + else if (unless_suspending) + res = am_false; + else if (smon->active == INT_MAX) + goto system_limit; + else { + smon->active++; + res = am_true; + } + +#else /* ERTS_SMP */ + + /* ... but a little trickier with SMP support ... 
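+     * (the suspendee may be running on another scheduler; if so,
+     * the suspend is deferred via the pending-suspend list until
+     * it has been scheduled out)
+     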
*/ + + if (asynchronous) { + /* --- Asynchronous suspend begin ---------------------------------- */ + + ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_LINK + & erts_proc_lc_my_proc_locks(BIF_P)); + ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS + == erts_proc_lc_my_proc_locks(suspendee)); + + if (smon->active) { + smon->active += smon->pending; + smon->pending = 0; + if (unless_suspending) + res = am_false; + else if (smon->active == INT_MAX) + goto system_limit; + else { + smon->active++; + res = am_true; + } + /* done */ + } + else { + /* We havn't got any active suspends on the suspendee */ + if (smon->pending && unless_suspending) + res = am_false; + else { + ErtsRunQueue *rq; + if (smon->pending == INT_MAX) + goto system_limit; + + smon->pending++; + rq = erts_get_runq_proc(suspendee); + erts_smp_runq_lock(rq); + + if (suspendee->runq_flags & ERTS_PROC_RUNQ_FLG_RUNNING) + add_pend_suspend(suspendee, + BIF_P->id, + handle_pend_bif_async_suspend); + else + do_bif_suspend_process(smon, suspendee, rq); + erts_smp_runq_unlock(rq); + + res = am_true; + } + /* done */ + } + /* --- Asynchronous suspend end ------------------------------------ */ + } + else /* if (!asynchronous) */ { + /* --- Synchronous suspend begin ----------------------------------- */ + + ERTS_SMP_LC_ASSERT(((ERTS_PROC_LOCK_LINK|ERTS_PROC_LOCK_STATUS) + & erts_proc_lc_my_proc_locks(BIF_P)) + == (ERTS_PROC_LOCK_LINK|ERTS_PROC_LOCK_STATUS)); + ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS + == erts_proc_lc_my_proc_locks(suspendee)); + + if (BIF_P->suspendee == BIF_ARG_1) { + /* We are back after a yield and the suspendee + has been suspended on behalf of us. */ + ASSERT(smon->active >= 1); + BIF_P->suspendee = NIL; + res = (!unless_suspending || smon->active == 1 + ? am_true + : am_false); + /* done */ + } + else if (smon->active) { + if (unless_suspending) + res = am_false; + else { + smon->active++; + res = am_true; + } + /* done */ + } + else { + ErtsRunQueue *cp_rq, *s_rq; + /* We haven't got any active suspends on the suspendee */ + + /* + * If we have pending suspenders and suspend ourselves waiting + * to suspend another process, or suspend another process + * we might deadlock. In this case we have to yield, + * be suspended by someone else, and then do it all over again. + */ + if (BIF_P->pending_suspenders) + goto yield; + + if (!unless_suspending && smon->pending == INT_MAX) + goto system_limit; + if (!unless_suspending || smon->pending == 0) + smon->pending++; + + cp_rq = erts_get_runq_proc(BIF_P); + s_rq = erts_get_runq_proc(suspendee); + erts_smp_runqs_lock(cp_rq, s_rq); + if (!(suspendee->runq_flags & ERTS_PROC_RUNQ_FLG_RUNNING)) { + do_bif_suspend_process(smon, suspendee, s_rq); + erts_smp_runqs_unlock(cp_rq, s_rq); + res = (!unless_suspending || smon->active == 1 + ? am_true + : am_false); + /* done */ + } + else { + /* Mark suspendee pending for suspend by BIF_P */ + add_pend_suspend(suspendee, + BIF_P->id, + handle_pend_bif_sync_suspend); + + ASSERT(is_nil(BIF_P->suspendee)); + + /* + * Suspend BIF_P; when suspendee is suspended, BIF_P + * will be resumed and this BIF will be called again. + * This time with BIF_P->suspendee == BIF_ARG_1 (see + * above). 
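+	     * handle_pend_bif_sync_suspend() performs the deferred
+	     * suspend and resumes BIF_P once the suspendee has been
+	     * scheduled out.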
+ */ + suspend_process(cp_rq, BIF_P); + erts_smp_runqs_unlock(cp_rq, s_rq); + goto yield; + } + } + /* --- Synchronous suspend end ------------------------------------- */ + } + +#endif /* ERTS_SMP */ + + ASSERT(suspendee->status == P_SUSPENDED || (asynchronous && smon->pending)); + ASSERT(suspendee->status == P_SUSPENDED || !smon->active); + + erts_smp_proc_unlock(suspendee, ERTS_PROC_LOCK_STATUS); + erts_smp_proc_unlock(BIF_P, xlocks); + BIF_RET(res); + + system_limit: + ERTS_BIF_PREP_ERROR(res, BIF_P, SYSTEM_LIMIT); + goto do_return; + + no_suspendee: +#ifdef ERTS_SMP + BIF_P->suspendee = NIL; +#endif + erts_delete_suspend_monitor(&BIF_P->suspend_monitors, BIF_ARG_1); + + badarg: + ERTS_BIF_PREP_ERROR(res, BIF_P, BADARG); +#ifdef ERTS_SMP + goto do_return; + + yield: + ERTS_BIF_PREP_YIELD2(res, bif_export[BIF_suspend_process_2], + BIF_P, BIF_ARG_1, BIF_ARG_2); +#endif + + do_return: + if (suspendee) + erts_smp_proc_unlock(suspendee, ERTS_PROC_LOCK_STATUS); + if (xlocks) + erts_smp_proc_unlock(BIF_P, xlocks); + return res; + +} + + +/* + * The erlang:resume_process/1 BIF + */ + +BIF_RETTYPE +resume_process_1(BIF_ALIST_1) +{ + ErtsSuspendMonitor *smon; + Process *suspendee; + int is_active; + + if (BIF_P->id == BIF_ARG_1) + BIF_ERROR(BIF_P, BADARG); + + erts_smp_proc_lock(BIF_P, ERTS_PROC_LOCK_LINK); + smon = erts_lookup_suspend_monitor(BIF_P->suspend_monitors, BIF_ARG_1); + + if (!smon) { + /* No previous suspend or dead suspendee */ + goto error; + } + else if (smon->pending) { + smon->pending--; + ASSERT(smon->pending >= 0); + if (smon->active) { + smon->active += smon->pending; + smon->pending = 0; + } + is_active = smon->active; + } + else if (smon->active) { + smon->active--; + ASSERT(smon->pending >= 0); + is_active = 1; + } + else { + /* No previous suspend or dead suspendee */ + goto error; + } + + if (smon->active || smon->pending || !is_active) { + /* Leave the suspendee as it is; just verify that it is still alive */ + suspendee = erts_pid2proc(BIF_P, + ERTS_PROC_LOCK_MAIN|ERTS_PROC_LOCK_LINK, + BIF_ARG_1, + 0); + if (!suspendee) + goto no_suspendee; + + } + else { + /* Resume */ + suspendee = erts_pid2proc(BIF_P, + ERTS_PROC_LOCK_MAIN|ERTS_PROC_LOCK_LINK, + BIF_ARG_1, + ERTS_PROC_LOCK_STATUS); + if (!suspendee) + goto no_suspendee; + + ASSERT(suspendee->status == P_SUSPENDED + || (suspendee->status == P_GARBING + && suspendee->gcstatus == P_SUSPENDED)); + resume_process(suspendee); + + erts_smp_proc_unlock(suspendee, ERTS_PROC_LOCK_STATUS); + } + + if (!smon->active && !smon->pending) + erts_delete_suspend_monitor(&BIF_P->suspend_monitors, BIF_ARG_1); + + erts_smp_proc_unlock(BIF_P, ERTS_PROC_LOCK_LINK); + + BIF_RET(am_true); + + no_suspendee: + /* cleanup */ + erts_delete_suspend_monitor(&BIF_P->suspend_monitors, BIF_ARG_1); + + error: + erts_smp_proc_unlock(BIF_P, ERTS_PROC_LOCK_LINK); + BIF_ERROR(BIF_P, BADARG); +} + +Uint +erts_run_queues_len(Uint *qlen) +{ + int i = 0; + Uint len = 0; + ERTS_ATOMIC_FOREACH_RUNQ(rq, + { + if (qlen) + qlen[i++] = rq->procs.len; + len += rq->procs.len; + } + ); + return len; +} + +#ifdef HARDDEBUG_RUNQS +static void +check_procs_runq(ErtsRunQueue *runq, Process *p_in_q, Process *p_not_in_q) +{ + int len[ERTS_NO_PROC_PRIO_LEVELS] = {0}; + int tot_len; + int prioq, prio; + int found_p_in_q; + Process *p, *prevp; + + found_p_in_q = 0; + for (prioq = 0; prioq < ERTS_NO_PROC_PRIO_LEVELS - 1; prioq++) { + prevp = NULL; + for (p = runq->procs.prio[prioq].first; p; p = p->next) { + ASSERT(p != p_not_in_q); + if (p == p_in_q) + found_p_in_q = 1; + 
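/* Low prio processes are queued on the normal prio
+	     * queue, so prioq and p->prio may legitimately differ. */
+	    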
switch (p->prio) { + case PRIORITY_MAX: + case PRIORITY_HIGH: + case PRIORITY_NORMAL: + ASSERT(prioq == p->prio); + break; + case PRIORITY_LOW: + ASSERT(prioq == PRIORITY_NORMAL); + break; + default: + ASSERT(!"Bad prio on process"); + } + len[p->prio]++; + ASSERT(prevp == p->prev); + if (p->prev) { + ASSERT(p->prev->next == p); + } + else { + ASSERT(runq->procs.prio[prioq].first == p); + } + if (p->next) { + ASSERT(p->next->prev == p); + } + else { + ASSERT(runq->procs.prio[prioq].last == p); + } + ASSERT(p->run_queue == runq); + prevp = p; + } + } + + ASSERT(!p_in_q || found_p_in_q); + + tot_len = 0; + for (prio = 0; prio < ERTS_NO_PROC_PRIO_LEVELS; prio++) { + ASSERT(len[prio] == runq->procs.prio_info[prio].len); + if (len[prio]) { + ASSERT(runq->flags & (1 << prio)); + } + else { + ASSERT(!(runq->flags & (1 << prio))); + } + tot_len += len[prio]; + } + ASSERT(runq->procs.len == tot_len); +} +# define ERTS_DBG_CHK_PROCS_RUNQ(RQ) check_procs_runq((RQ), NULL, NULL) +# define ERTS_DBG_CHK_PROCS_RUNQ_PROC(RQ, P) check_procs_runq((RQ), (P), NULL) +# define ERTS_DBG_CHK_PROCS_RUNQ_NOPROC(RQ, P) check_procs_runq((RQ), NULL, (P)) +#else +# define ERTS_DBG_CHK_PROCS_RUNQ(RQ) +# define ERTS_DBG_CHK_PROCS_RUNQ_PROC(RQ, P) +# define ERTS_DBG_CHK_PROCS_RUNQ_NOPROC(RQ, P) +#endif + + +static ERTS_INLINE void +enqueue_process(ErtsRunQueue *runq, Process *p) +{ + ErtsRunPrioQueue *rpq; + ErtsRunQueueInfo *rqi; + + ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(runq)); + ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p)); + + ASSERT(p->bound_runq || !(runq->flags & ERTS_RUNQ_FLG_SUSPENDED)); + + rqi = &runq->procs.prio_info[p->prio]; + rqi->len++; + if (rqi->max_len < rqi->len) + rqi->max_len = rqi->len; + + runq->procs.len++; + runq->len++; + if (runq->max_len < runq->len) + runq->max_len = runq->len; + + runq->flags |= (1 << p->prio); + + rpq = (p->prio == PRIORITY_LOW + ? &runq->procs.prio[PRIORITY_NORMAL] + : &runq->procs.prio[p->prio]); + + p->next = NULL; + p->prev = rpq->last; + if (rpq->last) + rpq->last->next = p; + else + rpq->first = p; + rpq->last = p; + + switch (p->status) { + case P_EXITING: + break; + case P_GARBING: + p->gcstatus = P_RUNABLE; + break; + default: + p->status = P_RUNABLE; + break; + } + +#ifdef ERTS_SMP + p->status_flags |= ERTS_PROC_SFLG_INRUNQ; +#endif + + ERTS_DBG_CHK_PROCS_RUNQ_PROC(runq, p); +} + + +static ERTS_INLINE int +dequeue_process(ErtsRunQueue *runq, Process *p) +{ + ErtsRunPrioQueue *rpq; + int res = 1; + + ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(runq)); + ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p)); + + ERTS_DBG_CHK_PROCS_RUNQ(runq); + + rpq = &runq->procs.prio[p->prio == PRIORITY_LOW ? 
PRIORITY_NORMAL : p->prio]; + if (p->prev) { + p->prev->next = p->next; + } + else if (rpq->first == p) { + rpq->first = p->next; + } + else { + res = 0; + } + if (p->next) { + p->next->prev = p->prev; + } + else if (rpq->last == p) { + rpq->last = p->prev; + } + else { + ASSERT(res == 0); + } + + if (res) { + + if (--runq->procs.prio_info[p->prio].len == 0) + runq->flags &= ~(1 << p->prio); + runq->procs.len--; + runq->len--; + +#ifdef ERTS_SMP + p->status_flags &= ~ERTS_PROC_SFLG_INRUNQ; +#endif + } + + ERTS_DBG_CHK_PROCS_RUNQ_NOPROC(runq, p); + return res; +} + +/* schedule a process */ +static ERTS_INLINE void +internal_add_to_runq(ErtsRunQueue *runq, Process *p) +{ + Uint32 prev_status = p->status; + ErtsRunQueue *add_runq; +#ifdef ERTS_SMP + + ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p)); + ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(runq)); + + if (p->status_flags & ERTS_PROC_SFLG_INRUNQ) + return; + else if (p->runq_flags & ERTS_PROC_RUNQ_FLG_RUNNING) { + ASSERT(p->status != P_SUSPENDED); + ERTS_DBG_CHK_PROCS_RUNQ_NOPROC(runq, p); + p->status_flags |= ERTS_PROC_SFLG_PENDADD2SCHEDQ; + return; + } + ASSERT(!p->scheduler_data); +#endif + + ERTS_DBG_CHK_PROCS_RUNQ_NOPROC(runq, p); +#ifndef ERTS_SMP + /* Never schedule a suspended process (ok in smp case) */ + ASSERT(p->status != P_SUSPENDED); + add_runq = runq; + +#else + ASSERT(!p->bound_runq || p->bound_runq == p->run_queue); + if (p->bound_runq) { + if (p->bound_runq == runq) + add_runq = runq; + else { + add_runq = p->bound_runq; + erts_smp_xrunq_lock(runq, add_runq); + } + } + else { + add_runq = erts_check_emigration_need(runq, p->prio); + if (!add_runq) + add_runq = runq; + else /* Process emigrated */ + p->run_queue = add_runq; + } +#endif + + /* Enqueue the process */ + enqueue_process(add_runq, p); + + if ((erts_system_profile_flags.runnable_procs) + && (prev_status == P_WAITING + || prev_status == P_SUSPENDED)) { + profile_runnable_proc(p, am_active); + } + + smp_notify_inc_runq(add_runq); + + if (add_runq != runq) + erts_smp_runq_unlock(add_runq); +} + + +void +erts_add_to_runq(Process *p) +{ + ErtsRunQueue *runq = erts_get_runq_proc(p); + erts_smp_runq_lock(runq); + internal_add_to_runq(runq, p); + erts_smp_runq_unlock(runq); +} + +/* Possibly remove a scheduled process we need to suspend */ + +static int +remove_proc_from_runq(ErtsRunQueue *rq, Process *p, int to_inactive) +{ + int res; + + ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p)); + +#ifdef ERTS_SMP + if (p->status_flags & ERTS_PROC_SFLG_PENDADD2SCHEDQ) { + p->status_flags &= ~ERTS_PROC_SFLG_PENDADD2SCHEDQ; + ASSERT(!remove_proc_from_runq(rq, p, 0)); + return 1; + } +#endif + + res = dequeue_process(rq, p); + + if (res && erts_system_profile_flags.runnable_procs && to_inactive) + profile_runnable_proc(p, am_inactive); + +#ifdef ERTS_SMP + ASSERT(!(p->status_flags & ERTS_PROC_SFLG_INRUNQ)); +#endif + + return res; +} + +#ifdef ERTS_SMP + +ErtsMigrateResult +erts_proc_migrate(Process *p, ErtsProcLocks *plcks, + ErtsRunQueue *from_rq, int *from_locked, + ErtsRunQueue *to_rq, int *to_locked) +{ + ERTS_SMP_LC_ASSERT(*plcks == erts_proc_lc_my_proc_locks(p)); + ERTS_SMP_LC_ASSERT((ERTS_PROC_LOCK_STATUS & *plcks) + || from_locked); + ERTS_SMP_LC_CHK_RUNQ_LOCK(from_rq, *from_locked); + ERTS_SMP_LC_CHK_RUNQ_LOCK(to_rq, *to_locked); + + ASSERT(!erts_common_run_queue); + + /* + * If we have the lock on the run queue to migrate to, + * check that it isn't suspended. 
If it is suspended, + * we will refuse to migrate to it anyway. + */ + if (*to_locked && (to_rq->flags & ERTS_RUNQ_FLG_SUSPENDED)) + return ERTS_MIGRATE_FAILED_RUNQ_SUSPENDED; + + /* We need status lock on process and locks on both run queues */ + + if (!(ERTS_PROC_LOCK_STATUS & *plcks)) { + if (erts_smp_proc_trylock(p, ERTS_PROC_LOCK_STATUS) == EBUSY) { + ErtsProcLocks lcks = *plcks; + Eterm pid = p->id; + Process *proc = *plcks ? p : NULL; + + if (*from_locked) { + *from_locked = 0; + erts_smp_runq_unlock(from_rq); + } + if (*to_locked) { + *to_locked = 0; + erts_smp_runq_unlock(to_rq); + } + + proc = erts_pid2proc_opt(proc, + lcks, + pid, + lcks|ERTS_PROC_LOCK_STATUS, + ERTS_P2P_FLG_ALLOW_OTHER_X); + if (!proc) { + *plcks = 0; + return ERTS_MIGRATE_FAILED_NOT_IN_RUNQ; + } + ASSERT(proc == p); + } + *plcks |= ERTS_PROC_LOCK_STATUS; + } + + ASSERT(!p->bound_runq); + + ERTS_SMP_LC_CHK_RUNQ_LOCK(from_rq, *from_locked); + ERTS_SMP_LC_CHK_RUNQ_LOCK(to_rq, *to_locked); + + if (p->run_queue != from_rq) + return ERTS_MIGRATE_FAILED_RUNQ_CHANGED; + + if (!*from_locked || !*to_locked) { + if (from_rq < to_rq) { + if (!*to_locked) { + if (!*from_locked) + erts_smp_runq_lock(from_rq); + erts_smp_runq_lock(to_rq); + } + else if (erts_smp_runq_trylock(from_rq) == EBUSY) { + erts_smp_runq_unlock(to_rq); + erts_smp_runq_lock(from_rq); + erts_smp_runq_lock(to_rq); + } + } + else { + if (!*from_locked) { + if (!*to_locked) + erts_smp_runq_lock(to_rq); + erts_smp_runq_lock(from_rq); + } + else if (erts_smp_runq_trylock(to_rq) == EBUSY) { + erts_smp_runq_unlock(from_rq); + erts_smp_runq_lock(to_rq); + erts_smp_runq_lock(from_rq); + } + } + *to_locked = *from_locked = 1; + } + + ERTS_SMP_LC_CHK_RUNQ_LOCK(from_rq, *from_locked); + ERTS_SMP_LC_CHK_RUNQ_LOCK(to_rq, *to_locked); + + /* Ok we now got all locks we need; do it... */ + + /* Refuse to migrate to a suspended run queue */ + if (to_rq->flags & ERTS_RUNQ_FLG_SUSPENDED) + return ERTS_MIGRATE_FAILED_RUNQ_SUSPENDED; + + if ((p->runq_flags & ERTS_PROC_RUNQ_FLG_RUNNING) + || !(p->status_flags & ERTS_PROC_SFLG_INRUNQ)) + return ERTS_MIGRATE_FAILED_NOT_IN_RUNQ; + + dequeue_process(from_rq, p); + p->run_queue = to_rq; + enqueue_process(to_rq, p); + + return ERTS_MIGRATE_SUCCESS; +} +#endif /* ERTS_SMP */ + +Eterm +erts_process_status(Process *c_p, ErtsProcLocks c_p_locks, + Process *rp, Eterm rpid) +{ + Eterm res = am_undefined; + Process *p; + + if (rp) { + ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS + & erts_proc_lc_my_proc_locks(rp)); + p = rp; + } + else { + p = erts_pid2proc_opt(c_p, c_p_locks, + rpid, ERTS_PROC_LOCK_STATUS, + ERTS_P2P_FLG_ALLOW_OTHER_X); + } + + if (p) { + switch (p->status) { + case P_RUNABLE: + res = am_runnable; + break; + case P_WAITING: + res = am_waiting; + break; + case P_RUNNING: + res = am_running; + break; + case P_EXITING: + res = am_exiting; + break; + case P_GARBING: + res = am_garbage_collecting; + break; + case P_SUSPENDED: + res = am_suspended; + break; + case P_FREE: /* We cannot look up a process in P_FREE... */ + default: /* Not a valid status... 
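+	     * (a lookup never returns a P_FREE process, so reaching
+	     * this point means the status field is corrupt)
+	     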
*/ + erl_exit(1, "Bad status (%b32u) found for process %T\n", + p->status, p->id); + break; + } + +#ifdef ERTS_SMP + if (!rp && (p != c_p || !(ERTS_PROC_LOCK_STATUS & c_p_locks))) + erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + } + else { + int i; + ErtsSchedulerData *esdp; + + if (erts_common_run_queue) + erts_smp_runq_lock(erts_common_run_queue); + + for (i = 0; i < erts_no_schedulers; i++) { + esdp = ERTS_SCHEDULER_IX(i); + if (!erts_common_run_queue) + erts_smp_runq_lock(esdp->run_queue); + if (esdp->free_process && esdp->free_process->id == rpid) { + res = am_free; + if (!erts_common_run_queue) + erts_smp_runq_unlock(esdp->run_queue); + break; + } + if (!erts_common_run_queue) + erts_smp_runq_unlock(esdp->run_queue); + } + + if (erts_common_run_queue) + erts_smp_runq_unlock(erts_common_run_queue); +#endif + + } + + return res; +} + +/* +** Suspend a process +** If we are to suspend on a port the busy_port is the thing +** otherwise busy_port is NIL +*/ + +void +erts_suspend(Process* process, ErtsProcLocks process_locks, Port *busy_port) +{ + ErtsRunQueue *rq; + + ERTS_SMP_LC_ASSERT(process_locks == erts_proc_lc_my_proc_locks(process)); + if (!(process_locks & ERTS_PROC_LOCK_STATUS)) + erts_smp_proc_lock(process, ERTS_PROC_LOCK_STATUS); + + rq = erts_get_runq_proc(process); + + erts_smp_runq_lock(rq); + + suspend_process(rq, process); + + erts_smp_runq_unlock(rq); + + if (busy_port) + erts_wake_process_later(busy_port, process); + + if (!(process_locks & ERTS_PROC_LOCK_STATUS)) + erts_smp_proc_unlock(process, ERTS_PROC_LOCK_STATUS); + +} + +void +erts_resume(Process* process, ErtsProcLocks process_locks) +{ + ERTS_SMP_LC_ASSERT(process_locks == erts_proc_lc_my_proc_locks(process)); + if (!(process_locks & ERTS_PROC_LOCK_STATUS)) + erts_smp_proc_lock(process, ERTS_PROC_LOCK_STATUS); + resume_process(process); + if (!(process_locks & ERTS_PROC_LOCK_STATUS)) + erts_smp_proc_unlock(process, ERTS_PROC_LOCK_STATUS); +} + +int +erts_resume_processes(ErtsProcList *plp) +{ + int nresumed = 0; + while (plp) { + Process *proc; + ErtsProcList *fplp; + ASSERT(is_internal_pid(plp->pid)); + proc = erts_pid2proc(NULL, 0, plp->pid, ERTS_PROC_LOCK_STATUS); + if (proc) { + if (proclist_same(plp, proc)) { + resume_process(proc); + nresumed++; + } + erts_smp_proc_unlock(proc, ERTS_PROC_LOCK_STATUS); + } + fplp = plp; + plp = plp->next; + proclist_destroy(fplp); + } + return nresumed; +} + +Eterm +erts_get_process_priority(Process *p) +{ + ErtsRunQueue *rq; + Eterm value; + ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p)); + rq = erts_get_runq_proc(p); + erts_smp_runq_lock(rq); + switch(p->prio) { + case PRIORITY_MAX: value = am_max; break; + case PRIORITY_HIGH: value = am_high; break; + case PRIORITY_NORMAL: value = am_normal; break; + case PRIORITY_LOW: value = am_low; break; + default: ASSERT(0); value = am_undefined; break; + } + erts_smp_runq_unlock(rq); + return value; +} + +Eterm +erts_set_process_priority(Process *p, Eterm new_value) +{ + ErtsRunQueue *rq; + Eterm old_value; + ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p)); + rq = erts_get_runq_proc(p); +#ifdef ERTS_SMP + ASSERT(!(p->status_flags & ERTS_PROC_SFLG_INRUNQ)); +#endif + erts_smp_runq_lock(rq); + switch(p->prio) { + case PRIORITY_MAX: old_value = am_max; break; + case PRIORITY_HIGH: old_value = am_high; break; + case PRIORITY_NORMAL: old_value = am_normal; break; + case PRIORITY_LOW: old_value = am_low; break; + default: ASSERT(0); old_value = am_undefined; break; + } + switch 
(new_value) { + case am_max: p->prio = PRIORITY_MAX; break; + case am_high: p->prio = PRIORITY_HIGH; break; + case am_normal: p->prio = PRIORITY_NORMAL; break; + case am_low: p->prio = PRIORITY_LOW; break; + default: old_value = THE_NON_VALUE; break; + } + erts_smp_runq_unlock(rq); + return old_value; +} + +#ifdef ERTS_SMP + +static ERTS_INLINE int +prepare_for_sys_schedule(void) +{ + while (!erts_port_task_have_outstanding_io_tasks() + && !erts_smp_atomic_xchg(&doing_sys_schedule, 1)) { + if (!erts_port_task_have_outstanding_io_tasks()) + return 1; + erts_smp_atomic_set(&doing_sys_schedule, 0); + } + return 0; +} + +#else + +static ERTS_INLINE int +prepare_for_sys_schedule(void) +{ + return !erts_port_task_have_outstanding_io_tasks(); +} + +#endif + +/* note that P_RUNNING is only set so that we don't try to remove +** running processes from the schedule queue if they exit - a running +** process not being in the schedule queue!! +** Schedule for up to INPUT_REDUCTIONS context switches, +** return 1 if more to do. +*/ + +/* + * schedule() is called from BEAM (process_main()) or HiPE + * (hipe_mode_switch()) when the current process is to be + * replaced by a new process. 'calls' is the number of reduction + * steps the current process consumed. + * schedule() returns the new process, and the new process' + * ->fcalls field is initialised with its allowable number of + * reduction steps. + * + * When no process is runnable, or when sufficiently many reduction + * steps have been made, schedule() calls erl_sys_schedule() to + * schedule system-level activities. + * + * We use the same queue for normal and low prio processes. + * We reschedule low prio processes a certain number of times + * so that normal processes get to run more frequently. + */ + +Process *schedule(Process *p, int calls) +{ + ErtsRunQueue *rq; + ErtsRunPrioQueue *rpq; + long dt; + ErtsSchedulerData *esdp; + int context_reds; + long fcalls; + int input_reductions; + int actual_reds; + int reds; + + if (ERTS_USE_MODIFIED_TIMING()) { + context_reds = ERTS_MODIFIED_TIMING_CONTEXT_REDS; + input_reductions = ERTS_MODIFIED_TIMING_INPUT_REDS; + } + else { + context_reds = CONTEXT_REDS; + input_reductions = INPUT_REDUCTIONS; + } + + ERTS_SMP_LC_ASSERT(!ERTS_LC_IS_BLOCKING); + + /* + * Clean up after the process being scheduled out. 
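+     * (account its reductions, emit scheduling trace events, handle
+     * any pending exit or pending suspenders, and free the process
+     * structure if it has reached P_FREE)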
+ */ + if (!p) { /* NULL in the very first schedule() call */ + esdp = erts_get_scheduler_data(); + rq = erts_get_runq_current(esdp); + ASSERT(esdp); + fcalls = erts_smp_atomic_read(&function_calls); + actual_reds = reds = 0; + erts_smp_runq_lock(rq); + } else { +#ifdef ERTS_SMP + ERTS_SMP_CHK_HAVE_ONLY_MAIN_PROC_LOCK(p); + esdp = p->scheduler_data; + ASSERT(esdp->current_process == p + || esdp->free_process == p); +#else + esdp = erts_scheduler_data; + ASSERT(esdp->current_process == p); +#endif + reds = actual_reds = calls - esdp->virtual_reds; + if (reds < ERTS_PROC_MIN_CONTEXT_SWITCH_REDS_COST) + reds = ERTS_PROC_MIN_CONTEXT_SWITCH_REDS_COST; + esdp->virtual_reds = 0; + + fcalls = erts_smp_atomic_addtest(&function_calls, reds); + ASSERT(esdp && esdp == erts_get_scheduler_data()); + + rq = erts_get_runq_current(esdp); + + p->reds += actual_reds; + + erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS); + + if ((erts_system_profile_flags.runnable_procs) + && (p->status == P_WAITING)) { + profile_runnable_proc(p, am_inactive); + } + + if (IS_TRACED(p)) { + switch (p->status) { + case P_EXITING: + if (ARE_TRACE_FLAGS_ON(p, F_TRACE_SCHED_EXIT)) + trace_sched(p, am_out_exiting); + break; + case P_FREE: + if (ARE_TRACE_FLAGS_ON(p, F_TRACE_SCHED_EXIT)) + trace_sched(p, am_out_exited); + break; + default: + if (ARE_TRACE_FLAGS_ON(p, F_TRACE_SCHED)) + trace_sched(p, am_out); + else if (ARE_TRACE_FLAGS_ON(p, F_TRACE_SCHED_PROCS)) + trace_virtual_sched(p, am_out); + break; + } + } + +#ifdef ERTS_SMP + if (ERTS_PROC_PENDING_EXIT(p)) { + erts_handle_pending_exit(p, + ERTS_PROC_LOCK_MAIN|ERTS_PROC_LOCK_STATUS); + p->status_flags |= ERTS_PROC_SFLG_PENDADD2SCHEDQ; + } + + if (p->pending_suspenders) { + handle_pending_suspend(p, + ERTS_PROC_LOCK_MAIN|ERTS_PROC_LOCK_STATUS); + ASSERT(!(p->status_flags & ERTS_PROC_SFLG_PENDADD2SCHEDQ) + || p->status != P_SUSPENDED); + } +#endif + erts_smp_runq_lock(rq); + + ERTS_PROC_REDUCTIONS_EXECUTED(rq, p->prio, reds, actual_reds); + + esdp->current_process = NULL; +#ifdef ERTS_SMP + p->scheduler_data = NULL; + p->runq_flags &= ~ERTS_PROC_RUNQ_FLG_RUNNING; + p->status_flags &= ~ERTS_PROC_SFLG_RUNNING; + + if (p->status_flags & ERTS_PROC_SFLG_PENDADD2SCHEDQ) { + p->status_flags &= ~ERTS_PROC_SFLG_PENDADD2SCHEDQ; + internal_add_to_runq(rq, p); + } +#endif + + + if (p->status == P_FREE) { +#ifdef ERTS_SMP + ASSERT(esdp->free_process == p); + esdp->free_process = NULL; + erts_smp_proc_unlock(p, ERTS_PROC_LOCK_MAIN|ERTS_PROC_LOCK_STATUS); + erts_smp_proc_dec_refc(p); +#else + erts_free_proc(p); +#endif + } else { + erts_smp_proc_unlock(p, ERTS_PROC_LOCK_MAIN|ERTS_PROC_LOCK_STATUS); + } + +#ifdef ERTS_SMP + { + ErtsProcList *pnd_xtrs = rq->procs.pending_exiters; + rq->procs.pending_exiters = NULL; + + if (pnd_xtrs) { + erts_smp_runq_unlock(rq); + handle_pending_exiters(pnd_xtrs); + erts_smp_runq_lock(rq); + } + + } + ASSERT(!esdp->free_process); +#endif + ASSERT(!esdp->current_process); + + ERTS_SMP_CHK_NO_PROC_LOCKS; + + dt = do_time_read_and_reset(); + if (dt) { + erts_smp_runq_unlock(rq); + bump_timer(dt); + erts_smp_runq_lock(rq); + } + BM_STOP_TIMER(system); + + } + + ERTS_SMP_LC_ASSERT(!ERTS_LC_IS_BLOCKING); + check_activities_to_run: { + +#ifdef ERTS_SMP + + if (!(rq->flags & ERTS_RUNQ_FLG_SHARED_RUNQ) + && rq->check_balance_reds <= 0) { + check_balance(rq); + } + + ERTS_SMP_LC_ASSERT(!ERTS_LC_IS_BLOCKING); + ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + + if (rq->flags & ERTS_RUNQ_FLGS_IMMIGRATE_QMASK) + immigrate(rq); + + continue_check_activities_to_run: + + if 
(rq->flags & (ERTS_RUNQ_FLG_SHARED_RUNQ + | ERTS_RUNQ_FLG_CHK_CPU_BIND + | ERTS_RUNQ_FLG_SUSPENDED)) { + if ((rq->flags & ERTS_RUNQ_FLG_SUSPENDED) + || erts_smp_atomic_read(&esdp->suspended)) { + suspend_scheduler(esdp); + } + if ((rq->flags & ERTS_RUNQ_FLG_CHK_CPU_BIND) + || erts_smp_atomic_read(&esdp->chk_cpu_bind)) { + check_cpu_bind(esdp); + } + } + +#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN + if (esdp->check_children) { + esdp->check_children = 0; + erts_smp_runq_unlock(rq); + erts_check_children(); + erts_smp_runq_lock(rq); + } +#endif + + erts_smp_chk_system_block(prepare_for_block, + resume_after_block, + (void *) rq); + + ERTS_SMP_LC_ASSERT(!ERTS_LC_IS_BLOCKING); + ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + +#endif + + ASSERT(rq->len == rq->procs.len + rq->ports.info.len); + +#ifndef ERTS_SMP + + if (rq->len == 0 && !rq->misc.start) + goto do_sys_schedule; + +#else /* ERTS_SMP */ + if (rq->len == 0 && !rq->misc.start) { + ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + + rq->wakeup_other = 0; + rq->wakeup_other_reds = 0; + + empty_runq(rq); + + if (rq->flags & (ERTS_RUNQ_FLG_SHARED_RUNQ + | ERTS_RUNQ_FLG_SUSPENDED)) { + if ((rq->flags & ERTS_RUNQ_FLG_SUSPENDED) + || erts_smp_atomic_read(&esdp->suspended)) { + non_empty_runq(rq); + goto continue_check_activities_to_run; + } + } + else if (!(rq->flags & ERTS_RUNQ_FLG_INACTIVE)) { + /* + * Check for ERTS_RUNQ_FLG_SUSPENDED has to be done + * after trying to steal a task. + */ + if (try_steal_task(rq) + || (rq->flags & ERTS_RUNQ_FLG_SUSPENDED)) { + non_empty_runq(rq); + goto continue_check_activities_to_run; + } + } + + if (prepare_for_sys_schedule()) { + erts_smp_atomic_set(&function_calls, 0); + fcalls = 0; + sched_sys_wait(esdp->no, rq); + erts_smp_atomic_set(&doing_sys_schedule, 0); + } + else { + /* If all schedulers are waiting, one of them *should* + be waiting in erl_sys_schedule() */ + sched_cnd_wait(esdp->no, rq); + } + + non_empty_runq(rq); + + goto check_activities_to_run; + } + else +#endif /* ERTS_SMP */ + if (fcalls > input_reductions && prepare_for_sys_schedule()) { + int runnable; + +#ifdef ERTS_SMP + runnable = 1; +#else + do_sys_schedule: + runnable = rq->len != 0; + if (!runnable) + sched_waiting_sys(esdp->no, rq); +#endif + + /* + * Schedule system-level activities. 
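+	     * (reset the function_calls counter, poll for I/O via
+	     * erl_sys_schedule(), and drive expired timers through
+	     * bump_timer())
+	     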
+ */ + + erts_smp_atomic_set(&function_calls, 0); + fcalls = 0; + ASSERT(!erts_port_task_have_outstanding_io_tasks()); +#ifdef ERTS_SMP + /* erts_sys_schedule_interrupt(0); */ +#endif + erts_smp_runq_unlock(rq); + erl_sys_schedule(runnable); + dt = do_time_read_and_reset(); + if (dt) bump_timer(dt); +#ifdef ERTS_SMP + erts_smp_runq_lock(rq); + erts_smp_atomic_set(&doing_sys_schedule, 0); + goto continue_check_activities_to_run; +#else + if (!runnable) + sched_active_sys(esdp->no, rq); + goto check_activities_to_run; +#endif + } + + if (rq->misc.start) + exec_misc_ops(rq); + +#ifdef ERTS_SMP + { + int wo_reds = rq->wakeup_other_reds; + if (wo_reds) { + if (rq->len < 2) { + rq->wakeup_other -= ERTS_WAKEUP_OTHER_DEC*wo_reds; + if (rq->wakeup_other < 0) + rq->wakeup_other = 0; + } + else if (rq->wakeup_other < ERTS_WAKEUP_OTHER_LIMIT) + rq->wakeup_other += rq->len*wo_reds + ERTS_WAKEUP_OTHER_FIXED_INC; + else { + if (erts_common_run_queue) { + if (erts_common_run_queue->waiting) + wake_one_scheduler(); + } + else if (erts_smp_atomic_read(&no_empty_run_queues) != 0) { + wake_scheduler_on_empty_runq(rq); + rq->wakeup_other = 0; + } + rq->wakeup_other = 0; + } + } + rq->wakeup_other_reds = 0; + } +#endif + + /* + * Find a new port to run. + */ + + if (rq->ports.info.len) { + int have_outstanding_io; + have_outstanding_io = erts_port_task_execute(rq, &esdp->current_port); + if (have_outstanding_io && fcalls > 2*input_reductions) { + /* + * If we have performed more than 2*INPUT_REDUCTIONS since + * last call to erl_sys_schedule() and we still haven't + * handled all I/O tasks we stop running processes and + * focus completely on ports. + * + * One could argue that this is a strange behavior. The + * reason for doing it this way is that it is similar + * to the behavior before port tasks were introduced. + * We don't want to change the behavior too much, at + * least not at the time of writing. This behavior + * might change in the future. + * + * /rickard + */ + goto check_activities_to_run; + } + } + + /* + * Find a new process to run. + */ + pick_next_process: + + ERTS_DBG_CHK_PROCS_RUNQ(rq); + + switch (rq->flags & ERTS_RUNQ_FLGS_PROCS_QMASK) { + case MAX_BIT: + case MAX_BIT|HIGH_BIT: + case MAX_BIT|NORMAL_BIT: + case MAX_BIT|LOW_BIT: + case MAX_BIT|HIGH_BIT|NORMAL_BIT: + case MAX_BIT|HIGH_BIT|LOW_BIT: + case MAX_BIT|NORMAL_BIT|LOW_BIT: + case MAX_BIT|HIGH_BIT|NORMAL_BIT|LOW_BIT: + rpq = &rq->procs.prio[PRIORITY_MAX]; + break; + case HIGH_BIT: + case HIGH_BIT|NORMAL_BIT: + case HIGH_BIT|LOW_BIT: + case HIGH_BIT|NORMAL_BIT|LOW_BIT: + rpq = &rq->procs.prio[PRIORITY_HIGH]; + break; + case NORMAL_BIT: + rpq = &rq->procs.prio[PRIORITY_NORMAL]; + break; + case LOW_BIT: + rpq = &rq->procs.prio[PRIORITY_NORMAL]; + break; + case NORMAL_BIT|LOW_BIT: + rpq = &rq->procs.prio[PRIORITY_NORMAL]; + ASSERT(rpq->first != NULL); + p = rpq->first; + if (p->prio == PRIORITY_LOW) { + if (p == rpq->last || p->skipped >= RESCHEDULE_LOW-1) + p->skipped = 0; + else { + /* skip it */ + p->skipped++; + rpq->first = p->next; + rpq->first->prev = NULL; + rpq->last->next = p; + p->prev = rpq->last; + p->next = NULL; + rpq->last = p; + goto pick_next_process; + } + } + break; + case 0: /* No process at all */ + default: + ASSERT((rq->flags & ERTS_RUNQ_FLGS_PROCS_QMASK) == 0); + ASSERT(rq->procs.len == 0); + goto check_activities_to_run; + } + + BM_START_TIMER(system); + + /* + * Take the chosen process out of the queue. + */ + ASSERT(rpq->first); /* Wrong qmask in rq->flags? 
*/
+	p = rpq->first;
+#ifdef ERTS_SMP
+	ERTS_SMP_LC_ASSERT(rq == p->run_queue);
+#endif
+	rpq->first = p->next;
+	if (!rpq->first)
+	    rpq->last = NULL;
+	else
+	    rpq->first->prev = NULL;
+
+	p->next = p->prev = NULL;
+
+	if (--rq->procs.prio_info[p->prio].len == 0)
+	    rq->flags &= ~(1 << p->prio);
+	ASSERT(rq->procs.len > 0);
+	rq->procs.len--;
+	ASSERT(rq->len > 0);
+	rq->len--;
+
+	{
+	    Uint32 ee_flgs = (ERTS_RUNQ_FLG_EVACUATE(p->prio)
+			      | ERTS_RUNQ_FLG_EMIGRATE(p->prio));
+
+	    if ((rq->flags & (ERTS_RUNQ_FLG_SUSPENDED|ee_flgs)) == ee_flgs)
+		ERTS_UNSET_RUNQ_FLG_EVACUATE(rq->flags, p->prio);
+	}
+
+	ERTS_DBG_CHK_PROCS_RUNQ_NOPROC(rq, p);
+
+	rq->procs.context_switches++;
+
+	esdp->current_process = p;
+
+#ifdef ERTS_SMP
+	p->runq_flags |= ERTS_PROC_RUNQ_FLG_RUNNING;
+	erts_smp_runq_unlock(rq);
+
+	ERTS_SMP_CHK_NO_PROC_LOCKS;
+
+	erts_smp_proc_lock(p, ERTS_PROC_LOCK_MAIN|ERTS_PROC_LOCK_STATUS);
+
+	if (erts_sched_stat.enabled) {
+	    Uint old = ERTS_PROC_SCHED_ID(p,
+					  (ERTS_PROC_LOCK_MAIN
+					   | ERTS_PROC_LOCK_STATUS),
+					  esdp->no);
+	    int migrated = old && old != esdp->no;
+
+	    erts_smp_spin_lock(&erts_sched_stat.lock);
+	    erts_sched_stat.prio[p->prio].total_executed++;
+	    erts_sched_stat.prio[p->prio].executed++;
+	    if (migrated) {
+		erts_sched_stat.prio[p->prio].total_migrated++;
+		erts_sched_stat.prio[p->prio].migrated++;
+	    }
+	    erts_smp_spin_unlock(&erts_sched_stat.lock);
+	}
+
+	p->status_flags |= ERTS_PROC_SFLG_RUNNING;
+	p->status_flags &= ~ERTS_PROC_SFLG_INRUNQ;
+	if (ERTS_PROC_PENDING_EXIT(p)) {
+	    erts_handle_pending_exit(p,
+				     ERTS_PROC_LOCK_MAIN|ERTS_PROC_LOCK_STATUS);
+	}
+	ASSERT(!p->scheduler_data);
+	p->scheduler_data = esdp;
+
+#endif
+	ASSERT(p->status != P_SUSPENDED); /* Never run a suspended process */
+
+	ACTIVATE(p);
+	reds = context_reds;
+
+	if (IS_TRACED(p)) {
+	    switch (p->status) {
+	    case P_EXITING:
+		if (ARE_TRACE_FLAGS_ON(p, F_TRACE_SCHED_EXIT))
+		    trace_sched(p, am_in_exiting);
+		break;
+	    default:
+		if (ARE_TRACE_FLAGS_ON(p, F_TRACE_SCHED))
+		    trace_sched(p, am_in);
+		else if (ARE_TRACE_FLAGS_ON(p, F_TRACE_SCHED_PROCS))
+		    trace_virtual_sched(p, am_in);
+		break;
+	    }
+	}
+	if (p->status != P_EXITING)
+	    p->status = P_RUNNING;
+
+	erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS);
+
+#ifdef ERTS_SMP
+	if (is_not_nil(p->tracer_proc))
+	    erts_check_my_tracer_proc(p);
+#endif
+
+	if ((FLAGS(p) & F_FORCE_GC) || (MSO(p).overhead >= BIN_VHEAP_SZ(p))) {
+	    reds -= erts_garbage_collect(p, 0, p->arg_reg, p->arity);
+	    if (reds < 0) {
+		reds = 1;
+	    }
+	}
+
+	p->fcalls = reds;
+	ASSERT(IS_ACTIVE(p));
+	ERTS_SMP_CHK_HAVE_ONLY_MAIN_PROC_LOCK(p);
+	return p;
+    }
+}
+
+void
+erts_sched_stat_modify(int what)
+{
+    int ix;
+    switch (what) {
+    case ERTS_SCHED_STAT_MODIFY_ENABLE:
+	erts_smp_block_system(0);
+	erts_sched_stat.enabled = 1;
+	erts_smp_release_system();
+	break;
+    case ERTS_SCHED_STAT_MODIFY_DISABLE:
+	erts_smp_block_system(0);
+	erts_sched_stat.enabled = 0;
+	erts_smp_release_system();
+	break;
+    case ERTS_SCHED_STAT_MODIFY_CLEAR:
+	erts_smp_spin_lock(&erts_sched_stat.lock);
+	for (ix = 0; ix < ERTS_NO_PRIO_LEVELS; ix++) {
+	    erts_sched_stat.prio[ix].total_executed = 0;
+	    erts_sched_stat.prio[ix].executed = 0;
+	    erts_sched_stat.prio[ix].total_migrated = 0;
+	    erts_sched_stat.prio[ix].migrated = 0;
+	}
+	erts_smp_spin_unlock(&erts_sched_stat.lock);
+	break;
+    }
+}
+
+Eterm
+erts_sched_stat_term(Process *p, int total)
+{
+    Uint sz;
+    Uint *hp;
+    Eterm prio[ERTS_NO_PRIO_LEVELS];
+    Uint executed[ERTS_NO_PRIO_LEVELS];
+    Uint migrated[ERTS_NO_PRIO_LEVELS];
+
+    erts_smp_spin_lock(&erts_sched_stat.lock);
+    
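/* Snapshot the counters under the spin lock; the non-total
+     * variant below also resets the per-interval counters. */
+    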
if (total) { + int i; + for (i = 0; i < ERTS_NO_PRIO_LEVELS; i++) { + prio[i] = erts_sched_stat.prio[i].name; + executed[i] = erts_sched_stat.prio[i].total_executed; + migrated[i] = erts_sched_stat.prio[i].total_migrated; + } + } + else { + int i; + for (i = 0; i < ERTS_NO_PRIO_LEVELS; i++) { + prio[i] = erts_sched_stat.prio[i].name; + executed[i] = erts_sched_stat.prio[i].executed; + erts_sched_stat.prio[i].executed = 0; + migrated[i] = erts_sched_stat.prio[i].migrated; + erts_sched_stat.prio[i].migrated = 0; + } + } + erts_smp_spin_unlock(&erts_sched_stat.lock); + + sz = 0; + (void) erts_bld_atom_2uint_3tup_list(NULL, &sz, ERTS_NO_PRIO_LEVELS, + prio, executed, migrated); + hp = HAlloc(p, sz); + return erts_bld_atom_2uint_3tup_list(&hp, NULL, ERTS_NO_PRIO_LEVELS, + prio, executed, migrated); +} + +/* + * Scheduling of misc stuff + */ + +void +erts_schedule_misc_op(void (*func)(void *), void *arg) +{ + ErtsRunQueue *rq = erts_get_runq_current(NULL); + ErtsMiscOpList *molp = misc_op_list_alloc(); + + erts_smp_runq_lock(rq); + + while (rq->misc.evac_runq) { + ErtsRunQueue *tmp_rq = rq->misc.evac_runq; + erts_smp_runq_unlock(rq); + rq = tmp_rq; + erts_smp_runq_lock(rq); + } + + ASSERT(!(rq->flags & ERTS_RUNQ_FLG_SUSPENDED)); + + molp->next = NULL; + molp->func = func; + molp->arg = arg; + if (rq->misc.end) + rq->misc.end->next = molp; + else + rq->misc.start = molp; + rq->misc.end = molp; + smp_notify_inc_runq(rq); + erts_smp_runq_unlock(rq); +} + +static void +exec_misc_ops(ErtsRunQueue *rq) +{ + int i; + ErtsMiscOpList *molp = rq->misc.start; + ErtsMiscOpList *tmp_molp = molp; + + for (i = 0; i < ERTS_MAX_MISC_OPS-1; i++) { + if (!tmp_molp) + goto mtq; + tmp_molp = tmp_molp->next; + } + + if (!tmp_molp) { + mtq: + rq->misc.start = NULL; + rq->misc.end = NULL; + } + else { + rq->misc.start = tmp_molp->next; + tmp_molp->next = NULL; + if (!rq->misc.start) + rq->misc.end = NULL; + } + + erts_smp_runq_unlock(rq); + + while (molp) { + tmp_molp = molp; + (*molp->func)(molp->arg); + molp = molp->next; + misc_op_list_free(tmp_molp); + } + + erts_smp_runq_lock(rq); +} + +Uint +erts_get_total_context_switches(void) +{ + Uint res = 0; + ERTS_ATOMIC_FOREACH_RUNQ(rq, res += rq->procs.context_switches); + return res; +} + +void +erts_get_total_reductions(Uint *redsp, Uint *diffp) +{ + Uint reds = 0; + ERTS_ATOMIC_FOREACH_RUNQ_X(rq, + + reds += rq->procs.reductions, + + if (redsp) *redsp = reds; + if (diffp) *diffp = reds - last_reductions; + last_reductions = reds); +} + +void +erts_get_exact_total_reductions(Process *c_p, Uint *redsp, Uint *diffp) +{ + Uint reds = erts_current_reductions(c_p, c_p); + int ix; + erts_smp_proc_unlock(c_p, ERTS_PROC_LOCK_MAIN); + /* + * Wait for other schedulers to schedule out their processes + * and update 'reductions'. + */ + erts_smp_block_system(0); + for (reds = 0, ix = 0; ix < erts_no_run_queues; ix++) + reds += ERTS_RUNQ_IX(ix)->procs.reductions; + if (redsp) + *redsp = reds; + if (diffp) + *diffp = reds - last_exact_reductions; + last_exact_reductions = reds; + erts_smp_release_system(); + erts_smp_proc_lock(c_p, ERTS_PROC_LOCK_MAIN); +} + +/* + * erts_test_next_pid() is only used for testing. + */ +Sint +erts_test_next_pid(int set, Uint next) +{ + Sint res; + Sint p_prev; + + + erts_smp_mtx_lock(&proc_tab_mtx); + + if (!set) { + res = p_next < 0 ? 
-1 : (p_serial << p_serial_shift | p_next); + } + else { + + p_serial = (Sint) ((next >> p_serial_shift) & p_serial_mask); + p_next = (Sint) (erts_process_tab_index_mask & next); + + if (p_next >= erts_max_processes) { + p_next = 0; + p_serial++; + p_serial &= p_serial_mask; + } + + p_prev = p_next; + + do { + if (!process_tab[p_next]) + break; + p_next++; + if(p_next >= erts_max_processes) { + p_next = 0; + p_serial++; + p_serial &= p_serial_mask; + } + } while (p_prev != p_next); + + res = process_tab[p_next] ? -1 : (p_serial << p_serial_shift | p_next); + + } + + erts_smp_mtx_unlock(&proc_tab_mtx); + + return res; + +} + +Uint erts_process_count(void) +{ + long res = erts_smp_atomic_read(&process_count); + ASSERT(res >= 0); + return (Uint) res; +} + +void +erts_free_proc(Process *p) +{ +#if defined(ERTS_ENABLE_LOCK_COUNT) && defined(ERTS_SMP) + erts_lcnt_proc_lock_destroy(p); +#endif + erts_free(ERTS_ALC_T_PROC, (void *) p); +} + + +/* +** Allocate process and find out where to place next process. +*/ +static Process* +alloc_process(void) +{ +#ifdef ERTS_SMP + erts_pix_lock_t *pix_lock; +#endif + Process* p; + int p_prev; + + erts_smp_mtx_lock(&proc_tab_mtx); + + if (p_next == -1) { + p = NULL; + goto error; /* Process table full! */ + } + + p = (Process*) erts_alloc_fnf(ERTS_ALC_T_PROC, sizeof(Process)); + if (!p) + goto error; /* ENOMEM */ + + p_last = p_next; + + erts_get_emu_time(&p->started); + +#ifdef ERTS_SMP + pix_lock = ERTS_PIX2PIXLOCK(p_next); + erts_pix_lock(pix_lock); +#endif + ASSERT(!process_tab[p_next]); + + process_tab[p_next] = p; + erts_smp_atomic_inc(&process_count); + p->id = make_internal_pid(p_serial << p_serial_shift | p_next); + if (p->id == ERTS_INVALID_PID) { + /* Do not use the invalid pid; change serial */ + p_serial++; + p_serial &= p_serial_mask; + p->id = make_internal_pid(p_serial << p_serial_shift | p_next); + ASSERT(p->id != ERTS_INVALID_PID); + } + ASSERT(internal_pid_serial(p->id) <= (erts_use_r9_pids_ports + ? ERTS_MAX_PID_R9_SERIAL + : ERTS_MAX_PID_SERIAL)); + +#ifdef ERTS_SMP + erts_proc_lock_init(p); /* All locks locked */ + erts_pix_unlock(pix_lock); +#endif + + p->rstatus = P_FREE; + p->rcount = 0; + + /* + * set p_next to the next available slot + */ + + p_prev = p_next; + + while (1) { + p_next++; + if(p_next >= erts_max_processes) { + p_serial++; + p_serial &= p_serial_mask; + p_next = 0; + } + + if (p_prev == p_next) { + p_next = -1; + break; /* Table full! */ + } + + if (!process_tab[p_next]) + break; /* found a free slot */ + } + + error: + + erts_smp_mtx_unlock(&proc_tab_mtx); + + return p; + +} + +Eterm +erl_create_process(Process* parent, /* Parent of process (default group leader). */ + Eterm mod, /* Tagged atom for module. */ + Eterm func, /* Tagged atom for function. */ + Eterm args, /* Arguments for function (must be well-formed list). */ + ErlSpawnOpts* so) /* Options for spawn. */ +{ + ErtsRunQueue *rq; + Process *p; + Sint arity; /* Number of arguments. */ +#ifndef HYBRID + Uint arg_size; /* Size of arguments. */ +#endif + Uint sz; /* Needed words on heap. */ + Uint heap_need; /* Size needed on heap. */ + Eterm res = THE_NON_VALUE; + +#ifdef ERTS_SMP + erts_smp_proc_lock(parent, ERTS_PROC_LOCKS_ALL_MINOR); +#endif + +#ifdef HYBRID + /* + * Copy the arguments to the global heap + * Since global GC might occur we want to do this before adding the + * new process to the process_tab. 
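+     * With the shared heap the arguments then need no room on the
+     * new process' private heap, which is why heap_need starts at
+     * zero below.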
+ */ + BM_SWAP_TIMER(system,copy); + LAZY_COPY(parent,args); + BM_SWAP_TIMER(copy,system); + heap_need = 0; +#endif /* HYBRID */ + /* + * Check for errors. + */ + + if (is_not_atom(mod) || is_not_atom(func) || ((arity = list_length(args)) < 0)) { + so->error_code = BADARG; + goto error; + } + p = alloc_process(); /* All proc locks are locked by this thread + on success */ + if (!p) { + erts_send_error_to_logger_str(parent->group_leader, + "Too many processes\n"); + so->error_code = SYSTEM_LIMIT; + goto error; + } + + processes_busy++; + BM_COUNT(processes_spawned); + +#ifndef HYBRID + BM_SWAP_TIMER(system,size); + arg_size = size_object(args); + BM_SWAP_TIMER(size,system); + heap_need = arg_size; +#endif + + p->flags = erts_default_process_flags; + + /* Scheduler queue mutex should be locked when changeing + * prio. In this case we don't have to lock it, since + * noone except us has access to the process. + */ + if (so->flags & SPO_USE_ARGS) { + p->min_heap_size = so->min_heap_size; + p->prio = so->priority; + p->max_gen_gcs = so->max_gen_gcs; + } else { + p->min_heap_size = H_MIN_SIZE; + p->prio = PRIORITY_NORMAL; + p->max_gen_gcs = (Uint16) erts_smp_atomic_read(&erts_max_gen_gcs); + } + p->skipped = 0; + ASSERT(p->min_heap_size == erts_next_heap_size(p->min_heap_size, 0)); + + p->initial[INITIAL_MOD] = mod; + p->initial[INITIAL_FUN] = func; + p->initial[INITIAL_ARI] = (Uint) arity; + + /* + * Must initialize binary lists here before copying binaries to process. + */ + p->off_heap.mso = NULL; +#ifndef HYBRID /* FIND ME! */ + p->off_heap.funs = NULL; +#endif + p->off_heap.externals = NULL; + p->off_heap.overhead = 0; + + heap_need += + IS_CONST(parent->group_leader) ? 0 : NC_HEAP_SIZE(parent->group_leader); + + if (heap_need < p->min_heap_size) { + sz = heap_need = p->min_heap_size; + } else { + sz = erts_next_heap_size(heap_need, 0); + } + +#ifdef HIPE + hipe_init_process(&p->hipe); +#ifdef ERTS_SMP + hipe_init_process_smp(&p->hipe_smp); +#endif +#endif + + p->heap = (Eterm *) ERTS_HEAP_ALLOC(ERTS_ALC_T_HEAP, sizeof(Eterm)*sz); + p->old_hend = p->old_htop = p->old_heap = NULL; + p->high_water = p->heap; +#ifdef INCREMENTAL + p->scan_top = p->high_water; +#endif + p->gen_gcs = 0; + p->stop = p->hend = p->heap + sz; + p->htop = p->heap; + p->heap_sz = sz; + p->catches = 0; + + p->bin_vheap_sz = H_MIN_SIZE; + p->bin_old_vheap_sz = H_MIN_SIZE; + p->bin_old_vheap = 0; + + /* No need to initialize p->fcalls. 
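+     * schedule() assigns it (p->fcalls = reds) before the process
+     * first runs.
+     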
*/ + + p->current = p->initial+INITIAL_MOD; + + p->i = (Eterm *) beam_apply; + p->cp = (Eterm *) beam_apply+1; + + p->arg_reg = p->def_arg_reg; + p->max_arg_reg = sizeof(p->def_arg_reg)/sizeof(p->def_arg_reg[0]); + p->arg_reg[0] = mod; + p->arg_reg[1] = func; + BM_STOP_TIMER(system); + BM_MESSAGE(args,p,parent); + BM_START_TIMER(system); +#ifdef HYBRID + p->arg_reg[2] = args; +#ifdef INCREMENTAL + p->active = 0; + if (ptr_val(args) >= inc_fromspc && ptr_val(args) < inc_fromend) + INC_ACTIVATE(p); +#endif +#else + BM_SWAP_TIMER(system,copy); + p->arg_reg[2] = copy_struct(args, arg_size, &p->htop, &p->off_heap); + BM_MESSAGE_COPIED(arg_size); + BM_SWAP_TIMER(copy,system); +#endif + p->arity = 3; + + p->fvalue = NIL; + p->freason = EXC_NULL; + p->ftrace = NIL; + p->reds = 0; + +#ifdef ERTS_SMP + p->u.ptimer = NULL; +#else + sys_memset(&p->u.tm, 0, sizeof(ErlTimer)); +#endif + + p->reg = NULL; + p->nlinks = NULL; + p->monitors = NULL; + p->nodes_monitors = NULL; + p->suspend_monitors = NULL; + + ASSERT(is_pid(parent->group_leader)); + + if (parent->group_leader == ERTS_INVALID_PID) + p->group_leader = p->id; + else { + /* Needs to be done after the heap has been set up */ + p->group_leader = + IS_CONST(parent->group_leader) + ? parent->group_leader + : STORE_NC(&p->htop, &p->off_heap.externals, parent->group_leader); + } + + erts_get_default_tracing(&p->trace_flags, &p->tracer_proc); + + p->msg.first = NULL; + p->msg.last = &p->msg.first; + p->msg.save = &p->msg.first; + p->msg.len = 0; +#ifdef ERTS_SMP + p->msg_inq.first = NULL; + p->msg_inq.last = &p->msg_inq.first; + p->msg_inq.len = 0; + p->bound_runq = NULL; +#endif + p->bif_timers = NULL; + p->mbuf = NULL; + p->mbuf_sz = 0; + p->psd = NULL; + p->dictionary = NULL; + p->seq_trace_lastcnt = 0; + p->seq_trace_clock = 0; + SEQ_TRACE_TOKEN(p) = NIL; + p->parent = parent->id == ERTS_INVALID_PID ? NIL : parent->id; + +#ifdef HYBRID + p->rrma = NULL; + p->rrsrc = NULL; + p->nrr = 0; + p->rrsz = 0; +#endif + + INIT_HOLE_CHECK(p); +#ifdef DEBUG + p->last_old_htop = NULL; +#endif + + if (IS_TRACED(parent)) { + if (parent->trace_flags & F_TRACE_SOS) { + p->trace_flags |= (parent->trace_flags & TRACEE_FLAGS); + p->tracer_proc = parent->tracer_proc; + } + if (ARE_TRACE_FLAGS_ON(parent, F_TRACE_PROCS)) { + trace_proc_spawn(parent, p->id, mod, func, args); + } + if (parent->trace_flags & F_TRACE_SOS1) { /* Overrides TRACE_CHILDREN */ + p->trace_flags |= (parent->trace_flags & TRACEE_FLAGS); + p->tracer_proc = parent->tracer_proc; + p->trace_flags &= ~(F_TRACE_SOS1 | F_TRACE_SOS); + parent->trace_flags &= ~(F_TRACE_SOS1 | F_TRACE_SOS); + } + } + + /* + * Check if this process should be initially linked to its parent. 
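+     * (SPO_LINK, typically set by spawn_link, makes us create both
+     * halves of the link here while all locks on the child are
+     * still held)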
+ */ + + if (so->flags & SPO_LINK) { +#ifdef DEBUG + int ret; +#endif + if (IS_TRACED_FL(parent, F_TRACE_PROCS)) { + trace_proc(parent, parent, am_link, p->id); + } + +#ifdef DEBUG + ret = erts_add_link(&(parent->nlinks), LINK_PID, p->id); + ASSERT(ret == 0); + ret = erts_add_link(&(p->nlinks), LINK_PID, parent->id); + ASSERT(ret == 0); +#else + erts_add_link(&(parent->nlinks), LINK_PID, p->id); + erts_add_link(&(p->nlinks), LINK_PID, parent->id); +#endif + + if (IS_TRACED(parent)) { + if (parent->trace_flags & (F_TRACE_SOL|F_TRACE_SOL1)) { + p->trace_flags |= (parent->trace_flags & TRACEE_FLAGS); + p->tracer_proc = parent->tracer_proc; /* maybe steal */ + + if (parent->trace_flags & F_TRACE_SOL1) { /* maybe override */ + p ->trace_flags &= ~(F_TRACE_SOL1 | F_TRACE_SOL); + parent->trace_flags &= ~(F_TRACE_SOL1 | F_TRACE_SOL); + } + } + } + } + + /* + * Test whether this process should be initially monitored by its parent. + */ + if (so->flags & SPO_MONITOR) { + Eterm mref; + + mref = erts_make_ref(parent); + erts_add_monitor(&(parent->monitors), MON_ORIGIN, mref, p->id, NIL); + erts_add_monitor(&(p->monitors), MON_TARGET, mref, parent->id, NIL); + so->mref = mref; + } + +#ifdef HYBRID + /* + * Add process to the array of active processes. + */ + ACTIVATE(p); + p->active_index = erts_num_active_procs++; + erts_active_procs[p->active_index] = p; +#endif + +#ifdef ERTS_SMP + p->scheduler_data = NULL; + p->is_exiting = 0; + p->status_flags = 0; + p->runq_flags = 0; + p->suspendee = NIL; + p->pending_suspenders = NULL; + p->pending_exit.reason = THE_NON_VALUE; + p->pending_exit.bp = NULL; +#endif + +#if !defined(NO_FPE_SIGNALS) + p->fp_exception = 0; +#endif + + /* + * Schedule process for execution. + */ + + if (!((so->flags & SPO_USE_ARGS) && so->scheduler)) + rq = erts_get_runq_proc(parent); + else { + int ix = so->scheduler-1; + ASSERT(0 <= ix && ix < erts_no_run_queues); + rq = ERTS_RUNQ_IX(ix); + p->bound_runq = rq; + } + + erts_smp_runq_lock(rq); + +#ifdef ERTS_SMP + p->run_queue = rq; +#endif + + p->status = P_WAITING; + internal_add_to_runq(rq, p); + + erts_smp_runq_unlock(rq); + + res = p->id; + erts_smp_proc_unlock(p, ERTS_PROC_LOCKS_ALL); + + VERBOSE(DEBUG_PROCESSES, ("Created a new process: %T\n",p->id)); + + error: + + erts_smp_proc_unlock(parent, ERTS_PROC_LOCKS_ALL_MINOR); + + return res; +} + +/* + * Initiates a pseudo process that can be used + * for arithmetic BIFs. + */ + +void erts_init_empty_process(Process *p) +{ + p->htop = NULL; + p->stop = NULL; + p->hend = NULL; + p->heap = NULL; + p->gen_gcs = 0; + p->max_gen_gcs = 0; + p->min_heap_size = 0; + p->status = P_RUNABLE; + p->gcstatus = P_RUNABLE; + p->rstatus = P_RUNABLE; + p->rcount = 0; + p->id = ERTS_INVALID_PID; + p->prio = PRIORITY_NORMAL; + p->reds = 0; + p->tracer_proc = NIL; + p->trace_flags = F_INITIAL_TRACE_FLAGS; + p->group_leader = ERTS_INVALID_PID; + p->flags = 0; + p->fvalue = NIL; + p->freason = EXC_NULL; + p->ftrace = NIL; + p->fcalls = 0; + + p->bin_vheap_sz=H_MIN_SIZE; + p->bin_old_vheap_sz=H_MIN_SIZE; + p->bin_old_vheap = 0; +#ifdef ERTS_SMP + p->u.ptimer = NULL; + p->bound_runq = NULL; +#else + memset(&(p->u.tm), 0, sizeof(ErlTimer)); +#endif + p->next = NULL; + p->off_heap.mso = NULL; +#ifndef HYBRID /* FIND ME! 
*/ + p->off_heap.funs = NULL; +#endif + p->off_heap.externals = NULL; + p->off_heap.overhead = 0; + p->reg = NULL; + p->heap_sz = 0; + p->high_water = NULL; +#ifdef INCREMENTAL + p->scan_top = NULL; +#endif + p->old_hend = NULL; + p->old_htop = NULL; + p->old_heap = NULL; + p->mbuf = NULL; + p->mbuf_sz = 0; + p->psd = NULL; + p->monitors = NULL; + p->nlinks = NULL; /* List of links */ + p->nodes_monitors = NULL; + p->suspend_monitors = NULL; + p->msg.first = NULL; + p->msg.last = &p->msg.first; + p->msg.save = &p->msg.first; + p->msg.len = 0; + p->bif_timers = NULL; + p->dictionary = NULL; + p->seq_trace_clock = 0; + p->seq_trace_lastcnt = 0; + p->seq_trace_token = NIL; + p->initial[0] = 0; + p->initial[1] = 0; + p->initial[2] = 0; + p->catches = 0; + p->cp = NULL; + p->i = NULL; + p->current = NULL; + + /* + * Saved x registers. + */ + p->arity = 0; + p->arg_reg = NULL; + p->max_arg_reg = 0; + p->def_arg_reg[0] = 0; + p->def_arg_reg[1] = 0; + p->def_arg_reg[2] = 0; + p->def_arg_reg[3] = 0; + p->def_arg_reg[4] = 0; + p->def_arg_reg[5] = 0; + + p->parent = NIL; + p->started.tv_sec = 0; + p->started.tv_usec = 0; + +#ifdef HIPE + hipe_init_process(&p->hipe); +#ifdef ERTS_SMP + hipe_init_process_smp(&p->hipe_smp); +#endif +#endif + + ACTIVATE(p); + +#ifdef HYBRID + p->rrma = NULL; + p->rrsrc = NULL; + p->nrr = 0; + p->rrsz = 0; +#endif + INIT_HOLE_CHECK(p); +#ifdef DEBUG + p->last_old_htop = NULL; +#endif + + +#ifdef ERTS_SMP + p->scheduler_data = NULL; + p->is_exiting = 0; + p->status_flags = 0; + p->runq_flags = 0; + p->msg_inq.first = NULL; + p->msg_inq.last = &p->msg_inq.first; + p->msg_inq.len = 0; + p->suspendee = NIL; + p->pending_suspenders = NULL; + p->pending_exit.reason = THE_NON_VALUE; + p->pending_exit.bp = NULL; + erts_proc_lock_init(p); + erts_smp_proc_unlock(p, ERTS_PROC_LOCKS_ALL); + p->run_queue = ERTS_RUNQ_IX(0); +#endif + +#if !defined(NO_FPE_SIGNALS) + p->fp_exception = 0; +#endif + +} + +#ifdef DEBUG + +void +erts_debug_verify_clean_empty_process(Process* p) +{ + /* Things that erts_cleanup_empty_process() will *not* cleanup... */ + ASSERT(p->htop == NULL); + ASSERT(p->stop == NULL); + ASSERT(p->hend == NULL); + ASSERT(p->heap == NULL); + ASSERT(p->id == ERTS_INVALID_PID); + ASSERT(p->tracer_proc == NIL); + ASSERT(p->trace_flags == F_INITIAL_TRACE_FLAGS); + ASSERT(p->group_leader == ERTS_INVALID_PID); + ASSERT(p->next == NULL); + ASSERT(p->reg == NULL); + ASSERT(p->heap_sz == 0); + ASSERT(p->high_water == NULL); +#ifdef INCREMENTAL + ASSERT(p->scan_top == NULL); +#endif + ASSERT(p->old_hend == NULL); + ASSERT(p->old_htop == NULL); + ASSERT(p->old_heap == NULL); + + ASSERT(p->monitors == NULL); + ASSERT(p->nlinks == NULL); + ASSERT(p->nodes_monitors == NULL); + ASSERT(p->suspend_monitors == NULL); + ASSERT(p->msg.first == NULL); + ASSERT(p->msg.len == 0); + ASSERT(p->bif_timers == NULL); + ASSERT(p->dictionary == NULL); + ASSERT(p->catches == 0); + ASSERT(p->cp == NULL); + ASSERT(p->i == NULL); + ASSERT(p->current == NULL); + + ASSERT(p->parent == NIL); + +#ifdef ERTS_SMP + ASSERT(p->msg_inq.first == NULL); + ASSERT(p->msg_inq.len == 0); + ASSERT(p->suspendee == NIL); + ASSERT(p->pending_suspenders == NULL); + ASSERT(p->pending_exit.reason == THE_NON_VALUE); + ASSERT(p->pending_exit.bp == NULL); +#endif + + /* Thing that erts_cleanup_empty_process() cleans up */ + + ASSERT(p->off_heap.mso == NULL); +#ifndef HYBRID /* FIND ME! 
*/ + ASSERT(p->off_heap.funs == NULL); +#endif + ASSERT(p->off_heap.externals == NULL); + ASSERT(p->off_heap.overhead == 0); + + ASSERT(p->mbuf == NULL); +} + +#endif + +void +erts_cleanup_empty_process(Process* p) +{ + ErlHeapFragment* mbufp; + + /* We only check fields that are known to be used... */ + + erts_cleanup_offheap(&p->off_heap); + p->off_heap.mso = NULL; +#ifndef HYBRID /* FIND ME! */ + p->off_heap.funs = NULL; +#endif + p->off_heap.externals = NULL; + p->off_heap.overhead = 0; + + mbufp = p->mbuf; + while (mbufp) { + ErlHeapFragment *next = mbufp->next; + free_message_buffer(mbufp); + mbufp = next; + } + p->mbuf = NULL; +#if defined(ERTS_ENABLE_LOCK_COUNT) && defined(ERTS_SMP) + erts_lcnt_proc_lock_destroy(p); +#endif +#ifdef DEBUG + erts_debug_verify_clean_empty_process(p); +#endif +} + +/* + * p must be the currently executing process. + */ +static void +delete_process(Process* p) +{ + ErlMessage* mp; + ErlHeapFragment* bp; + + VERBOSE(DEBUG_PROCESSES, ("Removing process: %T\n",p->id)); + + /* Cleanup psd */ + + if (p->psd) + erts_free(ERTS_ALC_T_PSD, p->psd); + + /* Clean binaries and funs */ + erts_cleanup_offheap(&p->off_heap); + + /* + * The mso list should not be used anymore, but if it is, make sure that + * we'll notice. + */ + p->off_heap.mso = (void *) 0x8DEFFACD; + + if (p->arg_reg != p->def_arg_reg) { + erts_free(ERTS_ALC_T_ARG_REG, p->arg_reg); + } + + /* + * Release heaps. Clobber contents in DEBUG build. + */ + + +#ifdef DEBUG + sys_memset(p->heap, DEBUG_BAD_BYTE, p->heap_sz*sizeof(Eterm)); +#endif + +#ifdef HIPE + hipe_delete_process(&p->hipe); +#endif + + ERTS_HEAP_FREE(ERTS_ALC_T_HEAP, (void*) p->heap, p->heap_sz*sizeof(Eterm)); + if (p->old_heap != NULL) { + +#ifdef DEBUG + sys_memset(p->old_heap, DEBUG_BAD_BYTE, + (p->old_hend-p->old_heap)*sizeof(Eterm)); +#endif + ERTS_HEAP_FREE(ERTS_ALC_T_OLD_HEAP, + p->old_heap, + (p->old_hend-p->old_heap)*sizeof(Eterm)); + } + + /* + * Free all pending message buffers. + */ + bp = p->mbuf; + while (bp != NULL) { + ErlHeapFragment* next_bp = bp->next; + free_message_buffer(bp); + bp = next_bp; + } + + erts_erase_dicts(p); + + /* free all pending messages */ + mp = p->msg.first; + while(mp != NULL) { + ErlMessage* next_mp = mp->next; + if (mp->data.attached) { + if (is_value(mp->m[0])) + free_message_buffer(mp->data.heap_frag); + else { + if (is_not_nil(mp->m[1])) { + ErlHeapFragment *heap_frag; + heap_frag = (ErlHeapFragment *) mp->data.dist_ext->ext_endp; + erts_cleanup_offheap(&heap_frag->off_heap); + } + erts_free_dist_ext_copy(mp->data.dist_ext); + } + } + free_message(mp); + mp = next_mp; + } + + ASSERT(!p->monitors); + ASSERT(!p->nlinks); + ASSERT(!p->nodes_monitors); + ASSERT(!p->suspend_monitors); + + p->fvalue = NIL; + +#ifdef HYBRID + erts_active_procs[p->active_index] = + erts_active_procs[--erts_num_active_procs]; + erts_active_procs[p->active_index]->active_index = p->active_index; +#ifdef INCREMENTAL + if (INC_IS_ACTIVE(p)) + INC_DEACTIVATE(p); +#endif + + if (p->rrma != NULL) { + erts_free(ERTS_ALC_T_ROOTSET,p->rrma); + erts_free(ERTS_ALC_T_ROOTSET,p->rrsrc); + } +#endif + +} + +static ERTS_INLINE void +set_proc_exiting(Process *p, Eterm reason, ErlHeapFragment *bp) +{ +#ifdef ERTS_SMP + erts_pix_lock_t *pix_lock = ERTS_PID2PIXLOCK(p->id); + ERTS_SMP_LC_ASSERT(erts_proc_lc_my_proc_locks(p) == ERTS_PROC_LOCKS_ALL); + /* + * You are required to have all proc locks and the pix lock when going + * to status P_EXITING. 
This makes it sufficient to take any lock when
+ * looking up a process (pid2proc()) to prevent the looked up process
+ * from exiting until the lock has been released.
+ */
+
+    erts_pix_lock(pix_lock);
+    p->is_exiting = 1;
+#endif
+    p->status = P_EXITING;
+#ifdef ERTS_SMP
+    erts_pix_unlock(pix_lock);
+#endif
+    p->fvalue = reason;
+    if (bp)
+	erts_link_mbuf_to_proc(p, bp);
+    /*
+     * We used to set freason to EXC_EXIT here, but there is no need to
+     * save the stack trace since this process irreversibly is going to
+     * exit.
+     */
+    p->freason = EXTAG_EXIT;
+    KILL_CATCHES(p);
+    cancel_timer(p);
+    p->i = (Eterm *) beam_exit;
+}
+
+
+#ifdef ERTS_SMP
+
+void
+erts_handle_pending_exit(Process *c_p, ErtsProcLocks locks)
+{
+    ErtsProcLocks xlocks;
+    ASSERT(is_value(c_p->pending_exit.reason));
+    ERTS_SMP_LC_ASSERT(erts_proc_lc_my_proc_locks(c_p) == locks);
+    ERTS_SMP_LC_ASSERT(locks & ERTS_PROC_LOCK_MAIN);
+    ERTS_SMP_LC_ASSERT(c_p->status != P_EXITING);
+    ERTS_SMP_LC_ASSERT(c_p->status != P_FREE);
+
+    /* Ensure that all locks on c_p are locked before proceeding... */
+    if (locks == ERTS_PROC_LOCKS_ALL)
+	xlocks = 0;
+    else {
+	xlocks = ~locks & ERTS_PROC_LOCKS_ALL;
+	if (erts_smp_proc_trylock(c_p, xlocks) == EBUSY) {
+	    erts_smp_proc_unlock(c_p, locks & ~ERTS_PROC_LOCK_MAIN);
+	    erts_smp_proc_lock(c_p, ERTS_PROC_LOCKS_ALL_MINOR);
+	}
+    }
+
+    set_proc_exiting(c_p, c_p->pending_exit.reason, c_p->pending_exit.bp);
+    c_p->pending_exit.reason = THE_NON_VALUE;
+    c_p->pending_exit.bp = NULL;
+
+    if (xlocks)
+	erts_smp_proc_unlock(c_p, xlocks);
+}
+
+static void
+handle_pending_exiters(ErtsProcList *pnd_xtrs)
+{
+    ErtsProcList *plp = pnd_xtrs;
+    ErtsProcList *free_plp;
+    while (plp) {
+	Process *p = erts_pid2proc(NULL, 0, plp->pid, ERTS_PROC_LOCKS_ALL);
+	if (p) {
+	    if (proclist_same(plp, p)
+		&& !(p->status_flags & ERTS_PROC_SFLG_RUNNING)) {
+		ASSERT(p->status_flags & ERTS_PROC_SFLG_INRUNQ);
+		ASSERT(ERTS_PROC_PENDING_EXIT(p));
+		erts_handle_pending_exit(p, ERTS_PROC_LOCKS_ALL);
+	    }
+	    erts_smp_proc_unlock(p, ERTS_PROC_LOCKS_ALL);
+	}
+	free_plp = plp;
+	plp = plp->next;
+	proclist_destroy(free_plp);
+    }
+}
+
+static void
+save_pending_exiter(Process *p)
+{
+    ErtsProcList *plp;
+    ErtsRunQueue *rq;
+
+    ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p));
+
+    rq = erts_get_runq_current(NULL);
+
+    plp = proclist_create(p);
+
+    erts_smp_runq_lock(rq);
+
+    plp->next = rq->procs.pending_exiters;
+    rq->procs.pending_exiters = plp;
+
+    erts_smp_runq_unlock(rq);
+
+}
+
+#endif
+
+/*
+ * This function delivers an EXIT message to a process
+ * which is trapping EXITs.
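+ * The message delivered is the usual {'EXIT', From, Reason} tuple
+ * (prebuilt by the caller as exit_term), possibly together with a
+ * sequential trace token.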
+ */
+
+static ERTS_INLINE void
+send_exit_message(Process *to, ErtsProcLocks *to_locksp,
+		  Eterm exit_term, Uint term_size, Eterm token)
+{
+    if (token == NIL) {
+	Eterm* hp;
+	Eterm mess;
+	ErlHeapFragment* bp;
+	ErlOffHeap *ohp;
+
+	hp = erts_alloc_message_heap(term_size, &bp, &ohp, to, to_locksp);
+	mess = copy_struct(exit_term, term_size, &hp, ohp);
+	erts_queue_message(to, to_locksp, bp, mess, NIL);
+    } else {
+	ErlHeapFragment* bp;
+	Eterm* hp;
+	Eterm mess;
+	Eterm temp_token;
+	Uint sz_token;
+
+	ASSERT(is_tuple(token));
+	sz_token = size_object(token);
+	bp = new_message_buffer(term_size+sz_token);
+	hp = bp->mem;
+	mess = copy_struct(exit_term, term_size, &hp, &bp->off_heap);
+	/* the trace token must in this case be updated by the caller */
+	seq_trace_output(token, mess, SEQ_TRACE_SEND, to->id, NULL);
+	temp_token = copy_struct(token, sz_token, &hp, &bp->off_heap);
+	erts_queue_message(to, to_locksp, bp, mess, temp_token);
+    }
+}
+
+/*
+ *
+ * *** Exit signal behavior ***
+ *
+ * Exit signals are asynchronous (truly asynchronous in the
+ * SMP emulator). When the signal is received, the receiver receives an
+ * 'EXIT' message if it is trapping exits; otherwise, it will either
+ * ignore the signal if the exit reason is normal, or go into an
+ * exiting state (status P_EXITING). When a process has gone into the
+ * exiting state it will not execute any more Erlang code, but it might
+ * take a while before it actually exits. The exit signal is considered
+ * received when the 'EXIT' message is put in the message queue, when
+ * the signal is dropped, or when the process changes state into
+ * exiting. The time it is in the exiting state before actually exiting
+ * is undefined (it might take a really long time under certain
+ * conditions). The receiver of the exit signal does not break links or
+ * trigger monitors until it actually exits.
+ *
+ * Exit signals and other signals, e.g. messages, have to be received
+ * by a receiver in the same order as sent by a sender.
+ *
+ *
+ *
+ * Exit signal implementation in the SMP emulator:
+ *
+ * If the receiver is trapping exits, the signal is transformed
+ * into an 'EXIT' message and sent as a normal message; if the
+ * reason is normal, the signal is dropped; otherwise, the process
+ * is to be exited. The interesting case is when the process is to
+ * be exited, and this is what is described below.
+ *
+ * If it is possible, the receiver is set in the exiting state straight
+ * away and we are done; otherwise, the sender places the exit reason
+ * in the pending_exit field of the process struct and, if necessary,
+ * adds the receiver to the run queue. It is typically not possible to
+ * put a process into the exiting state straight away if it is
+ * currently scheduled, or if we cannot acquire all locks on it without
+ * first releasing locks we already hold.
+ *
+ * The receiver will poll the pending_exit field when it reaches
+ * certain places during its execution. When it discovers the pending
+ * exit it will change into the exiting state. If the receiver wasn't
+ * scheduled when the pending exit was set, the first scheduler that
+ * schedules a new process will set the receiving process in the
+ * exiting state just before it schedules the next process.
+ *
+ * When the exit signal is placed in the pending_exit field, the signal
+ * is considered to be in transit on the Erlang level. The signal is
+ * actually in a kind of semi-transit state, since we have already
+ * determined how it should be received.
+ * If it is received, it will exit the process no matter what (though
+ * the process may exit by itself before reception of the exit signal).
+ * The signal is received when it is discovered in the pending_exit
+ * field by the receiver.
+ *
+ * The receiver has to poll the pending_exit field at least before:
+ * - moving messages from the message in queue to the private message
+ *   queue. This is in order to preserve signal order: otherwise a
+ *   message sent after the exit signal could be received before the
+ *   exit signal.
+ * - unlink. Otherwise the process might get exited on a link that
+ *   has been removed.
+ * - changing the trap_exit flag to true. This is in order to simplify
+ *   the implementation; otherwise, we would have to transform the
+ *   signal into an 'EXIT' message when setting the trap_exit flag to
+ *   true. We would also have to maintain a queue of exit signals in
+ *   transit.
+ * - being scheduled in or out.
+ */
+
+static ERTS_INLINE int
+send_exit_signal(Process *c_p,         /* current process if and only
+                                          if reason is stored on it */
+                 Eterm from,           /* Id of sender of signal */
+                 Process *rp,          /* receiving process */
+                 ErtsProcLocks *rp_locks,/* current locks on receiver */
+                 Eterm reason,         /* exit reason */
+                 Eterm exit_tuple,     /* Prebuilt exit tuple
+                                          or THE_NON_VALUE */
+                 Uint exit_tuple_sz,   /* Size of prebuilt exit tuple
+                                          (if exit_tuple != THE_NON_VALUE) */
+                 Eterm token,          /* token */
+                 Process *token_update, /* token updater */
+                 Uint32 flags          /* flags */
+    )
+{
+    Eterm rsn = reason == am_kill ? am_killed : reason;
+
+    ERTS_SMP_LC_ASSERT(*rp_locks == erts_proc_lc_my_proc_locks(rp));
+    ERTS_SMP_LC_ASSERT((*rp_locks & ERTS_PROC_LOCKS_XSIG_SEND)
+		       == ERTS_PROC_LOCKS_XSIG_SEND);
+
+    ASSERT(reason != THE_NON_VALUE);
+
+    if (ERTS_PROC_IS_TRAPPING_EXITS(rp)
+	&& (reason != am_kill || (flags & ERTS_XSIG_FLG_IGN_KILL))) {
+	if (is_not_nil(token) && token_update)
+	    seq_trace_update_send(token_update);
+	if (is_value(exit_tuple))
+	    send_exit_message(rp, rp_locks, exit_tuple, exit_tuple_sz, token);
+	else
+	    erts_deliver_exit_message(from, rp, rp_locks, rsn, token);
+	return 1; /* Receiver will get a message */
+    }
+    else if (reason != am_normal || (flags & ERTS_XSIG_FLG_NO_IGN_NORMAL)) {
+#ifdef ERTS_SMP
+	if (!ERTS_PROC_PENDING_EXIT(rp) && !rp->is_exiting) {
+	    ASSERT(rp->status != P_EXITING);
+	    ASSERT(rp->status != P_FREE);
+	    ASSERT(!rp->pending_exit.bp);
+
+	    if (rp == c_p && (*rp_locks & ERTS_PROC_LOCK_MAIN)) {
+		/* Ensure that all locks on c_p are locked before
+		   proceeding... */
+		if (*rp_locks != ERTS_PROC_LOCKS_ALL) {
+		    ErtsProcLocks need_locks = (~(*rp_locks)
+						& ERTS_PROC_LOCKS_ALL);
+		    if (erts_smp_proc_trylock(c_p, need_locks) == EBUSY) {
+			erts_smp_proc_unlock(c_p,
+					     *rp_locks & ~ERTS_PROC_LOCK_MAIN);
+			erts_smp_proc_lock(c_p, ERTS_PROC_LOCKS_ALL_MINOR);
+		    }
+		    *rp_locks = ERTS_PROC_LOCKS_ALL;
+		}
+		set_proc_exiting(c_p, rsn, NULL);
+	    }
+	    else if (!(rp->status_flags & ERTS_PROC_SFLG_RUNNING)) {
+		/* Process not running ... */
+		ErtsProcLocks need_locks = ~(*rp_locks) & ERTS_PROC_LOCKS_ALL;
+		if (need_locks
+		    && erts_smp_proc_trylock(rp, need_locks) == EBUSY) {
+		    /* ... but we haven't got all locks on it ... */
+		    save_pending_exiter(rp);
+		    /*
+		     * The pending exit will be discovered when the next
+		     * process is scheduled in
+		     */
+		    goto set_pending_exit;
+		}
+		else {
+		    /* ...and we have all locks on it... */
+		    *rp_locks = ERTS_PROC_LOCKS_ALL;
+		    set_proc_exiting(rp,
+				     (is_immed(rsn)
+				      ? rsn
+				      : copy_object(rsn, rp)),
+				     NULL);
+		}
+	    }
+	    else { /* Process running... */
+
+		/*
+		 * The pending exit will be discovered when the process
+		 * is scheduled out if not discovered earlier.
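+		 * (Being scheduled in or out is one of the polling
+		 * points listed in the summary comment above
+		 * send_exit_signal().)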
+ */ + + set_pending_exit: + if (is_immed(rsn)) { + rp->pending_exit.reason = rsn; + } + else { + Eterm *hp; + Uint sz = size_object(rsn); + ErlHeapFragment *bp = new_message_buffer(sz); + + hp = &bp->mem[0]; + rp->pending_exit.reason = copy_struct(rsn, + sz, + &hp, + &bp->off_heap); + rp->pending_exit.bp = bp; + } + ASSERT(ERTS_PROC_PENDING_EXIT(rp)); + } + if (!(rp->status_flags + & (ERTS_PROC_SFLG_INRUNQ|ERTS_PROC_SFLG_RUNNING))) + erts_add_to_runq(rp); + } + /* else: + * + * The receiver already has a pending exit (or is exiting) + * so we drop this signal. + * + * NOTE: dropping this exit signal is based on the assumption + * that the receiver *will* exit; either on the pending + * exit or by itself before seeing the pending exit. + */ +#else /* !ERTS_SMP */ + if (c_p == rp) { + rp->status = P_EXITING; + c_p->fvalue = rsn; + } + else if (rp->status != P_EXITING) { /* No recursive process exits /PaN */ + Eterm old_status = rp->status; + set_proc_exiting(rp, + is_immed(rsn) ? rsn : copy_object(rsn, rp), + NULL); + ACTIVATE(rp); + if (old_status != P_RUNABLE && old_status != P_RUNNING) + erts_add_to_runq(rp); + } +#endif + return -1; /* Receiver will exit */ + } + + return 0; /* Receiver unaffected */ +} + + +int +erts_send_exit_signal(Process *c_p, + Eterm from, + Process *rp, + ErtsProcLocks *rp_locks, + Eterm reason, + Eterm token, + Process *token_update, + Uint32 flags) +{ + return send_exit_signal(c_p, + from, + rp, + rp_locks, + reason, + THE_NON_VALUE, + 0, + token, + token_update, + flags); +} + +typedef struct { + Eterm reason; + Process *p; +} ExitMonitorContext; + +static void doit_exit_monitor(ErtsMonitor *mon, void *vpcontext) +{ + ExitMonitorContext *pcontext = vpcontext; + DistEntry *dep; + ErtsMonitor *rmon; + Process *rp; + + if (mon->type == MON_ORIGIN) { + /* We are monitoring someone else, we need to demonitor that one.. 
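+	   The monitored entity is either a remote process monitored
+	   by name, a local process, or a remote process monitored by
+	   pid; each case is handled separately below.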
*/ + if (is_atom(mon->pid)) { /* remote by name */ + ASSERT(is_node_name_atom(mon->pid)); + dep = erts_sysname_to_connected_dist_entry(mon->pid); + if (dep) { + erts_smp_de_links_lock(dep); + rmon = erts_remove_monitor(&(dep->monitors), mon->ref); + erts_smp_de_links_unlock(dep); + if (rmon) { + ErtsDSigData dsd; + int code = erts_dsig_prepare(&dsd, dep, NULL, + ERTS_DSP_NO_LOCK, 0); + if (code == ERTS_DSIG_PREP_CONNECTED) { + code = erts_dsig_send_demonitor(&dsd, + rmon->pid, + mon->name, + mon->ref, + 1); + ASSERT(code == ERTS_DSIG_SEND_OK); + } + erts_destroy_monitor(rmon); + } + erts_deref_dist_entry(dep); + } + } else { + ASSERT(is_pid(mon->pid)); + if (is_internal_pid(mon->pid)) { /* local by pid or name */ + rp = erts_pid2proc(NULL, 0, mon->pid, ERTS_PROC_LOCK_LINK); + if (!rp) { + goto done; + } + rmon = erts_remove_monitor(&(rp->monitors),mon->ref); + erts_smp_proc_unlock(rp, ERTS_PROC_LOCK_LINK); + if (rmon == NULL) { + goto done; + } + erts_destroy_monitor(rmon); + } else { /* remote by pid */ + ASSERT(is_external_pid(mon->pid)); + dep = external_pid_dist_entry(mon->pid); + ASSERT(dep != NULL); + if (dep) { + erts_smp_de_links_lock(dep); + rmon = erts_remove_monitor(&(dep->monitors), mon->ref); + erts_smp_de_links_unlock(dep); + if (rmon) { + ErtsDSigData dsd; + int code = erts_dsig_prepare(&dsd, dep, NULL, + ERTS_DSP_NO_LOCK, 0); + if (code == ERTS_DSIG_PREP_CONNECTED) { + code = erts_dsig_send_demonitor(&dsd, + rmon->pid, + mon->pid, + mon->ref, + 1); + ASSERT(code == ERTS_DSIG_SEND_OK); + } + erts_destroy_monitor(rmon); + } + } + } + } + } else { /* type == MON_TARGET */ + ASSERT(mon->type == MON_TARGET); + ASSERT(is_pid(mon->pid) || is_internal_port(mon->pid)); + if (is_internal_port(mon->pid)) { + Port *prt = erts_id2port(mon->pid, NULL, 0); + if (prt == NULL) { + goto done; + } + erts_fire_port_monitor(prt, mon->ref); + erts_port_release(prt); + } else if (is_internal_pid(mon->pid)) {/* local by name or pid */ + Eterm watched; + Eterm lhp[3]; + ErtsProcLocks rp_locks = (ERTS_PROC_LOCK_LINK + | ERTS_PROC_LOCKS_MSG_SEND); + rp = erts_pid2proc(NULL, 0, mon->pid, rp_locks); + if (rp == NULL) { + goto done; + } + rmon = erts_remove_monitor(&(rp->monitors),mon->ref); + if (rmon) { + erts_destroy_monitor(rmon); + watched = (is_atom(mon->name) + ? TUPLE2(lhp, mon->name, + erts_this_dist_entry->sysname) + : pcontext->p->id); + erts_queue_monitor_message(rp, &rp_locks, mon->ref, am_process, + watched, pcontext->reason); + } + /* else: demonitor while we exited, i.e. do nothing... */ + erts_smp_proc_unlock(rp, rp_locks); + } else { /* external by pid or name */ + ASSERT(is_external_pid(mon->pid)); + dep = external_pid_dist_entry(mon->pid); + ASSERT(dep != NULL); + if (dep) { + erts_smp_de_links_lock(dep); + rmon = erts_remove_monitor(&(dep->monitors), mon->ref); + erts_smp_de_links_unlock(dep); + if (rmon) { + ErtsDSigData dsd; + int code = erts_dsig_prepare(&dsd, dep, NULL, + ERTS_DSP_NO_LOCK, 0); + if (code == ERTS_DSIG_PREP_CONNECTED) { + code = erts_dsig_send_m_exit(&dsd, + mon->pid, + (rmon->name != NIL + ? rmon->name + : rmon->pid), + mon->ref, + pcontext->reason); + ASSERT(code == ERTS_DSIG_SEND_OK); + } + erts_destroy_monitor(rmon); + } + } + } + } + done: + /* As the monitors are previously removed from the process, + distribution operations will not cause monitors to disappear, + we can safely delete it. 
*/
+
+    erts_destroy_monitor(mon);
+}
+
+typedef struct {
+    Process *p;
+    Eterm reason;
+    Eterm exit_tuple;
+    Uint exit_tuple_sz;
+} ExitLinkContext;
+
+static void doit_exit_link(ErtsLink *lnk, void *vpcontext)
+{
+    ExitLinkContext *pcontext = vpcontext;
+    /* Unpack context, it's read-only */
+    Process *p = pcontext->p;
+    Eterm reason = pcontext->reason;
+    Eterm exit_tuple = pcontext->exit_tuple;
+    Uint exit_tuple_sz = pcontext->exit_tuple_sz;
+    Eterm item = lnk->pid;
+    ErtsLink *rlnk;
+    DistEntry *dep;
+    Process *rp;
+
+    switch(lnk->type) {
+    case LINK_PID:
+	if(is_internal_port(item)) {
+	    Port *prt = erts_id2port(item, NULL, 0);
+	    if (prt) {
+		rlnk = erts_remove_link(&prt->nlinks, p->id);
+		if (rlnk)
+		    erts_destroy_link(rlnk);
+		erts_do_exit_port(prt, p->id, reason);
+		erts_port_release(prt);
+	    }
+	}
+	else if(is_external_port(item)) {
+	    erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+	    erts_dsprintf(dsbufp,
+			  "Erroneous link between %T and external port %T "
+			  "found\n",
+			  p->id,
+			  item);
+	    erts_send_error_to_logger_nogl(dsbufp);
+	    ASSERT(0); /* It isn't possible to set up such a link... */
+	}
+	else if (is_internal_pid(item)) {
+	    ErtsProcLocks rp_locks = (ERTS_PROC_LOCK_LINK
+				      | ERTS_PROC_LOCKS_XSIG_SEND);
+	    rp = erts_pid2proc(NULL, 0, item, rp_locks);
+	    if (rp) {
+		rlnk = erts_remove_link(&(rp->nlinks), p->id);
+		/* If rlnk == NULL, we got unlinked while exiting,
+		   i.e., do nothing... */
+		if (rlnk) {
+		    int xres;
+		    erts_destroy_link(rlnk);
+		    xres = send_exit_signal(NULL,
+					    p->id,
+					    rp,
+					    &rp_locks,
+					    reason,
+					    exit_tuple,
+					    exit_tuple_sz,
+					    SEQ_TRACE_TOKEN(p),
+					    p,
+					    ERTS_XSIG_FLG_IGN_KILL);
+		    if (xres >= 0 && IS_TRACED_FL(rp, F_TRACE_PROCS)) {
+			/* We didn't exit the process and it is traced */
+			trace_proc(p, rp, am_getting_unlinked, p->id);
+		    }
+		}
+		ASSERT(rp != p);
+		erts_smp_proc_unlock(rp, rp_locks);
+	    }
+	}
+	else if (is_external_pid(item)) {
+	    dep = external_pid_dist_entry(item);
+	    if(dep != erts_this_dist_entry) {
+		ErtsDSigData dsd;
+		int code;
+		ErtsDistLinkData dld;
+		erts_remove_dist_link(&dld, p->id, item, dep);
+		erts_smp_proc_lock(p, ERTS_PROC_LOCK_MAIN);
+		code = erts_dsig_prepare(&dsd, dep, p, ERTS_DSP_NO_LOCK, 0);
+		if (code == ERTS_DSIG_PREP_CONNECTED) {
+		    code = erts_dsig_send_exit_tt(&dsd, p->id, item, reason,
+						  SEQ_TRACE_TOKEN(p));
+		    ASSERT(code == ERTS_DSIG_SEND_OK);
+		}
+		erts_smp_proc_unlock(p, ERTS_PROC_LOCK_MAIN);
+		erts_destroy_dist_link(&dld);
+	    }
+	}
+	break;
+    case LINK_NODE:
+	ASSERT(is_node_name_atom(item));
+	dep = erts_sysname_to_connected_dist_entry(item);
+	if(dep) {
+	    /* dist entries have node links in a separate structure to
+	       avoid confusion */
+	    erts_smp_de_links_lock(dep);
+	    rlnk = erts_remove_link(&(dep->node_links), p->id);
+	    erts_smp_de_links_unlock(dep);
+	    if (rlnk)
+		erts_destroy_link(rlnk);
+	    erts_deref_dist_entry(dep);
+	} else {
+#ifndef ERTS_SMP
+	    /* XXX Is this possible? Shouldn't this link already have
+	       been removed when the node was disconnected? */
+	    ASSERT(0);
+#endif
+	    /* This is possible when SMP support is enabled, and the
+	       dist port and the process exit simultaneously. */
+	}
+	break;
+
+    default:
+	erl_exit(1, "bad type in link list\n");
+	break;
+    }
+    erts_destroy_link(lnk);
+}
+
+static void
+resume_suspend_monitor(ErtsSuspendMonitor *smon, void *vc_p)
+{
+    Process *suspendee = erts_pid2proc((Process *) vc_p, ERTS_PROC_LOCK_MAIN,
+				       smon->pid, ERTS_PROC_LOCK_STATUS);
+    if (suspendee) {
+	if (smon->active)
+	    resume_process(suspendee);
+	erts_smp_proc_unlock(suspendee, ERTS_PROC_LOCK_STATUS);
+    }
+    erts_destroy_suspend_monitor(smon);
+}
+
+static void
+continue_exit_process(Process *p
+#ifdef ERTS_SMP
+		      , erts_pix_lock_t *pix_lock
+#endif
+    );
+
+/* this function finishes a process and propagates exit messages - called
+   by process_main when a process dies */
+void
+erts_do_exit_process(Process* p, Eterm reason)
+{
+#ifdef ERTS_SMP
+    erts_pix_lock_t *pix_lock = ERTS_PID2PIXLOCK(p->id);
+#endif
+
+    p->arity = 0;		/* No live registers */
+    p->fvalue = reason;
+
+#ifdef ERTS_SMP
+    ERTS_SMP_CHK_HAVE_ONLY_MAIN_PROC_LOCK(p);
+    /* By locking all locks (main lock is already locked) when going
+       to status P_EXITING, it is enough to take any lock when
+       looking up a process (erts_pid2proc()) to prevent the looked up
+       process from exiting until the lock has been released. */
+    erts_smp_proc_lock(p, ERTS_PROC_LOCKS_ALL_MINOR);
+#endif
+
+    if (erts_system_profile_flags.runnable_procs && (p->status != P_WAITING)) {
+	profile_runnable_proc(p, am_inactive);
+    }
+
+#ifdef ERTS_SMP
+    erts_pix_lock(pix_lock);
+    p->is_exiting = 1;
+#endif
+
+    p->status = P_EXITING;
+
+#ifdef ERTS_SMP
+    erts_pix_unlock(pix_lock);
+
+    if (ERTS_PROC_PENDING_EXIT(p)) {
+	/* Process exited before pending exit was received... */
+	p->pending_exit.reason = THE_NON_VALUE;
+	if (p->pending_exit.bp) {
+	    free_message_buffer(p->pending_exit.bp);
+	    p->pending_exit.bp = NULL;
+	}
+    }
+
+    cancel_suspend_of_suspendee(p, ERTS_PROC_LOCKS_ALL);
+
+    ERTS_SMP_MSGQ_MV_INQ2PRIVQ(p);
+#endif
+
+    if (IS_TRACED_FL(p,F_TRACE_PROCS))
+	trace_proc(p, p, am_exit, reason);
+
+    erts_trace_check_exiting(p->id);
+
+    ASSERT((p->trace_flags & F_INITIAL_TRACE_FLAGS) == F_INITIAL_TRACE_FLAGS);
+
+    cancel_timer(p);		/* Always cancel timer just in case */
+
+    /*
+     * The timer of this process can *not* be used anymore. The field used
+     * for the timer is now used for misc exiting data.
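+     * (Both uses share the 'u' union in the process structure; see
+     * p->u.ptimer and p->u.tm versus p->u.exit_data.)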
+ */ + p->u.exit_data = NULL; + + if (p->bif_timers) + erts_cancel_bif_timers(p, ERTS_PROC_LOCKS_ALL); + +#ifdef ERTS_SMP + if (p->flags & F_HAVE_BLCKD_MSCHED) + erts_block_multi_scheduling(p, ERTS_PROC_LOCKS_ALL, 0, 1); +#endif + + erts_smp_proc_unlock(p, ERTS_PROC_LOCKS_ALL_MINOR); + +#ifdef ERTS_SMP + continue_exit_process(p, pix_lock); +#else + continue_exit_process(p); +#endif +} + +void +erts_continue_exit_process(Process *c_p) +{ +#ifdef ERTS_SMP + continue_exit_process(c_p, ERTS_PID2PIXLOCK(c_p->id)); +#else + continue_exit_process(c_p); +#endif +} + +static void +continue_exit_process(Process *p +#ifdef ERTS_SMP + , erts_pix_lock_t *pix_lock +#endif + ) +{ + ErtsLink* lnk; + ErtsMonitor *mon; + ErtsProcLocks curr_locks = ERTS_PROC_LOCK_MAIN; + Eterm reason = p->fvalue; + DistEntry *dep; + struct saved_calls *scb; +#ifdef DEBUG + int yield_allowed = 1; +#endif + + ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_MAIN == erts_proc_lc_my_proc_locks(p)); + +#ifdef DEBUG + erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS); + ASSERT(p->status == P_EXITING); + erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); +#endif + + if (p->flags & F_USING_DB) { + if (erts_db_process_exiting(p, ERTS_PROC_LOCK_MAIN)) + goto yield; + p->flags &= ~F_USING_DB; + } + + if (p->flags & F_USING_DDLL) { + erts_ddll_proc_dead(p, ERTS_PROC_LOCK_MAIN); + p->flags &= ~F_USING_DDLL; + } + + if (p->nodes_monitors) { + erts_delete_nodes_monitors(p, ERTS_PROC_LOCK_MAIN); + p->nodes_monitors = NULL; + } + + + if (p->suspend_monitors) { + erts_sweep_suspend_monitors(p->suspend_monitors, + resume_suspend_monitor, + p); + p->suspend_monitors = NULL; + } + + /* + * The registered name *should* be the last "erlang resource" to + * cleanup. + */ + if (p->reg) { + (void) erts_unregister_name(p, ERTS_PROC_LOCK_MAIN, NULL, THE_NON_VALUE); + ASSERT(!p->reg); + } + + erts_smp_proc_lock(p, ERTS_PROC_LOCKS_ALL_MINOR); + curr_locks = ERTS_PROC_LOCKS_ALL; + + /* + * From this point on we are no longer allowed to yield + * this process. + */ +#ifdef DEBUG + yield_allowed = 0; +#endif + + { + int pix; + /* Do *not* use erts_get_runq_proc() */ + ErtsRunQueue *rq; + rq = erts_get_runq_current(ERTS_GET_SCHEDULER_DATA_FROM_PROC(p)); + + ASSERT(internal_pid_index(p->id) < erts_max_processes); + pix = internal_pid_index(p->id); + + erts_smp_mtx_lock(&proc_tab_mtx); + erts_smp_runq_lock(rq); + +#ifdef ERTS_SMP + erts_pix_lock(pix_lock); + + ASSERT(p->scheduler_data); + ASSERT(p->scheduler_data->current_process == p); + ASSERT(p->scheduler_data->free_process == NULL); + + p->scheduler_data->current_process = NULL; + p->scheduler_data->free_process = p; + p->status_flags = 0; +#endif + process_tab[pix] = NULL; /* Time of death! */ + ASSERT(erts_smp_atomic_read(&process_count) > 0); + erts_smp_atomic_dec(&process_count); + +#ifdef ERTS_SMP + erts_pix_unlock(pix_lock); +#endif + erts_smp_runq_unlock(rq); + + if (p_next < 0) { + if (p_last >= p_next) { + p_serial++; + p_serial &= p_serial_mask; + } + p_next = pix; + } + + ERTS_MAYBE_SAVE_TERMINATING_PROCESS(p); + + erts_smp_mtx_unlock(&proc_tab_mtx); + } + + /* + * All "erlang resources" have to be deallocated before this point, + * e.g. registered name, so monitoring and linked processes can + * be sure that all interesting resources have been deallocated + * when the monitors and/or links hit. + */ + + mon = p->monitors; + p->monitors = NULL; /* to avoid recursive deletion during traversal */ + + lnk = p->nlinks; + p->nlinks = NULL; + p->status = P_FREE; + dep = ((p->flags & F_DISTRIBUTION) + ? 
ERTS_PROC_SET_DIST_ENTRY(p, ERTS_PROC_LOCKS_ALL, NULL) + : NULL); + scb = ERTS_PROC_SET_SAVED_CALLS_BUF(p, ERTS_PROC_LOCKS_ALL, NULL); + + erts_smp_proc_unlock(p, ERTS_PROC_LOCKS_ALL); + processes_busy--; + + if (dep) { + erts_do_net_exits(dep, reason); + if(dep) + erts_deref_dist_entry(dep); + } + + /* + * Pre-build the EXIT tuple if there are any links. + */ + if (lnk) { + Eterm tmp_heap[4]; + Eterm exit_tuple; + Uint exit_tuple_sz; + Eterm* hp; + + hp = &tmp_heap[0]; + + exit_tuple = TUPLE3(hp, am_EXIT, p->id, reason); + + exit_tuple_sz = size_object(exit_tuple); + + { + ExitLinkContext context = {p, reason, exit_tuple, exit_tuple_sz}; + erts_sweep_links(lnk, &doit_exit_link, &context); + } + } + + { + ExitMonitorContext context = {reason, p}; + erts_sweep_monitors(mon,&doit_exit_monitor,&context); + } + + if (scb) + erts_free(ERTS_ALC_T_CALLS_BUF, (void *) scb); + + delete_process(p); + + erts_smp_proc_lock(p, ERTS_PROC_LOCK_MAIN); + ERTS_SMP_CHK_HAVE_ONLY_MAIN_PROC_LOCK(p); + + return; + + yield: + +#ifdef DEBUG + ASSERT(yield_allowed); +#endif + + ERTS_SMP_LC_ASSERT(curr_locks == erts_proc_lc_my_proc_locks(p)); + ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_MAIN & curr_locks); + + ASSERT(p->status == P_EXITING); + + p->i = (Eterm *) beam_continue_exit; + + if (!(curr_locks & ERTS_PROC_LOCK_STATUS)) { + erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS); + curr_locks |= ERTS_PROC_LOCK_STATUS; + } + + erts_add_to_runq(p); + + if (curr_locks != ERTS_PROC_LOCK_MAIN) + erts_smp_proc_unlock(p, ~ERTS_PROC_LOCK_MAIN & curr_locks); + + ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_MAIN == erts_proc_lc_my_proc_locks(p)); + +} + +/* Callback for process timeout */ +static void +timeout_proc(Process* p) +{ + p->i = (Eterm *) p->def_arg_reg[0]; + p->flags |= F_TIMO; + p->flags &= ~F_INSLPQUEUE; + + if (p->status == P_WAITING) + erts_add_to_runq(p); + if (p->status == P_SUSPENDED) + p->rstatus = P_RUNABLE; /* MUST set resume status to runnable */ +} + + +void +cancel_timer(Process* p) +{ + ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_MAIN & erts_proc_lc_my_proc_locks(p)); + p->flags &= ~(F_INSLPQUEUE|F_TIMO); +#ifdef ERTS_SMP + erts_cancel_smp_ptimer(p->u.ptimer); +#else + erl_cancel_timer(&p->u.tm); +#endif +} + +/* + * Insert a process into the time queue, with a timeout 'timeout' in ms. + */ +void +set_timer(Process* p, Uint timeout) +{ + ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_MAIN & erts_proc_lc_my_proc_locks(p)); + + /* check for special case timeout=0 DONT ADD TO time queue */ + if (timeout == 0) { + p->flags |= F_TIMO; + return; + } + p->flags |= F_INSLPQUEUE; + p->flags &= ~F_TIMO; + +#ifdef ERTS_SMP + erts_create_smp_ptimer(&p->u.ptimer, + p->id, + (ErlTimeoutProc) timeout_proc, + timeout); +#else + erl_set_timer(&p->u.tm, + (ErlTimeoutProc) timeout_proc, + NULL, + (void*) p, + timeout); +#endif +} + +/* + * Stack dump functions follow. 
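+ * They print the program counter, CP, and stack of a process in a
+ * human readable form; y-registers are labelled y(0), y(1), and so on.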
+ */ + +void +erts_stack_dump(int to, void *to_arg, Process *p) +{ + Eterm* sp; + int yreg = -1; + + if (p->trace_flags & F_SENSITIVE) { + return; + } + erts_program_counter_info(to, to_arg, p); + for (sp = p->stop; sp < STACK_START(p); sp++) { + yreg = stack_element_dump(to, to_arg, p, sp, yreg); + } +} + +void +erts_program_counter_info(int to, void *to_arg, Process *p) +{ + int i; + + erts_print(to, to_arg, "Program counter: %p (", p->i); + print_function_from_pc(to, to_arg, p->i); + erts_print(to, to_arg, ")\n"); + erts_print(to, to_arg, "CP: %p (", p->cp); + print_function_from_pc(to, to_arg, p->cp); + erts_print(to, to_arg, ")\n"); + if (!((p->status == P_RUNNING) || (p->status == P_GARBING))) { + erts_print(to, to_arg, "arity = %d\n",p->arity); + if (!ERTS_IS_CRASH_DUMPING) { + /* + * Only print the arguments if we are not writing a + * crash dump file. The arguments cannot be interpreted + * by the crashdump_viewer application and will therefore + * only cause problems. + */ + for (i = 0; i < p->arity; i++) + erts_print(to, to_arg, " %T\n", p->arg_reg[i]); + } + } +} + +static void +print_function_from_pc(int to, void *to_arg, Eterm* x) +{ + Eterm* addr = find_function_from_pc(x); + if (addr == NULL) { + if (x == beam_exit) { + erts_print(to, to_arg, "<terminate process>"); + } else if (x == beam_continue_exit) { + erts_print(to, to_arg, "<continue terminate process>"); + } else if (x == beam_apply+1) { + erts_print(to, to_arg, "<terminate process normally>"); + } else if (x == 0) { + erts_print(to, to_arg, "invalid"); + } else { + erts_print(to, to_arg, "unknown function"); + } + } else { + erts_print(to, to_arg, "%T:%T/%d + %d", + addr[0], addr[1], addr[2], ((x-addr)-2) * sizeof(Eterm)); + } +} + +static int +stack_element_dump(int to, void *to_arg, Process* p, Eterm* sp, int yreg) +{ + Eterm x = *sp; + + if (yreg < 0 || is_CP(x)) { + erts_print(to, to_arg, "\n%p ", sp); + } else { + char sbuf[16]; + sprintf(sbuf, "y(%d)", yreg); + erts_print(to, to_arg, "%-8s ", sbuf); + yreg++; + } + + if (is_CP(x)) { + erts_print(to, to_arg, "Return addr %p (", (Eterm *) x); + print_function_from_pc(to, to_arg, cp_val(x)); + erts_print(to, to_arg, ")\n"); + yreg = 0; + } else if is_catch(x) { + erts_print(to, to_arg, "Catch %p (", catch_pc(x)); + print_function_from_pc(to, to_arg, catch_pc(x)); + erts_print(to, to_arg, ")\n"); + } else { + erts_print(to, to_arg, "%T\n", x); + } + return yreg; +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\ + * The processes/0 BIF implementation. 
* +\* */ + + +#define ERTS_PROCESSES_BIF_TAB_INSPECT_INDICES_PER_RED 25 +#define ERTS_PROCESSES_BIF_TAB_CHUNK_SIZE 1000 +#define ERTS_PROCESSES_BIF_MIN_START_REDS \ + (ERTS_PROCESSES_BIF_TAB_CHUNK_SIZE \ + / ERTS_PROCESSES_BIF_TAB_INSPECT_INDICES_PER_RED) + +#define ERTS_PROCESSES_BIF_TAB_FREE_TERM_PROC_REDS 1 + +#define ERTS_PROCESSES_BIF_INSPECT_TERM_PROC_PER_RED 10 + +#define ERTS_PROCESSES_INSPECT_TERM_PROC_MAX_REDS \ + (ERTS_PROCESSES_BIF_TAB_CHUNK_SIZE \ + / ERTS_PROCESSES_BIF_TAB_INSPECT_INDICES_PER_RED) + + +#define ERTS_PROCESSES_BIF_BUILD_RESULT_CONSES_PER_RED 75 + +#define ERTS_PROCS_DBG_DO_TRACE 0 + +#ifdef DEBUG +# define ERTS_PROCESSES_BIF_DEBUGLEVEL 100 +#else +# define ERTS_PROCESSES_BIF_DEBUGLEVEL 0 +#endif + +#define ERTS_PROCS_DBGLVL_CHK_HALLOC 1 +#define ERTS_PROCS_DBGLVL_CHK_FOUND_PIDS 5 +#define ERTS_PROCS_DBGLVL_CHK_PIDS 10 +#define ERTS_PROCS_DBGLVL_CHK_TERM_PROC_LIST 20 +#define ERTS_PROCS_DBGLVL_CHK_RESLIST 20 + +#if ERTS_PROCESSES_BIF_DEBUGLEVEL == 0 +# define ERTS_PROCS_ASSERT(EXP) +#else +# define ERTS_PROCS_ASSERT(EXP) \ + ((void) ((EXP) \ + ? 1 \ + : (debug_processes_assert_error(#EXP, __FILE__, __LINE__), 0))) +#endif + + +#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_HALLOC +# define ERTS_PROCS_DBG_SAVE_HEAP_ALLOC(PBDP, HP, SZ) \ +do { \ + ERTS_PROCS_ASSERT(!(PBDP)->debug.heap); \ + ERTS_PROCS_ASSERT(!(PBDP)->debug.heap_size); \ + (PBDP)->debug.heap = (HP); \ + (PBDP)->debug.heap_size = (SZ); \ +} while (0) +# define ERTS_PROCS_DBG_VERIFY_HEAP_ALLOC_USED(PBDP, HP) \ +do { \ + ERTS_PROCS_ASSERT((PBDP)->debug.heap); \ + ERTS_PROCS_ASSERT((PBDP)->debug.heap_size); \ + ERTS_PROCS_ASSERT((PBDP)->debug.heap + (PBDP)->debug.heap_size == (HP));\ + (PBDP)->debug.heap = NULL; \ + (PBDP)->debug.heap_size = 0; \ +} while (0) +# define ERTS_PROCS_DBG_HEAP_ALLOC_INIT(PBDP) \ +do { \ + (PBDP)->debug.heap = NULL; \ + (PBDP)->debug.heap_size = 0; \ +} while (0) +#else +# define ERTS_PROCS_DBG_SAVE_HEAP_ALLOC(PBDP, HP, SZ) +# define ERTS_PROCS_DBG_VERIFY_HEAP_ALLOC_USED(PBDP, HP) +# define ERTS_PROCS_DBG_HEAP_ALLOC_INIT(PBDP) +#endif + +#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_RESLIST +# define ERTS_PROCS_DBG_CHK_RESLIST(R) debug_processes_check_res_list((R)) +#else +# define ERTS_PROCS_DBG_CHK_RESLIST(R) +#endif + +#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_PIDS +# define ERTS_PROCS_DBG_SAVE_PIDS(PBDP) debug_processes_save_all_pids((PBDP)) +# define ERTS_PROCS_DBG_VERIFY_PIDS(PBDP) \ +do { \ + if (!(PBDP)->debug.correct_pids_verified) \ + debug_processes_verify_all_pids((PBDP)); \ +} while (0) +# define ERTS_PROCS_DBG_CLEANUP_CHK_PIDS(PBDP) \ +do { \ + if ((PBDP)->debug.correct_pids) { \ + erts_free(ERTS_ALC_T_PROCS_PIDS, \ + (PBDP)->debug.correct_pids); \ + (PBDP)->debug.correct_pids = NULL; \ + } \ +} while(0) +# define ERTS_PROCS_DBG_CHK_PIDS_INIT(PBDP) \ +do { \ + (PBDP)->debug.correct_pids_verified = 0; \ + (PBDP)->debug.correct_pids = NULL; \ +} while (0) +#else +# define ERTS_PROCS_DBG_SAVE_PIDS(PBDP) +# define ERTS_PROCS_DBG_VERIFY_PIDS(PBDP) +# define ERTS_PROCS_DBG_CLEANUP_CHK_PIDS(PBDP) +# define ERTS_PROCS_DBG_CHK_PIDS_INIT(PBDP) +#endif + +#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_FOUND_PIDS +# define ERTS_PROCS_DBG_CHK_PID_FOUND(PBDP, PID, TVP) \ + debug_processes_check_found_pid((PBDP), (PID), (TVP), 1) +# define ERTS_PROCS_DBG_CHK_PID_NOT_FOUND(PBDP, PID, TVP) \ + debug_processes_check_found_pid((PBDP), (PID), (TVP), 0) +#else +# define ERTS_PROCS_DBG_CHK_PID_FOUND(PBDP, PID, TVP) +# define 
ERTS_PROCS_DBG_CHK_PID_NOT_FOUND(PBDP, PID, TVP) +#endif + +#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_TERM_PROC_LIST +# define ERTS_PROCS_DBG_CHK_TPLIST() \ + debug_processes_check_term_proc_list() +# define ERTS_PROCS_DBG_CHK_FREELIST(FL) \ + debug_processes_check_term_proc_free_list(FL) +#else +# define ERTS_PROCS_DBG_CHK_TPLIST() +# define ERTS_PROCS_DBG_CHK_FREELIST(FL) +#endif + +#if ERTS_PROCESSES_BIF_DEBUGLEVEL == 0 +#if ERTS_PROCS_DBG_DO_TRACE +# define ERTS_PROCS_DBG_INIT(P, PBDP) (PBDP)->debug.caller = (P)->id +# else +# define ERTS_PROCS_DBG_INIT(P, PBDP) +# endif +# define ERTS_PROCS_DBG_CLEANUP(PBDP) +#else +# define ERTS_PROCS_DBG_INIT(P, PBDP) \ +do { \ + (PBDP)->debug.caller = (P)->id; \ + ERTS_PROCS_DBG_HEAP_ALLOC_INIT((PBDP)); \ + ERTS_PROCS_DBG_CHK_PIDS_INIT((PBDP)); \ +} while (0) +# define ERTS_PROCS_DBG_CLEANUP(PBDP) \ +do { \ + ERTS_PROCS_DBG_CLEANUP_CHK_PIDS((PBDP)); \ +} while (0) +#endif + +#if ERTS_PROCS_DBG_DO_TRACE +# define ERTS_PROCS_DBG_TRACE(PID, FUNC, WHAT) \ + erts_fprintf(stderr, "%T %s:%d:%s(): %s\n", \ + (PID), __FILE__, __LINE__, #FUNC, #WHAT) +#else +# define ERTS_PROCS_DBG_TRACE(PID, FUNC, WHAT) +#endif + +static Uint processes_bif_tab_chunks; +static Export processes_trap_export; + +typedef struct { + SysTimeval time; +} ErtsProcessesBifChunkInfo; + +typedef enum { + INITIALIZING, + INSPECTING_TABLE, + INSPECTING_TERMINATED_PROCESSES, + BUILDING_RESULT, + RETURN_RESULT +} ErtsProcessesBifState; + +typedef struct { + ErtsProcessesBifState state; + Eterm caller; + ErtsProcessesBifChunkInfo *chunk; + int tix; + int pid_ix; + int pid_sz; + Eterm *pid; + ErtsTermProcElement *bif_invocation; /* Only used when > 1 chunk */ + +#if ERTS_PROCESSES_BIF_DEBUGLEVEL != 0 || ERTS_PROCS_DBG_DO_TRACE + struct { + Eterm caller; +#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_FOUND_PIDS + SysTimeval *pid_started; +#endif +#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_HALLOC + Eterm *heap; + Uint heap_size; +#endif +#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_PIDS + int correct_pids_verified; + Eterm *correct_pids; +#endif + } debug; +#endif + +} ErtsProcessesBifData; + + +#if ERTS_PROCESSES_BIF_DEBUGLEVEL != 0 +static void debug_processes_assert_error(char* expr, char* file, int line); +#endif +#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_RESLIST +static void debug_processes_check_res_list(Eterm list); +#endif +#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_PIDS +static void debug_processes_save_all_pids(ErtsProcessesBifData *pbdp); +static void debug_processes_verify_all_pids(ErtsProcessesBifData *pbdp); +#endif +#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_FOUND_PIDS +static void debug_processes_check_found_pid(ErtsProcessesBifData *pbdp, + Eterm pid, + SysTimeval *started, + int pid_should_be_found); +#endif +#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_TERM_PROC_LIST +static SysTimeval debug_tv_start; +static void debug_processes_check_term_proc_list(void); +static void debug_processes_check_term_proc_free_list(ErtsTermProcElement *tpep); +#endif + +static void +save_terminating_process(Process *p) +{ + ErtsTermProcElement *tpep = erts_alloc(ERTS_ALC_T_PROCS_TPROC_EL, + sizeof(ErtsTermProcElement)); + ERTS_PROCS_ASSERT(saved_term_procs.start && saved_term_procs.end); + ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&proc_tab_mtx)); + + ERTS_PROCS_DBG_CHK_TPLIST(); + + tpep->prev = saved_term_procs.end; + tpep->next = NULL; + tpep->ix = 
internal_pid_index(p->id);
+    tpep->u.process.pid = p->id;
+    tpep->u.process.spawned = p->started;
+    erts_get_emu_time(&tpep->u.process.exited);
+
+    saved_term_procs.end->next = tpep;
+    saved_term_procs.end = tpep;
+
+    ERTS_PROCS_DBG_CHK_TPLIST();
+
+    ERTS_PROCS_ASSERT((tpep->prev->ix >= 0
+		       ? erts_cmp_timeval(&tpep->u.process.exited,
+					  &tpep->prev->u.process.exited)
+		       : erts_cmp_timeval(&tpep->u.process.exited,
+					  &tpep->prev->u.bif_invocation.time)) > 0);
+}
+
+static void
+cleanup_processes_bif_data(Binary *bp)
+{
+    ErtsProcessesBifData *pbdp = ERTS_MAGIC_BIN_DATA(bp);
+
+    ERTS_PROCS_DBG_TRACE(pbdp->debug.caller, cleanup_processes_bif_data, call);
+
+    if (pbdp->state != INITIALIZING) {
+
+	if (pbdp->chunk) {
+	    erts_free(ERTS_ALC_T_PROCS_CNKINF, pbdp->chunk);
+	    pbdp->chunk = NULL;
+	}
+	if (pbdp->pid) {
+	    erts_free(ERTS_ALC_T_PROCS_PIDS, pbdp->pid);
+	    pbdp->pid = NULL;
+	}
+
+#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_FOUND_PIDS
+	if (pbdp->debug.pid_started) {
+	    erts_free(ERTS_ALC_T_PROCS_PIDS, pbdp->debug.pid_started);
+	    pbdp->debug.pid_started = NULL;
+	}
+#endif
+
+	if (pbdp->bif_invocation) {
+	    ErtsTermProcElement *tpep;
+
+	    erts_smp_mtx_lock(&proc_tab_mtx);
+
+	    ERTS_PROCS_DBG_TRACE(pbdp->debug.caller,
+				 cleanup_processes_bif_data,
+				 term_proc_cleanup);
+
+	    tpep = pbdp->bif_invocation;
+	    pbdp->bif_invocation = NULL;
+
+	    ERTS_PROCS_DBG_CHK_TPLIST();
+
+	    if (tpep->prev) {
+		/*
+		 * Only remove this bif invocation when we
+		 * have preceding invocations.
+		 */
+		tpep->prev->next = tpep->next;
+		if (tpep->next)
+		    tpep->next->prev = tpep->prev;
+		else {
+		    /*
+		     * At the time of writing this branch cannot be
+		     * reached. I don't want to remove this code though
+		     * since it may be possible to reach this line
+		     * in the future if the cleanup order in
+		     * erts_do_exit_process() is changed. The ASSERT(0)
+		     * is only here to make us aware that the reorder
+		     * has happened. /rickard
+		     */
+		    ASSERT(0);
+		    saved_term_procs.end = tpep->prev;
+		}
+		erts_free(ERTS_ALC_T_PROCS_TPROC_EL, tpep);
+	    }
+	    else {
+		/*
+		 * Free all elements until the next bif invocation
+		 * is found.
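+		 * (Elements with ix >= 0 represent terminated processes;
+		 * elements with ix < 0 represent other invocations of
+		 * processes/0 that are still in progress.)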
+ */ + ERTS_PROCS_ASSERT(saved_term_procs.start == tpep); + do { + ErtsTermProcElement *ftpep = tpep; + tpep = tpep->next; + erts_free(ERTS_ALC_T_PROCS_TPROC_EL, ftpep); + } while (tpep && tpep->ix >= 0); + saved_term_procs.start = tpep; + if (tpep) + tpep->prev = NULL; + else + saved_term_procs.end = NULL; + } + + ERTS_PROCS_DBG_CHK_TPLIST(); + + erts_smp_mtx_unlock(&proc_tab_mtx); + + } + } + + ERTS_PROCS_DBG_TRACE(pbdp->debug.caller, + cleanup_processes_bif_data, + return); + ERTS_PROCS_DBG_CLEANUP(pbdp); +} + +static int +processes_bif_engine(Process *p, Eterm *res_accp, Binary *mbp) +{ + ErtsProcessesBifData *pbdp = ERTS_MAGIC_BIN_DATA(mbp); + int have_reds; + int reds; + int locked = 0; + + do { + switch (pbdp->state) { + case INITIALIZING: + pbdp->chunk = erts_alloc(ERTS_ALC_T_PROCS_CNKINF, + (sizeof(ErtsProcessesBifChunkInfo) + * processes_bif_tab_chunks)); + pbdp->tix = 0; + pbdp->pid_ix = 0; + + erts_smp_mtx_lock(&proc_tab_mtx); + locked = 1; + + ERTS_PROCS_DBG_TRACE(p->id, processes_bif_engine, init); + + pbdp->pid_sz = erts_process_count(); + pbdp->pid = erts_alloc(ERTS_ALC_T_PROCS_PIDS, + sizeof(Eterm)*pbdp->pid_sz); + +#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_FOUND_PIDS + pbdp->debug.pid_started = erts_alloc(ERTS_ALC_T_PROCS_PIDS, + sizeof(SysTimeval)*pbdp->pid_sz); +#endif + + ERTS_PROCS_DBG_SAVE_PIDS(pbdp); + + if (processes_bif_tab_chunks == 1) + pbdp->bif_invocation = NULL; + else { + /* + * We will have to access the table multiple times + * releasing the table lock in between chunks. + */ + pbdp->bif_invocation = erts_alloc(ERTS_ALC_T_PROCS_TPROC_EL, + sizeof(ErtsTermProcElement)); + pbdp->bif_invocation->ix = -1; + erts_get_emu_time(&pbdp->bif_invocation->u.bif_invocation.time); + ERTS_PROCS_DBG_CHK_TPLIST(); + + pbdp->bif_invocation->next = NULL; + if (saved_term_procs.end) { + pbdp->bif_invocation->prev = saved_term_procs.end; + saved_term_procs.end->next = pbdp->bif_invocation; + ERTS_PROCS_ASSERT(saved_term_procs.start); + } + else { + pbdp->bif_invocation->prev = NULL; + saved_term_procs.start = pbdp->bif_invocation; + } + saved_term_procs.end = pbdp->bif_invocation; + + ERTS_PROCS_DBG_CHK_TPLIST(); + + } + + pbdp->state = INSPECTING_TABLE; + /* Fall through */ + + case INSPECTING_TABLE: { + int ix = pbdp->tix; + int indices = ERTS_PROCESSES_BIF_TAB_CHUNK_SIZE; + int cix = ix / ERTS_PROCESSES_BIF_TAB_CHUNK_SIZE; + int end_ix = ix + indices; + SysTimeval *invocation_timep; + + invocation_timep = (pbdp->bif_invocation + ? &pbdp->bif_invocation->u.bif_invocation.time + : NULL); + + ERTS_PROCS_ASSERT(is_nil(*res_accp)); + if (!locked) { + erts_smp_mtx_lock(&proc_tab_mtx); + locked = 1; + } + + ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&proc_tab_mtx)); + ERTS_PROCS_DBG_TRACE(p->id, processes_bif_engine, insp_table); + + if (cix != 0) + erts_get_emu_time(&pbdp->chunk[cix].time); + else if (pbdp->bif_invocation) + pbdp->chunk[0].time = *invocation_timep; + /* else: Time is irrelevant */ + + if (end_ix >= erts_max_processes) { + ERTS_PROCS_ASSERT(cix+1 == processes_bif_tab_chunks); + end_ix = erts_max_processes; + indices = end_ix - ix; + /* What to do when done with this chunk */ + pbdp->state = (processes_bif_tab_chunks == 1 + ? 
BUILDING_RESULT + : INSPECTING_TERMINATED_PROCESSES); + } + + for (; ix < end_ix; ix++) { + Process *rp = process_tab[ix]; + if (rp + && (!invocation_timep + || erts_cmp_timeval(&rp->started, + invocation_timep) < 0)) { + ERTS_PROCS_ASSERT(is_internal_pid(rp->id)); + pbdp->pid[pbdp->pid_ix] = rp->id; + +#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_FOUND_PIDS + pbdp->debug.pid_started[pbdp->pid_ix] = rp->started; +#endif + + pbdp->pid_ix++; + ERTS_PROCS_ASSERT(pbdp->pid_ix <= pbdp->pid_sz); + } + } + + pbdp->tix = end_ix; + + erts_smp_mtx_unlock(&proc_tab_mtx); + locked = 0; + + reds = indices/ERTS_PROCESSES_BIF_TAB_INSPECT_INDICES_PER_RED; + BUMP_REDS(p, reds); + + have_reds = ERTS_BIF_REDS_LEFT(p); + + if (have_reds && pbdp->state == INSPECTING_TABLE) { + ix = pbdp->tix; + indices = ERTS_PROCESSES_BIF_TAB_CHUNK_SIZE; + end_ix = ix + indices; + if (end_ix > erts_max_processes) { + end_ix = erts_max_processes; + indices = end_ix - ix; + } + + reds = indices/ERTS_PROCESSES_BIF_TAB_INSPECT_INDICES_PER_RED; + + /* Pretend we have no reds left if we haven't got enough + reductions to complete next chunk */ + if (reds > have_reds) + have_reds = 0; + } + + break; + } + + case INSPECTING_TERMINATED_PROCESSES: { + int i; + int max_reds; + int free_term_procs = 0; + SysTimeval *invocation_timep; + ErtsTermProcElement *tpep; + ErtsTermProcElement *free_list = NULL; + + tpep = pbdp->bif_invocation; + ERTS_PROCS_ASSERT(tpep); + invocation_timep = &tpep->u.bif_invocation.time; + + max_reds = have_reds = ERTS_BIF_REDS_LEFT(p); + if (max_reds > ERTS_PROCESSES_INSPECT_TERM_PROC_MAX_REDS) + max_reds = ERTS_PROCESSES_INSPECT_TERM_PROC_MAX_REDS; + + reds = 0; + erts_smp_mtx_lock(&proc_tab_mtx); + ERTS_PROCS_DBG_TRACE(p->id, processes_bif_engine, insp_term_procs); + + ERTS_PROCS_DBG_CHK_TPLIST(); + + if (tpep->prev) + tpep->prev->next = tpep->next; + else { + ERTS_PROCS_ASSERT(saved_term_procs.start == tpep); + saved_term_procs.start = tpep->next; + + if (saved_term_procs.start && saved_term_procs.start->ix >= 0) { + free_list = saved_term_procs.start; + free_term_procs = 1; + } + } + + if (tpep->next) + tpep->next->prev = tpep->prev; + else + saved_term_procs.end = tpep->prev; + + tpep = tpep->next; + + i = 0; + while (reds < max_reds && tpep) { + if (tpep->ix < 0) { + if (free_term_procs) { + ERTS_PROCS_ASSERT(free_list); + ERTS_PROCS_ASSERT(tpep->prev); + + tpep->prev->next = NULL; /* end of free_list */ + saved_term_procs.start = tpep; + tpep->prev = NULL; + free_term_procs = 0; + } + } + else { + int cix = tpep->ix/ERTS_PROCESSES_BIF_TAB_CHUNK_SIZE; + SysTimeval *chunk_timep = &pbdp->chunk[cix].time; + Eterm pid = tpep->u.process.pid; + ERTS_PROCS_ASSERT(is_internal_pid(pid)); + + if (erts_cmp_timeval(&tpep->u.process.spawned, + invocation_timep) < 0) { + if (erts_cmp_timeval(&tpep->u.process.exited, + chunk_timep) < 0) { + ERTS_PROCS_DBG_CHK_PID_NOT_FOUND(pbdp, + pid, + &tpep->u.process.spawned); + pbdp->pid[pbdp->pid_ix] = pid; +#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_FOUND_PIDS + pbdp->debug.pid_started[pbdp->pid_ix] = tpep->u.process.spawned; +#endif + pbdp->pid_ix++; + ERTS_PROCS_ASSERT(pbdp->pid_ix <= pbdp->pid_sz); + } + else { + ERTS_PROCS_DBG_CHK_PID_FOUND(pbdp, + pid, + &tpep->u.process.spawned); + } + } + else { + ERTS_PROCS_DBG_CHK_PID_NOT_FOUND(pbdp, + pid, + &tpep->u.process.spawned); + } + + i++; + if (i == ERTS_PROCESSES_BIF_INSPECT_TERM_PROC_PER_RED) { + reds++; + i = 0; + } + if (free_term_procs) + reds += ERTS_PROCESSES_BIF_TAB_FREE_TERM_PROC_REDS; + } + 
tpep = tpep->next; + } + + if (free_term_procs) { + ERTS_PROCS_ASSERT(free_list); + saved_term_procs.start = tpep; + if (!tpep) + saved_term_procs.end = NULL; + else { + ERTS_PROCS_ASSERT(tpep->prev); + tpep->prev->next = NULL; /* end of free_list */ + tpep->prev = NULL; + } + } + + if (!tpep) { + /* Done */ + ERTS_PROCS_ASSERT(pbdp->pid_ix == pbdp->pid_sz); + pbdp->state = BUILDING_RESULT; + pbdp->bif_invocation->next = free_list; + free_list = pbdp->bif_invocation; + pbdp->bif_invocation = NULL; + } + else { + /* Link in bif_invocation again where we left off */ + pbdp->bif_invocation->prev = tpep->prev; + pbdp->bif_invocation->next = tpep; + tpep->prev = pbdp->bif_invocation; + if (pbdp->bif_invocation->prev) + pbdp->bif_invocation->prev->next = pbdp->bif_invocation; + else { + ERTS_PROCS_ASSERT(saved_term_procs.start == tpep); + saved_term_procs.start = pbdp->bif_invocation; + } + } + + ERTS_PROCS_DBG_CHK_TPLIST(); + ERTS_PROCS_DBG_CHK_FREELIST(free_list); + erts_smp_mtx_unlock(&proc_tab_mtx); + + /* + * We do the actual free of term proc structures now when we + * have released the table lock instead of when we encountered + * them. This since free() isn't for free and we don't want to + * unnecessarily block other schedulers. + */ + while (free_list) { + tpep = free_list; + free_list = tpep->next; + erts_free(ERTS_ALC_T_PROCS_TPROC_EL, tpep); + } + + have_reds -= reds; + if (have_reds < 0) + have_reds = 0; + BUMP_REDS(p, reds); + break; + } + + case BUILDING_RESULT: { + int conses, ix, min_ix; + Eterm *hp; + Eterm res = *res_accp; + + ERTS_PROCS_DBG_VERIFY_PIDS(pbdp); + ERTS_PROCS_DBG_CHK_RESLIST(res); + + ERTS_PROCS_DBG_TRACE(p->id, processes_bif_engine, begin_build_res); + + have_reds = ERTS_BIF_REDS_LEFT(p); + conses = ERTS_PROCESSES_BIF_BUILD_RESULT_CONSES_PER_RED*have_reds; + min_ix = pbdp->pid_ix - conses; + if (min_ix < 0) { + min_ix = 0; + conses = pbdp->pid_ix; + } + + hp = HAlloc(p, conses*2); + ERTS_PROCS_DBG_SAVE_HEAP_ALLOC(pbdp, hp, conses*2); + + for (ix = pbdp->pid_ix - 1; ix >= min_ix; ix--) { + ERTS_PROCS_ASSERT(is_internal_pid(pbdp->pid[ix])); + res = CONS(hp, pbdp->pid[ix], res); + hp += 2; + } + + ERTS_PROCS_DBG_VERIFY_HEAP_ALLOC_USED(pbdp, hp); + + pbdp->pid_ix = min_ix; + if (min_ix == 0) + pbdp->state = RETURN_RESULT; + else { + pbdp->pid_sz = min_ix; + pbdp->pid = erts_realloc(ERTS_ALC_T_PROCS_PIDS, + pbdp->pid, + sizeof(Eterm)*pbdp->pid_sz); +#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_FOUND_PIDS + pbdp->debug.pid_started = erts_realloc(ERTS_ALC_T_PROCS_PIDS, + pbdp->debug.pid_started, + sizeof(SysTimeval)*pbdp->pid_sz); +#endif + } + reds = conses/ERTS_PROCESSES_BIF_BUILD_RESULT_CONSES_PER_RED; + BUMP_REDS(p, reds); + have_reds -= reds; + + ERTS_PROCS_DBG_CHK_RESLIST(res); + ERTS_PROCS_DBG_TRACE(p->id, processes_bif_engine, end_build_res); + *res_accp = res; + break; + } + case RETURN_RESULT: + cleanup_processes_bif_data(mbp); + return 1; + + default: + erl_exit(ERTS_ABORT_EXIT, + "erlang:processes/0: Invalid state: %d\n", + (int) pbdp->state); + } + + + } while (have_reds || pbdp->state == RETURN_RESULT); + + return 0; +} + +/* + * processes_trap/2 is a hidden BIF that processes/0 traps to. + */ + +static BIF_RETTYPE processes_trap(BIF_ALIST_2) +{ + Eterm res_acc; + Binary *mbp; + + /* + * This bif cannot be called from erlang code. It can only be + * trapped to from processes/0; therefore, a bad argument + * is a processes/0 internal error. 
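+ * (BIF_ARG_1 is the result list accumulated so far; BIF_ARG_2 is the
+ * magic binary holding the ErtsProcessesBifData state.)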
+ */ + + ERTS_PROCS_DBG_TRACE(BIF_P->id, processes_trap, call); + ERTS_PROCS_ASSERT(is_nil(BIF_ARG_1) || is_list(BIF_ARG_1)); + + res_acc = BIF_ARG_1; + + ERTS_PROCS_ASSERT(ERTS_TERM_IS_MAGIC_BINARY(BIF_ARG_2)); + + mbp = ((ProcBin *) binary_val(BIF_ARG_2))->val; + + ERTS_PROCS_ASSERT(ERTS_MAGIC_BIN_DESTRUCTOR(mbp) + == cleanup_processes_bif_data); + ERTS_PROCS_ASSERT( + ((ErtsProcessesBifData *) ERTS_MAGIC_BIN_DATA(mbp))->debug.caller + == BIF_P->id); + + if (processes_bif_engine(BIF_P, &res_acc, mbp)) { + ERTS_PROCS_DBG_TRACE(BIF_P->id, processes_trap, return); + BIF_RET(res_acc); + } + else { + ERTS_PROCS_DBG_TRACE(BIF_P->id, processes_trap, trap); + ERTS_BIF_YIELD2(&processes_trap_export, BIF_P, res_acc, BIF_ARG_2); + } +} + + + +/* + * The actual processes/0 BIF. + */ + +BIF_RETTYPE processes_0(BIF_ALIST_0) +{ + /* + * A requirement: The list of pids returned should be a consistent + * snapshot of all processes existing at some point + * in time during the execution of processes/0. Since + * processes might terminate while processes/0 is + * executing, we have to keep track of terminated + * processes and add them to the result. We also + * ignore processes created after processes/0 has + * begun executing. + */ + Eterm res_acc = NIL; + Binary *mbp = erts_create_magic_binary(sizeof(ErtsProcessesBifData), + cleanup_processes_bif_data); + ErtsProcessesBifData *pbdp = ERTS_MAGIC_BIN_DATA(mbp); + + ERTS_PROCS_DBG_TRACE(BIF_P->id, processes_0, call); + pbdp->state = INITIALIZING; + ERTS_PROCS_DBG_INIT(BIF_P, pbdp); + + if (ERTS_BIF_REDS_LEFT(BIF_P) >= ERTS_PROCESSES_BIF_MIN_START_REDS + && processes_bif_engine(BIF_P, &res_acc, mbp)) { + erts_bin_free(mbp); + ERTS_PROCS_DBG_CHK_RESLIST(res_acc); + ERTS_PROCS_DBG_TRACE(BIF_P->id, processes_0, return); + BIF_RET(res_acc); + } + else { + Eterm *hp; + Eterm magic_bin; + ERTS_PROCS_DBG_CHK_RESLIST(res_acc); + hp = HAlloc(BIF_P, PROC_BIN_SIZE); + ERTS_PROCS_DBG_SAVE_HEAP_ALLOC(pbdp, hp, PROC_BIN_SIZE); + magic_bin = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), mbp); + ERTS_PROCS_DBG_VERIFY_HEAP_ALLOC_USED(pbdp, hp); + ERTS_PROCS_DBG_TRACE(BIF_P->id, processes_0, trap); + ERTS_BIF_YIELD2(&processes_trap_export, BIF_P, res_acc, magic_bin); + } +} + +static void +init_processes_bif(void) +{ + saved_term_procs.start = NULL; + saved_term_procs.end = NULL; + processes_bif_tab_chunks = (((erts_max_processes - 1) + / ERTS_PROCESSES_BIF_TAB_CHUNK_SIZE) + + 1); + + /* processes_trap/2 is a hidden BIF that the processes/0 BIF traps to. */ + sys_memset((void *) &processes_trap_export, 0, sizeof(Export)); + processes_trap_export.address = &processes_trap_export.code[3]; + processes_trap_export.code[0] = am_erlang; + processes_trap_export.code[1] = am_processes_trap; + processes_trap_export.code[2] = 2; + processes_trap_export.code[3] = (Eterm) em_apply_bif; + processes_trap_export.code[4] = (Eterm) &processes_trap; + +#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_TERM_PROC_LIST + erts_get_emu_time(&debug_tv_start); +#endif + +} + +/* + * Debug stuff + */ + +Eterm +erts_debug_processes(Process *c_p) +{ + /* This is the old processes/0 BIF. 
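+       It is kept for debugging purposes only: it builds the whole
+       result list in one go while holding proc_tab_mtx, with no
+       reduction accounting.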
+ */
+ int i;
+ Uint need;
+ Eterm res;
+ Eterm* hp;
+ Process *p;
+#ifdef DEBUG
+ Eterm *hp_end;
+#endif
+
+ erts_smp_mtx_lock(&proc_tab_mtx);
+
+ res = NIL;
+ need = erts_process_count() * 2;
+ hp = HAlloc(c_p, need); /* we need two heap words for each pid */
+#ifdef DEBUG
+ hp_end = hp + need;
+#endif
+
+ /* make the list by scanning the process table backward */
+
+ for (i = erts_max_processes-1; i >= 0; i--) {
+ if ((p = process_tab[i]) != NULL) {
+ res = CONS(hp, p->id, res);
+ hp += 2;
+ }
+ }
+ ASSERT(hp == hp_end);
+
+ erts_smp_mtx_unlock(&proc_tab_mtx);
+
+ return res;
+}
+
+Eterm
+erts_debug_processes_bif_info(Process *c_p)
+{
+ ERTS_DECL_AM(processes_bif_info);
+ Eterm elements[] = {
+ AM_processes_bif_info,
+ make_small((Uint) ERTS_PROCESSES_BIF_MIN_START_REDS),
+ make_small((Uint) processes_bif_tab_chunks),
+ make_small((Uint) ERTS_PROCESSES_BIF_TAB_CHUNK_SIZE),
+ make_small((Uint) ERTS_PROCESSES_BIF_TAB_INSPECT_INDICES_PER_RED),
+ make_small((Uint) ERTS_PROCESSES_BIF_TAB_FREE_TERM_PROC_REDS),
+ make_small((Uint) ERTS_PROCESSES_BIF_INSPECT_TERM_PROC_PER_RED),
+ make_small((Uint) ERTS_PROCESSES_INSPECT_TERM_PROC_MAX_REDS),
+ make_small((Uint) ERTS_PROCESSES_BIF_BUILD_RESULT_CONSES_PER_RED),
+ make_small((Uint) ERTS_PROCESSES_BIF_DEBUGLEVEL)
+ };
+ Uint sz = 0;
+ Eterm *hp;
+ (void) erts_bld_tuplev(NULL, &sz, sizeof(elements)/sizeof(Eterm), elements);
+ hp = HAlloc(c_p, sz);
+ return erts_bld_tuplev(&hp, NULL, sizeof(elements)/sizeof(Eterm), elements);
+}
+
+#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_FOUND_PIDS
+static void
+debug_processes_check_found_pid(ErtsProcessesBifData *pbdp,
+ Eterm pid,
+ SysTimeval *tvp,
+ int pid_should_be_found)
+{
+ int i;
+ for (i = 0; i < pbdp->pid_ix; i++) {
+ if (pbdp->pid[i] == pid
+ && pbdp->debug.pid_started[i].tv_sec == tvp->tv_sec
+ && pbdp->debug.pid_started[i].tv_usec == tvp->tv_usec) {
+ ERTS_PROCS_ASSERT(pid_should_be_found);
+ return;
+ }
+ }
+ ERTS_PROCS_ASSERT(!pid_should_be_found);
+}
+#endif
+
+#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_RESLIST
+static void
+debug_processes_check_res_list(Eterm list)
+{
+ while (is_list(list)) {
+ Eterm* consp = list_val(list);
+ Eterm hd = CAR(consp);
+ ERTS_PROCS_ASSERT(is_internal_pid(hd));
+ list = CDR(consp);
+ }
+
+ ERTS_PROCS_ASSERT(is_nil(list));
+}
+#endif
+
+#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_PIDS
+
+static void
+debug_processes_save_all_pids(ErtsProcessesBifData *pbdp)
+{
+ int ix, tix, cpix;
+ pbdp->debug.correct_pids_verified = 0;
+ pbdp->debug.correct_pids = erts_alloc(ERTS_ALC_T_PROCS_PIDS,
+ sizeof(Eterm)*pbdp->pid_sz);
+
+ for (tix = 0, cpix = 0; tix < erts_max_processes; tix++) {
+ Process *rp = process_tab[tix];
+ if (rp) {
+ ERTS_PROCS_ASSERT(is_internal_pid(rp->id));
+ pbdp->debug.correct_pids[cpix++] = rp->id;
+ ERTS_PROCS_ASSERT(cpix <= pbdp->pid_sz);
+ }
+ }
+ ERTS_PROCS_ASSERT(cpix == pbdp->pid_sz);
+
+ for (ix = 0; ix < pbdp->pid_sz; ix++)
+ pbdp->pid[ix] = make_small(ix);
+}
+
+static void
+debug_processes_verify_all_pids(ErtsProcessesBifData *pbdp)
+{
+ int ix, cpix;
+
+ ERTS_PROCS_ASSERT(pbdp->pid_ix == pbdp->pid_sz);
+
+ for (ix = 0; ix < pbdp->pid_sz; ix++) {
+ int found = 0;
+ Eterm pid = pbdp->pid[ix];
+ ERTS_PROCS_ASSERT(is_internal_pid(pid));
+ for (cpix = ix; cpix < pbdp->pid_sz; cpix++) {
+ if (pbdp->debug.correct_pids[cpix] == pid) {
+ pbdp->debug.correct_pids[cpix] = NIL;
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ for (cpix = 0; cpix < ix; cpix++) {
+ if (pbdp->debug.correct_pids[cpix] == pid) {
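+ /* NIL out the matched slot so the same table entry cannot
+ * satisfy two occurrences of a pid in the result. */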
+ pbdp->debug.correct_pids[cpix] = NIL;
+ found = 1;
+ break;
+ }
+ }
+ }
+ ERTS_PROCS_ASSERT(found);
+ }
+ pbdp->debug.correct_pids_verified = 1;
+
+ erts_free(ERTS_ALC_T_PROCS_PIDS, pbdp->debug.correct_pids);
+ pbdp->debug.correct_pids = NULL;
+}
+#endif /* ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_PIDS */
+
+#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_TERM_PROC_LIST
+static void
+debug_processes_check_term_proc_list(void)
+{
+ ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&proc_tab_mtx));
+ if (!saved_term_procs.start)
+ ERTS_PROCS_ASSERT(!saved_term_procs.end);
+ else {
+ SysTimeval tv_now;
+ SysTimeval *prev_xtvp = NULL;
+ ErtsTermProcElement *tpep;
+ erts_get_emu_time(&tv_now);
+
+ for (tpep = saved_term_procs.start; tpep; tpep = tpep->next) {
+ if (!tpep->prev)
+ ERTS_PROCS_ASSERT(saved_term_procs.start == tpep);
+ else
+ ERTS_PROCS_ASSERT(tpep->prev->next == tpep);
+ if (!tpep->next)
+ ERTS_PROCS_ASSERT(saved_term_procs.end == tpep);
+ else
+ ERTS_PROCS_ASSERT(tpep->next->prev == tpep);
+ if (tpep->ix < 0) {
+ SysTimeval *tvp = &tpep->u.bif_invocation.time;
+ ERTS_PROCS_ASSERT(erts_cmp_timeval(&debug_tv_start, tvp) < 0
+ && erts_cmp_timeval(tvp, &tv_now) < 0);
+ }
+ else {
+ SysTimeval *stvp = &tpep->u.process.spawned;
+ SysTimeval *xtvp = &tpep->u.process.exited;
+
+ ERTS_PROCS_ASSERT(erts_cmp_timeval(&debug_tv_start,
+ stvp) < 0);
+ ERTS_PROCS_ASSERT(erts_cmp_timeval(stvp, xtvp) < 0);
+ if (prev_xtvp)
+ ERTS_PROCS_ASSERT(erts_cmp_timeval(prev_xtvp, xtvp) < 0);
+ prev_xtvp = xtvp;
+ ERTS_PROCS_ASSERT(is_internal_pid(tpep->u.process.pid));
+ ERTS_PROCS_ASSERT(tpep->ix
+ == internal_pid_index(tpep->u.process.pid));
+ }
+ }
+ }
+}
+
+static void
+debug_processes_check_term_proc_free_list(ErtsTermProcElement *free_list)
+{
+ if (saved_term_procs.start) {
+ ErtsTermProcElement *ftpep;
+ ErtsTermProcElement *tpep;
+
+ for (ftpep = free_list; ftpep; ftpep = ftpep->next) {
+ for (tpep = saved_term_procs.start; tpep; tpep = tpep->next)
+ ERTS_PROCS_ASSERT(ftpep != tpep);
+ }
+ }
+}
+
+#endif
+
+#if ERTS_PROCESSES_BIF_DEBUGLEVEL != 0
+
+static void
+debug_processes_assert_error(char* expr, char* file, int line)
+{
+ fflush(stdout);
+ erts_fprintf(stderr, "%s:%d: Assertion failed: %s\n", file, line, expr);
+ fflush(stderr);
+ abort();
+}
+
+#endif
+
+/* *\
+ * End of the processes/0 BIF implementation. *
+\* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
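The engine above is an instance of a general restartable-iteration pattern:
each invocation does a bounded amount of work (paid for in reductions), the
cursor lives in separately allocated state (here the magic binary), and the
caller re-invokes the engine via a trap until it reports completion. The
stand-alone C sketch below shows only that shape; every name in it
(snapshot_state, snapshot_step, WORK_PER_CALL, the fake process table) is
invented for illustration and is not ERTS code.

#include <stdio.h>
#include <stdlib.h>

#define WORK_PER_CALL 3 /* stand-in for the per-invocation reduction budget */

struct snapshot_state {
    int next_ix;  /* table index at which to resume scanning */
    int tab_size;
};

/* Scan at most WORK_PER_CALL slots. Returns 1 when the whole table has
 * been scanned, 0 when the caller must invoke us again (the "trap"). */
static int snapshot_step(struct snapshot_state *st, const int *tab)
{
    int budget = WORK_PER_CALL;
    while (st->next_ix < st->tab_size && budget-- > 0) {
        if (tab[st->next_ix] != 0) /* nonzero models a live process slot */
            printf("collected %d\n", tab[st->next_ix]);
        st->next_ix++;
    }
    return st->next_ix == st->tab_size;
}

int main(void)
{
    int tab[] = { 11, 0, 22, 33, 0, 44, 55 };
    struct snapshot_state *st = malloc(sizeof *st);

    if (!st)
        return 1;
    st->next_ix = 0;
    st->tab_size = (int)(sizeof tab / sizeof tab[0]);

    /* Each loop turn models one trap back into the hidden BIF. */
    while (!snapshot_step(st, tab))
        puts("yield: budget exhausted, rescheduling");

    free(st); /* plays the role of cleanup_processes_bif_data */
    return 0;
}

What the sketch necessarily leaves out is what makes the real engine
interesting: the saved_term_procs bookkeeping, which records processes that
terminate mid-iteration so the final list is still a consistent snapshot.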