aboutsummaryrefslogtreecommitdiffstats
path: root/erts
diff options
context:
space:
mode:
authorRickard Green <[email protected]>2017-05-19 16:32:41 +0200
committerRickard Green <[email protected]>2017-05-19 16:32:41 +0200
commit507278352541fcb3a00b85bf77764ab4cf340b15 (patch)
tree5f282604fc5fc7d28a936cb18c4596b0fcc0311d /erts
parent3e42bb4feb6a3a90895595d53b37092f699ba4c3 (diff)
parent32ea8ba368c455afba07afd85bed6fb57879f56d (diff)
downloadotp-507278352541fcb3a00b85bf77764ab4cf340b15.tar.gz
otp-507278352541fcb3a00b85bf77764ab4cf340b15.tar.bz2
otp-507278352541fcb3a00b85bf77764ab4cf340b15.zip
Merge branch 'rickard/ds-runqs'
OTP-14152 * rickard/ds-runqs: Make statistics/1 aware of dirty run-queues and tasks
Diffstat (limited to 'erts')
-rw-r--r--erts/doc/src/erlang.xml189
-rw-r--r--erts/emulator/beam/atom.names4
-rw-r--r--erts/emulator/beam/erl_bif_info.c34
-rw-r--r--erts/emulator/beam/erl_process.c188
-rw-r--r--erts/emulator/beam/erl_process.h23
-rw-r--r--erts/emulator/test/statistics_SUITE.erl25
-rw-r--r--erts/etc/unix/etp-commands.in4
-rw-r--r--erts/preloaded/ebin/erlang.beambin105020 -> 105164 bytes
-rw-r--r--erts/preloaded/src/erlang.erl8
9 files changed, 340 insertions, 135 deletions
diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml
index 6d165e9eff..d9cc5ef936 100644
--- a/erts/doc/src/erlang.xml
+++ b/erts/doc/src/erlang.xml
@@ -6148,28 +6148,60 @@ true</pre>
<fsummary>Information about active processes and ports.</fsummary>
<desc>
<marker id="statistics_active_tasks"></marker>
+ <p>Returns the same as
+ <seealso marker="#statistics_active_tasks_all">
+ <c>statistics(active_tasks_all)</c></seealso>
+ with the exception that no information about the dirty
+ IO run queue and its associated schedulers is part of
+ the result. That is, only tasks that are expected to be
+ CPU bound are part of the result.</p>
+ </desc>
+ </func>
+
+ <func>
+ <name name="statistics" arity="1" clause_i="2"/>
+ <fsummary>Information about active processes and ports.</fsummary>
+ <desc>
+ <marker id="statistics_active_tasks_all"></marker>
<p>Returns a list where each element represents the amount
of active processes and ports on each run queue and its
- associated scheduler. That is, the number of processes and
- ports that are ready to run, or are currently running. The
- element location in the list corresponds to the scheduler
- and its run queue. The first element corresponds to scheduler
- number 1 and so on. The information is <em>not</em> gathered
- atomically. That is, the result is not necessarily a
- consistent snapshot of the state, but instead quite
- efficiently gathered.</p>
+ associated schedulers. That is, the number of processes and
+ ports that are ready to run, or are currently running.
+ Values for normal run queues and their associated schedulers
+ are located first in the resulting list. The first element
+ corresponds to scheduler number 1 and so on. If support for
+ dirty schedulers exist, an element with the value for the
+ dirty CPU run queue and its associated dirty CPU schedulers
+ follow and then as last element the value for the the dirty
+ IO run queue and its associated dirty IO schedulers follow.
+ The information is <em>not</em> gathered atomically. That is,
+ the result is not necessarily a consistent snapshot of the
+ state, but instead quite efficiently gathered.</p>
+ <note><p>Each normal scheduler has one run queue that it
+ manages. If dirty schedulers schedulers are supported, all
+ dirty CPU schedulers share one run queue, and all dirty IO
+ schedulers share one run queue. That is, we have multiple
+ normal run queues, one dirty CPU run queue and one dirty
+ IO run queue. Work can <em>not</em> migrate between the
+ different types of run queues. Only work in normal run
+ queues can migrate to other normal run queues. This has
+ to be taken into account when evaluating the result.</p></note>
<p>See also
<seealso marker="#statistics_total_active_tasks">
<c>statistics(total_active_tasks)</c></seealso>,
<seealso marker="#statistics_run_queue_lengths">
- <c>statistics(run_queue_lengths)</c></seealso>, and
+ <c>statistics(run_queue_lengths)</c></seealso>,
+ <seealso marker="#statistics_run_queue_lengths_all">
+ <c>statistics(run_queue_lengths_all)</c></seealso>,
<seealso marker="#statistics_total_run_queue_lengths">
- <c>statistics(total_run_queue_lengths)</c></seealso>.</p>
+ <c>statistics(total_run_queue_lengths)</c></seealso>, and
+ <seealso marker="#statistics_total_run_queue_lengths_all">
+ <c>statistics(total_run_queue_lengths_all)</c></seealso>.</p>
</desc>
</func>
<func>
- <name name="statistics" arity="1" clause_i="2"/>
+ <name name="statistics" arity="1" clause_i="3"/>
<fsummary>Information about context switches.</fsummary>
<desc>
<p>Returns the total number of context switches since the
@@ -6178,7 +6210,7 @@ true</pre>
</func>
<func>
- <name name="statistics" arity="1" clause_i="3"/>
+ <name name="statistics" arity="1" clause_i="4"/>
<fsummary>Information about exact reductions.</fsummary>
<desc>
<marker id="statistics_exact_reductions"></marker>
@@ -6194,7 +6226,7 @@ true</pre>
</func>
<func>
- <name name="statistics" arity="1" clause_i="4"/>
+ <name name="statistics" arity="1" clause_i="5"/>
<fsummary>Information about garbage collection.</fsummary>
<desc>
<p>Returns information about garbage collection, for example:</p>
@@ -6206,7 +6238,7 @@ true</pre>
</func>
<func>
- <name name="statistics" arity="1" clause_i="5"/>
+ <name name="statistics" arity="1" clause_i="6"/>
<fsummary>Information about I/O.</fsummary>
<desc>
<p>Returns <c><anno>Input</anno></c>,
@@ -6217,7 +6249,7 @@ true</pre>
</func>
<func>
- <name name="statistics" arity="1" clause_i="6"/>
+ <name name="statistics" arity="1" clause_i="7"/>
<fsummary>Information about microstate accounting.</fsummary>
<desc>
<marker id="statistics_microstate_accounting"></marker>
@@ -6353,7 +6385,7 @@ lists:map(
</func>
<func>
- <name name="statistics" arity="1" clause_i="7"/>
+ <name name="statistics" arity="1" clause_i="8"/>
<fsummary>Information about reductions.</fsummary>
<desc>
<marker id="statistics_reductions"></marker>
@@ -6372,12 +6404,13 @@ lists:map(
</func>
<func>
- <name name="statistics" arity="1" clause_i="8"/>
+ <name name="statistics" arity="1" clause_i="9"/>
<fsummary>Information about the run-queues.</fsummary>
<desc><marker id="statistics_run_queue"></marker>
- <p>Returns the total length of the run-queues. That is, the number
+ <p>Returns the total length of all normal run-queues. That is, the number
of processes and ports that are ready to run on all available
- run-queues. The information is gathered atomically. That
+ normal run-queues. Dirty run queues are not part of the
+ result. The information is gathered atomically. That
is, the result is a consistent snapshot of the state, but
this operation is much more expensive compared to
<seealso marker="#statistics_total_run_queue_lengths">
@@ -6387,29 +6420,63 @@ lists:map(
</func>
<func>
- <name name="statistics" arity="1" clause_i="9"/>
+ <name name="statistics" arity="1" clause_i="10"/>
<fsummary>Information about the run-queue lengths.</fsummary>
<desc><marker id="statistics_run_queue_lengths"></marker>
+ <p>Returns the same as
+ <seealso marker="#statistics_run_queue_lengths_all">
+ <c>statistics(run_queue_lengths_all)</c></seealso>
+ with the exception that no information about the dirty
+ IO run queue is part of the result. That is, only
+ run queues with work that is expected to be CPU bound
+ is part of the result.</p>
+ </desc>
+ </func>
+
+ <func>
+ <name name="statistics" arity="1" clause_i="11"/>
+ <fsummary>Information about the run-queue lengths.</fsummary>
+ <desc><marker id="statistics_run_queue_lengths_all"></marker>
<p>Returns a list where each element represents the amount
- of processes and ports ready to run for each run queue. The
- element location in the list corresponds to the run queue
- of a scheduler. The first element corresponds to the run
- queue of scheduler number 1 and so on. The information is
- <em>not</em> gathered atomically. That is, the result is
- not necessarily a consistent snapshot of the state, but
- instead quite efficiently gathered.</p>
+ of processes and ports ready to run for each run queue.
+ Values for normal run queues are located first in the
+ resulting list. The first element corresponds to the
+ normal run queue of scheduler number 1 and so on. If
+ support for dirty schedulers exist, values for the dirty
+ CPU run queue and the dirty IO run queue follow (in that
+ order) at the end. The information is <em>not</em>
+ gathered atomically. That is, the result is not
+ necessarily a consistent snapshot of the state, but
+ instead quite efficiently gathered.</p>
+ <note><p>Each normal scheduler has one run queue that it
+ manages. If dirty schedulers schedulers are supported, all
+ dirty CPU schedulers share one run queue, and all dirty IO
+ schedulers share one run queue. That is, we have multiple
+ normal run queues, one dirty CPU run queue and one dirty
+ IO run queue. Work can <em>not</em> migrate between the
+ different types of run queues. Only work in normal run
+ queues can migrate to other normal run queues. This has
+ to be taken into account when evaluating the result.</p></note>
<p>See also
+ <seealso marker="#statistics_run_queue_lengths">
+ <c>statistics(run_queue_lengths)</c></seealso>,
+ <seealso marker="#statistics_total_run_queue_lengths_all">
+ <c>statistics(total_run_queue_lengths_all)</c></seealso>,
<seealso marker="#statistics_total_run_queue_lengths">
<c>statistics(total_run_queue_lengths)</c></seealso>,
<seealso marker="#statistics_active_tasks">
- <c>statistics(active_tasks)</c></seealso>, and
+ <c>statistics(active_tasks)</c></seealso>,
+ <seealso marker="#statistics_active_tasks_all">
+ <c>statistics(active_tasks_all)</c></seealso>, and
<seealso marker="#statistics_total_active_tasks">
- <c>statistics(total_active_tasks)</c></seealso>.</p>
+ <c>statistics(total_active_tasks)</c></seealso>,
+ <seealso marker="#statistics_total_active_tasks_all">
+ <c>statistics(total_active_tasks_all)</c></seealso>.</p>
</desc>
</func>
<func>
- <name name="statistics" arity="1" clause_i="10"/>
+ <name name="statistics" arity="1" clause_i="12"/>
<fsummary>Information about runtime.</fsummary>
<desc>
<p>Returns information about runtime, in milliseconds.</p>
@@ -6424,7 +6491,7 @@ lists:map(
</func>
<func>
- <name name="statistics" arity="1" clause_i="11"/>
+ <name name="statistics" arity="1" clause_i="13"/>
<fsummary>Information about each schedulers work time.</fsummary>
<desc>
<marker id="statistics_scheduler_wall_time"></marker>
@@ -6545,7 +6612,7 @@ ok
</func>
<func>
- <name name="statistics" arity="1" clause_i="12"/>
+ <name name="statistics" arity="1" clause_i="14"/>
<fsummary>Information about each schedulers work time.</fsummary>
<desc>
<marker id="statistics_scheduler_wall_time_all"></marker>
@@ -6570,47 +6637,47 @@ ok
</desc>
</func>
<func>
- <name name="statistics" arity="1" clause_i="13"/>
+ <name name="statistics" arity="1" clause_i="15"/>
<fsummary>Information about active processes and ports.</fsummary>
<desc><marker id="statistics_total_active_tasks"></marker>
- <p>Returns the total amount of active processes and ports in
- the system. That is, the number of processes and ports that
- are ready to run, or are currently running. The information
- is <em>not</em> gathered atomically. That is, the result
- is not necessarily a consistent snapshot of the state, but
- instead quite efficiently gathered.</p>
- <p>See also
- <seealso marker="#statistics_active_tasks">
- <c>statistics(active_tasks)</c></seealso>,
- <seealso marker="#statistics_run_queue_lengths">
- <c>statistics(run_queue_lengths)</c></seealso>, and
- <seealso marker="#statistics_total_run_queue_lengths">
- <c>statistics(total_run_queue_lengths)</c></seealso>.</p>
+ <p>The same as calling
+ <c>lists:sum(</c><seealso marker="#statistics_active_tasks"><c>statistics(active_tasks)</c></seealso><c>)</c>,
+ but more efficient.</p>
</desc>
</func>
<func>
- <name name="statistics" arity="1" clause_i="14"/>
+ <name name="statistics" arity="1" clause_i="16"/>
+ <fsummary>Information about active processes and ports.</fsummary>
+ <desc><marker id="statistics_total_active_tasks_all"></marker>
+ <p>The same as calling
+ <c>lists:sum(</c><seealso marker="#statistics_active_tasks_all"><c>statistics(active_tasks_all)</c></seealso><c>)</c>,
+ but more efficient.</p>
+ </desc>
+ </func>
+
+ <func>
+ <name name="statistics" arity="1" clause_i="17"/>
<fsummary>Information about the run-queue lengths.</fsummary>
<desc><marker id="statistics_total_run_queue_lengths"></marker>
- <p>Returns the total length of the run queues. That is, the number
- of processes and ports that are ready to run on all available
- run queues. The information is <em>not</em> gathered atomically.
- That is, the result is not necessarily a consistent snapshot of
- the state, but much more efficiently gathered compared to
- <seealso marker="#statistics_run_queue">
- <c>statistics(run_queue)</c></seealso>.</p>
- <p>See also <seealso marker="#statistics_run_queue_lengths">
- <c>statistics(run_queue_lengths)</c></seealso>,
- <seealso marker="#statistics_total_active_tasks">
- <c>statistics(total_active_tasks)</c></seealso>, and
- <seealso marker="#statistics_active_tasks">
- <c>statistics(active_tasks)</c></seealso>.</p>
+ <p>The same as calling
+ <c>lists:sum(</c><seealso marker="#statistics_run_queue_lengths"><c>statistics(run_queue_lengths)</c></seealso><c>)</c>,
+ but more efficient.</p>
</desc>
</func>
<func>
- <name name="statistics" arity="1" clause_i="15"/>
+ <name name="statistics" arity="1" clause_i="18"/>
+ <fsummary>Information about the run-queue lengths.</fsummary>
+ <desc><marker id="statistics_total_run_queue_lengths_all"></marker>
+ <p>The same as calling
+ <c>lists:sum(</c><seealso marker="#statistics_run_queue_lengths_all"><c>statistics(run_queue_lengths_all)</c></seealso><c>)</c>,
+ but more efficient.</p>
+ </desc>
+ </func>
+
+ <func>
+ <name name="statistics" arity="1" clause_i="19"/>
<fsummary>Information about wall clock.</fsummary>
<desc>
<p>Returns information about wall clock. <c>wall_clock</c> can
diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names
index 477a7676d6..a44d23b181 100644
--- a/erts/emulator/beam/atom.names
+++ b/erts/emulator/beam/atom.names
@@ -76,6 +76,7 @@ atom ac
atom accessor
atom active
atom active_tasks
+atom active_tasks_all
atom alive
atom all
atom all_but_first
@@ -567,6 +568,7 @@ atom return_to
atom return_trace
atom run_queue
atom run_queue_lengths
+atom run_queue_lengths_all
atom runnable
atom runnable_ports
atom runnable_procs
@@ -656,8 +658,10 @@ atom Times='*'
atom timestamp
atom total
atom total_active_tasks
+atom total_active_tasks_all
atom total_heap_size
atom total_run_queue_lengths
+atom total_run_queue_lengths_all
atom tpkt
atom trace trace_ts traced
atom trace_control_word
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c
index 5fc70dfc02..e2773475b0 100644
--- a/erts/emulator/beam/erl_bif_info.c
+++ b/erts/emulator/beam/erl_bif_info.c
@@ -3450,9 +3450,15 @@ BIF_RETTYPE statistics_1(BIF_ALIST_1)
if (is_non_value(res))
BIF_RET(am_undefined);
BIF_TRAP1(gather_sched_wall_time_res_trap, BIF_P, res);
- } else if (BIF_ARG_1 == am_total_active_tasks
- || BIF_ARG_1 == am_total_run_queue_lengths) {
- Uint no = erts_run_queues_len(NULL, 0, BIF_ARG_1 == am_total_active_tasks);
+ } else if ((BIF_ARG_1 == am_total_active_tasks)
+ | (BIF_ARG_1 == am_total_run_queue_lengths)
+ | (BIF_ARG_1 == am_total_active_tasks_all)
+ | (BIF_ARG_1 == am_total_run_queue_lengths_all)) {
+ Uint no = erts_run_queues_len(NULL, 0,
+ ((BIF_ARG_1 == am_total_active_tasks)
+ | (BIF_ARG_1 == am_total_active_tasks_all)),
+ ((BIF_ARG_1 == am_total_active_tasks_all)
+ | (BIF_ARG_1 == am_total_run_queue_lengths_all)));
if (IS_USMALL(0, no))
res = make_small(no);
else {
@@ -3460,13 +3466,21 @@ BIF_RETTYPE statistics_1(BIF_ALIST_1)
res = uint_to_big(no, hp);
}
BIF_RET(res);
- } else if (BIF_ARG_1 == am_active_tasks
- || BIF_ARG_1 == am_run_queue_lengths) {
+ } else if ((BIF_ARG_1 == am_active_tasks)
+ | (BIF_ARG_1 == am_run_queue_lengths)
+ | (BIF_ARG_1 == am_active_tasks_all)
+ | (BIF_ARG_1 == am_run_queue_lengths_all)) {
Eterm res, *hp, **hpp;
Uint sz, *szp;
- int no_qs = erts_no_run_queues;
+ int incl_dirty_io = ((BIF_ARG_1 == am_active_tasks_all)
+ | (BIF_ARG_1 == am_run_queue_lengths_all));
+ int no_qs = (erts_no_run_queues + ERTS_NUM_DIRTY_CPU_RUNQS +
+ (incl_dirty_io ? ERTS_NUM_DIRTY_IO_RUNQS : 0));
Uint *qszs = erts_alloc(ERTS_ALC_T_TMP,sizeof(Uint)*no_qs*2);
- (void) erts_run_queues_len(qszs, 0, BIF_ARG_1 == am_active_tasks);
+ (void) erts_run_queues_len(qszs, 0,
+ ((BIF_ARG_1 == am_active_tasks)
+ | (BIF_ARG_1 == am_active_tasks_all)),
+ incl_dirty_io);
sz = 0;
szp = &sz;
hpp = NULL;
@@ -3539,7 +3553,7 @@ BIF_RETTYPE statistics_1(BIF_ALIST_1)
res = TUPLE2(hp, b1, b2);
BIF_RET(res);
} else if (BIF_ARG_1 == am_run_queue) {
- res = erts_run_queues_len(NULL, 1, 0);
+ res = erts_run_queues_len(NULL, 1, 0, 0);
BIF_RET(make_small(res));
} else if (BIF_ARG_1 == am_wall_clock) {
UWord w1, w2;
@@ -3557,9 +3571,9 @@ BIF_RETTYPE statistics_1(BIF_ALIST_1)
else if (ERTS_IS_ATOM_STR("run_queues", BIF_ARG_1)) {
Eterm res, *hp, **hpp;
Uint sz, *szp;
- int no_qs = erts_no_run_queues;
+ int no_qs = erts_no_run_queues + ERTS_NUM_DIRTY_RUNQS;
Uint *qszs = erts_alloc(ERTS_ALC_T_TMP,sizeof(Uint)*no_qs*2);
- (void) erts_run_queues_len(qszs, 0, 0);
+ (void) erts_run_queues_len(qszs, 0, 0, 1);
sz = 0;
szp = &sz;
hpp = NULL;
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index da27c7e7c6..7952e3031d 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -405,13 +405,59 @@ static erts_atomic_t runq_supervisor_sleeping;
ErtsSchedulerData *erts_scheduler_data;
#endif
-ErtsAlignedRunQueue *erts_aligned_run_queues;
-Uint erts_no_run_queues;
+ErtsAlignedRunQueue * ERTS_WRITE_UNLIKELY(erts_aligned_run_queues);
+Uint ERTS_WRITE_UNLIKELY(erts_no_run_queues);
-ErtsAlignedSchedulerData *erts_aligned_scheduler_data;
#ifdef ERTS_DIRTY_SCHEDULERS
-ErtsAlignedSchedulerData *erts_aligned_dirty_cpu_scheduler_data;
-ErtsAlignedSchedulerData *erts_aligned_dirty_io_scheduler_data;
+
+struct {
+ union {
+ erts_smp_atomic32_t active;
+ char align__[ERTS_CACHE_LINE_SIZE];
+ } cpu;
+ union {
+ erts_smp_atomic32_t active;
+ char align__[ERTS_CACHE_LINE_SIZE];
+ } io;
+} dirty_count erts_align_attribute(ERTS_CACHE_LINE_SIZE);
+
+#endif
+
+static ERTS_INLINE void
+dirty_active(ErtsSchedulerData *esdp, erts_aint32_t add)
+{
+#ifdef ERTS_DIRTY_SCHEDULERS
+ erts_aint32_t val;
+ erts_smp_atomic32_t *ap;
+ switch (esdp->type) {
+ case ERTS_SCHED_DIRTY_CPU:
+ ap = &dirty_count.cpu.active;
+ break;
+ case ERTS_SCHED_DIRTY_IO:
+ ap = &dirty_count.io.active;
+ break;
+ default:
+ ap = NULL;
+ ERTS_INTERNAL_ERROR("Not a dirty scheduler");
+ break;
+ }
+
+ /*
+ * All updates done under run-queue lock, so
+ * no inc or dec needed...
+ */
+ ERTS_SMP_ASSERT(erts_smp_lc_runq_is_locked(esdp->run_queue));
+
+ val = erts_smp_atomic32_read_nob(ap);
+ val += add;
+ erts_smp_atomic32_set_nob(ap, val);
+#endif
+}
+
+ErtsAlignedSchedulerData * ERTS_WRITE_UNLIKELY(erts_aligned_scheduler_data);
+#ifdef ERTS_DIRTY_SCHEDULERS
+ErtsAlignedSchedulerData * ERTS_WRITE_UNLIKELY(erts_aligned_dirty_cpu_scheduler_data);
+ErtsAlignedSchedulerData * ERTS_WRITE_UNLIKELY(erts_aligned_dirty_io_scheduler_data);
typedef union {
Process dsp;
char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(Process))];
@@ -539,22 +585,28 @@ do { \
} \
} while (0)
-#define ERTS_ATOMIC_FOREACH_RUNQ_X(RQVAR, DO, DOX) \
+#define ERTS_ATOMIC_FOREACH_RUNQ_X(RQVAR, NRQS, DO, DOX) \
do { \
ErtsRunQueue *RQVAR; \
+ int nrqs = (NRQS); \
int ix__; \
- for (ix__ = 0; ix__ < erts_no_run_queues; ix__++) { \
+ for (ix__ = 0; ix__ < nrqs; ix__++) { \
RQVAR = ERTS_RUNQ_IX(ix__); \
erts_smp_runq_lock(RQVAR); \
{ DO; } \
} \
{ DOX; } \
- for (ix__ = 0; ix__ < erts_no_run_queues; ix__++) \
+ for (ix__ = 0; ix__ < nrqs; ix__++) \
erts_smp_runq_unlock(ERTS_RUNQ_IX(ix__)); \
} while (0)
-#define ERTS_ATOMIC_FOREACH_RUNQ(RQVAR, DO) \
- ERTS_ATOMIC_FOREACH_RUNQ_X(RQVAR, DO, )
+#define ERTS_ATOMIC_FOREACH_RUNQ(RQVAR, DO) \
+ ERTS_ATOMIC_FOREACH_RUNQ_X(RQVAR, erts_no_run_queues + ERTS_NUM_DIRTY_RUNQS, DO, )
+
+#define ERTS_ATOMIC_FOREACH_NORMAL_RUNQ(RQVAR, DO) \
+ ERTS_ATOMIC_FOREACH_RUNQ_X(RQVAR, erts_no_run_queues, DO, )
+
+
/*
* Local functions.
*/
@@ -2971,7 +3023,7 @@ erts_active_schedulers(void)
{
Uint as = erts_no_schedulers;
- ERTS_ATOMIC_FOREACH_RUNQ(rq, as -= abs(rq->waiting));
+ ERTS_ATOMIC_FOREACH_NORMAL_RUNQ(rq, as -= abs(rq->waiting));
return as;
}
@@ -3383,6 +3435,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
rq->sleepers.list->prev = ssi;
rq->sleepers.list = ssi;
erts_smp_spin_unlock(&rq->sleepers.lock);
+ dirty_active(esdp, -1);
}
#endif
@@ -3724,6 +3777,9 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
sched_active_sys(esdp->no, rq);
}
+ if (ERTS_SCHEDULER_IS_DIRTY(esdp))
+ dirty_active(esdp, 1);
+
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
}
@@ -6123,7 +6179,7 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online
#endif
)
{
- int ix, n, no_ssi;
+ int ix, n, no_ssi, tot_rqs;
char *daww_ptr;
size_t daww_sz;
size_t size_runqs;
@@ -6156,26 +6212,19 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online
/* Create and initialize run queues */
n = no_schedulers;
- size_runqs = sizeof(ErtsAlignedRunQueue) * (n + ERTS_NUM_DIRTY_RUNQS);
+ tot_rqs = (n + ERTS_NUM_DIRTY_RUNQS);
+ size_runqs = sizeof(ErtsAlignedRunQueue) * tot_rqs;
erts_aligned_run_queues =
erts_alloc_permanent_cache_aligned(ERTS_ALC_T_RUNQS, size_runqs);
#ifdef ERTS_SMP
-#ifdef ERTS_DIRTY_SCHEDULERS
- erts_aligned_run_queues += ERTS_NUM_DIRTY_RUNQS;
-#endif
erts_smp_atomic32_init_nob(&no_empty_run_queues, 0);
#endif
erts_no_run_queues = n;
- for (ix = -(ERTS_NUM_DIRTY_RUNQS); ix < n; ix++) {
+ for (ix = 0; ix < tot_rqs; ix++) {
int pix, rix;
-#ifdef ERTS_DIRTY_SCHEDULERS
- ErtsRunQueue *rq = ERTS_RUNQ_IX_IS_DIRTY(ix) ?
- ERTS_DIRTY_RUNQ_IX(ix) : ERTS_RUNQ_IX(ix);
-#else
ErtsRunQueue *rq = ERTS_RUNQ_IX(ix);
-#endif
rq->ix = ix;
@@ -6448,6 +6497,11 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online
ERTS_SCHED_DIRTY_IO,
no_dirty_io_schedulers);
+ erts_smp_atomic32_init_nob(&dirty_count.cpu.active,
+ (erts_aint32_t) no_dirty_cpu_schedulers);
+ erts_smp_atomic32_init_nob(&dirty_count.io.active,
+ (erts_aint32_t) no_dirty_io_schedulers);
+
#endif
if (set_schdlr_sspnd_change_flags)
@@ -7822,6 +7876,7 @@ suspend_scheduler(ErtsSchedulerData *esdp)
#endif
if (sched_type != ERTS_SCHED_NORMAL) {
+ dirty_active(esdp, -1);
erts_smp_runq_unlock(esdp->run_queue);
dirty_sched_wall_time_change(esdp, 0);
}
@@ -8130,7 +8185,9 @@ suspend_scheduler(ErtsSchedulerData *esdp)
erts_smp_runq_lock(esdp->run_queue);
non_empty_runq(esdp->run_queue);
- if (sched_type == ERTS_SCHED_NORMAL) {
+ if (sched_type != ERTS_SCHED_NORMAL)
+ dirty_active(esdp, 1);
+ else {
schedule_bound_processes(esdp->run_queue, &sbp);
erts_sched_check_cpu_bind_post_suspend(esdp);
@@ -9764,38 +9821,69 @@ erts_internal_is_process_executing_dirty_1(BIF_ALIST_1)
BIF_RET(am_false);
}
+static ERTS_INLINE void
+run_queues_len_aux(ErtsRunQueue *rq, Uint *tot_len, Uint *qlen, int *ip, int incl_active_sched, int locked)
+{
+ Sint rq_len;
+
+ if (locked)
+ rq_len = (Sint) erts_smp_atomic32_read_dirty(&rq->len);
+ else
+ rq_len = (Sint) erts_smp_atomic32_read_nob(&rq->len);
+ ASSERT(rq_len >= 0);
+
+ if (incl_active_sched) {
+#ifdef ERTS_DIRTY_SCHEDULERS
+ if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) {
+ erts_aint32_t dcnt;
+ if (ERTS_RUNQ_IS_DIRTY_CPU_RUNQ(rq)) {
+ dcnt = erts_smp_atomic32_read_nob(&dirty_count.cpu.active);
+ ASSERT(0 <= dcnt && dcnt <= erts_no_dirty_cpu_schedulers);
+ }
+ else {
+ ASSERT(ERTS_RUNQ_IS_DIRTY_IO_RUNQ(rq));
+ dcnt = erts_smp_atomic32_read_nob(&dirty_count.io.active);
+ ASSERT(0 <= dcnt && dcnt <= erts_no_dirty_io_schedulers);
+ }
+ rq_len += (Sint) dcnt;
+ }
+ else
+#endif
+ {
+ if (ERTS_RUNQ_FLGS_GET_NOB(rq) & ERTS_RUNQ_FLG_EXEC)
+ rq_len++;
+ }
+ }
+ if (qlen)
+ qlen[(*ip)++] = rq_len;
+ *tot_len += (Uint) rq_len;
+}
Uint
-erts_run_queues_len(Uint *qlen, int atomic_queues_read, int incl_active_sched)
+erts_run_queues_len(Uint *qlen, int atomic_queues_read, int incl_active_sched,
+ int incl_dirty_io)
{
- int i = 0;
+ int i = 0, j = 0;
Uint len = 0;
- if (atomic_queues_read)
- ERTS_ATOMIC_FOREACH_RUNQ(rq,
- {
- Sint rq_len = (Sint) erts_smp_atomic32_read_dirty(&rq->len);
- ASSERT(rq_len >= 0);
- if (incl_active_sched
- && (ERTS_RUNQ_FLGS_GET_NOB(rq) & ERTS_RUNQ_FLG_EXEC)) {
- rq_len++;
- }
- if (qlen)
- qlen[i++] = rq_len;
- len += (Uint) rq_len;
- }
- );
+ int no_rqs = erts_no_run_queues;
+
+#ifdef ERTS_DIRTY_SCHEDULERS
+ if (incl_dirty_io)
+ no_rqs += ERTS_NUM_DIRTY_RUNQS;
+ else
+ no_rqs += ERTS_NUM_DIRTY_CPU_RUNQS;
+#endif
+
+ if (atomic_queues_read) {
+ ERTS_ATOMIC_FOREACH_RUNQ_X(rq, no_rqs,
+ run_queues_len_aux(rq, &len, qlen, &j,
+ incl_active_sched, 1),
+ /* Nothing... */);
+ }
else {
- for (i = 0; i < erts_no_run_queues; i++) {
+ for (i = 0; i < no_rqs; i++) {
ErtsRunQueue *rq = ERTS_RUNQ_IX(i);
- Sint rq_len = (Sint) erts_smp_atomic32_read_nob(&rq->len);
- ASSERT(rq_len >= 0);
- if (incl_active_sched
- && (ERTS_RUNQ_FLGS_GET_NOB(rq) & ERTS_RUNQ_FLG_EXEC)) {
- rq_len++;
- }
- if (qlen)
- qlen[i] = rq_len;
- len += (Uint) rq_len;
+ run_queues_len_aux(rq, &len, qlen, &j, incl_active_sched, 0);
}
}
@@ -12097,6 +12185,8 @@ erts_get_total_reductions(Uint *redsp, Uint *diffp)
Uint reds = 0;
ERTS_ATOMIC_FOREACH_RUNQ_X(rq,
+ erts_no_run_queues + ERTS_NUM_DIRTY_RUNQS,
+
reds += rq->procs.reductions,
if (redsp) *redsp = reds;
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index 2b169bb9ce..d44e8c252d 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -1541,24 +1541,29 @@ extern int erts_system_profile_ts_type;
} while (0)
#if defined(ERTS_DIRTY_SCHEDULERS) && defined(ERTS_SMP)
-#define ERTS_NUM_DIRTY_RUNQS 2
+#define ERTS_NUM_DIRTY_CPU_RUNQS 1
+#define ERTS_NUM_DIRTY_IO_RUNQS 1
#else
-#define ERTS_NUM_DIRTY_RUNQS 0
+#define ERTS_NUM_DIRTY_CPU_RUNQS 0
+#define ERTS_NUM_DIRTY_IO_RUNQS 0
#endif
+#define ERTS_NUM_DIRTY_RUNQS (ERTS_NUM_DIRTY_CPU_RUNQS+ERTS_NUM_DIRTY_IO_RUNQS)
+
#define ERTS_RUNQ_IX(IX) \
- (ASSERT(0 <= (IX) && (IX) < erts_no_run_queues), \
+ (ASSERT(0 <= (IX) && (IX) < erts_no_run_queues+ERTS_NUM_DIRTY_RUNQS), \
&erts_aligned_run_queues[(IX)].runq)
#ifdef ERTS_DIRTY_SCHEDULERS
#define ERTS_RUNQ_IX_IS_DIRTY(IX) \
- (-(ERTS_NUM_DIRTY_RUNQS) <= (IX) && (IX) < 0)
+ (ASSERT(0 <= (IX) && (IX) < erts_no_run_queues+ERTS_NUM_DIRTY_RUNQS), \
+ (erts_no_run_queues <= (IX)))
#define ERTS_DIRTY_RUNQ_IX(IX) \
(ASSERT(ERTS_RUNQ_IX_IS_DIRTY(IX)), \
&erts_aligned_run_queues[(IX)].runq)
-#define ERTS_DIRTY_CPU_RUNQ (&erts_aligned_run_queues[-1].runq)
-#define ERTS_DIRTY_IO_RUNQ (&erts_aligned_run_queues[-2].runq)
-#define ERTS_RUNQ_IS_DIRTY_CPU_RUNQ(RQ) ((RQ)->ix == -1)
-#define ERTS_RUNQ_IS_DIRTY_IO_RUNQ(RQ) ((RQ)->ix == -2)
+#define ERTS_DIRTY_CPU_RUNQ (&erts_aligned_run_queues[erts_no_run_queues].runq)
+#define ERTS_DIRTY_IO_RUNQ (&erts_aligned_run_queues[erts_no_run_queues+1].runq)
+#define ERTS_RUNQ_IS_DIRTY_CPU_RUNQ(RQ) ((RQ) == ERTS_DIRTY_CPU_RUNQ)
+#define ERTS_RUNQ_IS_DIRTY_IO_RUNQ(RQ) ((RQ) == ERTS_DIRTY_IO_RUNQ)
#else
#define ERTS_RUNQ_IX_IS_DIRTY(IX) 0
#endif
@@ -1836,7 +1841,7 @@ Uint erts_active_schedulers(void);
void erts_init_process(int, int, int);
Eterm erts_process_state2status(erts_aint32_t);
Eterm erts_process_status(Process *, Eterm);
-Uint erts_run_queues_len(Uint *, int, int);
+Uint erts_run_queues_len(Uint *, int, int, int);
void erts_add_to_runq(Process *);
Eterm erts_bound_schedulers_term(Process *c_p);
Eterm erts_get_cpu_topology_term(Process *c_p, Eterm which);
diff --git a/erts/emulator/test/statistics_SUITE.erl b/erts/emulator/test/statistics_SUITE.erl
index 0cea941687..7690557fda 100644
--- a/erts/emulator/test/statistics_SUITE.erl
+++ b/erts/emulator/test/statistics_SUITE.erl
@@ -502,20 +502,37 @@ run_queues_lengths_active_tasks(_Config) ->
end,
lists:seq(1,10)),
+
+
TRQLs0 = statistics(total_run_queue_lengths),
+ TRQLAs0 = statistics(total_run_queue_lengths_all),
TATs0 = statistics(total_active_tasks),
+ TATAs0 = statistics(total_active_tasks_all),
true = is_integer(TRQLs0),
true = is_integer(TATs0),
true = TRQLs0 >= 0,
+ true = TRQLAs0 >= 0,
true = TATs0 >= 11,
+ true = TATAs0 >= 11,
NoScheds = erlang:system_info(schedulers),
+ {DefRqs,
+ AllRqs} = case erlang:system_info(dirty_cpu_schedulers) of
+ 0 -> {NoScheds, NoScheds};
+ _ -> {NoScheds+1, NoScheds+2}
+ end,
RQLs0 = statistics(run_queue_lengths),
+ RQLAs0 = statistics(run_queue_lengths_all),
ATs0 = statistics(active_tasks),
- NoScheds = length(RQLs0),
- NoScheds = length(ATs0),
+ ATAs0 = statistics(active_tasks_all),
+ DefRqs = length(RQLs0),
+ AllRqs = length(RQLAs0),
+ DefRqs = length(ATs0),
+ AllRqs = length(ATAs0),
true = lists:sum(RQLs0) >= 0,
+ true = lists:sum(RQLAs0) >= 0,
true = lists:sum(ATs0) >= 11,
+ true = lists:sum(ATAs0) >= 11,
SO = erlang:system_flag(schedulers_online, 1),
@@ -531,8 +548,8 @@ run_queues_lengths_active_tasks(_Config) ->
RQLs1 = statistics(run_queue_lengths),
ATs1 = statistics(active_tasks),
- NoScheds = length(RQLs1),
- NoScheds = length(ATs1),
+ DefRqs = length(RQLs1),
+ DefRqs = length(ATs1),
TRQLs2 = lists:sum(RQLs1),
TATs2 = lists:sum(ATs1),
true = TRQLs2 >= 10,
diff --git a/erts/etc/unix/etp-commands.in b/erts/etc/unix/etp-commands.in
index b7b3a2ae99..fc7b614c21 100644
--- a/erts/etc/unix/etp-commands.in
+++ b/erts/etc/unix/etp-commands.in
@@ -2810,10 +2810,10 @@ define etp-run-queue-info-internal
else
if ($sched_type == 1)
printf "\n--- Dirty CPU Run Queue ---\n"
- set $runq = &erts_aligned_run_queues[-1].runq
+ set $runq = &erts_aligned_run_queues[erts_no_run_queues].runq
else
printf "\n--- Dirty I/O Run Queue ---\n"
- set $runq = &erts_aligned_run_queues[-2].runq
+ set $runq = &erts_aligned_run_queues[erts_no_run_queues+1].runq
end
end
printf " Length: total=%d", *((Uint32 *) &($runq->len))
diff --git a/erts/preloaded/ebin/erlang.beam b/erts/preloaded/ebin/erlang.beam
index 980df873ca..63518ed6e1 100644
--- a/erts/preloaded/ebin/erlang.beam
+++ b/erts/preloaded/ebin/erlang.beam
Binary files differ
diff --git a/erts/preloaded/src/erlang.erl b/erts/preloaded/src/erlang.erl
index fcb27ef575..72dd804412 100644
--- a/erts/preloaded/src/erlang.erl
+++ b/erts/preloaded/src/erlang.erl
@@ -2306,6 +2306,8 @@ spawn_opt(_Tuple) ->
-spec statistics(active_tasks) -> [ActiveTasks] when
ActiveTasks :: non_neg_integer();
+ (active_tasks_all) -> [ActiveTasks] when
+ ActiveTasks :: non_neg_integer();
(context_switches) -> {ContextSwitches,0} when
ContextSwitches :: non_neg_integer();
(exact_reductions) -> {Total_Exact_Reductions,
@@ -2335,6 +2337,8 @@ spawn_opt(_Tuple) ->
(run_queue) -> non_neg_integer();
(run_queue_lengths) -> [RunQueueLength] when
RunQueueLength :: non_neg_integer();
+ (run_queue_lengths_all) -> [RunQueueLength] when
+ RunQueueLength :: non_neg_integer();
(runtime) -> {Total_Run_Time, Time_Since_Last_Call} when
Total_Run_Time :: non_neg_integer(),
Time_Since_Last_Call :: non_neg_integer();
@@ -2347,9 +2351,13 @@ spawn_opt(_Tuple) ->
ActiveTime :: non_neg_integer(),
TotalTime :: non_neg_integer();
(total_active_tasks) -> ActiveTasks when
+ ActiveTasks :: non_neg_integer();
+ (total_active_tasks_all) -> ActiveTasks when
ActiveTasks :: non_neg_integer();
(total_run_queue_lengths) -> TotalRunQueueLengths when
TotalRunQueueLengths :: non_neg_integer();
+ (total_run_queue_lengths_all) -> TotalRunQueueLengths when
+ TotalRunQueueLengths :: non_neg_integer();
(wall_clock) -> {Total_Wallclock_Time,
Wallclock_Time_Since_Last_Call} when
Total_Wallclock_Time :: non_neg_integer(),