From 37f58ad6bff2bf2bac4f3f20c2684e8cee66af03 Mon Sep 17 00:00:00 2001
From: Rickard Green <rickard@erlang.org>
Date: Tue, 15 Dec 2015 09:40:30 +0100
Subject: Light weight statistics of run queue lengths

- statistics(total_run_queue_lengths)
- statistics(run_queue_lengths)
- statistics(total_active_tasks)
- statistics(active_tasks)

Conflicts:
	erts/emulator/beam/erl_process.c
---
 erts/doc/src/erlang.xml                 | 111 ++++++++++++++++++++++++++++----
 erts/emulator/beam/atom.names           |   4 ++
 erts/emulator/beam/erl_bif_info.c       |  37 ++++++++++-
 erts/emulator/beam/erl_process.c        |  65 +++++++++++++------
 erts/emulator/beam/erl_process.h        |  48 +++++++++-----
 erts/emulator/test/statistics_SUITE.erl |  61 +++++++++++++++++-
 erts/preloaded/ebin/erlang.beam         | Bin 101796 -> 102000 bytes
 erts/preloaded/src/erlang.erl           |  10 ++-
 8 files changed, 281 insertions(+), 55 deletions(-)

(limited to 'erts')

diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml
index c37ed3bea5..64eebec936 100644
--- a/erts/doc/src/erlang.xml
+++ b/erts/doc/src/erlang.xml
@@ -5684,8 +5684,31 @@ true</pre>
 	<anno>Dest</anno>, <anno>Msg</anno>, [])</c></seealso>.</p>
       </desc>
     </func>
+
     <func>
       <name name="statistics" arity="1" clause_i="1"/>
+      <fsummary>Information about active processes and ports.</fsummary>
+      <desc><marker id="statistics_active_tasks"></marker>
+	<p>
+	  Returns a list where each element represents the amount
+	  of active processes and ports on each run queue and its
+	  associated scheduler. That is, the number of processes and
+	  ports that are ready to run, or are currently running. The
+	  element location in the list corresponds to the scheduler
+	  and its run queue. The first element corresponds to scheduler
+	  number 1 and so on. The information is <em>not</em> gathered
+	  atomically. That is, the result is not necessarily a
+	  consistent snapshot of the state, but instead quite
+	  efficiently gathered. See also,
+	  <seealso marker="#statistics_total_active_tasks"><c>statistics(total_active_tasks)</c></seealso>,
+	  <seealso marker="#statistics_run_queue_lengths"><c>statistics(run_queue_lengths)</c></seealso>, and
+	  <seealso marker="#statistics_total_run_queue_lengths"><c>statistics(total_run_queue_lengths)</c></seealso>.
+	</p>
+      </desc>
+    </func>
+
+    <func>
+      <name name="statistics" arity="1" clause_i="2"/>
       <fsummary>Information about context switches.</fsummary>
       <desc>
         <p>Returns the total number of context switches since the
@@ -5694,7 +5717,7 @@ true</pre>
     </func>
 
     <func>
-      <name name="statistics" arity="1" clause_i="2"/>
+      <name name="statistics" arity="1" clause_i="3"/>
       <fsummary>Information about exact reductions.</fsummary>
       <desc>
         <marker id="statistics_exact_reductions"></marker>
@@ -5708,7 +5731,7 @@ true</pre>
     </func>
 
     <func>
-      <name name="statistics" arity="1" clause_i="3"/>
+      <name name="statistics" arity="1" clause_i="4"/>
       <fsummary>Information about garbage collection.</fsummary>
       <desc>
         <p>Returns information about garbage collection, for example:</p>
@@ -5720,7 +5743,7 @@ true</pre>
     </func>
 
     <func>
-      <name name="statistics" arity="1" clause_i="4"/>
+      <name name="statistics" arity="1" clause_i="5"/>
       <fsummary>Information about I/O.</fsummary>
       <desc>
         <p>Returns <c><anno>Input</anno></c>,
@@ -5731,7 +5754,7 @@ true</pre>
     </func>
 
     <func>
-      <name name="statistics" arity="1" clause_i="5"/>
+      <name name="statistics" arity="1" clause_i="6"/>
       <fsummary>Information about reductions.</fsummary>
       <desc>
         <marker id="statistics_reductions"></marker>
@@ -5749,16 +5772,43 @@ true</pre>
     </func>
 
     <func>
-      <name name="statistics" arity="1" clause_i="6"/>
-      <fsummary>Information about the run-queue.</fsummary>
-      <desc>
-        <p>Returns the total length of run-queues, that is, the number
-          of processes that are ready to run on all available run-queues.</p>
+      <name name="statistics" arity="1" clause_i="7"/>
+      <fsummary>Information about the run-queues.</fsummary>
+      <desc><marker id="statistics_run_queue"></marker>
+        <p>
+	  Returns the total length of the run-queues. That is, the number
+          of processes and ports that are ready to run on all available
+	  run-queues. The information is gathered atomically. That
+	  is, the result is a consistent snapshot of the state, but
+	  this operation is much more expensive compared to
+	  <seealso marker="#statistics_total_run_queue_lengths"><c>statistics(total_run_queue_lengths)</c></seealso>.
+	  This especially when a large amount of schedulers is used.
+	</p>
       </desc>
     </func>
 
     <func>
-      <name name="statistics" arity="1" clause_i="7"/>
+      <name name="statistics" arity="1" clause_i="8"/>
+      <fsummary>Information about the run-queue lengths.</fsummary>
+      <desc><marker id="statistics_run_queue_lengths"></marker>
+	<p>
+	  Returns a list where each element represents the amount
+	  of processes and ports ready to run for each run queue. The
+	  element location in the list corresponds to the run queue
+	  of a scheduler. The first element corresponds to the run
+	  queue of scheduler number 1 and so on. The information is
+	  <em>not</em> gathered atomically. That is, the result is
+	  not necessarily a consistent snapshot of the state, but
+	  instead quite efficiently gathered. See also,
+	  <seealso marker="#statistics_total_run_queue_lengths"><c>statistics(total_run_queue_lengths)</c></seealso>,
+	  <seealso marker="#statistics_active_tasks"><c>statistics(active_tasks)</c></seealso>, and
+	  <seealso marker="#statistics_total_active_tasks"><c>statistics(total_active_tasks)</c></seealso>.
+	</p>
+      </desc>
+    </func>
+
+    <func>
+      <name name="statistics" arity="1" clause_i="9"/>
       <fsummary>Information about runtime.</fsummary>
       <desc>
         <p>Returns information about runtime, in milliseconds.</p>
@@ -5773,7 +5823,7 @@ true</pre>
     </func>
 
     <func>
-      <name name="statistics" arity="1" clause_i="8"/>
+      <name name="statistics" arity="1" clause_i="10"/>
       <fsummary>Information about each schedulers work time.</fsummary>
       <desc>
       <marker id="statistics_scheduler_wall_time"></marker>
@@ -5844,7 +5894,44 @@ ok
     </func>
 
     <func>
-      <name name="statistics" arity="1" clause_i="9"/>
+      <name name="statistics" arity="1" clause_i="11"/>
+      <fsummary>Information about active processes and ports.</fsummary>
+      <desc><marker id="statistics_total_active_tasks"></marker>
+	<p>
+	  Returns the total amount of active processes and ports in
+	  the system. That is, the number of processes and ports that
+	  are ready to run, or are currently running. The information
+	  is <em>not</em> gathered atomically. That is, the result
+	  is not necessarily a consistent snapshot of the state, but
+	  instead quite efficiently gathered. See also,
+	  <seealso marker="#statistics_active_tasks"><c>statistics(active_tasks)</c></seealso>,
+	  <seealso marker="#statistics_run_queue_lengths"><c>statistics(run_queue_lengths)</c></seealso>, and
+	  <seealso marker="#statistics_total_run_queue_lengths"><c>statistics(total_run_queue_lengths)</c></seealso>.
+	</p>
+      </desc>
+    </func>
+
+    <func>
+      <name name="statistics" arity="1" clause_i="12"/>
+      <fsummary>Information about the run-queue lengths.</fsummary>
+      <desc><marker id="statistics_total_run_queue_lengths"></marker>
+        <p>
+	  Returns the total length of the run-queues. That is, the number
+          of processes and ports that are ready to run on all available
+	  run-queues. The information is <em>not</em> gathered atomically.
+	  That is, the result is not necessarily a consistent snapshot of
+	  the state, but much more efficiently gathered compared to
+	  <seealso marker="#statistics_run_queue"><c>statistics(run_queue)</c></seealso>.
+	  See also,
+	  <seealso marker="#statistics_run_queue_lengths"><c>statistics(run_queue_lengths)</c></seealso>,
+	  <seealso marker="#statistics_total_active_tasks"><c>statistics(total_active_tasks)</c></seealso>, and
+	  <seealso marker="#statistics_active_tasks"><c>statistics(active_tasks)</c></seealso>.
+	</p>
+      </desc>
+    </func>
+
+    <func>
+      <name name="statistics" arity="1" clause_i="13"/>
       <fsummary>Information about wall clock.</fsummary>
       <desc>
         <p>Returns information about wall clock. <c>wall_clock</c> can
diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names
index 190e7817dc..fb3368eae2 100644
--- a/erts/emulator/beam/atom.names
+++ b/erts/emulator/beam/atom.names
@@ -71,6 +71,7 @@ atom absoluteURI
 atom ac
 atom accessor
 atom active
+atom active_tasks
 atom all
 atom all_but_first
 atom all_names
@@ -512,6 +513,7 @@ atom return_from
 atom return_to
 atom return_trace
 atom run_queue
+atom run_queue_lengths
 atom runnable
 atom runnable_ports
 atom runnable_procs
@@ -579,7 +581,9 @@ atom timeout_value
 atom Times='*'
 atom timestamp
 atom total
+atom total_active_tasks
 atom total_heap_size
+atom total_run_queue_lengths
 atom tpkt
 atom trace trace_ts traced 
 atom trace_control_word
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c
index b44382cde8..414ff6711a 100644
--- a/erts/emulator/beam/erl_bif_info.c
+++ b/erts/emulator/beam/erl_bif_info.c
@@ -3234,6 +3234,39 @@ BIF_RETTYPE statistics_1(BIF_ALIST_1)
 	if (is_non_value(res))
 	    BIF_RET(am_undefined);
 	BIF_TRAP1(gather_sched_wall_time_res_trap, BIF_P, res);
+    } else if (BIF_ARG_1 == am_total_active_tasks
+	       || BIF_ARG_1 == am_total_run_queue_lengths) {
+	Uint no = erts_run_queues_len(NULL, 0, BIF_ARG_1 == am_total_active_tasks);
+	if (IS_USMALL(0, no))
+	    res = make_small(no);
+	else {
+	    Eterm *hp = HAlloc(BIF_P, BIG_UINT_HEAP_SIZE);
+	    res = uint_to_big(no, hp);
+	}
+	BIF_RET(res);
+    } else if (BIF_ARG_1 == am_active_tasks
+	       || BIF_ARG_1 == am_run_queue_lengths) {
+	Eterm res, *hp, **hpp;
+	Uint sz, *szp;
+	int no_qs = erts_no_run_queues;
+	Uint *qszs = erts_alloc(ERTS_ALC_T_TMP,sizeof(Uint)*no_qs*2);
+	(void) erts_run_queues_len(qszs, 0, BIF_ARG_1 == am_active_tasks);
+	sz = 0;
+	szp = &sz;
+	hpp = NULL;
+	while (1) {
+	    int i;
+	    for (i = 0; i < no_qs; i++)
+		qszs[no_qs+i] = erts_bld_uint(hpp, szp, qszs[i]);
+	    res = erts_bld_list(hpp, szp, no_qs, &qszs[no_qs]);
+	    if (hpp) {
+		erts_free(ERTS_ALC_T_TMP, qszs);
+		BIF_RET(res);
+	    }
+	    hp = HAlloc(BIF_P, sz);
+	    szp = NULL;
+	    hpp = &hp;
+	}
     } else if (BIF_ARG_1 == am_context_switches) {
 	Eterm cs = erts_make_integer(erts_get_total_context_switches(), BIF_P);
 	hp = HAlloc(BIF_P, 3);
@@ -3282,7 +3315,7 @@ BIF_RETTYPE statistics_1(BIF_ALIST_1)
 	res = TUPLE2(hp, b1, b2);
 	BIF_RET(res);
     } else if (BIF_ARG_1 ==  am_run_queue) {
-	res = erts_run_queues_len(NULL);
+	res = erts_run_queues_len(NULL, 1, 0);
 	BIF_RET(make_small(res));
     } else if (BIF_ARG_1 == am_wall_clock) {
 	UWord w1, w2;
@@ -3302,7 +3335,7 @@ BIF_RETTYPE statistics_1(BIF_ALIST_1)
 	Uint sz, *szp;
 	int no_qs = erts_no_run_queues;
 	Uint *qszs = erts_alloc(ERTS_ALC_T_TMP,sizeof(Uint)*no_qs*2);
-	(void) erts_run_queues_len(qszs);
+	(void) erts_run_queues_len(qszs, 0, 0);
 	sz = 0;
 	szp = &sz;
 	hpp = NULL;
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index d583118e7b..b47aae0f74 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -3149,7 +3149,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
 	    }
 
 #ifndef ERTS_SMP
-	    if (rq->len != 0 || rq->misc.start)
+	    if (erts_smp_atomic32_read_dirty(&rq->len) != 0 || rq->misc.start)
 		goto sys_woken;
 #else
 	    flgs = erts_smp_atomic32_read_acqb(&ssi->flags);
@@ -3248,7 +3248,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
 	}
 
 #ifndef ERTS_SMP
-	if (rq->len == 0 && !rq->misc.start)
+	if (erts_smp_atomic32_read_dirty(&rq->len) == 0 && !rq->misc.start)
 	    goto sys_aux_work;
     sys_woken:
 #else
@@ -4965,7 +4965,7 @@ erts_fprintf(stderr, "--------------------------------\n");
 	rq->out_of_work_count = 0;
 	(void) ERTS_RUNQ_FLGS_READ_BSET(rq, ERTS_RUNQ_FLGS_MIGRATION_INFO, flags);
 
-	rq->max_len = rq->len;
+	rq->max_len = erts_smp_atomic32_read_dirty(&rq->len);
 	for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) {
 	    ErtsRunQueueInfo *rqi;
 	    rqi = (pix == ERTS_PORT_PRIO_LEVEL
@@ -5123,7 +5123,7 @@ wakeup_other_check(ErtsRunQueue *rq, Uint32 flags)
 {
     int wo_reds = rq->wakeup_other_reds;
     if (wo_reds) {
-	int left_len = rq->len - 1;
+	int left_len = erts_smp_atomic32_read_dirty(&rq->len) - 1;
 	if (left_len < 1) {
 	    int wo_reduce = wo_reds << wakeup_other.dec_shift;
 	    wo_reduce &= wakeup_other.dec_mask;
@@ -5196,7 +5196,7 @@ wakeup_other_check_legacy(ErtsRunQueue *rq, Uint32 flags)
 {
     int wo_reds = rq->wakeup_other_reds;
     if (wo_reds) {
-	erts_aint32_t len = rq->len;
+	erts_aint32_t len = erts_smp_atomic32_read_dirty(&rq->len);
 	if (len < 2) {
 	    rq->wakeup_other -= ERTS_WAKEUP_OTHER_DEC_LEGACY*wo_reds;
 	    if (rq->wakeup_other < 0)
@@ -5292,7 +5292,7 @@ runq_supervisor(void *unused)
 	    ErtsRunQueue *rq = ERTS_RUNQ_IX(ix);
 	    if (ERTS_RUNQ_FLGS_GET(rq) & ERTS_RUNQ_FLG_NONEMPTY) {
 		erts_smp_runq_lock(rq);
-		if (rq->len != 0)
+		if (erts_smp_atomic32_read_dirty(&rq->len) != 0)
 		    wake_scheduler_on_empty_runq(rq); /* forced wakeup... */
 		erts_smp_runq_unlock(rq);
 	    }
@@ -5642,7 +5642,7 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online
 	}
 	rq->out_of_work_count = 0;
 	rq->max_len = 0;
-	rq->len = 0;
+	erts_smp_atomic32_set_nob(&rq->len, 0);
 	rq->wakeup_other = 0;
 	rq->wakeup_other_reds = 0;
 	rq->halt_in_progress = 0;
@@ -7939,6 +7939,9 @@ sched_thread_func(void *vesdp)
 
     erts_sched_init_time_sup(esdp);
 
+    (void) ERTS_RUNQ_FLGS_SET_NOB(esdp->run_queue,
+				  ERTS_RUNQ_FLG_EXEC);
+
 #ifdef ERTS_SMP
     tse = erts_tse_fetch();
     erts_tse_prepare_timed(tse);
@@ -8947,24 +8950,39 @@ resume_process_1(BIF_ALIST_1)
 }
 
 Uint
-erts_run_queues_len(Uint *qlen)
+erts_run_queues_len(Uint *qlen, int atomic_queues_read, int incl_active_sched)
 {
     int i = 0;
     Uint len = 0;
-    ERTS_ATOMIC_FOREACH_RUNQ(rq,
-    {
-	Sint pqlen = 0;
-	int pix;
-	for (pix = 0; pix < ERTS_NO_PROC_PRIO_LEVELS; pix++)
-	    pqlen += RUNQ_READ_LEN(&rq->procs.prio_info[pix].len);
+    if (atomic_queues_read)
+	ERTS_ATOMIC_FOREACH_RUNQ(rq,
+	 {
+	     Sint rq_len = (Sint) erts_smp_atomic32_read_dirty(&rq->len);
+	     ASSERT(rq_len >= 0);
+	     if (incl_active_sched
+		 && (ERTS_RUNQ_FLGS_GET_NOB(rq) & ERTS_RUNQ_FLG_EXEC)) {
+		 rq_len++;
+	     }
+	     if (qlen)
+		 qlen[i++] = rq_len;
+	     len += (Uint) rq_len;
+	 }
+	    );
+    else {
+	for (i = 0; i < erts_no_run_queues; i++) {
+	    ErtsRunQueue *rq = ERTS_RUNQ_IX(i);
+	    Sint rq_len = (Sint) erts_smp_atomic32_read_nob(&rq->len);
+	    ASSERT(rq_len >= 0);
+	     if (incl_active_sched
+		 && (ERTS_RUNQ_FLGS_GET_NOB(rq) & ERTS_RUNQ_FLG_EXEC)) {
+		 rq_len++;
+	     }
+	    if (qlen)
+		qlen[i] = rq_len;
+	    len += (Uint) rq_len;
+	}
 
-	if (pqlen < 0)
-	    pqlen = 0;
- 	if (qlen)
-	    qlen[i++] = pqlen;
-	len += pqlen;
     }
-	);
     return len;
 }
 
@@ -9391,8 +9409,10 @@ Process *schedule(Process *p, int calls)
 
 	if (flags & (ERTS_RUNQ_FLG_CHK_CPU_BIND|ERTS_RUNQ_FLG_SUSPENDED)) {
 	    if (flags & ERTS_RUNQ_FLG_SUSPENDED) {
+		(void) ERTS_RUNQ_FLGS_UNSET_NOB(rq, ERTS_RUNQ_FLG_EXEC);
 		suspend_scheduler(esdp);
-		flags = ERTS_RUNQ_FLGS_GET_NOB(rq);
+		flags = ERTS_RUNQ_FLGS_SET_NOB(rq, ERTS_RUNQ_FLG_EXEC);
+		flags |= ERTS_RUNQ_FLG_EXEC;
 	    }
 	    if (flags & ERTS_RUNQ_FLG_CHK_CPU_BIND) {
 		flags = ERTS_RUNQ_FLGS_UNSET(rq, ERTS_RUNQ_FLG_CHK_CPU_BIND);
@@ -9483,7 +9503,10 @@ Process *schedule(Process *p, int calls)
 	    }
 #endif
 
+	    (void) ERTS_RUNQ_FLGS_UNSET(rq, ERTS_RUNQ_FLG_EXEC);
 	    scheduler_wait(&fcalls, esdp, rq);
+	    flags = ERTS_RUNQ_FLGS_SET_NOB(rq, ERTS_RUNQ_FLG_EXEC);
+	    flags |= ERTS_RUNQ_FLG_EXEC;
 
 #ifdef ERTS_SMP
 	    non_empty_runq(rq);
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index 10c6fa4a67..fd7d4183f4 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -170,8 +170,10 @@ extern int erts_sched_thread_suggested_stack_size;
   (((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 5))
 #define ERTS_RUNQ_FLG_PROTECTED \
   (((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 6))
+#define ERTS_RUNQ_FLG_EXEC \
+  (((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 7))
 
-#define ERTS_RUNQ_FLG_MAX (ERTS_RUNQ_FLG_BASE2 + 7)
+#define ERTS_RUNQ_FLG_MAX (ERTS_RUNQ_FLG_BASE2 + 8)
 
 #define ERTS_RUNQ_FLGS_MIGRATION_QMASKS	\
   (ERTS_RUNQ_FLGS_EMIGRATE_QMASK	\
@@ -215,6 +217,9 @@ extern int erts_sched_thread_suggested_stack_size;
 #define ERTS_RUNQ_FLGS_SET(RQ, FLGS)					\
     ((Uint32) erts_smp_atomic32_read_bor_relb(&(RQ)->flags,		\
 					      (erts_aint32_t) (FLGS)))
+#define ERTS_RUNQ_FLGS_SET_NOB(RQ, FLGS)				\
+    ((Uint32) erts_smp_atomic32_read_bor_nob(&(RQ)->flags,		\
+					     (erts_aint32_t) (FLGS)))
 #define ERTS_RUNQ_FLGS_BSET(RQ, MSK, FLGS)				\
     ((Uint32) erts_smp_atomic32_read_bset_relb(&(RQ)->flags,		\
 					       (erts_aint32_t) (MSK),	\
@@ -222,6 +227,9 @@ extern int erts_sched_thread_suggested_stack_size;
 #define ERTS_RUNQ_FLGS_UNSET(RQ, FLGS)					\
     ((Uint32) erts_smp_atomic32_read_band_relb(&(RQ)->flags,		\
 					       (erts_aint32_t) ~(FLGS)))
+#define ERTS_RUNQ_FLGS_UNSET_NOB(RQ, FLGS)					\
+    ((Uint32) erts_smp_atomic32_read_band_nob(&(RQ)->flags,		\
+					      (erts_aint32_t) ~(FLGS)))
 #define ERTS_RUNQ_FLGS_GET(RQ)						\
     ((Uint32) erts_smp_atomic32_read_acqb(&(RQ)->flags))
 #define ERTS_RUNQ_FLGS_GET_NOB(RQ)					\
@@ -467,7 +475,7 @@ struct ErtsRunQueue_ {
     int full_reds_history[ERTS_FULL_REDS_HISTORY_SIZE];
     int out_of_work_count;
     erts_aint32_t max_len;
-    erts_aint32_t len;
+    erts_smp_atomic32_t len;
     int wakeup_other;
     int wakeup_other_reds;
     int halt_in_progress;
@@ -728,7 +736,19 @@ erts_smp_inc_runq_len(ErtsRunQueue *rq, ErtsRunQueueInfo *rqi, int prio)
 
     ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
 
-    len = erts_smp_atomic32_read_nob(&rqi->len);
+    len = erts_smp_atomic32_read_dirty(&rq->len);
+
+#ifdef ERTS_SMP
+    if (len == 0)
+	erts_non_empty_runq(rq);
+#endif
+    len++;
+    if (rq->max_len < len)
+	rq->max_len = len;
+    ASSERT(len > 0);
+    erts_smp_atomic32_set_nob(&rq->len, len);
+
+    len = erts_smp_atomic32_read_dirty(&rqi->len);
     ASSERT(len >= 0);
     if (len == 0) {
 	ASSERT((erts_smp_atomic32_read_nob(&rq->flags)
@@ -741,15 +761,6 @@ erts_smp_inc_runq_len(ErtsRunQueue *rq, ErtsRunQueueInfo *rqi, int prio)
 	rqi->max_len = len;
 
     erts_smp_atomic32_set_relb(&rqi->len, len);
-
-#ifdef ERTS_SMP
-    if (rq->len == 0)
-	erts_non_empty_runq(rq);
-#endif
-    rq->len++;
-    if (rq->max_len < rq->len)
-	rq->max_len = len;
-    ASSERT(rq->len > 0);
 }
 
 ERTS_GLB_INLINE void
@@ -759,7 +770,12 @@ erts_smp_dec_runq_len(ErtsRunQueue *rq, ErtsRunQueueInfo *rqi, int prio)
 
     ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
 
-    len = erts_smp_atomic32_read_nob(&rqi->len);
+    len = erts_smp_atomic32_read_dirty(&rq->len);
+    len--;
+    ASSERT(len >= 0);
+    erts_smp_atomic32_set_nob(&rq->len, len);
+
+    len = erts_smp_atomic32_read_dirty(&rqi->len);
     len--;
     ASSERT(len >= 0);
     if (len == 0) {
@@ -770,8 +786,6 @@ erts_smp_dec_runq_len(ErtsRunQueue *rq, ErtsRunQueueInfo *rqi, int prio)
     }
     erts_smp_atomic32_set_relb(&rqi->len, len);
 
-    rq->len--;
-    ASSERT(rq->len >= 0);
 }
 
 ERTS_GLB_INLINE void
@@ -781,7 +795,7 @@ erts_smp_reset_max_len(ErtsRunQueue *rq, ErtsRunQueueInfo *rqi)
 
     ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
 
-    len = erts_smp_atomic32_read_nob(&rqi->len);
+    len = erts_smp_atomic32_read_dirty(&rqi->len);
     ASSERT(rqi->max_len >= len);
     rqi->max_len = len;
 }
@@ -1678,7 +1692,7 @@ void erts_sched_notify_check_cpu_bind(void);
 Uint erts_active_schedulers(void);
 void erts_init_process(int, int, int);
 Eterm erts_process_status(Process *, ErtsProcLocks, Process *, Eterm);
-Uint erts_run_queues_len(Uint *);
+Uint erts_run_queues_len(Uint *, int, int);
 void erts_add_to_runq(Process *);
 Eterm erts_bound_schedulers_term(Process *c_p);
 Eterm erts_get_cpu_topology_term(Process *c_p, Eterm which);
diff --git a/erts/emulator/test/statistics_SUITE.erl b/erts/emulator/test/statistics_SUITE.erl
index 56ecf4195a..53c9ba8715 100644
--- a/erts/emulator/test/statistics_SUITE.erl
+++ b/erts/emulator/test/statistics_SUITE.erl
@@ -32,7 +32,7 @@
 	 run_queue_one/1,
 	 scheduler_wall_time/1,
 	 reductions/1, reductions_big/1, garbage_collection/1, io/1,
-	 badarg/1]).
+	 badarg/1, run_queues_lengths_active_tasks/1]).
 
 %% Internal exports.
 
@@ -54,7 +54,8 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
 all() -> 
     [{group, wall_clock}, {group, runtime}, reductions,
      reductions_big, {group, run_queue}, scheduler_wall_time,
-     garbage_collection, io, badarg].
+     garbage_collection, io, badarg,
+     run_queues_lengths_active_tasks].
 
 groups() -> 
     [{wall_clock, [],
@@ -409,3 +410,59 @@ badarg(Config) when is_list(Config) ->
     ?line case catch statistics(bad_atom) of
 	      {'EXIT', {badarg, _}} -> ok
 	  end.
+
+tok_loop() ->
+    tok_loop().
+
+run_queues_lengths_active_tasks(Config) ->
+    TokLoops = lists:map(fun (_) ->
+				 spawn_opt(fun () ->
+						   tok_loop()
+					   end,
+					   [link, {priority, low}])
+			 end,
+			 lists:seq(1,10)),
+
+    TRQLs0 = statistics(total_run_queue_lengths),
+    TATs0 = statistics(total_active_tasks),
+    true = is_integer(TRQLs0),
+    true = is_integer(TATs0),
+    true = TRQLs0 >= 0,
+    true = TATs0 >= 11,
+
+    NoScheds = erlang:system_info(schedulers),
+    RQLs0 = statistics(run_queue_lengths),
+    ATs0 = statistics(active_tasks),
+    NoScheds = length(RQLs0),
+    NoScheds = length(ATs0),
+    true = lists:sum(RQLs0) >= 0,
+    true = lists:sum(ATs0) >= 11,
+
+    SO = erlang:system_flag(schedulers_online, 1),
+
+    TRQLs1 = statistics(total_run_queue_lengths),
+    TATs1 = statistics(total_active_tasks),
+    true = TRQLs1 >= 10,
+    true = TATs1 >= 11,
+    NoScheds = erlang:system_info(schedulers),
+
+    RQLs1 = statistics(run_queue_lengths),
+    ATs1 = statistics(active_tasks),
+    NoScheds = length(RQLs1),
+    NoScheds = length(ATs1),
+    TRQLs2 = lists:sum(RQLs1),
+    TATs2 = lists:sum(ATs1),
+    true = TRQLs2 >= 10,
+    true = TATs2 >= 11,
+    [TRQLs2|_] = RQLs1,
+    [TATs2|_] = ATs1,
+
+    erlang:system_flag(schedulers_online, SO),
+
+    lists:foreach(fun (P) ->
+			  unlink(P),
+			  exit(P, bang)
+		  end,
+		  TokLoops),
+
+    ok.
diff --git a/erts/preloaded/ebin/erlang.beam b/erts/preloaded/ebin/erlang.beam
index cd2e7f18a2..58516c0ff3 100644
Binary files a/erts/preloaded/ebin/erlang.beam and b/erts/preloaded/ebin/erlang.beam differ
diff --git a/erts/preloaded/src/erlang.erl b/erts/preloaded/src/erlang.erl
index 7280b43502..5fc6d14938 100644
--- a/erts/preloaded/src/erlang.erl
+++ b/erts/preloaded/src/erlang.erl
@@ -2205,7 +2205,9 @@ setelement(_Index, _Tuple1, _Value) ->
 spawn_opt(_Tuple) ->
    erlang:nif_error(undefined).
 
--spec statistics(context_switches) -> {ContextSwitches,0} when
+-spec statistics(active_tasks) -> [ActiveTasks] when
+      ActiveTasks :: non_neg_integer();
+		(context_switches) -> {ContextSwitches,0} when
       ContextSwitches :: non_neg_integer();
                 (exact_reductions) -> {Total_Exact_Reductions,
                                        Exact_Reductions_Since_Last_Call} when
@@ -2222,6 +2224,8 @@ spawn_opt(_Tuple) ->
       Total_Reductions :: non_neg_integer(),
       Reductions_Since_Last_Call :: non_neg_integer();
                 (run_queue) -> non_neg_integer();
+                (run_queue_lengths) -> [RunQueueLenght] when
+      RunQueueLenght :: non_neg_integer();
                 (runtime) -> {Total_Run_Time, Time_Since_Last_Call} when
       Total_Run_Time :: non_neg_integer(),
       Time_Since_Last_Call :: non_neg_integer();
@@ -2229,6 +2233,10 @@ spawn_opt(_Tuple) ->
       SchedulerId :: pos_integer(),
       ActiveTime  :: non_neg_integer(),
       TotalTime   :: non_neg_integer();
+		(total_active_tasks) -> ActiveTasks when
+      ActiveTasks :: non_neg_integer();
+                (total_run_queue_lengths) -> TotalRunQueueLenghts when
+      TotalRunQueueLenghts :: non_neg_integer();
                 (wall_clock) -> {Total_Wallclock_Time,
                                  Wallclock_Time_Since_Last_Call} when
       Total_Wallclock_Time :: non_neg_integer(),
-- 
cgit v1.2.3


From 89098ea8beb9e60faa59c3f2ea9ffc918ef87ae8 Mon Sep 17 00:00:00 2001
From: Rickard Green <rickard@erlang.org>
Date: Fri, 22 Jan 2016 10:24:13 +0100
Subject: Fix testcase

---
 erts/emulator/test/statistics_SUITE.erl | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'erts')

diff --git a/erts/emulator/test/statistics_SUITE.erl b/erts/emulator/test/statistics_SUITE.erl
index 53c9ba8715..a6305d453c 100644
--- a/erts/emulator/test/statistics_SUITE.erl
+++ b/erts/emulator/test/statistics_SUITE.erl
@@ -440,6 +440,10 @@ run_queues_lengths_active_tasks(Config) ->
 
     SO = erlang:system_flag(schedulers_online, 1),
 
+    %% Give newly suspended schedulers some time to
+    %% migrate away work from their run queues...
+    receive after 1000 -> ok end,
+
     TRQLs1 = statistics(total_run_queue_lengths),
     TATs1 = statistics(total_active_tasks),
     true = TRQLs1 >= 10,
-- 
cgit v1.2.3