author    Rickard Green <[email protected]>  2017-01-04 18:10:26 +0100
committer Rickard Green <[email protected]>  2017-01-17 12:23:19 +0100
commit    f9459092940943876dff040ee997515b96fd5d50 (patch)
tree      075fc1e01f6cc1cd1e398b9f52a0ee0f617065e4
parent    fd1fb9070333fe5b91f4528841a4b0fe71bbf096 (diff)
download  otp-f9459092940943876dff040ee997515b96fd5d50.tar.gz
          otp-f9459092940943876dff040ee997515b96fd5d50.tar.bz2
          otp-f9459092940943876dff040ee997515b96fd5d50.zip
Scheduler wall time support for dirty schedulers
-rw-r--r--  erts/doc/src/erlang.xml                  |  97
-rw-r--r--  erts/emulator/beam/atom.names            |   3
-rw-r--r--  erts/emulator/beam/beam_debug.c          |  13
-rw-r--r--  erts/emulator/beam/bif.c                 |   2
-rw-r--r--  erts/emulator/beam/erl_bif_info.c        |   7
-rw-r--r--  erts/emulator/beam/erl_process.c         | 366
-rw-r--r--  erts/emulator/beam/erl_process.h         |   9
-rw-r--r--  erts/emulator/test/statistics_SUITE.erl  | 131
-rw-r--r--  erts/preloaded/ebin/erlang.beam          | bin 105976 -> 106168 bytes
-rw-r--r--  erts/preloaded/src/erlang.erl            |   5
10 files changed, 535 insertions(+), 98 deletions(-)
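A minimal usage sketch of the feature this commit adds (not part of the patch; assumes an emulator built with dirty scheduler support, and note that the statistics are disabled by default):

    %% Enable wall-time accounting, take two samples one second
    %% apart, and compute per-scheduler utilization. With
    %% scheduler_wall_time_all the sample also covers dirty CPU
    %% and dirty I/O schedulers. sample_all/0 is a name invented
    %% here for illustration.
    sample_all() ->
        erlang:system_flag(scheduler_wall_time, true),
        Ts0 = lists:sort(erlang:statistics(scheduler_wall_time_all)),
        timer:sleep(1000),
        Ts1 = lists:sort(erlang:statistics(scheduler_wall_time_all)),
        [{Id, (A1 - A0) / (T1 - T0)}
         || {{Id, A0, T0}, {Id, A1, T1}} <- lists:zip(Ts0, Ts1)].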
diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml
index b3fab3874b..1f64d7be86 100644
--- a/erts/doc/src/erlang.xml
+++ b/erts/doc/src/erlang.xml
@@ -6415,12 +6415,17 @@ lists:map(
<c><anno>TotalTime</anno></c> is the total time duration since
<seealso marker="#system_flag_scheduler_wall_time">
<c>scheduler_wall_time</c></seealso>
- activation. The time unit is undefined and can be subject
- to change between releases, OSs, and system restarts.
- <c>scheduler_wall_time</c> is only to be used to
- calculate relative values for scheduler-utilization.
- <c><anno>ActiveTime</anno></c> can never exceed
- <c><anno>TotalTime</anno></c>.</p>
+ activation for the specific scheduler. Note that
+ activation time can differ significantly between
+ schedulers. Currently, dirty schedulers are activated
+ at system start, while normal schedulers are activated
+ some time after the <c>scheduler_wall_time</c>
+ functionality is enabled. The time unit is undefined
+ and can be subject to change between releases, OSs,
+ and system restarts. <c>scheduler_wall_time</c> is only
+ to be used to calculate relative values for scheduler
+ utilization. <c><anno>ActiveTime</anno></c> can never
+ exceed <c><anno>TotalTime</anno></c>.</p>
<p>The definition of a busy scheduler is when it is not idle
and is not scheduling (selecting) a process or port,
that is:</p>
@@ -6438,15 +6443,37 @@ lists:map(
<c>scheduler_wall_time</c></seealso> is turned off.</p>
<p>The list of scheduler information is unsorted and can
appear in different order between calls.</p>
+ <p>As of ERTS version 9.0, dirty CPU schedulers are
+ also included in the result. That is, all scheduler
+ threads that are expected to handle CPU-bound work. If
+ you also want information about dirty I/O schedulers, use
+ <seealso marker="#statistics_scheduler_wall_time_all"><c>statistics(scheduler_wall_time_all)</c></seealso>
+ instead.</p>
+
+ <p>Normal schedulers will have scheduler identifiers in
+ the range <c>1 =&lt; <anno>SchedulerId</anno> =&lt;
+ </c><seealso marker="#system_info_schedulers"><c>erlang:system_info(schedulers)</c></seealso>.
+ Dirty CPU schedulers will have scheduler identifiers in
+ the range <c>erlang:system_info(schedulers) &lt;
+ <anno>SchedulerId</anno> =&lt; erlang:system_info(schedulers)
+ +
+ </c><seealso marker="#system_info_dirty_cpu_schedulers"><c>erlang:system_info(dirty_cpu_schedulers)</c></seealso>.
+ </p>
+ <note><p>The different types of schedulers handle
+ specific types of jobs. Every job is assigned to a specific
+ scheduler type. Jobs can migrate between different schedulers
+ of the same type, but never between schedulers of different
+ types. This fact has to be taken into consideration when
+ evaluating the returned result.</p></note>
<p>Using <c>scheduler_wall_time</c> to calculate
- scheduler-utilization:</p>
+ scheduler utilization:</p>
<pre>
> <input>erlang:system_flag(scheduler_wall_time, true).</input>
false
> <input>Ts0 = lists:sort(erlang:statistics(scheduler_wall_time)), ok.</input>
ok</pre>
<p>Some time later the user takes another snapshot and calculates
- scheduler-utilization per scheduler, for example:</p>
+ scheduler utilization per scheduler, for example:</p>
<pre>
> <input>Ts1 = lists:sort(erlang:statistics(scheduler_wall_time)), ok.</input>
ok
@@ -6461,11 +6488,32 @@ ok
{7,0.973237033077876},
{8,0.9741297293248656}]</pre>
<p>Using the same snapshots to calculate a total
- scheduler-utilization:</p>
+ scheduler utilization:</p>
<pre>
> <input>{A, T} = lists:foldl(fun({{_, A0, T0}, {_, A1, T1}}, {Ai,Ti}) ->
- {Ai + (A1 - A0), Ti + (T1 - T0)} end, {0, 0}, lists:zip(Ts0,Ts1)), A/T.</input>
+ {Ai + (A1 - A0), Ti + (T1 - T0)} end, {0, 0}, lists:zip(Ts0,Ts1)),
+ TotalSchedulerUtilization = A/T.</input>
+0.9769136803764825</pre>
+ <p>Total scheduler utilization will equal <c>1.0</c> when
+ all schedulers have been active all the time between the
+ two measurements.</p>
+ <p>Another (probably more useful) value is the total
+ scheduler utilization weighted against the maximum amount
+ of available CPU time:</p>
+ <pre>
+> <input>WeightedSchedulerUtilization = (TotalSchedulerUtilization
+ * (erlang:system_info(schedulers)
+ + erlang:system_info(dirty_cpu_schedulers)))
+ / erlang:system_info(logical_processors_available).</input>
0.9769136803764825</pre>
+ <p>This weighted scheduler utilization reaches <c>1.0</c>
+ when the schedulers are active for the same amount of
+ time as the maximum available CPU time. If more
+ schedulers exist than available logical processors,
+ this value may be greater than <c>1.0</c>.</p>
+ <p>As of ERTS version 9.0, the Erlang runtime system
+ with SMP support has, by default, more schedulers
+ than logical processors. This is due to the dirty schedulers.</p>
<note>
<p><c>scheduler_wall_time</c> is by default disabled. To
enable it, use
@@ -6476,6 +6524,31 @@ ok
<func>
<name name="statistics" arity="1" clause_i="12"/>
+ <fsummary>Information about each scheduler's work time.</fsummary>
+ <desc>
+ <marker id="statistics_scheduler_wall_time_all"></marker>
+ <p>The same as
+ <seealso marker="#statistics_scheduler_wall_time"><c>statistics(scheduler_wall_time)</c></seealso>,
+ except that it also includes information about all dirty I/O
+ schedulers.</p>
+ <p>Dirty I/O schedulers will have scheduler identifiers in
+ the range
+ <seealso marker="#system_info_schedulers"><c>erlang:system_info(schedulers)</c></seealso><c>
+ +
+ </c><seealso marker="#system_info_dirty_cpu_schedulers"><c>erlang:system_info(dirty_cpu_schedulers)</c></seealso><c> &lt;
+ <anno>SchedulerId</anno> =&lt; erlang:system_info(schedulers)
+ + erlang:system_info(dirty_cpu_schedulers)
+ +
+ </c><seealso marker="#system_info_dirty_io_schedulers"><c>erlang:system_info(dirty_io_schedulers)</c></seealso>.</p>
+ <note><p>Note that work executing on dirty I/O schedulers
+ is expected to mainly wait for I/O. That is, when you
+ get high scheduler utilization on dirty I/O schedulers,
+ CPU utilization is <em>not</em> expected to be high due to
+ this work.</p></note>
+ </desc>
+ </func>
+ <func>
+ <name name="statistics" arity="1" clause_i="13"/>
<fsummary>Information about active processes and ports.</fsummary>
<desc><marker id="statistics_total_active_tasks"></marker>
<p>Returns the total amount of active processes and ports in
@@ -6495,7 +6568,7 @@ ok
</func>
<func>
- <name name="statistics" arity="1" clause_i="13"/>
+ <name name="statistics" arity="1" clause_i="14"/>
<fsummary>Information about the run-queue lengths.</fsummary>
<desc><marker id="statistics_total_run_queue_lengths"></marker>
<p>Returns the total length of the run queues. That is, the number
@@ -6515,7 +6588,7 @@ ok
</func>
<func>
- <name name="statistics" arity="1" clause_i="14"/>
+ <name name="statistics" arity="1" clause_i="15"/>
<fsummary>Information about wall clock.</fsummary>
<desc>
<p>Returns information about wall clock. <c>wall_clock</c> can
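The identifier ranges and the weighted utilization described in the documentation above can be combined into one helper; a sketch (weighted_swt_utilization/2 is a name invented here), assuming scheduler_wall_time is already enabled and that logical_processors_available does not return unknown (it can on some systems):

    %% Ts0 and Ts1 are two sorted snapshots taken with
    %% statistics(scheduler_wall_time), i.e. covering normal
    %% plus dirty CPU schedulers.
    weighted_swt_utilization(Ts0, Ts1) ->
        {A, T} = lists:foldl(fun({{_, A0, T0}, {_, A1, T1}}, {Ai, Ti}) ->
                                     {Ai + (A1 - A0), Ti + (T1 - T0)}
                             end, {0, 0}, lists:zip(Ts0, Ts1)),
        TotalSchedulerUtilization = A / T,
        (TotalSchedulerUtilization
         * (erlang:system_info(schedulers)
            + erlang:system_info(dirty_cpu_schedulers)))
        / erlang:system_info(logical_processors_available).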
diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names
index 7df350116a..dd0a42b5ba 100644
--- a/erts/emulator/beam/atom.names
+++ b/erts/emulator/beam/atom.names
@@ -574,6 +574,8 @@ atom safe
atom save_calls
atom scheduler
atom scheduler_id
+atom scheduler_wall_time
+atom scheduler_wall_time_all
atom schedulers_online
atom scheme
atom scientific
@@ -678,7 +680,6 @@ atom waiting
atom wall_clock
atom warning
atom warning_msg
-atom scheduler_wall_time
atom wordsize
atom write_concurrency
atom xor
diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c
index 3ffbb0364c..8326d348af 100644
--- a/erts/emulator/beam/beam_debug.c
+++ b/erts/emulator/beam/beam_debug.c
@@ -994,13 +994,20 @@ dirty_test(Process *c_p, Eterm type, Eterm arg1, Eterm arg2, UWord *I)
Eterm *hp, *hp2;
Uint sz;
int i;
+ ErtsSchedulerData *esdp = erts_proc_sched_data(c_p);
+ int dirty_io = esdp->type == ERTS_SCHED_DIRTY_IO;
+
if (ERTS_PROC_IS_EXITING(real_c_p))
goto badarg;
dirty_send_message(c_p, arg2, am_alive);
- /* Wait until dead */
- while (!ERTS_PROC_IS_EXITING(real_c_p))
- erts_thr_yield();
+ /* Wait until dead */
+ while (!ERTS_PROC_IS_EXITING(real_c_p)) {
+ if (dirty_io)
+ ms_wait(c_p, make_small(100), 0);
+ else
+ erts_thr_yield();
+ }
ms_wait(c_p, make_small(1000), 0);
diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c
index a5b66db35c..65c370c55b 100644
--- a/erts/emulator/beam/bif.c
+++ b/erts/emulator/beam/bif.c
@@ -4601,7 +4601,7 @@ BIF_RETTYPE system_flag_2(BIF_ALIST_2)
erts_aint32_t new = BIF_ARG_2 == am_true ? 1 : 0;
erts_aint32_t old = erts_smp_atomic32_xchg_nob(&sched_wall_time,
new);
- Eterm ref = erts_sched_wall_time_request(BIF_P, 1, new);
+ Eterm ref = erts_sched_wall_time_request(BIF_P, 1, new, 0, 0);
ASSERT(is_value(ref));
BIF_TRAP2(await_sched_wall_time_mod_trap,
BIF_P,
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c
index 5f2122b969..5ee19aead8 100644
--- a/erts/emulator/beam/erl_bif_info.c
+++ b/erts/emulator/beam/erl_bif_info.c
@@ -3385,7 +3385,12 @@ BIF_RETTYPE statistics_1(BIF_ALIST_1)
Eterm* hp;
if (BIF_ARG_1 == am_scheduler_wall_time) {
- res = erts_sched_wall_time_request(BIF_P, 0, 0);
+ res = erts_sched_wall_time_request(BIF_P, 0, 0, 1, 0);
+ if (is_non_value(res))
+ BIF_RET(am_undefined);
+ BIF_TRAP1(gather_sched_wall_time_res_trap, BIF_P, res);
+ } else if (BIF_ARG_1 == am_scheduler_wall_time_all) {
+ res = erts_sched_wall_time_request(BIF_P, 0, 0, 1, 1);
if (is_non_value(res))
BIF_RET(am_undefined);
BIF_TRAP1(gather_sched_wall_time_res_trap, BIF_P, res);
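As the dispatch above shows, both statistics variants trap to the same gatherer and return the atom undefined when the functionality has not been enabled; a defensive caller might wrap this as in the following sketch (swt/1 is a hypothetical helper name):

    swt(Type) when Type =:= scheduler_wall_time;
                   Type =:= scheduler_wall_time_all ->
        case erlang:statistics(Type) of
            undefined -> {error, disabled};
            List when is_list(List) -> {ok, lists:sort(List)}
        end.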
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index 084ecb14ec..8c9aa45a81 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -804,15 +804,28 @@ erts_late_init_process(void)
}
+#define ERTS_SCHED_WTIME_IDLE ~((Uint64) 0)
+
static void
-init_sched_wall_time(ErtsSchedWallTime *swtp)
+init_sched_wall_time(ErtsSchedulerData *esdp, Uint64 time_stamp)
{
- swtp->need = erts_sched_balance_util;
- swtp->enabled = 0;
- swtp->start = 0;
- swtp->working.total = 0;
- swtp->working.start = 0;
- swtp->working.currently = 0;
+#ifdef ERTS_DIRTY_SCHEDULERS
+ if (esdp->type != ERTS_SCHED_NORMAL) {
+ erts_atomic32_init_nob(&esdp->sched_wall_time.u.mod, 0);
+ esdp->sched_wall_time.enabled = 1;
+ esdp->sched_wall_time.start = time_stamp;
+ esdp->sched_wall_time.working.total = 0;
+ esdp->sched_wall_time.working.start = ERTS_SCHED_WTIME_IDLE;
+ }
+ else
+#endif
+ {
+ esdp->sched_wall_time.u.need = erts_sched_balance_util;
+ esdp->sched_wall_time.enabled = 0;
+ esdp->sched_wall_time.start = 0;
+ esdp->sched_wall_time.working.total = 0;
+ esdp->sched_wall_time.working.start = 0;
+ }
}
static ERTS_INLINE Uint64
@@ -1003,31 +1016,145 @@ init_runq_sched_util(ErtsRunQueueSchedUtil *rqsu, int enabled)
#endif /* ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT */
-static ERTS_INLINE void
+#ifdef ERTS_DIRTY_SCHEDULERS
+
+typedef struct {
+ Uint64 working;
+ Uint64 total;
+} ErtsDirtySchedWallTime;
+
+static void
+read_dirty_sched_wall_time(ErtsSchedulerData *esdp, ErtsDirtySchedWallTime *info)
+{
+ erts_aint32_t mod1;
+ Uint64 working, start, ts;
+
+ mod1 = erts_atomic32_read_nob(&esdp->sched_wall_time.u.mod);
+
+ while (1) {
+ erts_aint32_t mod2;
+
+ /* Spin until values are not written... */
+ while (1) {
+ if ((mod1 & 1) == 0)
+ break;
+ ERTS_SPIN_BODY;
+ mod1 = erts_atomic32_read_nob(&esdp->sched_wall_time.u.mod);
+ }
+
+ ERTS_THR_READ_MEMORY_BARRIER;
+
+ working = esdp->sched_wall_time.working.total;
+ start = esdp->sched_wall_time.working.start;
+
+ ERTS_THR_READ_MEMORY_BARRIER;
+
+ mod2 = erts_atomic32_read_nob(&esdp->sched_wall_time.u.mod);
+ if (mod1 == mod2)
+ break;
+ mod1 = mod2;
+ }
+
+ ts = sched_wall_time_ts();
+ ts -= esdp->sched_wall_time.start;
+
+ info->total = ts;
+
+ if (start == ERTS_SCHED_WTIME_IDLE || ts < start)
+ info->working = working;
+ else
+ info->working = working + (ts - start);
+
+ if (info->working > info->total)
+ info->working = info->total;
+}
+
+#endif
+
+#ifdef ERTS_SMP
+
+static void
+dirty_sched_wall_time_change(ErtsSchedulerData *esdp, int working)
+{
+ erts_aint32_t mod;
+ Uint64 ts = sched_wall_time_ts();
+
+ ts -= esdp->sched_wall_time.start;
+
+ /*
+ * This thread is the only thread writing in
+ * this sched_wall_time struct. We set 'mod' to
+ * an odd value while writing...
+ */
+ mod = erts_atomic32_read_dirty(&esdp->sched_wall_time.u.mod);
+ ASSERT((mod & 1) == 0);
+ mod++;
+
+ erts_atomic32_set_nob(&esdp->sched_wall_time.u.mod, mod);
+ ERTS_THR_WRITE_MEMORY_BARRIER;
+
+ if (working) {
+ ASSERT(esdp->sched_wall_time.working.start
+ == ERTS_SCHED_WTIME_IDLE);
+
+ esdp->sched_wall_time.working.start = ts;
+
+ }
+ else {
+ Uint64 total;
+
+ ASSERT(esdp->sched_wall_time.working.start
+ != ERTS_SCHED_WTIME_IDLE);
+
+ total = esdp->sched_wall_time.working.total;
+ total += ts - esdp->sched_wall_time.working.start;
+
+ esdp->sched_wall_time.working.total = total;
+ esdp->sched_wall_time.working.start = ERTS_SCHED_WTIME_IDLE;
+
+
+ }
+
+ ERTS_THR_WRITE_MEMORY_BARRIER;
+ mod++;
+ erts_atomic32_set_nob(&esdp->sched_wall_time.u.mod, mod);
+
+#if 0
+ if (!working) {
+ ERTS_MSACC_SET_STATE_M_X(ERTS_MSACC_STATE_BUSY_WAIT);
+ } else {
+ ERTS_MSACC_SET_STATE_M_X(ERTS_MSACC_STATE_OTHER);
+ }
+#endif
+}
+
+#endif /* ERTS_SMP */
+
+static void
sched_wall_time_change(ErtsSchedulerData *esdp, int working)
{
- if (esdp->sched_wall_time.need) {
+ if (esdp->sched_wall_time.u.need) {
Uint64 ts = sched_wall_time_ts();
#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT
- update_avg_sched_util(esdp, ts, working);
+ update_avg_sched_util(esdp, ts, working);
#endif
if (esdp->sched_wall_time.enabled) {
if (working) {
-#ifdef DEBUG
- ASSERT(!esdp->sched_wall_time.working.currently);
- esdp->sched_wall_time.working.currently = 1;
-#endif
+ ASSERT(esdp->sched_wall_time.working.start
+ == ERTS_SCHED_WTIME_IDLE);
ts -= esdp->sched_wall_time.start;
esdp->sched_wall_time.working.start = ts;
}
else {
-#ifdef DEBUG
- ASSERT(esdp->sched_wall_time.working.currently);
- esdp->sched_wall_time.working.currently = 0;
-#endif
+ ASSERT(esdp->sched_wall_time.working.start
+ != ERTS_SCHED_WTIME_IDLE);
ts -= esdp->sched_wall_time.start;
ts -= esdp->sched_wall_time.working.start;
esdp->sched_wall_time.working.total += ts;
+#ifdef DEBUG
+ esdp->sched_wall_time.working.start
+ = ERTS_SCHED_WTIME_IDLE;
+#endif
}
}
}
@@ -1047,6 +1174,10 @@ typedef struct {
Eterm ref_heap[REF_THING_SIZE];
Uint req_sched;
erts_smp_atomic32_t refc;
+#ifdef ERTS_DIRTY_SCHEDULERS
+ int want_dirty_cpu;
+ int want_dirty_io;
+#endif
} ErtsSchedWallTimeReq;
typedef struct {
@@ -1068,6 +1199,7 @@ ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(screq,
5,
ERTS_ALC_T_SYS_CHECK_REQ)
+
static void
reply_sched_wall_time(void *vswtrp)
{
@@ -1091,23 +1223,23 @@ reply_sched_wall_time(void *vswtrp)
#endif
if (swtrp->set) {
if (!swtrp->enable && esdp->sched_wall_time.enabled) {
- esdp->sched_wall_time.need = erts_sched_balance_util;
+ esdp->sched_wall_time.u.need = erts_sched_balance_util;
esdp->sched_wall_time.enabled = 0;
}
else if (swtrp->enable && !esdp->sched_wall_time.enabled) {
Uint64 ts = sched_wall_time_ts();
- esdp->sched_wall_time.need = 1;
+ esdp->sched_wall_time.u.need = 1;
esdp->sched_wall_time.enabled = 1;
esdp->sched_wall_time.start = ts;
esdp->sched_wall_time.working.total = 0;
esdp->sched_wall_time.working.start = 0;
- esdp->sched_wall_time.working.currently = 1;
}
}
if (esdp->sched_wall_time.enabled) {
Uint64 ts = sched_wall_time_ts();
- ASSERT(esdp->sched_wall_time.working.currently);
+ ASSERT(esdp->sched_wall_time.working.start
+ != ERTS_SCHED_WTIME_IDLE);
ts -= esdp->sched_wall_time.start;
total = ts;
ts -= esdp->sched_wall_time.working.start;
@@ -1118,30 +1250,117 @@ reply_sched_wall_time(void *vswtrp)
hpp = NULL;
szp = &sz;
- while (1) {
- if (hpp)
- ref_copy = STORE_NC(hpp, ohp, swtrp->ref);
- else
- *szp += REF_THING_SIZE;
+#ifdef ERTS_DIRTY_SCHEDULERS
+ if (esdp->sched_wall_time.enabled
+ && swtrp->req_sched == esdp->no
+ && (swtrp->want_dirty_cpu || swtrp->want_dirty_io)) {
+ /* Reply with info about this scheduler and all dirty schedulers... */
+ ErtsDirtySchedWallTime *dswt;
+ int ix, no_dirty_scheds, want_dcpu, want_dio, soffset;
+
+ want_dcpu = swtrp->want_dirty_cpu;
+ want_dio = swtrp->want_dirty_io;
+
+ no_dirty_scheds = 0;
+ if (want_dcpu)
+ no_dirty_scheds += erts_no_dirty_cpu_schedulers;
+ if (want_dio)
+ no_dirty_scheds += erts_no_dirty_io_schedulers;
+
+ ASSERT(no_dirty_scheds);
+
+ dswt = erts_alloc(ERTS_ALC_T_TMP,
+ sizeof(ErtsDirtySchedWallTime)
+ * no_dirty_scheds);
+
+ for (ix = 0; ix < no_dirty_scheds; ix++) {
+ ErtsSchedulerData *esdp;
+ if (want_dcpu && ix < erts_no_dirty_cpu_schedulers)
+ esdp = &erts_aligned_dirty_cpu_scheduler_data[ix].esd;
+ else {
+ int dio_ix = ix - erts_no_dirty_cpu_schedulers;
+ esdp = &erts_aligned_dirty_io_scheduler_data[dio_ix].esd;
+ }
+ read_dirty_sched_wall_time(esdp, &dswt[ix]);
+ }
- if (swtrp->set)
- msg = ref_copy;
- else {
- msg = (!esdp->sched_wall_time.enabled
- ? am_notsup
- : erts_bld_tuple(hpp, szp, 3,
- make_small(esdp->no),
- erts_bld_uint64(hpp, szp, working),
- erts_bld_uint64(hpp, szp, total)));
+ soffset = erts_no_schedulers + 1;
- msg = erts_bld_tuple(hpp, szp, 2, ref_copy, msg);
- }
- if (hpp)
- break;
+ if (!want_dcpu) {
+ ASSERT(want_dio);
+ soffset += erts_no_dirty_cpu_schedulers;
+ }
+
+ while (1) {
+ if (hpp)
+ ref_copy = STORE_NC(hpp, ohp, swtrp->ref);
+ else
+ *szp += REF_THING_SIZE;
+
+ ASSERT(!swtrp->set);
+
+ /* info about dirty schedulers... */
+ msg = NIL;
+ for (ix = no_dirty_scheds-1; ix >= 0; ix--) {
+ msg = erts_bld_cons(hpp, szp,
+ erts_bld_tuple(hpp, szp, 3,
+ make_small(ix+soffset),
+ erts_bld_uint64(hpp, szp,
+ dswt[ix].working),
+ erts_bld_uint64(hpp, szp,
+ dswt[ix].total)),
+ msg);
+ }
+ /* info about this scheduler... */
+ msg = erts_bld_cons(hpp, szp,
+ erts_bld_tuple(hpp, szp, 3,
+ make_small(esdp->no),
+ erts_bld_uint64(hpp, szp, working),
+ erts_bld_uint64(hpp, szp, total)),
+ msg);
+
+ msg = erts_bld_tuple(hpp, szp, 2, ref_copy, msg);
+
+ if (hpp)
+ break;
+
+ mp = erts_alloc_message_heap(rp, &rp_locks, sz, &hp, &ohp);
+ szp = NULL;
+ hpp = &hp;
+ }
- mp = erts_alloc_message_heap(rp, &rp_locks, sz, &hp, &ohp);
- szp = NULL;
- hpp = &hp;
+ erts_free(ERTS_ALC_T_TMP, dswt);
+ }
+ else
+#endif
+ {
+ /* Reply with info about this scheduler only... */
+
+ while (1) {
+ if (hpp)
+ ref_copy = STORE_NC(hpp, ohp, swtrp->ref);
+ else
+ *szp += REF_THING_SIZE;
+
+ if (swtrp->set)
+ msg = ref_copy;
+ else {
+ msg = (!esdp->sched_wall_time.enabled
+ ? am_undefined
+ : erts_bld_tuple(hpp, szp, 3,
+ make_small(esdp->no),
+ erts_bld_uint64(hpp, szp, working),
+ erts_bld_uint64(hpp, szp, total)));
+
+ msg = erts_bld_tuple(hpp, szp, 2, ref_copy, msg);
+ }
+ if (hpp)
+ break;
+
+ mp = erts_alloc_message_heap(rp, &rp_locks, sz, &hp, &ohp);
+ szp = NULL;
+ hpp = &hp;
+ }
}
erts_queue_message(rp, rp_locks, mp, msg, am_system);
@@ -1159,7 +1378,8 @@ reply_sched_wall_time(void *vswtrp)
}
Eterm
-erts_sched_wall_time_request(Process *c_p, int set, int enable)
+erts_sched_wall_time_request(Process *c_p, int set, int enable,
+ int want_dirty_cpu, int want_dirty_io)
{
ErtsSchedulerData *esdp = erts_proc_sched_data(c_p);
Eterm ref;
@@ -1181,6 +1401,10 @@ erts_sched_wall_time_request(Process *c_p, int set, int enable)
swtrp->proc = c_p;
swtrp->ref = STORE_NC(&hp, NULL, ref);
swtrp->req_sched = esdp->no;
+#ifdef ERTS_DIRTY_SCHEDULERS
+ swtrp->want_dirty_cpu = want_dirty_cpu;
+ swtrp->want_dirty_io = want_dirty_io;
+#endif
erts_smp_atomic32_init_nob(&swtrp->refc,
(erts_aint32_t) erts_no_schedulers);
@@ -3106,8 +3330,10 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
tse_wait:
- if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && thr_prgr_active != working)
- sched_wall_time_change(esdp, thr_prgr_active);
+ if (ERTS_SCHEDULER_IS_DIRTY(esdp))
+ dirty_sched_wall_time_change(esdp, working = 0);
+ else if (thr_prgr_active != working)
+ sched_wall_time_change(esdp, working = thr_prgr_active);
while (1) {
ErtsMonotonicTime current_time = 0;
@@ -3218,10 +3444,12 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
(ERTS_SSI_FLG_SUSPENDED
| ERTS_SSI_FLG_MSB_EXEC));
- if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && !thr_prgr_active) {
+ if (ERTS_SCHEDULER_IS_DIRTY(esdp))
+ dirty_sched_wall_time_change(esdp, working = 1);
+ else if (!thr_prgr_active) {
erts_thr_progress_active(esdp, thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
- }
+ }
erts_smp_runq_lock(rq);
sched_active(esdp->no, rq);
@@ -5712,7 +5940,8 @@ init_scheduler_data(ErtsSchedulerData* esdp, int num,
ErtsSchedulerSleepInfo* ssi,
ErtsRunQueue* runq,
char** daww_ptr, size_t daww_sz,
- Process *shadow_proc)
+ Process *shadow_proc,
+ Uint64 time_stamp)
{
esdp->timer_wheel = NULL;
#ifdef ERTS_SMP
@@ -5794,7 +6023,7 @@ init_scheduler_data(ErtsSchedulerData* esdp, int num,
esdp->reductions = 0;
- init_sched_wall_time(&esdp->sched_wall_time);
+ init_sched_wall_time(esdp, time_stamp);
erts_port_task_handle_init(&esdp->nosuspend_port_task_handle);
}
@@ -6008,17 +6237,18 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online
erts_aligned_scheduler_data =
erts_alloc_permanent_cache_aligned(ERTS_ALC_T_SCHDLR_DATA,
- n*sizeof(ErtsAlignedSchedulerData));
+ n*sizeof(ErtsAlignedSchedulerData));
for (ix = 0; ix < n; ix++) {
ErtsSchedulerData *esdp = ERTS_SCHEDULER_IX(ix);
init_scheduler_data(esdp, ix+1, ERTS_SCHED_SLEEP_INFO_IX(ix),
ERTS_RUNQ_IX(ix), &daww_ptr, daww_sz,
- NULL);
+ NULL, 0);
}
#ifdef ERTS_DIRTY_SCHEDULERS
{
+ Uint64 ts = sched_wall_time_ts();
int dirty_scheds = no_dirty_cpu_schedulers + no_dirty_io_schedulers;
int adspix = 0;
ErtsAlignedDirtyShadowProcess *adsp =
@@ -6038,13 +6268,13 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online
ErtsSchedulerData *esdp = ERTS_DIRTY_CPU_SCHEDULER_IX(ix);
init_scheduler_data(esdp, ix+1, ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix),
ERTS_DIRTY_CPU_RUNQ, NULL, 0,
- &adsp[adspix++].dsp);
+ &adsp[adspix++].dsp, ts);
}
for (ix = 0; ix < no_dirty_io_schedulers; ix++) {
ErtsSchedulerData *esdp = ERTS_DIRTY_IO_SCHEDULER_IX(ix);
init_scheduler_data(esdp, ix+1, ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(ix),
ERTS_DIRTY_IO_RUNQ, NULL, 0,
- &adsp[adspix++].dsp);
+ &adsp[adspix++].dsp, ts);
}
}
#endif
@@ -7506,8 +7736,10 @@ suspend_scheduler(ErtsSchedulerData *esdp)
#endif
- if (sched_type != ERTS_SCHED_NORMAL)
+ if (sched_type != ERTS_SCHED_NORMAL) {
erts_smp_runq_unlock(esdp->run_queue);
+ dirty_sched_wall_time_change(esdp, 0);
+ }
else {
if (no != 1)
evacuate_run_queue(esdp->run_queue, &sbp);
@@ -7518,8 +7750,8 @@ suspend_scheduler(ErtsSchedulerData *esdp)
if (erts_system_profile_flags.scheduler)
profile_scheduler(make_small(esdp->no), am_inactive);
-
}
+
erts_smp_mtx_lock(&schdlr_sspnd.mtx);
flgs = sched_prep_spin_suspended(ssi, ERTS_SSI_FLG_SUSPENDED);
@@ -7797,15 +8029,17 @@ suspend_scheduler(ErtsSchedulerData *esdp)
ASSERT(curr_online);
- if (sched_type == ERTS_SCHED_NORMAL) {
- (void) erts_get_monotonic_time(esdp);
- if (erts_system_profile_flags.scheduler)
- profile_scheduler(make_small(esdp->no), am_active);
+ if (sched_type != ERTS_SCHED_NORMAL)
+ dirty_sched_wall_time_change(esdp, 1);
+ else {
+ (void) erts_get_monotonic_time(esdp);
+ if (erts_system_profile_flags.scheduler)
+ profile_scheduler(make_small(esdp->no), am_active);
- if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
- sched_wall_time_change(esdp, 1);
- }
+ if (!thr_prgr_active) {
+ erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ sched_wall_time_change(esdp, 1);
+ }
}
erts_smp_runq_lock(esdp->run_queue);
@@ -8518,6 +8752,8 @@ sched_dirty_cpu_thread_func(void *vesdp)
callbacks.wait = NULL;
callbacks.finalize_wait = NULL;
+ dirty_sched_wall_time_change(esdp, 1);
+
esdp->thr_id += erts_no_schedulers;
erts_msacc_init_thread("dirty_cpu_scheduler", no, 0);
@@ -8565,6 +8801,8 @@ sched_dirty_io_thread_func(void *vesdp)
callbacks.wait = NULL;
callbacks.finalize_wait = NULL;
+ dirty_sched_wall_time_change(esdp, 1);
+
esdp->thr_id += erts_no_schedulers + erts_no_dirty_cpu_schedulers;
erts_msacc_init_thread("dirty_io_scheduler", no, 0);
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index fc2055332b..ce0989883c 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -550,13 +550,15 @@ do { \
} while (0)
typedef struct {
- int need; /* "+sbu true" or scheduler_wall_time enabled */
+ union {
+ erts_atomic32_t mod; /* on dirty schedulers */
+ int need; /* "+sbu true" or scheduler_wall_time enabled */
+ } u;
int enabled;
Uint64 start;
struct {
Uint64 total;
Uint64 start;
- int currently;
} working;
} ErtsSchedWallTime;
@@ -1598,7 +1600,8 @@ void erts_init_scheduling(int, int
void erts_execute_dirty_system_task(Process *c_p);
#endif
int erts_set_gc_state(Process *c_p, int enable);
-Eterm erts_sched_wall_time_request(Process *c_p, int set, int enable);
+Eterm erts_sched_wall_time_request(Process *c_p, int set, int enable,
+ int dirty_cpu, int want_dirty_io);
Eterm erts_system_check_request(Process *c_p);
Eterm erts_gc_info_request(Process *c_p);
Uint64 erts_get_proc_interval(void);
diff --git a/erts/emulator/test/statistics_SUITE.erl b/erts/emulator/test/statistics_SUITE.erl
index a1f12ba93c..f51244485b 100644
--- a/erts/emulator/test/statistics_SUITE.erl
+++ b/erts/emulator/test/statistics_SUITE.erl
@@ -28,6 +28,8 @@
runtime_update/1, runtime_diff/1,
run_queue_one/1,
scheduler_wall_time/1,
+ scheduler_wall_time_all/1,
+ msb_scheduler_wall_time/1,
reductions/1, reductions_big/1, garbage_collection/1, io/1,
badarg/1, run_queues_lengths_active_tasks/1, msacc/1]).
@@ -43,7 +45,9 @@ suite() ->
all() ->
[{group, wall_clock}, {group, runtime}, reductions,
- reductions_big, {group, run_queue}, scheduler_wall_time,
+ reductions_big, {group, run_queue},
+ scheduler_wall_time, scheduler_wall_time_all,
+ msb_scheduler_wall_time,
garbage_collection, io, badarg,
run_queues_lengths_active_tasks,
msacc].
@@ -271,35 +275,64 @@ hog_iter(0, Mon) ->
%% Tests that statistics(scheduler_wall_time) works as intended
scheduler_wall_time(Config) when is_list(Config) ->
+ scheduler_wall_time_test(scheduler_wall_time).
+
+%% Tests that statistics(scheduler_wall_time_all) works as intended
+scheduler_wall_time_all(Config) when is_list(Config) ->
+ scheduler_wall_time_test(scheduler_wall_time_all).
+
+scheduler_wall_time_test(Type) ->
%% Should return undefined if system_flag is not turned on yet
- undefined = statistics(scheduler_wall_time),
+ undefined = statistics(Type),
%% Turn on statistics
false = erlang:system_flag(scheduler_wall_time, true),
try
Schedulers = erlang:system_info(schedulers_online),
+ DirtyCPUSchedulers = erlang:system_info(dirty_cpu_schedulers_online),
+ DirtyIOSchedulers = erlang:system_info(dirty_io_schedulers),
+ TotLoadSchedulers = case Type of
+ scheduler_wall_time_all ->
+ Schedulers + DirtyCPUSchedulers + DirtyIOSchedulers;
+ scheduler_wall_time ->
+ Schedulers + DirtyCPUSchedulers
+ end,
+
%% Let testserver and everyone else finish their work
timer:sleep(1500),
%% Empty load
- EmptyLoad = get_load(),
+ EmptyLoad = get_load(Type),
{false, _} = {lists:any(fun(Load) -> Load > 50 end, EmptyLoad),EmptyLoad},
MeMySelfAndI = self(),
StartHog = fun() ->
- Pid = spawn(?MODULE, hog, [self()]),
+ Pid = spawn_link(?MODULE, hog, [self()]),
receive hog_started -> MeMySelfAndI ! go end,
Pid
end,
+ StartDirtyHog = fun(Func) ->
+ F = fun () ->
+ erts_debug:Func(alive_waitexiting,
+ MeMySelfAndI)
+ end,
+ Pid = spawn_link(F),
+ receive {alive, Pid} -> ok end,
+ Pid
+ end,
P1 = StartHog(),
%% Max on one, the other schedulers empty (hopefully)
%% Be generous the process can jump between schedulers
%% which is ok and we don't want the test to fail for wrong reasons
- _L1 = [S1Load|EmptyScheds1] = get_load(),
+ _L1 = [S1Load|EmptyScheds1] = get_load(Type),
{true,_} = {S1Load > 50,S1Load},
{false,_} = {lists:any(fun(Load) -> Load > 50 end, EmptyScheds1),EmptyScheds1},
{true,_} = {lists:sum(EmptyScheds1) < 60,EmptyScheds1},
%% 50% load
HalfHogs = [StartHog() || _ <- lists:seq(1, (Schedulers-1) div 2)],
- HalfLoad = lists:sum(get_load()) div Schedulers,
+ HalfDirtyCPUHogs = [StartDirtyHog(dirty_cpu)
+ || _ <- lists:seq(1, DirtyCPUSchedulers div 2)],
+ HalfDirtyIOHogs = [StartDirtyHog(dirty_io)
+ || _ <- lists:seq(1, DirtyIOSchedulers div 2)],
+ HalfLoad = lists:sum(get_load(Type)) div TotLoadSchedulers,
if Schedulers < 2, HalfLoad > 80 -> ok; %% Ok only one scheduler online and one hog
%% We want roughly 50% load
HalfLoad > 40, HalfLoad < 60 -> ok;
@@ -308,23 +341,30 @@ scheduler_wall_time(Config) when is_list(Config) ->
%% 100% load
LastHogs = [StartHog() || _ <- lists:seq(1, Schedulers div 2)],
- FullScheds = get_load(),
+ LastDirtyCPUHogs = [StartDirtyHog(dirty_cpu)
+ || _ <- lists:seq(1, DirtyCPUSchedulers div 2)],
+ LastDirtyIOHogs = [StartDirtyHog(dirty_io)
+ || _ <- lists:seq(1, DirtyIOSchedulers div 2)],
+ FullScheds = get_load(Type),
{false,_} = {lists:any(fun(Load) -> Load < 80 end, FullScheds),FullScheds},
- FullLoad = lists:sum(FullScheds) div Schedulers,
+ FullLoad = lists:sum(FullScheds) div TotLoadSchedulers,
if FullLoad > 90 -> ok;
true -> exit({fullload, FullLoad})
end,
KillHog = fun (HP) ->
HPM = erlang:monitor(process, HP),
+ unlink(HP),
exit(HP, kill),
receive
{'DOWN', HPM, process, HP, killed} ->
ok
end
end,
- [KillHog(Pid) || Pid <- [P1|HalfHogs++LastHogs]],
- AfterLoad = get_load(),
+ [KillHog(Pid) || Pid <- [P1|HalfHogs++HalfDirtyCPUHogs++HalfDirtyIOHogs
+ ++LastHogs++LastDirtyCPUHogs++LastDirtyIOHogs]],
+ receive after 2000 -> ok end, %% Give dirty schedulers time to complete...
+ AfterLoad = get_load(Type),
io:format("AfterLoad=~p~n", [AfterLoad]),
{false,_} = {lists:any(fun(Load) -> Load > 25 end, AfterLoad),AfterLoad},
true = erlang:system_flag(scheduler_wall_time, false)
@@ -332,16 +372,81 @@ scheduler_wall_time(Config) when is_list(Config) ->
erlang:system_flag(scheduler_wall_time, false)
end.
-get_load() ->
- Start = erlang:statistics(scheduler_wall_time),
+get_load(Type) ->
+ Start = erlang:statistics(Type),
timer:sleep(1500),
- End = erlang:statistics(scheduler_wall_time),
+ End = erlang:statistics(Type),
lists:reverse(lists:sort(load_percentage(lists:sort(Start),lists:sort(End)))).
load_percentage([{Id, WN, TN}|Ss], [{Id, WP, TP}|Ps]) ->
[100*(WN-WP) div (TN-TP)|load_percentage(Ss, Ps)];
load_percentage([], []) -> [].
+count(0) ->
+ ok;
+count(N) ->
+ count(N-1).
+
+msb_swt_hog(true) ->
+ count(1000000),
+ erts_debug:dirty_cpu(wait, 10),
+ erts_debug:dirty_io(wait, 10),
+ msb_swt_hog(true);
+msb_swt_hog(false) ->
+ count(1000000),
+ msb_swt_hog(false).
+
+msb_scheduler_wall_time(Config) ->
+ erlang:system_flag(scheduler_wall_time, true),
+ Dirty = erlang:system_info(dirty_cpu_schedulers) /= 0,
+ Hogs = lists:map(fun (_) ->
+ spawn_opt(fun () ->
+ msb_swt_hog(Dirty)
+ end, [{priority,low}, link, monitor])
+ end, lists:seq(1,10)),
+ erlang:system_flag(multi_scheduling, block),
+ try
+ SWT1 = lists:sort(statistics(scheduler_wall_time_all)),
+ %% io:format("SWT1 = ~p~n", [SWT1]),
+ receive after 4000 -> ok end,
+ SWT2 = lists:sort(statistics(scheduler_wall_time_all)),
+ %% io:format("SWT2 = ~p~n", [SWT2]),
+ SWT = lists:zip(SWT1, SWT2),
+ io:format("SU = ~p~n", [lists:map(fun({{I, A0, T0}, {I, A1, T1}}) ->
+ {I, (A1 - A0)/(T1 - T0)} end,
+ SWT)]),
+ {A, T} = lists:foldl(fun({{_, A0, T0}, {_, A1, T1}}, {Ai,Ti}) ->
+ {Ai + (A1 - A0), Ti + (T1 - T0)}
+ end,
+ {0, 0},
+ SWT),
+ TSU = A/T,
+ WSU = ((TSU * (erlang:system_info(schedulers)
+ + erlang:system_info(dirty_cpu_schedulers)
+ + erlang:system_info(dirty_io_schedulers)))
+ / 1),
+ %% Weighted scheduler utilization should be
+ %% very close to 1.0, i.e., we execute the
+ %% same time as one thread executing all
+ %% the time...
+ io:format("WSU = ~p~n", [WSU]),
+ true = 0.9 < WSU andalso WSU < 1.1,
+ ok
+ after
+ erlang:system_flag(multi_scheduling, unblock),
+ erlang:system_flag(scheduler_wall_time, false),
+ lists:foreach(fun ({HP, _HM}) ->
+ unlink(HP),
+ exit(HP, kill)
+ end, Hogs),
+ lists:foreach(fun ({HP, HM}) ->
+ receive
+ {'DOWN', HM, process, HP, _} ->
+ ok
+ end
+ end, Hogs),
+ ok
+ end.
%% Tests that statistics(garbage_collection) is callable.
%% It is not clear how to test anything more.
diff --git a/erts/preloaded/ebin/erlang.beam b/erts/preloaded/ebin/erlang.beam
index 359d28c0aa..0799546c60 100644
--- a/erts/preloaded/ebin/erlang.beam
+++ b/erts/preloaded/ebin/erlang.beam
Binary files differ
diff --git a/erts/preloaded/src/erlang.erl b/erts/preloaded/src/erlang.erl
index 99ef5c6314..86ebb4dd4b 100644
--- a/erts/preloaded/src/erlang.erl
+++ b/erts/preloaded/src/erlang.erl
@@ -2333,6 +2333,10 @@ spawn_opt(_Tuple) ->
SchedulerId :: pos_integer(),
ActiveTime :: non_neg_integer(),
TotalTime :: non_neg_integer();
+ (scheduler_wall_time_all) -> [{SchedulerId, ActiveTime, TotalTime}] | undefined when
+ SchedulerId :: pos_integer(),
+ ActiveTime :: non_neg_integer(),
+ TotalTime :: non_neg_integer();
(total_active_tasks) -> ActiveTasks when
ActiveTasks :: non_neg_integer();
(total_run_queue_lengths) -> TotalRunQueueLenghts when
@@ -4014,6 +4018,7 @@ sched_wall_time(Ref, N, undefined) ->
sched_wall_time(Ref, N, Acc) ->
receive
{Ref, undefined} -> sched_wall_time(Ref, N-1, undefined);
+ {Ref, SWTL} when erlang:is_list(SWTL) -> sched_wall_time(Ref, N-1, Acc ++ SWTL);
{Ref, SWT} -> sched_wall_time(Ref, N-1, [SWT|Acc])
end.
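The new list clause above exists because the requesting scheduler answers with a single message carrying its own slot plus all requested dirty schedulers, while every other normal scheduler still answers with one tuple; a standalone sketch of the same merge semantics (gather/3 is a hypothetical name):

    gather(_Ref, 0, Acc) ->
        Acc;
    gather(Ref, N, undefined) ->
        %% Once any reply is undefined, drain the remaining
        %% replies and keep undefined, as sched_wall_time/3 does.
        receive {Ref, _} -> gather(Ref, N - 1, undefined) end;
    gather(Ref, N, Acc) ->
        receive
            {Ref, undefined}         -> gather(Ref, N - 1, undefined);
            {Ref, L} when is_list(L) -> gather(Ref, N - 1, Acc ++ L);
            {Ref, T}                 -> gather(Ref, N - 1, [T | Acc])
        end.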