aboutsummaryrefslogtreecommitdiffstats
path: root/erts
diff options
context:
space:
mode:
Diffstat (limited to 'erts')
-rw-r--r--erts/configure.in15
-rw-r--r--erts/doc/src/erlang.xml188
-rw-r--r--erts/emulator/Makefile.in3
-rw-r--r--erts/emulator/beam/atom.names4
-rw-r--r--erts/emulator/beam/beam_bp.c2
-rw-r--r--erts/emulator/beam/beam_emu.c75
-rw-r--r--erts/emulator/beam/bif.c48
-rw-r--r--erts/emulator/beam/bif.tab2
-rw-r--r--erts/emulator/beam/erl_alloc.c4
-rw-r--r--erts/emulator/beam/erl_alloc.h18
-rw-r--r--erts/emulator/beam/erl_alloc.types1
-rw-r--r--erts/emulator/beam/erl_async.c34
-rw-r--r--erts/emulator/beam/erl_bif_info.c12
-rw-r--r--erts/emulator/beam/erl_gc.c7
-rw-r--r--erts/emulator/beam/erl_init.c3
-rw-r--r--erts/emulator/beam/erl_lock_check.c2
-rw-r--r--erts/emulator/beam/erl_msacc.c486
-rw-r--r--erts/emulator/beam/erl_msacc.h409
-rw-r--r--erts/emulator/beam/erl_port_task.c3
-rw-r--r--erts/emulator/beam/erl_process.c72
-rw-r--r--erts/emulator/beam/erl_threads.h34
-rw-r--r--erts/emulator/beam/erl_time.h2
-rw-r--r--erts/emulator/beam/erl_time_sup.c14
-rw-r--r--erts/emulator/beam/global.h2
-rw-r--r--erts/emulator/beam/io.c32
-rw-r--r--erts/emulator/beam/ops.tab9
-rw-r--r--erts/emulator/beam/sys.h11
-rw-r--r--erts/emulator/beam/time.c20
-rw-r--r--erts/emulator/sys/common/erl_poll.c36
-rw-r--r--erts/emulator/sys/unix/erl_unix_sys.h27
-rw-r--r--erts/emulator/sys/unix/sys_time.c114
-rw-r--r--erts/emulator/sys/win32/erl_poll.c4
-rw-r--r--erts/emulator/sys/win32/erl_win_sys.h14
-rw-r--r--erts/emulator/sys/win32/sys_time.c1
-rw-r--r--erts/emulator/test/statistics_SUITE.erl116
-rw-r--r--erts/include/internal/ethr_internal.h1
-rw-r--r--erts/include/internal/ethread.h47
-rw-r--r--erts/lib_src/common/ethr_aux.c32
-rw-r--r--erts/preloaded/ebin/erl_prim_loader.beambin49956 -> 49984 bytes
-rw-r--r--erts/preloaded/ebin/erlang.beambin101404 -> 101988 bytes
-rw-r--r--erts/preloaded/ebin/erts_code_purger.beambin8744 -> 8764 bytes
-rw-r--r--erts/preloaded/ebin/erts_internal.beambin8512 -> 9412 bytes
-rw-r--r--erts/preloaded/ebin/init.beambin44700 -> 44724 bytes
-rw-r--r--erts/preloaded/ebin/otp_ring0.beambin1448 -> 1468 bytes
-rw-r--r--erts/preloaded/ebin/prim_eval.beambin1320 -> 1340 bytes
-rw-r--r--erts/preloaded/ebin/prim_file.beambin44884 -> 44908 bytes
-rw-r--r--erts/preloaded/ebin/prim_inet.beambin72608 -> 72632 bytes
-rw-r--r--erts/preloaded/ebin/prim_zip.beambin23280 -> 23308 bytes
-rw-r--r--erts/preloaded/ebin/zlib.beambin14156 -> 14176 bytes
-rw-r--r--erts/preloaded/src/erlang.erl18
-rw-r--r--erts/preloaded/src/erts_internal.erl35
51 files changed, 1880 insertions, 77 deletions
diff --git a/erts/configure.in b/erts/configure.in
index 2f19c0f760..d84651f4f9 100644
--- a/erts/configure.in
+++ b/erts/configure.in
@@ -342,6 +342,21 @@ AS_HELP_STRING([--disable-saved-compile-time], [disable saved compile time]),
AC_DEFINE_UNQUOTED(ERTS_SAVED_COMPILE_TIME, $save_compile_time, [Save compile time?])
+AC_ARG_WITH(microstate-accounting,
+AS_HELP_STRING([--with-microstate-accounting={yes|extra}],
+ [enable microstate account, possibly with extra detailed states])
+AS_HELP_STRING([--without-microstate-accounting],
+ [don't enable microstate accounting]),
+[],[with_microstate_accounting=yes])
+
+case "$with_microstate_accounting" in
+ yes) AC_DEFINE(ERTS_ENABLE_MSACC,[1],
+ [Define as 1 if you want to enable microstate accounting, 2 if you want extra states]) ;;
+ extra) AC_DEFINE(ERTS_ENABLE_MSACC,[2],
+ [Define as 1 if you want to enable microstate accounting, 2 if you want extra states]) ;;
+ *) ;;
+esac
+
dnl Magic test for clearcase.
OTP_RELEASE=
if test "${ERLANG_COMMERCIAL_BUILD}" != ""; then
diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml
index d3aad94660..803da382ed 100644
--- a/erts/doc/src/erlang.xml
+++ b/erts/doc/src/erlang.xml
@@ -131,6 +131,17 @@
</note>
</item>
+ <tag><c>perf_counter</c></tag>
+ <item><p>Symbolic representation of the performance counter
+ time unit used by the Erlang runtime system.</p>
+
+ <p>The <c>perf_counter</c> time unit behaves much in the same way
+ as the <c>native</c> time unit. That is it might differ inbetween
+ run-time restarts. You get values of this type by calling
+ <seealso marker="kernel:os#perf_counter/0"><c>os:perf_counter()</c></seealso>
+ </p>
+ </item>
+
</taglist>
<p>The <c>time_unit/0</c> type may be extended. Use
@@ -5859,6 +5870,146 @@ true</pre>
<func>
<name name="statistics" arity="1" clause_i="6"/>
+ <fsummary>Information about microstate accounting.</fsummary>
+ <desc>
+ <marker id="statistics_microstate_accounting"></marker>
+ <p>
+ Microstate accounting can be used to measure how much time the Erlang
+ runtime system spends doing various tasks. It is designed to be as
+ lightweight as possible, but there will be some overhead when this
+ is enabled. Microstate accounting is meant to be a profiling tool
+ to help figure out performance bottlenecks.
+ To <c>start</c>/<c>stop</c>/<c>reset</c> microstate_accounting you use
+ the system_flag
+ <seealso marker="#system_flag_microstate_accounting">
+ <c>microstate_accounting</c></seealso>.
+ </p>
+ <p>
+ <c>erlang:statistics(microstate_accounting)</c> returns a list of maps
+ representing some of the OS threads within ERTS. Each map contains
+ <c>type</c> and <c>id</c> fields that can be used to identify what
+ thread it is, and also a counters field that contains data about how
+ much time has been spent in the various states.</p>
+ <pre>
+> <input>erlang:statistics(microstate_accounting).</input>
+[#{counters => #{aux => 1899182914,
+ check_io => 2605863602,
+ emulator => 45731880463,
+ gc => 1512206910,
+ other => 5421338456,
+ port => 221631,
+ sleep => 5150294100},
+ id => 1,
+ type => scheduler}|...]
+ </pre>
+ <p>The time unit is the same as returned by
+ <seealso marker="kernel:os#perf_counter/0">
+ <c>os:perf_counter/0</c></seealso>.
+ So to convert it to milliseconds you could do something like this:</p>
+ <pre>
+lists:map(
+ fun(#{ counters := Cnt } = M) ->
+ MsCnt = maps:map(fun(_K, PerfCount) ->
+ erlang:convert_time_unit(PerfCount, perf_counter, 1000)
+ end, Cnt),
+ M#{ counters := MsCnt }
+ end, erlang:statistics(microstate_accounting)).
+ </pre>
+ <p>
+ It is important to note that these values are not guaranteed to be
+ the exact time spent in each state. This is because of various
+ optimisation done in order to keep the overhead as small as possible.
+ </p>
+
+ <p>Currently the following <c><anno>MSAcc_Thread_Type</anno></c> are available:</p>
+ <taglist>
+ <tag><c>scheduler</c></tag>
+ <item>The main execution threads that do most of the work.</item>
+ <tag><c>async</c></tag><item>Async threads are used by various
+ linked-in drivers (mainly the file drivers) do offload non-cpu
+ intensive work.</item>
+ <tag><c>aux</c></tag><item>Takes care of any work that is not
+ specifically assigned to a scheduler.</item>
+ </taglist>
+ <p>Currently the following <c><anno>MSAcc_Thread_State</anno></c>s are available.
+ All states are exclusive, meaning that a thread cannot be in two states
+ at once. So if you add the numbers of all counters in a thread
+ you will get the total run-time for that thread.</p>
+ <taglist>
+ <tag><c>aux</c></tag>
+ <item>Time spent handling auxiliary jobs.</item>
+ <tag><c>check_io</c></tag>
+ <item>Time spent checking for new I/O events.</item>
+ <tag><c>emulator</c></tag>
+ <item>Time spent executing erlang processes.</item>
+ <tag><c>gc</c></tag>
+ <item>Time spent doing garbage collection. When extra states are
+ enabled this is the time spent doing non-fullsweep garbage
+ collections.</item>
+ <tag><c>other</c></tag>
+ <item>Time spent doing unaccounted things.</item>
+ <tag><c>port</c></tag>
+ <item>Time spent executing ports.</item>
+ <tag><c>sleep</c></tag>
+ <item>Time spent sleeping.</item>
+ </taglist>
+ <p>It is possible to add more fine grained <c><anno>MSAcc_Thread_State</anno></c>s
+ through configure.
+ (e.g. <c>./configure --with-microstate-accounting=extra</c>).
+ Enabling these states will cause a performance degradation when
+ microstate accounting is turned off and increase the overhead when
+ it is turned on.</p>
+ <taglist>
+ <tag><c>alloc</c></tag>
+ <item>Time spent managing memory. Without extra states this time is
+ spread out over all other states.</item>
+ <tag><c>bif</c></tag>
+ <item>Time spent in bifs. Without extra states this time is part of
+ the <c>emulator</c> state.</item>
+ <tag><c>busy_wait</c></tag>
+ <item>Time spent busy waiting. This is also the state where a
+ scheduler no longer reports that it is active when using
+ <seealso marker="#statistics_scheduler_wall_time">
+ <c>erlang:statistics(scheduler_wall_time)</c></seealso>.
+ So if you add all other states but this and sleep and then divide that
+ by all time in the thread you should get something very similar to the
+ scheduler_wall_time fraction. Without extra states this time is part
+ of the <c>other</c> state.</item>
+ <tag><c>ets</c></tag>
+ <item>Time spent executing ETS bifs. Without extra states this time is
+ part of the <c>emulator</c> state.</item>
+ <tag><c>gc_full</c></tag>
+ <item>Time spent doing fullsweep garbage collection. Without extra
+ states this time is part of the <c>gc</c> state.</item>
+ <tag><c>nif</c></tag>
+ <item>Time spent in nifs. Without extra states this time is part of
+ the <c>emulator</c> state.</item>
+ <tag><c>send</c></tag>
+ <item>Time spent sending messages (processes only). Without extra
+ states this time is part of the <c>emulator</c> state.</item>
+ <tag><c>timers</c></tag>
+ <item>Time spent managing timers. Without extra states this time is
+ part of the <c>other</c> state.</item>
+ </taglist>
+ <p>There is a utility module called
+ <seealso marker="runtime_tools:msacc"><c>msacc</c></seealso> in
+ runtime_tools that can be used to more easily analyse these
+ statistics.</p>
+
+ <p>
+ Returns <c>undefined</c> if the system flag
+ <seealso marker="#system_flag_microstate_accounting">
+ <c>microstate_accounting</c></seealso>
+ is turned off.
+ </p>
+ <p>The list of thread information is unsorted and may appear in
+ different order between calls.</p>
+ <note><p>The threads and states are subject to change without any
+ prior notice.</p></note>
+ </desc>
+ </func>
+ <func>
+ <name name="statistics" arity="1" clause_i="7"/>
<fsummary>Information about reductions.</fsummary>
<desc>
<marker id="statistics_reductions"></marker>
@@ -5876,7 +6027,7 @@ true</pre>
</func>
<func>
- <name name="statistics" arity="1" clause_i="7"/>
+ <name name="statistics" arity="1" clause_i="8"/>
<fsummary>Information about the run-queues.</fsummary>
<desc><marker id="statistics_run_queue"></marker>
<p>
@@ -5892,7 +6043,7 @@ true</pre>
</func>
<func>
- <name name="statistics" arity="1" clause_i="8"/>
+ <name name="statistics" arity="1" clause_i="9"/>
<fsummary>Information about the run-queue lengths.</fsummary>
<desc><marker id="statistics_run_queue_lengths"></marker>
<p>
@@ -5912,7 +6063,7 @@ true</pre>
</func>
<func>
- <name name="statistics" arity="1" clause_i="9"/>
+ <name name="statistics" arity="1" clause_i="10"/>
<fsummary>Information about runtime.</fsummary>
<desc>
<p>Returns information about runtime, in milliseconds.</p>
@@ -5927,7 +6078,7 @@ true</pre>
</func>
<func>
- <name name="statistics" arity="1" clause_i="10"/>
+ <name name="statistics" arity="1" clause_i="11"/>
<fsummary>Information about each schedulers work time.</fsummary>
<desc>
<marker id="statistics_scheduler_wall_time"></marker>
@@ -5998,7 +6149,7 @@ ok
</func>
<func>
- <name name="statistics" arity="1" clause_i="11"/>
+ <name name="statistics" arity="1" clause_i="12"/>
<fsummary>Information about active processes and ports.</fsummary>
<desc><marker id="statistics_total_active_tasks"></marker>
<p>
@@ -6016,7 +6167,7 @@ ok
</func>
<func>
- <name name="statistics" arity="1" clause_i="12"/>
+ <name name="statistics" arity="1" clause_i="13"/>
<fsummary>Information about the run-queue lengths.</fsummary>
<desc><marker id="statistics_total_run_queue_lengths"></marker>
<p>
@@ -6035,7 +6186,7 @@ ok
</func>
<func>
- <name name="statistics" arity="1" clause_i="13"/>
+ <name name="statistics" arity="1" clause_i="14"/>
<fsummary>Information about wall clock.</fsummary>
<desc>
<p>Returns information about wall clock. <c>wall_clock</c> can
@@ -6269,6 +6420,17 @@ ok
<func>
<name name="system_flag" arity="2" clause_i="5"/>
+ <fsummary>Set system flag microstate_accounting</fsummary>
+ <desc><p><marker id="system_flag_microstate_accounting"></marker>
+ Turns on/off microstate accounting measurements. By passing reset it is possible to reset
+ all counters to 0.</p>
+ <p>For more information see,
+ <seealso marker="#statistics_microstate_accounting">erlang:statistics(microstate_accounting)</seealso>.
+ </p>
+ </desc>
+ </func>
+ <func>
+ <name name="system_flag" arity="2" clause_i="6"/>
<fsummary>Sets system flag <c>min_heap_size</c>.</fsummary>
<desc>
<p>Sets the default minimum heap size for processes. The size
@@ -6283,7 +6445,7 @@ ok
</func>
<func>
- <name name="system_flag" arity="2" clause_i="6"/>
+ <name name="system_flag" arity="2" clause_i="7"/>
<fsummary>Sets system flag <c>min_bin_vheap_size</c>.</fsummary>
<desc>
<p>Sets the default minimum binary virtual heap size for
@@ -6300,7 +6462,7 @@ ok
</func>
<func>
- <name name="system_flag" arity="2" clause_i="7"/>
+ <name name="system_flag" arity="2" clause_i="8"/>
<fsummary>Sets system flag <c>multi_scheduling</c>.</fsummary>
<desc>
<p><marker id="system_flag_multi_scheduling"></marker>
@@ -6338,7 +6500,7 @@ ok
</func>
<func>
- <name name="system_flag" arity="2" clause_i="8"/>
+ <name name="system_flag" arity="2" clause_i="9"/>
<fsummary>Sets system flag <c>scheduler_bind_type</c>.</fsummary>
<type name="scheduler_bind_type"/>
<desc>
@@ -6456,7 +6618,7 @@ ok
</func>
<func>
- <name name="system_flag" arity="2" clause_i="9"/>
+ <name name="system_flag" arity="2" clause_i="10"/>
<fsummary>Sets system flag <c>scheduler_wall_time</c>.</fsummary>
<desc><p><marker id="system_flag_scheduler_wall_time"></marker>
Turns on or off scheduler wall time measurements.</p>
@@ -6466,7 +6628,7 @@ ok
</func>
<func>
- <name name="system_flag" arity="2" clause_i="10"/>
+ <name name="system_flag" arity="2" clause_i="11"/>
<fsummary>Sets system flag <c>schedulers_online</c>.</fsummary>
<desc>
<p><marker id="system_flag_schedulers_online"></marker>
@@ -6491,7 +6653,7 @@ ok
</func>
<func>
- <name name="system_flag" arity="2" clause_i="11"/>
+ <name name="system_flag" arity="2" clause_i="12"/>
<fsummary>Sets system flag <c>trace_control_word</c>.</fsummary>
<desc>
<p>Sets the value of the node trace control word to
diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in
index f4b806fae9..12148ad9c7 100644
--- a/erts/emulator/Makefile.in
+++ b/erts/emulator/Makefile.in
@@ -776,7 +776,8 @@ RUN_OBJS = \
$(OBJDIR)/erl_zlib.o $(OBJDIR)/erl_nif.o \
$(OBJDIR)/erl_bif_binary.o $(OBJDIR)/erl_ao_firstfit_alloc.o \
$(OBJDIR)/erl_thr_queue.o $(OBJDIR)/erl_sched_spec_pre_alloc.o \
- $(OBJDIR)/erl_ptab.o $(OBJDIR)/erl_map.o
+ $(OBJDIR)/erl_ptab.o $(OBJDIR)/erl_map.o \
+ $(OBJDIR)/erl_msacc.o
ifeq ($(TARGET),win32)
DRV_OBJS = \
diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names
index 1629a92aa9..6fb08ee896 100644
--- a/erts/emulator/beam/atom.names
+++ b/erts/emulator/beam/atom.names
@@ -102,6 +102,7 @@ atom asynchronous
atom atom
atom atom_used
atom attributes
+atom await_microstate_accounting_modifications
atom await_port_send_result
atom await_proc_exit
atom await_result
@@ -172,6 +173,7 @@ atom const
atom context_switches
atom control
atom copy
+atom counters
atom cpu
atom cpu_timestamp
atom cr
@@ -269,6 +271,7 @@ atom get_tcw
atom getenv
atom gather_gc_info_result
atom gather_io_bytes
+atom gather_microstate_accounting_result
atom gather_sched_wall_time_result
atom getting_linked
atom getting_unlinked
@@ -360,6 +363,7 @@ atom merge_trap
atom meta
atom meta_match_spec
atom micro_seconds
+atom microstate_accounting
atom milli_seconds
atom min_heap_size
atom min_bin_vheap_size
diff --git a/erts/emulator/beam/beam_bp.c b/erts/emulator/beam/beam_bp.c
index 2a23da4f25..5d471d168b 100644
--- a/erts/emulator/beam/beam_bp.c
+++ b/erts/emulator/beam/beam_bp.c
@@ -864,7 +864,7 @@ do_call_trace(Process* c_p, BeamInstr* I, Eterm* reg,
Eterm* cpp;
int return_to_trace = 0;
BeamInstr w;
- BeamInstr *cp_save;
+ BeamInstr *cp_save = c_p->cp;
Uint32 flags;
Uint need = 0;
Eterm* E = c_p->stop;
diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c
index b807102aa0..d4fb36acc3 100644
--- a/erts/emulator/beam/beam_emu.c
+++ b/erts/emulator/beam/beam_emu.c
@@ -113,6 +113,9 @@ do { \
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#endif
+#define GET_BIF_MODULE(p) ((Eterm) (((Export *) p)->code[0]))
+#define GET_BIF_FUNCTION(p) ((Eterm) (((Export *) p)->code[1]))
+#define GET_BIF_ARITY(p) ((Eterm) (((Export *) p)->code[2]))
#define GET_BIF_ADDRESS(p) ((BifFunction) (((Export *) p)->code[4]))
#define TermWords(t) (((t) / (sizeof(BeamInstr)/sizeof(Eterm))) + !!((t) % (sizeof(BeamInstr)/sizeof(Eterm))))
@@ -1249,6 +1252,8 @@ void process_main(void)
Uint64 start_time = 0; /* Monitor long schedule */
BeamInstr* start_time_i = NULL;
+ ERTS_MSACC_DECLARE_CACHE_X() /* a cached value of the tsd pointer for msacc */
+
ERL_BITS_DECLARE_STATEP; /* Has to be last declaration */
@@ -1302,6 +1307,8 @@ void process_main(void)
ERTS_SMP_REQ_PROC_MAIN_LOCK(c_p);
PROCESS_MAIN_CHK_LOCKS(c_p);
+ ERTS_MSACC_UPDATE_CACHE_X();
+
if (erts_system_monitor_long_schedule != 0) {
start_time = erts_timestamp_millis();
start_time_i = c_p->i;
@@ -2741,11 +2748,21 @@ do { \
*/
OpCase(call_bif_e):
{
- Eterm (*bf)(Process*, Eterm*, BeamInstr*) = GET_BIF_ADDRESS(Arg(0));
+ Eterm (*bf)(Process*, Eterm*, BeamInstr*);
Eterm result;
BeamInstr *next;
ErlHeapFragment *live_hf_end;
+ if (ERTS_MSACC_IS_ENABLED_CACHED_X()) {
+ if (GET_BIF_MODULE(Arg(0)) == am_ets) {
+ ERTS_MSACC_SET_STATE_CACHED_M_X(ERTS_MSACC_STATE_ETS);
+ } else {
+ ERTS_MSACC_SET_STATE_CACHED_M_X(ERTS_MSACC_STATE_BIF);
+ }
+ }
+
+ bf = GET_BIF_ADDRESS(Arg(0));
+
PRE_BIF_SWAPOUT(c_p);
c_p->fcalls = FCALLS - 1;
if (FCALLS <= 0) {
@@ -2768,6 +2785,12 @@ do { \
PROCESS_MAIN_CHK_LOCKS(c_p);
HTOP = HEAP_TOP(c_p);
FCALLS = c_p->fcalls;
+ /* We have to update the cache if we are enabled in order
+ to make sure no book keeping is done after we disabled
+ msacc. We don't always do this as it is quite expensive. */
+ if (ERTS_MSACC_IS_ENABLED_CACHED_X())
+ ERTS_MSACC_UPDATE_CACHE_X();
+ ERTS_MSACC_SET_STATE_CACHED_M_X(ERTS_MSACC_STATE_EMULATOR);
if (is_value(result)) {
r(0) = result;
CHECK_TERM(r(0));
@@ -3452,6 +3475,8 @@ do { \
BifFunction vbf;
ErlHeapFragment *live_hf_end;
+ ERTS_MSACC_SET_STATE_CACHED_M_X(ERTS_MSACC_STATE_NIF);
+
DTRACE_NIF_ENTRY(c_p, (Eterm)I[-3], (Eterm)I[-2], (Uint)I[-1]);
c_p->current = I-3; /* current and vbf set to please handle_error */
SWAPOUT;
@@ -3476,6 +3501,8 @@ do { \
PROCESS_MAIN_CHK_LOCKS(c_p);
ERTS_VERIFY_UNUSED_TEMP_ALLOC(c_p);
+ ERTS_MSACC_SET_STATE_CACHED_M_X(ERTS_MSACC_STATE_EMULATOR);
+
DTRACE_NIF_RETURN(c_p, (Eterm)I[-3], (Eterm)I[-2], (Uint)I[-1]);
goto apply_bif_or_nif_epilogue;
@@ -3490,6 +3517,13 @@ do { \
* code[3]: &&apply_bif
* code[4]: Function pointer to BIF function
*/
+ if (ERTS_MSACC_IS_ENABLED_CACHED_X()) {
+ if ((Eterm)I[-3] == am_ets) {
+ ERTS_MSACC_SET_STATE_CACHED_M_X(ERTS_MSACC_STATE_ETS);
+ } else {
+ ERTS_MSACC_SET_STATE_CACHED_M_X(ERTS_MSACC_STATE_BIF);
+ }
+ }
c_p->current = I-3; /* In case we apply process_info/1,2 or load_nif/1 */
c_p->i = I; /* In case we apply check_process_code/2. */
@@ -3516,7 +3550,12 @@ do { \
ERTS_VERIFY_UNUSED_TEMP_ALLOC(c_p);
PROCESS_MAIN_CHK_LOCKS(c_p);
}
-
+ /* We have to update the cache if we are enabled in order
+ to make sure no book keeping is done after we disabled
+ msacc. We don't always do this as it is quite expensive. */
+ if (ERTS_MSACC_IS_ENABLED_CACHED_X())
+ ERTS_MSACC_UPDATE_CACHE_X();
+ ERTS_MSACC_SET_STATE_CACHED_M_X(ERTS_MSACC_STATE_EMULATOR);
DTRACE_BIF_RETURN(c_p, (Eterm)I[-3], (Eterm)I[-2], (Uint)I[-1]);
apply_bif_or_nif_epilogue:
@@ -4895,6 +4934,38 @@ do { \
}
}
+ /* This is optimised as an instruction because
+ it has to be very very fast */
+ OpCase(i_perf_counter): {
+ BeamInstr* next;
+ ErtsSysPerfCounter ts;
+ PreFetch(0, next);
+
+ ts = erts_sys_perf_counter();
+
+ if (IS_SSMALL(ts)) {
+ r(0) = make_small((Sint)ts);
+ } else {
+ TestHeap(ERTS_SINT64_HEAP_SIZE(ts),0);
+ r(0) = make_big(HTOP);
+#if defined(ARCH_32) || HALFWORD_HEAP
+ if (ts >= (((Uint64) 1) << 32)) {
+ *HTOP = make_pos_bignum_header(2);
+ BIG_DIGIT(HTOP, 0) = (Uint) (ts & ((Uint) 0xffffffff));
+ BIG_DIGIT(HTOP, 1) = (Uint) ((ts >> 32) & ((Uint) 0xffffffff));
+ HTOP += 3;
+ }
+ else
+#endif
+ {
+ *HTOP = make_pos_bignum_header(1);
+ BIG_DIGIT(HTOP, 0) = (Uint) ts;
+ HTOP += 2;
+ }
+ }
+ NextPF(0, next);
+ }
+
OpCase(i_debug_breakpoint): {
HEAVY_SWAPOUT;
I = call_error_handler(c_p, I-3, reg, am_breakpoint);
diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c
index bb9165cd79..7e8eb44680 100644
--- a/erts/emulator/beam/bif.c
+++ b/erts/emulator/beam/bif.c
@@ -44,6 +44,7 @@
#include "erl_ptab.h"
#include "erl_bits.h"
#include "erl_bif_unique.h"
+#include "erl_msacc.h"
Export *erts_await_result;
static Export* flush_monitor_messages_trap = NULL;
@@ -54,6 +55,9 @@ Export* erts_format_cpu_topology_trap = NULL;
static Export dsend_continue_trap_export;
Export *erts_convert_time_unit_trap = NULL;
+static Export *await_msacc_mod_trap = NULL;
+static erts_smp_atomic32_t msacc;
+
static Export *await_sched_wall_time_mod_trap;
static erts_smp_atomic32_t sched_wall_time;
@@ -2143,7 +2147,11 @@ BIF_RETTYPE send_3(BIF_ALIST_3)
int connect = !0;
Eterm l = opts;
Sint result;
+
DeclareTypedTmpHeap(ErtsSendContext, ctx, BIF_P);
+
+ ERTS_MSACC_PUSH_STATE_M_X();
+
UseTmpHeap(sizeof(ErtsSendContext)/sizeof(Eterm), BIF_P);
ctx->suspend = !0;
@@ -2172,7 +2180,10 @@ BIF_RETTYPE send_3(BIF_ALIST_3)
ref = NIL;
#endif
+ ERTS_MSACC_SET_STATE_CACHED_M_X(ERTS_MSACC_STATE_SEND);
result = do_send(p, to, msg, &ref, ctx);
+ ERTS_MSACC_POP_STATE_M_X();
+
if (result > 0) {
ERTS_VBUMP_REDS(p, result);
if (ERTS_IS_PROC_OUT_OF_REDS(p))
@@ -2288,8 +2299,8 @@ Eterm erl_send(Process *p, Eterm to, Eterm msg)
Eterm ref;
Sint result;
DeclareTypedTmpHeap(ErtsSendContext, ctx, p);
+ ERTS_MSACC_PUSH_AND_SET_STATE_M_X(ERTS_MSACC_STATE_SEND);
UseTmpHeap(sizeof(ErtsSendContext)/sizeof(Eterm), p);
-
#ifdef DEBUG
ref = NIL;
#endif
@@ -2300,7 +2311,9 @@ Eterm erl_send(Process *p, Eterm to, Eterm msg)
ctx->dss.phase = ERTS_DSIG_SEND_PHASE_INIT;
result = do_send(p, to, msg, &ref, ctx);
-
+
+ ERTS_MSACC_POP_STATE_M_X();
+
if (result > 0) {
ERTS_VBUMP_REDS(p, result);
if (ERTS_IS_PROC_OUT_OF_REDS(p))
@@ -4586,6 +4599,31 @@ BIF_RETTYPE system_flag_2(BIF_ALIST_2)
default:
ERTS_INTERNAL_ERROR("Unknown state");
}
+#ifdef ERTS_ENABLE_MSACC
+ } else if (BIF_ARG_1 == am_microstate_accounting) {
+ Eterm threads;
+ if (BIF_ARG_2 == am_true || BIF_ARG_2 == am_false) {
+ erts_aint32_t new = BIF_ARG_2 == am_true ? ERTS_MSACC_ENABLE : ERTS_MSACC_DISABLE;
+ erts_aint32_t old = erts_smp_atomic32_xchg_nob(&msacc, new);
+ Eterm ref = erts_msacc_request(BIF_P, new, &threads);
+ if (is_non_value(ref))
+ BIF_RET(old ? am_true : am_false);
+ BIF_TRAP3(await_msacc_mod_trap,
+ BIF_P,
+ ref,
+ old ? am_true : am_false,
+ threads);
+ } else if (BIF_ARG_2 == am_reset) {
+ Eterm ref = erts_msacc_request(BIF_P, ERTS_MSACC_RESET, &threads);
+ erts_aint32_t old = erts_smp_atomic32_read_nob(&msacc);
+ ASSERT(is_value(ref));
+ BIF_TRAP3(await_msacc_mod_trap,
+ BIF_P,
+ ref,
+ old ? am_true : am_false,
+ threads);
+ }
+#endif
} else if (ERTS_IS_ATOM_STR("scheduling_statistics", BIF_ARG_1)) {
int what;
if (ERTS_IS_ATOM_STR("disable", BIF_ARG_2))
@@ -4915,8 +4953,12 @@ void erts_init_bif(void)
await_port_send_result_trap
= erts_export_put(am_erts_internal, am_await_port_send_result, 3);
await_sched_wall_time_mod_trap
- = erts_export_put(am_erlang, am_await_sched_wall_time_modifications, 2);
+ = erts_export_put(am_erlang, am_await_sched_wall_time_modifications, 2);
+ await_msacc_mod_trap
+ = erts_export_put(am_erts_internal, am_await_microstate_accounting_modifications, 3);
+
erts_smp_atomic32_init_nob(&sched_wall_time, 0);
+ erts_smp_atomic32_init_nob(&msacc, ERTS_MSACC_IS_ENABLED());
}
#ifdef HARDDEBUG
diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab
index 1b8ae8cef5..4efc055aaf 100644
--- a/erts/emulator/beam/bif.tab
+++ b/erts/emulator/beam/bif.tab
@@ -170,6 +170,7 @@ bif erts_internal:term_type/1
bif erts_internal:map_hashmap_children/1
bif erts_internal:time_unit/0
+bif erts_internal:perf_counter_unit/0
bif erts_internal:is_system_process/1
@@ -369,6 +370,7 @@ bif os:getpid/0
bif os:timestamp/0
bif os:system_time/0
bif os:system_time/1
+bif os:perf_counter/0
#
# Bifs in the erl_ddll module (the module actually does not exist)
diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c
index 5544712e8d..51b522c9e0 100644
--- a/erts/emulator/beam/erl_alloc.c
+++ b/erts/emulator/beam/erl_alloc.c
@@ -103,9 +103,9 @@ static Uint install_debug_functions(void);
static int lock_all_physical_memory = 0;
-ErtsAllocatorFunctions_t erts_allctrs[ERTS_ALC_A_MAX+1];
+ErtsAllocatorFunctions_t ERTS_WRITE_UNLIKELY(erts_allctrs[ERTS_ALC_A_MAX+1]);
ErtsAllocatorInfo_t erts_allctrs_info[ERTS_ALC_A_MAX+1];
-ErtsAllocatorThrSpec_t erts_allctr_thr_spec[ERTS_ALC_A_MAX+1];
+ErtsAllocatorThrSpec_t ERTS_WRITE_UNLIKELY(erts_allctr_thr_spec[ERTS_ALC_A_MAX+1]);
#define ERTS_MIN(A, B) ((A) < (B) ? (A) : (B))
#define ERTS_MAX(A, B) ((A) > (B) ? (A) : (B))
diff --git a/erts/emulator/beam/erl_alloc.h b/erts/emulator/beam/erl_alloc.h
index 14e80960f5..71e4713624 100644
--- a/erts/emulator/beam/erl_alloc.h
+++ b/erts/emulator/beam/erl_alloc.h
@@ -233,12 +233,14 @@ ERTS_ALC_INLINE
void *erts_alloc(ErtsAlcType_t type, Uint size)
{
void *res;
+ ERTS_MSACC_PUSH_AND_SET_STATE_X(ERTS_MSACC_STATE_ALLOC);
res = (*erts_allctrs[ERTS_ALC_T2A(type)].alloc)(
ERTS_ALC_T2N(type),
erts_allctrs[ERTS_ALC_T2A(type)].extra,
size);
if (!res)
erts_alloc_n_enomem(ERTS_ALC_T2N(type), size);
+ ERTS_MSACC_POP_STATE_X();
return res;
}
@@ -246,6 +248,7 @@ ERTS_ALC_INLINE
void *erts_realloc(ErtsAlcType_t type, void *ptr, Uint size)
{
void *res;
+ ERTS_MSACC_PUSH_AND_SET_STATE_X(ERTS_MSACC_STATE_ALLOC);
res = (*erts_allctrs[ERTS_ALC_T2A(type)].realloc)(
ERTS_ALC_T2N(type),
erts_allctrs[ERTS_ALC_T2A(type)].extra,
@@ -253,37 +256,48 @@ void *erts_realloc(ErtsAlcType_t type, void *ptr, Uint size)
size);
if (!res)
erts_realloc_n_enomem(ERTS_ALC_T2N(type), ptr, size);
+ ERTS_MSACC_POP_STATE_X();
return res;
}
ERTS_ALC_INLINE
void erts_free(ErtsAlcType_t type, void *ptr)
{
+ ERTS_MSACC_PUSH_AND_SET_STATE_X(ERTS_MSACC_STATE_ALLOC);
(*erts_allctrs[ERTS_ALC_T2A(type)].free)(
ERTS_ALC_T2N(type),
erts_allctrs[ERTS_ALC_T2A(type)].extra,
ptr);
+ ERTS_MSACC_POP_STATE_X();
}
ERTS_ALC_INLINE
void *erts_alloc_fnf(ErtsAlcType_t type, Uint size)
{
- return (*erts_allctrs[ERTS_ALC_T2A(type)].alloc)(
+ void *res;
+ ERTS_MSACC_PUSH_AND_SET_STATE_X(ERTS_MSACC_STATE_ALLOC);
+ res = (*erts_allctrs[ERTS_ALC_T2A(type)].alloc)(
ERTS_ALC_T2N(type),
erts_allctrs[ERTS_ALC_T2A(type)].extra,
size);
+ ERTS_MSACC_POP_STATE_X();
+ return res;
}
ERTS_ALC_INLINE
void *erts_realloc_fnf(ErtsAlcType_t type, void *ptr, Uint size)
{
- return (*erts_allctrs[ERTS_ALC_T2A(type)].realloc)(
+ void *res;
+ ERTS_MSACC_PUSH_AND_SET_STATE_X(ERTS_MSACC_STATE_ALLOC);
+ res = (*erts_allctrs[ERTS_ALC_T2A(type)].realloc)(
ERTS_ALC_T2N(type),
erts_allctrs[ERTS_ALC_T2A(type)].extra,
ptr,
size);
+ ERTS_MSACC_POP_STATE_X();
+ return res;
}
ERTS_ALC_INLINE
diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types
index e0bc71c88a..5f153ac0ab 100644
--- a/erts/emulator/beam/erl_alloc.types
+++ b/erts/emulator/beam/erl_alloc.types
@@ -361,6 +361,7 @@ type AINFO_REQ SHORT_LIVED SYSTEM alloc_info_request
type SCHED_WTIME_REQ SHORT_LIVED SYSTEM sched_wall_time_request
type GC_INFO_REQ SHORT_LIVED SYSTEM gc_info_request
type PORT_DATA_HEAP STANDARD SYSTEM port_data_heap
+type MSACC DRIVER SYSTEM microstate_accounting
#
# Types used by system specific code
diff --git a/erts/emulator/beam/erl_async.c b/erts/emulator/beam/erl_async.c
index be0bc0cfec..cdeeb5281b 100644
--- a/erts/emulator/beam/erl_async.c
+++ b/erts/emulator/beam/erl_async.c
@@ -400,13 +400,19 @@ static ERTS_INLINE void call_async_ready(ErtsAsync *a)
ERTS_PORT_SFLGS_INVALID_DRIVER_LOOKUP);
#endif
if (!p) {
- if (a->async_free)
+ if (a->async_free) {
+ ERTS_MSACC_PUSH_AND_SET_STATE(ERTS_MSACC_STATE_PORT);
a->async_free(a->async_data);
+ ERTS_MSACC_POP_STATE();
+ }
}
else {
if (async_ready(p, a->async_data)) {
- if (a->async_free)
+ if (a->async_free) {
+ ERTS_MSACC_PUSH_AND_SET_STATE(ERTS_MSACC_STATE_PORT);
a->async_free(a->async_data);
+ ERTS_MSACC_POP_STATE();
+ }
}
#if ERTS_USE_ASYNC_READY_Q
erts_port_release(p);
@@ -460,6 +466,8 @@ static erts_tse_t *async_thread_init(ErtsAsyncQ *aq)
{
ErtsThrQInit_t qinit = ERTS_THR_Q_INIT_DEFAULT;
erts_tse_t *tse = erts_tse_fetch();
+ ERTS_DECLARE_DUMMY(Uint no);
+
#ifdef ERTS_SMP
ErtsThrPrgrCallbacks callbacks;
@@ -483,10 +491,12 @@ static erts_tse_t *async_thread_init(ErtsAsyncQ *aq)
/* Inform main thread that we are done initializing... */
erts_mtx_lock(&async->init.data.mtx);
- async->init.data.no_initialized++;
+ no = async->init.data.no_initialized++;
erts_cnd_signal(&async->init.data.cnd);
erts_mtx_unlock(&async->init.data.mtx);
+ erts_msacc_init_thread("async", no, 0);
+
return tse;
}
@@ -494,6 +504,7 @@ static void *async_main(void* arg)
{
ErtsAsyncQ *aq = (ErtsAsyncQ *) arg;
erts_tse_t *tse = async_thread_init(aq);
+ ERTS_MSACC_DECLARE_CACHE();
while (1) {
ErtsThrQPrepEnQ_t *prep_enq;
@@ -501,11 +512,14 @@ static void *async_main(void* arg)
if (is_nil(a->port))
break; /* Time to die */
+ ERTS_MSACC_UPDATE_CACHE();
+
#if ERTS_ASYNC_PRINT_JOB
erts_fprintf(stderr, "<- %ld\n", a->async_id);
#endif
-
+ ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_PORT);
a->async_invoke(a->async_data);
+ ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_OTHER);
async_reply(a, prep_enq);
}
@@ -628,10 +642,13 @@ long driver_async(ErlDrvPort ix, unsigned int* key,
unsigned int qix;
#if ERTS_USE_ASYNC_READY_Q
Uint sched_id;
+ ERTS_MSACC_PUSH_STATE();
sched_id = erts_get_scheduler_id();
if (!sched_id)
sched_id = 1;
+#else
+ ERTS_MSACC_PUSH_STATE();
#endif
prt = erts_drvport2port(ix);
@@ -684,12 +701,17 @@ long driver_async(ErlDrvPort ix, unsigned int* key,
return id;
}
#endif
-
+
+ ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_PORT);
(*a->async_invoke)(a->async_data);
+ ERTS_MSACC_POP_STATE();
if (async_ready(prt, a->async_data)) {
- if (a->async_free != NULL)
+ if (a->async_free != NULL) {
+ ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_PORT);
(*a->async_free)(a->async_data);
+ ERTS_MSACC_POP_STATE();
+ }
}
erts_free(ERTS_ALC_T_ASYNC, (void *) a);
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c
index 06e7dc8661..017339e1f6 100644
--- a/erts/emulator/beam/erl_bif_info.c
+++ b/erts/emulator/beam/erl_bif_info.c
@@ -64,6 +64,7 @@ static Export* alloc_sizes_trap = NULL;
static Export* gather_io_bytes_trap = NULL;
static Export *gather_sched_wall_time_res_trap;
+static Export *gather_msacc_res_trap;
static Export *gather_gc_info_res_trap;
#define DECL_AM(S) Eterm AM_ ## S = am_atom_put(#S, sizeof(#S) - 1)
@@ -3260,6 +3261,14 @@ BIF_RETTYPE statistics_1(BIF_ALIST_1)
szp = NULL;
hpp = &hp;
}
+#ifdef ERTS_ENABLE_MSACC
+ } else if (BIF_ARG_1 == am_microstate_accounting) {
+ Eterm threads;
+ res = erts_msacc_request(BIF_P, ERTS_MSACC_GATHER, &threads);
+ if (is_non_value(res))
+ BIF_RET(am_undefined);
+ BIF_TRAP2(gather_msacc_res_trap, BIF_P, res, threads);
+#endif
} else if (BIF_ARG_1 == am_context_switches) {
Eterm cs = erts_make_integer(erts_get_total_context_switches(), BIF_P);
hp = HAlloc(BIF_P, 3);
@@ -4396,6 +4405,9 @@ erts_bif_info_init(void)
= erts_export_put(am_erlang, am_gather_gc_info_result, 1);
gather_io_bytes_trap
= erts_export_put(am_erts_internal, am_gather_io_bytes, 2);
+ gather_msacc_res_trap
+ = erts_export_put(am_erts_internal, am_gather_microstate_accounting_result, 2);
+
process_info_init();
os_info_init();
}
diff --git a/erts/emulator/beam/erl_gc.c b/erts/emulator/beam/erl_gc.c
index 06acece471..21b03ae8bd 100644
--- a/erts/emulator/beam/erl_gc.c
+++ b/erts/emulator/beam/erl_gc.c
@@ -576,6 +576,7 @@ garbage_collect(Process* p, ErlHeapFragment *live_hf_end,
int reds;
ErtsMonotonicTime start_time = 0; /* Shut up faulty warning... */
ErtsSchedulerData *esdp;
+ ERTS_MSACC_PUSH_STATE_M();
#ifdef USE_VM_PROBES
DTRACE_CHARBUF(pidbuf, DTRACE_TERM_BUF_SIZE);
#endif
@@ -588,6 +589,8 @@ garbage_collect(Process* p, ErlHeapFragment *live_hf_end,
else if (p->live_hf_end != ERTS_INVALID_HFRAG_PTR)
live_hf_end = p->live_hf_end;
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_GC);
+
esdp = erts_get_scheduler_data();
if (IS_TRACED_FL(p, F_TRACE_GC)) {
@@ -624,9 +627,11 @@ garbage_collect(Process* p, ErlHeapFragment *live_hf_end,
}
else {
do_major_collection:
+ ERTS_MSACC_SET_STATE_CACHED_M_X(ERTS_MSACC_STATE_GC_FULL);
DTRACE2(gc_major_start, pidbuf, need);
reds = major_collection(p, live_hf_end, need, objv, nobj, &reclaimed_now);
DTRACE2(gc_major_end, pidbuf, reclaimed_now);
+ ERTS_MSACC_SET_STATE_CACHED_M_X(ERTS_MSACC_STATE_GC);
}
reset_active_writer(p);
@@ -669,6 +674,8 @@ garbage_collect(Process* p, ErlHeapFragment *live_hf_end,
FLAGS(p) &= ~F_FORCE_GC;
p->live_hf_end = ERTS_INVALID_HFRAG_PTR;
+ ERTS_MSACC_POP_STATE_M();
+
#ifdef CHECK_FOR_HOLES
/*
* We intentionally do not rescan the areas copied by the GC.
diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c
index 42aca726bf..520b504fcb 100644
--- a/erts/emulator/beam/erl_init.c
+++ b/erts/emulator/beam/erl_init.c
@@ -394,6 +394,7 @@ erl_init(int ncpu,
#endif
packet_parser_init();
erl_nif_init();
+ erts_msacc_init();
}
static Eterm
@@ -1193,6 +1194,7 @@ early_init(int *argc, char **argv) /*
erts_thr_late_init(&elid);
}
#endif
+ erts_msacc_early_init();
#ifdef ERTS_ENABLE_LOCK_CHECK
erts_lc_late_init();
@@ -2220,6 +2222,7 @@ erl_start(int argc, char **argv)
#else
{
ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ erts_msacc_init_thread("scheduler", 1, 1);
erts_thr_set_main_status(1, 1);
#if ERTS_USE_ASYNC_READY_Q
esdp->aux_work_data.async_ready.queue
diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c
index f7b4bd8041..598bf84c0b 100644
--- a/erts/emulator/beam/erl_lock_check.c
+++ b/erts/emulator/beam/erl_lock_check.c
@@ -137,6 +137,8 @@ static erts_lc_lock_order_t erts_lock_order[] = {
{ "mmap_init_atoms", NULL },
{ "drv_tsd", NULL },
{ "async_enq_mtx", NULL },
+ { "msacc_list_mutex", NULL },
+ { "msacc_unmanaged_mutex", NULL },
#ifdef ERTS_SMP
{ "atom_tab", NULL },
{ "misc_op_list_pre_alloc_lock", "address" },
diff --git a/erts/emulator/beam/erl_msacc.c b/erts/emulator/beam/erl_msacc.c
new file mode 100644
index 0000000000..71e3fd8b6e
--- /dev/null
+++ b/erts/emulator/beam/erl_msacc.c
@@ -0,0 +1,486 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2014-2015. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: Microstate accounting.
+ *
+ * We keep track of the different states that the
+ * Erlang VM threads are in, in order to provide
+ * performance/debugging statistics. There is a
+ * small overhead in enabling this, but in the big
+ * scheme of things it should be negligible.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#define ERTS_MSACC_STATE_STRINGS 1
+
+#include "sys.h"
+#include "global.h"
+#include "erl_threads.h"
+#include "erl_bif_unique.h"
+#include "erl_map.h"
+#include "erl_msacc.h"
+
+#if ERTS_ENABLE_MSACC
+
+static Eterm erts_msacc_gather_stats(ErtsMsAcc *msacc, Eterm **hpp, Uint *szp);
+static void erts_msacc_reset(ErtsMsAcc *msacc);
+static ErtsMsAcc* get_msacc(void);
+
+#ifdef USE_THREADS
+erts_tsd_key_t ERTS_WRITE_UNLIKELY(erts_msacc_key);
+#else
+ErtsMsAcc *ERTS_WRITE_UNLIKELY(erts_msacc) = NULL;
+#endif
+int ERTS_WRITE_UNLIKELY(erts_msacc_enabled);
+
+static Eterm *erts_msacc_state_atoms = NULL;
+static erts_rwmtx_t msacc_mutex;
+static ErtsMsAcc *msacc_managed = NULL;
+#ifdef USE_THREADS
+static ErtsMsAcc *msacc_unmanaged = NULL;
+static Uint msacc_unmanaged_count = 0;
+#endif
+
+/* we have to split initiation as atoms are not inited in early init */
+void erts_msacc_early_init(void) {
+#ifndef ERTS_MSACC_ALWAYS_ON
+ erts_msacc_enabled = 0;
+#endif
+ erts_rwmtx_init(&msacc_mutex,"msacc_list_mutex");
+#ifdef USE_THREADS
+ erts_tsd_key_create(&erts_msacc_key,"erts_msacc_key");
+#else
+ erts_msacc = NULL;
+#endif
+}
+
+void erts_msacc_init(void) {
+ int i;
+ erts_msacc_state_atoms = erts_alloc(ERTS_ALC_T_MSACC,
+ sizeof(Eterm)*ERTS_MSACC_STATE_COUNT);
+ for (i = 0; i < ERTS_MSACC_STATE_COUNT; i++) {
+ erts_msacc_state_atoms[i] = am_atom_put(erts_msacc_states[i],
+ strlen(erts_msacc_states[i]));
+ }
+}
+
+void erts_msacc_init_thread(char *type, int id, int managed) {
+ ErtsMsAcc *msacc;
+
+ msacc = erts_alloc(ERTS_ALC_T_MSACC, sizeof(ErtsMsAcc));
+
+ msacc->type = strdup(type);
+ msacc->id = make_small(id);
+ msacc->unmanaged = !managed;
+ msacc->tid = erts_thr_self();
+ msacc->perf_counter = 0;
+
+#ifdef USE_THREADS
+ erts_rwmtx_rwlock(&msacc_mutex);
+ if (!managed) {
+ erts_mtx_init(&msacc->mtx,"msacc_unmanaged_mutex");
+ msacc->next = msacc_unmanaged;
+ msacc_unmanaged = msacc;
+ msacc_unmanaged_count++;
+ ERTS_MSACC_TSD_SET(msacc);
+ } else {
+ msacc->next = msacc_managed;
+ msacc_managed = msacc;
+ }
+ erts_rwmtx_rwunlock(&msacc_mutex);
+#else
+ msacc_managed = msacc;
+#endif
+
+ erts_msacc_reset(msacc);
+
+#ifdef ERTS_MSACC_ALWAYS_ON
+ ERTS_MSACC_TSD_SET(msacc);
+ msacc->perf_counter = erts_sys_perf_counter();
+ msacc->state = ERTS_MSACC_STATE_OTHER;
+#endif
+}
+
+/*
+ * Creates a structure looking like this
+ * #{ type => scheduler, id => 1, counters => #{ State1 => Counter1 ... StateN => CounterN}}
+ */
+static
+Eterm erts_msacc_gather_stats(ErtsMsAcc *msacc, Eterm **hpp, Uint *szp) {
+ int i;
+ Eterm *hp;
+ Eterm key, state_key, state_map;
+ Eterm res = THE_NON_VALUE;
+ flatmap_t *map;
+
+ if (szp) {
+ *szp += MAP_HEADER_FLATMAP_SZ + 1 + 2*(3);
+ *szp += MAP_HEADER_FLATMAP_SZ + 1 + 2*(ERTS_MSACC_STATE_COUNT);
+ for (i = 0; i < ERTS_MSACC_STATE_COUNT; i++) {
+ (void)erts_bld_sint64(NULL,szp,(Sint64)msacc->perf_counters[i]);
+#ifdef ERTS_MSACC_STATE_COUNTERS
+ (void)erts_bld_uint64(NULL,szp,msacc->state_counters[i]);
+ *szp += 3; /* tuple to put state+perf counter in */
+#endif
+ }
+ }
+
+ if (hpp) {
+ Eterm counters[ERTS_MSACC_STATE_COUNT];
+ hp = *hpp;
+ for (i = 0; i < ERTS_MSACC_STATE_COUNT; i++) {
+ Eterm counter = erts_bld_sint64(&hp,NULL,(Sint64)msacc->perf_counters[i]);
+#ifdef ERTS_MSACC_STATE_COUNTERS
+ Eterm counter__ = erts_bld_uint64(&hp,NULL,msacc->state_counters[i]);
+ counters[i] = TUPLE2(hp,counter,counter__);
+ hp += 3;
+#else
+ counters[i] = counter;
+#endif
+ }
+
+ key = TUPLE3(hp,am_counters,am_id,am_type);
+ hp += 4;
+
+ state_key = make_tuple(hp);
+ hp[0] = make_arityval(ERTS_MSACC_STATE_COUNT);
+
+ for (i = 0; i < ERTS_MSACC_STATE_COUNT; i++)
+ hp[1+i] = erts_msacc_state_atoms[i];
+ hp += 1 + ERTS_MSACC_STATE_COUNT;
+
+ map = (flatmap_t*)hp;
+ hp += MAP_HEADER_FLATMAP_SZ;
+ map->thing_word = MAP_HEADER_FLATMAP;
+ map->size = ERTS_MSACC_STATE_COUNT;
+ map->keys = state_key;
+ for (i = 0; i < ERTS_MSACC_STATE_COUNT; i++)
+ hp[i] = counters[i];
+ hp += ERTS_MSACC_STATE_COUNT;
+ state_map = make_flatmap(map);
+
+ map = (flatmap_t*)hp;
+ hp += MAP_HEADER_FLATMAP_SZ;
+ map->thing_word = MAP_HEADER_FLATMAP;
+ map->size = 3;
+ map->keys = key;
+ hp[0] = state_map;
+ hp[1] = msacc->id;
+ hp[2] = am_atom_put(msacc->type,strlen(msacc->type));
+ hp += 3;
+
+ *hpp = hp;
+ res = make_flatmap(map);
+ }
+
+ return res;
+}
+
+typedef struct {
+ int action;
+ Process *proc;
+ Eterm ref;
+ Eterm ref_heap[REF_THING_SIZE];
+ Uint req_sched;
+ erts_smp_atomic32_t refc;
+} ErtsMSAccReq;
+
+static ErtsMsAcc* get_msacc(void) {
+ ErtsMsAcc *msacc;
+ erts_rwmtx_rlock(&msacc_mutex);
+ msacc = msacc_managed;
+ while (!erts_equal_tids(msacc->tid,erts_thr_self())) {
+ msacc = msacc->next;
+ ASSERT(msacc != NULL);
+ }
+ erts_rwmtx_runlock(&msacc_mutex);
+ return msacc;
+}
+
+static void send_reply(ErtsMsAcc *msacc, ErtsMSAccReq *msaccrp) {
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ Process *rp = msaccrp->proc;
+ ErtsMessage *msgp = NULL;
+ Eterm **hpp, *hp;
+ Eterm ref_copy = NIL, msg;
+ Uint sz, *szp;
+ ErlOffHeap *ohp = NULL;
+ ErtsProcLocks rp_locks = (esdp && msaccrp->req_sched == esdp->no
+ ? ERTS_PROC_LOCK_MAIN : 0);
+
+ sz = 0;
+ hpp = NULL;
+ szp = &sz;
+
+ if (msacc->unmanaged) erts_mtx_lock(&msacc->mtx);
+
+ while (1) {
+ if (hpp)
+ ref_copy = STORE_NC(hpp, ohp, msaccrp->ref);
+ else
+ *szp += REF_THING_SIZE;
+
+ if (msaccrp->action != ERTS_MSACC_GATHER)
+ msg = ref_copy;
+ else {
+ msg = erts_msacc_gather_stats(msacc, hpp, szp);
+ msg = erts_bld_tuple(hpp, szp, 2, ref_copy, msg);
+ }
+ if (hpp)
+ break;
+
+ msgp = erts_alloc_message_heap(rp, &rp_locks, sz, &hp, &ohp);
+ hpp = &hp;
+ szp = NULL;
+ }
+
+ if (msacc->unmanaged) erts_mtx_unlock(&msacc->mtx);
+
+ erts_queue_message(rp, &rp_locks, msgp, msg, NIL);
+
+ if (esdp && msaccrp->req_sched == esdp->no)
+ rp_locks &= ~ERTS_PROC_LOCK_MAIN;
+
+ if (rp_locks)
+ erts_smp_proc_unlock(rp, rp_locks);
+
+}
+
+static void
+reply_msacc(void *vmsaccrp)
+{
+ ErtsMsAcc *msacc = ERTS_MSACC_TSD_GET();
+ ErtsMSAccReq *msaccrp = (ErtsMSAccReq *) vmsaccrp;
+
+ ASSERT(!msacc || !msacc->unmanaged);
+
+ if (msaccrp->action == ERTS_MSACC_ENABLE && !msacc) {
+ msacc = get_msacc();
+
+ msacc->perf_counter = erts_sys_perf_counter();
+
+ msacc->state = ERTS_MSACC_STATE_OTHER;
+
+ ERTS_MSACC_TSD_SET(msacc);
+
+ } else if (msaccrp->action == ERTS_MSACC_DISABLE && msacc) {
+ ERTS_MSACC_TSD_SET(NULL);
+ } else if (msaccrp->action == ERTS_MSACC_RESET) {
+ msacc = msacc ? msacc : get_msacc();
+ erts_msacc_reset(msacc);
+ } else if (msaccrp->action == ERTS_MSACC_GATHER && !msacc) {
+ msacc = get_msacc();
+ }
+
+ ASSERT(!msacc || !msacc->unmanaged);
+
+ send_reply(msacc, msaccrp);
+
+ erts_proc_dec_refc(msaccrp->proc);
+
+ if (erts_smp_atomic32_dec_read_nob(&msaccrp->refc) == 0)
+ erts_free(ERTS_ALC_T_MSACC, vmsaccrp);
+}
+
+static void erts_msacc_reset(ErtsMsAcc *msacc) {
+ int i;
+ if (msacc->unmanaged) erts_mtx_lock(&msacc->mtx);
+
+ for (i = 0; i < ERTS_MSACC_STATE_COUNT; i++) {
+ msacc->perf_counters[i] = 0;
+#ifdef ERTS_MSACC_STATE_COUNTERS
+ msacc->state_counters[i] = 0;
+#endif
+ }
+
+ if (msacc->unmanaged) erts_mtx_unlock(&msacc->mtx);
+}
+
+#endif /* ERTS_ENABLE_MSACC */
+
+
+/*
+ * This function is responsible for enabling, disabling, resetting and
+ * gathering data related to microstate accounting.
+ *
+ * Managed threads and unmanaged threads are handled differently.
+ * - managed threads get a misc_aux job telling them to switch on msacc
+ * - unmanaged have some fields protected by a mutex that has to be taken
+ * before any values can be updated
+ *
+ * For performance reasons there is also a global value erts_msacc_enabled
+ * that controls the state of all threads. Statistics gathering is only on
+ * if erts_msacc_enabled && msacc is true.
+ */
+Eterm
+erts_msacc_request(Process *c_p, int action, Eterm *threads)
+{
+#ifdef ERTS_ENABLE_MSACC
+ ErtsMsAcc *msacc = ERTS_MSACC_TSD_GET();
+ ErtsSchedulerData *esdp = ERTS_PROC_GET_SCHDATA(c_p);
+ Eterm ref;
+ ErtsMSAccReq *msaccrp;
+ Eterm *hp;
+
+
+#ifdef ERTS_MSACC_ALWAYS_ON
+ if (action == ERTS_MSACC_ENABLE || action == ERTS_MSACC_DISABLE)
+ return THE_NON_VALUE;
+#else
+ /* take care of double enable, and double disable here */
+ if (msacc && action == ERTS_MSACC_ENABLE) {
+ return THE_NON_VALUE;
+ } else if (!msacc && action == ERTS_MSACC_DISABLE) {
+ return THE_NON_VALUE;
+ }
+#endif
+
+ ref = erts_make_ref(c_p);
+
+ msaccrp = erts_alloc(ERTS_ALC_T_MSACC, sizeof(ErtsMSAccReq));
+ hp = &msaccrp->ref_heap[0];
+
+ msaccrp->action = action;
+ msaccrp->proc = c_p;
+ msaccrp->ref = STORE_NC(&hp, NULL, ref);
+ msaccrp->req_sched = esdp->no;
+
+#ifdef ERTS_SMP
+ *threads = erts_no_schedulers;
+ *threads += 1; /* aux thread */
+#else
+ *threads = 1;
+#endif
+
+ erts_smp_atomic32_init_nob(&msaccrp->refc,(erts_aint32_t)*threads);
+
+ erts_proc_add_refc(c_p, *threads);
+
+ if (erts_no_schedulers > 1)
+ erts_schedule_multi_misc_aux_work(1,
+ erts_no_schedulers,
+ reply_msacc,
+ (void *) msaccrp);
+#ifdef ERTS_SMP
+ /* aux thread */
+ erts_schedule_misc_aux_work(0, reply_msacc, (void *) msaccrp);
+#endif
+
+#ifdef USE_THREADS
+ /* Manage unmanaged threads */
+ switch (action) {
+ case ERTS_MSACC_GATHER: {
+ Uint unmanaged_count;
+ ErtsMsAcc *msacc, **unmanaged;
+ int i = 0;
+
+ /* we copy a list of pointers here so that we do not have to have
+ the msacc_mutex when sending messages */
+ erts_rwmtx_rlock(&msacc_mutex);
+ unmanaged_count = msacc_unmanaged_count;
+ unmanaged = erts_alloc(ERTS_ALC_T_MSACC,
+ sizeof(ErtsMsAcc*)*unmanaged_count);
+
+ for (i = 0, msacc = msacc_unmanaged;
+ i < unmanaged_count;
+ i++, msacc = msacc->next) {
+ unmanaged[i] = msacc;
+ }
+ erts_rwmtx_runlock(&msacc_mutex);
+
+ for (i = 0; i < unmanaged_count; i++) {
+ erts_mtx_lock(&unmanaged[i]->mtx);
+ if (unmanaged[i]->perf_counter) {
+ ErtsSysPerfCounter perf_counter;
+ /* if enabled update stats */
+ perf_counter = erts_sys_perf_counter();
+ unmanaged[i]->perf_counters[unmanaged[i]->state] +=
+ perf_counter - unmanaged[i]->perf_counter;
+ unmanaged[i]->perf_counter = perf_counter;
+ }
+ erts_mtx_unlock(&unmanaged[i]->mtx);
+ send_reply(unmanaged[i],msaccrp);
+ }
+ erts_free(ERTS_ALC_T_MSACC,unmanaged);
+ /* We have just sent unmanaged_count messages, so bump no of threads */
+ *threads += unmanaged_count;
+ break;
+ }
+ case ERTS_MSACC_RESET: {
+ ErtsMsAcc *msacc;
+ erts_rwmtx_rlock(&msacc_mutex);
+ for (msacc = msacc_unmanaged; msacc != NULL; msacc = msacc->next)
+ erts_msacc_reset(msacc);
+ erts_rwmtx_runlock(&msacc_mutex);
+ break;
+ }
+ case ERTS_MSACC_ENABLE: {
+ erts_rwmtx_rlock(&msacc_mutex);
+ for (msacc = msacc_unmanaged; msacc != NULL; msacc = msacc->next) {
+ erts_mtx_lock(&msacc->mtx);
+ msacc->perf_counter = erts_sys_perf_counter();
+ /* we assume the unmanaged thread is sleeping */
+ msacc->state = ERTS_MSACC_STATE_SLEEP;
+ erts_mtx_unlock(&msacc->mtx);
+ }
+ erts_rwmtx_runlock(&msacc_mutex);
+ break;
+ }
+ case ERTS_MSACC_DISABLE: {
+ ErtsSysPerfCounter perf_counter;
+ erts_rwmtx_rlock(&msacc_mutex);
+ /* make sure to update stats with latest results */
+ for (msacc = msacc_unmanaged; msacc != NULL; msacc = msacc->next) {
+ erts_mtx_lock(&msacc->mtx);
+ perf_counter = erts_sys_perf_counter();
+ msacc->perf_counters[msacc->state] += perf_counter - msacc->perf_counter;
+ msacc->perf_counter = 0;
+ erts_mtx_unlock(&msacc->mtx);
+ }
+ erts_rwmtx_runlock(&msacc_mutex);
+ break;
+ }
+ default: { ASSERT(0); }
+ }
+
+#endif
+
+ *threads = make_small(*threads);
+
+ reply_msacc((void *) msaccrp);
+
+#ifndef ERTS_MSACC_ALWAYS_ON
+ /* enable/disable the global value */
+ if (action == ERTS_MSACC_ENABLE) {
+ erts_msacc_enabled = 1;
+ } else if (action == ERTS_MSACC_DISABLE) {
+ erts_msacc_enabled = 0;
+ }
+#endif
+
+ return ref;
+#else
+ return THE_NON_VALUE;
+#endif
+}
diff --git a/erts/emulator/beam/erl_msacc.h b/erts/emulator/beam/erl_msacc.h
new file mode 100644
index 0000000000..284388f7aa
--- /dev/null
+++ b/erts/emulator/beam/erl_msacc.h
@@ -0,0 +1,409 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2014-2015. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+#ifndef ERL_MSACC_H__
+#define ERL_MSACC_H__
+
+/* Can be enabled/disabled via configure */
+#if ERTS_ENABLE_MSACC == 2
+#define ERTS_MSACC_EXTENDED_STATES 1
+#endif
+
+/* Uncomment this to also count the number of
+ transitions to a state. This will add a count
+ to the counter map. */
+/* #define ERTS_MSACC_STATE_COUNTERS 1 */
+
+/* Uncomment this to make msacc to always be on,
+ this reduces overhead a little bit when profiling */
+/* #define ERTS_MSACC_ALWAYS_ON 1 */
+
+#define ERTS_MSACC_DISABLE 0
+#define ERTS_MSACC_ENABLE 1
+#define ERTS_MSACC_RESET 2
+#define ERTS_MSACC_GATHER 3
+
+/*
+ * When adding a new state, you have to:
+ * * Add it here
+ * * Increment ERTS_MSACC_STATE_COUNT
+ * * Add string value to erts_msacc_states
+ * * Have to be in alphabetical order!
+ * * Only add states to the non-extended section after
+ * careful benchmarking to make sure the overhead
+ * when disabled is minimal.
+ */
+
+#ifndef ERTS_MSACC_EXTENDED_STATES
+#define ERTS_MSACC_STATE_AUX 0
+#define ERTS_MSACC_STATE_CHECK_IO 1
+#define ERTS_MSACC_STATE_EMULATOR 2
+#define ERTS_MSACC_STATE_GC 3
+#define ERTS_MSACC_STATE_OTHER 4
+#define ERTS_MSACC_STATE_PORT 5
+#define ERTS_MSACC_STATE_SLEEP 6
+
+#define ERTS_MSACC_STATE_COUNT 7
+
+#if ERTS_MSACC_STATE_STRINGS && ERTS_ENABLE_MSACC
+static char *erts_msacc_states[] = {
+ "aux",
+ "check_io",
+ "emulator",
+ "gc",
+ "other",
+ "port",
+ "sleep"
+};
+#endif
+
+#else
+
+#define ERTS_MSACC_STATE_ALLOC 0
+#define ERTS_MSACC_STATE_AUX 1
+#define ERTS_MSACC_STATE_BIF 2
+#define ERTS_MSACC_STATE_BUSY_WAIT 3
+#define ERTS_MSACC_STATE_CHECK_IO 4
+#define ERTS_MSACC_STATE_EMULATOR 5
+#define ERTS_MSACC_STATE_ETS 6
+#define ERTS_MSACC_STATE_GC 7
+#define ERTS_MSACC_STATE_GC_FULL 8
+#define ERTS_MSACC_STATE_NIF 9
+#define ERTS_MSACC_STATE_OTHER 10
+#define ERTS_MSACC_STATE_PORT 11
+#define ERTS_MSACC_STATE_SEND 12
+#define ERTS_MSACC_STATE_SLEEP 13
+#define ERTS_MSACC_STATE_TIMERS 14
+
+#define ERTS_MSACC_STATE_COUNT 15
+
+#if ERTS_MSACC_STATE_STRINGS
+static char *erts_msacc_states[] = {
+ "alloc",
+ "aux",
+ "bif",
+ "busy_wait",
+ "check_io",
+ "emulator",
+ "ets",
+ "gc",
+ "gc_full",
+ "nif",
+ "other",
+ "port",
+ "send",
+ "sleep",
+ "timers"
+};
+#endif
+
+#endif
+
+typedef struct erl_msacc_t_ ErtsMsAcc;
+
+struct erl_msacc_t_ {
+
+ /* the the values below are protected by mtx iff unmanaged = 1 */
+ ErtsSysPerfCounter perf_counter;
+ ErtsSysPerfCounter perf_counters[ERTS_MSACC_STATE_COUNT];
+#ifdef ERTS_MSACC_STATE_COUNTERS
+ Uint64 state_counters[ERTS_MSACC_STATE_COUNT];
+#endif
+ Uint state;
+
+ /* protected by msacc_mutex in erl_msacc.c, and should be constant */
+ int unmanaged;
+ erts_mtx_t mtx;
+ ErtsMsAcc *next;
+ erts_tid_t tid;
+ Eterm id;
+ char *type;
+};
+
+#if ERTS_ENABLE_MSACC
+
+#define ERTS_MSACC_INLINE ERTS_GLB_INLINE
+
+#ifdef USE_THREADS
+extern erts_tsd_key_t erts_msacc_key;
+#else
+extern ErtsMsAcc *erts_msacc;
+#endif
+
+#ifdef ERTS_MSACC_ALWAYS_ON
+#define erts_msacc_enabled 1
+#else
+extern int erts_msacc_enabled;
+#endif
+
+#ifdef USE_THREADS
+#define ERTS_MSACC_TSD_GET() erts_tsd_get(erts_msacc_key)
+#define ERTS_MSACC_TSD_SET(tsd) erts_tsd_set(erts_msacc_key,tsd)
+#else
+#define ERTS_MSACC_TSD_GET() erts_msacc
+#define ERTS_MSACC_TSD_SET(tsd) erts_msacc = tsd
+#endif
+
+void erts_msacc_early_init(void);
+void erts_msacc_init(void);
+void erts_msacc_init_thread(char *type, int id, int liberty);
+
+/* The defines below are used to instrument the vm code
+ * with different state changes. There are two variants
+ * of each define. One that has a cached ErtsMsAcc *
+ * that it can use, and one that does not.
+ * The cached values are necessary to have in order to get
+ * low enough overhead when running without msacc enabled.
+ *
+ * The two most common patterns to use the function with are:
+ *
+ * ERTS_MSACC_PUSH_AND_SET_STATE(ERTS_MSACC_STATE_NEWSTATE);
+ * ... call other function in new state ...
+ * ERTS_MSACC_POP_STATE();
+ *
+ * Note that the erts_msacc_push* function declare new variables, so
+ * to conform with C89 we have to call it in the beginning of a function.
+ * We might not want to change state it the beginning though, so we use this:
+ *
+ * ERTS_MSACC_PUSH_STATE();
+ * ... some other code ...
+ * ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_NEWSTATE);
+ * ... call other function in new state ...
+ * ERTS_MSACC_POP_STATE();
+ *
+ * Notice that we used the cached version of set_state as push_state already
+ * read the erts_msacc_enabled to the cache.
+ *
+ * Most macros also have other variants with the suffix _m which means that
+ * they are known to only be called in managed threads, or with the _x suffix
+ * which means that it should only be used in an emulator compiled with
+ * extended states.
+ *
+ * Here is a listing of the entire api:
+ *
+ * void ERTS_MSACC_DECLARE_CACHE()
+ * void ERTS_MSACC_UPDATE_CACHE()
+ * void ERTS_MSACC_IS_ENABLED()
+ * void ERTS_MSACC_IS_ENABLED_CACHED()
+ *
+ * void ERTS_MSACC_PUSH_STATE()
+ * void ERTS_MSACC_SET_STATE(int state)
+ * void ERTS_MSACC_PUSH_AND_SET_STATE(int state)
+ *
+ * void ERTS_MSACC_PUSH_STATE_CACHED()
+ * void ERTS_MSACC_SET_STATE_CACHED(int state)
+ * void ERTS_MSACC_PUSH_AND_SET_STATE_CACHED(int state)
+ * void ERTS_MSACC_POP_STATE()
+ *
+ * void ERTS_MSACC_PUSH_STATE_M()
+ * void ERTS_MSACC_PUSH_STATE_CACHED_M()
+ * void ERTS_MSACC_SET_STATE_CACHED_M(int state)
+ * void ERTS_MSACC_SET_STATE_M(int state)
+ * void ERTS_MSACC_POP_STATE_M()
+ * void ERTS_MSACC_PUSH_AND_SET_STATE_M(int state)
+ *
+ * Most functions are also available with an _x suffix that are only enabled
+ * when using the extra states. If they are not, just add them to the end
+ * of this file.
+ */
+
+/* cache handling functions */
+#define ERTS_MSACC_IS_ENABLED() ERTS_UNLIKELY(erts_msacc_enabled)
+#define ERTS_MSACC_DECLARE_CACHE() \
+ ErtsMsAcc *ERTS_MSACC_UPDATE_CACHE(); \
+ ERTS_DECLARE_DUMMY(Uint __erts_msacc_state) = ERTS_MSACC_STATE_OTHER;
+#define ERTS_MSACC_IS_ENABLED_CACHED() ERTS_UNLIKELY(__erts_msacc_cache != NULL)
+#define ERTS_MSACC_UPDATE_CACHE() \
+ __erts_msacc_cache = erts_msacc_enabled ? ERTS_MSACC_TSD_GET() : NULL
+
+
+/* The defines below implicitly declare and load a new cache */
+#define ERTS_MSACC_PUSH_STATE() \
+ ERTS_MSACC_DECLARE_CACHE(); \
+ ERTS_MSACC_PUSH_STATE_CACHED()
+#define ERTS_MSACC_SET_STATE(state) \
+ ERTS_MSACC_DECLARE_CACHE(); \
+ ERTS_MSACC_SET_STATE_CACHED(state)
+#define ERTS_MSACC_PUSH_AND_SET_STATE(state) \
+ ERTS_MSACC_PUSH_STATE(); ERTS_MSACC_SET_STATE_CACHED(state)
+
+/* The defines below need an already declared cache to work */
+#define ERTS_MSACC_PUSH_STATE_CACHED() \
+ __erts_msacc_state = ERTS_MSACC_IS_ENABLED_CACHED() ? \
+ erts_msacc_get_state_um__(__erts_msacc_cache) : ERTS_MSACC_STATE_OTHER
+#define ERTS_MSACC_SET_STATE_CACHED(state) \
+ if (ERTS_MSACC_IS_ENABLED_CACHED()) \
+ erts_msacc_set_state_um__(__erts_msacc_cache, state, 1)
+#define ERTS_MSACC_PUSH_AND_SET_STATE_CACHED(state) \
+ ERTS_MSACC_PUSH_STATE_CACHED(); ERTS_MSACC_SET_STATE_CACHED(state)
+#define ERTS_MSACC_POP_STATE() \
+ if (ERTS_MSACC_IS_ENABLED_CACHED()) \
+ erts_msacc_set_state_um__(__erts_msacc_cache, __erts_msacc_state, 0)
+
+/* Only use these defines when we know that we have in a managed thread */
+#define ERTS_MSACC_PUSH_STATE_M() \
+ ERTS_MSACC_DECLARE_CACHE(); \
+ ERTS_MSACC_PUSH_STATE_CACHED_M()
+#define ERTS_MSACC_PUSH_STATE_CACHED_M() \
+ __erts_msacc_state = ERTS_MSACC_IS_ENABLED_CACHED() ? \
+ erts_msacc_get_state_m__(__erts_msacc_cache) : ERTS_MSACC_STATE_OTHER
+#define ERTS_MSACC_SET_STATE_M(state) \
+ ERTS_MSACC_DECLARE_CACHE(); \
+ ERTS_MSACC_SET_STATE_CACHED_M(state)
+#define ERTS_MSACC_SET_STATE_CACHED_M(state) \
+ if (ERTS_MSACC_IS_ENABLED_CACHED()) \
+ erts_msacc_set_state_m__(__erts_msacc_cache, state, 1)
+#define ERTS_MSACC_POP_STATE_M() \
+ if (ERTS_MSACC_IS_ENABLED_CACHED()) \
+ erts_msacc_set_state_m__(__erts_msacc_cache, __erts_msacc_state, 0)
+#define ERTS_MSACC_PUSH_AND_SET_STATE_M(state) \
+ ERTS_MSACC_PUSH_STATE_M(); ERTS_MSACC_SET_STATE_CACHED_M(state)
+
+ERTS_MSACC_INLINE
+void erts_msacc_set_state_um__(ErtsMsAcc *msacc,Uint state,int increment);
+ERTS_MSACC_INLINE
+void erts_msacc_set_state_m__(ErtsMsAcc *msacc,Uint state,int increment);
+
+ERTS_MSACC_INLINE
+Uint erts_msacc_get_state_um__(ErtsMsAcc *msacc);
+ERTS_MSACC_INLINE
+Uint erts_msacc_get_state_m__(ErtsMsAcc *msacc);
+
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+ERTS_MSACC_INLINE
+Uint erts_msacc_get_state_um__(ErtsMsAcc *msacc) {
+ Uint state;
+ if (msacc->unmanaged)
+ erts_mtx_lock(&msacc->mtx);
+ state = msacc->state;
+ if (msacc->unmanaged)
+ erts_mtx_unlock(&msacc->mtx);
+ return state;
+}
+
+ERTS_MSACC_INLINE
+Uint erts_msacc_get_state_m__(ErtsMsAcc *msacc) {
+ return msacc->state;
+}
+
+ERTS_MSACC_INLINE
+void erts_msacc_set_state_um__(ErtsMsAcc *msacc, Uint new_state, int increment) {
+ if (ERTS_UNLIKELY(msacc->unmanaged)) {
+ erts_mtx_lock(&msacc->mtx);
+ msacc->state = new_state;
+ if (ERTS_LIKELY(!msacc->perf_counter)) {
+ erts_mtx_unlock(&msacc->mtx);
+ return;
+ }
+ }
+
+ erts_msacc_set_state_m__(msacc,new_state,increment);
+
+ if (ERTS_UNLIKELY(msacc->unmanaged))
+ erts_mtx_unlock(&msacc->mtx);
+}
+
+ERTS_MSACC_INLINE
+void erts_msacc_set_state_m__(ErtsMsAcc *msacc, Uint new_state, int increment) {
+ ErtsSysPerfCounter prev_perf_counter;
+ Sint64 diff;
+
+ if (new_state == msacc->state)
+ return;
+
+ prev_perf_counter = msacc->perf_counter;
+ msacc->perf_counter = erts_sys_perf_counter();
+ diff = msacc->perf_counter - prev_perf_counter;
+ ASSERT(diff >= 0);
+ msacc->perf_counters[msacc->state] += diff;
+#ifdef ERTS_MSACC_STATE_COUNTERS
+ msacc->state_counters[new_state] += increment;
+#endif
+ msacc->state = new_state;
+}
+
+#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
+
+#else
+
+#define ERTS_MSACC_IS_ENABLED() 0
+#define erts_msacc_early_init()
+#define erts_msacc_init()
+#define erts_msacc_init_thread(type, id, liberty)
+#define ERTS_MSACC_PUSH_STATE()
+#define ERTS_MSACC_PUSH_STATE_CACHED()
+#define ERTS_MSACC_POP_STATE()
+#define ERTS_MSACC_SET_STATE(state)
+#define ERTS_MSACC_SET_STATE_CACHED(state)
+#define ERTS_MSACC_PUSH_AND_SET_STATE(state)
+#define ERTS_MSACC_PUSH_AND_SET_STATE_CACHED(state)
+#define ERTS_MSACC_UPDATE_CACHE()
+#define ERTS_MSACC_IS_ENABLED_CACHED()
+#define ERTS_MSACC_DECLARE_CACHE()
+#define ERTS_MSACC_PUSH_STATE_M()
+#define ERTS_MSACC_PUSH_STATE_CACHED_M()
+#define ERTS_MSACC_SET_STATE_CACHED_M(state)
+#define ERTS_MSACC_POP_STATE_M()
+#define ERTS_MSACC_PUSH_AND_SET_STATE_M(state)
+
+
+#endif /* ERTS_ENABLE_MSACC */
+
+#ifndef ERTS_MSACC_EXTENDED_STATES
+
+#define ERTS_MSACC_PUSH_STATE_X()
+#define ERTS_MSACC_POP_STATE_X()
+#define ERTS_MSACC_SET_STATE_X(state)
+#define ERTS_MSACC_SET_STATE_M_X(state)
+#define ERTS_MSACC_SET_STATE_CACHED_X(state)
+#define ERTS_MSACC_PUSH_AND_SET_STATE_X(state)
+#define ERTS_MSACC_PUSH_AND_SET_STATE_CACHED_X(state)
+#define ERTS_MSACC_UPDATE_CACHE_X()
+#define ERTS_MSACC_IS_ENABLED_CACHED_X() 0
+#define ERTS_MSACC_DECLARE_CACHE_X()
+#define ERTS_MSACC_PUSH_STATE_M_X()
+#define ERTS_MSACC_PUSH_STATE_CACHED_M_X()
+#define ERTS_MSACC_SET_STATE_CACHED_M_X(state)
+#define ERTS_MSACC_POP_STATE_M_X()
+#define ERTS_MSACC_PUSH_AND_SET_STATE_M_X(state)
+
+#else
+
+#define ERTS_MSACC_PUSH_STATE_X() ERTS_MSACC_PUSH_STATE()
+#define ERTS_MSACC_POP_STATE_X() ERTS_MSACC_POP_STATE()
+#define ERTS_MSACC_SET_STATE_X(state) ERTS_MSACC_SET_STATE(state)
+#define ERTS_MSACC_SET_STATE_M_X(state) ERTS_MSACC_SET_STATE_M(state)
+#define ERTS_MSACC_SET_STATE_CACHED_X(state) ERTS_MSACC_SET_STATE_CACHED(state)
+#define ERTS_MSACC_PUSH_AND_SET_STATE_X(state) ERTS_MSACC_PUSH_AND_SET_STATE(state)
+#define ERTS_MSACC_PUSH_AND_SET_STATE_CACHED_X(state) ERTS_MSACC_PUSH_AND_SET_STATE_CACHED(state)
+#define ERTS_MSACC_UPDATE_CACHE_X() ERTS_MSACC_UPDATE_CACHE()
+#define ERTS_MSACC_IS_ENABLED_CACHED_X() ERTS_MSACC_IS_ENABLED_CACHED()
+#define ERTS_MSACC_DECLARE_CACHE_X() ERTS_MSACC_DECLARE_CACHE()
+#define ERTS_MSACC_PUSH_STATE_M_X() ERTS_MSACC_PUSH_STATE_M()
+#define ERTS_MSACC_PUSH_STATE_CACHED_M_X() ERTS_MSACC_PUSH_STATE_CACHED_M()
+#define ERTS_MSACC_SET_STATE_CACHED_M_X(state) ERTS_MSACC_SET_STATE_CACHED_M(state)
+#define ERTS_MSACC_POP_STATE_M_X() ERTS_MSACC_POP_STATE_M()
+#define ERTS_MSACC_PUSH_AND_SET_STATE_M_X(state) ERTS_MSACC_PUSH_AND_SET_STATE_M(state)
+
+#endif /* !ERTS_MSACC_EXTENDED_STATES */
+
+#endif /* ERL_MSACC_H__ */
diff --git a/erts/emulator/beam/erl_port_task.c b/erts/emulator/beam/erl_port_task.c
index 2c09834d19..197b328fe2 100644
--- a/erts/emulator/beam/erl_port_task.c
+++ b/erts/emulator/beam/erl_port_task.c
@@ -1648,6 +1648,7 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
int active;
Uint64 start_time = 0;
ErtsSchedulerData *esdp = runq->scheduler;
+ ERTS_MSACC_PUSH_STATE_M();
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(runq));
@@ -1690,6 +1691,7 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
state = erts_atomic32_read_nob(&pp->state);
pp->reds = ERTS_PORT_REDS_EXECUTE;
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_PORT);
goto begin_handle_tasks;
while (1) {
@@ -1822,6 +1824,7 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
}
erts_unblock_fpe(fpe_was_unmasked);
+ ERTS_MSACC_POP_STATE_M();
if (io_tasks_executed) {
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index 7d194f9840..8e1ebe795c 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -148,14 +148,14 @@ extern BeamInstr beam_apply[];
extern BeamInstr beam_exit[];
extern BeamInstr beam_continue_exit[];
-int erts_default_spo_flags = 0;
-int erts_eager_check_io = 1;
-int erts_sched_compact_load;
-int erts_sched_balance_util = 0;
-Uint erts_no_schedulers;
+int ERTS_WRITE_UNLIKELY(erts_default_spo_flags) = 0;
+int ERTS_WRITE_UNLIKELY(erts_eager_check_io) = 1;
+int ERTS_WRITE_UNLIKELY(erts_sched_compact_load);
+int ERTS_WRITE_UNLIKELY(erts_sched_balance_util) = 0;
+Uint ERTS_WRITE_UNLIKELY(erts_no_schedulers);
#ifdef ERTS_DIRTY_SCHEDULERS
-Uint erts_no_dirty_cpu_schedulers;
-Uint erts_no_dirty_io_schedulers;
+Uint ERTS_WRITE_UNLIKELY(erts_no_dirty_cpu_schedulers);
+Uint ERTS_WRITE_UNLIKELY(erts_no_dirty_io_schedulers);
#endif
static char *erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_NO_FLAGS] = {0};
@@ -295,7 +295,7 @@ do { \
erts_sched_stat_t erts_sched_stat;
#ifdef USE_THREADS
-static erts_tsd_key_t sched_data_key;
+static erts_tsd_key_t ERTS_WRITE_UNLIKELY(sched_data_key);
#endif
static erts_smp_atomic32_t function_calls;
@@ -335,10 +335,10 @@ static ErtsAlignedSchedulerSleepInfo *aligned_dirty_io_sched_sleep_info;
static Uint last_reductions;
static Uint last_exact_reductions;
-Eterm erts_system_monitor;
-Eterm erts_system_monitor_long_gc;
-Uint erts_system_monitor_long_schedule;
-Eterm erts_system_monitor_large_heap;
+Eterm ERTS_WRITE_UNLIKELY(erts_system_monitor);
+Eterm ERTS_WRITE_UNLIKELY(erts_system_monitor_long_gc);
+Uint ERTS_WRITE_UNLIKELY(erts_system_monitor_long_schedule);
+Eterm ERTS_WRITE_UNLIKELY(erts_system_monitor_large_heap);
struct erts_system_monitor_flags_t erts_system_monitor_flags;
/* system performance monitor */
@@ -941,6 +941,12 @@ sched_wall_time_change(ErtsSchedulerData *esdp, int working)
}
}
}
+ if (!working) {
+ ERTS_MSACC_SET_STATE_M_X(ERTS_MSACC_STATE_BUSY_WAIT);
+ } else {
+ ERTS_MSACC_SET_STATE_M_X(ERTS_MSACC_STATE_OTHER);
+ }
+
}
typedef struct {
@@ -1696,15 +1702,17 @@ handle_delayed_dealloc(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waitin
int need_thr_progress = 0;
ErtsThrPrgrVal wakeup = ERTS_THR_PRGR_INVALID;
int more_work = 0;
-
+ ERTS_MSACC_PUSH_STATE_M_X();
#ifdef ERTS_DIRTY_SCHEDULERS
ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp));
#endif
unset_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_DD);
+ ERTS_MSACC_SET_STATE_CACHED_M_X(ERTS_MSACC_STATE_ALLOC);
erts_alloc_scheduler_handle_delayed_dealloc((void *) awdp->esdp,
&need_thr_progress,
&wakeup,
&more_work);
+ ERTS_MSACC_POP_STATE_M_X();
if (more_work) {
if (set_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_DD)
& ERTS_SSI_AUX_WORK_DD_THR_PRGR) {
@@ -2176,12 +2184,15 @@ handle_aux_work(ErtsAuxWorkData *awdp, erts_aint32_t orig_aux_work, int waiting)
ERTS_DBG_CHK_AUX_WORK_VAL(aux_work); \
if (!(aux_work & ~ignore)) { \
ERTS_DBG_CHK_AUX_WORK_VAL(aux_work); \
+ ERTS_MSACC_UPDATE_CACHE(); \
+ ERTS_MSACC_POP_STATE_M(); \
return aux_work; \
} \
}
erts_aint32_t aux_work = orig_aux_work;
erts_aint32_t ignore = 0;
+ ERTS_MSACC_PUSH_AND_SET_STATE_M(ERTS_MSACC_STATE_AUX);
ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp));
#ifdef ERTS_SMP
@@ -2272,6 +2283,8 @@ handle_aux_work(ErtsAuxWorkData *awdp, erts_aint32_t orig_aux_work, int waiting)
haw_thr_prgr_current_check_progress(awdp);
#endif
+ ERTS_MSACC_UPDATE_CACHE();
+ ERTS_MSACC_POP_STATE_M();
return aux_work;
#undef HANDLE_AUX_WORK
@@ -2779,6 +2792,8 @@ aux_thread(void *unused)
ssi->event = erts_tse_fetch();
+ erts_msacc_init_thread("aux", 1, 1);
+
callbacks.arg = (void *) ssi;
callbacks.wakeup = thr_prgr_wakeup;
callbacks.prepare_wait = thr_prgr_prep_wait;
@@ -2789,6 +2804,7 @@ aux_thread(void *unused)
init_aux_work_data(awdp, NULL, NULL);
awdp->ssi = ssi;
+
sched_prep_spin_wait(ssi);
while (1) {
@@ -2842,6 +2858,9 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
#ifdef ERTS_SMP
int thr_prgr_active = 1;
erts_aint32_t flgs;
+#endif
+ ERTS_MSACC_PUSH_STATE_M();
+#ifdef ERTS_SMP
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
@@ -2898,6 +2917,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
sched_wall_time_change(esdp, 1);
}
aux_work = handle_aux_work(&esdp->aux_work_data, aux_work, 1);
+ ERTS_MSACC_UPDATE_CACHE();
if (aux_work && erts_thr_progress_update(esdp))
erts_thr_progress_leader_update(esdp);
}
@@ -2959,7 +2979,9 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
- 1) + 1;
} else
timeout = -1;
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP);
res = erts_tse_twait(ssi->event, timeout);
+ ERTS_MSACC_POP_STATE_M();
current_time = ERTS_SCHEDULER_IS_DIRTY(esdp) ? 0 :
erts_get_monotonic_time(esdp);
} while (res == EINTR);
@@ -3028,9 +3050,13 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
if (working)
sched_wall_time_change(esdp, working = 0);
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_CHECK_IO);
+
ASSERT(!erts_port_task_have_outstanding_io_tasks());
erl_sys_schedule(1); /* Might give us something to do */
+ ERTS_MSACC_POP_STATE_M();
+
if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) {
current_time = erts_get_monotonic_time(esdp);
if (current_time >= erts_next_timeout_time(esdp->next_tmo_ref))
@@ -3051,6 +3077,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
erts_thr_progress_active(esdp, thr_prgr_active = 1);
#endif
aux_work = handle_aux_work(&esdp->aux_work_data, aux_work, 1);
+ ERTS_MSACC_UPDATE_CACHE();
#ifdef ERTS_SMP
if (aux_work && erts_thr_progress_update(esdp))
erts_thr_progress_leader_update(esdp);
@@ -3148,8 +3175,12 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
ASSERT(!erts_port_task_have_outstanding_io_tasks());
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_CHECK_IO);
+
erl_sys_schedule(0);
+ ERTS_MSACC_POP_STATE_M();
+
if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) {
ErtsMonotonicTime current_time = erts_get_monotonic_time(esdp);
if (current_time >= erts_next_timeout_time(esdp->next_tmo_ref))
@@ -7926,6 +7957,8 @@ sched_thread_func(void *vesdp)
callbacks.wait = thr_prgr_wait;
callbacks.finalize_wait = thr_prgr_fin_wait;
+ erts_msacc_init_thread("scheduler", no, 1);
+
erts_thr_progress_register_managed_thread(esdp, &callbacks, 0);
erts_alloc_register_scheduler(vesdp);
#endif
@@ -9204,6 +9237,8 @@ Process *schedule(Process *p, int calls)
Uint32 flags;
erts_aint32_t state = 0; /* Supress warning... */
+ ERTS_MSACC_DECLARE_CACHE();
+
#ifdef USE_VM_PROBES
if (p != NULL && DTRACE_ENABLED(process_unscheduled)) {
DTRACE_CHARBUF(process_buf, DTRACE_TERM_BUF_SIZE);
@@ -9308,6 +9343,8 @@ Process *schedule(Process *p, int calls)
erts_smp_proc_unlock(p, ERTS_PROC_LOCK_MAIN|ERTS_PROC_LOCK_STATUS);
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_OTHER);
+
if (state & ERTS_PSFLG_FREE) {
#ifdef ERTS_SMP
ASSERT(esdp->free_process == p);
@@ -9468,7 +9505,7 @@ Process *schedule(Process *p, int calls)
scheduler_wait(&fcalls, esdp, rq);
flags = ERTS_RUNQ_FLGS_SET_NOB(rq, ERTS_RUNQ_FLG_EXEC);
flags |= ERTS_RUNQ_FLG_EXEC;
-
+ ERTS_MSACC_UPDATE_CACHE();
#ifdef ERTS_SMP
non_empty_runq(rq);
#endif
@@ -9483,6 +9520,8 @@ Process *schedule(Process *p, int calls)
* Schedule system-level activities.
*/
+ ERTS_MSACC_PUSH_STATE_CACHED_M();
+
erts_smp_atomic32_set_relb(&function_calls, 0);
fcalls = 0;
@@ -9490,7 +9529,9 @@ Process *schedule(Process *p, int calls)
erts_sys_schedule_interrupt(0);
#endif
erts_smp_runq_unlock(rq);
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_CHECK_IO);
erl_sys_schedule(1);
+ ERTS_MSACC_POP_STATE_M();
current_time = erts_get_monotonic_time(esdp);
if (current_time >= erts_next_timeout_time(esdp->next_tmo_ref))
@@ -9568,9 +9609,12 @@ Process *schedule(Process *p, int calls)
case 0: /* No process at all */
default:
ASSERT(qmask == 0);
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_OTHER);
goto check_activities_to_run;
}
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_EMULATOR);
+
BM_START_TIMER(system);
/*
diff --git a/erts/emulator/beam/erl_threads.h b/erts/emulator/beam/erl_threads.h
index 34f91e2ec8..e689547d1d 100644
--- a/erts/emulator/beam/erl_threads.h
+++ b/erts/emulator/beam/erl_threads.h
@@ -2056,6 +2056,8 @@ erts_atomic64_read_dirty(erts_atomic64_t *var)
#endif /* !USE_THREADS */
+#include "erl_msacc.h"
+
#if ERTS_GLB_INLINE_INCL_FUNC_DEF
ERTS_GLB_INLINE void
@@ -2414,6 +2416,7 @@ erts_cnd_wait(erts_cnd_t *cnd, erts_mtx_t *mtx)
{
#ifdef USE_THREADS
int res;
+ ERTS_MSACC_PUSH_AND_SET_STATE(ERTS_MSACC_STATE_SLEEP);
#ifdef ERTS_ENABLE_LOCK_CHECK
erts_lc_unlock(&mtx->lc);
#endif
@@ -2432,6 +2435,7 @@ erts_cnd_wait(erts_cnd_t *cnd, erts_mtx_t *mtx)
#endif
if (res != 0 && res != EINTR)
erts_thr_fatal_error(res, "wait on condition variable");
+ ERTS_MSACC_POP_STATE();
#endif
}
@@ -3488,7 +3492,11 @@ ERTS_GLB_INLINE void erts_tse_reset(erts_tse_t *ep)
ERTS_GLB_INLINE int erts_tse_wait(erts_tse_t *ep)
{
#ifdef USE_THREADS
- return ethr_event_wait(&((ethr_ts_event *) ep)->event);
+ int res;
+ ERTS_MSACC_PUSH_AND_SET_STATE(ERTS_MSACC_STATE_SLEEP);
+ res = ethr_event_wait(&((ethr_ts_event *) ep)->event);
+ ERTS_MSACC_POP_STATE();
+ return res;
#else
return ENOTSUP;
#endif
@@ -3497,7 +3505,11 @@ ERTS_GLB_INLINE int erts_tse_wait(erts_tse_t *ep)
ERTS_GLB_INLINE int erts_tse_swait(erts_tse_t *ep, int spincount)
{
#ifdef USE_THREADS
- return ethr_event_swait(&((ethr_ts_event *) ep)->event, spincount);
+ int res;
+ ERTS_MSACC_PUSH_AND_SET_STATE(ERTS_MSACC_STATE_SLEEP);
+ res = ethr_event_swait(&((ethr_ts_event *) ep)->event, spincount);
+ ERTS_MSACC_POP_STATE();
+ return res;
#else
return ENOTSUP;
#endif
@@ -3506,8 +3518,12 @@ ERTS_GLB_INLINE int erts_tse_swait(erts_tse_t *ep, int spincount)
ERTS_GLB_INLINE int erts_tse_twait(erts_tse_t *ep, Sint64 tmo)
{
#ifdef USE_THREADS
- return ethr_event_twait(&((ethr_ts_event *) ep)->event,
- (ethr_sint64_t) tmo);
+ int res;
+ ERTS_MSACC_PUSH_AND_SET_STATE(ERTS_MSACC_STATE_SLEEP);
+ res = ethr_event_twait(&((ethr_ts_event *) ep)->event,
+ (ethr_sint64_t) tmo);
+ ERTS_MSACC_POP_STATE();
+ return res;
#else
return ENOTSUP;
#endif
@@ -3516,9 +3532,13 @@ ERTS_GLB_INLINE int erts_tse_twait(erts_tse_t *ep, Sint64 tmo)
ERTS_GLB_INLINE int erts_tse_stwait(erts_tse_t *ep, int spincount, Sint64 tmo)
{
#ifdef USE_THREADS
- return ethr_event_stwait(&((ethr_ts_event *) ep)->event,
- spincount,
- (ethr_sint64_t) tmo);
+ int res;
+ ERTS_MSACC_PUSH_AND_SET_STATE(ERTS_MSACC_STATE_SLEEP);
+ res = ethr_event_stwait(&((ethr_ts_event *) ep)->event,
+ spincount,
+ (ethr_sint64_t) tmo);
+ ERTS_MSACC_POP_STATE();
+ return res;
#else
return ENOTSUP;
#endif
diff --git a/erts/emulator/beam/erl_time.h b/erts/emulator/beam/erl_time.h
index 93a0d556bf..5242063550 100644
--- a/erts/emulator/beam/erl_time.h
+++ b/erts/emulator/beam/erl_time.h
@@ -142,6 +142,8 @@ erts_time_unit_conversion(Uint64 value,
Uint32 from_time_unit,
Uint32 to_time_unit);
+ErtsSysPerfCounter erts_perf_counter_unit(void);
+
#if ERTS_GLB_INLINE_INCL_FUNC_DEF
ERTS_GLB_INLINE Uint64
diff --git a/erts/emulator/beam/erl_time_sup.c b/erts/emulator/beam/erl_time_sup.c
index 668dbb5daa..98159fdf72 100644
--- a/erts/emulator/beam/erl_time_sup.c
+++ b/erts/emulator/beam/erl_time_sup.c
@@ -1749,7 +1749,7 @@ erts_get_monotonic_time(ErtsSchedulerData *esdp)
{
ErtsMonotonicTime mtime = time_sup.r.o.get_time();
update_last_mtime(esdp, mtime);
- return mtime;
+ return mtime;
}
ErtsMonotonicTime
@@ -2435,9 +2435,19 @@ BIF_RETTYPE os_system_time_0(BIF_ALIST_0)
BIF_RET(make_time_val(BIF_P, stime));
}
-BIF_RETTYPE os_system_time_1(BIF_ALIST_0)
+BIF_RETTYPE os_system_time_1(BIF_ALIST_1)
{
ErtsSystemTime stime = erts_os_system_time();
BIF_RET(time_unit_conversion(BIF_P, BIF_ARG_1, stime, 0));
}
+BIF_RETTYPE
+os_perf_counter_0(BIF_ALIST_0)
+{
+ BIF_RET(make_time_val(BIF_P, erts_sys_perf_counter()));
+}
+
+BIF_RETTYPE erts_internal_perf_counter_unit_0(BIF_ALIST_0)
+{
+ BIF_RET(make_time_val(BIF_P, erts_sys_perf_counter_unit()));
+}
diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h
index 3f5925765d..e8a7573e86 100644
--- a/erts/emulator/beam/global.h
+++ b/erts/emulator/beam/global.h
@@ -1448,6 +1448,8 @@ ERTS_GLB_FORCE_INLINE int erts_is_literal(Eterm tptr, Eterm *ptr)
#endif
+Eterm erts_msacc_request(Process *c_p, int action, Eterm *threads);
+
/*
** Call_trace uses this API for the parameter matching functions
*/
diff --git a/erts/emulator/beam/io.c b/erts/emulator/beam/io.c
index 07578fd24e..41b0fcdd1b 100644
--- a/erts/emulator/beam/io.c
+++ b/erts/emulator/beam/io.c
@@ -706,6 +706,7 @@ erts_open_driver(erts_driver_t* driver, /* Pointer to driver. */
error_number = error_type = 0;
if (driver->start) {
+ ERTS_MSACC_PUSH_STATE_M();
if (IS_TRACED_FL(port, F_TRACE_SCHED_PORTS)) {
trace_sched_ports_where(port, am_in, am_start);
}
@@ -716,6 +717,7 @@ erts_open_driver(erts_driver_t* driver, /* Pointer to driver. */
DTRACE3(driver_start, process_str, driver->name, port_str);
}
#endif
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_PORT);
fpe_was_unmasked = erts_block_fpe();
drv_data = (*driver->start)(ERTS_Port2ErlDrvPort(port), name, opts);
if (((SWord) drv_data) == -1)
@@ -735,6 +737,7 @@ erts_open_driver(erts_driver_t* driver, /* Pointer to driver. */
}
erts_unblock_fpe(fpe_was_unmasked);
+ ERTS_MSACC_POP_STATE_M();
port->caller = NIL;
if (IS_TRACED_FL(port, F_TRACE_SCHED_PORTS)) {
trace_sched_ports_where(port, am_out, am_start);
@@ -1710,6 +1713,7 @@ call_driver_outputv(int bang_op,
else {
ErtsSchedulerData *esdp = erts_get_scheduler_data();
ErlDrvSizeT size = evp->size;
+ ERTS_MSACC_PUSH_AND_SET_STATE_M(ERTS_MSACC_STATE_PORT);
ERTS_SMP_LC_ASSERT(erts_lc_is_port_locked(prt)
|| ERTS_IS_CRASH_DUMPING);
@@ -1730,6 +1734,8 @@ call_driver_outputv(int bang_op,
esdp->io.out += (Uint64) size;
else
erts_atomic64_add_nob(&bytes_out, (erts_aint64_t) size);
+
+ ERTS_MSACC_POP_STATE_M();
}
}
@@ -1810,6 +1816,7 @@ call_driver_output(int bang_op,
send_badsig(prt);
else {
ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ ERTS_MSACC_PUSH_AND_SET_STATE_M(ERTS_MSACC_STATE_PORT);
ERTS_SMP_LC_ASSERT(erts_lc_is_port_locked(prt)
|| ERTS_IS_CRASH_DUMPING);
@@ -1829,6 +1836,8 @@ call_driver_output(int bang_op,
esdp->io.out += (Uint64) size;
else
erts_atomic64_add_nob(&bytes_out, (erts_aint64_t) size);
+
+ ERTS_MSACC_POP_STATE_M();
}
}
@@ -3473,6 +3482,7 @@ static void flush_port(Port *p)
ERTS_SMP_LC_ASSERT(erts_lc_is_port_locked(p));
if (p->drv_ptr->flush != NULL) {
+ ERTS_MSACC_PUSH_STATE_M();
#ifdef USE_VM_PROBES
if (DTRACE_ENABLED(driver_flush)) {
DTRACE_FORMAT_COMMON_PID_AND_PORT(ERTS_PORT_GET_CONNECTED(p), p)
@@ -3482,9 +3492,11 @@ static void flush_port(Port *p)
if (IS_TRACED_FL(p, F_TRACE_SCHED_PORTS)) {
trace_sched_ports_where(p, am_in, am_flush);
}
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_PORT);
fpe_was_unmasked = erts_block_fpe();
(*p->drv_ptr->flush)((ErlDrvData)p->drv_data);
erts_unblock_fpe(fpe_was_unmasked);
+ ERTS_MSACC_POP_STATE_M();
if (IS_TRACED_FL(p, F_TRACE_SCHED_PORTS)) {
trace_sched_ports_where(p, am_out, am_flush);
}
@@ -3532,6 +3544,7 @@ terminate_port(Port *prt)
drv = prt->drv_ptr;
if ((drv != NULL) && (drv->stop != NULL)) {
int fpe_was_unmasked = erts_block_fpe();
+ ERTS_MSACC_PUSH_AND_SET_STATE_M(ERTS_MSACC_STATE_PORT);
#ifdef USE_VM_PROBES
if (DTRACE_ENABLED(driver_stop)) {
DTRACE_FORMAT_COMMON_PID_AND_PORT(connected_id, prt)
@@ -3540,6 +3553,7 @@ terminate_port(Port *prt)
#endif
(*drv->stop)((ErlDrvData)prt->drv_data);
erts_unblock_fpe(fpe_was_unmasked);
+ ERTS_MSACC_POP_STATE_M();
#ifdef ERTS_SMP
if (prt->xports)
erts_port_handle_xports(prt);
@@ -3850,6 +3864,7 @@ call_driver_control(Eterm caller,
ErlDrvSizeT *from_size)
{
ErlDrvSSizeT cres;
+ ERTS_MSACC_PUSH_STATE_M();
if (!prt->drv_ptr->control)
return ERTS_PORT_OP_BADARG;
@@ -3863,6 +3878,8 @@ call_driver_control(Eterm caller,
command, size);
}
#endif
+
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_PORT);
prt->caller = caller;
cres = prt->drv_ptr->control((ErlDrvData) prt->drv_data,
@@ -3873,6 +3890,8 @@ call_driver_control(Eterm caller,
*from_size);
prt->caller = NIL;
+ ERTS_MSACC_POP_STATE_M();
+
if (cres < 0)
return ERTS_PORT_OP_BADARG;
@@ -4260,6 +4279,7 @@ call_driver_call(Eterm caller,
unsigned *ret_flagsp)
{
ErlDrvSSizeT cres;
+ ERTS_MSACC_PUSH_STATE_M();
if (!prt->drv_ptr->call)
return ERTS_PORT_OP_BADARG;
@@ -4275,6 +4295,8 @@ call_driver_call(Eterm caller,
}
#endif
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_PORT);
+
prt->caller = caller;
cres = prt->drv_ptr->call((ErlDrvData) prt->drv_data,
command,
@@ -4285,6 +4307,8 @@ call_driver_call(Eterm caller,
ret_flagsp);
prt->caller = NIL;
+ ERTS_MSACC_POP_STATE_M();
+
if (cres <= 0
|| ((byte) (*resp_bufp)[0]) != VERSION_MAGIC)
return ERTS_PORT_OP_BADARG;
@@ -4988,6 +5012,7 @@ int get_port_flags(ErlDrvPort ix)
void erts_raw_port_command(Port* p, byte* buf, Uint len)
{
int fpe_was_unmasked;
+ ERTS_MSACC_PUSH_STATE_M();
ERTS_SMP_CHK_NO_PROC_LOCKS;
ERTS_SMP_LC_ASSERT(erts_lc_is_port_locked(p));
@@ -5008,9 +5033,11 @@ void erts_raw_port_command(Port* p, byte* buf, Uint len)
DTRACE4(driver_output, "-raw-", port_str, p->name, len);
}
#endif
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_PORT);
fpe_was_unmasked = erts_block_fpe();
(*p->drv_ptr->output)((ErlDrvData)p->drv_data, (char*) buf, (int) len);
erts_unblock_fpe(fpe_was_unmasked);
+ ERTS_MSACC_POP_STATE_M();
}
int async_ready(Port *p, void* data)
@@ -5022,6 +5049,7 @@ int async_ready(Port *p, void* data)
if (p) {
ERTS_SMP_LC_ASSERT(erts_lc_is_port_locked(p));
if (p->drv_ptr->ready_async != NULL) {
+ ERTS_MSACC_PUSH_AND_SET_STATE_M(ERTS_MSACC_STATE_PORT);
#ifdef USE_VM_PROBES
if (DTRACE_ENABLED(driver_ready_async)) {
DTRACE_FORMAT_COMMON_PID_AND_PORT(ERTS_PORT_GET_CONNECTED(p), p)
@@ -5030,6 +5058,7 @@ int async_ready(Port *p, void* data)
#endif
(*p->drv_ptr->ready_async)((ErlDrvData)p->drv_data, data);
need_free = 0;
+ ERTS_MSACC_POP_STATE_M();
}
erts_port_driver_callback_epilogue(p, NULL);
@@ -7053,6 +7082,7 @@ void erts_fire_port_monitor(Port *prt, Eterm ref)
void (*callback)(ErlDrvData drv_data, ErlDrvMonitor *monitor);
ErlDrvMonitor drv_monitor;
int fpe_was_unmasked;
+ ERTS_MSACC_PUSH_STATE_M();
ERTS_SMP_LC_ASSERT(erts_lc_is_port_locked(prt));
ASSERT(prt->drv_ptr != NULL);
@@ -7064,6 +7094,7 @@ void erts_fire_port_monitor(Port *prt, Eterm ref)
callback = prt->drv_ptr->process_exit;
ASSERT(callback != NULL);
ref_to_driver_monitor(ref,&drv_monitor);
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_PORT);
DRV_MONITOR_UNLOCK_PDL(prt);
#ifdef USE_VM_PROBES
if (DTRACE_ENABLED(driver_process_exit)) {
@@ -7075,6 +7106,7 @@ void erts_fire_port_monitor(Port *prt, Eterm ref)
(*callback)((ErlDrvData) (prt->drv_data), &drv_monitor);
erts_unblock_fpe(fpe_was_unmasked);
DRV_MONITOR_LOCK_PDL(prt);
+ ERTS_MSACC_POP_STATE_M();
/* remove monitor *after* callback */
rmon = erts_remove_monitor(&ERTS_P_MONITORS(prt), ref);
DRV_MONITOR_UNLOCK_PDL(prt);
diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab
index 081c4108a0..9e53b4bfcc 100644
--- a/erts/emulator/beam/ops.tab
+++ b/erts/emulator/beam/ops.tab
@@ -988,6 +988,13 @@ move Discarded x==0 | move Something x==0 => move Something x=0
%endif
+call_ext u==0 u$func:os:perf_counter/0 => \
+ i_perf_counter
+call_ext_last u==0 u$func:os:perf_counter/0 D => \
+ i_perf_counter | deallocate_return D
+call_ext_only u==0 u$func:os:perf_counter/0 => \
+ i_perf_counter | return
+
#
# The general case for BIFs that have no special instructions.
# A BIF used in the tail must be followed by a return instruction.
@@ -1027,6 +1034,8 @@ i_apply_fun_only
i_hibernate
+i_perf_counter
+
call_bif e
#
diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h
index 279e4db0af..e77b4b2dc8 100644
--- a/erts/emulator/beam/sys.h
+++ b/erts/emulator/beam/sys.h
@@ -136,6 +136,17 @@ typedef ERTS_SYS_FD_TYPE ErtsSysFdType;
# define ERTS_LIKELY(BOOL) (BOOL)
# define ERTS_UNLIKELY(BOOL) (BOOL)
#endif
+
+#if ERTS_AT_LEAST_GCC_VSN__(2, 96, 0)
+#ifndef __llvm__
+# define ERTS_WRITE_UNLIKELY(X) X __attribute__ ((section ("ERTS_LOW_WRITE") ))
+#else
+# define ERTS_WRITE_UNLIKELY(X) X __attribute__ ((section ("__DATA,ERTS_LOW_WRITE") ))
+#endif
+#else
+# define ERTS_WRITE_UNLIKELY(X) X
+#endif
+
#ifdef __GNUC__
# if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 5)
# define ERTS_DECLARE_DUMMY(X) X __attribute__ ((unused))
diff --git a/erts/emulator/beam/time.c b/erts/emulator/beam/time.c
index 0ab6661c9f..48cb39333a 100644
--- a/erts/emulator/beam/time.c
+++ b/erts/emulator/beam/time.c
@@ -278,9 +278,14 @@ ErtsMonotonicTime
erts_check_next_timeout_time(ErtsSchedulerData *esdp)
{
ErtsTimerWheel *tiw = esdp->timer_wheel;
+ ErtsMonotonicTime time;
+ ERTS_MSACC_DECLARE_CACHE_X();
if (tiw->true_next_timeout_time)
return tiw->next_timeout_time;
- return find_next_timeout(esdp, tiw, 1, 0, 0);
+ ERTS_MSACC_PUSH_AND_SET_STATE_CACHED_X(ERTS_MSACC_STATE_TIMERS);
+ time = find_next_timeout(esdp, tiw, 1, 0, 0);
+ ERTS_MSACC_POP_STATE_M_X();
+ return time;
}
#ifndef ERTS_TW_DEBUG
@@ -336,6 +341,7 @@ erts_bump_timers(ErtsTimerWheel *tiw, ErtsMonotonicTime curr_time)
{
int tiw_pos_ix, slots, yielded_slot_restarted, yield_count;
ErtsMonotonicTime bump_to, tmp_slots, old_pos;
+ ERTS_MSACC_PUSH_AND_SET_STATE_M_X(ERTS_MSACC_STATE_TIMERS);
yield_count = ERTS_TWHEEL_BUMP_YIELD_LIMIT;
@@ -371,6 +377,7 @@ erts_bump_timers(ErtsTimerWheel *tiw, ErtsMonotonicTime curr_time)
tiw->next_timeout_time = curr_time + ERTS_MONOTONIC_DAY;
tiw->pos = bump_to;
tiw->yield_slot = ERTS_TWHEEL_SLOT_INACTIVE;
+ ERTS_MSACC_POP_STATE_M_X();
return;
}
@@ -382,6 +389,7 @@ erts_bump_timers(ErtsTimerWheel *tiw, ErtsMonotonicTime curr_time)
tiw->yield_slot = ERTS_TWHEEL_SLOT_AT_ONCE;
tiw->true_next_timeout_time = 1;
tiw->next_timeout_time = ERTS_CLKTCKS_TO_MONOTONIC(old_pos);
+ ERTS_MSACC_POP_STATE_M_X();
return;
}
@@ -400,8 +408,10 @@ erts_bump_timers(ErtsTimerWheel *tiw, ErtsMonotonicTime curr_time)
p = tiw->at_once.head;
}
- if (tiw->pos >= bump_to)
+ if (tiw->pos >= bump_to) {
+ ERTS_MSACC_POP_STATE_M_X();
break;
+ }
if (tiw->nto == 0)
goto empty_wheel;
@@ -478,6 +488,7 @@ erts_bump_timers(ErtsTimerWheel *tiw, ErtsMonotonicTime curr_time)
tiw->yield_slot = tiw_pos_ix;
tiw->yield_slots_left = slots;
tiw->yield_start_pos = old_pos;
+ ERTS_MSACC_POP_STATE_M_X();
return; /* Yield! */
}
@@ -500,6 +511,7 @@ erts_bump_timers(ErtsTimerWheel *tiw, ErtsMonotonicTime curr_time)
/* Search at most two seconds ahead... */
(void) find_next_timeout(NULL, tiw, 0, curr_time, ERTS_SEC_TO_MONOTONIC(2));
+ ERTS_MSACC_POP_STATE_M_X();
}
Uint
@@ -569,6 +581,7 @@ erts_twheel_set_timer(ErtsTimerWheel *tiw,
ErtsMonotonicTime timeout_pos)
{
ErtsMonotonicTime timeout_time;
+ ERTS_MSACC_PUSH_AND_SET_STATE_M_X(ERTS_MSACC_STATE_TIMERS);
p->u.func.timeout = timeout;
p->u.func.cancel = cancel;
@@ -612,6 +625,7 @@ erts_twheel_set_timer(ErtsTimerWheel *tiw,
tiw->true_next_timeout_time = 1;
tiw->next_timeout_time = timeout_time;
}
+ ERTS_MSACC_POP_STATE_M_X();
}
void
@@ -620,11 +634,13 @@ erts_twheel_cancel_timer(ErtsTimerWheel *tiw, ErtsTWheelTimer *p)
if (p->slot != ERTS_TWHEEL_SLOT_INACTIVE) {
ErlCancelProc cancel;
void *arg;
+ ERTS_MSACC_PUSH_AND_SET_STATE_M_X(ERTS_MSACC_STATE_TIMERS);
remove_timer(tiw, p);
cancel = p->u.func.cancel;
arg = p->u.func.arg;
if (cancel)
(*cancel)(arg);
+ ERTS_MSACC_POP_STATE_M_X();
}
}
diff --git a/erts/emulator/sys/common/erl_poll.c b/erts/emulator/sys/common/erl_poll.c
index 21fa214364..36ee94111c 100644
--- a/erts/emulator/sys/common/erl_poll.c
+++ b/erts/emulator/sys/common/erl_poll.c
@@ -74,6 +74,7 @@
#include "erl_thr_progress.h"
#include "erl_driver.h"
#include "erl_alloc.h"
+#include "erl_msacc.h"
#if !defined(ERTS_POLL_USE_EPOLL) \
&& !defined(ERTS_POLL_USE_DEVPOLL) \
@@ -2238,6 +2239,7 @@ static ERTS_INLINE int
check_fd_events(ErtsPollSet ps, ErtsMonotonicTime timeout_time, int max_res)
{
int res;
+ ERTS_MSACC_PUSH_STATE_M();
if (erts_smp_atomic_read_nob(&ps->no_of_user_fds) == 0
&& timeout_time == ERTS_POLL_NO_TIMEOUT) {
/* Nothing to poll and zero timeout; done... */
@@ -2259,6 +2261,7 @@ check_fd_events(ErtsPollSet ps, ErtsMonotonicTime timeout_time, int max_res)
#ifdef ERTS_SMP
erts_thr_progress_prepare_wait(NULL);
#endif
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP);
timerfd_set(ps, &its);
res = epoll_wait(ps->kp_fd, ps->res_events, max_res, -1);
res = timerfd_clear(ps, res, max_res);
@@ -2268,10 +2271,12 @@ check_fd_events(ErtsPollSet ps, ErtsMonotonicTime timeout_time, int max_res)
}
#else /* !ERTS_POLL_USE_TIMERFD */
timeout = (int) get_timeout(ps, 1000, timeout_time);
+ if (timeout) {
#ifdef ERTS_SMP
- if (timeout)
erts_thr_progress_prepare_wait(NULL);
#endif
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP);
+ }
res = epoll_wait(ps->kp_fd, ps->res_events, max_res, timeout);
#endif /* !ERTS_POLL_USE_TIMERFD */
#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */
@@ -2279,10 +2284,12 @@ check_fd_events(ErtsPollSet ps, ErtsMonotonicTime timeout_time, int max_res)
if (max_res > ps->res_events_len)
grow_res_events(ps, max_res);
timeout = get_timeout_timespec(ps, &ts, timeout_time);
+ if (timeout) {
#ifdef ERTS_SMP
- if (timeout)
erts_thr_progress_prepare_wait(NULL);
#endif
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP);
+ }
res = kevent(ps->kp_fd, NULL, 0, ps->res_events, max_res, &ts);
#endif /* ----------------------------------------- */
}
@@ -2306,26 +2313,33 @@ check_fd_events(ErtsPollSet ps, ErtsMonotonicTime timeout_time, int max_res)
if (poll_res.dp_nfds > ps->res_events_len)
grow_res_events(ps, poll_res.dp_nfds);
poll_res.dp_fds = ps->res_events;
+ if (timeout) {
#ifdef ERTS_SMP
- if (timeout)
erts_thr_progress_prepare_wait(NULL);
#endif
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP);
+ }
poll_res.dp_timeout = timeout;
res = ioctl(ps->kp_fd, DP_POLL, &poll_res);
#elif ERTS_POLL_USE_POLL && defined(HAVE_PPOLL) /* --- ppoll ---------------- */
struct timespec ts;
timeout = get_timeout_timespec(ps, &ts, timeout_time);
+ if (timeout) {
#ifdef ERTS_SMP
- if (timeout)
erts_thr_progress_prepare_wait(NULL);
#endif
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP);
+ }
res = ppoll(ps->poll_fds, ps->no_poll_fds, &ts, NULL);
#elif ERTS_POLL_USE_POLL /* --- poll --------------------------------- */
timeout = (int) get_timeout(ps, 1000, timeout_time);
+
+ if (timeout) {
#ifdef ERTS_SMP
- if (timeout)
erts_thr_progress_prepare_wait(NULL);
#endif
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP);
+ }
res = poll(ps->poll_fds, ps->no_poll_fds, timeout);
#elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */
SysTimeval to;
@@ -2334,18 +2348,22 @@ check_fd_events(ErtsPollSet ps, ErtsMonotonicTime timeout_time, int max_res)
ERTS_FD_COPY(&ps->input_fds, &ps->res_input_fds);
ERTS_FD_COPY(&ps->output_fds, &ps->res_output_fds);
+ if (timeout) {
#ifdef ERTS_SMP
- if (timeout)
erts_thr_progress_prepare_wait(NULL);
#endif
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP);
+ }
res = ERTS_SELECT(ps->max_fd + 1,
&ps->res_input_fds,
&ps->res_output_fds,
NULL,
&to);
#ifdef ERTS_SMP
- if (timeout)
+ if (timeout) {
erts_thr_progress_finalize_wait(NULL);
+ ERTS_MSACC_POP_STATE_M();
+ }
if (res < 0
&& errno == EBADF
&& ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
@@ -2380,10 +2398,12 @@ check_fd_events(ErtsPollSet ps, ErtsMonotonicTime timeout_time, int max_res)
return res;
#endif /* ----------------------------------------- */
}
+ if (timeout) {
#ifdef ERTS_SMP
- if (timeout)
erts_thr_progress_finalize_wait(NULL);
#endif
+ ERTS_MSACC_POP_STATE_M();
+ }
return res;
}
}
diff --git a/erts/emulator/sys/unix/erl_unix_sys.h b/erts/emulator/sys/unix/erl_unix_sys.h
index 0352ee1b3c..8b1822ca9f 100644
--- a/erts/emulator/sys/unix/erl_unix_sys.h
+++ b/erts/emulator/sys/unix/erl_unix_sys.h
@@ -101,6 +101,10 @@
#endif
#include <netdb.h>
+#ifdef HAVE_MACH_ABSOLUTE_TIME
+#include <mach/mach_time.h>
+#endif
+
#ifdef HAVE_POSIX_MEMALIGN
# define ERTS_HAVE_ERTS_SYS_ALIGNED_ALLOC 1
#endif
@@ -157,6 +161,7 @@ typedef long long ErtsSysHrTime;
#endif
typedef ErtsMonotonicTime ErtsSystemTime;
+typedef ErtsSysHrTime ErtsSysPerfCounter;
#define ERTS_MONOTONIC_TIME_MIN (((ErtsMonotonicTime) 1) << 63)
#define ERTS_MONOTONIC_TIME_MAX (~ERTS_MONOTONIC_TIME_MIN)
@@ -205,6 +210,7 @@ ErtsSystemTime erts_os_system_time(void);
* It may or may not be monotonic.
*/
ErtsSysHrTime erts_sys_hrtime(void);
+#define ERTS_HRTIME_UNIT (1000*1000*1000)
struct erts_sys_time_read_only_data__ {
#ifdef ERTS_OS_MONOTONIC_INLINE_FUNC_PTR_CALL__
@@ -213,6 +219,8 @@ struct erts_sys_time_read_only_data__ {
#ifdef ERTS_OS_TIMES_INLINE_FUNC_PTR_CALL__
void (*os_times)(ErtsMonotonicTime *, ErtsSystemTime *);
#endif
+ ErtsSysPerfCounter (*perf_counter)(void);
+ ErtsSysPerfCounter perf_counter_unit;
int ticks_per_sec;
};
@@ -266,7 +274,24 @@ erts_os_times(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep)
#endif /* ERTS_HAVE_OS_MONOTONIC_TIME_SUPPORT */
/*
- *
+ * Functions for getting the performance counter
+ */
+
+ERTS_GLB_INLINE ErtsSysPerfCounter erts_sys_perf_counter(void);
+#define erts_sys_perf_counter_unit() erts_sys_time_data__.r.o.perf_counter_unit
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+ERTS_GLB_INLINE ErtsSysPerfCounter
+erts_sys_perf_counter()
+{
+ return (*erts_sys_time_data__.r.o.perf_counter)();
+}
+
+#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
+
+/*
+ * Functions for measuring CPU time
*/
#if (defined(HAVE_GETHRVTIME) || defined(HAVE_CLOCK_GETTIME_CPU_TIME))
diff --git a/erts/emulator/sys/unix/sys_time.c b/erts/emulator/sys/unix/sys_time.c
index 2e1914f564..03d39c7ce6 100644
--- a/erts/emulator/sys/unix/sys_time.c
+++ b/erts/emulator/sys/unix/sys_time.c
@@ -65,6 +65,8 @@
# include <fcntl.h>
#endif
+static void init_perf_counter(void);
+
/******************* Routines for time measurement *********************/
#undef ERTS_SYS_TIME_INTERNAL_STATE_WRITE_FREQ__
@@ -404,6 +406,8 @@ sys_init_time(ErtsSysInitTimeResult *init_resp)
# error Missing erts_os_system_time() implementation
#endif
+ init_perf_counter();
+
}
void
@@ -908,10 +912,120 @@ erts_os_times(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep)
#endif
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
+ * Performance counter functions *
+\* */
+
+
+/* What resolution to spin to in micro seconds */
+#define RESOLUTION 100
+/* How many iterations to spin */
+#define ITERATIONS 1
+/* How many significant figures to round to */
+#define SIGFIGS 3
+
+static ErtsSysPerfCounter calculate_perf_counter_unit(void) {
+ int i;
+ ErtsSysPerfCounter pre, post;
+ double value = 0;
+ double round_factor;
+#if defined(HAVE_GETHRTIME) && defined(GETHRTIME_WITH_CLOCK_GETTIME)
+ struct timespec basetime,comparetime;
+#define __GETTIME(arg) clock_gettime(CLOCK_MONOTONIC,arg)
+#define __GETUSEC(arg) (arg.tv_nsec / 1000)
+#else
+ SysTimeval basetime,comparetime;
+#define __GETTIME(arg) sys_gettimeofday(arg)
+#define __GETUSEC(arg) arg.tv_usec
+#endif
+
+ for (i = 0; i < ITERATIONS; i++) {
+ /* Make sure usec just flipped over at current resolution */
+ __GETTIME(&basetime);
+ do {
+ __GETTIME(&comparetime);
+ } while ((__GETUSEC(basetime) / RESOLUTION) == (__GETUSEC(comparetime) / RESOLUTION));
+
+ pre = erts_sys_perf_counter();
+
+ __GETTIME(&basetime);
+ do {
+ __GETTIME(&comparetime);
+ } while ((__GETUSEC(basetime) / RESOLUTION) == (__GETUSEC(comparetime) / RESOLUTION));
+
+ post = erts_sys_perf_counter();
+
+ value += post - pre;
+ }
+ /* After this value is ticks per us */
+ value /= (RESOLUTION*ITERATIONS);
+
+ /* We round to 3 significant figures */
+ round_factor = pow(10.0, SIGFIGS - ceil(log10(value)));
+ value = ((ErtsSysPerfCounter)(value * round_factor + 0.5)) / round_factor;
+
+ /* convert to ticks per second */
+ return 1000000 * value;
+}
+
+static int have_rdtscp(void)
+{
+#if defined(ETHR_X86_RUNTIME_CONF__)
+ /* On early x86 cpu's the tsc varies with the current speed of the cpu,
+ which means that the time per tick vary depending on the current
+ load of the cpu. We do not want this as it would give very scewed
+ numbers when the cpu is mostly idle.
+ The linux kernel seems to think that checking for constant and
+ reliable is enough to trust the counter so we do the same.
+
+ If this test is not good enough, I don't know what we'll do.
+ Maybe fallback on erts_sys_hrtime always, but that would be a shame as
+ rdtsc is about 3 times faster than hrtime... */
+ return ETHR_X86_RUNTIME_CONF_HAVE_CONSTANT_TSC__ &&
+ ETHR_X86_RUNTIME_CONF_HAVE_TSC_RELIABLE__;
+#else
+ return 0;
+#endif
+}
+
+static ErtsSysPerfCounter rdtsc(void)
+{
+ /* It may have been a good idea to put the cpuid instruction before
+ the rdtsc, but I decided against it because it is not really
+ needed for msacc, and it slows it down by quite a bit (5-7 times slower).
+ As a result though, this timestamp becomes much less
+ accurate as it might be re-ordered to be executed way before or after this
+ function is called.
+ */
+ ErtsSysPerfCounter ts;
+#if defined(__x86_64__)
+ __asm__ __volatile__ ("rdtsc\n\t"
+ "shl $32, %%rdx\n\t"
+ "or %%rdx, %0" : "=a" (ts) : : "rdx");
+#elif defined(__i386__)
+ __asm__ __volatile__ ("rdtsc\n\t"
+ : "=A" (ts) );
+#endif
+ return ts;
+}
+
+static void init_perf_counter(void)
+{
+ if (have_rdtscp()) {
+ erts_sys_time_data__.r.o.perf_counter = rdtsc;
+ erts_sys_time_data__.r.o.perf_counter_unit = calculate_perf_counter_unit();
+ } else {
+ erts_sys_time_data__.r.o.perf_counter = erts_sys_hrtime;
+ erts_sys_time_data__.r.o.perf_counter_unit = ERTS_HRTIME_UNIT;
+ }
+}
+
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#ifdef HAVE_GETHRVTIME_PROCFS_IOCTL
+/* The code below only has effect on solaris < 10,
+ needed in order for gehhrvtime to work properly */
int sys_start_hrvtime(void)
{
long msacct = PR_MSACCT;
diff --git a/erts/emulator/sys/win32/erl_poll.c b/erts/emulator/sys/win32/erl_poll.c
index 466f4a3b48..660ded297a 100644
--- a/erts/emulator/sys/win32/erl_poll.c
+++ b/erts/emulator/sys/win32/erl_poll.c
@@ -27,6 +27,7 @@
#include "erl_alloc.h"
#include "erl_poll.h"
#include "erl_time.h"
+#include "erl_msacc.h"
/*
* Some debug macros
@@ -1188,16 +1189,19 @@ int erts_poll_wait(ErtsPollSet ps,
if (timeout > 0 && !erts_atomic32_read_nob(&break_waiter_state)) {
HANDLE harr[2] = {ps->event_io_ready, break_happened_event};
int num_h = 2;
+ ERTS_MSACC_PUSH_STATE_M();
HARDDEBUGF(("Start waiting %d [%d]",num_h, (int) timeout));
ERTS_POLLSET_UNLOCK(ps);
#ifdef ERTS_SMP
erts_thr_progress_prepare_wait(NULL);
#endif
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP);
WaitForMultipleObjects(num_h, harr, FALSE, timeout);
#ifdef ERTS_SMP
erts_thr_progress_finalize_wait(NULL);
#endif
+ ERTS_MSACC_POP_STATE_M();
ERTS_POLLSET_LOCK(ps);
HARDDEBUGF(("Stop waiting %d [%d]",num_h, (int) timeout));
woke_up(ps);
diff --git a/erts/emulator/sys/win32/erl_win_sys.h b/erts/emulator/sys/win32/erl_win_sys.h
index 7e8dd8a4ca..99c1066ab3 100644
--- a/erts/emulator/sys/win32/erl_win_sys.h
+++ b/erts/emulator/sys/win32/erl_win_sys.h
@@ -183,6 +183,7 @@ typedef LONGLONG ErtsSysHrTime;
#endif
typedef ErtsMonotonicTime ErtsSystemTime;
+typedef ErtsMonotonicTime ErtsSysPerfCounter;
ErtsSystemTime erts_os_system_time(void);
@@ -213,6 +214,7 @@ ERTS_GLB_INLINE ErtsMonotonicTime erts_os_monotonic_time(void);
ERTS_GLB_INLINE void erts_os_times(ErtsMonotonicTime *,
ErtsSystemTime *);
ERTS_GLB_INLINE ErtsSysHrTime erts_sys_hrtime(void);
+ERTS_GLB_INLINE ErtsSysPerfCounter erts_sys_perf_counter(void);
#if ERTS_GLB_INLINE_INCL_FUNC_DEF
@@ -234,6 +236,18 @@ erts_sys_hrtime(void)
return (*erts_sys_time_data__.r.o.sys_hrtime)();
}
+ERTS_GLB_INLINE ErtsSysPerfCounter
+erts_sys_perf_counter(void)
+{
+ return (*erts_sys_time_data__.r.o.sys_hrtime)();
+}
+
+ERTS_GLB_INLINE ErtsSysPerfCounter
+erts_sys_perf_counter_unit(void)
+{
+ return 1000 * 1000 * 1000;
+}
+
#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
extern void sys_gettimeofday(SysTimeval *tv);
diff --git a/erts/emulator/sys/win32/sys_time.c b/erts/emulator/sys/win32/sys_time.c
index 9e5f78703a..3b4fd26d63 100644
--- a/erts/emulator/sys/win32/sys_time.c
+++ b/erts/emulator/sys/win32/sys_time.c
@@ -397,6 +397,7 @@ sys_init_time(ErtsSysInitTimeResult *init_resp)
erts_sys_time_data__.r.o.os_monotonic_time = os_mtime_func;
erts_sys_time_data__.r.o.os_times = os_times_func;
+ erts_sys_time_data__.r.o.sys_hrtime = sys_hrtime_func;
init_resp->os_monotonic_time_unit = time_unit;
init_resp->have_os_monotonic_time = 1;
init_resp->have_corrected_os_monotonic_time = 0;
diff --git a/erts/emulator/test/statistics_SUITE.erl b/erts/emulator/test/statistics_SUITE.erl
index a6305d453c..aab3d3e3d2 100644
--- a/erts/emulator/test/statistics_SUITE.erl
+++ b/erts/emulator/test/statistics_SUITE.erl
@@ -32,7 +32,7 @@
run_queue_one/1,
scheduler_wall_time/1,
reductions/1, reductions_big/1, garbage_collection/1, io/1,
- badarg/1, run_queues_lengths_active_tasks/1]).
+ badarg/1, run_queues_lengths_active_tasks/1, msacc/1]).
%% Internal exports.
@@ -55,7 +55,8 @@ all() ->
[{group, wall_clock}, {group, runtime}, reductions,
reductions_big, {group, run_queue}, scheduler_wall_time,
garbage_collection, io, badarg,
- run_queues_lengths_active_tasks].
+ run_queues_lengths_active_tasks,
+ msacc].
groups() ->
[{wall_clock, [],
@@ -470,3 +471,114 @@ run_queues_lengths_active_tasks(Config) ->
TokLoops),
ok.
+
+msacc(doc) ->
+ "Tests that statistics(microstate_statistics) works.";
+msacc(Config) ->
+
+ %% Test if crypto nif is available
+ Niff = try crypto:strong_rand_bytes(1), ok catch _:_ -> nok end,
+ TmpFile = filename:join(proplists:get_value(priv_dir,Config),"file.tmp"),
+
+ false = erlang:system_flag(microstate_accounting, true),
+
+ msacc_test(TmpFile),
+
+ true = erlang:system_flag(microstate_accounting, false),
+
+ MsaccStats = erlang:statistics(microstate_accounting),
+
+ case os:type() of
+ {win32, _} ->
+ %% Some windows have a very poor accuracy on their
+ %% timing primitives, so we just make sure that
+ %% some state besides sleep has been triggered.
+ Sum = lists:sum(
+ lists:map(fun({sleep, _V}) -> 0;
+ ({_, V}) -> V
+ end, maps:to_list(msacc_sum_states()))
+ ),
+ if Sum > 0 ->
+ ok;
+ true ->
+ ct:fail({no_states_triggered, MsaccStats})
+ end;
+ _ ->
+
+ %% Make sure that all states were triggered at least once
+ maps:map(fun(nif, 0) ->
+ case Niff of
+ ok ->
+ ct:fail({zero_state,nif});
+ nok ->
+ ok
+ end;
+ (aux, 0) ->
+ %% aux will be zero if we do not have smp support
+ %% or no async threads
+ case erlang:system_info(smp_support) orelse
+ erlang:system_info(thread_pool_size) > 0
+ of
+ false ->
+ ok;
+ true ->
+ ct:log("msacc: ~p",[MsaccStats]),
+ ct:fail({zero_state,aux})
+ end;
+ (Key, 0) ->
+ ct:log("msacc: ~p",[MsaccStats]),
+ ct:fail({zero_state,Key});
+ (_,_) -> ok
+ end, msacc_sum_states())
+ end,
+
+ erlang:system_flag(microstate_accounting, reset),
+
+ msacc_test(TmpFile),
+
+ %% Make sure all counters are zero after stopping and resetting
+ maps:map(fun(_Key, 0) -> ok;
+ (Key,_) ->
+ ct:log("msacc: ~p",[erlang:statistics(microstate_accounting)]),
+ ct:fail({non_zero_state,Key})
+ end,msacc_sum_states()).
+
+%% This test tries to make sure to trigger all of the different available states
+msacc_test(TmpFile) ->
+
+ %% We write some data
+ [file:write_file(TmpFile,<<0:(1024*1024*8)>>,[raw]) || _ <- lists:seq(1,100)],
+
+ %% Do some ETS operations
+ Tid = ets:new(table, []),
+ ets:insert(Tid, {1, hello}),
+ ets:delete(Tid),
+
+ %% Collect some garbage
+ [erlang:garbage_collect() || _ <- lists:seq(1,100)],
+
+ %% Send some messages
+ [begin self() ! {hello},receive _ -> ok end end || _ <- lists:seq(1,100)],
+
+ %% Setup some timers
+ Refs = [erlang:send_after(10000,self(),ok) || _ <- lists:seq(1,100)],
+
+ %% Do some nif work
+ catch [crypto:strong_rand_bytes(128) || _ <- lists:seq(1,100)],
+
+ %% Cancel some timers
+ [erlang:cancel_timer(R) || R <- Refs],
+
+ %% Wait for a while
+ timer:sleep(100).
+
+msacc_sum_states() ->
+ Stats = erlang:statistics(microstate_accounting),
+ [#{ counters := C }|_] = Stats,
+ InitialCounters = maps:map(fun(_,_) -> 0 end,C),
+ lists:foldl(fun(#{ counters := Counters }, Cnt) ->
+ maps:fold(fun(Key, Value, Acc) ->
+ NewValue = Value+maps:get(Key,Acc),
+ maps:update(Key, NewValue, Acc)
+ end, Cnt, Counters)
+ end,InitialCounters,Stats).
diff --git a/erts/include/internal/ethr_internal.h b/erts/include/internal/ethr_internal.h
index d4ded6ff05..693b34df61 100644
--- a/erts/include/internal/ethr_internal.h
+++ b/erts/include/internal/ethr_internal.h
@@ -92,7 +92,6 @@ void ethr_run_exit_handlers__(void);
void ethr_ts_event_destructor__(void *vtsep);
#if defined(ETHR_X86_RUNTIME_CONF__)
-int ethr_x86_have_cpuid__(void);
void ethr_x86_cpuid__(int *eax, int *ebx, int *ecx, int *edx);
#endif
diff --git a/erts/include/internal/ethread.h b/erts/include/internal/ethread.h
index 4eeb7097f4..e5c5cdfa33 100644
--- a/erts/include/internal/ethread.h
+++ b/erts/include/internal/ethread.h
@@ -274,14 +274,40 @@ ETHR_PROTO_NORETURN__ ethr_fatal_error__(const char *file,
|| (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))))
# define ETHR_X86_RUNTIME_CONF__
-# define ETHR_X86_RUNTIME_CONF_HAVE_DW_CMPXCHG__ \
- (__builtin_expect(ethr_runtime__.conf.have_dw_cmpxchg != 0, 1))
-# define ETHR_X86_RUNTIME_CONF_HAVE_NO_DW_CMPXCHG__ \
- (__builtin_expect(ethr_runtime__.conf.have_dw_cmpxchg == 0, 0))
-# define ETHR_X86_RUNTIME_CONF_HAVE_SSE2__ \
- (__builtin_expect(ethr_runtime__.conf.have_sse2 != 0, 1))
-# define ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__ \
- (__builtin_expect(ethr_runtime__.conf.have_sse2 == 0, 0))
+# define ETHR_X86_RUNTIME_CONF_HAVE_META(feature) \
+ (__builtin_expect(ethr_runtime__.conf.have_##feature != 0, 1))
+# define ETHR_X86_RUNTIME_CONF_HAVE_NO_META(feature) \
+ (__builtin_expect(ethr_runtime__.conf.have_##feature == 0, 0))
+
+# define ETHR_X86_RUNTIME_CONF_HAVE_DW_CMPXCHG__ \
+ ETHR_X86_RUNTIME_CONF_HAVE_META(dw_cmpxchg)
+# define ETHR_X86_RUNTIME_CONF_HAVE_NO_DW_CMPXCHG__ \
+ ETHR_X86_RUNTIME_CONF_HAVE_NO_META(dw_cmpxchg)
+# define ETHR_X86_RUNTIME_CONF_HAVE_SSE2__ \
+ ETHR_X86_RUNTIME_CONF_HAVE_META(sse2)
+# define ETHR_X86_RUNTIME_CONF_HAVE_NO_SSE2__ \
+ ETHR_X86_RUNTIME_CONF_HAVE_NO_META(sse2)
+# define ETHR_X86_RUNTIME_CONF_HAVE_RDTSCP__ \
+ ETHR_X86_RUNTIME_CONF_HAVE_META(rdtscp)
+# define ETHR_X86_RUNTIME_CONF_HAVE_NO_RDTSCP__ \
+ ETHR_X86_RUNTIME_CONF_HAVE_NO_META(rdtscp)
+# define ETHR_X86_RUNTIME_CONF_HAVE_CONSTANT_TSC__ \
+ ETHR_X86_RUNTIME_CONF_HAVE_META(constant_tsc)
+# define ETHR_X86_RUNTIME_CONF_HAVE_NO_CONSTANT_TSC__ \
+ ETHR_X86_RUNTIME_CONF_HAVE_NO_META(nonstop_tsc)
+# define ETHR_X86_RUNTIME_CONF_HAVE_NONSTOP_TSC__ \
+ ETHR_X86_RUNTIME_CONF_HAVE_META(nonstop_tsc)
+# define ETHR_X86_RUNTIME_CONF_HAVE_NO_NONSTOP_TSC__ \
+ ETHR_X86_RUNTIME_CONF_HAVE_NO_META(nonstop_tsc)
+# define ETHR_X86_RUNTIME_CONF_HAVE_TSC_RELIABLE__ \
+ ETHR_X86_RUNTIME_CONF_HAVE_META(tsc_reliable)
+# define ETHR_X86_RUNTIME_CONF_HAVE_NO_TSC_RELIABLE_TSC__ \
+ ETHR_X86_RUNTIME_CONF_HAVE_NO_META(tsc_reliable)
+# define ETHR_X86_RUNTIME_CONF_HAVE_NONSTOP_TSC_S3__ \
+ ETHR_X86_RUNTIME_CONF_HAVE_META(nonstop_tsc_s3)
+# define ETHR_X86_RUNTIME_CONF_HAVE_NO_NONSTOP_TSC_S3__ \
+ ETHR_X86_RUNTIME_CONF_HAVE_NO_META(nonstop_tsc_s3)
+
#endif
#if (defined(__GNUC__) \
@@ -300,6 +326,11 @@ typedef struct {
#if defined(ETHR_X86_RUNTIME_CONF__)
int have_dw_cmpxchg;
int have_sse2;
+ int have_rdtscp;
+ int have_constant_tsc;
+ int have_tsc_reliable;
+ int have_nonstop_tsc;
+ int have_nonstop_tsc_s3;
#endif
#if defined(ETHR_PPC_RUNTIME_CONF__)
int have_lwsync;
diff --git a/erts/lib_src/common/ethr_aux.c b/erts/lib_src/common/ethr_aux.c
index 56fecf81b8..3e7aad16c7 100644
--- a/erts/lib_src/common/ethr_aux.c
+++ b/erts/lib_src/common/ethr_aux.c
@@ -139,6 +139,38 @@ x86_init(void)
#endif
/* bit 26 of edx is set if we have sse2 */
ethr_runtime__.conf.have_sse2 = (edx & (1 << 26));
+
+ /* check if we have extended feature set */
+ eax = 0x80000000;
+ ethr_x86_cpuid__(&eax, &ebx, &ecx, &edx);
+
+ if (eax < 0x80000001)
+ return;
+
+ if (eax >= 0x80000007) {
+ /* Advanced Power Management Information */
+ eax = 0x80000007;
+ ethr_x86_cpuid__(&eax, &ebx, &ecx, &edx);
+
+ /* I got the values below from:
+ http://lxr.free-electrons.com/source/arch/x86/include/asm/cpufeature.h
+ They can be gotten from the intel/amd manual as well.
+ */
+
+ ethr_runtime__.conf.have_constant_tsc = (edx & (1 << 8));
+ ethr_runtime__.conf.have_tsc_reliable = (edx & (1 << 23));
+ ethr_runtime__.conf.have_nonstop_tsc = (edx & (1 << 24));
+ ethr_runtime__.conf.have_nonstop_tsc_s3 = (edx & (1 << 30));
+
+ }
+
+ /* Extended Processor Info and Feature Bits */
+ eax = 0x80000001;
+ ethr_x86_cpuid__(&eax, &ebx, &ecx, &edx);
+
+ /* bit 27 of edx is set if we have rdtscp */
+ ethr_runtime__.conf.have_rdtscp = (edx & (1 << 27));
+
}
#endif /* ETHR_X86_RUNTIME_CONF__ */
diff --git a/erts/preloaded/ebin/erl_prim_loader.beam b/erts/preloaded/ebin/erl_prim_loader.beam
index f007880683..2a54c10273 100644
--- a/erts/preloaded/ebin/erl_prim_loader.beam
+++ b/erts/preloaded/ebin/erl_prim_loader.beam
Binary files differ
diff --git a/erts/preloaded/ebin/erlang.beam b/erts/preloaded/ebin/erlang.beam
index a89de716bd..f1e48b3282 100644
--- a/erts/preloaded/ebin/erlang.beam
+++ b/erts/preloaded/ebin/erlang.beam
Binary files differ
diff --git a/erts/preloaded/ebin/erts_code_purger.beam b/erts/preloaded/ebin/erts_code_purger.beam
index 37e6b181d6..3d73b0cb1b 100644
--- a/erts/preloaded/ebin/erts_code_purger.beam
+++ b/erts/preloaded/ebin/erts_code_purger.beam
Binary files differ
diff --git a/erts/preloaded/ebin/erts_internal.beam b/erts/preloaded/ebin/erts_internal.beam
index 2380930767..24fa68506b 100644
--- a/erts/preloaded/ebin/erts_internal.beam
+++ b/erts/preloaded/ebin/erts_internal.beam
Binary files differ
diff --git a/erts/preloaded/ebin/init.beam b/erts/preloaded/ebin/init.beam
index 67159713f5..2f66b5f970 100644
--- a/erts/preloaded/ebin/init.beam
+++ b/erts/preloaded/ebin/init.beam
Binary files differ
diff --git a/erts/preloaded/ebin/otp_ring0.beam b/erts/preloaded/ebin/otp_ring0.beam
index fd80258d0e..057fe79054 100644
--- a/erts/preloaded/ebin/otp_ring0.beam
+++ b/erts/preloaded/ebin/otp_ring0.beam
Binary files differ
diff --git a/erts/preloaded/ebin/prim_eval.beam b/erts/preloaded/ebin/prim_eval.beam
index 0e51a1bfc7..33f5de7d3f 100644
--- a/erts/preloaded/ebin/prim_eval.beam
+++ b/erts/preloaded/ebin/prim_eval.beam
Binary files differ
diff --git a/erts/preloaded/ebin/prim_file.beam b/erts/preloaded/ebin/prim_file.beam
index 663f5d46e6..b96927b72b 100644
--- a/erts/preloaded/ebin/prim_file.beam
+++ b/erts/preloaded/ebin/prim_file.beam
Binary files differ
diff --git a/erts/preloaded/ebin/prim_inet.beam b/erts/preloaded/ebin/prim_inet.beam
index d8a8061bae..767a48d781 100644
--- a/erts/preloaded/ebin/prim_inet.beam
+++ b/erts/preloaded/ebin/prim_inet.beam
Binary files differ
diff --git a/erts/preloaded/ebin/prim_zip.beam b/erts/preloaded/ebin/prim_zip.beam
index 1d69c2e583..2bc85bbcbf 100644
--- a/erts/preloaded/ebin/prim_zip.beam
+++ b/erts/preloaded/ebin/prim_zip.beam
Binary files differ
diff --git a/erts/preloaded/ebin/zlib.beam b/erts/preloaded/ebin/zlib.beam
index 2ffd839f8a..0b4afc0a39 100644
--- a/erts/preloaded/ebin/zlib.beam
+++ b/erts/preloaded/ebin/zlib.beam
Binary files differ
diff --git a/erts/preloaded/src/erlang.erl b/erts/preloaded/src/erlang.erl
index bca366681d..ab51cf385c 100644
--- a/erts/preloaded/src/erlang.erl
+++ b/erts/preloaded/src/erlang.erl
@@ -71,7 +71,8 @@
| 'milli_seconds'
| 'micro_seconds'
| 'nano_seconds'
- | 'native'.
+ | 'native'
+ | 'perf_counter'.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Native code BIF stubs and their types
@@ -1347,6 +1348,7 @@ convert_time_unit(Time, FromUnit, ToUnit) ->
try
FU = case FromUnit of
native -> erts_internal:time_unit();
+ perf_counter -> erts_internal:perf_counter_unit();
nano_seconds -> 1000*1000*1000;
micro_seconds -> 1000*1000;
milli_seconds -> 1000;
@@ -1355,6 +1357,7 @@ convert_time_unit(Time, FromUnit, ToUnit) ->
end,
TU = case ToUnit of
native -> erts_internal:time_unit();
+ perf_counter -> erts_internal:perf_counter_unit();
nano_seconds -> 1000*1000*1000;
micro_seconds -> 1000*1000;
milli_seconds -> 1000;
@@ -2214,6 +2217,16 @@ spawn_opt(_Tuple) ->
(io) -> {{input, Input}, {output, Output}} when
Input :: non_neg_integer(),
Output :: non_neg_integer();
+ (microstate_accounting) -> [MSAcc_Thread] | undefined when
+ MSAcc_Thread :: #{ type => MSAcc_Thread_Type,
+ id => MSAcc_Thread_Id,
+ counters => MSAcc_Counters},
+ MSAcc_Thread_Type :: scheduler | async | aux,
+ MSAcc_Thread_Id :: non_neg_integer(),
+ MSAcc_Counters :: #{ MSAcc_Thread_State => non_neg_integer() },
+ MSAcc_Thread_State :: alloc | aux | bif | busy_wait | check_io |
+ emulator | ets | gc | gc_fullsweep | nif |
+ other | port | send | sleep | timers;
(reductions) -> {Total_Reductions,
Reductions_Since_Last_Call} when
Total_Reductions :: non_neg_integer(),
@@ -2268,6 +2281,9 @@ subtract(_,_) ->
(fullsweep_after, Number) -> OldNumber when
Number :: non_neg_integer(),
OldNumber :: non_neg_integer();
+ (microstate_accounting, Action) -> OldState when
+ Action :: true | false | reset,
+ OldState :: true | false;
(min_heap_size, MinHeapSize) -> OldMinHeapSize when
MinHeapSize :: non_neg_integer(),
OldMinHeapSize :: non_neg_integer();
diff --git a/erts/preloaded/src/erts_internal.erl b/erts/preloaded/src/erts_internal.erl
index 84dedab930..a15355bb10 100644
--- a/erts/preloaded/src/erts_internal.erl
+++ b/erts/preloaded/src/erts_internal.erl
@@ -45,10 +45,13 @@
-export([await_result/1, gather_io_bytes/2]).
--export([time_unit/0]).
+-export([time_unit/0, perf_counter_unit/0]).
-export([is_system_process/1]).
+-export([await_microstate_accounting_modifications/3,
+ gather_microstate_accounting_result/2]).
+
%% Auto import name clash
-export([check_process_code/2]).
@@ -345,8 +348,38 @@ flush_monitor_messages(Ref, Multi, Res) when is_reference(Ref) ->
time_unit() ->
erlang:nif_error(undefined).
+-spec erts_internal:perf_counter_unit() -> pos_integer().
+
+perf_counter_unit() ->
+ erlang:nif_error(undefined).
+
-spec erts_internal:is_system_process(Pid) -> boolean() when
Pid :: pid().
is_system_process(_Pid) ->
erlang:nif_error(undefined).
+
+-spec await_microstate_accounting_modifications(Ref, Result, Threads) -> boolean() when
+ Ref :: reference(),
+ Result :: boolean(),
+ Threads :: pos_integer().
+
+await_microstate_accounting_modifications(Ref, Result, Threads) ->
+ _ = microstate_accounting(Ref,Threads),
+ Result.
+
+-spec gather_microstate_accounting_result(Ref, Threads) -> [#{}] when
+ Ref :: reference(),
+ Threads :: pos_integer().
+
+gather_microstate_accounting_result(Ref, Threads) ->
+ microstate_accounting(Ref, Threads).
+
+microstate_accounting(_Ref, 0) ->
+ [];
+microstate_accounting(Ref, Threads) ->
+ receive
+ Ref -> microstate_accounting(Ref, Threads - 1);
+ {Ref, Res} ->
+ [Res | microstate_accounting(Ref, Threads - 1)]
+ end.