aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLukas Larsson <[email protected]>2014-09-11 18:26:26 +0200
committerLukas Larsson <[email protected]>2016-02-02 10:45:21 +0100
commiteea5f896780e07f7ca76685061d01e7be5a7abaa (patch)
tree7d4a852b20f2075637457052844865e1c56b1e21
parentd76ee58c07f32dfc0652844ec2b513af2105ffa1 (diff)
downloadotp-eea5f896780e07f7ca76685061d01e7be5a7abaa.tar.gz
otp-eea5f896780e07f7ca76685061d01e7be5a7abaa.tar.bz2
otp-eea5f896780e07f7ca76685061d01e7be5a7abaa.zip
erts, kernel: Add os:perf_counter function
The perf_counter is a very very cheap and high resolution timer that can be used to timestamp system events. It does not have monoticity guarantees, but should on most OS's expose a monotonous time. A special instruction has been created for this counter to further speed up fetching it. OTP-12908
-rw-r--r--erts/doc/src/erlang.xml11
-rw-r--r--erts/emulator/beam/beam_emu.c32
-rw-r--r--erts/emulator/beam/bif.tab2
-rw-r--r--erts/emulator/beam/erl_time.h2
-rw-r--r--erts/emulator/beam/erl_time_sup.c76
-rw-r--r--erts/emulator/beam/ops.tab9
-rw-r--r--erts/emulator/sys/unix/erl_unix_sys.h30
-rw-r--r--erts/emulator/sys/unix/sys_time.c2
-rw-r--r--erts/emulator/sys/win32/erl_win_sys.h2
-rw-r--r--erts/preloaded/src/erlang.erl5
-rw-r--r--erts/preloaded/src/erts_internal.erl7
-rw-r--r--lib/kernel/doc/src/os.xml25
-rw-r--r--lib/kernel/src/os.erl16
-rw-r--r--lib/kernel/test/os_SUITE.erl46
14 files changed, 256 insertions, 9 deletions
diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml
index d0d35ea25f..6915b35fcb 100644
--- a/erts/doc/src/erlang.xml
+++ b/erts/doc/src/erlang.xml
@@ -131,6 +131,17 @@
</note>
</item>
+ <tag><c>perf_counter</c></tag>
+ <item><p>Symbolic representation of the performance counter
+ time unit used by the Erlang runtime system.</p>
+
+ <p>The <c>perf_counter</c> time unit behaves much in the same way
+ as the <c>native</c> time unit. That is it might differ inbetween
+ run-time restarts. You get values of this type by calling
+ <seealso marker="kernel:os#perf_counter/0"><c>os:perf_counter()</c></seealso>
+ </p>
+ </item>
+
</taglist>
<p>The <c>time_unit/0</c> type may be extended. Use
diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c
index 4d7b00b032..a0979c7b0e 100644
--- a/erts/emulator/beam/beam_emu.c
+++ b/erts/emulator/beam/beam_emu.c
@@ -4895,6 +4895,38 @@ do { \
}
}
+ /* This is optimised as an instruction because
+ it has to be very very fast */
+ OpCase(i_perf_counter): {
+ BeamInstr* next;
+ ErtsSysHrTime ts;
+ PreFetch(0, next);
+
+ sys_perf_counter(&ts);
+
+ if (IS_SSMALL(ts)) {
+ r(0) = make_small((Sint)ts);
+ } else {
+ TestHeap(ERTS_SINT64_HEAP_SIZE(ts),0);
+ r(0) = make_big(HTOP);
+#if defined(ARCH_32) || HALFWORD_HEAP
+ if (ts >= (((Uint64) 1) << 32)) {
+ *HTOP = make_pos_bignum_header(2);
+ BIG_DIGIT(HTOP, 0) = (Uint) (ts & ((Uint) 0xffffffff));
+ BIG_DIGIT(HTOP, 1) = (Uint) ((ts >> 32) & ((Uint) 0xffffffff));
+ HTOP += 3;
+ }
+ else
+#endif
+ {
+ *HTOP = make_pos_bignum_header(1);
+ BIG_DIGIT(HTOP, 0) = (Uint) ts;
+ HTOP += 2;
+ }
+ }
+ NextPF(0, next);
+ }
+
OpCase(i_debug_breakpoint): {
HEAVY_SWAPOUT;
I = call_error_handler(c_p, I-3, reg, am_breakpoint);
diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab
index 1b8ae8cef5..4efc055aaf 100644
--- a/erts/emulator/beam/bif.tab
+++ b/erts/emulator/beam/bif.tab
@@ -170,6 +170,7 @@ bif erts_internal:term_type/1
bif erts_internal:map_hashmap_children/1
bif erts_internal:time_unit/0
+bif erts_internal:perf_counter_unit/0
bif erts_internal:is_system_process/1
@@ -369,6 +370,7 @@ bif os:getpid/0
bif os:timestamp/0
bif os:system_time/0
bif os:system_time/1
+bif os:perf_counter/0
#
# Bifs in the erl_ddll module (the module actually does not exist)
diff --git a/erts/emulator/beam/erl_time.h b/erts/emulator/beam/erl_time.h
index 93a0d556bf..446adcf4af 100644
--- a/erts/emulator/beam/erl_time.h
+++ b/erts/emulator/beam/erl_time.h
@@ -142,6 +142,8 @@ erts_time_unit_conversion(Uint64 value,
Uint32 from_time_unit,
Uint32 to_time_unit);
+ErtsSysHrTime erts_perf_counter_unit(void);
+
#if ERTS_GLB_INLINE_INCL_FUNC_DEF
ERTS_GLB_INLINE Uint64
diff --git a/erts/emulator/beam/erl_time_sup.c b/erts/emulator/beam/erl_time_sup.c
index 668dbb5daa..6509c6a805 100644
--- a/erts/emulator/beam/erl_time_sup.c
+++ b/erts/emulator/beam/erl_time_sup.c
@@ -1749,7 +1749,7 @@ erts_get_monotonic_time(ErtsSchedulerData *esdp)
{
ErtsMonotonicTime mtime = time_sup.r.o.get_time();
update_last_mtime(esdp, mtime);
- return mtime;
+ return mtime;
}
ErtsMonotonicTime
@@ -2435,9 +2435,81 @@ BIF_RETTYPE os_system_time_0(BIF_ALIST_0)
BIF_RET(make_time_val(BIF_P, stime));
}
-BIF_RETTYPE os_system_time_1(BIF_ALIST_0)
+BIF_RETTYPE os_system_time_1(BIF_ALIST_1)
{
ErtsSystemTime stime = erts_os_system_time();
BIF_RET(time_unit_conversion(BIF_P, BIF_ARG_1, stime, 0));
}
+BIF_RETTYPE
+os_perf_counter_0(BIF_ALIST_0)
+{
+ ErtsSysHrTime pcounter;
+ sys_perf_counter(&pcounter);
+ BIF_RET(make_time_val(BIF_P, pcounter));
+}
+
+BIF_RETTYPE erts_internal_perf_counter_unit_0(BIF_ALIST_0)
+{
+ BIF_RET(make_time_val(BIF_P, SYS_PERF_COUNTER_UNIT));
+}
+
+/* What resolution to spin to in micro seconds */
+#define RESOLUTION 100
+/* How many iterations to spin */
+#define ITERATIONS 1
+/* How many significant figures to round to */
+#define SIGFIGS 3
+
+static ErtsSysHrTime perf_counter_unit = 0;
+
+static ErtsSysHrTime erts_calculate_perf_counter_unit(void);
+static ErtsSysHrTime erts_calculate_perf_counter_unit() {
+ int i;
+ ErtsSysHrTime pre, post;
+ double value = 0;
+ double round_factor;
+#if defined(HAVE_GETHRTIME) && defined(GETHRTIME_WITH_CLOCK_GETTIME)
+ struct timespec basetime,comparetime;
+#define __GETTIME(arg) clock_gettime(CLOCK_MONOTONIC,arg)
+#define __GETUSEC(arg) (arg.tv_nsec / 1000)
+#else
+ SysTimeval basetime,comparetime;
+#define __GETTIME(arg) sys_gettimeofday(arg)
+#define __GETUSEC(arg) arg.tv_usec
+#endif
+
+ for (i = 0; i < ITERATIONS; i++) {
+ /* Make sure usec just flipped over at current resolution */
+ __GETTIME(&basetime);
+ do {
+ __GETTIME(&comparetime);
+ } while ((__GETUSEC(basetime) / RESOLUTION) == (__GETUSEC(comparetime) / RESOLUTION));
+
+ sys_perf_counter(&pre);
+
+ __GETTIME(&basetime);
+ do {
+ __GETTIME(&comparetime);
+ } while ((__GETUSEC(basetime) / RESOLUTION) == (__GETUSEC(comparetime) / RESOLUTION));
+
+ sys_perf_counter(&post);
+
+ value += post - pre;
+ }
+ /* After this value is ticks per us */
+ value /= (RESOLUTION*ITERATIONS);
+
+ /* We round to 3 significant figures */
+ round_factor = pow(10.0, SIGFIGS - ceil(log10(value)));
+ value = ((ErtsSysHrTime)(value * round_factor + 0.5)) / round_factor;
+
+ /* convert to ticks per second */
+ return 1000000 * value;
+}
+
+ErtsSysHrTime erts_perf_counter_unit() {
+ if (perf_counter_unit == 0)
+ perf_counter_unit = erts_calculate_perf_counter_unit();
+ return perf_counter_unit;
+}
diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab
index 081c4108a0..9e53b4bfcc 100644
--- a/erts/emulator/beam/ops.tab
+++ b/erts/emulator/beam/ops.tab
@@ -988,6 +988,13 @@ move Discarded x==0 | move Something x==0 => move Something x=0
%endif
+call_ext u==0 u$func:os:perf_counter/0 => \
+ i_perf_counter
+call_ext_last u==0 u$func:os:perf_counter/0 D => \
+ i_perf_counter | deallocate_return D
+call_ext_only u==0 u$func:os:perf_counter/0 => \
+ i_perf_counter | return
+
#
# The general case for BIFs that have no special instructions.
# A BIF used in the tail must be followed by a return instruction.
@@ -1027,6 +1034,8 @@ i_apply_fun_only
i_hibernate
+i_perf_counter
+
call_bif e
#
diff --git a/erts/emulator/sys/unix/erl_unix_sys.h b/erts/emulator/sys/unix/erl_unix_sys.h
index 0352ee1b3c..9f137048de 100644
--- a/erts/emulator/sys/unix/erl_unix_sys.h
+++ b/erts/emulator/sys/unix/erl_unix_sys.h
@@ -101,6 +101,10 @@
#endif
#include <netdb.h>
+#ifdef HAVE_MACH_ABSOLUTE_TIME
+#include <mach/mach_time.h>
+#endif
+
#ifdef HAVE_POSIX_MEMALIGN
# define ERTS_HAVE_ERTS_SYS_ALIGNED_ALLOC 1
#endif
@@ -266,7 +270,31 @@ erts_os_times(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep)
#endif /* ERTS_HAVE_OS_MONOTONIC_TIME_SUPPORT */
/*
- *
+ * Functions for getting the performance counter
+ */
+
+#if defined(__x86_64__)
+ /* available on all x86_64. Best if used when we have constant_tsc and
+ nonstop_tsc cpu features. It may have been a good idea to put the
+ cpuid instruction before the rdtsc, but I decided against it
+ because it is not really needed for msacc, and it slows it down by
+ quite a bit. As a result though, this timestamp becomes much less
+ accurate as it might be re-ordered to be executed way before this
+ function is called.
+ */
+#define sys_perf_counter(ts) do { \
+ __asm__ __volatile__ ("rdtsc\n\t" \
+ "shl $32, %%rdx\n\t" \
+ "or %%rdx, %0" : "=a" (*ts) : : "rdx"); \
+ } while(0)
+#define SYS_PERF_COUNTER_UNIT erts_perf_counter_unit()
+#else
+#define sys_perf_counter(ts) *(ts) = erts_get_perf_counter()
+#define SYS_PERF_COUNTER_UNIT 1000000000LL
+#endif
+
+/*
+ * Functions for measuring CPU time
*/
#if (defined(HAVE_GETHRVTIME) || defined(HAVE_CLOCK_GETTIME_CPU_TIME))
diff --git a/erts/emulator/sys/unix/sys_time.c b/erts/emulator/sys/unix/sys_time.c
index 2e1914f564..6fc4fc7dc4 100644
--- a/erts/emulator/sys/unix/sys_time.c
+++ b/erts/emulator/sys/unix/sys_time.c
@@ -912,6 +912,8 @@ erts_os_times(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep)
#ifdef HAVE_GETHRVTIME_PROCFS_IOCTL
+/* The code below only has effect on solaris < 10,
+ needed in order for gehhrvtime to work properly */
int sys_start_hrvtime(void)
{
long msacct = PR_MSACCT;
diff --git a/erts/emulator/sys/win32/erl_win_sys.h b/erts/emulator/sys/win32/erl_win_sys.h
index 7e8dd8a4ca..1ab9eadefd 100644
--- a/erts/emulator/sys/win32/erl_win_sys.h
+++ b/erts/emulator/sys/win32/erl_win_sys.h
@@ -238,6 +238,8 @@ erts_sys_hrtime(void)
extern void sys_gettimeofday(SysTimeval *tv);
extern clock_t sys_times(SysTimes *buffer);
+#define sys_perf_counter(ts) *(ts) = erts_sys_hrtime()
+#define SYS_PERF_COUNTER_UNIT ERTS_I64_LITERAL(1000000000)
extern char *win_build_environment(char *);
diff --git a/erts/preloaded/src/erlang.erl b/erts/preloaded/src/erlang.erl
index bca366681d..6f56a81eec 100644
--- a/erts/preloaded/src/erlang.erl
+++ b/erts/preloaded/src/erlang.erl
@@ -71,7 +71,8 @@
| 'milli_seconds'
| 'micro_seconds'
| 'nano_seconds'
- | 'native'.
+ | 'native'
+ | 'perf_counter'.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Native code BIF stubs and their types
@@ -1347,6 +1348,7 @@ convert_time_unit(Time, FromUnit, ToUnit) ->
try
FU = case FromUnit of
native -> erts_internal:time_unit();
+ perf_counter -> erts_internal:perf_counter_unit();
nano_seconds -> 1000*1000*1000;
micro_seconds -> 1000*1000;
milli_seconds -> 1000;
@@ -1355,6 +1357,7 @@ convert_time_unit(Time, FromUnit, ToUnit) ->
end,
TU = case ToUnit of
native -> erts_internal:time_unit();
+ perf_counter -> erts_internal:perf_counter_unit();
nano_seconds -> 1000*1000*1000;
micro_seconds -> 1000*1000;
milli_seconds -> 1000;
diff --git a/erts/preloaded/src/erts_internal.erl b/erts/preloaded/src/erts_internal.erl
index 84dedab930..c3a14d272d 100644
--- a/erts/preloaded/src/erts_internal.erl
+++ b/erts/preloaded/src/erts_internal.erl
@@ -45,7 +45,7 @@
-export([await_result/1, gather_io_bytes/2]).
--export([time_unit/0]).
+-export([time_unit/0, perf_counter_unit/0]).
-export([is_system_process/1]).
@@ -345,6 +345,11 @@ flush_monitor_messages(Ref, Multi, Res) when is_reference(Ref) ->
time_unit() ->
erlang:nif_error(undefined).
+-spec erts_internal:perf_counter_unit() -> pos_integer().
+
+perf_counter_unit() ->
+ erlang:nif_error(undefined).
+
-spec erts_internal:is_system_process(Pid) -> boolean() when
Pid :: pid().
diff --git a/lib/kernel/doc/src/os.xml b/lib/kernel/doc/src/os.xml
index 682d4a2eac..165160a909 100644
--- a/lib/kernel/doc/src/os.xml
+++ b/lib/kernel/doc/src/os.xml
@@ -37,6 +37,7 @@
use these functions can be of help in enabling a program to run on
most platforms.</p>
</description>
+
<funcs>
<func>
<name name="cmd" arity="1"/>
@@ -210,6 +211,30 @@ format_utc_timestamp() ->
</desc>
</func>
<func>
+ <name name="perf_counter" arity="0"/>
+ <fsummary>Returns a performance counter</fsummary>
+ <desc>
+ <p>Returns the current performance counter value in <c>perf_counter</c>
+ <seealso marker="erts:erlang#type_time_unit">time unit</seealso>.
+ This is a highly optimized call that might not be traceable.
+ </p>
+ </desc>
+ </func>
+ <func>
+ <name name="perf_counter" arity="1"/>
+ <fsummary>Returns a performance counter</fsummary>
+ <desc><p>Returns a performance counter that can be used as a very fast and
+ high resolution timestamp. This counter is read directly from the hardware or operating
+ system with the same guarantees. This means that two consecutive calls
+ to the function are not guaranteed to be monotonic, though it most likely will be.
+ The performance counter till be converted to the resolution passed as an argument.</p>
+ <pre>1> <input>T1 = os:perf_counter(1000),receive after 10000 -> ok end,T2 = os:perf_counter(1000).</input>
+176525861
+2> <input>T2 - T1.</input>
+10004</pre>
+ </desc>
+ </func>
+ <func>
<name name="type" arity="0"/>
<fsummary>Return the OS family and, in some cases, OS name of the current operating system</fsummary>
<desc>
diff --git a/lib/kernel/src/os.erl b/lib/kernel/src/os.erl
index 0022959c11..4947088635 100644
--- a/lib/kernel/src/os.erl
+++ b/lib/kernel/src/os.erl
@@ -27,7 +27,9 @@
%%% BIFs
--export([getenv/0, getenv/1, getenv/2, getpid/0, putenv/2, system_time/0, system_time/1,
+-export([getenv/0, getenv/1, getenv/2, getpid/0,
+ perf_counter/0, perf_counter/1,
+ putenv/2, system_time/0, system_time/1,
timestamp/0, unsetenv/1]).
-spec getenv() -> [string()].
@@ -60,6 +62,18 @@ getenv(VarName, DefaultValue) ->
getpid() ->
erlang:nif_error(undef).
+-spec perf_counter() -> Counter when
+ Counter :: integer().
+
+perf_counter() ->
+ erlang:nif_error(undef).
+
+-spec perf_counter(Unit) -> integer() when
+ Unit :: erlang:time_unit().
+
+perf_counter(Unit) ->
+ erlang:convert_time_unit(os:perf_counter(), perf_counter, Unit).
+
-spec putenv(VarName, Value) -> true when
VarName :: string(),
Value :: string().
diff --git a/lib/kernel/test/os_SUITE.erl b/lib/kernel/test/os_SUITE.erl
index 83a95019e7..e6e8568394 100644
--- a/lib/kernel/test/os_SUITE.erl
+++ b/lib/kernel/test/os_SUITE.erl
@@ -20,10 +20,11 @@
-module(os_SUITE).
-export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1,
- init_per_group/2,end_per_group/2]).
+ init_per_group/2,end_per_group/2,
+ init_per_testcase/2,end_per_testcase/2]).
-export([space_in_cwd/1, quoting/1, cmd_unicode/1, space_in_name/1, bad_command/1,
find_executable/1, unix_comment_in_command/1, deep_list_command/1,
- large_output_command/1]).
+ large_output_command/1, perf_counter_api/1]).
-include_lib("test_server/include/test_server.hrl").
@@ -32,7 +33,7 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
all() ->
[space_in_cwd, quoting, cmd_unicode, space_in_name, bad_command,
find_executable, unix_comment_in_command, deep_list_command,
- large_output_command].
+ large_output_command, perf_counter_api].
groups() ->
[].
@@ -49,6 +50,11 @@ init_per_group(_GroupName, Config) ->
end_per_group(_GroupName, Config) ->
Config.
+init_per_testcase(_TC,Config) ->
+ Config.
+
+end_per_testcase(_,_Config) ->
+ ok.
space_in_cwd(doc) ->
"Test that executing a command in a current working directory "
@@ -278,6 +284,40 @@ large_output_command(Config) when is_list(Config) ->
AAA = lists:duplicate(7000, $a),
comp(AAA,os:cmd("echo " ++ AAA)).
+%% Test that the os:perf_counter api works as expected
+perf_counter_api(_Config) ->
+
+ true = is_integer(os:perf_counter()),
+ true = os:perf_counter() > 0,
+
+ T1 = os:perf_counter(),
+ timer:sleep(100),
+ T2 = os:perf_counter(),
+ TsDiff = erlang:convert_time_unit(T2 - T1, perf_counter, nano_seconds),
+ ct:pal("T1: ~p~n"
+ "T2: ~p~n"
+ "TsDiff: ~p~n",
+ [T1,T2,TsDiff]),
+
+ %% We allow a 15% diff
+ true = TsDiff < 115000000,
+ true = TsDiff > 85000000,
+
+ T1Ms = os:perf_counter(1000),
+ timer:sleep(100),
+ T2Ms = os:perf_counter(1000),
+ MsDiff = T2Ms - T1Ms,
+ ct:pal("T1Ms: ~p~n"
+ "T2Ms: ~p~n"
+ "MsDiff: ~p~n",
+ [T1Ms,T2Ms,MsDiff]),
+
+ %% We allow a 15% diff
+ true = MsDiff < 115,
+ true = MsDiff > 85.
+
+%% Util functions
+
comp(Expected, Got) ->
case strip_nl(Got) of
Expected ->