diff options
author | Lukas Larsson <[email protected]> | 2014-09-11 18:26:26 +0200 |
---|---|---|
committer | Lukas Larsson <[email protected]> | 2016-02-02 10:45:21 +0100 |
commit | eea5f896780e07f7ca76685061d01e7be5a7abaa (patch) | |
tree | 7d4a852b20f2075637457052844865e1c56b1e21 | |
parent | d76ee58c07f32dfc0652844ec2b513af2105ffa1 (diff) | |
download | otp-eea5f896780e07f7ca76685061d01e7be5a7abaa.tar.gz otp-eea5f896780e07f7ca76685061d01e7be5a7abaa.tar.bz2 otp-eea5f896780e07f7ca76685061d01e7be5a7abaa.zip |
erts, kernel: Add os:perf_counter function
The perf_counter is a very very cheap and high resolution timer
that can be used to timestamp system events. It does not have
monoticity guarantees, but should on most OS's expose a monotonous
time.
A special instruction has been created for this counter to further
speed up fetching it.
OTP-12908
-rw-r--r-- | erts/doc/src/erlang.xml | 11 | ||||
-rw-r--r-- | erts/emulator/beam/beam_emu.c | 32 | ||||
-rw-r--r-- | erts/emulator/beam/bif.tab | 2 | ||||
-rw-r--r-- | erts/emulator/beam/erl_time.h | 2 | ||||
-rw-r--r-- | erts/emulator/beam/erl_time_sup.c | 76 | ||||
-rw-r--r-- | erts/emulator/beam/ops.tab | 9 | ||||
-rw-r--r-- | erts/emulator/sys/unix/erl_unix_sys.h | 30 | ||||
-rw-r--r-- | erts/emulator/sys/unix/sys_time.c | 2 | ||||
-rw-r--r-- | erts/emulator/sys/win32/erl_win_sys.h | 2 | ||||
-rw-r--r-- | erts/preloaded/src/erlang.erl | 5 | ||||
-rw-r--r-- | erts/preloaded/src/erts_internal.erl | 7 | ||||
-rw-r--r-- | lib/kernel/doc/src/os.xml | 25 | ||||
-rw-r--r-- | lib/kernel/src/os.erl | 16 | ||||
-rw-r--r-- | lib/kernel/test/os_SUITE.erl | 46 |
14 files changed, 256 insertions, 9 deletions
diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml index d0d35ea25f..6915b35fcb 100644 --- a/erts/doc/src/erlang.xml +++ b/erts/doc/src/erlang.xml @@ -131,6 +131,17 @@ </note> </item> + <tag><c>perf_counter</c></tag> + <item><p>Symbolic representation of the performance counter + time unit used by the Erlang runtime system.</p> + + <p>The <c>perf_counter</c> time unit behaves much in the same way + as the <c>native</c> time unit. That is it might differ inbetween + run-time restarts. You get values of this type by calling + <seealso marker="kernel:os#perf_counter/0"><c>os:perf_counter()</c></seealso> + </p> + </item> + </taglist> <p>The <c>time_unit/0</c> type may be extended. Use diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 4d7b00b032..a0979c7b0e 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -4895,6 +4895,38 @@ do { \ } } + /* This is optimised as an instruction because + it has to be very very fast */ + OpCase(i_perf_counter): { + BeamInstr* next; + ErtsSysHrTime ts; + PreFetch(0, next); + + sys_perf_counter(&ts); + + if (IS_SSMALL(ts)) { + r(0) = make_small((Sint)ts); + } else { + TestHeap(ERTS_SINT64_HEAP_SIZE(ts),0); + r(0) = make_big(HTOP); +#if defined(ARCH_32) || HALFWORD_HEAP + if (ts >= (((Uint64) 1) << 32)) { + *HTOP = make_pos_bignum_header(2); + BIG_DIGIT(HTOP, 0) = (Uint) (ts & ((Uint) 0xffffffff)); + BIG_DIGIT(HTOP, 1) = (Uint) ((ts >> 32) & ((Uint) 0xffffffff)); + HTOP += 3; + } + else +#endif + { + *HTOP = make_pos_bignum_header(1); + BIG_DIGIT(HTOP, 0) = (Uint) ts; + HTOP += 2; + } + } + NextPF(0, next); + } + OpCase(i_debug_breakpoint): { HEAVY_SWAPOUT; I = call_error_handler(c_p, I-3, reg, am_breakpoint); diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 1b8ae8cef5..4efc055aaf 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -170,6 +170,7 @@ bif erts_internal:term_type/1 bif erts_internal:map_hashmap_children/1 bif erts_internal:time_unit/0 +bif erts_internal:perf_counter_unit/0 bif erts_internal:is_system_process/1 @@ -369,6 +370,7 @@ bif os:getpid/0 bif os:timestamp/0 bif os:system_time/0 bif os:system_time/1 +bif os:perf_counter/0 # # Bifs in the erl_ddll module (the module actually does not exist) diff --git a/erts/emulator/beam/erl_time.h b/erts/emulator/beam/erl_time.h index 93a0d556bf..446adcf4af 100644 --- a/erts/emulator/beam/erl_time.h +++ b/erts/emulator/beam/erl_time.h @@ -142,6 +142,8 @@ erts_time_unit_conversion(Uint64 value, Uint32 from_time_unit, Uint32 to_time_unit); +ErtsSysHrTime erts_perf_counter_unit(void); + #if ERTS_GLB_INLINE_INCL_FUNC_DEF ERTS_GLB_INLINE Uint64 diff --git a/erts/emulator/beam/erl_time_sup.c b/erts/emulator/beam/erl_time_sup.c index 668dbb5daa..6509c6a805 100644 --- a/erts/emulator/beam/erl_time_sup.c +++ b/erts/emulator/beam/erl_time_sup.c @@ -1749,7 +1749,7 @@ erts_get_monotonic_time(ErtsSchedulerData *esdp) { ErtsMonotonicTime mtime = time_sup.r.o.get_time(); update_last_mtime(esdp, mtime); - return mtime; + return mtime; } ErtsMonotonicTime @@ -2435,9 +2435,81 @@ BIF_RETTYPE os_system_time_0(BIF_ALIST_0) BIF_RET(make_time_val(BIF_P, stime)); } -BIF_RETTYPE os_system_time_1(BIF_ALIST_0) +BIF_RETTYPE os_system_time_1(BIF_ALIST_1) { ErtsSystemTime stime = erts_os_system_time(); BIF_RET(time_unit_conversion(BIF_P, BIF_ARG_1, stime, 0)); } +BIF_RETTYPE +os_perf_counter_0(BIF_ALIST_0) +{ + ErtsSysHrTime pcounter; + sys_perf_counter(&pcounter); + BIF_RET(make_time_val(BIF_P, pcounter)); +} + +BIF_RETTYPE erts_internal_perf_counter_unit_0(BIF_ALIST_0) +{ + BIF_RET(make_time_val(BIF_P, SYS_PERF_COUNTER_UNIT)); +} + +/* What resolution to spin to in micro seconds */ +#define RESOLUTION 100 +/* How many iterations to spin */ +#define ITERATIONS 1 +/* How many significant figures to round to */ +#define SIGFIGS 3 + +static ErtsSysHrTime perf_counter_unit = 0; + +static ErtsSysHrTime erts_calculate_perf_counter_unit(void); +static ErtsSysHrTime erts_calculate_perf_counter_unit() { + int i; + ErtsSysHrTime pre, post; + double value = 0; + double round_factor; +#if defined(HAVE_GETHRTIME) && defined(GETHRTIME_WITH_CLOCK_GETTIME) + struct timespec basetime,comparetime; +#define __GETTIME(arg) clock_gettime(CLOCK_MONOTONIC,arg) +#define __GETUSEC(arg) (arg.tv_nsec / 1000) +#else + SysTimeval basetime,comparetime; +#define __GETTIME(arg) sys_gettimeofday(arg) +#define __GETUSEC(arg) arg.tv_usec +#endif + + for (i = 0; i < ITERATIONS; i++) { + /* Make sure usec just flipped over at current resolution */ + __GETTIME(&basetime); + do { + __GETTIME(&comparetime); + } while ((__GETUSEC(basetime) / RESOLUTION) == (__GETUSEC(comparetime) / RESOLUTION)); + + sys_perf_counter(&pre); + + __GETTIME(&basetime); + do { + __GETTIME(&comparetime); + } while ((__GETUSEC(basetime) / RESOLUTION) == (__GETUSEC(comparetime) / RESOLUTION)); + + sys_perf_counter(&post); + + value += post - pre; + } + /* After this value is ticks per us */ + value /= (RESOLUTION*ITERATIONS); + + /* We round to 3 significant figures */ + round_factor = pow(10.0, SIGFIGS - ceil(log10(value))); + value = ((ErtsSysHrTime)(value * round_factor + 0.5)) / round_factor; + + /* convert to ticks per second */ + return 1000000 * value; +} + +ErtsSysHrTime erts_perf_counter_unit() { + if (perf_counter_unit == 0) + perf_counter_unit = erts_calculate_perf_counter_unit(); + return perf_counter_unit; +} diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 081c4108a0..9e53b4bfcc 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -988,6 +988,13 @@ move Discarded x==0 | move Something x==0 => move Something x=0 %endif +call_ext u==0 u$func:os:perf_counter/0 => \ + i_perf_counter +call_ext_last u==0 u$func:os:perf_counter/0 D => \ + i_perf_counter | deallocate_return D +call_ext_only u==0 u$func:os:perf_counter/0 => \ + i_perf_counter | return + # # The general case for BIFs that have no special instructions. # A BIF used in the tail must be followed by a return instruction. @@ -1027,6 +1034,8 @@ i_apply_fun_only i_hibernate +i_perf_counter + call_bif e # diff --git a/erts/emulator/sys/unix/erl_unix_sys.h b/erts/emulator/sys/unix/erl_unix_sys.h index 0352ee1b3c..9f137048de 100644 --- a/erts/emulator/sys/unix/erl_unix_sys.h +++ b/erts/emulator/sys/unix/erl_unix_sys.h @@ -101,6 +101,10 @@ #endif #include <netdb.h> +#ifdef HAVE_MACH_ABSOLUTE_TIME +#include <mach/mach_time.h> +#endif + #ifdef HAVE_POSIX_MEMALIGN # define ERTS_HAVE_ERTS_SYS_ALIGNED_ALLOC 1 #endif @@ -266,7 +270,31 @@ erts_os_times(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep) #endif /* ERTS_HAVE_OS_MONOTONIC_TIME_SUPPORT */ /* - * + * Functions for getting the performance counter + */ + +#if defined(__x86_64__) + /* available on all x86_64. Best if used when we have constant_tsc and + nonstop_tsc cpu features. It may have been a good idea to put the + cpuid instruction before the rdtsc, but I decided against it + because it is not really needed for msacc, and it slows it down by + quite a bit. As a result though, this timestamp becomes much less + accurate as it might be re-ordered to be executed way before this + function is called. + */ +#define sys_perf_counter(ts) do { \ + __asm__ __volatile__ ("rdtsc\n\t" \ + "shl $32, %%rdx\n\t" \ + "or %%rdx, %0" : "=a" (*ts) : : "rdx"); \ + } while(0) +#define SYS_PERF_COUNTER_UNIT erts_perf_counter_unit() +#else +#define sys_perf_counter(ts) *(ts) = erts_get_perf_counter() +#define SYS_PERF_COUNTER_UNIT 1000000000LL +#endif + +/* + * Functions for measuring CPU time */ #if (defined(HAVE_GETHRVTIME) || defined(HAVE_CLOCK_GETTIME_CPU_TIME)) diff --git a/erts/emulator/sys/unix/sys_time.c b/erts/emulator/sys/unix/sys_time.c index 2e1914f564..6fc4fc7dc4 100644 --- a/erts/emulator/sys/unix/sys_time.c +++ b/erts/emulator/sys/unix/sys_time.c @@ -912,6 +912,8 @@ erts_os_times(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep) #ifdef HAVE_GETHRVTIME_PROCFS_IOCTL +/* The code below only has effect on solaris < 10, + needed in order for gehhrvtime to work properly */ int sys_start_hrvtime(void) { long msacct = PR_MSACCT; diff --git a/erts/emulator/sys/win32/erl_win_sys.h b/erts/emulator/sys/win32/erl_win_sys.h index 7e8dd8a4ca..1ab9eadefd 100644 --- a/erts/emulator/sys/win32/erl_win_sys.h +++ b/erts/emulator/sys/win32/erl_win_sys.h @@ -238,6 +238,8 @@ erts_sys_hrtime(void) extern void sys_gettimeofday(SysTimeval *tv); extern clock_t sys_times(SysTimes *buffer); +#define sys_perf_counter(ts) *(ts) = erts_sys_hrtime() +#define SYS_PERF_COUNTER_UNIT ERTS_I64_LITERAL(1000000000) extern char *win_build_environment(char *); diff --git a/erts/preloaded/src/erlang.erl b/erts/preloaded/src/erlang.erl index bca366681d..6f56a81eec 100644 --- a/erts/preloaded/src/erlang.erl +++ b/erts/preloaded/src/erlang.erl @@ -71,7 +71,8 @@ | 'milli_seconds' | 'micro_seconds' | 'nano_seconds' - | 'native'. + | 'native' + | 'perf_counter'. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Native code BIF stubs and their types @@ -1347,6 +1348,7 @@ convert_time_unit(Time, FromUnit, ToUnit) -> try FU = case FromUnit of native -> erts_internal:time_unit(); + perf_counter -> erts_internal:perf_counter_unit(); nano_seconds -> 1000*1000*1000; micro_seconds -> 1000*1000; milli_seconds -> 1000; @@ -1355,6 +1357,7 @@ convert_time_unit(Time, FromUnit, ToUnit) -> end, TU = case ToUnit of native -> erts_internal:time_unit(); + perf_counter -> erts_internal:perf_counter_unit(); nano_seconds -> 1000*1000*1000; micro_seconds -> 1000*1000; milli_seconds -> 1000; diff --git a/erts/preloaded/src/erts_internal.erl b/erts/preloaded/src/erts_internal.erl index 84dedab930..c3a14d272d 100644 --- a/erts/preloaded/src/erts_internal.erl +++ b/erts/preloaded/src/erts_internal.erl @@ -45,7 +45,7 @@ -export([await_result/1, gather_io_bytes/2]). --export([time_unit/0]). +-export([time_unit/0, perf_counter_unit/0]). -export([is_system_process/1]). @@ -345,6 +345,11 @@ flush_monitor_messages(Ref, Multi, Res) when is_reference(Ref) -> time_unit() -> erlang:nif_error(undefined). +-spec erts_internal:perf_counter_unit() -> pos_integer(). + +perf_counter_unit() -> + erlang:nif_error(undefined). + -spec erts_internal:is_system_process(Pid) -> boolean() when Pid :: pid(). diff --git a/lib/kernel/doc/src/os.xml b/lib/kernel/doc/src/os.xml index 682d4a2eac..165160a909 100644 --- a/lib/kernel/doc/src/os.xml +++ b/lib/kernel/doc/src/os.xml @@ -37,6 +37,7 @@ use these functions can be of help in enabling a program to run on most platforms.</p> </description> + <funcs> <func> <name name="cmd" arity="1"/> @@ -210,6 +211,30 @@ format_utc_timestamp() -> </desc> </func> <func> + <name name="perf_counter" arity="0"/> + <fsummary>Returns a performance counter</fsummary> + <desc> + <p>Returns the current performance counter value in <c>perf_counter</c> + <seealso marker="erts:erlang#type_time_unit">time unit</seealso>. + This is a highly optimized call that might not be traceable. + </p> + </desc> + </func> + <func> + <name name="perf_counter" arity="1"/> + <fsummary>Returns a performance counter</fsummary> + <desc><p>Returns a performance counter that can be used as a very fast and + high resolution timestamp. This counter is read directly from the hardware or operating + system with the same guarantees. This means that two consecutive calls + to the function are not guaranteed to be monotonic, though it most likely will be. + The performance counter till be converted to the resolution passed as an argument.</p> + <pre>1> <input>T1 = os:perf_counter(1000),receive after 10000 -> ok end,T2 = os:perf_counter(1000).</input> +176525861 +2> <input>T2 - T1.</input> +10004</pre> + </desc> + </func> + <func> <name name="type" arity="0"/> <fsummary>Return the OS family and, in some cases, OS name of the current operating system</fsummary> <desc> diff --git a/lib/kernel/src/os.erl b/lib/kernel/src/os.erl index 0022959c11..4947088635 100644 --- a/lib/kernel/src/os.erl +++ b/lib/kernel/src/os.erl @@ -27,7 +27,9 @@ %%% BIFs --export([getenv/0, getenv/1, getenv/2, getpid/0, putenv/2, system_time/0, system_time/1, +-export([getenv/0, getenv/1, getenv/2, getpid/0, + perf_counter/0, perf_counter/1, + putenv/2, system_time/0, system_time/1, timestamp/0, unsetenv/1]). -spec getenv() -> [string()]. @@ -60,6 +62,18 @@ getenv(VarName, DefaultValue) -> getpid() -> erlang:nif_error(undef). +-spec perf_counter() -> Counter when + Counter :: integer(). + +perf_counter() -> + erlang:nif_error(undef). + +-spec perf_counter(Unit) -> integer() when + Unit :: erlang:time_unit(). + +perf_counter(Unit) -> + erlang:convert_time_unit(os:perf_counter(), perf_counter, Unit). + -spec putenv(VarName, Value) -> true when VarName :: string(), Value :: string(). diff --git a/lib/kernel/test/os_SUITE.erl b/lib/kernel/test/os_SUITE.erl index 83a95019e7..e6e8568394 100644 --- a/lib/kernel/test/os_SUITE.erl +++ b/lib/kernel/test/os_SUITE.erl @@ -20,10 +20,11 @@ -module(os_SUITE). -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, - init_per_group/2,end_per_group/2]). + init_per_group/2,end_per_group/2, + init_per_testcase/2,end_per_testcase/2]). -export([space_in_cwd/1, quoting/1, cmd_unicode/1, space_in_name/1, bad_command/1, find_executable/1, unix_comment_in_command/1, deep_list_command/1, - large_output_command/1]). + large_output_command/1, perf_counter_api/1]). -include_lib("test_server/include/test_server.hrl"). @@ -32,7 +33,7 @@ suite() -> [{ct_hooks,[ts_install_cth]}]. all() -> [space_in_cwd, quoting, cmd_unicode, space_in_name, bad_command, find_executable, unix_comment_in_command, deep_list_command, - large_output_command]. + large_output_command, perf_counter_api]. groups() -> []. @@ -49,6 +50,11 @@ init_per_group(_GroupName, Config) -> end_per_group(_GroupName, Config) -> Config. +init_per_testcase(_TC,Config) -> + Config. + +end_per_testcase(_,_Config) -> + ok. space_in_cwd(doc) -> "Test that executing a command in a current working directory " @@ -278,6 +284,40 @@ large_output_command(Config) when is_list(Config) -> AAA = lists:duplicate(7000, $a), comp(AAA,os:cmd("echo " ++ AAA)). +%% Test that the os:perf_counter api works as expected +perf_counter_api(_Config) -> + + true = is_integer(os:perf_counter()), + true = os:perf_counter() > 0, + + T1 = os:perf_counter(), + timer:sleep(100), + T2 = os:perf_counter(), + TsDiff = erlang:convert_time_unit(T2 - T1, perf_counter, nano_seconds), + ct:pal("T1: ~p~n" + "T2: ~p~n" + "TsDiff: ~p~n", + [T1,T2,TsDiff]), + + %% We allow a 15% diff + true = TsDiff < 115000000, + true = TsDiff > 85000000, + + T1Ms = os:perf_counter(1000), + timer:sleep(100), + T2Ms = os:perf_counter(1000), + MsDiff = T2Ms - T1Ms, + ct:pal("T1Ms: ~p~n" + "T2Ms: ~p~n" + "MsDiff: ~p~n", + [T1Ms,T2Ms,MsDiff]), + + %% We allow a 15% diff + true = MsDiff < 115, + true = MsDiff > 85. + +%% Util functions + comp(Expected, Got) -> case strip_nl(Got) of Expected -> |