From eea5f896780e07f7ca76685061d01e7be5a7abaa Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Thu, 11 Sep 2014 18:26:26 +0200 Subject: erts, kernel: Add os:perf_counter function The perf_counter is a very very cheap and high resolution timer that can be used to timestamp system events. It does not have monoticity guarantees, but should on most OS's expose a monotonous time. A special instruction has been created for this counter to further speed up fetching it. OTP-12908 --- erts/doc/src/erlang.xml | 11 +++++ erts/emulator/beam/beam_emu.c | 32 +++++++++++++++ erts/emulator/beam/bif.tab | 2 + erts/emulator/beam/erl_time.h | 2 + erts/emulator/beam/erl_time_sup.c | 76 ++++++++++++++++++++++++++++++++++- erts/emulator/beam/ops.tab | 9 +++++ erts/emulator/sys/unix/erl_unix_sys.h | 30 +++++++++++++- erts/emulator/sys/unix/sys_time.c | 2 + erts/emulator/sys/win32/erl_win_sys.h | 2 + erts/preloaded/src/erlang.erl | 5 ++- erts/preloaded/src/erts_internal.erl | 7 +++- 11 files changed, 173 insertions(+), 5 deletions(-) (limited to 'erts') diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml index d0d35ea25f..6915b35fcb 100644 --- a/erts/doc/src/erlang.xml +++ b/erts/doc/src/erlang.xml @@ -131,6 +131,17 @@ + perf_counter +

Symbolic representation of the performance counter + time unit used by the Erlang runtime system.

+ +

The perf_counter time unit behaves much in the same way + as the native time unit. That is it might differ inbetween + run-time restarts. You get values of this type by calling + os:perf_counter() +

+
+

The time_unit/0 type may be extended. Use diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 4d7b00b032..a0979c7b0e 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -4895,6 +4895,38 @@ do { \ } } + /* This is optimised as an instruction because + it has to be very very fast */ + OpCase(i_perf_counter): { + BeamInstr* next; + ErtsSysHrTime ts; + PreFetch(0, next); + + sys_perf_counter(&ts); + + if (IS_SSMALL(ts)) { + r(0) = make_small((Sint)ts); + } else { + TestHeap(ERTS_SINT64_HEAP_SIZE(ts),0); + r(0) = make_big(HTOP); +#if defined(ARCH_32) || HALFWORD_HEAP + if (ts >= (((Uint64) 1) << 32)) { + *HTOP = make_pos_bignum_header(2); + BIG_DIGIT(HTOP, 0) = (Uint) (ts & ((Uint) 0xffffffff)); + BIG_DIGIT(HTOP, 1) = (Uint) ((ts >> 32) & ((Uint) 0xffffffff)); + HTOP += 3; + } + else +#endif + { + *HTOP = make_pos_bignum_header(1); + BIG_DIGIT(HTOP, 0) = (Uint) ts; + HTOP += 2; + } + } + NextPF(0, next); + } + OpCase(i_debug_breakpoint): { HEAVY_SWAPOUT; I = call_error_handler(c_p, I-3, reg, am_breakpoint); diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 1b8ae8cef5..4efc055aaf 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -170,6 +170,7 @@ bif erts_internal:term_type/1 bif erts_internal:map_hashmap_children/1 bif erts_internal:time_unit/0 +bif erts_internal:perf_counter_unit/0 bif erts_internal:is_system_process/1 @@ -369,6 +370,7 @@ bif os:getpid/0 bif os:timestamp/0 bif os:system_time/0 bif os:system_time/1 +bif os:perf_counter/0 # # Bifs in the erl_ddll module (the module actually does not exist) diff --git a/erts/emulator/beam/erl_time.h b/erts/emulator/beam/erl_time.h index 93a0d556bf..446adcf4af 100644 --- a/erts/emulator/beam/erl_time.h +++ b/erts/emulator/beam/erl_time.h @@ -142,6 +142,8 @@ erts_time_unit_conversion(Uint64 value, Uint32 from_time_unit, Uint32 to_time_unit); +ErtsSysHrTime erts_perf_counter_unit(void); + #if ERTS_GLB_INLINE_INCL_FUNC_DEF ERTS_GLB_INLINE Uint64 diff --git a/erts/emulator/beam/erl_time_sup.c b/erts/emulator/beam/erl_time_sup.c index 668dbb5daa..6509c6a805 100644 --- a/erts/emulator/beam/erl_time_sup.c +++ b/erts/emulator/beam/erl_time_sup.c @@ -1749,7 +1749,7 @@ erts_get_monotonic_time(ErtsSchedulerData *esdp) { ErtsMonotonicTime mtime = time_sup.r.o.get_time(); update_last_mtime(esdp, mtime); - return mtime; + return mtime; } ErtsMonotonicTime @@ -2435,9 +2435,81 @@ BIF_RETTYPE os_system_time_0(BIF_ALIST_0) BIF_RET(make_time_val(BIF_P, stime)); } -BIF_RETTYPE os_system_time_1(BIF_ALIST_0) +BIF_RETTYPE os_system_time_1(BIF_ALIST_1) { ErtsSystemTime stime = erts_os_system_time(); BIF_RET(time_unit_conversion(BIF_P, BIF_ARG_1, stime, 0)); } +BIF_RETTYPE +os_perf_counter_0(BIF_ALIST_0) +{ + ErtsSysHrTime pcounter; + sys_perf_counter(&pcounter); + BIF_RET(make_time_val(BIF_P, pcounter)); +} + +BIF_RETTYPE erts_internal_perf_counter_unit_0(BIF_ALIST_0) +{ + BIF_RET(make_time_val(BIF_P, SYS_PERF_COUNTER_UNIT)); +} + +/* What resolution to spin to in micro seconds */ +#define RESOLUTION 100 +/* How many iterations to spin */ +#define ITERATIONS 1 +/* How many significant figures to round to */ +#define SIGFIGS 3 + +static ErtsSysHrTime perf_counter_unit = 0; + +static ErtsSysHrTime erts_calculate_perf_counter_unit(void); +static ErtsSysHrTime erts_calculate_perf_counter_unit() { + int i; + ErtsSysHrTime pre, post; + double value = 0; + double round_factor; +#if defined(HAVE_GETHRTIME) && defined(GETHRTIME_WITH_CLOCK_GETTIME) + struct timespec basetime,comparetime; +#define __GETTIME(arg) clock_gettime(CLOCK_MONOTONIC,arg) +#define __GETUSEC(arg) (arg.tv_nsec / 1000) +#else + SysTimeval basetime,comparetime; +#define __GETTIME(arg) sys_gettimeofday(arg) +#define __GETUSEC(arg) arg.tv_usec +#endif + + for (i = 0; i < ITERATIONS; i++) { + /* Make sure usec just flipped over at current resolution */ + __GETTIME(&basetime); + do { + __GETTIME(&comparetime); + } while ((__GETUSEC(basetime) / RESOLUTION) == (__GETUSEC(comparetime) / RESOLUTION)); + + sys_perf_counter(&pre); + + __GETTIME(&basetime); + do { + __GETTIME(&comparetime); + } while ((__GETUSEC(basetime) / RESOLUTION) == (__GETUSEC(comparetime) / RESOLUTION)); + + sys_perf_counter(&post); + + value += post - pre; + } + /* After this value is ticks per us */ + value /= (RESOLUTION*ITERATIONS); + + /* We round to 3 significant figures */ + round_factor = pow(10.0, SIGFIGS - ceil(log10(value))); + value = ((ErtsSysHrTime)(value * round_factor + 0.5)) / round_factor; + + /* convert to ticks per second */ + return 1000000 * value; +} + +ErtsSysHrTime erts_perf_counter_unit() { + if (perf_counter_unit == 0) + perf_counter_unit = erts_calculate_perf_counter_unit(); + return perf_counter_unit; +} diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 081c4108a0..9e53b4bfcc 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -988,6 +988,13 @@ move Discarded x==0 | move Something x==0 => move Something x=0 %endif +call_ext u==0 u$func:os:perf_counter/0 => \ + i_perf_counter +call_ext_last u==0 u$func:os:perf_counter/0 D => \ + i_perf_counter | deallocate_return D +call_ext_only u==0 u$func:os:perf_counter/0 => \ + i_perf_counter | return + # # The general case for BIFs that have no special instructions. # A BIF used in the tail must be followed by a return instruction. @@ -1027,6 +1034,8 @@ i_apply_fun_only i_hibernate +i_perf_counter + call_bif e # diff --git a/erts/emulator/sys/unix/erl_unix_sys.h b/erts/emulator/sys/unix/erl_unix_sys.h index 0352ee1b3c..9f137048de 100644 --- a/erts/emulator/sys/unix/erl_unix_sys.h +++ b/erts/emulator/sys/unix/erl_unix_sys.h @@ -101,6 +101,10 @@ #endif #include +#ifdef HAVE_MACH_ABSOLUTE_TIME +#include +#endif + #ifdef HAVE_POSIX_MEMALIGN # define ERTS_HAVE_ERTS_SYS_ALIGNED_ALLOC 1 #endif @@ -266,7 +270,31 @@ erts_os_times(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep) #endif /* ERTS_HAVE_OS_MONOTONIC_TIME_SUPPORT */ /* - * + * Functions for getting the performance counter + */ + +#if defined(__x86_64__) + /* available on all x86_64. Best if used when we have constant_tsc and + nonstop_tsc cpu features. It may have been a good idea to put the + cpuid instruction before the rdtsc, but I decided against it + because it is not really needed for msacc, and it slows it down by + quite a bit. As a result though, this timestamp becomes much less + accurate as it might be re-ordered to be executed way before this + function is called. + */ +#define sys_perf_counter(ts) do { \ + __asm__ __volatile__ ("rdtsc\n\t" \ + "shl $32, %%rdx\n\t" \ + "or %%rdx, %0" : "=a" (*ts) : : "rdx"); \ + } while(0) +#define SYS_PERF_COUNTER_UNIT erts_perf_counter_unit() +#else +#define sys_perf_counter(ts) *(ts) = erts_get_perf_counter() +#define SYS_PERF_COUNTER_UNIT 1000000000LL +#endif + +/* + * Functions for measuring CPU time */ #if (defined(HAVE_GETHRVTIME) || defined(HAVE_CLOCK_GETTIME_CPU_TIME)) diff --git a/erts/emulator/sys/unix/sys_time.c b/erts/emulator/sys/unix/sys_time.c index 2e1914f564..6fc4fc7dc4 100644 --- a/erts/emulator/sys/unix/sys_time.c +++ b/erts/emulator/sys/unix/sys_time.c @@ -912,6 +912,8 @@ erts_os_times(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep) #ifdef HAVE_GETHRVTIME_PROCFS_IOCTL +/* The code below only has effect on solaris < 10, + needed in order for gehhrvtime to work properly */ int sys_start_hrvtime(void) { long msacct = PR_MSACCT; diff --git a/erts/emulator/sys/win32/erl_win_sys.h b/erts/emulator/sys/win32/erl_win_sys.h index 7e8dd8a4ca..1ab9eadefd 100644 --- a/erts/emulator/sys/win32/erl_win_sys.h +++ b/erts/emulator/sys/win32/erl_win_sys.h @@ -238,6 +238,8 @@ erts_sys_hrtime(void) extern void sys_gettimeofday(SysTimeval *tv); extern clock_t sys_times(SysTimes *buffer); +#define sys_perf_counter(ts) *(ts) = erts_sys_hrtime() +#define SYS_PERF_COUNTER_UNIT ERTS_I64_LITERAL(1000000000) extern char *win_build_environment(char *); diff --git a/erts/preloaded/src/erlang.erl b/erts/preloaded/src/erlang.erl index bca366681d..6f56a81eec 100644 --- a/erts/preloaded/src/erlang.erl +++ b/erts/preloaded/src/erlang.erl @@ -71,7 +71,8 @@ | 'milli_seconds' | 'micro_seconds' | 'nano_seconds' - | 'native'. + | 'native' + | 'perf_counter'. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Native code BIF stubs and their types @@ -1347,6 +1348,7 @@ convert_time_unit(Time, FromUnit, ToUnit) -> try FU = case FromUnit of native -> erts_internal:time_unit(); + perf_counter -> erts_internal:perf_counter_unit(); nano_seconds -> 1000*1000*1000; micro_seconds -> 1000*1000; milli_seconds -> 1000; @@ -1355,6 +1357,7 @@ convert_time_unit(Time, FromUnit, ToUnit) -> end, TU = case ToUnit of native -> erts_internal:time_unit(); + perf_counter -> erts_internal:perf_counter_unit(); nano_seconds -> 1000*1000*1000; micro_seconds -> 1000*1000; milli_seconds -> 1000; diff --git a/erts/preloaded/src/erts_internal.erl b/erts/preloaded/src/erts_internal.erl index 84dedab930..c3a14d272d 100644 --- a/erts/preloaded/src/erts_internal.erl +++ b/erts/preloaded/src/erts_internal.erl @@ -45,7 +45,7 @@ -export([await_result/1, gather_io_bytes/2]). --export([time_unit/0]). +-export([time_unit/0, perf_counter_unit/0]). -export([is_system_process/1]). @@ -345,6 +345,11 @@ flush_monitor_messages(Ref, Multi, Res) when is_reference(Ref) -> time_unit() -> erlang:nif_error(undefined). +-spec erts_internal:perf_counter_unit() -> pos_integer(). + +perf_counter_unit() -> + erlang:nif_error(undefined). + -spec erts_internal:is_system_process(Pid) -> boolean() when Pid :: pid(). -- cgit v1.2.3