From eea5f896780e07f7ca76685061d01e7be5a7abaa Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Thu, 11 Sep 2014 18:26:26 +0200 Subject: erts, kernel: Add os:perf_counter function The perf_counter is a very very cheap and high resolution timer that can be used to timestamp system events. It does not have monotonicity guarantees, but should on most OS's expose a monotonic time. A special instruction has been created for this counter to further speed up fetching it. OTP-12908 --- erts/emulator/sys/unix/sys_time.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'erts/emulator/sys/unix/sys_time.c') diff --git a/erts/emulator/sys/unix/sys_time.c b/erts/emulator/sys/unix/sys_time.c index 2e1914f564..6fc4fc7dc4 100644 --- a/erts/emulator/sys/unix/sys_time.c +++ b/erts/emulator/sys/unix/sys_time.c @@ -912,6 +912,8 @@ erts_os_times(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep) #ifdef HAVE_GETHRVTIME_PROCFS_IOCTL +/* The code below only has effect on solaris < 10, + needed in order for gethrvtime to work properly */ int sys_start_hrvtime(void) { long msacct = PR_MSACCT; -- cgit v1.2.3 From 0399f5fc547ef035c4eb5e383f30b28ae73d936e Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Thu, 16 Jul 2015 11:27:00 +0200 Subject: erts: Refactor perf counter internal interface perf counter is now part of the function pointer interface and also the function returns the value instead of writing to a memory buffer. 
--- erts/emulator/sys/unix/sys_time.c | 112 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) (limited to 'erts/emulator/sys/unix/sys_time.c') diff --git a/erts/emulator/sys/unix/sys_time.c b/erts/emulator/sys/unix/sys_time.c index 6fc4fc7dc4..9738a8c352 100644 --- a/erts/emulator/sys/unix/sys_time.c +++ b/erts/emulator/sys/unix/sys_time.c @@ -65,6 +65,8 @@ # include #endif +static void init_perf_counter(void); + /******************* Routines for time measurement *********************/ #undef ERTS_SYS_TIME_INTERNAL_STATE_WRITE_FREQ__ @@ -404,6 +406,8 @@ sys_init_time(ErtsSysInitTimeResult *init_resp) # error Missing erts_os_system_time() implementation #endif + init_perf_counter(); + } void @@ -908,6 +912,114 @@ erts_os_times(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep) #endif +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\ + * Performance counter functions * +\* */ + + +/* What resolution to spin to in micro seconds */ +#define RESOLUTION 100 +/* How many iterations to spin */ +#define ITERATIONS 1 +/* How many significant figures to round to */ +#define SIGFIGS 3 + +static ErtsSysPerfCounter calculate_perf_counter_unit(void) { + int i; + ErtsSysPerfCounter pre, post; + double value = 0; + double round_factor; +#if defined(HAVE_GETHRTIME) && defined(GETHRTIME_WITH_CLOCK_GETTIME) + struct timespec basetime,comparetime; +#define __GETTIME(arg) clock_gettime(CLOCK_MONOTONIC,arg) +#define __GETUSEC(arg) (arg.tv_nsec / 1000) +#else + SysTimeval basetime,comparetime; +#define __GETTIME(arg) sys_gettimeofday(arg) +#define __GETUSEC(arg) arg.tv_usec +#endif + + for (i = 0; i < ITERATIONS; i++) { + /* Make sure usec just flipped over at current resolution */ + __GETTIME(&basetime); + do { + __GETTIME(&comparetime); + } while ((__GETUSEC(basetime) / RESOLUTION) == (__GETUSEC(comparetime) / RESOLUTION)); + + pre = erts_sys_perf_counter(); + + __GETTIME(&basetime); + do { + __GETTIME(&comparetime); + } while 
((__GETUSEC(basetime) / RESOLUTION) == (__GETUSEC(comparetime) / RESOLUTION)); + + post = erts_sys_perf_counter(); + + value += post - pre; + } + /* After this value is ticks per us */ + value /= (RESOLUTION*ITERATIONS); + + /* We round to 3 significant figures */ + round_factor = pow(10.0, SIGFIGS - ceil(log10(value))); + value = ((ErtsSysPerfCounter)(value * round_factor + 0.5)) / round_factor; + + /* convert to ticks per second */ + return 1000000 * value; +} + +static int have_rdtscp(void) +{ +#if defined(ETHR_X86_RUNTIME_CONF__) + /* On early x86 cpu's the tsc varies with + the current speed of the cpu, which means that the time per + tick vary depending on the current load of the cpu. We do not + want this as it would give very scewed numbers when the cpu is + mostly idle. + If we have the rdtscp feature it is a sign that the cpu is + relatively modern, and thus the tsc quite stable so we use it then. + + If this test is not good enough, I don't know what we'll do. + Maybe fallback on erts_sys_hrtime always, but that would be a shame as + rdtsc is about 3 times faster than hrtime... */ + return ETHR_X86_RUNTIME_CONF_HAVE_RDTSCP__; +#else + return 0; +#endif +} + +static ErtsSysPerfCounter rdtsc(void) +{ + /* It may have been a good idea to put the cpuid instruction before + the rdtsc, but I decided against it because it is not really + needed for msacc, and it slows it down by quite a bit (5-7 times slower). + As a result though, this timestamp becomes much less + accurate as it might be re-ordered to be executed way before or after this + function is called. 
+ */ + ErtsSysPerfCounter ts; +#if defined(__x86_64__) + __asm__ __volatile__ ("rdtsc\n\t" + "shl $32, %%rdx\n\t" + "or %%rdx, %0" : "=a" (ts) : : "rdx"); +#elif defined(__i386__) + __asm__ __volatile__ ("rdtsc\n\t" + : "=A" (ts) ); +#endif + return ts; +} + +static void init_perf_counter(void) +{ + if (have_rdtscp()) { + erts_sys_time_data__.r.o.perf_counter = rdtsc; + erts_sys_time_data__.r.o.perf_counter_unit = calculate_perf_counter_unit(); + } else { + erts_sys_time_data__.r.o.perf_counter = erts_sys_hrtime; + erts_sys_time_data__.r.o.perf_counter_unit = ERTS_HRTIME_UNIT; + } +} + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #ifdef HAVE_GETHRVTIME_PROCFS_IOCTL -- cgit v1.2.3 From 6090f9c7e9b0ddbccef357641c1455475b348e94 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Thu, 16 Jul 2015 15:36:44 +0200 Subject: erts: Add power saving cpu feature tests and use them --- erts/emulator/sys/unix/sys_time.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'erts/emulator/sys/unix/sys_time.c') diff --git a/erts/emulator/sys/unix/sys_time.c b/erts/emulator/sys/unix/sys_time.c index 9738a8c352..03d39c7ce6 100644 --- a/erts/emulator/sys/unix/sys_time.c +++ b/erts/emulator/sys/unix/sys_time.c @@ -971,18 +971,18 @@ static ErtsSysPerfCounter calculate_perf_counter_unit(void) { static int have_rdtscp(void) { #if defined(ETHR_X86_RUNTIME_CONF__) - /* On early x86 cpu's the tsc varies with - the current speed of the cpu, which means that the time per - tick vary depending on the current load of the cpu. We do not - want this as it would give very scewed numbers when the cpu is - mostly idle. - If we have the rdtscp feature it is a sign that the cpu is - relatively modern, and thus the tsc quite stable so we use it then. + /* On early x86 cpu's the tsc varies with the current speed of the cpu, + which means that the time per tick vary depending on the current + load of the cpu. 
We do not want this as it would give very scewed + numbers when the cpu is mostly idle. + The linux kernel seems to think that checking for constant and + reliable is enough to trust the counter so we do the same. If this test is not good enough, I don't know what we'll do. Maybe fallback on erts_sys_hrtime always, but that would be a shame as rdtsc is about 3 times faster than hrtime... */ - return ETHR_X86_RUNTIME_CONF_HAVE_RDTSCP__; + return ETHR_X86_RUNTIME_CONF_HAVE_CONSTANT_TSC__ && + ETHR_X86_RUNTIME_CONF_HAVE_TSC_RELIABLE__; #else return 0; #endif -- cgit v1.2.3