From 0399f5fc547ef035c4eb5e383f30b28ae73d936e Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Thu, 16 Jul 2015 11:27:00 +0200 Subject: erts: Refactor perf counter internal interface perf counter is now part of the function pointer interface and also the function returns the value instead of writing to a memory buffer. --- erts/emulator/beam/beam_emu.c | 4 +- erts/emulator/beam/erl_msacc.c | 14 ++--- erts/emulator/beam/erl_msacc.h | 8 +-- erts/emulator/beam/erl_time.h | 2 +- erts/emulator/beam/erl_time_sup.c | 66 +------------------- erts/emulator/sys/unix/erl_unix_sys.h | 35 +++++------ erts/emulator/sys/unix/sys_time.c | 112 ++++++++++++++++++++++++++++++++++ erts/emulator/sys/win32/erl_win_sys.h | 16 ++++- 8 files changed, 158 insertions(+), 99 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 4be311ae82..9f143c22bf 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -4938,10 +4938,10 @@ do { \ it has to be very very fast */ OpCase(i_perf_counter): { BeamInstr* next; - ErtsSysHrTime ts; + ErtsSysPerfCounter ts; PreFetch(0, next); - sys_perf_counter(&ts); + ts = erts_sys_perf_counter(); if (IS_SSMALL(ts)) { r(0) = make_small((Sint)ts); diff --git a/erts/emulator/beam/erl_msacc.c b/erts/emulator/beam/erl_msacc.c index bf1c06dea7..71e3fd8b6e 100644 --- a/erts/emulator/beam/erl_msacc.c +++ b/erts/emulator/beam/erl_msacc.c @@ -117,7 +117,7 @@ void erts_msacc_init_thread(char *type, int id, int managed) { #ifdef ERTS_MSACC_ALWAYS_ON ERTS_MSACC_TSD_SET(msacc); - sys_perf_counter(&msacc->perf_counter); + msacc->perf_counter = erts_sys_perf_counter(); msacc->state = ERTS_MSACC_STATE_OTHER; #endif } @@ -278,7 +278,7 @@ reply_msacc(void *vmsaccrp) if (msaccrp->action == ERTS_MSACC_ENABLE && !msacc) { msacc = get_msacc(); - sys_perf_counter(&msacc->perf_counter); + msacc->perf_counter = erts_sys_perf_counter(); msacc->state = ERTS_MSACC_STATE_OTHER; @@ -412,9 +412,9 @@ erts_msacc_request(Process *c_p, int action, Eterm *threads) for (i = 0; i < unmanaged_count; i++) { erts_mtx_lock(&unmanaged[i]->mtx); if (unmanaged[i]->perf_counter) { - ErtsSysHrTime perf_counter; + ErtsSysPerfCounter perf_counter; /* if enabled update stats */ - sys_perf_counter(&perf_counter); + perf_counter = erts_sys_perf_counter(); unmanaged[i]->perf_counters[unmanaged[i]->state] += perf_counter - unmanaged[i]->perf_counter; unmanaged[i]->perf_counter = perf_counter; @@ -439,7 +439,7 @@ erts_msacc_request(Process *c_p, int action, Eterm *threads) erts_rwmtx_rlock(&msacc_mutex); for (msacc = msacc_unmanaged; msacc != NULL; msacc = msacc->next) { erts_mtx_lock(&msacc->mtx); - sys_perf_counter(&msacc->perf_counter); + msacc->perf_counter = erts_sys_perf_counter(); /* we assume the unmanaged thread is sleeping */ msacc->state = ERTS_MSACC_STATE_SLEEP; erts_mtx_unlock(&msacc->mtx); @@ -448,12 +448,12 @@ erts_msacc_request(Process *c_p, int action, Eterm *threads) break; } case ERTS_MSACC_DISABLE: { - ErtsSysHrTime perf_counter; + ErtsSysPerfCounter perf_counter; erts_rwmtx_rlock(&msacc_mutex); /* make sure to update stats with latest results */ for (msacc = msacc_unmanaged; msacc != NULL; msacc = msacc->next) { erts_mtx_lock(&msacc->mtx); - sys_perf_counter(&perf_counter); + perf_counter = erts_sys_perf_counter(); msacc->perf_counters[msacc->state] += perf_counter - msacc->perf_counter; msacc->perf_counter = 0; erts_mtx_unlock(&msacc->mtx); diff --git a/erts/emulator/beam/erl_msacc.h b/erts/emulator/beam/erl_msacc.h index 1b4b7a408a..284388f7aa 100644 --- a/erts/emulator/beam/erl_msacc.h +++ b/erts/emulator/beam/erl_msacc.h @@ -121,8 +121,8 @@ typedef struct erl_msacc_t_ ErtsMsAcc; struct erl_msacc_t_ { /* the the values below are protected by mtx iff unmanaged = 1 */ - ErtsSysHrTime perf_counter; - ErtsSysHrTime perf_counters[ERTS_MSACC_STATE_COUNT]; + ErtsSysPerfCounter perf_counter; + ErtsSysPerfCounter perf_counters[ERTS_MSACC_STATE_COUNT]; #ifdef ERTS_MSACC_STATE_COUNTERS Uint64 state_counters[ERTS_MSACC_STATE_COUNT]; #endif @@ -324,14 +324,14 @@ void erts_msacc_set_state_um__(ErtsMsAcc *msacc, Uint new_state, int increment) ERTS_MSACC_INLINE void erts_msacc_set_state_m__(ErtsMsAcc *msacc, Uint new_state, int increment) { - ErtsSysHrTime prev_perf_counter; + ErtsSysPerfCounter prev_perf_counter; Sint64 diff; if (new_state == msacc->state) return; prev_perf_counter = msacc->perf_counter; - sys_perf_counter(&msacc->perf_counter); + msacc->perf_counter = erts_sys_perf_counter(); diff = msacc->perf_counter - prev_perf_counter; ASSERT(diff >= 0); msacc->perf_counters[msacc->state] += diff; diff --git a/erts/emulator/beam/erl_time.h b/erts/emulator/beam/erl_time.h index 446adcf4af..5242063550 100644 --- a/erts/emulator/beam/erl_time.h +++ b/erts/emulator/beam/erl_time.h @@ -142,7 +142,7 @@ erts_time_unit_conversion(Uint64 value, Uint32 from_time_unit, Uint32 to_time_unit); -ErtsSysHrTime erts_perf_counter_unit(void); +ErtsSysPerfCounter erts_perf_counter_unit(void); #if ERTS_GLB_INLINE_INCL_FUNC_DEF diff --git a/erts/emulator/beam/erl_time_sup.c b/erts/emulator/beam/erl_time_sup.c index 6509c6a805..98159fdf72 100644 --- a/erts/emulator/beam/erl_time_sup.c +++ b/erts/emulator/beam/erl_time_sup.c @@ -2444,72 +2444,10 @@ BIF_RETTYPE os_system_time_1(BIF_ALIST_1) BIF_RETTYPE os_perf_counter_0(BIF_ALIST_0) { - ErtsSysHrTime pcounter; - sys_perf_counter(&pcounter); - BIF_RET(make_time_val(BIF_P, pcounter)); + BIF_RET(make_time_val(BIF_P, erts_sys_perf_counter())); } BIF_RETTYPE erts_internal_perf_counter_unit_0(BIF_ALIST_0) { - BIF_RET(make_time_val(BIF_P, SYS_PERF_COUNTER_UNIT)); -} - -/* What resolution to spin to in micro seconds */ -#define RESOLUTION 100 -/* How many iterations to spin */ -#define ITERATIONS 1 -/* How many significant figures to round to */ -#define SIGFIGS 3 - -static ErtsSysHrTime perf_counter_unit = 0; - -static ErtsSysHrTime erts_calculate_perf_counter_unit(void); -static ErtsSysHrTime erts_calculate_perf_counter_unit() { - int i; - ErtsSysHrTime pre, post; - double value = 0; - double round_factor; -#if defined(HAVE_GETHRTIME) && defined(GETHRTIME_WITH_CLOCK_GETTIME) - struct timespec basetime,comparetime; -#define __GETTIME(arg) clock_gettime(CLOCK_MONOTONIC,arg) -#define __GETUSEC(arg) (arg.tv_nsec / 1000) -#else - SysTimeval basetime,comparetime; -#define __GETTIME(arg) sys_gettimeofday(arg) -#define __GETUSEC(arg) arg.tv_usec -#endif - - for (i = 0; i < ITERATIONS; i++) { - /* Make sure usec just flipped over at current resolution */ - __GETTIME(&basetime); - do { - __GETTIME(&comparetime); - } while ((__GETUSEC(basetime) / RESOLUTION) == (__GETUSEC(comparetime) / RESOLUTION)); - - sys_perf_counter(&pre); - - __GETTIME(&basetime); - do { - __GETTIME(&comparetime); - } while ((__GETUSEC(basetime) / RESOLUTION) == (__GETUSEC(comparetime) / RESOLUTION)); - - sys_perf_counter(&post); - - value += post - pre; - } - /* After this value is ticks per us */ - value /= (RESOLUTION*ITERATIONS); - - /* We round to 3 significant figures */ - round_factor = pow(10.0, SIGFIGS - ceil(log10(value))); - value = ((ErtsSysHrTime)(value * round_factor + 0.5)) / round_factor; - - /* convert to ticks per second */ - return 1000000 * value; -} - -ErtsSysHrTime erts_perf_counter_unit() { - if (perf_counter_unit == 0) - perf_counter_unit = erts_calculate_perf_counter_unit(); - return perf_counter_unit; + BIF_RET(make_time_val(BIF_P, erts_sys_perf_counter_unit())); } diff --git a/erts/emulator/sys/unix/erl_unix_sys.h b/erts/emulator/sys/unix/erl_unix_sys.h index 3a03d6be49..8b1822ca9f 100644 --- a/erts/emulator/sys/unix/erl_unix_sys.h +++ b/erts/emulator/sys/unix/erl_unix_sys.h @@ -161,6 +161,7 @@ typedef long long ErtsSysHrTime; #endif typedef ErtsMonotonicTime ErtsSystemTime; +typedef ErtsSysHrTime ErtsSysPerfCounter; #define ERTS_MONOTONIC_TIME_MIN (((ErtsMonotonicTime) 1) << 63) #define ERTS_MONOTONIC_TIME_MAX (~ERTS_MONOTONIC_TIME_MIN) @@ -209,6 +210,7 @@ ErtsSystemTime erts_os_system_time(void); * It may or may not be monotonic. */ ErtsSysHrTime erts_sys_hrtime(void); +#define ERTS_HRTIME_UNIT (1000*1000*1000) struct erts_sys_time_read_only_data__ { #ifdef ERTS_OS_MONOTONIC_INLINE_FUNC_PTR_CALL__ @@ -217,6 +219,8 @@ struct erts_sys_time_read_only_data__ { #ifdef ERTS_OS_TIMES_INLINE_FUNC_PTR_CALL__ void (*os_times)(ErtsMonotonicTime *, ErtsSystemTime *); #endif + ErtsSysPerfCounter (*perf_counter)(void); + ErtsSysPerfCounter perf_counter_unit; int ticks_per_sec; }; @@ -273,25 +277,18 @@ erts_os_times(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep) * Functions for getting the performance counter */ -#if defined(__x86_64__) - /* available on all x86_64. Best if used when we have constant_tsc and - nonstop_tsc cpu features. It may have been a good idea to put the - cpuid instruction before the rdtsc, but I decided against it - because it is not really needed for msacc, and it slows it down by - quite a bit. As a result though, this timestamp becomes much less - accurate as it might be re-ordered to be executed way before this - function is called. - */ -#define sys_perf_counter(ts) do { \ - __asm__ __volatile__ ("rdtsc\n\t" \ - "shl $32, %%rdx\n\t" \ - "or %%rdx, %0" : "=a" (*ts) : : "rdx"); \ - } while(0) -#define SYS_PERF_COUNTER_UNIT erts_perf_counter_unit() -#else -#define sys_perf_counter(ts) *(ts) = erts_sys_hrtime() -#define SYS_PERF_COUNTER_UNIT 1000000000LL -#endif +ERTS_GLB_INLINE ErtsSysPerfCounter erts_sys_perf_counter(void); +#define erts_sys_perf_counter_unit() erts_sys_time_data__.r.o.perf_counter_unit + +#if ERTS_GLB_INLINE_INCL_FUNC_DEF + +ERTS_GLB_INLINE ErtsSysPerfCounter +erts_sys_perf_counter() +{ + return (*erts_sys_time_data__.r.o.perf_counter)(); +} + +#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */ /* * Functions for measuring CPU time diff --git a/erts/emulator/sys/unix/sys_time.c b/erts/emulator/sys/unix/sys_time.c index 6fc4fc7dc4..9738a8c352 100644 --- a/erts/emulator/sys/unix/sys_time.c +++ b/erts/emulator/sys/unix/sys_time.c @@ -65,6 +65,8 @@ # include #endif +static void init_perf_counter(void); + /******************* Routines for time measurement *********************/ #undef ERTS_SYS_TIME_INTERNAL_STATE_WRITE_FREQ__ @@ -404,6 +406,8 @@ sys_init_time(ErtsSysInitTimeResult *init_resp) # error Missing erts_os_system_time() implementation #endif + init_perf_counter(); + } void @@ -908,6 +912,114 @@ erts_os_times(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep) #endif +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\ + * Performance counter functions * +\* */ + + +/* What resolution to spin to in micro seconds */ +#define RESOLUTION 100 +/* How many iterations to spin */ +#define ITERATIONS 1 +/* How many significant figures to round to */ +#define SIGFIGS 3 + +static ErtsSysPerfCounter calculate_perf_counter_unit(void) { + int i; + ErtsSysPerfCounter pre, post; + double value = 0; + double round_factor; +#if defined(HAVE_GETHRTIME) && defined(GETHRTIME_WITH_CLOCK_GETTIME) + struct timespec basetime,comparetime; +#define __GETTIME(arg) clock_gettime(CLOCK_MONOTONIC,arg) +#define __GETUSEC(arg) (arg.tv_nsec / 1000) +#else + SysTimeval basetime,comparetime; +#define __GETTIME(arg) sys_gettimeofday(arg) +#define __GETUSEC(arg) arg.tv_usec +#endif + + for (i = 0; i < ITERATIONS; i++) { + /* Make sure usec just flipped over at current resolution */ + __GETTIME(&basetime); + do { + __GETTIME(&comparetime); + } while ((__GETUSEC(basetime) / RESOLUTION) == (__GETUSEC(comparetime) / RESOLUTION)); + + pre = erts_sys_perf_counter(); + + __GETTIME(&basetime); + do { + __GETTIME(&comparetime); + } while ((__GETUSEC(basetime) / RESOLUTION) == (__GETUSEC(comparetime) / RESOLUTION)); + + post = erts_sys_perf_counter(); + + value += post - pre; + } + /* After this value is ticks per us */ + value /= (RESOLUTION*ITERATIONS); + + /* We round to 3 significant figures */ + round_factor = pow(10.0, SIGFIGS - ceil(log10(value))); + value = ((ErtsSysPerfCounter)(value * round_factor + 0.5)) / round_factor; + + /* convert to ticks per second */ + return 1000000 * value; +} + +static int have_rdtscp(void) +{ +#if defined(ETHR_X86_RUNTIME_CONF__) + /* On early x86 cpu's the tsc varies with + the current speed of the cpu, which means that the time per + tick vary depending on the current load of the cpu. We do not + want this as it would give very scewed numbers when the cpu is + mostly idle. + If we have the rdtscp feature it is a sign that the cpu is + relatively modern, and thus the tsc quite stable so we use it then. + + If this test is not good enough, I don't know what we'll do. + Maybe fallback on erts_sys_hrtime always, but that would be a shame as + rdtsc is about 3 times faster than hrtime... */ + return ETHR_X86_RUNTIME_CONF_HAVE_RDTSCP__; +#else + return 0; +#endif +} + +static ErtsSysPerfCounter rdtsc(void) +{ + /* It may have been a good idea to put the cpuid instruction before + the rdtsc, but I decided against it because it is not really + needed for msacc, and it slows it down by quite a bit (5-7 times slower). + As a result though, this timestamp becomes much less + accurate as it might be re-ordered to be executed way before or after this + function is called. + */ + ErtsSysPerfCounter ts; +#if defined(__x86_64__) + __asm__ __volatile__ ("rdtsc\n\t" + "shl $32, %%rdx\n\t" + "or %%rdx, %0" : "=a" (ts) : : "rdx"); +#elif defined(__i386__) + __asm__ __volatile__ ("rdtsc\n\t" + : "=A" (ts) ); +#endif + return ts; +} + +static void init_perf_counter(void) +{ + if (have_rdtscp()) { + erts_sys_time_data__.r.o.perf_counter = rdtsc; + erts_sys_time_data__.r.o.perf_counter_unit = calculate_perf_counter_unit(); + } else { + erts_sys_time_data__.r.o.perf_counter = erts_sys_hrtime; + erts_sys_time_data__.r.o.perf_counter_unit = ERTS_HRTIME_UNIT; + } +} + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #ifdef HAVE_GETHRVTIME_PROCFS_IOCTL diff --git a/erts/emulator/sys/win32/erl_win_sys.h b/erts/emulator/sys/win32/erl_win_sys.h index 1ab9eadefd..99c1066ab3 100644 --- a/erts/emulator/sys/win32/erl_win_sys.h +++ b/erts/emulator/sys/win32/erl_win_sys.h @@ -183,6 +183,7 @@ typedef LONGLONG ErtsSysHrTime; #endif typedef ErtsMonotonicTime ErtsSystemTime; +typedef ErtsMonotonicTime ErtsSysPerfCounter; ErtsSystemTime erts_os_system_time(void); @@ -213,6 +214,7 @@ ERTS_GLB_INLINE ErtsMonotonicTime erts_os_monotonic_time(void); ERTS_GLB_INLINE void erts_os_times(ErtsMonotonicTime *, ErtsSystemTime *); ERTS_GLB_INLINE ErtsSysHrTime erts_sys_hrtime(void); +ERTS_GLB_INLINE ErtsSysPerfCounter erts_sys_perf_counter(void); #if ERTS_GLB_INLINE_INCL_FUNC_DEF @@ -234,12 +236,22 @@ erts_sys_hrtime(void) return (*erts_sys_time_data__.r.o.sys_hrtime)(); } +ERTS_GLB_INLINE ErtsSysPerfCounter +erts_sys_perf_counter(void) +{ + return (*erts_sys_time_data__.r.o.sys_hrtime)(); +} + +ERTS_GLB_INLINE ErtsSysPerfCounter +erts_sys_perf_counter_unit(void) +{ + return 1000 * 1000 * 1000; +} + #endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */ extern void sys_gettimeofday(SysTimeval *tv); extern clock_t sys_times(SysTimes *buffer); -#define sys_perf_counter(ts) *(ts) = erts_sys_hrtime() -#define SYS_PERF_COUNTER_UNIT ERTS_I64_LITERAL(1000000000) extern char *win_build_environment(char *); -- cgit v1.2.3