aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLukas Larsson <[email protected]>2015-07-16 11:27:00 +0200
committerLukas Larsson <[email protected]>2016-02-02 10:45:22 +0100
commit0399f5fc547ef035c4eb5e383f30b28ae73d936e (patch)
treee4179769e6a09cc6de68f5226671fb403b5ef4e2
parentdc1e3933e633d9d7527e6df044895d12d3845e14 (diff)
downloadotp-0399f5fc547ef035c4eb5e383f30b28ae73d936e.tar.gz
otp-0399f5fc547ef035c4eb5e383f30b28ae73d936e.tar.bz2
otp-0399f5fc547ef035c4eb5e383f30b28ae73d936e.zip
erts: Refactor perf counter internal interface
The perf counter is now part of the function pointer interface, and the function now returns the value instead of writing it to a memory buffer.
-rw-r--r--erts/emulator/beam/beam_emu.c4
-rw-r--r--erts/emulator/beam/erl_msacc.c14
-rw-r--r--erts/emulator/beam/erl_msacc.h8
-rw-r--r--erts/emulator/beam/erl_time.h2
-rw-r--r--erts/emulator/beam/erl_time_sup.c66
-rw-r--r--erts/emulator/sys/unix/erl_unix_sys.h35
-rw-r--r--erts/emulator/sys/unix/sys_time.c112
-rw-r--r--erts/emulator/sys/win32/erl_win_sys.h16
8 files changed, 158 insertions, 99 deletions
diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c
index 4be311ae82..9f143c22bf 100644
--- a/erts/emulator/beam/beam_emu.c
+++ b/erts/emulator/beam/beam_emu.c
@@ -4938,10 +4938,10 @@ do { \
it has to be very very fast */
OpCase(i_perf_counter): {
BeamInstr* next;
- ErtsSysHrTime ts;
+ ErtsSysPerfCounter ts;
PreFetch(0, next);
- sys_perf_counter(&ts);
+ ts = erts_sys_perf_counter();
if (IS_SSMALL(ts)) {
r(0) = make_small((Sint)ts);
diff --git a/erts/emulator/beam/erl_msacc.c b/erts/emulator/beam/erl_msacc.c
index bf1c06dea7..71e3fd8b6e 100644
--- a/erts/emulator/beam/erl_msacc.c
+++ b/erts/emulator/beam/erl_msacc.c
@@ -117,7 +117,7 @@ void erts_msacc_init_thread(char *type, int id, int managed) {
#ifdef ERTS_MSACC_ALWAYS_ON
ERTS_MSACC_TSD_SET(msacc);
- sys_perf_counter(&msacc->perf_counter);
+ msacc->perf_counter = erts_sys_perf_counter();
msacc->state = ERTS_MSACC_STATE_OTHER;
#endif
}
@@ -278,7 +278,7 @@ reply_msacc(void *vmsaccrp)
if (msaccrp->action == ERTS_MSACC_ENABLE && !msacc) {
msacc = get_msacc();
- sys_perf_counter(&msacc->perf_counter);
+ msacc->perf_counter = erts_sys_perf_counter();
msacc->state = ERTS_MSACC_STATE_OTHER;
@@ -412,9 +412,9 @@ erts_msacc_request(Process *c_p, int action, Eterm *threads)
for (i = 0; i < unmanaged_count; i++) {
erts_mtx_lock(&unmanaged[i]->mtx);
if (unmanaged[i]->perf_counter) {
- ErtsSysHrTime perf_counter;
+ ErtsSysPerfCounter perf_counter;
/* if enabled update stats */
- sys_perf_counter(&perf_counter);
+ perf_counter = erts_sys_perf_counter();
unmanaged[i]->perf_counters[unmanaged[i]->state] +=
perf_counter - unmanaged[i]->perf_counter;
unmanaged[i]->perf_counter = perf_counter;
@@ -439,7 +439,7 @@ erts_msacc_request(Process *c_p, int action, Eterm *threads)
erts_rwmtx_rlock(&msacc_mutex);
for (msacc = msacc_unmanaged; msacc != NULL; msacc = msacc->next) {
erts_mtx_lock(&msacc->mtx);
- sys_perf_counter(&msacc->perf_counter);
+ msacc->perf_counter = erts_sys_perf_counter();
/* we assume the unmanaged thread is sleeping */
msacc->state = ERTS_MSACC_STATE_SLEEP;
erts_mtx_unlock(&msacc->mtx);
@@ -448,12 +448,12 @@ erts_msacc_request(Process *c_p, int action, Eterm *threads)
break;
}
case ERTS_MSACC_DISABLE: {
- ErtsSysHrTime perf_counter;
+ ErtsSysPerfCounter perf_counter;
erts_rwmtx_rlock(&msacc_mutex);
/* make sure to update stats with latest results */
for (msacc = msacc_unmanaged; msacc != NULL; msacc = msacc->next) {
erts_mtx_lock(&msacc->mtx);
- sys_perf_counter(&perf_counter);
+ perf_counter = erts_sys_perf_counter();
msacc->perf_counters[msacc->state] += perf_counter - msacc->perf_counter;
msacc->perf_counter = 0;
erts_mtx_unlock(&msacc->mtx);
diff --git a/erts/emulator/beam/erl_msacc.h b/erts/emulator/beam/erl_msacc.h
index 1b4b7a408a..284388f7aa 100644
--- a/erts/emulator/beam/erl_msacc.h
+++ b/erts/emulator/beam/erl_msacc.h
@@ -121,8 +121,8 @@ typedef struct erl_msacc_t_ ErtsMsAcc;
struct erl_msacc_t_ {
/* the the values below are protected by mtx iff unmanaged = 1 */
- ErtsSysHrTime perf_counter;
- ErtsSysHrTime perf_counters[ERTS_MSACC_STATE_COUNT];
+ ErtsSysPerfCounter perf_counter;
+ ErtsSysPerfCounter perf_counters[ERTS_MSACC_STATE_COUNT];
#ifdef ERTS_MSACC_STATE_COUNTERS
Uint64 state_counters[ERTS_MSACC_STATE_COUNT];
#endif
@@ -324,14 +324,14 @@ void erts_msacc_set_state_um__(ErtsMsAcc *msacc, Uint new_state, int increment)
ERTS_MSACC_INLINE
void erts_msacc_set_state_m__(ErtsMsAcc *msacc, Uint new_state, int increment) {
- ErtsSysHrTime prev_perf_counter;
+ ErtsSysPerfCounter prev_perf_counter;
Sint64 diff;
if (new_state == msacc->state)
return;
prev_perf_counter = msacc->perf_counter;
- sys_perf_counter(&msacc->perf_counter);
+ msacc->perf_counter = erts_sys_perf_counter();
diff = msacc->perf_counter - prev_perf_counter;
ASSERT(diff >= 0);
msacc->perf_counters[msacc->state] += diff;
diff --git a/erts/emulator/beam/erl_time.h b/erts/emulator/beam/erl_time.h
index 446adcf4af..5242063550 100644
--- a/erts/emulator/beam/erl_time.h
+++ b/erts/emulator/beam/erl_time.h
@@ -142,7 +142,7 @@ erts_time_unit_conversion(Uint64 value,
Uint32 from_time_unit,
Uint32 to_time_unit);
-ErtsSysHrTime erts_perf_counter_unit(void);
+ErtsSysPerfCounter erts_perf_counter_unit(void);
#if ERTS_GLB_INLINE_INCL_FUNC_DEF
diff --git a/erts/emulator/beam/erl_time_sup.c b/erts/emulator/beam/erl_time_sup.c
index 6509c6a805..98159fdf72 100644
--- a/erts/emulator/beam/erl_time_sup.c
+++ b/erts/emulator/beam/erl_time_sup.c
@@ -2444,72 +2444,10 @@ BIF_RETTYPE os_system_time_1(BIF_ALIST_1)
BIF_RETTYPE
os_perf_counter_0(BIF_ALIST_0)
{
- ErtsSysHrTime pcounter;
- sys_perf_counter(&pcounter);
- BIF_RET(make_time_val(BIF_P, pcounter));
+ BIF_RET(make_time_val(BIF_P, erts_sys_perf_counter()));
}
BIF_RETTYPE erts_internal_perf_counter_unit_0(BIF_ALIST_0)
{
- BIF_RET(make_time_val(BIF_P, SYS_PERF_COUNTER_UNIT));
-}
-
-/* What resolution to spin to in micro seconds */
-#define RESOLUTION 100
-/* How many iterations to spin */
-#define ITERATIONS 1
-/* How many significant figures to round to */
-#define SIGFIGS 3
-
-static ErtsSysHrTime perf_counter_unit = 0;
-
-static ErtsSysHrTime erts_calculate_perf_counter_unit(void);
-static ErtsSysHrTime erts_calculate_perf_counter_unit() {
- int i;
- ErtsSysHrTime pre, post;
- double value = 0;
- double round_factor;
-#if defined(HAVE_GETHRTIME) && defined(GETHRTIME_WITH_CLOCK_GETTIME)
- struct timespec basetime,comparetime;
-#define __GETTIME(arg) clock_gettime(CLOCK_MONOTONIC,arg)
-#define __GETUSEC(arg) (arg.tv_nsec / 1000)
-#else
- SysTimeval basetime,comparetime;
-#define __GETTIME(arg) sys_gettimeofday(arg)
-#define __GETUSEC(arg) arg.tv_usec
-#endif
-
- for (i = 0; i < ITERATIONS; i++) {
- /* Make sure usec just flipped over at current resolution */
- __GETTIME(&basetime);
- do {
- __GETTIME(&comparetime);
- } while ((__GETUSEC(basetime) / RESOLUTION) == (__GETUSEC(comparetime) / RESOLUTION));
-
- sys_perf_counter(&pre);
-
- __GETTIME(&basetime);
- do {
- __GETTIME(&comparetime);
- } while ((__GETUSEC(basetime) / RESOLUTION) == (__GETUSEC(comparetime) / RESOLUTION));
-
- sys_perf_counter(&post);
-
- value += post - pre;
- }
- /* After this value is ticks per us */
- value /= (RESOLUTION*ITERATIONS);
-
- /* We round to 3 significant figures */
- round_factor = pow(10.0, SIGFIGS - ceil(log10(value)));
- value = ((ErtsSysHrTime)(value * round_factor + 0.5)) / round_factor;
-
- /* convert to ticks per second */
- return 1000000 * value;
-}
-
-ErtsSysHrTime erts_perf_counter_unit() {
- if (perf_counter_unit == 0)
- perf_counter_unit = erts_calculate_perf_counter_unit();
- return perf_counter_unit;
+ BIF_RET(make_time_val(BIF_P, erts_sys_perf_counter_unit()));
}
diff --git a/erts/emulator/sys/unix/erl_unix_sys.h b/erts/emulator/sys/unix/erl_unix_sys.h
index 3a03d6be49..8b1822ca9f 100644
--- a/erts/emulator/sys/unix/erl_unix_sys.h
+++ b/erts/emulator/sys/unix/erl_unix_sys.h
@@ -161,6 +161,7 @@ typedef long long ErtsSysHrTime;
#endif
typedef ErtsMonotonicTime ErtsSystemTime;
+typedef ErtsSysHrTime ErtsSysPerfCounter;
#define ERTS_MONOTONIC_TIME_MIN (((ErtsMonotonicTime) 1) << 63)
#define ERTS_MONOTONIC_TIME_MAX (~ERTS_MONOTONIC_TIME_MIN)
@@ -209,6 +210,7 @@ ErtsSystemTime erts_os_system_time(void);
* It may or may not be monotonic.
*/
ErtsSysHrTime erts_sys_hrtime(void);
+#define ERTS_HRTIME_UNIT (1000*1000*1000)
struct erts_sys_time_read_only_data__ {
#ifdef ERTS_OS_MONOTONIC_INLINE_FUNC_PTR_CALL__
@@ -217,6 +219,8 @@ struct erts_sys_time_read_only_data__ {
#ifdef ERTS_OS_TIMES_INLINE_FUNC_PTR_CALL__
void (*os_times)(ErtsMonotonicTime *, ErtsSystemTime *);
#endif
+ ErtsSysPerfCounter (*perf_counter)(void);
+ ErtsSysPerfCounter perf_counter_unit;
int ticks_per_sec;
};
@@ -273,25 +277,18 @@ erts_os_times(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep)
* Functions for getting the performance counter
*/
-#if defined(__x86_64__)
- /* available on all x86_64. Best if used when we have constant_tsc and
- nonstop_tsc cpu features. It may have been a good idea to put the
- cpuid instruction before the rdtsc, but I decided against it
- because it is not really needed for msacc, and it slows it down by
- quite a bit. As a result though, this timestamp becomes much less
- accurate as it might be re-ordered to be executed way before this
- function is called.
- */
-#define sys_perf_counter(ts) do { \
- __asm__ __volatile__ ("rdtsc\n\t" \
- "shl $32, %%rdx\n\t" \
- "or %%rdx, %0" : "=a" (*ts) : : "rdx"); \
- } while(0)
-#define SYS_PERF_COUNTER_UNIT erts_perf_counter_unit()
-#else
-#define sys_perf_counter(ts) *(ts) = erts_sys_hrtime()
-#define SYS_PERF_COUNTER_UNIT 1000000000LL
-#endif
+ERTS_GLB_INLINE ErtsSysPerfCounter erts_sys_perf_counter(void);
+#define erts_sys_perf_counter_unit() erts_sys_time_data__.r.o.perf_counter_unit
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+/* Read the performance counter through the function pointer stored in erts_sys_time_data__.r.o.perf_counter. */
+ERTS_GLB_INLINE ErtsSysPerfCounter
+erts_sys_perf_counter(void)
+{
+ return (*erts_sys_time_data__.r.o.perf_counter)();
+}
+
+#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
/*
* Functions for measuring CPU time
diff --git a/erts/emulator/sys/unix/sys_time.c b/erts/emulator/sys/unix/sys_time.c
index 6fc4fc7dc4..9738a8c352 100644
--- a/erts/emulator/sys/unix/sys_time.c
+++ b/erts/emulator/sys/unix/sys_time.c
@@ -65,6 +65,8 @@
# include <fcntl.h>
#endif
+static void init_perf_counter(void);
+
/******************* Routines for time measurement *********************/
#undef ERTS_SYS_TIME_INTERNAL_STATE_WRITE_FREQ__
@@ -404,6 +406,8 @@ sys_init_time(ErtsSysInitTimeResult *init_resp)
# error Missing erts_os_system_time() implementation
#endif
+ init_perf_counter();
+
}
void
@@ -908,6 +912,114 @@ erts_os_times(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep)
#endif
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
+ * Performance counter functions *
+\* */
+
+
+/* What resolution to spin to, in microseconds */
+#define RESOLUTION 100
+/* How many iterations to spin */
+#define ITERATIONS 1
+/* How many significant figures to round to */
+#define SIGFIGS 3
+/* Estimate the perf counter frequency in ticks per second by sampling the counter around a timed spin. */
+static ErtsSysPerfCounter calculate_perf_counter_unit(void) {
+ int i;
+ ErtsSysPerfCounter pre, post;
+ double value = 0;
+ double round_factor;
+#if defined(HAVE_GETHRTIME) && defined(GETHRTIME_WITH_CLOCK_GETTIME)
+ struct timespec basetime,comparetime;
+#define __GETTIME(arg) clock_gettime(CLOCK_MONOTONIC,arg)
+#define __GETUSEC(arg) (arg.tv_nsec / 1000)
+#else
+ SysTimeval basetime,comparetime;
+#define __GETTIME(arg) sys_gettimeofday(arg)
+#define __GETUSEC(arg) arg.tv_usec
+#endif
+
+ for (i = 0; i < ITERATIONS; i++) {
+ /* Spin until the usec value has just crossed into the next RESOLUTION-sized bucket */
+ __GETTIME(&basetime);
+ do {
+ __GETTIME(&comparetime);
+ } while ((__GETUSEC(basetime) / RESOLUTION) == (__GETUSEC(comparetime) / RESOLUTION));
+
+ pre = erts_sys_perf_counter();
+
+ __GETTIME(&basetime); /* second spin: this is the interval measured between pre and post */
+ do {
+ __GETTIME(&comparetime);
+ } while ((__GETUSEC(basetime) / RESOLUTION) == (__GETUSEC(comparetime) / RESOLUTION));
+
+ post = erts_sys_perf_counter();
+
+ value += post - pre;
+ }
+ /* After this, value is ticks per microsecond (each spin is treated as RESOLUTION usec) */
+ value /= (RESOLUTION*ITERATIONS);
+
+ /* Round to SIGFIGS significant figures */
+ round_factor = pow(10.0, SIGFIGS - ceil(log10(value)));
+ value = ((ErtsSysPerfCounter)(value * round_factor + 0.5)) / round_factor;
+
+ /* Convert to ticks per second */
+ return 1000000 * value;
+}
+/* Decide whether rdtsc should be used: ETHR_X86_RUNTIME_CONF_HAVE_RDTSCP__ where available, otherwise 0. */
+static int have_rdtscp(void)
+{
+#if defined(ETHR_X86_RUNTIME_CONF__)
+ /* On early x86 CPUs the tsc varies with
+ the current speed of the cpu, which means that the time per
+ tick varies depending on the current load of the cpu. We do not
+ want this as it would give very skewed numbers when the cpu is
+ mostly idle.
+ If we have the rdtscp feature it is a sign that the cpu is
+ relatively modern, and thus the tsc is quite stable, so we use it then.
+
+ If this test is not good enough, I don't know what we'll do.
+ Maybe fall back on erts_sys_hrtime always, but that would be a shame as
+ rdtsc is about 3 times faster than hrtime... */
+ return ETHR_X86_RUNTIME_CONF_HAVE_RDTSCP__;
+#else
+ return 0;
+#endif
+}
+/* Read the x86 time stamp counter with the rdtsc instruction; yields 0 when built for any other architecture. */
+static ErtsSysPerfCounter rdtsc(void)
+{
+ /* It may have been a good idea to put the cpuid instruction before
+ the rdtsc, but I decided against it because it is not really
+ needed for msacc, and it slows it down by quite a bit (5-7 times slower).
+ As a result though, this timestamp becomes much less
+ accurate, as it might be re-ordered to be executed way before or after
+ this function is called.
+ */
+ ErtsSysPerfCounter ts = 0; /* stays 0 if neither asm variant below is compiled in */
+#if defined(__x86_64__)
+ __asm__ __volatile__ ("rdtsc\n\t"
+ "shl $32, %%rdx\n\t"
+ "or %%rdx, %0" : "=a" (ts) : : "rdx");
+#elif defined(__i386__)
+ __asm__ __volatile__ ("rdtsc\n\t"
+ : "=A" (ts) );
+#endif
+ return ts;
+}
+/* Choose the perf counter source and record its unit in erts_sys_time_data__.r.o. */
+static void init_perf_counter(void)
+{
+ if (have_rdtscp()) {
+ erts_sys_time_data__.r.o.perf_counter = rdtsc; /* must be set first: the unit calculation samples through this pointer */
+ erts_sys_time_data__.r.o.perf_counter_unit = calculate_perf_counter_unit();
+ } else {
+ erts_sys_time_data__.r.o.perf_counter = erts_sys_hrtime;
+ erts_sys_time_data__.r.o.perf_counter_unit = ERTS_HRTIME_UNIT; /* defined as 1000*1000*1000 */
+ }
+}
+
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#ifdef HAVE_GETHRVTIME_PROCFS_IOCTL
diff --git a/erts/emulator/sys/win32/erl_win_sys.h b/erts/emulator/sys/win32/erl_win_sys.h
index 1ab9eadefd..99c1066ab3 100644
--- a/erts/emulator/sys/win32/erl_win_sys.h
+++ b/erts/emulator/sys/win32/erl_win_sys.h
@@ -183,6 +183,7 @@ typedef LONGLONG ErtsSysHrTime;
#endif
typedef ErtsMonotonicTime ErtsSystemTime;
+typedef ErtsMonotonicTime ErtsSysPerfCounter;
ErtsSystemTime erts_os_system_time(void);
@@ -213,6 +214,7 @@ ERTS_GLB_INLINE ErtsMonotonicTime erts_os_monotonic_time(void);
ERTS_GLB_INLINE void erts_os_times(ErtsMonotonicTime *,
ErtsSystemTime *);
ERTS_GLB_INLINE ErtsSysHrTime erts_sys_hrtime(void);
+ERTS_GLB_INLINE ErtsSysPerfCounter erts_sys_perf_counter(void);
#if ERTS_GLB_INLINE_INCL_FUNC_DEF
@@ -234,12 +236,22 @@ erts_sys_hrtime(void)
return (*erts_sys_time_data__.r.o.sys_hrtime)();
}
+ERTS_GLB_INLINE ErtsSysPerfCounter
+erts_sys_perf_counter(void)
+{
+ return (*erts_sys_time_data__.r.o.sys_hrtime)(); /* perf counter reads go through the sys_hrtime function pointer */
+}
+/* Unit reported for erts_sys_perf_counter() values: a fixed 10^9. */
+ERTS_GLB_INLINE ErtsSysPerfCounter
+erts_sys_perf_counter_unit(void)
+{
+ return 1000 * 1000 * 1000;
+}
+
#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
extern void sys_gettimeofday(SysTimeval *tv);
extern clock_t sys_times(SysTimes *buffer);
-#define sys_perf_counter(ts) *(ts) = erts_sys_hrtime()
-#define SYS_PERF_COUNTER_UNIT ERTS_I64_LITERAL(1000000000)
extern char *win_build_environment(char *);