* [PATCH v2] perf test x86: address multiplexing in rdpmc test @ 2020-03-21 17:37 Ian Rogers 2020-03-22 10:18 ` Peter Zijlstra 0 siblings, 1 reply; 5+ messages in thread From: Ian Rogers @ 2020-03-21 17:37 UTC (permalink / raw) To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo, Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim, Andi Kleen, Kan Liang, linux-kernel Cc: Stephane Eranian, Ian Rogers Counters may be being used for pinned or other events which inhibit the instruction counter in the test from being scheduled - time_enabled > 0 but time_running == 0. This causes the test to fail with division by 0. Make time_running an out parameter of mmap_read_self and add a sleep loop to ensure that the counter is running before computing the delta. v2. Address review feedback from Peter Zijlstra. Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/arch/x86/tests/rdpmc.c | 57 +++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c index 1ea916656a2d..64145d4e3d4d 100644 --- a/tools/perf/arch/x86/tests/rdpmc.c +++ b/tools/perf/arch/x86/tests/rdpmc.c @@ -34,20 +34,33 @@ static u64 rdtsc(void) return low | ((u64)high) << 32; } -static u64 mmap_read_self(void *addr) +/* + * Return a user rdpmc result. The kernel may be multiplexing events and so the + * result is scaled to the period of time the counter was enabled. The + * time_running out parameter is set to the amount of time the counter has been + * running. The result will be zero if time_running is zero. + */ +static u64 mmap_read_self(void *addr, u64 *running) { struct perf_event_mmap_page *pc = addr; u32 seq, idx, time_mult = 0, time_shift = 0; - u64 count, cyc = 0, time_offset = 0, enabled, running, delta; + u64 count, cyc = 0, time_offset = 0, enabled, delta; do { seq = pc->lock; barrier(); enabled = pc->time_enabled; - running = pc->time_running; - - if (enabled != running) { + *running = pc->time_running; + + if (*running == 0) { + /* + * Counter won't have a value as due to multiplexing the + * event wasn't scheduled. + */ + return 0; + } + if (enabled != *running) { cyc = rdtsc(); time_mult = pc->time_mult; time_shift = pc->time_shift; @@ -62,7 +75,7 @@ static u64 mmap_read_self(void *addr) barrier(); } while (pc->lock != seq); - if (enabled != running) { + if (enabled != *running) { u64 quot, rem; quot = (cyc >> time_shift); @@ -72,11 +85,11 @@ static u64 mmap_read_self(void *addr) enabled += delta; if (idx) - running += delta; + *running += delta; - quot = count / running; - rem = count % running; - count = quot * enabled + (rem * enabled) / running; + quot = count / *running; + rem = count % *running; + count = quot * enabled + (rem * enabled) / *running; } return count; @@ -104,7 +117,7 @@ static int __test__rdpmc(void) .config = PERF_COUNT_HW_INSTRUCTIONS, .exclude_kernel = 1, }; - u64 delta_sum = 0; + u64 delta_sum = 0, sleep_count = 0; struct sigaction sa; char sbuf[STRERR_BUFSIZE]; @@ -130,14 +143,23 @@ static int __test__rdpmc(void) } for (n = 0; n < 6; n++) { - u64 stamp, now, delta; - - stamp = mmap_read_self(addr); + u64 stamp, now, delta, running; + + for (;;) { + stamp = mmap_read_self(addr, &running); + if (running != 0) + break; + /* Try to wait for event to be running. */ + sleep_count++; + if (sleep_count > 60) + goto out_never_run; + sleep(1); + } for (i = 0; i < loops; i++) tmp++; - now = mmap_read_self(addr); + now = mmap_read_self(addr, &running); loops *= 10; delta = now - stamp; @@ -155,6 +177,11 @@ static int __test__rdpmc(void) return -1; return 0; + +out_never_run: + close(fd); + pr_err("Event counter failed to multiplexed in. Are higher priority counters being sampled by a different process?\n"); + return -1; } int test__rdpmc(struct test *test __maybe_unused, int subtest __maybe_unused) -- 2.25.1.696.g5e7596f4ac-goog ^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH v2] perf test x86: address multiplexing in rdpmc test 2020-03-21 17:37 [PATCH v2] perf test x86: address multiplexing in rdpmc test Ian Rogers @ 2020-03-22 10:18 ` Peter Zijlstra 2020-03-22 23:18 ` Andi Kleen 2020-03-23 0:14 ` Ian Rogers 0 siblings, 2 replies; 5+ messages in thread From: Peter Zijlstra @ 2020-03-22 10:18 UTC (permalink / raw) To: Ian Rogers Cc: Ingo Molnar, Arnaldo Carvalho de Melo, Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim, Andi Kleen, Kan Liang, linux-kernel, Stephane Eranian On Sat, Mar 21, 2020 at 10:37:10AM -0700, Ian Rogers wrote: > +static u64 mmap_read_self(void *addr, u64 *running) > { > struct perf_event_mmap_page *pc = addr; > u32 seq, idx, time_mult = 0, time_shift = 0; > - u64 count, cyc = 0, time_offset = 0, enabled, running, delta; > + u64 count, cyc = 0, time_offset = 0, enabled, delta; > > do { > seq = pc->lock; > barrier(); > > enabled = pc->time_enabled; > - running = pc->time_running; > - > - if (enabled != running) { > + *running = pc->time_running; > + > + if (*running == 0) { > + /* > + * Counter won't have a value as due to multiplexing the > + * event wasn't scheduled. > + */ > + return 0; > + } I still think adding code for an error case here is a bad idea. And only passing running as an argument is inconsistent. Also, then I had a look at what the compiler made of that function and cried. Here's something a little better. Much of it copied from linux/math64.h and asm/div64.h. --- diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c index 1ea916656a2d..386a6dacb21e 100644 --- a/tools/perf/arch/x86/tests/rdpmc.c +++ b/tools/perf/arch/x86/tests/rdpmc.c @@ -34,20 +34,98 @@ static u64 rdtsc(void) return low | ((u64)high) << 32; } -static u64 mmap_read_self(void *addr) +#ifdef __x86_64__ +static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c) +{ + u64 q; + + asm ("mulq %2; divq %3" : "=a" (q) + : "a" (a), "rm" (b), "rm" (c) + : "rdx"); + + return q; +} +#define mul_u64_u64_div64 mul_u64_u64_div64 +#endif + +#ifdef __SIZEOF_INT128__ + +static inline u64 mul_u64_u32_shr(u64 a, u32 b, unsigned int shift) +{ + return (u64)(((unsigned __int128)a * b) >> shift); +} + +#ifndef mul_u64_u64_div64 +static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c) +{ + unsigned __int128 m = a; + m *= b; + return m / c; +} +#endif + +#else + +#ifdef __i386__ +static inline u64 mul_u32_u32(u32 a, u32 b) +{ + u32 high, low; + + asm ("mull %[b]" : "=a" (low), "=d" (high) + : [a] "a" (a), [b] "rm" (b) ); + + return low | ((u64)high) << 32; +} +#else +static inline u64 mul_u32_u32(u32 a, u32 b) +{ + return (u64)a * b; +} +#endif + +static inline u64 mul_u64_u32_shr(u64 a, u32 b, unsigned int shift) +{ + u32 ah, al; + u64 ret; + + al = a; + ah = a >> 32; + + ret = mul_u32_u32(al, mul) >> shift; + if (ah) + ret += mul_u32_u32(ah, mul) << (32 - shift); + + return ret; +} + +#ifndef mul_u64_u64_div64 +static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c) +{ + u64 quot, rem; + + quot = a / c; + rem = a % c; + + return qout * b + (rem * b) / c; +} +#endif + +#endif + +static u64 mmap_read_self(void *addr, u64 *enabled, u64 *running) { struct perf_event_mmap_page *pc = addr; u32 seq, idx, time_mult = 0, time_shift = 0; - u64 count, cyc = 0, time_offset = 0, enabled, running, delta; + u64 count, cyc = 0, time_offset = 0; do { seq = pc->lock; barrier(); - enabled = pc->time_enabled; - running = pc->time_running; + *enabled = pc->time_enabled; + *running = pc->time_running; - if (enabled != running) { + if (*enabled != *running) { cyc = rdtsc(); time_mult = pc->time_mult; time_shift = pc->time_shift; @@ -62,21 +140,13 @@ static u64 mmap_read_self(void *addr) barrier(); } while (pc->lock != seq); - if (enabled != running) { - u64 quot, rem; - - quot = (cyc >> time_shift); - rem = cyc & (((u64)1 << time_shift) - 1); - delta = time_offset + quot * time_mult + - ((rem * time_mult) >> time_shift); - - enabled += delta; + if (*enabled != *running) { + u64 delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift); + *enabled += delta; if (idx) - running += delta; + *running += delta; - quot = count / running; - rem = count % running; - count = quot * enabled + (rem * enabled) / running; + count = mul_u64_u64_div64(count, *enabled, *running); } return count; @@ -130,14 +200,18 @@ static int __test__rdpmc(void) } for (n = 0; n < 6; n++) { - u64 stamp, now, delta; + u64 stamp, now, delta, enabled, running; - stamp = mmap_read_self(addr); + stamp = mmap_read_self(addr, &enabled, &running); for (i = 0; i < loops; i++) tmp++; - now = mmap_read_self(addr); + now = mmap_read_self(addr, &enabled, &running); + + if (enabled && !running) + goto out_error; + loops *= 10; delta = now - stamp; @@ -155,6 +229,11 @@ static int __test__rdpmc(void) return -1; return 0; + +out_error: + close(fd); + pr_err("counter never ran; you loose\n"); + return -1; } int test__rdpmc(struct test *test __maybe_unused, int subtest __maybe_unused) ^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH v2] perf test x86: address multiplexing in rdpmc test 2020-03-22 10:18 ` Peter Zijlstra @ 2020-03-22 23:18 ` Andi Kleen 2020-03-23 12:29 ` Peter Zijlstra 2020-03-23 0:14 ` Ian Rogers 1 sibling, 1 reply; 5+ messages in thread From: Andi Kleen @ 2020-03-22 23:18 UTC (permalink / raw) To: Peter Zijlstra Cc: Ian Rogers, Ingo Molnar, Arnaldo Carvalho de Melo, Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim, Kan Liang, linux-kernel, Stephane Eranian > Here's something a little better. Much of it copied from linux/math64.h > and asm/div64.h. Not sure what the point is of micro optimizing a unit test? This is never run in production. -Andi ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v2] perf test x86: address multiplexing in rdpmc test 2020-03-22 23:18 ` Andi Kleen @ 2020-03-23 12:29 ` Peter Zijlstra 0 siblings, 0 replies; 5+ messages in thread From: Peter Zijlstra @ 2020-03-23 12:29 UTC (permalink / raw) To: Andi Kleen Cc: Ian Rogers, Ingo Molnar, Arnaldo Carvalho de Melo, Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim, Kan Liang, linux-kernel, Stephane Eranian On Sun, Mar 22, 2020 at 04:18:20PM -0700, Andi Kleen wrote: > > Here's something a little better. Much of it copied from linux/math64.h > > and asm/div64.h. > > Not sure what the point is of micro optimizing a unit test? > > This is never run in production. People might use it as an example of how to use the stuff.. and then copy horrendous crap code. ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v2] perf test x86: address multiplexing in rdpmc test 2020-03-22 10:18 ` Peter Zijlstra 2020-03-22 23:18 ` Andi Kleen @ 2020-03-23 0:14 ` Ian Rogers 1 sibling, 0 replies; 5+ messages in thread From: Ian Rogers @ 2020-03-23 0:14 UTC (permalink / raw) To: Peter Zijlstra Cc: Ingo Molnar, Arnaldo Carvalho de Melo, Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim, Andi Kleen, Kan Liang, LKML, Stephane Eranian On Sun, Mar 22, 2020 at 3:18 AM Peter Zijlstra <peterz@infradead.org> wrote: > > On Sat, Mar 21, 2020 at 10:37:10AM -0700, Ian Rogers wrote: > > > +static u64 mmap_read_self(void *addr, u64 *running) > > { > > struct perf_event_mmap_page *pc = addr; > > u32 seq, idx, time_mult = 0, time_shift = 0; > > - u64 count, cyc = 0, time_offset = 0, enabled, running, delta; > > + u64 count, cyc = 0, time_offset = 0, enabled, delta; > > > > do { > > seq = pc->lock; > > barrier(); > > > > enabled = pc->time_enabled; > > - running = pc->time_running; > > - > > - if (enabled != running) { > > + *running = pc->time_running; > > + > > + if (*running == 0) { > > + /* > > + * Counter won't have a value as due to multiplexing the > > + * event wasn't scheduled. > > + */ > > + return 0; > > + } > > I still think adding code for an error case here is a bad idea. And only > passing running as an argument is inconsistent. > > Also, then I had a look at what the compiler made of that function and > cried. > > Here's something a little better. Much of it copied from linux/math64.h > and asm/div64.h. > > --- > diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c > index 1ea916656a2d..386a6dacb21e 100644 > --- a/tools/perf/arch/x86/tests/rdpmc.c > +++ b/tools/perf/arch/x86/tests/rdpmc.c > @@ -34,20 +34,98 @@ static u64 rdtsc(void) > return low | ((u64)high) << 32; > } > > -static u64 mmap_read_self(void *addr) > +#ifdef __x86_64__ > +static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c) > +{ > + u64 q; > + > + asm ("mulq %2; divq %3" : "=a" (q) > + : "a" (a), "rm" (b), "rm" (c) > + : "rdx"); > + > + return q; > +} > +#define mul_u64_u64_div64 mul_u64_u64_div64 > +#endif > + > +#ifdef __SIZEOF_INT128__ > + > +static inline u64 mul_u64_u32_shr(u64 a, u32 b, unsigned int shift) > +{ > + return (u64)(((unsigned __int128)a * b) >> shift); > +} > + > +#ifndef mul_u64_u64_div64 > +static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c) > +{ > + unsigned __int128 m = a; > + m *= b; > + return m / c; > +} > +#endif > + > +#else > + > +#ifdef __i386__ > +static inline u64 mul_u32_u32(u32 a, u32 b) > +{ > + u32 high, low; > + > + asm ("mull %[b]" : "=a" (low), "=d" (high) > + : [a] "a" (a), [b] "rm" (b) ); > + > + return low | ((u64)high) << 32; > +} > +#else > +static inline u64 mul_u32_u32(u32 a, u32 b) > +{ > + return (u64)a * b; > +} > +#endif > + > +static inline u64 mul_u64_u32_shr(u64 a, u32 b, unsigned int shift) > +{ > + u32 ah, al; > + u64 ret; > + > + al = a; > + ah = a >> 32; > + > + ret = mul_u32_u32(al, mul) >> shift; > + if (ah) > + ret += mul_u32_u32(ah, mul) << (32 - shift); > + > + return ret; > +} > + > +#ifndef mul_u64_u64_div64 > +static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c) > +{ > + u64 quot, rem; > + > + quot = a / c; > + rem = a % c; > + > + return qout * b + (rem * b) / c; > +} > +#endif > + > +#endif > + > +static u64 mmap_read_self(void *addr, u64 *enabled, u64 *running) > { > struct perf_event_mmap_page *pc = addr; > u32 seq, idx, time_mult = 0, time_shift = 0; > - u64 count, cyc = 0, time_offset = 0, enabled, running, delta; > + u64 count, cyc = 0, time_offset = 0; > > do { > seq = pc->lock; > barrier(); > > - enabled = pc->time_enabled; > - running = pc->time_running; > + *enabled = pc->time_enabled; > + *running = pc->time_running; > > - if (enabled != running) { > + if (*enabled != *running) { > cyc = rdtsc(); > time_mult = pc->time_mult; > time_shift = pc->time_shift; > @@ -62,21 +140,13 @@ static u64 mmap_read_self(void *addr) > barrier(); > } while (pc->lock != seq); > > - if (enabled != running) { > - u64 quot, rem; > - > - quot = (cyc >> time_shift); > - rem = cyc & (((u64)1 << time_shift) - 1); > - delta = time_offset + quot * time_mult + > - ((rem * time_mult) >> time_shift); > - > - enabled += delta; > + if (*enabled != *running) { > + u64 delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift); > + *enabled += delta; > if (idx) > - running += delta; > + *running += delta; > > - quot = count / running; > - rem = count % running; > - count = quot * enabled + (rem * enabled) / running; > + count = mul_u64_u64_div64(count, *enabled, *running); *running may be 0 here, because of multiplexing, and so it will yield a SIGFPE. > } > > return count; > @@ -130,14 +200,18 @@ static int __test__rdpmc(void) > } > > for (n = 0; n < 6; n++) { > - u64 stamp, now, delta; > + u64 stamp, now, delta, enabled, running; > > - stamp = mmap_read_self(addr); > + stamp = mmap_read_self(addr, &enabled, &running); > > for (i = 0; i < loops; i++) > tmp++; > > - now = mmap_read_self(addr); > + now = mmap_read_self(addr, &enabled, &running); > + > + if (enabled && !running) > + goto out_error; > + > loops *= 10; > > delta = now - stamp; > @@ -155,6 +229,11 @@ static int __test__rdpmc(void) > return -1; > > return 0; > + > +out_error: > + close(fd); > + pr_err("counter never ran; you loose\n"); > + return -1; I'd prefer to retry in this case as I'm not running the test on an isolated machine. Perhaps -2 (TEST_SKIP) rather than -1 (TEST_FAIL), some kind of resource not available error would be better. Thanks, Ian > } > > int test__rdpmc(struct test *test __maybe_unused, int subtest __maybe_unused) > ^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2020-03-23 12:29 UTC | newest] Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2020-03-21 17:37 [PATCH v2] perf test x86: address multiplexing in rdpmc test Ian Rogers 2020-03-22 10:18 ` Peter Zijlstra 2020-03-22 23:18 ` Andi Kleen 2020-03-23 12:29 ` Peter Zijlstra 2020-03-23 0:14 ` Ian Rogers
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).