All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] perf bench mem: Always memset source before memcpy
@ 2020-08-10 13:34 Vincent Whitchurch
  2020-08-12 12:25 ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 2+ messages in thread
From: Vincent Whitchurch @ 2020-08-10 13:34 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim
  Cc: kernel, linux-kernel, Vincent Whitchurch

For memcpy, the source pages are memset to zero only when --cycles is
used.  This leads to wildly different results with or without --cycles,
since all source pages are likely to be mapped to the same zero page
without explicit writes.

Before this fix:

$ export cmd="./perf stat -e LLC-loads -- ./perf bench \
  mem memcpy -s 1024MB -l 100 -f default"
$ $cmd

         2,935,826      LLC-loads
       3.821677452 seconds time elapsed

$ $cmd --cycles

       217,533,436      LLC-loads
       8.616725985 seconds time elapsed

After this fix:

$ $cmd

       214,459,686      LLC-loads
       8.674301124 seconds time elapsed

$ $cmd --cycles

       214,758,651      LLC-loads
       8.644480006 seconds time elapsed

Fixes: 47b5757bac03c3387c ("perf bench mem: Move boilerplate memory allocation to the infrastructure")
Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
---
 tools/perf/bench/mem-functions.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
index 9235b76501be..19d45c377ac1 100644
--- a/tools/perf/bench/mem-functions.c
+++ b/tools/perf/bench/mem-functions.c
@@ -223,12 +223,8 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
 	return 0;
 }
 
-static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
+static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst)
 {
-	u64 cycle_start = 0ULL, cycle_end = 0ULL;
-	memcpy_t fn = r->fn.memcpy;
-	int i;
-
 	/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
 	memset(src, 0, size);
 
@@ -237,6 +233,15 @@ static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, vo
 	 * to not measure page fault overhead:
 	 */
 	fn(dst, src, size);
+}
+
+static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
+{
+	u64 cycle_start = 0ULL, cycle_end = 0ULL;
+	memcpy_t fn = r->fn.memcpy;
+	int i;
+
+	memcpy_prefault(fn, size, src, dst);
 
 	cycle_start = get_cycles();
 	for (i = 0; i < nr_loops; ++i)
@@ -252,11 +257,7 @@ static double do_memcpy_gettimeofday(const struct function *r, size_t size, void
 	memcpy_t fn = r->fn.memcpy;
 	int i;
 
-	/*
-	 * We prefault the freshly allocated memory range here,
-	 * to not measure page fault overhead:
-	 */
-	fn(dst, src, size);
+	memcpy_prefault(fn, size, src, dst);
 
 	BUG_ON(gettimeofday(&tv_start, NULL));
 	for (i = 0; i < nr_loops; ++i)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] perf bench mem: Always memset source before memcpy
  2020-08-10 13:34 [PATCH] perf bench mem: Always memset source before memcpy Vincent Whitchurch
@ 2020-08-12 12:25 ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 2+ messages in thread
From: Arnaldo Carvalho de Melo @ 2020-08-12 12:25 UTC (permalink / raw)
  To: Vincent Whitchurch
  Cc: Peter Zijlstra, Ingo Molnar, Mark Rutland, Alexander Shishkin,
	Jiri Olsa, Namhyung Kim, kernel, linux-kernel

Em Mon, Aug 10, 2020 at 03:34:04PM +0200, Vincent Whitchurch escreveu:
> For memcpy, the source pages are memset to zero only when --cycles is
> used.  This leads to wildly different results with or without --cycles,
> since all sources pages are likely to be mapped to the same zero page
> without explicit writes.

Thanks, applied.

- Arnaldo
 
> Before this fix:
> 
> $ export cmd="./perf stat -e LLC-loads -- ./perf bench \
>   mem memcpy -s 1024MB -l 100 -f default"
> $ $cmd
> 
>          2,935,826      LLC-loads
>        3.821677452 seconds time elapsed
> 
> $ $cmd --cycles
> 
>        217,533,436      LLC-loads
>        8.616725985 seconds time elapsed
> 
> After this fix:
> 
> $ $cmd
> 
>        214,459,686      LLC-loads
>        8.674301124 seconds time elapsed
> 
> $ $cmd --cycles
> 
>        214,758,651      LLC-loads
>        8.644480006 seconds time elapsed
> 
> Fixes: 47b5757bac03c3387c ("perf bench mem: Move boilerplate memory allocation to the infrastructure")
> Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
> ---
>  tools/perf/bench/mem-functions.c | 21 +++++++++++----------
>  1 file changed, 11 insertions(+), 10 deletions(-)
> 
> diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
> index 9235b76501be..19d45c377ac1 100644
> --- a/tools/perf/bench/mem-functions.c
> +++ b/tools/perf/bench/mem-functions.c
> @@ -223,12 +223,8 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
>  	return 0;
>  }
>  
> -static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
> +static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst)
>  {
> -	u64 cycle_start = 0ULL, cycle_end = 0ULL;
> -	memcpy_t fn = r->fn.memcpy;
> -	int i;
> -
>  	/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
>  	memset(src, 0, size);
>  
> @@ -237,6 +233,15 @@ static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, vo
>  	 * to not measure page fault overhead:
>  	 */
>  	fn(dst, src, size);
> +}
> +
> +static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
> +{
> +	u64 cycle_start = 0ULL, cycle_end = 0ULL;
> +	memcpy_t fn = r->fn.memcpy;
> +	int i;
> +
> +	memcpy_prefault(fn, size, src, dst);
>  
>  	cycle_start = get_cycles();
>  	for (i = 0; i < nr_loops; ++i)
> @@ -252,11 +257,7 @@ static double do_memcpy_gettimeofday(const struct function *r, size_t size, void
>  	memcpy_t fn = r->fn.memcpy;
>  	int i;
>  
> -	/*
> -	 * We prefault the freshly allocated memory range here,
> -	 * to not measure page fault overhead:
> -	 */
> -	fn(dst, src, size);
> +	memcpy_prefault(fn, size, src, dst);
>  
>  	BUG_ON(gettimeofday(&tv_start, NULL));
>  	for (i = 0; i < nr_loops; ++i)
> -- 
> 2.25.1
> 

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2020-08-12 12:25 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-10 13:34 [PATCH] perf bench mem: Always memset source before memcpy Vincent Whitchurch
2020-08-12 12:25 ` Arnaldo Carvalho de Melo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.