All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] perf bench futex: benchmark only online CPUs
@ 2017-11-23  0:25 Kim Phillips
  2017-11-23 15:09 ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 6+ messages in thread
From: Kim Phillips @ 2017-11-23  0:25 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Alexander Shishkin, Jiri Olsa, Namhyung Kim, Thomas Gleixner,
	Darren Hart, Colin Ian King
  Cc: James Yang, linux-kernel

From: James Yang <james.yang@arm.com>

The "perf bench futex" benchmarks have a problem when not all CPUs in
the system are online:  perf assumes the CPUs that are online are
contiguously numbered and assigns processor affinity to the threads by
modulo striping.  When the online CPUs are not contiguously numbered,
perf errors out with:

$ echo 0 | sudo tee /sys/devices/system/cpu/cpu3/online
0
$ ./oldperf bench futex all
perf: pthread_create: Operation not permitted
Run summary [PID 14934]: 7 threads, each operating on 1024 [private] futexes for 10 secs.

$

This patch makes perf not assume all CPUs configured are online, and
adds a mapping table to stripe affinity across the CPUs that are
online.

Signed-off-by: James Yang <james.yang@arm.com>
Signed-off-by: Kim Phillips <kim.phillips@arm.com>
---
 tools/perf/bench/Build                 |  1 +
 tools/perf/bench/cpu-online-map.c      | 21 +++++++++++++++++++++
 tools/perf/bench/cpu-online-map.h      |  6 ++++++
 tools/perf/bench/futex-hash.c          | 14 ++++++++++++--
 tools/perf/bench/futex-lock-pi.c       | 15 ++++++++++++---
 tools/perf/bench/futex-requeue.c       | 14 +++++++++++---
 tools/perf/bench/futex-wake-parallel.c | 17 ++++++++++++++---
 tools/perf/bench/futex-wake.c          | 15 ++++++++++++---
 8 files changed, 89 insertions(+), 14 deletions(-)
 create mode 100644 tools/perf/bench/cpu-online-map.c
 create mode 100644 tools/perf/bench/cpu-online-map.h

diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index 60bf11943047..bc79d3577ace 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -6,6 +6,7 @@ perf-y += futex-wake.o
 perf-y += futex-wake-parallel.o
 perf-y += futex-requeue.o
 perf-y += futex-lock-pi.o
+perf-y += cpu-online-map.o
 
 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
 perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
diff --git a/tools/perf/bench/cpu-online-map.c b/tools/perf/bench/cpu-online-map.c
new file mode 100644
index 000000000000..4157d2220bf4
--- /dev/null
+++ b/tools/perf/bench/cpu-online-map.c
@@ -0,0 +1,21 @@
+#include <sched.h>
+#include <unistd.h>
+#include <err.h>
+#include <stdlib.h>
+#include "cpu-online-map.h"
+
+void compute_cpu_online_map(int *cpu_online_map)
+{
+	long ncpus_conf = sysconf(_SC_NPROCESSORS_CONF);
+	cpu_set_t set;
+	size_t j = 0;
+
+	CPU_ZERO(&set);
+
+	if (sched_getaffinity(0, sizeof(set), &set))
+		err(EXIT_FAILURE, "sched_getaffinity");
+
+	for (int i = 0; i < ncpus_conf; i++)
+		if (CPU_ISSET(i, &set))
+			cpu_online_map[j++] = i;
+}
diff --git a/tools/perf/bench/cpu-online-map.h b/tools/perf/bench/cpu-online-map.h
new file mode 100644
index 000000000000..0620a674c6d8
--- /dev/null
+++ b/tools/perf/bench/cpu-online-map.h
@@ -0,0 +1,6 @@
+#ifndef CPU_ONLINE_MAP_H
+#define CPU_ONLINE_MAP_H
+
+void compute_cpu_online_map(int *cpu_online_map);
+
+#endif
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index 58ae6ed8f38b..81314eea1cf5 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -24,6 +24,7 @@
 #include <subcmd/parse-options.h>
 #include "bench.h"
 #include "futex.h"
+#include "cpu-online-map.h"
 
 #include <err.h>
 #include <sys/time.h>
@@ -120,9 +121,11 @@ int bench_futex_hash(int argc, const char **argv)
 	int ret = 0;
 	cpu_set_t cpu;
 	struct sigaction act;
-	unsigned int i, ncpus;
+	unsigned int i;
+	long ncpus;
 	pthread_attr_t thread_attr;
 	struct worker *worker = NULL;
+	int *cpu_map;
 
 	argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
 	if (argc) {
@@ -132,6 +135,12 @@ int bench_futex_hash(int argc, const char **argv)
 
 	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
 
+	cpu_map = calloc(ncpus, sizeof(int));
+	if (!cpu_map)
+		goto errmem;
+
+	compute_cpu_online_map(cpu_map);
+
 	sigfillset(&act.sa_mask);
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
@@ -164,7 +173,7 @@ int bench_futex_hash(int argc, const char **argv)
 			goto errmem;
 
 		CPU_ZERO(&cpu);
-		CPU_SET(i % ncpus, &cpu);
+		CPU_SET(cpu_map[i % ncpus], &cpu);
 
 		ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu);
 		if (ret)
@@ -217,6 +226,7 @@ int bench_futex_hash(int argc, const char **argv)
 	print_summary();
 
 	free(worker);
+	free(cpu_map);
 	return ret;
 errmem:
 	err(EXIT_FAILURE, "calloc");
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 08653ae8a8c4..42ab351b2c50 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -15,6 +15,7 @@
 #include <errno.h>
 #include "bench.h"
 #include "futex.h"
+#include "cpu-online-map.h"
 
 #include <err.h>
 #include <stdlib.h>
@@ -113,7 +114,8 @@ static void *workerfn(void *arg)
 	return NULL;
 }
 
-static void create_threads(struct worker *w, pthread_attr_t thread_attr)
+static void create_threads(struct worker *w, pthread_attr_t thread_attr,
+			   int *cpu_map)
 {
 	cpu_set_t cpu;
 	unsigned int i;
@@ -131,7 +133,7 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr)
 			worker[i].futex = &global_futex;
 
 		CPU_ZERO(&cpu);
-		CPU_SET(i % ncpus, &cpu);
+		CPU_SET(cpu_map[i % ncpus], &cpu);
 
 		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
 			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
@@ -147,12 +149,18 @@ int bench_futex_lock_pi(int argc, const char **argv)
 	unsigned int i;
 	struct sigaction act;
 	pthread_attr_t thread_attr;
+	int *cpu_map;
 
 	argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0);
 	if (argc)
 		goto err;
 
 	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	cpu_map = calloc(ncpus, sizeof(int));
+	if (!cpu_map)
+		err(EXIT_FAILURE, "calloc");
+
+	compute_cpu_online_map(cpu_map);
 
 	sigfillset(&act.sa_mask);
 	act.sa_sigaction = toggle_done;
@@ -180,7 +188,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
 	pthread_attr_init(&thread_attr);
 	gettimeofday(&start, NULL);
 
-	create_threads(worker, thread_attr);
+	create_threads(worker, thread_attr, cpu_map);
 	pthread_attr_destroy(&thread_attr);
 
 	pthread_mutex_lock(&thread_lock);
@@ -218,6 +226,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
 	print_summary();
 
 	free(worker);
+	free(cpu_map);
 	return ret;
 err:
 	usage_with_options(bench_futex_lock_pi_usage, options);
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index 1058c194608a..366961b901c6 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -22,6 +22,7 @@
 #include <errno.h>
 #include "bench.h"
 #include "futex.h"
+#include "cpu-online-map.h"
 
 #include <err.h>
 #include <stdlib.h>
@@ -83,7 +84,7 @@ static void *workerfn(void *arg __maybe_unused)
 }
 
 static void block_threads(pthread_t *w,
-			  pthread_attr_t thread_attr)
+			  pthread_attr_t thread_attr, int *cpu_map)
 {
 	cpu_set_t cpu;
 	unsigned int i;
@@ -93,7 +94,7 @@ static void block_threads(pthread_t *w,
 	/* create and block all threads */
 	for (i = 0; i < nthreads; i++) {
 		CPU_ZERO(&cpu);
-		CPU_SET(i % ncpus, &cpu);
+		CPU_SET(cpu_map[i % ncpus], &cpu);
 
 		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
 			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
@@ -116,12 +117,18 @@ int bench_futex_requeue(int argc, const char **argv)
 	unsigned int i, j;
 	struct sigaction act;
 	pthread_attr_t thread_attr;
+	int *cpu_map;
 
 	argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0);
 	if (argc)
 		goto err;
 
 	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	cpu_map = calloc(ncpus, sizeof(int));
+	if (!cpu_map)
+		err(EXIT_FAILURE, "calloc");
+
+	compute_cpu_online_map(cpu_map);
 
 	sigfillset(&act.sa_mask);
 	act.sa_sigaction = toggle_done;
@@ -156,7 +163,7 @@ int bench_futex_requeue(int argc, const char **argv)
 		struct timeval start, end, runtime;
 
 		/* create, launch & block all threads */
-		block_threads(worker, thread_attr);
+		block_threads(worker, thread_attr, cpu_map);
 
 		/* make sure all threads are already blocked */
 		pthread_mutex_lock(&thread_lock);
@@ -210,6 +217,7 @@ int bench_futex_requeue(int argc, const char **argv)
 	print_summary();
 
 	free(worker);
+	free(cpu_map);
 	return ret;
 err:
 	usage_with_options(bench_futex_requeue_usage, options);
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index b4732dad9f89..5617bcd17e55 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -21,6 +21,7 @@
 #include <errno.h>
 #include "bench.h"
 #include "futex.h"
+#include "cpu-online-map.h"
 
 #include <err.h>
 #include <stdlib.h>
@@ -119,7 +120,8 @@ static void *blocked_workerfn(void *arg __maybe_unused)
 	return NULL;
 }
 
-static void block_threads(pthread_t *w, pthread_attr_t thread_attr)
+static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
+			  int *cpu_map)
 {
 	cpu_set_t cpu;
 	unsigned int i;
@@ -129,7 +131,7 @@ static void block_threads(pthread_t *w, pthread_attr_t thread_attr)
 	/* create and block all threads */
 	for (i = 0; i < nblocked_threads; i++) {
 		CPU_ZERO(&cpu);
-		CPU_SET(i % ncpus, &cpu);
+		CPU_SET(cpu_map[i % ncpus], &cpu);
 
 		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
 			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
@@ -205,6 +207,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
 	struct sigaction act;
 	pthread_attr_t thread_attr;
 	struct thread_data *waking_worker;
+	int *cpu_map;
 
 	argc = parse_options(argc, argv, options,
 			     bench_futex_wake_parallel_usage, 0);
@@ -218,6 +221,13 @@ int bench_futex_wake_parallel(int argc, const char **argv)
 	sigaction(SIGINT, &act, NULL);
 
 	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+
+	cpu_map = calloc(ncpus, sizeof(int));
+	if (!cpu_map)
+		err(EXIT_FAILURE, "calloc");
+
+	compute_cpu_online_map(cpu_map);
+
 	if (!nblocked_threads)
 		nblocked_threads = ncpus;
 
@@ -259,7 +269,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
 			err(EXIT_FAILURE, "calloc");
 
 		/* create, launch & block all threads */
-		block_threads(blocked_worker, thread_attr);
+		block_threads(blocked_worker, thread_attr, cpu_map);
 
 		/* make sure all threads are already blocked */
 		pthread_mutex_lock(&thread_lock);
@@ -295,5 +305,6 @@ int bench_futex_wake_parallel(int argc, const char **argv)
 	print_summary();
 
 	free(blocked_worker);
+	free(cpu_map);
 	return ret;
 }
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 8c5c0b6b5c97..c0c71a769d2f 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -22,6 +22,7 @@
 #include <errno.h>
 #include "bench.h"
 #include "futex.h"
+#include "cpu-online-map.h"
 
 #include <err.h>
 #include <stdlib.h>
@@ -89,7 +90,7 @@ static void print_summary(void)
 }
 
 static void block_threads(pthread_t *w,
-			  pthread_attr_t thread_attr)
+			  pthread_attr_t thread_attr, int *cpu_map)
 {
 	cpu_set_t cpu;
 	unsigned int i;
@@ -99,7 +100,7 @@ static void block_threads(pthread_t *w,
 	/* create and block all threads */
 	for (i = 0; i < nthreads; i++) {
 		CPU_ZERO(&cpu);
-		CPU_SET(i % ncpus, &cpu);
+		CPU_SET(cpu_map[i % ncpus], &cpu);
 
 		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
 			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
@@ -122,6 +123,7 @@ int bench_futex_wake(int argc, const char **argv)
 	unsigned int i, j;
 	struct sigaction act;
 	pthread_attr_t thread_attr;
+	int *cpu_map;
 
 	argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0);
 	if (argc) {
@@ -131,6 +133,12 @@ int bench_futex_wake(int argc, const char **argv)
 
 	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
 
+	cpu_map = calloc(ncpus, sizeof(int));
+	if (! cpu_map)
+		err(EXIT_FAILURE, "calloc");
+
+	compute_cpu_online_map(cpu_map);
+
 	sigfillset(&act.sa_mask);
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
@@ -161,7 +169,7 @@ int bench_futex_wake(int argc, const char **argv)
 		struct timeval start, end, runtime;
 
 		/* create, launch & block all threads */
-		block_threads(worker, thread_attr);
+		block_threads(worker, thread_attr, cpu_map);
 
 		/* make sure all threads are already blocked */
 		pthread_mutex_lock(&thread_lock);
@@ -204,5 +212,6 @@ int bench_futex_wake(int argc, const char **argv)
 	print_summary();
 
 	free(worker);
+	free(cpu_map);
 	return ret;
 }
-- 
2.15.0

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/3] perf bench futex: benchmark only online CPUs
  2017-11-23  0:25 [PATCH 1/3] perf bench futex: benchmark only online CPUs Kim Phillips
@ 2017-11-23 15:09 ` Arnaldo Carvalho de Melo
  2017-11-23 15:11   ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 6+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-11-23 15:09 UTC (permalink / raw)
  To: James Yang
  Cc: Kim Phillips, Peter Zijlstra, Ingo Molnar, Alexander Shishkin,
	Jiri Olsa, Namhyung Kim, Thomas Gleixner, Darren Hart,
	Colin Ian King, linux-kernel

Em Wed, Nov 22, 2017 at 06:25:28PM -0600, Kim Phillips escreveu:
> From: James Yang <james.yang@arm.com>
> 
> The "perf bench futex" benchmarks have a problem when not all CPUs in
> the system are online:  perf assumes the CPUs that are online are
> contiguously numbered and assigns processor affinity to the threads by
> modulo striping.  When the online CPUs are not contiguously numbered,
> perf errors out with:
> 
> $ echo 0 | sudo tee /sys/devices/system/cpu/cpu3/online
> 0
> $ ./oldperf bench futex all
> perf: pthread_create: Operation not permitted
> Run summary [PID 14934]: 7 threads, each operating on 1024 [private] futexes for 10 secs.
> 
> $
> 
> This patch makes perf not assume all CPUs configured are online, and
> adds a mapping table to stripe affinity across the CPUs that are
> online.

So, have you looked at tools/perf/util/cpumap.c? I think you can use:

	int i;
	struct cpu_map *cpus = cpu_map__new(NULL);

	for (i = 0; i < cpus->nr; ++i) {
		int cpu = cpus->map[i];
		...
	}

No?

- Arnaldo
 
> Signed-off-by: James Yang <james.yang@arm.com>
> Signed-off-by: Kim Phillips <kim.phillips@arm.com>
> ---
>  tools/perf/bench/Build                 |  1 +
>  tools/perf/bench/cpu-online-map.c      | 21 +++++++++++++++++++++
>  tools/perf/bench/cpu-online-map.h      |  6 ++++++
>  tools/perf/bench/futex-hash.c          | 14 ++++++++++++--
>  tools/perf/bench/futex-lock-pi.c       | 15 ++++++++++++---
>  tools/perf/bench/futex-requeue.c       | 14 +++++++++++---
>  tools/perf/bench/futex-wake-parallel.c | 17 ++++++++++++++---
>  tools/perf/bench/futex-wake.c          | 15 ++++++++++++---
>  8 files changed, 89 insertions(+), 14 deletions(-)
>  create mode 100644 tools/perf/bench/cpu-online-map.c
>  create mode 100644 tools/perf/bench/cpu-online-map.h
> 
> diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
> index 60bf11943047..bc79d3577ace 100644
> --- a/tools/perf/bench/Build
> +++ b/tools/perf/bench/Build
> @@ -6,6 +6,7 @@ perf-y += futex-wake.o
>  perf-y += futex-wake-parallel.o
>  perf-y += futex-requeue.o
>  perf-y += futex-lock-pi.o
> +perf-y += cpu-online-map.o
>  
>  perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
>  perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
> diff --git a/tools/perf/bench/cpu-online-map.c b/tools/perf/bench/cpu-online-map.c
> new file mode 100644
> index 000000000000..4157d2220bf4
> --- /dev/null
> +++ b/tools/perf/bench/cpu-online-map.c
> @@ -0,0 +1,21 @@
> +#include <sched.h>
> +#include <unistd.h>
> +#include <err.h>
> +#include <stdlib.h>
> +#include "cpu-online-map.h"
> +
> +void compute_cpu_online_map(int *cpu_online_map)
> +{
> +	long ncpus_conf = sysconf(_SC_NPROCESSORS_CONF);
> +	cpu_set_t set;
> +	size_t j = 0;
> +
> +	CPU_ZERO(&set);
> +
> +	if (sched_getaffinity(0, sizeof(set), &set))
> +		err(EXIT_FAILURE, "sched_getaffinity");
> +
> +	for (int i = 0; i < ncpus_conf; i++)
> +		if (CPU_ISSET(i, &set))
> +			cpu_online_map[j++] = i;
> +}
> diff --git a/tools/perf/bench/cpu-online-map.h b/tools/perf/bench/cpu-online-map.h
> new file mode 100644
> index 000000000000..0620a674c6d8
> --- /dev/null
> +++ b/tools/perf/bench/cpu-online-map.h
> @@ -0,0 +1,6 @@
> +#ifndef CPU_ONLINE_MAP_H
> +#define CPU_ONLINE_MAP_H
> +
> +void compute_cpu_online_map(int *cpu_online_map);
> +
> +#endif
> diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
> index 58ae6ed8f38b..81314eea1cf5 100644
> --- a/tools/perf/bench/futex-hash.c
> +++ b/tools/perf/bench/futex-hash.c
> @@ -24,6 +24,7 @@
>  #include <subcmd/parse-options.h>
>  #include "bench.h"
>  #include "futex.h"
> +#include "cpu-online-map.h"
>  
>  #include <err.h>
>  #include <sys/time.h>
> @@ -120,9 +121,11 @@ int bench_futex_hash(int argc, const char **argv)
>  	int ret = 0;
>  	cpu_set_t cpu;
>  	struct sigaction act;
> -	unsigned int i, ncpus;
> +	unsigned int i;
> +	long ncpus;
>  	pthread_attr_t thread_attr;
>  	struct worker *worker = NULL;
> +	int *cpu_map;
>  
>  	argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
>  	if (argc) {
> @@ -132,6 +135,12 @@ int bench_futex_hash(int argc, const char **argv)
>  
>  	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
>  
> +	cpu_map = calloc(ncpus, sizeof(int));
> +	if (!cpu_map)
> +		goto errmem;
> +
> +	compute_cpu_online_map(cpu_map);
> +
>  	sigfillset(&act.sa_mask);
>  	act.sa_sigaction = toggle_done;
>  	sigaction(SIGINT, &act, NULL);
> @@ -164,7 +173,7 @@ int bench_futex_hash(int argc, const char **argv)
>  			goto errmem;
>  
>  		CPU_ZERO(&cpu);
> -		CPU_SET(i % ncpus, &cpu);
> +		CPU_SET(cpu_map[i % ncpus], &cpu);
>  
>  		ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu);
>  		if (ret)
> @@ -217,6 +226,7 @@ int bench_futex_hash(int argc, const char **argv)
>  	print_summary();
>  
>  	free(worker);
> +	free(cpu_map);
>  	return ret;
>  errmem:
>  	err(EXIT_FAILURE, "calloc");
> diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
> index 08653ae8a8c4..42ab351b2c50 100644
> --- a/tools/perf/bench/futex-lock-pi.c
> +++ b/tools/perf/bench/futex-lock-pi.c
> @@ -15,6 +15,7 @@
>  #include <errno.h>
>  #include "bench.h"
>  #include "futex.h"
> +#include "cpu-online-map.h"
>  
>  #include <err.h>
>  #include <stdlib.h>
> @@ -113,7 +114,8 @@ static void *workerfn(void *arg)
>  	return NULL;
>  }
>  
> -static void create_threads(struct worker *w, pthread_attr_t thread_attr)
> +static void create_threads(struct worker *w, pthread_attr_t thread_attr,
> +			   int *cpu_map)
>  {
>  	cpu_set_t cpu;
>  	unsigned int i;
> @@ -131,7 +133,7 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr)
>  			worker[i].futex = &global_futex;
>  
>  		CPU_ZERO(&cpu);
> -		CPU_SET(i % ncpus, &cpu);
> +		CPU_SET(cpu_map[i % ncpus], &cpu);
>  
>  		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
>  			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
> @@ -147,12 +149,18 @@ int bench_futex_lock_pi(int argc, const char **argv)
>  	unsigned int i;
>  	struct sigaction act;
>  	pthread_attr_t thread_attr;
> +	int *cpu_map;
>  
>  	argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0);
>  	if (argc)
>  		goto err;
>  
>  	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> +	cpu_map = calloc(ncpus, sizeof(int));
> +	if (!cpu_map)
> +		err(EXIT_FAILURE, "calloc");
> +
> +	compute_cpu_online_map(cpu_map);
>  
>  	sigfillset(&act.sa_mask);
>  	act.sa_sigaction = toggle_done;
> @@ -180,7 +188,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
>  	pthread_attr_init(&thread_attr);
>  	gettimeofday(&start, NULL);
>  
> -	create_threads(worker, thread_attr);
> +	create_threads(worker, thread_attr, cpu_map);
>  	pthread_attr_destroy(&thread_attr);
>  
>  	pthread_mutex_lock(&thread_lock);
> @@ -218,6 +226,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
>  	print_summary();
>  
>  	free(worker);
> +	free(cpu_map);
>  	return ret;
>  err:
>  	usage_with_options(bench_futex_lock_pi_usage, options);
> diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
> index 1058c194608a..366961b901c6 100644
> --- a/tools/perf/bench/futex-requeue.c
> +++ b/tools/perf/bench/futex-requeue.c
> @@ -22,6 +22,7 @@
>  #include <errno.h>
>  #include "bench.h"
>  #include "futex.h"
> +#include "cpu-online-map.h"
>  
>  #include <err.h>
>  #include <stdlib.h>
> @@ -83,7 +84,7 @@ static void *workerfn(void *arg __maybe_unused)
>  }
>  
>  static void block_threads(pthread_t *w,
> -			  pthread_attr_t thread_attr)
> +			  pthread_attr_t thread_attr, int *cpu_map)
>  {
>  	cpu_set_t cpu;
>  	unsigned int i;
> @@ -93,7 +94,7 @@ static void block_threads(pthread_t *w,
>  	/* create and block all threads */
>  	for (i = 0; i < nthreads; i++) {
>  		CPU_ZERO(&cpu);
> -		CPU_SET(i % ncpus, &cpu);
> +		CPU_SET(cpu_map[i % ncpus], &cpu);
>  
>  		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
>  			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
> @@ -116,12 +117,18 @@ int bench_futex_requeue(int argc, const char **argv)
>  	unsigned int i, j;
>  	struct sigaction act;
>  	pthread_attr_t thread_attr;
> +	int *cpu_map;
>  
>  	argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0);
>  	if (argc)
>  		goto err;
>  
>  	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> +	cpu_map = calloc(ncpus, sizeof(int));
> +	if (!cpu_map)
> +		err(EXIT_FAILURE, "calloc");
> +
> +	compute_cpu_online_map(cpu_map);
>  
>  	sigfillset(&act.sa_mask);
>  	act.sa_sigaction = toggle_done;
> @@ -156,7 +163,7 @@ int bench_futex_requeue(int argc, const char **argv)
>  		struct timeval start, end, runtime;
>  
>  		/* create, launch & block all threads */
> -		block_threads(worker, thread_attr);
> +		block_threads(worker, thread_attr, cpu_map);
>  
>  		/* make sure all threads are already blocked */
>  		pthread_mutex_lock(&thread_lock);
> @@ -210,6 +217,7 @@ int bench_futex_requeue(int argc, const char **argv)
>  	print_summary();
>  
>  	free(worker);
> +	free(cpu_map);
>  	return ret;
>  err:
>  	usage_with_options(bench_futex_requeue_usage, options);
> diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
> index b4732dad9f89..5617bcd17e55 100644
> --- a/tools/perf/bench/futex-wake-parallel.c
> +++ b/tools/perf/bench/futex-wake-parallel.c
> @@ -21,6 +21,7 @@
>  #include <errno.h>
>  #include "bench.h"
>  #include "futex.h"
> +#include "cpu-online-map.h"
>  
>  #include <err.h>
>  #include <stdlib.h>
> @@ -119,7 +120,8 @@ static void *blocked_workerfn(void *arg __maybe_unused)
>  	return NULL;
>  }
>  
> -static void block_threads(pthread_t *w, pthread_attr_t thread_attr)
> +static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
> +			  int *cpu_map)
>  {
>  	cpu_set_t cpu;
>  	unsigned int i;
> @@ -129,7 +131,7 @@ static void block_threads(pthread_t *w, pthread_attr_t thread_attr)
>  	/* create and block all threads */
>  	for (i = 0; i < nblocked_threads; i++) {
>  		CPU_ZERO(&cpu);
> -		CPU_SET(i % ncpus, &cpu);
> +		CPU_SET(cpu_map[i % ncpus], &cpu);
>  
>  		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
>  			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
> @@ -205,6 +207,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
>  	struct sigaction act;
>  	pthread_attr_t thread_attr;
>  	struct thread_data *waking_worker;
> +	int *cpu_map;
>  
>  	argc = parse_options(argc, argv, options,
>  			     bench_futex_wake_parallel_usage, 0);
> @@ -218,6 +221,13 @@ int bench_futex_wake_parallel(int argc, const char **argv)
>  	sigaction(SIGINT, &act, NULL);
>  
>  	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> +
> +	cpu_map = calloc(ncpus, sizeof(int));
> +	if (!cpu_map)
> +		err(EXIT_FAILURE, "calloc");
> +
> +	compute_cpu_online_map(cpu_map);
> +
>  	if (!nblocked_threads)
>  		nblocked_threads = ncpus;
>  
> @@ -259,7 +269,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
>  			err(EXIT_FAILURE, "calloc");
>  
>  		/* create, launch & block all threads */
> -		block_threads(blocked_worker, thread_attr);
> +		block_threads(blocked_worker, thread_attr, cpu_map);
>  
>  		/* make sure all threads are already blocked */
>  		pthread_mutex_lock(&thread_lock);
> @@ -295,5 +305,6 @@ int bench_futex_wake_parallel(int argc, const char **argv)
>  	print_summary();
>  
>  	free(blocked_worker);
> +	free(cpu_map);
>  	return ret;
>  }
> diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
> index 8c5c0b6b5c97..c0c71a769d2f 100644
> --- a/tools/perf/bench/futex-wake.c
> +++ b/tools/perf/bench/futex-wake.c
> @@ -22,6 +22,7 @@
>  #include <errno.h>
>  #include "bench.h"
>  #include "futex.h"
> +#include "cpu-online-map.h"
>  
>  #include <err.h>
>  #include <stdlib.h>
> @@ -89,7 +90,7 @@ static void print_summary(void)
>  }
>  
>  static void block_threads(pthread_t *w,
> -			  pthread_attr_t thread_attr)
> +			  pthread_attr_t thread_attr, int *cpu_map)
>  {
>  	cpu_set_t cpu;
>  	unsigned int i;
> @@ -99,7 +100,7 @@ static void block_threads(pthread_t *w,
>  	/* create and block all threads */
>  	for (i = 0; i < nthreads; i++) {
>  		CPU_ZERO(&cpu);
> -		CPU_SET(i % ncpus, &cpu);
> +		CPU_SET(cpu_map[i % ncpus], &cpu);
>  
>  		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
>  			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
> @@ -122,6 +123,7 @@ int bench_futex_wake(int argc, const char **argv)
>  	unsigned int i, j;
>  	struct sigaction act;
>  	pthread_attr_t thread_attr;
> +	int *cpu_map;
>  
>  	argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0);
>  	if (argc) {
> @@ -131,6 +133,12 @@ int bench_futex_wake(int argc, const char **argv)
>  
>  	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
>  
> +	cpu_map = calloc(ncpus, sizeof(int));
> +	if (! cpu_map)
> +		err(EXIT_FAILURE, "calloc");
> +
> +	compute_cpu_online_map(cpu_map);
> +
>  	sigfillset(&act.sa_mask);
>  	act.sa_sigaction = toggle_done;
>  	sigaction(SIGINT, &act, NULL);
> @@ -161,7 +169,7 @@ int bench_futex_wake(int argc, const char **argv)
>  		struct timeval start, end, runtime;
>  
>  		/* create, launch & block all threads */
> -		block_threads(worker, thread_attr);
> +		block_threads(worker, thread_attr, cpu_map);
>  
>  		/* make sure all threads are already blocked */
>  		pthread_mutex_lock(&thread_lock);
> @@ -204,5 +212,6 @@ int bench_futex_wake(int argc, const char **argv)
>  	print_summary();
>  
>  	free(worker);
> +	free(cpu_map);
>  	return ret;
>  }
> -- 
> 2.15.0

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/3] perf bench futex: benchmark only online CPUs
  2017-11-23 15:09 ` Arnaldo Carvalho de Melo
@ 2017-11-23 15:11   ` Arnaldo Carvalho de Melo
  2017-11-24 15:32     ` Davidlohr Bueso
  0 siblings, 1 reply; 6+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-11-23 15:11 UTC (permalink / raw)
  To: James Yang
  Cc: Kim Phillips, Peter Zijlstra, Ingo Molnar, Alexander Shishkin,
	Jiri Olsa, Namhyung Kim, Thomas Gleixner, Darren Hart,
	Colin Ian King, linux-kernel

Em Thu, Nov 23, 2017 at 12:09:48PM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Wed, Nov 22, 2017 at 06:25:28PM -0600, Kim Phillips escreveu:
> > From: James Yang <james.yang@arm.com>
> > 
> > The "perf bench futex" benchmarks have a problem when not all CPUs in
> > the system are online:  perf assumes the CPUs that are online are
> > contiguously numbered and assigns processor affinity to the threads by
> > modulo striping.  When the online CPUs are not contiguously numbered,
> > perf errors out with:
> > 
> > $ echo 0 | sudo tee /sys/devices/system/cpu/cpu3/online
> > 0
> > $ ./oldperf bench futex all
> > perf: pthread_create: Operation not permitted
> > Run summary [PID 14934]: 7 threads, each operating on 1024 [private] futexes for 10 secs.
> > 
> > $
> > 
> > This patch makes perf not assume all CPUs configured are online, and
> > adds a mapping table to stripe affinity across the CPUs that are
> > online.
> 
> So, have you looked at tools/perf/util/cpumap.c? I think you can use:
> 
> 	int i;
> 	struct cpu_map *cpus = cpu_map__new(NULL);
> 
> 	for (i = 0; i < cpus->nr; ++i) {
> 		int cpu = cpus->map[i];
> 		...
> 	}
> 
> No?

But then, this can be done later, as probably will result in more
changes, I'm continuing to review the other patches.

- Arnaldo
 
> - Arnaldo
>  
> > Signed-off-by: James Yang <james.yang@arm.com>
> > Signed-off-by: Kim Phillips <kim.phillips@arm.com>
> > ---
> >  tools/perf/bench/Build                 |  1 +
> >  tools/perf/bench/cpu-online-map.c      | 21 +++++++++++++++++++++
> >  tools/perf/bench/cpu-online-map.h      |  6 ++++++
> >  tools/perf/bench/futex-hash.c          | 14 ++++++++++++--
> >  tools/perf/bench/futex-lock-pi.c       | 15 ++++++++++++---
> >  tools/perf/bench/futex-requeue.c       | 14 +++++++++++---
> >  tools/perf/bench/futex-wake-parallel.c | 17 ++++++++++++++---
> >  tools/perf/bench/futex-wake.c          | 15 ++++++++++++---
> >  8 files changed, 89 insertions(+), 14 deletions(-)
> >  create mode 100644 tools/perf/bench/cpu-online-map.c
> >  create mode 100644 tools/perf/bench/cpu-online-map.h
> > 
> > diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
> > index 60bf11943047..bc79d3577ace 100644
> > --- a/tools/perf/bench/Build
> > +++ b/tools/perf/bench/Build
> > @@ -6,6 +6,7 @@ perf-y += futex-wake.o
> >  perf-y += futex-wake-parallel.o
> >  perf-y += futex-requeue.o
> >  perf-y += futex-lock-pi.o
> > +perf-y += cpu-online-map.o
> >  
> >  perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
> >  perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
> > diff --git a/tools/perf/bench/cpu-online-map.c b/tools/perf/bench/cpu-online-map.c
> > new file mode 100644
> > index 000000000000..4157d2220bf4
> > --- /dev/null
> > +++ b/tools/perf/bench/cpu-online-map.c
> > @@ -0,0 +1,21 @@
> > +#include <sched.h>
> > +#include <unistd.h>
> > +#include <err.h>
> > +#include <stdlib.h>
> > +#include "cpu-online-map.h"
> > +
> > +void compute_cpu_online_map(int *cpu_online_map)
> > +{
> > +	long ncpus_conf = sysconf(_SC_NPROCESSORS_CONF);
> > +	cpu_set_t set;
> > +	size_t j = 0;
> > +
> > +	CPU_ZERO(&set);
> > +
> > +	if (sched_getaffinity(0, sizeof(set), &set))
> > +		err(EXIT_FAILURE, "sched_getaffinity");
> > +
> > +	for (int i = 0; i < ncpus_conf; i++)
> > +		if (CPU_ISSET(i, &set))
> > +			cpu_online_map[j++] = i;
> > +}
> > diff --git a/tools/perf/bench/cpu-online-map.h b/tools/perf/bench/cpu-online-map.h
> > new file mode 100644
> > index 000000000000..0620a674c6d8
> > --- /dev/null
> > +++ b/tools/perf/bench/cpu-online-map.h
> > @@ -0,0 +1,6 @@
> > +#ifndef CPU_ONLINE_MAP_H
> > +#define CPU_ONLINE_MAP_H
> > +
> > +void compute_cpu_online_map(int *cpu_online_map);
> > +
> > +#endif
> > diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
> > index 58ae6ed8f38b..81314eea1cf5 100644
> > --- a/tools/perf/bench/futex-hash.c
> > +++ b/tools/perf/bench/futex-hash.c
> > @@ -24,6 +24,7 @@
> >  #include <subcmd/parse-options.h>
> >  #include "bench.h"
> >  #include "futex.h"
> > +#include "cpu-online-map.h"
> >  
> >  #include <err.h>
> >  #include <sys/time.h>
> > @@ -120,9 +121,11 @@ int bench_futex_hash(int argc, const char **argv)
> >  	int ret = 0;
> >  	cpu_set_t cpu;
> >  	struct sigaction act;
> > -	unsigned int i, ncpus;
> > +	unsigned int i;
> > +	long ncpus;
> >  	pthread_attr_t thread_attr;
> >  	struct worker *worker = NULL;
> > +	int *cpu_map;
> >  
> >  	argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
> >  	if (argc) {
> > @@ -132,6 +135,12 @@ int bench_futex_hash(int argc, const char **argv)
> >  
> >  	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> >  
> > +	cpu_map = calloc(ncpus, sizeof(int));
> > +	if (!cpu_map)
> > +		goto errmem;
> > +
> > +	compute_cpu_online_map(cpu_map);
> > +
> >  	sigfillset(&act.sa_mask);
> >  	act.sa_sigaction = toggle_done;
> >  	sigaction(SIGINT, &act, NULL);
> > @@ -164,7 +173,7 @@ int bench_futex_hash(int argc, const char **argv)
> >  			goto errmem;
> >  
> >  		CPU_ZERO(&cpu);
> > -		CPU_SET(i % ncpus, &cpu);
> > +		CPU_SET(cpu_map[i % ncpus], &cpu);
> >  
> >  		ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu);
> >  		if (ret)
> > @@ -217,6 +226,7 @@ int bench_futex_hash(int argc, const char **argv)
> >  	print_summary();
> >  
> >  	free(worker);
> > +	free(cpu_map);
> >  	return ret;
> >  errmem:
> >  	err(EXIT_FAILURE, "calloc");
> > diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
> > index 08653ae8a8c4..42ab351b2c50 100644
> > --- a/tools/perf/bench/futex-lock-pi.c
> > +++ b/tools/perf/bench/futex-lock-pi.c
> > @@ -15,6 +15,7 @@
> >  #include <errno.h>
> >  #include "bench.h"
> >  #include "futex.h"
> > +#include "cpu-online-map.h"
> >  
> >  #include <err.h>
> >  #include <stdlib.h>
> > @@ -113,7 +114,8 @@ static void *workerfn(void *arg)
> >  	return NULL;
> >  }
> >  
> > -static void create_threads(struct worker *w, pthread_attr_t thread_attr)
> > +static void create_threads(struct worker *w, pthread_attr_t thread_attr,
> > +			   int *cpu_map)
> >  {
> >  	cpu_set_t cpu;
> >  	unsigned int i;
> > @@ -131,7 +133,7 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr)
> >  			worker[i].futex = &global_futex;
> >  
> >  		CPU_ZERO(&cpu);
> > -		CPU_SET(i % ncpus, &cpu);
> > +		CPU_SET(cpu_map[i % ncpus], &cpu);
> >  
> >  		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
> >  			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
> > @@ -147,12 +149,18 @@ int bench_futex_lock_pi(int argc, const char **argv)
> >  	unsigned int i;
> >  	struct sigaction act;
> >  	pthread_attr_t thread_attr;
> > +	int *cpu_map;
> >  
> >  	argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0);
> >  	if (argc)
> >  		goto err;
> >  
> >  	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> > +	cpu_map = calloc(ncpus, sizeof(int));
> > +	if (!cpu_map)
> > +		err(EXIT_FAILURE, "calloc");
> > +
> > +	compute_cpu_online_map(cpu_map);
> >  
> >  	sigfillset(&act.sa_mask);
> >  	act.sa_sigaction = toggle_done;
> > @@ -180,7 +188,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
> >  	pthread_attr_init(&thread_attr);
> >  	gettimeofday(&start, NULL);
> >  
> > -	create_threads(worker, thread_attr);
> > +	create_threads(worker, thread_attr, cpu_map);
> >  	pthread_attr_destroy(&thread_attr);
> >  
> >  	pthread_mutex_lock(&thread_lock);
> > @@ -218,6 +226,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
> >  	print_summary();
> >  
> >  	free(worker);
> > +	free(cpu_map);
> >  	return ret;
> >  err:
> >  	usage_with_options(bench_futex_lock_pi_usage, options);
> > diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
> > index 1058c194608a..366961b901c6 100644
> > --- a/tools/perf/bench/futex-requeue.c
> > +++ b/tools/perf/bench/futex-requeue.c
> > @@ -22,6 +22,7 @@
> >  #include <errno.h>
> >  #include "bench.h"
> >  #include "futex.h"
> > +#include "cpu-online-map.h"
> >  
> >  #include <err.h>
> >  #include <stdlib.h>
> > @@ -83,7 +84,7 @@ static void *workerfn(void *arg __maybe_unused)
> >  }
> >  
> >  static void block_threads(pthread_t *w,
> > -			  pthread_attr_t thread_attr)
> > +			  pthread_attr_t thread_attr, int *cpu_map)
> >  {
> >  	cpu_set_t cpu;
> >  	unsigned int i;
> > @@ -93,7 +94,7 @@ static void block_threads(pthread_t *w,
> >  	/* create and block all threads */
> >  	for (i = 0; i < nthreads; i++) {
> >  		CPU_ZERO(&cpu);
> > -		CPU_SET(i % ncpus, &cpu);
> > +		CPU_SET(cpu_map[i % ncpus], &cpu);
> >  
> >  		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
> >  			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
> > @@ -116,12 +117,18 @@ int bench_futex_requeue(int argc, const char **argv)
> >  	unsigned int i, j;
> >  	struct sigaction act;
> >  	pthread_attr_t thread_attr;
> > +	int *cpu_map;
> >  
> >  	argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0);
> >  	if (argc)
> >  		goto err;
> >  
> >  	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> > +	cpu_map = calloc(ncpus, sizeof(int));
> > +	if (!cpu_map)
> > +		err(EXIT_FAILURE, "calloc");
> > +
> > +	compute_cpu_online_map(cpu_map);
> >  
> >  	sigfillset(&act.sa_mask);
> >  	act.sa_sigaction = toggle_done;
> > @@ -156,7 +163,7 @@ int bench_futex_requeue(int argc, const char **argv)
> >  		struct timeval start, end, runtime;
> >  
> >  		/* create, launch & block all threads */
> > -		block_threads(worker, thread_attr);
> > +		block_threads(worker, thread_attr, cpu_map);
> >  
> >  		/* make sure all threads are already blocked */
> >  		pthread_mutex_lock(&thread_lock);
> > @@ -210,6 +217,7 @@ int bench_futex_requeue(int argc, const char **argv)
> >  	print_summary();
> >  
> >  	free(worker);
> > +	free(cpu_map);
> >  	return ret;
> >  err:
> >  	usage_with_options(bench_futex_requeue_usage, options);
> > diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
> > index b4732dad9f89..5617bcd17e55 100644
> > --- a/tools/perf/bench/futex-wake-parallel.c
> > +++ b/tools/perf/bench/futex-wake-parallel.c
> > @@ -21,6 +21,7 @@
> >  #include <errno.h>
> >  #include "bench.h"
> >  #include "futex.h"
> > +#include "cpu-online-map.h"
> >  
> >  #include <err.h>
> >  #include <stdlib.h>
> > @@ -119,7 +120,8 @@ static void *blocked_workerfn(void *arg __maybe_unused)
> >  	return NULL;
> >  }
> >  
> > -static void block_threads(pthread_t *w, pthread_attr_t thread_attr)
> > +static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
> > +			  int *cpu_map)
> >  {
> >  	cpu_set_t cpu;
> >  	unsigned int i;
> > @@ -129,7 +131,7 @@ static void block_threads(pthread_t *w, pthread_attr_t thread_attr)
> >  	/* create and block all threads */
> >  	for (i = 0; i < nblocked_threads; i++) {
> >  		CPU_ZERO(&cpu);
> > -		CPU_SET(i % ncpus, &cpu);
> > +		CPU_SET(cpu_map[i % ncpus], &cpu);
> >  
> >  		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
> >  			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
> > @@ -205,6 +207,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
> >  	struct sigaction act;
> >  	pthread_attr_t thread_attr;
> >  	struct thread_data *waking_worker;
> > +	int *cpu_map;
> >  
> >  	argc = parse_options(argc, argv, options,
> >  			     bench_futex_wake_parallel_usage, 0);
> > @@ -218,6 +221,13 @@ int bench_futex_wake_parallel(int argc, const char **argv)
> >  	sigaction(SIGINT, &act, NULL);
> >  
> >  	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> > +
> > +	cpu_map = calloc(ncpus, sizeof(int));
> > +	if (!cpu_map)
> > +		err(EXIT_FAILURE, "calloc");
> > +
> > +	compute_cpu_online_map(cpu_map);
> > +
> >  	if (!nblocked_threads)
> >  		nblocked_threads = ncpus;
> >  
> > @@ -259,7 +269,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
> >  			err(EXIT_FAILURE, "calloc");
> >  
> >  		/* create, launch & block all threads */
> > -		block_threads(blocked_worker, thread_attr);
> > +		block_threads(blocked_worker, thread_attr, cpu_map);
> >  
> >  		/* make sure all threads are already blocked */
> >  		pthread_mutex_lock(&thread_lock);
> > @@ -295,5 +305,6 @@ int bench_futex_wake_parallel(int argc, const char **argv)
> >  	print_summary();
> >  
> >  	free(blocked_worker);
> > +	free(cpu_map);
> >  	return ret;
> >  }
> > diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
> > index 8c5c0b6b5c97..c0c71a769d2f 100644
> > --- a/tools/perf/bench/futex-wake.c
> > +++ b/tools/perf/bench/futex-wake.c
> > @@ -22,6 +22,7 @@
> >  #include <errno.h>
> >  #include "bench.h"
> >  #include "futex.h"
> > +#include "cpu-online-map.h"
> >  
> >  #include <err.h>
> >  #include <stdlib.h>
> > @@ -89,7 +90,7 @@ static void print_summary(void)
> >  }
> >  
> >  static void block_threads(pthread_t *w,
> > -			  pthread_attr_t thread_attr)
> > +			  pthread_attr_t thread_attr, int *cpu_map)
> >  {
> >  	cpu_set_t cpu;
> >  	unsigned int i;
> > @@ -99,7 +100,7 @@ static void block_threads(pthread_t *w,
> >  	/* create and block all threads */
> >  	for (i = 0; i < nthreads; i++) {
> >  		CPU_ZERO(&cpu);
> > -		CPU_SET(i % ncpus, &cpu);
> > +		CPU_SET(cpu_map[i % ncpus], &cpu);
> >  
> >  		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
> >  			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
> > @@ -122,6 +123,7 @@ int bench_futex_wake(int argc, const char **argv)
> >  	unsigned int i, j;
> >  	struct sigaction act;
> >  	pthread_attr_t thread_attr;
> > +	int *cpu_map;
> >  
> >  	argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0);
> >  	if (argc) {
> > @@ -131,6 +133,12 @@ int bench_futex_wake(int argc, const char **argv)
> >  
> >  	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> >  
> > +	cpu_map = calloc(ncpus, sizeof(int));
> > +	if (! cpu_map)
> > +		err(EXIT_FAILURE, "calloc");
> > +
> > +	compute_cpu_online_map(cpu_map);
> > +
> >  	sigfillset(&act.sa_mask);
> >  	act.sa_sigaction = toggle_done;
> >  	sigaction(SIGINT, &act, NULL);
> > @@ -161,7 +169,7 @@ int bench_futex_wake(int argc, const char **argv)
> >  		struct timeval start, end, runtime;
> >  
> >  		/* create, launch & block all threads */
> > -		block_threads(worker, thread_attr);
> > +		block_threads(worker, thread_attr, cpu_map);
> >  
> >  		/* make sure all threads are already blocked */
> >  		pthread_mutex_lock(&thread_lock);
> > @@ -204,5 +212,6 @@ int bench_futex_wake(int argc, const char **argv)
> >  	print_summary();
> >  
> >  	free(worker);
> > +	free(cpu_map);
> >  	return ret;
> >  }
> > -- 
> > 2.15.0

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/3] perf bench futex: benchmark only online CPUs
  2017-11-23 15:11   ` Arnaldo Carvalho de Melo
@ 2017-11-24 15:32     ` Davidlohr Bueso
  2017-11-24 19:08       ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 6+ messages in thread
From: Davidlohr Bueso @ 2017-11-24 15:32 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: James Yang, Kim Phillips, Peter Zijlstra, Ingo Molnar,
	Alexander Shishkin, Jiri Olsa, Namhyung Kim, Thomas Gleixner,
	Darren Hart, Colin Ian King, linux-kernel

On Thu, 23 Nov 2017, Arnaldo Carvalho de Melo wrote:

>Em Thu, Nov 23, 2017 at 12:09:48PM -0300, Arnaldo Carvalho de Melo escreveu:
>> Em Wed, Nov 22, 2017 at 06:25:28PM -0600, Kim Phillips escreveu:
>> > From: James Yang <james.yang@arm.com>
>> >
>> > The "perf bench futex" benchmarks have a problem when not all CPUs in
>> > the system are online:  perf assumes the CPUs that are online are
>> > contiguously numbered and assigns processor affinity to the threads by
>> > modulo striping.  When the online CPUs are not contiguously numbered,
>> > perf errors out with:

Good catch. Non contiguously numbered cpus was certainly not something I had
considered.

>> >
>> > $ echo 0 | sudo tee /sys/devices/system/cpu/cpu3/online
>> > 0
>> > $ ./oldperf bench futex all
>> > perf: pthread_create: Operation not permitted
>> > Run summary [PID 14934]: 7 threads, each operating on 1024 [private] futexes for 10 secs.
>> >
>> > $
>> >
>> > This patch makes perf not assume all CPUs configured are online, and
>> > adds a mapping table to stripe affinity across the CPUs that are
>> > online.
>>
>> So, have you looked at tools/perf/util/cpumap.c? I think you can use:
>>
>> 	int i;
>> 	struct cpu_map *cpus = cpu_map__new(NULL);
>>
>> 	for (i = 0; i < cpus->nr; ++i) {
>> 		int cpu = cpus->map[i];
>> 		...
>> 	}
>>
>> No?

Ah, I was just thinking there should be something like this in perf already :)

>
>But then, this can be done later, as probably will result in more
>changes, I'm continuing to review the other patches.

Unsure if you're implying otherwise, but I would strongly prefer a v2 was sent
to use the perf's cpumap.c.

Thanks,
Davidlohr

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/3] perf bench futex: benchmark only online CPUs
  2017-11-24 15:32     ` Davidlohr Bueso
@ 2017-11-24 19:08       ` Arnaldo Carvalho de Melo
  2017-11-24 21:30         ` James Yang
  0 siblings, 1 reply; 6+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-11-24 19:08 UTC (permalink / raw)
  To: Davidlohr Bueso
  Cc: James Yang, Kim Phillips, Peter Zijlstra, Ingo Molnar,
	Alexander Shishkin, Jiri Olsa, Namhyung Kim, Thomas Gleixner,
	Darren Hart, Colin Ian King, linux-kernel

Em Fri, Nov 24, 2017 at 07:32:49AM -0800, Davidlohr Bueso escreveu:
> On Thu, 23 Nov 2017, Arnaldo Carvalho de Melo wrote:
> 
> > Em Thu, Nov 23, 2017 at 12:09:48PM -0300, Arnaldo Carvalho de Melo escreveu:
> > > Em Wed, Nov 22, 2017 at 06:25:28PM -0600, Kim Phillips escreveu:
> > > > From: James Yang <james.yang@arm.com>
> > > >
> > > > The "perf bench futex" benchmarks have a problem when not all CPUs in
> > > > the system are online:  perf assumes the CPUs that are online are
> > > > contiguously numbered and assigns processor affinity to the threads by
> > > > modulo striping.  When the online CPUs are not contiguously numbered,
> > > > perf errors out with:
> 
> Good catch. Non contiguously numbered cpus was certainly not something I had
> considered.
> 
> > > >
> > > > $ echo 0 | sudo tee /sys/devices/system/cpu/cpu3/online
> > > > 0
> > > > $ ./oldperf bench futex all
> > > > perf: pthread_create: Operation not permitted
> > > > Run summary [PID 14934]: 7 threads, each operating on 1024 [private] futexes for 10 secs.
> > > >
> > > > $
> > > >
> > > > This patch makes perf not assume all CPUs configured are online, and
> > > > adds a mapping table to stripe affinity across the CPUs that are
> > > > online.
> > > 
> > > So, have you looked at tools/perf/util/cpumap.c? I think you can use:
> > > 
> > > 	int i;
> > > 	struct cpu_map *cpus = cpu_map__new(NULL);
> > > 
> > > 	for (i = 0; i < cpus->nr; ++i) {
> > > 		int cpu = cpus->map[i];
> > > 		...
> > > 	}
> > > 
> > > No?
> 
> Ah, I was just thinking there should be something like this in perf already :)
> 
> > 
> > But then, this can be done later, as probably will result in more
> > changes, I'm continuing to review the other patches.
> 
> Unsure if you're implying otherwise, but I would strongly prefer a v2 was sent
> to use the perf's cpumap.c.

I try not to ask too much from people, i.e. their work already improves
the current situation, for their use case, so could be applied, but
yeah, it would be better, if James (or somebody else) is willing to try
and use the perf cpumap infrastructure to reduce the bloat and actually
validate even more it, James?

- Arnaldo

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH 1/3] perf bench futex: benchmark only online CPUs
  2017-11-24 19:08       ` Arnaldo Carvalho de Melo
@ 2017-11-24 21:30         ` James Yang
  0 siblings, 0 replies; 6+ messages in thread
From: James Yang @ 2017-11-24 21:30 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Davidlohr Bueso
  Cc: Kim Phillips, Peter Zijlstra, Ingo Molnar, Alexander Shishkin,
	Jiri Olsa, Namhyung Kim, Thomas Gleixner, Darren Hart,
	Colin Ian King, linux-kernel

> -----Original Message-----
> From: Arnaldo Carvalho de Melo [mailto:acme@kernel.org]
> Sent: Friday, November 24, 2017 1:09 PM
> To: Davidlohr Bueso <dave@stgolabs.net>
> Cc: James Yang <James.Yang@arm.com>; Kim Phillips <Kim.Phillips@arm.com>;
> Peter Zijlstra <peterz@infradead.org>; Ingo Molnar <mingo@redhat.com>;
> Alexander Shishkin <alexander.shishkin@linux.intel.com>; Jiri Olsa
> <jolsa@redhat.com>; Namhyung Kim <namhyung@kernel.org>; Thomas Gleixner
> <tglx@linutronix.de>; Darren Hart <dvhart@infradead.org>; Colin Ian King
> <colin.king@canonical.com>; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH 1/3] perf bench futex: benchmark only online CPUs
>
> I try not to ask too much from people, i.e. their work already improves
> the current situation, for their use case, so could be applied, but
> yeah, it would be better, if James (or somebody else) is willing to try
> and use the perf cpumap infrastructure to reduce the bloat and actually
> validate even more it, James?
>
> - Arnaldo

Sorry for the delayed response.  We are on holiday.

These patches were written over a year ago.  I did not send it to lkml.

I did not continue development because I remember seeing an equivalent fix in another patch or a newer kernel than the one against which I wrote the patch.

I am busy with other things at this time, so if Kim (or someone else) wants to rewrite the patch to use cpumap, he should do it because I don't have the time to do it.
IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-11-24 21:30 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-23  0:25 [PATCH 1/3] perf bench futex: benchmark only online CPUs Kim Phillips
2017-11-23 15:09 ` Arnaldo Carvalho de Melo
2017-11-23 15:11   ` Arnaldo Carvalho de Melo
2017-11-24 15:32     ` Davidlohr Bueso
2017-11-24 19:08       ` Arnaldo Carvalho de Melo
2017-11-24 21:30         ` James Yang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.