All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH -tip 0/3] perf bench futex: Improvements
@ 2017-11-27  4:20 Davidlohr Bueso
  2017-11-27  4:20 ` [PATCH 1/3] perf bench futex: Use cpumaps Davidlohr Bueso
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Davidlohr Bueso @ 2017-11-27  4:20 UTC (permalink / raw)
  To: acme; +Cc: james.yang, kim.phillips, dave, linux-kernel

Hi,

I'm resending the patches from James Yang and Kim Phillips that
improve the perf bench futex benchmarks. Noticibly:

patch1 makes use of util/cpumap.c
patch2/3 are the same as the original only that I've split
it into two and removed some bogus debug noise.

Davidlohr Bueso (3):
  perf bench futex: Use cpumaps
  perf bench futex: Add --affine-wakers option to wake-parallel
  perf bench futex: sync waker threads

 tools/perf/bench/futex-hash.c          | 19 ++++++++-----
 tools/perf/bench/futex-lock-pi.c       | 23 +++++++++------
 tools/perf/bench/futex-requeue.c       | 22 +++++++++------
 tools/perf/bench/futex-wake-parallel.c | 51 ++++++++++++++++++++++++++--------
 tools/perf/bench/futex-wake.c          | 18 +++++++-----
 5 files changed, 90 insertions(+), 43 deletions(-)

-- 
2.13.6

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/3] perf bench futex: Use cpumaps
  2017-11-27  4:20 [PATCH -tip 0/3] perf bench futex: Improvements Davidlohr Bueso
@ 2017-11-27  4:20 ` Davidlohr Bueso
  2017-12-06 16:34   ` [tip:perf/core] " tip-bot for Davidlohr Bueso
  2017-11-27  4:21 ` [PATCH 2/3] perf bench futex: Add --affine-wakers option to wake-parallel Davidlohr Bueso
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 9+ messages in thread
From: Davidlohr Bueso @ 2017-11-27  4:20 UTC (permalink / raw)
  To: acme
  Cc: james.yang, kim.phillips, dave, linux-kernel, Kim Phillips,
	Davidlohr Bueso

It was reported that the whole futex bench breaks when
dealing with non-contiguously numbered cpus.

$ echo 0 | sudo tee /sys/devices/system/cpu/cpu3/online
$ ./perf bench futex all
 perf: pthread_create: Operation not permitted
 Run summary [PID 14934]: 7 threads, each ....

James had implemented an approach with cpumaps that use an
in house flavor. Instead of re-inventing the wheel, I've
redone the patch such that we use the perf's util/cpumap.c
interface instead.

Applies to all futex benchmarks.

Cc: Kim Phillips <Kim.Phillips@arm.com>
Originally-from: James Yang <James.Yang@arm.com>
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 tools/perf/bench/futex-hash.c          | 19 ++++++++++++-------
 tools/perf/bench/futex-lock-pi.c       | 23 ++++++++++++++---------
 tools/perf/bench/futex-requeue.c       | 22 +++++++++++++---------
 tools/perf/bench/futex-wake-parallel.c | 24 +++++++++++++++---------
 tools/perf/bench/futex-wake.c          | 18 +++++++++++-------
 5 files changed, 65 insertions(+), 41 deletions(-)

diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index fe16b310097f..230d7d5fc6e4 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -23,6 +23,7 @@
 #include <subcmd/parse-options.h>
 #include "bench.h"
 #include "futex.h"
+#include "cpumap.h"
 
 #include <err.h>
 #include <sys/time.h>
@@ -117,11 +118,12 @@ static void print_summary(void)
 int bench_futex_hash(int argc, const char **argv)
 {
 	int ret = 0;
-	cpu_set_t cpu;
+	cpu_set_t cpuset;
 	struct sigaction act;
-	unsigned int i, ncpus;
+	unsigned int i;
 	pthread_attr_t thread_attr;
 	struct worker *worker = NULL;
+	struct cpu_map *cpu;
 
 	argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
 	if (argc) {
@@ -129,14 +131,16 @@ int bench_futex_hash(int argc, const char **argv)
 		exit(EXIT_FAILURE);
 	}
 
-	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	cpu = cpu_map__new(NULL);
+	if (!cpu)
+		goto errmem;
 
 	sigfillset(&act.sa_mask);
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
 	if (!nthreads) /* default to the number of CPUs */
-		nthreads = ncpus;
+		nthreads = cpu->nr;
 
 	worker = calloc(nthreads, sizeof(*worker));
 	if (!worker)
@@ -162,10 +166,10 @@ int bench_futex_hash(int argc, const char **argv)
 		if (!worker[i].futex)
 			goto errmem;
 
-		CPU_ZERO(&cpu);
-		CPU_SET(i % ncpus, &cpu);
+		CPU_ZERO(&cpuset);
+		CPU_SET(cpu->map[i % cpu->nr], &cpuset);
 
-		ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu);
+		ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
 		if (ret)
 			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
 
@@ -216,6 +220,7 @@ int bench_futex_hash(int argc, const char **argv)
 	print_summary();
 
 	free(worker);
+	free(cpu);
 	return ret;
 errmem:
 	err(EXIT_FAILURE, "calloc");
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 73a1c44ea63c..0868444c9e67 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -14,6 +14,7 @@
 #include <errno.h>
 #include "bench.h"
 #include "futex.h"
+#include "cpumap.h"
 
 #include <err.h>
 #include <stdlib.h>
@@ -31,7 +32,7 @@ static struct worker *worker;
 static unsigned int nsecs = 10;
 static bool silent = false, multi = false;
 static bool done = false, fshared = false;
-static unsigned int ncpus, nthreads = 0;
+static unsigned int nthreads = 0;
 static int futex_flag = 0;
 struct timeval start, end, runtime;
 static pthread_mutex_t thread_lock;
@@ -112,9 +113,10 @@ static void *workerfn(void *arg)
 	return NULL;
 }
 
-static void create_threads(struct worker *w, pthread_attr_t thread_attr)
+static void create_threads(struct worker *w, pthread_attr_t thread_attr,
+			   struct cpu_map *cpu)
 {
-	cpu_set_t cpu;
+	cpu_set_t cpuset;
 	unsigned int i;
 
 	threads_starting = nthreads;
@@ -129,10 +131,10 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr)
 		} else
 			worker[i].futex = &global_futex;
 
-		CPU_ZERO(&cpu);
-		CPU_SET(i % ncpus, &cpu);
+		CPU_ZERO(&cpuset);
+		CPU_SET(cpu->map[i % cpu->nr], &cpuset);
 
-		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
 			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
 
 		if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i]))
@@ -146,19 +148,22 @@ int bench_futex_lock_pi(int argc, const char **argv)
 	unsigned int i;
 	struct sigaction act;
 	pthread_attr_t thread_attr;
+	struct cpu_map *cpu;
 
 	argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0);
 	if (argc)
 		goto err;
 
-	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	cpu = cpu_map__new(NULL);
+	if (!cpu)
+		err(EXIT_FAILURE, "calloc");
 
 	sigfillset(&act.sa_mask);
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
 	if (!nthreads)
-		nthreads = ncpus;
+		nthreads = cpu->nr;
 
 	worker = calloc(nthreads, sizeof(*worker));
 	if (!worker)
@@ -179,7 +184,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
 	pthread_attr_init(&thread_attr);
 	gettimeofday(&start, NULL);
 
-	create_threads(worker, thread_attr);
+	create_threads(worker, thread_attr, cpu);
 	pthread_attr_destroy(&thread_attr);
 
 	pthread_mutex_lock(&thread_lock);
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index 41786cbea24c..69f3bbc765d1 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -21,6 +21,7 @@
 #include <errno.h>
 #include "bench.h"
 #include "futex.h"
+#include "cpumap.h"
 
 #include <err.h>
 #include <stdlib.h>
@@ -39,7 +40,7 @@ static bool done = false, silent = false, fshared = false;
 static pthread_mutex_t thread_lock;
 static pthread_cond_t thread_parent, thread_worker;
 static struct stats requeuetime_stats, requeued_stats;
-static unsigned int ncpus, threads_starting, nthreads = 0;
+static unsigned int threads_starting, nthreads = 0;
 static int futex_flag = 0;
 
 static const struct option options[] = {
@@ -82,19 +83,19 @@ static void *workerfn(void *arg __maybe_unused)
 }
 
 static void block_threads(pthread_t *w,
-			  pthread_attr_t thread_attr)
+			  pthread_attr_t thread_attr, struct cpu_map *cpu)
 {
-	cpu_set_t cpu;
+	cpu_set_t cpuset;
 	unsigned int i;
 
 	threads_starting = nthreads;
 
 	/* create and block all threads */
 	for (i = 0; i < nthreads; i++) {
-		CPU_ZERO(&cpu);
-		CPU_SET(i % ncpus, &cpu);
+		CPU_ZERO(&cpuset);
+		CPU_SET(cpu->map[i % cpu->nr], &cpuset);
 
-		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
 			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
 
 		if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
@@ -115,19 +116,22 @@ int bench_futex_requeue(int argc, const char **argv)
 	unsigned int i, j;
 	struct sigaction act;
 	pthread_attr_t thread_attr;
+	struct cpu_map *cpu;
 
 	argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0);
 	if (argc)
 		goto err;
 
-	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	cpu = cpu_map__new(NULL);
+	if (!cpu)
+		err(EXIT_FAILURE, "cpu_map__new");
 
 	sigfillset(&act.sa_mask);
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
 	if (!nthreads)
-		nthreads = ncpus;
+		nthreads = cpu->nr;
 
 	worker = calloc(nthreads, sizeof(*worker));
 	if (!worker)
@@ -155,7 +159,7 @@ int bench_futex_requeue(int argc, const char **argv)
 		struct timeval start, end, runtime;
 
 		/* create, launch & block all threads */
-		block_threads(worker, thread_attr);
+		block_threads(worker, thread_attr, cpu);
 
 		/* make sure all threads are already blocked */
 		pthread_mutex_lock(&thread_lock);
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index 4ab12c8e016a..979e303e4797 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -20,6 +20,7 @@
 #include <errno.h>
 #include "bench.h"
 #include "futex.h"
+#include "cpumap.h"
 
 #include <err.h>
 #include <stdlib.h>
@@ -42,7 +43,7 @@ static unsigned int nblocked_threads = 0, nwaking_threads = 0;
 static pthread_mutex_t thread_lock;
 static pthread_cond_t thread_parent, thread_worker;
 static struct stats waketime_stats, wakeup_stats;
-static unsigned int ncpus, threads_starting;
+static unsigned int threads_starting;
 static int futex_flag = 0;
 
 static const struct option options[] = {
@@ -118,19 +119,20 @@ static void *blocked_workerfn(void *arg __maybe_unused)
 	return NULL;
 }
 
-static void block_threads(pthread_t *w, pthread_attr_t thread_attr)
+static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
+			  struct cpu_map *cpu)
 {
-	cpu_set_t cpu;
+	cpu_set_t cpuset;
 	unsigned int i;
 
 	threads_starting = nblocked_threads;
 
 	/* create and block all threads */
 	for (i = 0; i < nblocked_threads; i++) {
-		CPU_ZERO(&cpu);
-		CPU_SET(i % ncpus, &cpu);
+		CPU_ZERO(&cpuset);
+		CPU_SET(cpu->map[i % cpu->nr], &cpuset);
 
-		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
 			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
 
 		if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL))
@@ -204,6 +206,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
 	struct sigaction act;
 	pthread_attr_t thread_attr;
 	struct thread_data *waking_worker;
+	struct cpu_map *cpu;
 
 	argc = parse_options(argc, argv, options,
 			     bench_futex_wake_parallel_usage, 0);
@@ -216,9 +219,12 @@ int bench_futex_wake_parallel(int argc, const char **argv)
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
-	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	cpu = cpu_map__new(NULL);
+	if (!cpu)
+		err(EXIT_FAILURE, "calloc");
+
 	if (!nblocked_threads)
-		nblocked_threads = ncpus;
+		nblocked_threads = cpu->nr;
 
 	/* some sanity checks */
 	if (nwaking_threads > nblocked_threads || !nwaking_threads)
@@ -258,7 +264,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
 			err(EXIT_FAILURE, "calloc");
 
 		/* create, launch & block all threads */
-		block_threads(blocked_worker, thread_attr);
+		block_threads(blocked_worker, thread_attr, cpu);
 
 		/* make sure all threads are already blocked */
 		pthread_mutex_lock(&thread_lock);
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 2fa49222ef8d..61877b83719a 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -21,6 +21,7 @@
 #include <errno.h>
 #include "bench.h"
 #include "futex.h"
+#include "cpumap.h"
 
 #include <err.h>
 #include <stdlib.h>
@@ -88,19 +89,19 @@ static void print_summary(void)
 }
 
 static void block_threads(pthread_t *w,
-			  pthread_attr_t thread_attr)
+			  pthread_attr_t thread_attr, struct cpu_map *cpu)
 {
-	cpu_set_t cpu;
+	cpu_set_t cpuset;
 	unsigned int i;
 
 	threads_starting = nthreads;
 
 	/* create and block all threads */
 	for (i = 0; i < nthreads; i++) {
-		CPU_ZERO(&cpu);
-		CPU_SET(i % ncpus, &cpu);
+		CPU_ZERO(&cpuset);
+		CPU_SET(cpu->map[i % cpu->nr], &cpuset);
 
-		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
 			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
 
 		if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
@@ -121,6 +122,7 @@ int bench_futex_wake(int argc, const char **argv)
 	unsigned int i, j;
 	struct sigaction act;
 	pthread_attr_t thread_attr;
+	struct cpu_map *cpu;
 
 	argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0);
 	if (argc) {
@@ -128,7 +130,9 @@ int bench_futex_wake(int argc, const char **argv)
 		exit(EXIT_FAILURE);
 	}
 
-	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	cpu = cpu_map__new(NULL);
+	if (!cpu)
+		err(EXIT_FAILURE, "calloc");
 
 	sigfillset(&act.sa_mask);
 	act.sa_sigaction = toggle_done;
@@ -160,7 +164,7 @@ int bench_futex_wake(int argc, const char **argv)
 		struct timeval start, end, runtime;
 
 		/* create, launch & block all threads */
-		block_threads(worker, thread_attr);
+		block_threads(worker, thread_attr, cpu);
 
 		/* make sure all threads are already blocked */
 		pthread_mutex_lock(&thread_lock);
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/3] perf bench futex: Add --affine-wakers option to wake-parallel
  2017-11-27  4:20 [PATCH -tip 0/3] perf bench futex: Improvements Davidlohr Bueso
  2017-11-27  4:20 ` [PATCH 1/3] perf bench futex: Use cpumaps Davidlohr Bueso
@ 2017-11-27  4:21 ` Davidlohr Bueso
  2017-12-06 17:24   ` Davidlohr Bueso
  2017-11-27  4:21 ` [PATCH 3/3] perf bench futex: sync waker threads Davidlohr Bueso
  2017-11-28  3:20 ` [PATCH -tip 0/3] perf bench futex: Improvements Kim Phillips
  3 siblings, 1 reply; 9+ messages in thread
From: Davidlohr Bueso @ 2017-11-27  4:21 UTC (permalink / raw)
  To: acme
  Cc: james.yang, kim.phillips, dave, linux-kernel, Kim Phillips,
	Davidlohr Bueso

From: James Yang <james.yang@arm.com>

The waker threads' processor affinity is not specified, so
the result has run-to-run variability as the scheduler
decides on which CPUs they are to run.  So we add a
-W/--affine-wakers flag to stripe the affinity of the
waker threads across the online CPUs instead of having
the scheduler place them.

Cc: Kim Phillips <Kim.Phillips@arm.com>
Signed-off-by: James Yang <james.yang@arm.com>
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 tools/perf/bench/futex-wake-parallel.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index 979e303e4797..c04e207ea37c 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -39,6 +39,7 @@ static u_int32_t futex = 0;
 
 static pthread_t *blocked_worker;
 static bool done = false, silent = false, fshared = false;
+static bool affine_wakers = false;
 static unsigned int nblocked_threads = 0, nwaking_threads = 0;
 static pthread_mutex_t thread_lock;
 static pthread_cond_t thread_parent, thread_worker;
@@ -51,6 +52,7 @@ static const struct option options[] = {
 	OPT_UINTEGER('w', "nwakers", &nwaking_threads, "Specify amount of waking threads"),
 	OPT_BOOLEAN( 's', "silent",  &silent,   "Silent mode: do not display data/details"),
 	OPT_BOOLEAN( 'S', "shared",  &fshared,  "Use shared futexes instead of private ones"),
+	OPT_BOOLEAN( 'W', "affine-wakers", &affine_wakers, "Stripe affinity of waker threads across CPUs"),
 	OPT_END()
 };
 
@@ -78,7 +80,8 @@ static void *waking_workerfn(void *arg)
 	return NULL;
 }
 
-static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
+static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr,
+			   struct cpu_map *cpu)
 {
 	unsigned int i;
 
@@ -91,6 +94,17 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
 		 * as it will affect the order to acquire the hb spinlock.
 		 * For now let the scheduler decide.
 		 */
+		if (affine_wakers) {
+			cpu_set_t cpuset;
+			CPU_ZERO(&cpuset);
+			CPU_SET(cpu->map[(i + 1) % cpu->nr], &cpuset);
+
+			if (pthread_attr_setaffinity_np(&thread_attr,
+							sizeof(cpu_set_t),
+							&cpuset))
+				err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+		}
+
 		if (pthread_create(&td[i].worker, &thread_attr,
 				   waking_workerfn, (void *)&td[i]))
 			err(EXIT_FAILURE, "pthread_create");
@@ -276,7 +290,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
 		usleep(100000);
 
 		/* Ok, all threads are patiently blocked, start waking folks up */
-		wakeup_threads(waking_worker, thread_attr);
+		wakeup_threads(waking_worker, thread_attr, cpu);
 
 		for (i = 0; i < nblocked_threads; i++) {
 			ret = pthread_join(blocked_worker[i], NULL);
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/3] perf bench futex: sync waker threads
  2017-11-27  4:20 [PATCH -tip 0/3] perf bench futex: Improvements Davidlohr Bueso
  2017-11-27  4:20 ` [PATCH 1/3] perf bench futex: Use cpumaps Davidlohr Bueso
  2017-11-27  4:21 ` [PATCH 2/3] perf bench futex: Add --affine-wakers option to wake-parallel Davidlohr Bueso
@ 2017-11-27  4:21 ` Davidlohr Bueso
  2017-12-06 16:35   ` [tip:perf/core] perf bench futex: Sync " tip-bot for James Yang
  2017-11-28  3:20 ` [PATCH -tip 0/3] perf bench futex: Improvements Kim Phillips
  3 siblings, 1 reply; 9+ messages in thread
From: Davidlohr Bueso @ 2017-11-27  4:21 UTC (permalink / raw)
  To: acme; +Cc: james.yang, kim.phillips, dave, linux-kernel, Kim Phillips

From: James Yang <james.yang@arm.com>

Waker threads in the futex wake-parallel benchmark are started by a
loop using pthread_create().  However, there is no synchronization
for when the waker threads wake the waiting threads.  Comparison of
the waker threads' measurement timestamps show they are not all
running concurrently because older waker threads finish their task
before newer waker threads even start.

This patch uses a barrier to better synchronize the waker threads.

Cc: Kim Phillips <Kim.Phillips@arm.com>
Signed-off-by: James Yang <james.yang@arm.com
Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
---
 tools/perf/bench/futex-wake-parallel.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index c04e207ea37c..55bff8673d31 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -43,6 +43,7 @@ static bool affine_wakers = false;
 static unsigned int nblocked_threads = 0, nwaking_threads = 0;
 static pthread_mutex_t thread_lock;
 static pthread_cond_t thread_parent, thread_worker;
+static pthread_barrier_t barrier;
 static struct stats waketime_stats, wakeup_stats;
 static unsigned int threads_starting;
 static int futex_flag = 0;
@@ -66,6 +67,8 @@ static void *waking_workerfn(void *arg)
 	struct thread_data *waker = (struct thread_data *) arg;
 	struct timeval start, end;
 
+	pthread_barrier_wait(&barrier);
+
 	gettimeofday(&start, NULL);
 
 	waker->nwoken = futex_wake(&futex, nwakes, futex_flag);
@@ -87,6 +90,8 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr,
 
 	pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
 
+	pthread_barrier_init(&barrier, NULL, nwaking_threads + 1);
+
 	/* create and block all threads */
 	for (i = 0; i < nwaking_threads; i++) {
 		/*
@@ -110,9 +115,13 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr,
 			err(EXIT_FAILURE, "pthread_create");
 	}
 
+	pthread_barrier_wait(&barrier);
+
 	for (i = 0; i < nwaking_threads; i++)
 		if (pthread_join(td[i].worker, NULL))
 			err(EXIT_FAILURE, "pthread_join");
+
+	pthread_barrier_destroy(&barrier);
 }
 
 static void *blocked_workerfn(void *arg __maybe_unused)
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH -tip 0/3] perf bench futex: Improvements
  2017-11-27  4:20 [PATCH -tip 0/3] perf bench futex: Improvements Davidlohr Bueso
                   ` (2 preceding siblings ...)
  2017-11-27  4:21 ` [PATCH 3/3] perf bench futex: sync waker threads Davidlohr Bueso
@ 2017-11-28  3:20 ` Kim Phillips
  2017-11-30 17:00   ` Arnaldo Carvalho de Melo
  3 siblings, 1 reply; 9+ messages in thread
From: Kim Phillips @ 2017-11-28  3:20 UTC (permalink / raw)
  To: Davidlohr Bueso; +Cc: acme, james.yang, linux-kernel

On Sun, 26 Nov 2017 20:20:58 -0800
Davidlohr Bueso <dave@stgolabs.net> wrote:

> I'm resending the patches from James Yang and Kim Phillips that
> improve the perf bench futex benchmarks. Noticibly:

Thanks!

> patch1 makes use of util/cpumap.c
> patch2/3 are the same as the original only that I've split
> it into two and removed some bogus debug noise.
> 
> Davidlohr Bueso (3):
>   perf bench futex: Use cpumaps
>   perf bench futex: Add --affine-wakers option to wake-parallel
>   perf bench futex: sync waker threads

They all look good to me.

I'm resubmitting 3/3 - the jvmti one - separately now.

Thanks,

Kim

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH -tip 0/3] perf bench futex: Improvements
  2017-11-28  3:20 ` [PATCH -tip 0/3] perf bench futex: Improvements Kim Phillips
@ 2017-11-30 17:00   ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 9+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-11-30 17:00 UTC (permalink / raw)
  To: Kim Phillips; +Cc: Davidlohr Bueso, james.yang, linux-kernel

Em Mon, Nov 27, 2017 at 09:20:05PM -0600, Kim Phillips escreveu:
> On Sun, 26 Nov 2017 20:20:58 -0800
> Davidlohr Bueso <dave@stgolabs.net> wrote:
> 
> > I'm resending the patches from James Yang and Kim Phillips that
> > improve the perf bench futex benchmarks. Noticibly:
> 
> Thanks!
> > patch1 makes use of util/cpumap.c
> > patch2/3 are the same as the original only that I've split
> > it into two and removed some bogus debug noise.
> > 
> > Davidlohr Bueso (3):
> >   perf bench futex: Use cpumaps
> >   perf bench futex: Add --affine-wakers option to wake-parallel
> >   perf bench futex: sync waker threads
> 
> They all look good to me.

Taking this as an Acked-by.

- Arnaldo
 
> 
> I'm resubmitting 3/3 - the jvmti one - separately now.
> 
> Thanks,
> 
> Kim

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [tip:perf/core] perf bench futex: Use cpumaps
  2017-11-27  4:20 ` [PATCH 1/3] perf bench futex: Use cpumaps Davidlohr Bueso
@ 2017-12-06 16:34   ` tip-bot for Davidlohr Bueso
  0 siblings, 0 replies; 9+ messages in thread
From: tip-bot for Davidlohr Bueso @ 2017-12-06 16:34 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: tglx, dbueso, hpa, dave, acme, linux-kernel, mingo, kim.phillips

Commit-ID:  3b2323c2c1c4acf8961cfcdddcee9889daaa21e3
Gitweb:     https://git.kernel.org/tip/3b2323c2c1c4acf8961cfcdddcee9889daaa21e3
Author:     Davidlohr Bueso <dave@stgolabs.net>
AuthorDate: Sun, 26 Nov 2017 20:20:59 -0800
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Thu, 30 Nov 2017 14:02:05 -0300

perf bench futex: Use cpumaps

It was reported that the whole futex bench breaks when dealing with
non-contiguously numbered cpus.

$ echo 0 | sudo tee /sys/devices/system/cpu/cpu3/online
$ ./perf bench futex all
 perf: pthread_create: Operation not permitted
 Run summary [PID 14934]: 7 threads, each ....

James had implemented an approach with cpumaps that use an in house
flavor. Instead of re-inventing the wheel, I've redone the patch such
that we use the perf's util/cpumap.c interface instead.

Applies to all futex benchmarks.

Suggested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Originally-from: James Yang <james.yang@arm.com>
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Davidlohr Bueso <dbueso@suse.de>
Cc: Kim Phillips <kim.phillips@arm.com>
Link: http://lkml.kernel.org/r/20171127042101.3659-2-dave@stgolabs.net
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/bench/futex-hash.c          | 19 ++++++++++++-------
 tools/perf/bench/futex-lock-pi.c       | 23 ++++++++++++++---------
 tools/perf/bench/futex-requeue.c       | 22 +++++++++++++---------
 tools/perf/bench/futex-wake-parallel.c | 24 +++++++++++++++---------
 tools/perf/bench/futex-wake.c          | 18 +++++++++++-------
 5 files changed, 65 insertions(+), 41 deletions(-)

diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index 58ae6ed..2defb6d 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -24,6 +24,7 @@
 #include <subcmd/parse-options.h>
 #include "bench.h"
 #include "futex.h"
+#include "cpumap.h"
 
 #include <err.h>
 #include <sys/time.h>
@@ -118,11 +119,12 @@ static void print_summary(void)
 int bench_futex_hash(int argc, const char **argv)
 {
 	int ret = 0;
-	cpu_set_t cpu;
+	cpu_set_t cpuset;
 	struct sigaction act;
-	unsigned int i, ncpus;
+	unsigned int i;
 	pthread_attr_t thread_attr;
 	struct worker *worker = NULL;
+	struct cpu_map *cpu;
 
 	argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
 	if (argc) {
@@ -130,14 +132,16 @@ int bench_futex_hash(int argc, const char **argv)
 		exit(EXIT_FAILURE);
 	}
 
-	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	cpu = cpu_map__new(NULL);
+	if (!cpu)
+		goto errmem;
 
 	sigfillset(&act.sa_mask);
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
 	if (!nthreads) /* default to the number of CPUs */
-		nthreads = ncpus;
+		nthreads = cpu->nr;
 
 	worker = calloc(nthreads, sizeof(*worker));
 	if (!worker)
@@ -163,10 +167,10 @@ int bench_futex_hash(int argc, const char **argv)
 		if (!worker[i].futex)
 			goto errmem;
 
-		CPU_ZERO(&cpu);
-		CPU_SET(i % ncpus, &cpu);
+		CPU_ZERO(&cpuset);
+		CPU_SET(cpu->map[i % cpu->nr], &cpuset);
 
-		ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu);
+		ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
 		if (ret)
 			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
 
@@ -217,6 +221,7 @@ int bench_futex_hash(int argc, const char **argv)
 	print_summary();
 
 	free(worker);
+	free(cpu);
 	return ret;
 errmem:
 	err(EXIT_FAILURE, "calloc");
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 08653ae..8e9c475 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -15,6 +15,7 @@
 #include <errno.h>
 #include "bench.h"
 #include "futex.h"
+#include "cpumap.h"
 
 #include <err.h>
 #include <stdlib.h>
@@ -32,7 +33,7 @@ static struct worker *worker;
 static unsigned int nsecs = 10;
 static bool silent = false, multi = false;
 static bool done = false, fshared = false;
-static unsigned int ncpus, nthreads = 0;
+static unsigned int nthreads = 0;
 static int futex_flag = 0;
 struct timeval start, end, runtime;
 static pthread_mutex_t thread_lock;
@@ -113,9 +114,10 @@ static void *workerfn(void *arg)
 	return NULL;
 }
 
-static void create_threads(struct worker *w, pthread_attr_t thread_attr)
+static void create_threads(struct worker *w, pthread_attr_t thread_attr,
+			   struct cpu_map *cpu)
 {
-	cpu_set_t cpu;
+	cpu_set_t cpuset;
 	unsigned int i;
 
 	threads_starting = nthreads;
@@ -130,10 +132,10 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr)
 		} else
 			worker[i].futex = &global_futex;
 
-		CPU_ZERO(&cpu);
-		CPU_SET(i % ncpus, &cpu);
+		CPU_ZERO(&cpuset);
+		CPU_SET(cpu->map[i % cpu->nr], &cpuset);
 
-		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
 			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
 
 		if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i]))
@@ -147,19 +149,22 @@ int bench_futex_lock_pi(int argc, const char **argv)
 	unsigned int i;
 	struct sigaction act;
 	pthread_attr_t thread_attr;
+	struct cpu_map *cpu;
 
 	argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0);
 	if (argc)
 		goto err;
 
-	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	cpu = cpu_map__new(NULL);
+	if (!cpu)
+		err(EXIT_FAILURE, "calloc");
 
 	sigfillset(&act.sa_mask);
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
 	if (!nthreads)
-		nthreads = ncpus;
+		nthreads = cpu->nr;
 
 	worker = calloc(nthreads, sizeof(*worker));
 	if (!worker)
@@ -180,7 +185,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
 	pthread_attr_init(&thread_attr);
 	gettimeofday(&start, NULL);
 
-	create_threads(worker, thread_attr);
+	create_threads(worker, thread_attr, cpu);
 	pthread_attr_destroy(&thread_attr);
 
 	pthread_mutex_lock(&thread_lock);
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index 1058c19..fc692ef 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -22,6 +22,7 @@
 #include <errno.h>
 #include "bench.h"
 #include "futex.h"
+#include "cpumap.h"
 
 #include <err.h>
 #include <stdlib.h>
@@ -40,7 +41,7 @@ static bool done = false, silent = false, fshared = false;
 static pthread_mutex_t thread_lock;
 static pthread_cond_t thread_parent, thread_worker;
 static struct stats requeuetime_stats, requeued_stats;
-static unsigned int ncpus, threads_starting, nthreads = 0;
+static unsigned int threads_starting, nthreads = 0;
 static int futex_flag = 0;
 
 static const struct option options[] = {
@@ -83,19 +84,19 @@ static void *workerfn(void *arg __maybe_unused)
 }
 
 static void block_threads(pthread_t *w,
-			  pthread_attr_t thread_attr)
+			  pthread_attr_t thread_attr, struct cpu_map *cpu)
 {
-	cpu_set_t cpu;
+	cpu_set_t cpuset;
 	unsigned int i;
 
 	threads_starting = nthreads;
 
 	/* create and block all threads */
 	for (i = 0; i < nthreads; i++) {
-		CPU_ZERO(&cpu);
-		CPU_SET(i % ncpus, &cpu);
+		CPU_ZERO(&cpuset);
+		CPU_SET(cpu->map[i % cpu->nr], &cpuset);
 
-		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
 			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
 
 		if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
@@ -116,19 +117,22 @@ int bench_futex_requeue(int argc, const char **argv)
 	unsigned int i, j;
 	struct sigaction act;
 	pthread_attr_t thread_attr;
+	struct cpu_map *cpu;
 
 	argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0);
 	if (argc)
 		goto err;
 
-	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	cpu = cpu_map__new(NULL);
+	if (!cpu)
+		err(EXIT_FAILURE, "cpu_map__new");
 
 	sigfillset(&act.sa_mask);
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
 	if (!nthreads)
-		nthreads = ncpus;
+		nthreads = cpu->nr;
 
 	worker = calloc(nthreads, sizeof(*worker));
 	if (!worker)
@@ -156,7 +160,7 @@ int bench_futex_requeue(int argc, const char **argv)
 		struct timeval start, end, runtime;
 
 		/* create, launch & block all threads */
-		block_threads(worker, thread_attr);
+		block_threads(worker, thread_attr, cpu);
 
 		/* make sure all threads are already blocked */
 		pthread_mutex_lock(&thread_lock);
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index b4732da..4488c27 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -21,6 +21,7 @@
 #include <errno.h>
 #include "bench.h"
 #include "futex.h"
+#include "cpumap.h"
 
 #include <err.h>
 #include <stdlib.h>
@@ -43,7 +44,7 @@ static unsigned int nblocked_threads = 0, nwaking_threads = 0;
 static pthread_mutex_t thread_lock;
 static pthread_cond_t thread_parent, thread_worker;
 static struct stats waketime_stats, wakeup_stats;
-static unsigned int ncpus, threads_starting;
+static unsigned int threads_starting;
 static int futex_flag = 0;
 
 static const struct option options[] = {
@@ -119,19 +120,20 @@ static void *blocked_workerfn(void *arg __maybe_unused)
 	return NULL;
 }
 
-static void block_threads(pthread_t *w, pthread_attr_t thread_attr)
+static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
+			  struct cpu_map *cpu)
 {
-	cpu_set_t cpu;
+	cpu_set_t cpuset;
 	unsigned int i;
 
 	threads_starting = nblocked_threads;
 
 	/* create and block all threads */
 	for (i = 0; i < nblocked_threads; i++) {
-		CPU_ZERO(&cpu);
-		CPU_SET(i % ncpus, &cpu);
+		CPU_ZERO(&cpuset);
+		CPU_SET(cpu->map[i % cpu->nr], &cpuset);
 
-		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
 			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
 
 		if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL))
@@ -205,6 +207,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
 	struct sigaction act;
 	pthread_attr_t thread_attr;
 	struct thread_data *waking_worker;
+	struct cpu_map *cpu;
 
 	argc = parse_options(argc, argv, options,
 			     bench_futex_wake_parallel_usage, 0);
@@ -217,9 +220,12 @@ int bench_futex_wake_parallel(int argc, const char **argv)
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
-	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	cpu = cpu_map__new(NULL);
+	if (!cpu)
+		err(EXIT_FAILURE, "calloc");
+
 	if (!nblocked_threads)
-		nblocked_threads = ncpus;
+		nblocked_threads = cpu->nr;
 
 	/* some sanity checks */
 	if (nwaking_threads > nblocked_threads || !nwaking_threads)
@@ -259,7 +265,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
 			err(EXIT_FAILURE, "calloc");
 
 		/* create, launch & block all threads */
-		block_threads(blocked_worker, thread_attr);
+		block_threads(blocked_worker, thread_attr, cpu);
 
 		/* make sure all threads are already blocked */
 		pthread_mutex_lock(&thread_lock);
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 8c5c0b6..e8181ad 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -22,6 +22,7 @@
 #include <errno.h>
 #include "bench.h"
 #include "futex.h"
+#include "cpumap.h"
 
 #include <err.h>
 #include <stdlib.h>
@@ -89,19 +90,19 @@ static void print_summary(void)
 }
 
 static void block_threads(pthread_t *w,
-			  pthread_attr_t thread_attr)
+			  pthread_attr_t thread_attr, struct cpu_map *cpu)
 {
-	cpu_set_t cpu;
+	cpu_set_t cpuset;
 	unsigned int i;
 
 	threads_starting = nthreads;
 
 	/* create and block all threads */
 	for (i = 0; i < nthreads; i++) {
-		CPU_ZERO(&cpu);
-		CPU_SET(i % ncpus, &cpu);
+		CPU_ZERO(&cpuset);
+		CPU_SET(cpu->map[i % cpu->nr], &cpuset);
 
-		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
 			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
 
 		if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
@@ -122,6 +123,7 @@ int bench_futex_wake(int argc, const char **argv)
 	unsigned int i, j;
 	struct sigaction act;
 	pthread_attr_t thread_attr;
+	struct cpu_map *cpu;
 
 	argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0);
 	if (argc) {
@@ -129,7 +131,9 @@ int bench_futex_wake(int argc, const char **argv)
 		exit(EXIT_FAILURE);
 	}
 
-	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	cpu = cpu_map__new(NULL);
+	if (!cpu)
+		err(EXIT_FAILURE, "calloc");
 
 	sigfillset(&act.sa_mask);
 	act.sa_sigaction = toggle_done;
@@ -161,7 +165,7 @@ int bench_futex_wake(int argc, const char **argv)
 		struct timeval start, end, runtime;
 
 		/* create, launch & block all threads */
-		block_threads(worker, thread_attr);
+		block_threads(worker, thread_attr, cpu);
 
 		/* make sure all threads are already blocked */
 		pthread_mutex_lock(&thread_lock);

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [tip:perf/core] perf bench futex: Sync waker threads
  2017-11-27  4:21 ` [PATCH 3/3] perf bench futex: sync waker threads Davidlohr Bueso
@ 2017-12-06 16:35   ` tip-bot for James Yang
  0 siblings, 0 replies; 9+ messages in thread
From: tip-bot for James Yang @ 2017-12-06 16:35 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: hpa, kim.phillips, james.yang, mingo, tglx, acme, linux-kernel, dave

Commit-ID:  8085e5ab41dadce558808b4186a6ea9d0862d3c0
Gitweb:     https://git.kernel.org/tip/8085e5ab41dadce558808b4186a6ea9d0862d3c0
Author:     James Yang <james.yang@arm.com>
AuthorDate: Sun, 26 Nov 2017 20:21:01 -0800
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Tue, 5 Dec 2017 10:23:44 -0300

perf bench futex: Sync waker threads

Waker threads in the futex wake-parallel benchmark are started by a loop
using pthread_create().  However, there is no synchronization for when
the waker threads wake the waiting threads.  Comparison of the waker
threads' measurement timestamps show they are not all running
concurrently because older waker threads finish their task before newer
waker threads even start.

This patch uses a barrier to better synchronize the waker threads.

Signed-off-by: James Yang <james.yang@arm.com
Cc: Kim Phillips <kim.phillips@arm.com>
Link: http://lkml.kernel.org/r/20171127042101.3659-4-dave@stgolabs.net
Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
[ Disable the wake-parallel test for systems without pthread_barrier_t ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/bench/futex-wake-parallel.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index 4488c27..69d8fdc 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -7,7 +7,17 @@
  * for each individual thread to service its share of work. Ultimately
  * it can be used to measure futex_wake() changes.
  */
+#include "bench.h"
+#include <linux/compiler.h>
+#include "../util/debug.h"
 
+#ifndef HAVE_PTHREAD_BARRIER
+int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe_unused)
+{
+	pr_err("%s: pthread_barrier_t unavailable, disabling this test...\n", __func__);
+	return 0;
+}
+#else /* HAVE_PTHREAD_BARRIER */
 /* For the CLR_() macros */
 #include <string.h>
 #include <pthread.h>
@@ -15,11 +25,9 @@
 #include <signal.h>
 #include "../util/stat.h"
 #include <subcmd/parse-options.h>
-#include <linux/compiler.h>
 #include <linux/kernel.h>
 #include <linux/time64.h>
 #include <errno.h>
-#include "bench.h"
 #include "futex.h"
 #include "cpumap.h"
 
@@ -43,6 +51,7 @@ static bool done = false, silent = false, fshared = false;
 static unsigned int nblocked_threads = 0, nwaking_threads = 0;
 static pthread_mutex_t thread_lock;
 static pthread_cond_t thread_parent, thread_worker;
+static pthread_barrier_t barrier;
 static struct stats waketime_stats, wakeup_stats;
 static unsigned int threads_starting;
 static int futex_flag = 0;
@@ -65,6 +74,8 @@ static void *waking_workerfn(void *arg)
 	struct thread_data *waker = (struct thread_data *) arg;
 	struct timeval start, end;
 
+	pthread_barrier_wait(&barrier);
+
 	gettimeofday(&start, NULL);
 
 	waker->nwoken = futex_wake(&futex, nwakes, futex_flag);
@@ -85,6 +96,8 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
 
 	pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
 
+	pthread_barrier_init(&barrier, NULL, nwaking_threads + 1);
+
 	/* create and block all threads */
 	for (i = 0; i < nwaking_threads; i++) {
 		/*
@@ -97,9 +110,13 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
 			err(EXIT_FAILURE, "pthread_create");
 	}
 
+	pthread_barrier_wait(&barrier);
+
 	for (i = 0; i < nwaking_threads; i++)
 		if (pthread_join(td[i].worker, NULL))
 			err(EXIT_FAILURE, "pthread_join");
+
+	pthread_barrier_destroy(&barrier);
 }
 
 static void *blocked_workerfn(void *arg __maybe_unused)
@@ -303,3 +320,4 @@ int bench_futex_wake_parallel(int argc, const char **argv)
 	free(blocked_worker);
 	return ret;
 }
+#endif /* HAVE_PTHREAD_BARRIER */

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] perf bench futex: Add --affine-wakers option to wake-parallel
  2017-11-27  4:21 ` [PATCH 2/3] perf bench futex: Add --affine-wakers option to wake-parallel Davidlohr Bueso
@ 2017-12-06 17:24   ` Davidlohr Bueso
  0 siblings, 0 replies; 9+ messages in thread
From: Davidlohr Bueso @ 2017-12-06 17:24 UTC (permalink / raw)
  To: acme; +Cc: james.yang, kim.phillips, linux-kernel, Davidlohr Bueso

Hi, any reason this patch didn't make it into -tip?k

Thanks,
Davidlohr

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2017-12-06 17:28 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-27  4:20 [PATCH -tip 0/3] perf bench futex: Improvements Davidlohr Bueso
2017-11-27  4:20 ` [PATCH 1/3] perf bench futex: Use cpumaps Davidlohr Bueso
2017-12-06 16:34   ` [tip:perf/core] " tip-bot for Davidlohr Bueso
2017-11-27  4:21 ` [PATCH 2/3] perf bench futex: Add --affine-wakers option to wake-parallel Davidlohr Bueso
2017-12-06 17:24   ` Davidlohr Bueso
2017-11-27  4:21 ` [PATCH 3/3] perf bench futex: sync waker threads Davidlohr Bueso
2017-12-06 16:35   ` [tip:perf/core] perf bench futex: Sync " tip-bot for James Yang
2017-11-28  3:20 ` [PATCH -tip 0/3] perf bench futex: Improvements Kim Phillips
2017-11-30 17:00   ` Arnaldo Carvalho de Melo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.