All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH -tip v2 0/7] perf/bench-futex: Misc updates
@ 2021-08-09  4:32 Davidlohr Bueso
  2021-08-09  4:32 ` [PATCH 1/7] perf/bench-futex: Group test parameters cleanup Davidlohr Bueso
                   ` (6 more replies)
  0 siblings, 7 replies; 15+ messages in thread
From: Davidlohr Bueso @ 2021-08-09  4:32 UTC (permalink / raw)
  To: acme; +Cc: mingo, peterz, tglx, linux-perf-users, linux-kernel, dave

Changes from v1: https://lore.kernel.org/lkml/20210806013329.94627-1-dave@stgolabs.net/
- Addressed acme's comments in patch 1.
- Added more patches.
- Added more people to the Cc.

A few updates for the futex perf benchmarks.

Please consider for v5.15.

Thanks!

Davidlohr Bueso (7):
  perf/bench-futex: Group test parameters cleanup
  perf/bench-futex: Remove bogus backslash from comment
  perf/bench-futex: Factor out futex_flag
  perf/bench-futex: Add --mlockall parameter
  perf/bench-futex, requeue: Add --broadcast option
  perf/bench-futex, requeue: Robustify futex_wait() handling
  perf/bench-futex, requeue: Add --pi parameter

 tools/perf/bench/futex-hash.c          |  63 +++++-----
 tools/perf/bench/futex-lock-pi.c       |  61 +++++-----
 tools/perf/bench/futex-requeue.c       | 160 ++++++++++++++++++-------
 tools/perf/bench/futex-wake-parallel.c |  67 ++++++-----
 tools/perf/bench/futex-wake.c          |  66 +++++-----
 tools/perf/bench/futex.h               |  55 ++++++++-
 6 files changed, 318 insertions(+), 154 deletions(-)

--
2.26.2


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 1/7] perf/bench-futex: Group test parameters cleanup
  2021-08-09  4:32 [PATCH -tip v2 0/7] perf/bench-futex: Misc updates Davidlohr Bueso
@ 2021-08-09  4:32 ` Davidlohr Bueso
  2021-08-09  4:32 ` [PATCH 2/7] perf/bench-futex: Remove bogus backslash from comment Davidlohr Bueso
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Davidlohr Bueso @ 2021-08-09  4:32 UTC (permalink / raw)
  To: acme
  Cc: mingo, peterz, tglx, linux-perf-users, linux-kernel, dave,
	Davidlohr Bueso

Do this across all futex-bench tests such that all program parameters
neatly share a common structure, which is nicer than how we have them
now. No changes in program behavior are expected.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 tools/perf/bench/futex-hash.c          | 55 +++++++++++-----------
 tools/perf/bench/futex-lock-pi.c       | 53 ++++++++++-----------
 tools/perf/bench/futex-requeue.c       | 65 ++++++++++++++------------
 tools/perf/bench/futex-wake-parallel.c | 58 ++++++++++++-----------
 tools/perf/bench/futex-wake.c          | 57 +++++++++++-----------
 tools/perf/bench/futex.h               | 11 +++++
 6 files changed, 162 insertions(+), 137 deletions(-)

diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index b65373ce5c4f..ddca7558e559 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -29,11 +29,7 @@
 
 #include <err.h>
 
-static unsigned int nthreads = 0;
-static unsigned int nsecs    = 10;
-/* amount of futexes per thread */
-static unsigned int nfutexes = 1024;
-static bool fshared = false, done = false, silent = false;
+static bool done = false;
 static int futex_flag = 0;
 
 struct timeval bench__start, bench__end, bench__runtime;
@@ -49,12 +45,17 @@ struct worker {
 	unsigned long ops;
 };
 
+static struct bench_futex_parameters params = {
+	.nfutexes = 1024,
+	.runtime  = 10,
+};
+
 static const struct option options[] = {
-	OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
-	OPT_UINTEGER('r', "runtime", &nsecs,    "Specify runtime (in seconds)"),
-	OPT_UINTEGER('f', "futexes", &nfutexes, "Specify amount of futexes per threads"),
-	OPT_BOOLEAN( 's', "silent",  &silent,   "Silent mode: do not display data/details"),
-	OPT_BOOLEAN( 'S', "shared",  &fshared,  "Use shared futexes instead of private ones"),
+	OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
+	OPT_UINTEGER('r', "runtime", &params.runtime, "Specify runtime (in seconds)"),
+	OPT_UINTEGER('f', "futexes", &params.nfutexes, "Specify amount of futexes per threads"),
+	OPT_BOOLEAN( 's', "silent",  &params.silent, "Silent mode: do not display data/details"),
+	OPT_BOOLEAN( 'S', "shared",  &params.fshared, "Use shared futexes instead of private ones"),
 	OPT_END()
 };
 
@@ -78,7 +79,7 @@ static void *workerfn(void *arg)
 	pthread_mutex_unlock(&thread_lock);
 
 	do {
-		for (i = 0; i < nfutexes; i++, ops++) {
+		for (i = 0; i < params.nfutexes; i++, ops++) {
 			/*
 			 * We want the futex calls to fail in order to stress
 			 * the hashing of uaddr and not measure other steps,
@@ -86,7 +87,7 @@ static void *workerfn(void *arg)
 			 * the critical region protected by hb->lock.
 			 */
 			ret = futex_wait(&w->futex[i], 1234, NULL, futex_flag);
-			if (!silent &&
+			if (!params.silent &&
 			    (!ret || errno != EAGAIN || errno != EWOULDBLOCK))
 				warn("Non-expected futex return call");
 		}
@@ -112,7 +113,7 @@ static void print_summary(void)
 	double stddev = stddev_stats(&throughput_stats);
 
 	printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
-	       !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
+	       !params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
 	       (int)bench__runtime.tv_sec);
 }
 
@@ -141,30 +142,30 @@ int bench_futex_hash(int argc, const char **argv)
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
-	if (!nthreads) /* default to the number of CPUs */
-		nthreads = cpu->nr;
+	if (!params.nthreads) /* default to the number of CPUs */
+		params.nthreads = cpu->nr;
 
-	worker = calloc(nthreads, sizeof(*worker));
+	worker = calloc(params.nthreads, sizeof(*worker));
 	if (!worker)
 		goto errmem;
 
-	if (!fshared)
+	if (!params.fshared)
 		futex_flag = FUTEX_PRIVATE_FLAG;
 
 	printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
-	       getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs);
+	       getpid(), params.nthreads, params.nfutexes, params.fshared ? "shared":"private", params.runtime);
 
 	init_stats(&throughput_stats);
 	pthread_mutex_init(&thread_lock, NULL);
 	pthread_cond_init(&thread_parent, NULL);
 	pthread_cond_init(&thread_worker, NULL);
 
-	threads_starting = nthreads;
+	threads_starting = params.nthreads;
 	pthread_attr_init(&thread_attr);
 	gettimeofday(&bench__start, NULL);
-	for (i = 0; i < nthreads; i++) {
+	for (i = 0; i < params.nthreads; i++) {
 		worker[i].tid = i;
-		worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex));
+		worker[i].futex = calloc(params.nfutexes, sizeof(*worker[i].futex));
 		if (!worker[i].futex)
 			goto errmem;
 
@@ -189,10 +190,10 @@ int bench_futex_hash(int argc, const char **argv)
 	pthread_cond_broadcast(&thread_worker);
 	pthread_mutex_unlock(&thread_lock);
 
-	sleep(nsecs);
+	sleep(params.runtime);
 	toggle_done(0, NULL, NULL);
 
-	for (i = 0; i < nthreads; i++) {
+	for (i = 0; i < params.nthreads; i++) {
 		ret = pthread_join(worker[i].thread, NULL);
 		if (ret)
 			err(EXIT_FAILURE, "pthread_join");
@@ -203,18 +204,18 @@ int bench_futex_hash(int argc, const char **argv)
 	pthread_cond_destroy(&thread_worker);
 	pthread_mutex_destroy(&thread_lock);
 
-	for (i = 0; i < nthreads; i++) {
+	for (i = 0; i < params.nthreads; i++) {
 		unsigned long t = bench__runtime.tv_sec > 0 ?
 			worker[i].ops / bench__runtime.tv_sec : 0;
 		update_stats(&throughput_stats, t);
-		if (!silent) {
-			if (nfutexes == 1)
+		if (!params.silent) {
+			if (params.nfutexes == 1)
 				printf("[thread %2d] futex: %p [ %ld ops/sec ]\n",
 				       worker[i].tid, &worker[i].futex[0], t);
 			else
 				printf("[thread %2d] futexes: %p ... %p [ %ld ops/sec ]\n",
 				       worker[i].tid, &worker[i].futex[0],
-				       &worker[i].futex[nfutexes-1], t);
+				       &worker[i].futex[params.nfutexes-1], t);
 		}
 
 		zfree(&worker[i].futex);
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 89c6d160379c..ce980df23bb0 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -31,22 +31,23 @@ struct worker {
 
 static u_int32_t global_futex = 0;
 static struct worker *worker;
-static unsigned int nsecs = 10;
-static bool silent = false, multi = false;
-static bool done = false, fshared = false;
-static unsigned int nthreads = 0;
+static bool done = false;
 static int futex_flag = 0;
 static pthread_mutex_t thread_lock;
 static unsigned int threads_starting;
 static struct stats throughput_stats;
 static pthread_cond_t thread_parent, thread_worker;
 
+static struct bench_futex_parameters params = {
+	.runtime  = 10,
+};
+
 static const struct option options[] = {
-	OPT_UINTEGER('t', "threads",  &nthreads, "Specify amount of threads"),
-	OPT_UINTEGER('r', "runtime", &nsecs,     "Specify runtime (in seconds)"),
-	OPT_BOOLEAN( 'M', "multi",   &multi,     "Use multiple futexes"),
-	OPT_BOOLEAN( 's', "silent",  &silent,    "Silent mode: do not display data/details"),
-	OPT_BOOLEAN( 'S', "shared",  &fshared,   "Use shared futexes instead of private ones"),
+	OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
+	OPT_UINTEGER('r', "runtime", &params.runtime, "Specify runtime (in seconds)"),
+	OPT_BOOLEAN( 'M', "multi",   &params.multi, "Use multiple futexes"),
+	OPT_BOOLEAN( 's', "silent",  &params.silent, "Silent mode: do not display data/details"),
+	OPT_BOOLEAN( 'S', "shared",  &params.fshared, "Use shared futexes instead of private ones"),
 	OPT_END()
 };
 
@@ -61,7 +62,7 @@ static void print_summary(void)
 	double stddev = stddev_stats(&throughput_stats);
 
 	printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
-	       !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
+	       !params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
 	       (int)bench__runtime.tv_sec);
 }
 
@@ -93,7 +94,7 @@ static void *workerfn(void *arg)
 		ret = futex_lock_pi(w->futex, NULL, futex_flag);
 
 		if (ret) { /* handle lock acquisition */
-			if (!silent)
+			if (!params.silent)
 				warn("thread %d: Could not lock pi-lock for %p (%d)",
 				     w->tid, w->futex, ret);
 			if (done)
@@ -104,7 +105,7 @@ static void *workerfn(void *arg)
 
 		usleep(1);
 		ret = futex_unlock_pi(w->futex, futex_flag);
-		if (ret && !silent)
+		if (ret && !params.silent)
 			warn("thread %d: Could not unlock pi-lock for %p (%d)",
 			     w->tid, w->futex, ret);
 		ops++; /* account for thread's share of work */
@@ -120,12 +121,12 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr,
 	cpu_set_t cpuset;
 	unsigned int i;
 
-	threads_starting = nthreads;
+	threads_starting = params.nthreads;
 
-	for (i = 0; i < nthreads; i++) {
+	for (i = 0; i < params.nthreads; i++) {
 		worker[i].tid = i;
 
-		if (multi) {
+		if (params.multi) {
 			worker[i].futex = calloc(1, sizeof(u_int32_t));
 			if (!worker[i].futex)
 				err(EXIT_FAILURE, "calloc");
@@ -164,25 +165,25 @@ int bench_futex_lock_pi(int argc, const char **argv)
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
-	if (!nthreads)
-		nthreads = cpu->nr;
+	if (!params.nthreads)
+		params.nthreads = cpu->nr;
 
-	worker = calloc(nthreads, sizeof(*worker));
+	worker = calloc(params.nthreads, sizeof(*worker));
 	if (!worker)
 		err(EXIT_FAILURE, "calloc");
 
-	if (!fshared)
+	if (!params.fshared)
 		futex_flag = FUTEX_PRIVATE_FLAG;
 
 	printf("Run summary [PID %d]: %d threads doing pi lock/unlock pairing for %d secs.\n\n",
-	       getpid(), nthreads, nsecs);
+	       getpid(), params.nthreads, params.runtime);
 
 	init_stats(&throughput_stats);
 	pthread_mutex_init(&thread_lock, NULL);
 	pthread_cond_init(&thread_parent, NULL);
 	pthread_cond_init(&thread_worker, NULL);
 
-	threads_starting = nthreads;
+	threads_starting = params.nthreads;
 	pthread_attr_init(&thread_attr);
 	gettimeofday(&bench__start, NULL);
 
@@ -195,10 +196,10 @@ int bench_futex_lock_pi(int argc, const char **argv)
 	pthread_cond_broadcast(&thread_worker);
 	pthread_mutex_unlock(&thread_lock);
 
-	sleep(nsecs);
+	sleep(params.runtime);
 	toggle_done(0, NULL, NULL);
 
-	for (i = 0; i < nthreads; i++) {
+	for (i = 0; i < params.nthreads; i++) {
 		ret = pthread_join(worker[i].thread, NULL);
 		if (ret)
 			err(EXIT_FAILURE, "pthread_join");
@@ -209,16 +210,16 @@ int bench_futex_lock_pi(int argc, const char **argv)
 	pthread_cond_destroy(&thread_worker);
 	pthread_mutex_destroy(&thread_lock);
 
-	for (i = 0; i < nthreads; i++) {
+	for (i = 0; i < params.nthreads; i++) {
 		unsigned long t = bench__runtime.tv_sec > 0 ?
 			worker[i].ops / bench__runtime.tv_sec : 0;
 
 		update_stats(&throughput_stats, t);
-		if (!silent)
+		if (!params.silent)
 			printf("[thread %3d] futex: %p [ %ld ops/sec ]\n",
 			       worker[i].tid, worker[i].futex, t);
 
-		if (multi)
+		if (params.multi)
 			zfree(&worker[i].futex);
 	}
 
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index 5fa23295ee5f..66747bfe22cf 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -30,25 +30,27 @@
 
 static u_int32_t futex1 = 0, futex2 = 0;
 
-/*
- * How many tasks to requeue at a time.
- * Default to 1 in order to make the kernel work more.
- */
-static unsigned int nrequeue = 1;
-
 static pthread_t *worker;
-static bool done = false, silent = false, fshared = false;
+static bool done = false;
 static pthread_mutex_t thread_lock;
 static pthread_cond_t thread_parent, thread_worker;
 static struct stats requeuetime_stats, requeued_stats;
-static unsigned int threads_starting, nthreads = 0;
+static unsigned int threads_starting;
 static int futex_flag = 0;
 
+static struct bench_futex_parameters params = {
+	/*
+	 * How many tasks to requeue at a time.
+	 * Default to 1 in order to make the kernel work more.
+	 */
+	.nrequeue = 1,
+};
+
 static const struct option options[] = {
-	OPT_UINTEGER('t', "threads",  &nthreads, "Specify amount of threads"),
-	OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"),
-	OPT_BOOLEAN( 's', "silent",   &silent,   "Silent mode: do not display data/details"),
-	OPT_BOOLEAN( 'S', "shared",   &fshared,  "Use shared futexes instead of private ones"),
+	OPT_UINTEGER('t', "threads",  &params.nthreads, "Specify amount of threads"),
+	OPT_UINTEGER('q', "nrequeue", &params.nrequeue, "Specify amount of threads to requeue at once"),
+	OPT_BOOLEAN( 's', "silent",   &params.silent, "Silent mode: do not display data/details"),
+	OPT_BOOLEAN( 'S', "shared",   &params.fshared, "Use shared futexes instead of private ones"),
 	OPT_END()
 };
 
@@ -65,7 +67,7 @@ static void print_summary(void)
 
 	printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n",
 	       requeued_avg,
-	       nthreads,
+	       params.nthreads,
 	       requeuetime_avg / USEC_PER_MSEC,
 	       rel_stddev_stats(requeuetime_stddev, requeuetime_avg));
 }
@@ -89,10 +91,10 @@ static void block_threads(pthread_t *w,
 	cpu_set_t cpuset;
 	unsigned int i;
 
-	threads_starting = nthreads;
+	threads_starting = params.nthreads;
 
 	/* create and block all threads */
-	for (i = 0; i < nthreads; i++) {
+	for (i = 0; i < params.nthreads; i++) {
 		CPU_ZERO(&cpuset);
 		CPU_SET(cpu->map[i % cpu->nr], &cpuset);
 
@@ -132,22 +134,22 @@ int bench_futex_requeue(int argc, const char **argv)
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
-	if (!nthreads)
-		nthreads = cpu->nr;
+	if (!params.nthreads)
+		params.nthreads = cpu->nr;
 
-	worker = calloc(nthreads, sizeof(*worker));
+	worker = calloc(params.nthreads, sizeof(*worker));
 	if (!worker)
 		err(EXIT_FAILURE, "calloc");
 
-	if (!fshared)
+	if (!params.fshared)
 		futex_flag = FUTEX_PRIVATE_FLAG;
 
-	if (nrequeue > nthreads)
-		nrequeue = nthreads;
+	if (params.nrequeue > params.nthreads)
+		params.nrequeue = params.nthreads;
 
 	printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), "
-	       "%d at a time.\n\n",  getpid(), nthreads,
-	       fshared ? "shared":"private", &futex1, &futex2, nrequeue);
+	       "%d at a time.\n\n",  getpid(), params.nthreads,
+	       params.fshared ? "shared":"private", &futex1, &futex2, params.nrequeue);
 
 	init_stats(&requeued_stats);
 	init_stats(&requeuetime_stats);
@@ -174,13 +176,14 @@ int bench_futex_requeue(int argc, const char **argv)
 
 		/* Ok, all threads are patiently blocked, start requeueing */
 		gettimeofday(&start, NULL);
-		while (nrequeued < nthreads) {
+		while (nrequeued < params.nthreads) {
 			/*
 			 * Do not wakeup any tasks blocked on futex1, allowing
 			 * us to really measure futex_wait functionality.
 			 */
 			nrequeued += futex_cmp_requeue(&futex1, 0, &futex2, 0,
-						       nrequeue, futex_flag);
+						       params.nrequeue,
+						       futex_flag);
 		}
 
 		gettimeofday(&end, NULL);
@@ -189,17 +192,19 @@ int bench_futex_requeue(int argc, const char **argv)
 		update_stats(&requeued_stats, nrequeued);
 		update_stats(&requeuetime_stats, runtime.tv_usec);
 
-		if (!silent) {
+		if (!params.silent) {
 			printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n",
-			       j + 1, nrequeued, nthreads, runtime.tv_usec / (double)USEC_PER_MSEC);
+			       j + 1, nrequeued, params.nthreads,
+			       runtime.tv_usec / (double)USEC_PER_MSEC);
 		}
 
 		/* everybody should be blocked on futex2, wake'em up */
 		nrequeued = futex_wake(&futex2, nrequeued, futex_flag);
-		if (nthreads != nrequeued)
-			warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads);
+		if (params.nthreads != nrequeued)
+			warnx("couldn't wakeup all tasks (%d/%d)",
+			      nrequeued, params.nthreads);
 
-		for (i = 0; i < nthreads; i++) {
+		for (i = 0; i < params.nthreads; i++) {
 			ret = pthread_join(worker[i], NULL);
 			if (ret)
 				err(EXIT_FAILURE, "pthread_join");
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index 6e6f5247e1fe..958372ad159c 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -47,8 +47,7 @@ static unsigned int nwakes = 1;
 static u_int32_t futex = 0;
 
 static pthread_t *blocked_worker;
-static bool done = false, silent = false, fshared = false;
-static unsigned int nblocked_threads = 0, nwaking_threads = 0;
+static bool done = false;
 static pthread_mutex_t thread_lock;
 static pthread_cond_t thread_parent, thread_worker;
 static pthread_barrier_t barrier;
@@ -56,11 +55,13 @@ static struct stats waketime_stats, wakeup_stats;
 static unsigned int threads_starting;
 static int futex_flag = 0;
 
+static struct bench_futex_parameters params;
+
 static const struct option options[] = {
-	OPT_UINTEGER('t', "threads", &nblocked_threads, "Specify amount of threads"),
-	OPT_UINTEGER('w', "nwakers", &nwaking_threads, "Specify amount of waking threads"),
-	OPT_BOOLEAN( 's', "silent",  &silent,   "Silent mode: do not display data/details"),
-	OPT_BOOLEAN( 'S', "shared",  &fshared,  "Use shared futexes instead of private ones"),
+	OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
+	OPT_UINTEGER('w', "nwakers", &params.nwakes, "Specify amount of waking threads"),
+	OPT_BOOLEAN( 's', "silent",  &params.silent, "Silent mode: do not display data/details"),
+	OPT_BOOLEAN( 'S', "shared",  &params.fshared, "Use shared futexes instead of private ones"),
 	OPT_END()
 };
 
@@ -96,10 +97,10 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
 
 	pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
 
-	pthread_barrier_init(&barrier, NULL, nwaking_threads + 1);
+	pthread_barrier_init(&barrier, NULL, params.nwakes + 1);
 
 	/* create and block all threads */
-	for (i = 0; i < nwaking_threads; i++) {
+	for (i = 0; i < params.nwakes; i++) {
 		/*
 		 * Thread creation order will impact per-thread latency
 		 * as it will affect the order to acquire the hb spinlock.
@@ -112,7 +113,7 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
 
 	pthread_barrier_wait(&barrier);
 
-	for (i = 0; i < nwaking_threads; i++)
+	for (i = 0; i < params.nwakes; i++)
 		if (pthread_join(td[i].worker, NULL))
 			err(EXIT_FAILURE, "pthread_join");
 
@@ -143,10 +144,10 @@ static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
 	cpu_set_t cpuset;
 	unsigned int i;
 
-	threads_starting = nblocked_threads;
+	threads_starting = params.nthreads;
 
 	/* create and block all threads */
-	for (i = 0; i < nblocked_threads; i++) {
+	for (i = 0; i < params.nthreads; i++) {
 		CPU_ZERO(&cpuset);
 		CPU_SET(cpu->map[i % cpu->nr], &cpuset);
 
@@ -167,7 +168,7 @@ static void print_run(struct thread_data *waking_worker, unsigned int run_num)
 	init_stats(&__wakeup_stats);
 	init_stats(&__waketime_stats);
 
-	for (i = 0; i < nwaking_threads; i++) {
+	for (i = 0; i < params.nwakes; i++) {
 		update_stats(&__waketime_stats, waking_worker[i].runtime.tv_usec);
 		update_stats(&__wakeup_stats, waking_worker[i].nwoken);
 	}
@@ -178,7 +179,7 @@ static void print_run(struct thread_data *waking_worker, unsigned int run_num)
 
 	printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) "
 	       "in %.4f ms (+-%.2f%%)\n", run_num + 1, wakeup_avg,
-	       nblocked_threads, waketime_avg / USEC_PER_MSEC,
+	       params.nthreads, waketime_avg / USEC_PER_MSEC,
 	       rel_stddev_stats(waketime_stddev, waketime_avg));
 }
 
@@ -193,7 +194,7 @@ static void print_summary(void)
 
 	printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n",
 	       wakeup_avg,
-	       nblocked_threads,
+	       params.nthreads,
 	       waketime_avg / USEC_PER_MSEC,
 	       rel_stddev_stats(waketime_stddev, waketime_avg));
 }
@@ -203,7 +204,7 @@ static void do_run_stats(struct thread_data *waking_worker)
 {
 	unsigned int i;
 
-	for (i = 0; i < nwaking_threads; i++) {
+	for (i = 0; i < params.nwakes; i++) {
 		update_stats(&waketime_stats, waking_worker[i].runtime.tv_usec);
 		update_stats(&wakeup_stats, waking_worker[i].nwoken);
 	}
@@ -242,32 +243,33 @@ int bench_futex_wake_parallel(int argc, const char **argv)
 	if (!cpu)
 		err(EXIT_FAILURE, "calloc");
 
-	if (!nblocked_threads)
-		nblocked_threads = cpu->nr;
+	if (!params.nthreads)
+		params.nthreads = cpu->nr;
 
 	/* some sanity checks */
-	if (nwaking_threads > nblocked_threads || !nwaking_threads)
-		nwaking_threads = nblocked_threads;
+	if (params.nwakes > params.nthreads ||
+	    !params.nwakes)
+		params.nwakes = params.nthreads;
 
-	if (nblocked_threads % nwaking_threads)
+	if (params.nthreads % params.nwakes)
 		errx(EXIT_FAILURE, "Must be perfectly divisible");
 	/*
 	 * Each thread will wakeup nwakes tasks in
 	 * a single futex_wait call.
 	 */
-	nwakes = nblocked_threads/nwaking_threads;
+	nwakes = params.nthreads/params.nwakes;
 
-	blocked_worker = calloc(nblocked_threads, sizeof(*blocked_worker));
+	blocked_worker = calloc(params.nthreads, sizeof(*blocked_worker));
 	if (!blocked_worker)
 		err(EXIT_FAILURE, "calloc");
 
-	if (!fshared)
+	if (!params.fshared)
 		futex_flag = FUTEX_PRIVATE_FLAG;
 
 	printf("Run summary [PID %d]: blocking on %d threads (at [%s] "
 	       "futex %p), %d threads waking up %d at a time.\n\n",
-	       getpid(), nblocked_threads, fshared ? "shared":"private",
-	       &futex, nwaking_threads, nwakes);
+	       getpid(), params.nthreads, params.fshared ? "shared":"private",
+	       &futex, params.nwakes, nwakes);
 
 	init_stats(&wakeup_stats);
 	init_stats(&waketime_stats);
@@ -278,7 +280,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
 	pthread_cond_init(&thread_worker, NULL);
 
 	for (j = 0; j < bench_repeat && !done; j++) {
-		waking_worker = calloc(nwaking_threads, sizeof(*waking_worker));
+		waking_worker = calloc(params.nwakes, sizeof(*waking_worker));
 		if (!waking_worker)
 			err(EXIT_FAILURE, "calloc");
 
@@ -297,14 +299,14 @@ int bench_futex_wake_parallel(int argc, const char **argv)
 		/* Ok, all threads are patiently blocked, start waking folks up */
 		wakeup_threads(waking_worker, thread_attr);
 
-		for (i = 0; i < nblocked_threads; i++) {
+		for (i = 0; i < params.nthreads; i++) {
 			ret = pthread_join(blocked_worker[i], NULL);
 			if (ret)
 				err(EXIT_FAILURE, "pthread_join");
 		}
 
 		do_run_stats(waking_worker);
-		if (!silent)
+		if (!params.silent)
 			print_run(waking_worker, j);
 
 		free(waking_worker);
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 6d217868f53c..9ed4d65416f3 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -31,25 +31,27 @@
 /* all threads will block on the same futex */
 static u_int32_t futex1 = 0;
 
-/*
- * How many wakeups to do at a time.
- * Default to 1 in order to make the kernel work more.
- */
-static unsigned int nwakes = 1;
-
-pthread_t *worker;
-static bool done = false, silent = false, fshared = false;
+static pthread_t *worker;
+static bool done = false;
 static pthread_mutex_t thread_lock;
 static pthread_cond_t thread_parent, thread_worker;
 static struct stats waketime_stats, wakeup_stats;
-static unsigned int threads_starting, nthreads = 0;
+static unsigned int threads_starting;
 static int futex_flag = 0;
 
+static struct bench_futex_parameters params = {
+	/*
+	 * How many wakeups to do at a time.
+	 * Default to 1 in order to make the kernel work more.
+	 */
+	.nwakes  = 1,
+};
+
 static const struct option options[] = {
-	OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
-	OPT_UINTEGER('w', "nwakes",  &nwakes,   "Specify amount of threads to wake at once"),
-	OPT_BOOLEAN( 's', "silent",  &silent,   "Silent mode: do not display data/details"),
-	OPT_BOOLEAN( 'S', "shared",  &fshared,  "Use shared futexes instead of private ones"),
+	OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
+	OPT_UINTEGER('w', "nwakes",  &params.nwakes, "Specify amount of threads to wake at once"),
+	OPT_BOOLEAN( 's', "silent",  &params.silent, "Silent mode: do not display data/details"),
+	OPT_BOOLEAN( 'S', "shared",  &params.fshared, "Use shared futexes instead of private ones"),
 	OPT_END()
 };
 
@@ -84,7 +86,7 @@ static void print_summary(void)
 
 	printf("Wokeup %d of %d threads in %.4f ms (+-%.2f%%)\n",
 	       wakeup_avg,
-	       nthreads,
+	       params.nthreads,
 	       waketime_avg / USEC_PER_MSEC,
 	       rel_stddev_stats(waketime_stddev, waketime_avg));
 }
@@ -95,10 +97,10 @@ static void block_threads(pthread_t *w,
 	cpu_set_t cpuset;
 	unsigned int i;
 
-	threads_starting = nthreads;
+	threads_starting = params.nthreads;
 
 	/* create and block all threads */
-	for (i = 0; i < nthreads; i++) {
+	for (i = 0; i < params.nthreads; i++) {
 		CPU_ZERO(&cpuset);
 		CPU_SET(cpu->map[i % cpu->nr], &cpuset);
 
@@ -140,19 +142,20 @@ int bench_futex_wake(int argc, const char **argv)
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
-	if (!nthreads)
-		nthreads = cpu->nr;
+	if (!params.nthreads)
+		params.nthreads = cpu->nr;
 
-	worker = calloc(nthreads, sizeof(*worker));
+	worker = calloc(params.nthreads, sizeof(*worker));
 	if (!worker)
 		err(EXIT_FAILURE, "calloc");
 
-	if (!fshared)
+	if (!params.fshared)
 		futex_flag = FUTEX_PRIVATE_FLAG;
 
 	printf("Run summary [PID %d]: blocking on %d threads (at [%s] futex %p), "
 	       "waking up %d at a time.\n\n",
-	       getpid(), nthreads, fshared ? "shared":"private",  &futex1, nwakes);
+	       getpid(), params.nthreads, params.fshared ? "shared":"private",
+	       &futex1, params.nwakes);
 
 	init_stats(&wakeup_stats);
 	init_stats(&waketime_stats);
@@ -179,20 +182,22 @@ int bench_futex_wake(int argc, const char **argv)
 
 		/* Ok, all threads are patiently blocked, start waking folks up */
 		gettimeofday(&start, NULL);
-		while (nwoken != nthreads)
-			nwoken += futex_wake(&futex1, nwakes, futex_flag);
+		while (nwoken != params.nthreads)
+			nwoken += futex_wake(&futex1,
+					     params.nwakes, futex_flag);
 		gettimeofday(&end, NULL);
 		timersub(&end, &start, &runtime);
 
 		update_stats(&wakeup_stats, nwoken);
 		update_stats(&waketime_stats, runtime.tv_usec);
 
-		if (!silent) {
+		if (!params.silent) {
 			printf("[Run %d]: Wokeup %d of %d threads in %.4f ms\n",
-			       j + 1, nwoken, nthreads, runtime.tv_usec / (double)USEC_PER_MSEC);
+			       j + 1, nwoken, params.nthreads,
+			       runtime.tv_usec / (double)USEC_PER_MSEC);
 		}
 
-		for (i = 0; i < nthreads; i++) {
+		for (i = 0; i < params.nthreads; i++) {
 			ret = pthread_join(worker[i], NULL);
 			if (ret)
 				err(EXIT_FAILURE, "pthread_join");
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
index 31b53cc7d5bc..5f98653e6bb3 100644
--- a/tools/perf/bench/futex.h
+++ b/tools/perf/bench/futex.h
@@ -13,6 +13,17 @@
 #include <sys/types.h>
 #include <linux/futex.h>
 
+struct bench_futex_parameters {
+	bool silent;
+	bool fshared;
+	bool multi; /* lock-pi */
+	unsigned int runtime; /* seconds*/
+	unsigned int nthreads;
+	unsigned int nfutexes;
+	unsigned int nwakes;
+	unsigned int nrequeue;
+};
+
 /**
  * futex() - SYS_futex syscall wrapper
  * @uaddr:	address of first futex
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/7] perf/bench-futex: Remove bogus backslash from comment
  2021-08-09  4:32 [PATCH -tip v2 0/7] perf/bench-futex: Misc updates Davidlohr Bueso
  2021-08-09  4:32 ` [PATCH 1/7] perf/bench-futex: Group test parameters cleanup Davidlohr Bueso
@ 2021-08-09  4:32 ` Davidlohr Bueso
  2021-08-09  4:32 ` [PATCH 3/7] perf/bench-futex: Factor out futex_flag Davidlohr Bueso
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Davidlohr Bueso @ 2021-08-09  4:32 UTC (permalink / raw)
  To: acme
  Cc: mingo, peterz, tglx, linux-perf-users, linux-kernel, dave,
	Davidlohr Bueso

It obviously doesn't belong there.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 tools/perf/bench/futex.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
index 5f98653e6bb3..6f8b85b67348 100644
--- a/tools/perf/bench/futex.h
+++ b/tools/perf/bench/futex.h
@@ -31,7 +31,7 @@ struct bench_futex_parameters {
  * @val:	typically expected value of uaddr, but varies by op
  * @timeout:	typically an absolute struct timespec (except where noted
  *		otherwise). Overloaded by some ops
- * @uaddr2:	address of second futex for some ops\
+ * @uaddr2:	address of second futex for some ops
  * @val3:	varies by op
  * @opflags:	flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG
  *
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 3/7] perf/bench-futex: Factor out futex_flag
  2021-08-09  4:32 [PATCH -tip v2 0/7] perf/bench-futex: Misc updates Davidlohr Bueso
  2021-08-09  4:32 ` [PATCH 1/7] perf/bench-futex: Group test parameters cleanup Davidlohr Bueso
  2021-08-09  4:32 ` [PATCH 2/7] perf/bench-futex: Remove bogus backslash from comment Davidlohr Bueso
@ 2021-08-09  4:32 ` Davidlohr Bueso
  2021-08-09 14:54   ` Arnaldo Carvalho de Melo
  2021-08-09  4:32 ` [PATCH 4/7] perf/bench-futex: Add --mlockall parameter Davidlohr Bueso
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 15+ messages in thread
From: Davidlohr Bueso @ 2021-08-09  4:32 UTC (permalink / raw)
  To: acme
  Cc: mingo, peterz, tglx, linux-perf-users, linux-kernel, dave,
	Davidlohr Bueso

This is common across all tests, so move it into futex.h.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 tools/perf/bench/futex-hash.c          | 1 -
 tools/perf/bench/futex-lock-pi.c       | 1 -
 tools/perf/bench/futex-requeue.c       | 1 -
 tools/perf/bench/futex-wake-parallel.c | 1 -
 tools/perf/bench/futex-wake.c          | 1 -
 tools/perf/bench/futex.h               | 3 +++
 6 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index ddca7558e559..b71a34204b79 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -30,7 +30,6 @@
 #include <err.h>
 
 static bool done = false;
-static int futex_flag = 0;
 
 struct timeval bench__start, bench__end, bench__runtime;
 static pthread_mutex_t thread_lock;
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index ce980df23bb0..bc208edf3de3 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -32,7 +32,6 @@ struct worker {
 static u_int32_t global_futex = 0;
 static struct worker *worker;
 static bool done = false;
-static int futex_flag = 0;
 static pthread_mutex_t thread_lock;
 static unsigned int threads_starting;
 static struct stats throughput_stats;
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index 66747bfe22cf..4001312122be 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -36,7 +36,6 @@ static pthread_mutex_t thread_lock;
 static pthread_cond_t thread_parent, thread_worker;
 static struct stats requeuetime_stats, requeued_stats;
 static unsigned int threads_starting;
-static int futex_flag = 0;
 
 static struct bench_futex_parameters params = {
 	/*
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index 958372ad159c..ea4fdea6e2f3 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -53,7 +53,6 @@ static pthread_cond_t thread_parent, thread_worker;
 static pthread_barrier_t barrier;
 static struct stats waketime_stats, wakeup_stats;
 static unsigned int threads_starting;
-static int futex_flag = 0;
 
 static struct bench_futex_parameters params;
 
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 9ed4d65416f3..1cf651c8ee5c 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -37,7 +37,6 @@ static pthread_mutex_t thread_lock;
 static pthread_cond_t thread_parent, thread_worker;
 static struct stats waketime_stats, wakeup_stats;
 static unsigned int threads_starting;
-static int futex_flag = 0;
 
 static struct bench_futex_parameters params = {
 	/*
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
index 6f8b85b67348..f7cd22bbd677 100644
--- a/tools/perf/bench/futex.h
+++ b/tools/perf/bench/futex.h
@@ -13,6 +13,9 @@
 #include <sys/types.h>
 #include <linux/futex.h>
 
+/* FUTEX_PRIVATE_FLAG or zero */
+static int futex_flag = 0;
+
 struct bench_futex_parameters {
 	bool silent;
 	bool fshared;
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 4/7] perf/bench-futex: Add --mlockall parameter
  2021-08-09  4:32 [PATCH -tip v2 0/7] perf/bench-futex: Misc updates Davidlohr Bueso
                   ` (2 preceding siblings ...)
  2021-08-09  4:32 ` [PATCH 3/7] perf/bench-futex: Factor out futex_flag Davidlohr Bueso
@ 2021-08-09  4:32 ` Davidlohr Bueso
  2021-08-09 14:56   ` Arnaldo Carvalho de Melo
  2021-08-09  4:32 ` [PATCH 5/7] perf/bench-futex, requeue: Add --broadcast option Davidlohr Bueso
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 15+ messages in thread
From: Davidlohr Bueso @ 2021-08-09  4:32 UTC (permalink / raw)
  To: acme
  Cc: mingo, peterz, tglx, linux-perf-users, linux-kernel, dave,
	Davidlohr Bueso

This adds the -m/--mlockall option to all futex benchmarks. Calling
mlockall(2) is a common operation for realtime workloads, as it avoids
incurring page faults in paths that want determinism. As such, threads
started after the call to mlockall(2) will generate page faults
immediately, since the new stack is forced into memory right away
due to the MCL_FUTURE flag.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 tools/perf/bench/futex-hash.c          | 7 +++++++
 tools/perf/bench/futex-lock-pi.c       | 7 +++++++
 tools/perf/bench/futex-requeue.c       | 7 +++++++
 tools/perf/bench/futex-wake-parallel.c | 8 ++++++++
 tools/perf/bench/futex-wake.c          | 8 ++++++++
 tools/perf/bench/futex.h               | 1 +
 6 files changed, 38 insertions(+)

diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index b71a34204b79..af9fbb409472 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -20,6 +20,7 @@
 #include <linux/kernel.h>
 #include <linux/zalloc.h>
 #include <sys/time.h>
+#include <sys/mman.h>
 #include <perf/cpumap.h>
 
 #include "../util/stat.h"
@@ -55,6 +56,7 @@ static const struct option options[] = {
 	OPT_UINTEGER('f', "futexes", &params.nfutexes, "Specify amount of futexes per threads"),
 	OPT_BOOLEAN( 's', "silent",  &params.silent, "Silent mode: do not display data/details"),
 	OPT_BOOLEAN( 'S', "shared",  &params.fshared, "Use shared futexes instead of private ones"),
+	OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
 	OPT_END()
 };
 
@@ -141,6 +143,11 @@ int bench_futex_hash(int argc, const char **argv)
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
+	if (params.mlockall) {
+		if (mlockall(MCL_CURRENT | MCL_FUTURE))
+			err(EXIT_FAILURE, "mlockall");
+	}
+
 	if (!params.nthreads) /* default to the number of CPUs */
 		params.nthreads = cpu->nr;
 
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index bc208edf3de3..d2927d2eb3f7 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -21,6 +21,7 @@
 #include <err.h>
 #include <stdlib.h>
 #include <sys/time.h>
+#include <sys/mman.h>
 
 struct worker {
 	int tid;
@@ -47,6 +48,7 @@ static const struct option options[] = {
 	OPT_BOOLEAN( 'M', "multi",   &params.multi, "Use multiple futexes"),
 	OPT_BOOLEAN( 's', "silent",  &params.silent, "Silent mode: do not display data/details"),
 	OPT_BOOLEAN( 'S', "shared",  &params.fshared, "Use shared futexes instead of private ones"),
+	OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
 	OPT_END()
 };
 
@@ -164,6 +166,11 @@ int bench_futex_lock_pi(int argc, const char **argv)
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
+	if (params.mlockall) {
+		if (mlockall(MCL_CURRENT | MCL_FUTURE))
+			err(EXIT_FAILURE, "mlockall");
+	}
+
 	if (!params.nthreads)
 		params.nthreads = cpu->nr;
 
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index 4001312122be..88cb7e2a6729 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -27,6 +27,7 @@
 #include <err.h>
 #include <stdlib.h>
 #include <sys/time.h>
+#include <sys/mman.h>
 
 static u_int32_t futex1 = 0, futex2 = 0;
 
@@ -50,6 +51,7 @@ static const struct option options[] = {
 	OPT_UINTEGER('q', "nrequeue", &params.nrequeue, "Specify amount of threads to requeue at once"),
 	OPT_BOOLEAN( 's', "silent",   &params.silent, "Silent mode: do not display data/details"),
 	OPT_BOOLEAN( 'S', "shared",   &params.fshared, "Use shared futexes instead of private ones"),
+	OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
 	OPT_END()
 };
 
@@ -133,6 +135,11 @@ int bench_futex_requeue(int argc, const char **argv)
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
+	if (params.mlockall) {
+		if (mlockall(MCL_CURRENT | MCL_FUTURE))
+			err(EXIT_FAILURE, "mlockall");
+	}
+
 	if (!params.nthreads)
 		params.nthreads = cpu->nr;
 
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index ea4fdea6e2f3..ef1f8237fd81 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -34,6 +34,7 @@ int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe
 #include <err.h>
 #include <stdlib.h>
 #include <sys/time.h>
+#include <sys/mman.h>
 
 struct thread_data {
 	pthread_t worker;
@@ -61,6 +62,8 @@ static const struct option options[] = {
 	OPT_UINTEGER('w', "nwakers", &params.nwakes, "Specify amount of waking threads"),
 	OPT_BOOLEAN( 's', "silent",  &params.silent, "Silent mode: do not display data/details"),
 	OPT_BOOLEAN( 'S', "shared",  &params.fshared, "Use shared futexes instead of private ones"),
+	OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
+
 	OPT_END()
 };
 
@@ -238,6 +241,11 @@ int bench_futex_wake_parallel(int argc, const char **argv)
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
+	if (params.mlockall) {
+		if (mlockall(MCL_CURRENT | MCL_FUTURE))
+			err(EXIT_FAILURE, "mlockall");
+	}
+
 	cpu = perf_cpu_map__new(NULL);
 	if (!cpu)
 		err(EXIT_FAILURE, "calloc");
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 1cf651c8ee5c..40e492c7996a 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -27,6 +27,7 @@
 #include <err.h>
 #include <stdlib.h>
 #include <sys/time.h>
+#include <sys/mman.h>
 
 /* all threads will block on the same futex */
 static u_int32_t futex1 = 0;
@@ -51,6 +52,8 @@ static const struct option options[] = {
 	OPT_UINTEGER('w', "nwakes",  &params.nwakes, "Specify amount of threads to wake at once"),
 	OPT_BOOLEAN( 's', "silent",  &params.silent, "Silent mode: do not display data/details"),
 	OPT_BOOLEAN( 'S', "shared",  &params.fshared, "Use shared futexes instead of private ones"),
+	OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
+
 	OPT_END()
 };
 
@@ -141,6 +144,11 @@ int bench_futex_wake(int argc, const char **argv)
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
+	if (params.mlockall) {
+		if (mlockall(MCL_CURRENT | MCL_FUTURE))
+			err(EXIT_FAILURE, "mlockall");
+	}
+
 	if (!params.nthreads)
 		params.nthreads = cpu->nr;
 
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
index f7cd22bbd677..1c8fa469993f 100644
--- a/tools/perf/bench/futex.h
+++ b/tools/perf/bench/futex.h
@@ -19,6 +19,7 @@ static int futex_flag = 0;
 struct bench_futex_parameters {
 	bool silent;
 	bool fshared;
+	bool mlockall;
 	bool multi; /* lock-pi */
 	unsigned int runtime; /* seconds*/
 	unsigned int nthreads;
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 5/7] perf/bench-futex, requeue: Add --broadcast option
  2021-08-09  4:32 [PATCH -tip v2 0/7] perf/bench-futex: Misc updates Davidlohr Bueso
                   ` (3 preceding siblings ...)
  2021-08-09  4:32 ` [PATCH 4/7] perf/bench-futex: Add --mlockall parameter Davidlohr Bueso
@ 2021-08-09  4:32 ` Davidlohr Bueso
  2021-08-09 14:57   ` Arnaldo Carvalho de Melo
  2021-08-09  4:33 ` [PATCH 6/7] perf/bench-futex, requeue: Robustify futex_wait() handling Davidlohr Bueso
  2021-08-09  4:33 ` [PATCH 7/7] perf/bench-futex, requeue: Add --pi parameter Davidlohr Bueso
  6 siblings, 1 reply; 15+ messages in thread
From: Davidlohr Bueso @ 2021-08-09  4:32 UTC (permalink / raw)
  To: acme
  Cc: mingo, peterz, tglx, linux-perf-users, linux-kernel, dave,
	Davidlohr Bueso

Add a -B/--broadcast option such that all threads are requeued to uaddr2
in a single futex_cmp_requeue() call, unlike the default, which requeues
one thread at a time.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 tools/perf/bench/futex-requeue.c | 4 ++++
 tools/perf/bench/futex.h         | 1 +
 2 files changed, 5 insertions(+)

diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index 88cb7e2a6729..80f40ee92b53 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -52,6 +52,7 @@ static const struct option options[] = {
 	OPT_BOOLEAN( 's', "silent",   &params.silent, "Silent mode: do not display data/details"),
 	OPT_BOOLEAN( 'S', "shared",   &params.fshared, "Use shared futexes instead of private ones"),
 	OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
+	OPT_BOOLEAN( 'B', "broadcast", &params.broadcast, "Requeue all threads at once"),
 	OPT_END()
 };
 
@@ -153,6 +154,9 @@ int bench_futex_requeue(int argc, const char **argv)
 	if (params.nrequeue > params.nthreads)
 		params.nrequeue = params.nthreads;
 
+	if (params.broadcast)
+		params.nrequeue = params.nthreads;
+
 	printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), "
 	       "%d at a time.\n\n",  getpid(), params.nthreads,
 	       params.fshared ? "shared":"private", &futex1, &futex2, params.nrequeue);
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
index 1c8fa469993f..36f158650edf 100644
--- a/tools/perf/bench/futex.h
+++ b/tools/perf/bench/futex.h
@@ -21,6 +21,7 @@ struct bench_futex_parameters {
 	bool fshared;
 	bool mlockall;
 	bool multi; /* lock-pi */
+	bool broadcast; /* requeue */
 	unsigned int runtime; /* seconds*/
 	unsigned int nthreads;
 	unsigned int nfutexes;
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 6/7] perf/bench-futex, requeue: Robustify futex_wait() handling
  2021-08-09  4:32 [PATCH -tip v2 0/7] perf/bench-futex: Misc updates Davidlohr Bueso
                   ` (4 preceding siblings ...)
  2021-08-09  4:32 ` [PATCH 5/7] perf/bench-futex, requeue: Add --broadcast option Davidlohr Bueso
@ 2021-08-09  4:33 ` Davidlohr Bueso
  2021-08-09 14:58   ` Arnaldo Carvalho de Melo
  2021-08-09  4:33 ` [PATCH 7/7] perf/bench-futex, requeue: Add --pi parameter Davidlohr Bueso
  6 siblings, 1 reply; 15+ messages in thread
From: Davidlohr Bueso @ 2021-08-09  4:33 UTC (permalink / raw)
  To: acme
  Cc: mingo, peterz, tglx, linux-perf-users, linux-kernel, dave,
	Davidlohr Bueso

Do not assume success: retry the wait on EAGAIN, and warn and bail out on
any other error, however unlikely.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 tools/perf/bench/futex-requeue.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index 80f40ee92b53..e4892ba6864f 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -76,6 +76,8 @@ static void print_summary(void)
 
 static void *workerfn(void *arg __maybe_unused)
 {
+	int ret;
+
 	pthread_mutex_lock(&thread_lock);
 	threads_starting--;
 	if (!threads_starting)
@@ -83,7 +85,18 @@ static void *workerfn(void *arg __maybe_unused)
 	pthread_cond_wait(&thread_worker, &thread_lock);
 	pthread_mutex_unlock(&thread_lock);
 
-	futex_wait(&futex1, 0, NULL, futex_flag);
+	while (1) {
+		ret = futex_wait(&futex1, 0, NULL, futex_flag);
+		if (!ret)
+			break;
+
+		if (ret && errno != EAGAIN) {
+			if (!params.silent)
+				warn("futex_wait");
+			break;
+		}
+	}
+
 	return NULL;
 }
 
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 7/7] perf/bench-futex, requeue: Add --pi parameter
  2021-08-09  4:32 [PATCH -tip v2 0/7] perf/bench-futex: Misc updates Davidlohr Bueso
                   ` (5 preceding siblings ...)
  2021-08-09  4:33 ` [PATCH 6/7] perf/bench-futex, requeue: Robustify futex_wait() handling Davidlohr Bueso
@ 2021-08-09  4:33 ` Davidlohr Bueso
  2021-08-09 15:01   ` Arnaldo Carvalho de Melo
  6 siblings, 1 reply; 15+ messages in thread
From: Davidlohr Bueso @ 2021-08-09  4:33 UTC (permalink / raw)
  To: acme
  Cc: mingo, peterz, tglx, linux-perf-users, linux-kernel, dave,
	Davidlohr Bueso

This extends the program to measure WAIT_REQUEUE_PI+CMP_REQUEUE_PI
pairs, which are the underlying machinery behind priority-inheritance
aware condition variables. The defaults are the same as with the regular
non-pi version, requeueing one task at a time, with the exception that
PI will always wake up the first waiter.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 tools/perf/bench/futex-requeue.c | 100 +++++++++++++++++++++++--------
 tools/perf/bench/futex.h         |  37 +++++++++++-
 2 files changed, 111 insertions(+), 26 deletions(-)

diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index e4892ba6864f..03a05814d45e 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -6,7 +6,8 @@
  *                on futex2, N at a time.
  *
  * This program is particularly useful to measure the latency of nthread
- * requeues without waking up any tasks -- thus mimicking a regular futex_wait.
+ * requeues without waking up any tasks (in the non-pi case) -- thus
+ * mimicking a regular futex_wait.
  */
 
 /* For the CLR_() macros */
@@ -53,6 +54,8 @@ static const struct option options[] = {
 	OPT_BOOLEAN( 'S', "shared",   &params.fshared, "Use shared futexes instead of private ones"),
 	OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
 	OPT_BOOLEAN( 'B', "broadcast", &params.broadcast, "Requeue all threads at once"),
+	OPT_BOOLEAN( 'p', "pi", &params.pi, "Use PI-aware variants of FUTEX_CMP_REQUEUE"),
+
 	OPT_END()
 };
 
@@ -86,14 +89,30 @@ static void *workerfn(void *arg __maybe_unused)
 	pthread_mutex_unlock(&thread_lock);
 
 	while (1) {
-		ret = futex_wait(&futex1, 0, NULL, futex_flag);
-		if (!ret)
-			break;
-
-		if (ret && errno != EAGAIN) {
-			if (!params.silent)
-				warn("futex_wait");
-			break;
+		if (!params.pi) {
+			ret = futex_wait(&futex1, 0, NULL, futex_flag);
+			if (!ret)
+				break;
+
+			if (ret && errno != EAGAIN) {
+				if (!params.silent)
+					warnx("futex_wait");
+				break;
+			}
+		} else {
+			ret = futex_wait_requeue_pi(&futex1, 0, &futex2,
+						    NULL, futex_flag);
+			if (!ret) {
+				/* got the lock at futex2 */
+				futex_unlock_pi(&futex2, futex_flag);
+				break;
+			}
+
+			if (ret && errno != EAGAIN) {
+				if (!params.silent)
+					warnx("futex_wait_requeue_pi");
+				break;
+			}
 		}
 	}
 
@@ -170,9 +189,10 @@ int bench_futex_requeue(int argc, const char **argv)
 	if (params.broadcast)
 		params.nrequeue = params.nthreads;
 
-	printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), "
+	printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %s%p), "
 	       "%d at a time.\n\n",  getpid(), params.nthreads,
-	       params.fshared ? "shared":"private", &futex1, &futex2, params.nrequeue);
+	       params.fshared ? "shared":"private", &futex1,
+	       params.pi ? "PI ": "", &futex2, params.nrequeue);
 
 	init_stats(&requeued_stats);
 	init_stats(&requeuetime_stats);
@@ -182,7 +202,7 @@ int bench_futex_requeue(int argc, const char **argv)
 	pthread_cond_init(&thread_worker, NULL);
 
 	for (j = 0; j < bench_repeat && !done; j++) {
-		unsigned int nrequeued = 0;
+		unsigned int nrequeued = 0, wakeups = 0;
 		struct timeval start, end, runtime;
 
 		/* create, launch & block all threads */
@@ -200,13 +220,30 @@ int bench_futex_requeue(int argc, const char **argv)
 		/* Ok, all threads are patiently blocked, start requeueing */
 		gettimeofday(&start, NULL);
 		while (nrequeued < params.nthreads) {
+			int r;
+
 			/*
-			 * Do not wakeup any tasks blocked on futex1, allowing
-			 * us to really measure futex_wait functionality.
+			 * For the regular non-pi case, do not wakeup any tasks
+			 * blocked on futex1, allowing us to really measure
+			 * futex_wait functionality. For the PI case the first
+			 * waiter is always awoken.
 			 */
-			nrequeued += futex_cmp_requeue(&futex1, 0, &futex2, 0,
-						       params.nrequeue,
-						       futex_flag);
+			if (!params.pi) {
+				r = futex_cmp_requeue(&futex1, 0, &futex2, 0,
+						      params.nrequeue,
+						      futex_flag);
+			} else {
+				r = futex_cmp_requeue_pi(&futex1, 0, &futex2,
+							 params.nrequeue,
+							 futex_flag);
+				wakeups++; /* assume no error */
+			}
+
+			if (r < 0)
+				err(EXIT_FAILURE, "couldn't requeue from %p to %p",
+				    &futex1, &futex2);
+
+			nrequeued += r;
 		}
 
 		gettimeofday(&end, NULL);
@@ -216,16 +253,29 @@ int bench_futex_requeue(int argc, const char **argv)
 		update_stats(&requeuetime_stats, runtime.tv_usec);
 
 		if (!params.silent) {
-			printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n",
-			       j + 1, nrequeued, params.nthreads,
-			       runtime.tv_usec / (double)USEC_PER_MSEC);
+			if (!params.pi)
+				printf("[Run %d]: Requeued %d of %d threads in "
+				       "%.4f ms\n", j + 1, nrequeued,
+				       params.nthreads,
+				       runtime.tv_usec / (double)USEC_PER_MSEC);
+			else {
+				nrequeued -= wakeups;
+				printf("[Run %d]: Awoke and Requeued (%d+%d) of "
+				       "%d threads in %.4f ms\n",
+				       j + 1, wakeups, nrequeued,
+				       params.nthreads,
+				       runtime.tv_usec / (double)USEC_PER_MSEC);
+			}
+
 		}
 
-		/* everybody should be blocked on futex2, wake'em up */
-		nrequeued = futex_wake(&futex2, nrequeued, futex_flag);
-		if (params.nthreads != nrequeued)
-			warnx("couldn't wakeup all tasks (%d/%d)",
-			      nrequeued, params.nthreads);
+		if (!params.pi) {
+			/* everybody should be blocked on futex2, wake'em up */
+			nrequeued = futex_wake(&futex2, nrequeued, futex_flag);
+			if (params.nthreads != nrequeued)
+				warnx("couldn't wakeup all tasks (%d/%d)",
+				      nrequeued, params.nthreads);
+		}
 
 		for (i = 0; i < params.nthreads; i++) {
 			ret = pthread_join(worker[i], NULL);
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
index 36f158650edf..f0ad6d988349 100644
--- a/tools/perf/bench/futex.h
+++ b/tools/perf/bench/futex.h
@@ -21,6 +21,7 @@ struct bench_futex_parameters {
 	bool fshared;
 	bool mlockall;
 	bool multi; /* lock-pi */
+	bool pi; /* requeue-pi */
 	bool broadcast; /* requeue */
 	unsigned int runtime; /* seconds*/
 	unsigned int nthreads;
@@ -93,7 +94,7 @@ futex_unlock_pi(u_int32_t *uaddr, int opflags)
 /**
 * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2
 * @nr_wake:        wake up to this many tasks
-* @nr_requeue:        requeue up to this many tasks
+* @nr_requeue:     requeue up to this many tasks
 */
 static inline int
 futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wake,
@@ -102,4 +103,38 @@ futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wak
 	return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2,
 		 val, opflags);
 }
+
+/**
+ * futex_wait_requeue_pi() - block on uaddr and prepare to requeue to uaddr2
+ * @uaddr:	non-PI futex source
+ * @uaddr2:	PI futex target
+ *
+ * This is the first half of the requeue_pi mechanism. It shall always be
+ * paired with futex_cmp_requeue_pi().
+ */
+static inline int
+futex_wait_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2,
+		      struct timespec *timeout, int opflags)
+{
+	return futex(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0,
+		     opflags);
+}
+
+/**
+ * futex_cmp_requeue_pi() - requeue tasks from uaddr to uaddr2
+ * @uaddr:	non-PI futex source
+ * @uaddr2:	PI futex target
+ * @nr_requeue:	requeue up to this many tasks
+ *
+ * This is the second half of the requeue_pi mechanism. It shall always be
+ * paired with futex_wait_requeue_pi(). The first waiter is always awoken.
+ */
+static inline int
+futex_cmp_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2,
+		     int nr_requeue, int opflags)
+{
+	return futex(uaddr, FUTEX_CMP_REQUEUE_PI, 1, nr_requeue, uaddr2,
+		     val, opflags);
+}
+
 #endif /* _FUTEX_H */
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/7] perf/bench-futex: Factor out futex_flag
  2021-08-09  4:32 ` [PATCH 3/7] perf/bench-futex: Factor out futex_flag Davidlohr Bueso
@ 2021-08-09 14:54   ` Arnaldo Carvalho de Melo
  2021-08-09 16:30     ` Davidlohr Bueso
  0 siblings, 1 reply; 15+ messages in thread
From: Arnaldo Carvalho de Melo @ 2021-08-09 14:54 UTC (permalink / raw)
  To: Davidlohr Bueso
  Cc: mingo, peterz, tglx, linux-perf-users, linux-kernel, Davidlohr Bueso

Em Sun, Aug 08, 2021 at 09:32:57PM -0700, Davidlohr Bueso escreveu:
> This is common across all tests, move it into futex.h.

Wouldn't it be better to have it defined in one place and then in futex.h
just have an 'extern int futex_flag;' declaration?

I applied the first two patches already.

- Arnaldo
 
> Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
> ---
>  tools/perf/bench/futex-hash.c          | 1 -
>  tools/perf/bench/futex-lock-pi.c       | 1 -
>  tools/perf/bench/futex-requeue.c       | 1 -
>  tools/perf/bench/futex-wake-parallel.c | 1 -
>  tools/perf/bench/futex-wake.c          | 1 -
>  tools/perf/bench/futex.h               | 3 +++
>  6 files changed, 3 insertions(+), 5 deletions(-)
> 
> diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
> index ddca7558e559..b71a34204b79 100644
> --- a/tools/perf/bench/futex-hash.c
> +++ b/tools/perf/bench/futex-hash.c
> @@ -30,7 +30,6 @@
>  #include <err.h>
>  
>  static bool done = false;
> -static int futex_flag = 0;
>  
>  struct timeval bench__start, bench__end, bench__runtime;
>  static pthread_mutex_t thread_lock;
> diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
> index ce980df23bb0..bc208edf3de3 100644
> --- a/tools/perf/bench/futex-lock-pi.c
> +++ b/tools/perf/bench/futex-lock-pi.c
> @@ -32,7 +32,6 @@ struct worker {
>  static u_int32_t global_futex = 0;
>  static struct worker *worker;
>  static bool done = false;
> -static int futex_flag = 0;
>  static pthread_mutex_t thread_lock;
>  static unsigned int threads_starting;
>  static struct stats throughput_stats;
> diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
> index 66747bfe22cf..4001312122be 100644
> --- a/tools/perf/bench/futex-requeue.c
> +++ b/tools/perf/bench/futex-requeue.c
> @@ -36,7 +36,6 @@ static pthread_mutex_t thread_lock;
>  static pthread_cond_t thread_parent, thread_worker;
>  static struct stats requeuetime_stats, requeued_stats;
>  static unsigned int threads_starting;
> -static int futex_flag = 0;
>  
>  static struct bench_futex_parameters params = {
>  	/*
> diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
> index 958372ad159c..ea4fdea6e2f3 100644
> --- a/tools/perf/bench/futex-wake-parallel.c
> +++ b/tools/perf/bench/futex-wake-parallel.c
> @@ -53,7 +53,6 @@ static pthread_cond_t thread_parent, thread_worker;
>  static pthread_barrier_t barrier;
>  static struct stats waketime_stats, wakeup_stats;
>  static unsigned int threads_starting;
> -static int futex_flag = 0;
>  
>  static struct bench_futex_parameters params;
>  
> diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
> index 9ed4d65416f3..1cf651c8ee5c 100644
> --- a/tools/perf/bench/futex-wake.c
> +++ b/tools/perf/bench/futex-wake.c
> @@ -37,7 +37,6 @@ static pthread_mutex_t thread_lock;
>  static pthread_cond_t thread_parent, thread_worker;
>  static struct stats waketime_stats, wakeup_stats;
>  static unsigned int threads_starting;
> -static int futex_flag = 0;
>  
>  static struct bench_futex_parameters params = {
>  	/*
> diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
> index 6f8b85b67348..f7cd22bbd677 100644
> --- a/tools/perf/bench/futex.h
> +++ b/tools/perf/bench/futex.h
> @@ -13,6 +13,9 @@
>  #include <sys/types.h>
>  #include <linux/futex.h>
>  
> +/* FUTEX_PRIVATE_FLAG or zero */
> +static int futex_flag = 0;
> +
>  struct bench_futex_parameters {
>  	bool silent;
>  	bool fshared;
> -- 
> 2.26.2
> 

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/7] perf/bench-futex: Add --mlockall parameter
  2021-08-09  4:32 ` [PATCH 4/7] perf/bench-futex: Add --mlockall parameter Davidlohr Bueso
@ 2021-08-09 14:56   ` Arnaldo Carvalho de Melo
  2021-08-09 16:11     ` Davidlohr Bueso
  0 siblings, 1 reply; 15+ messages in thread
From: Arnaldo Carvalho de Melo @ 2021-08-09 14:56 UTC (permalink / raw)
  To: Davidlohr Bueso
  Cc: mingo, peterz, tglx, linux-perf-users, linux-kernel, Davidlohr Bueso

Em Sun, Aug 08, 2021 at 09:32:58PM -0700, Davidlohr Bueso escreveu:
> This adds, across all futex benchmarks, the -m/--mlockall option
> which is a common operation for realtime workloads by not incurring
> in page faults in paths that want determinism. As such, threads
> started after a call to mlockall(2) will generate page faults
> immediately since the new stack is immediately forced to memory,
> due to the MCL_FUTURE flag.

Applied.

At some point these options could be handled in a common
futex_parse_options() function that would consume argv[] and then the
specific futex benchmarks would continue from where the common function
left off.

- Arnaldo
 
> Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
> ---
>  tools/perf/bench/futex-hash.c          | 7 +++++++
>  tools/perf/bench/futex-lock-pi.c       | 7 +++++++
>  tools/perf/bench/futex-requeue.c       | 7 +++++++
>  tools/perf/bench/futex-wake-parallel.c | 8 ++++++++
>  tools/perf/bench/futex-wake.c          | 8 ++++++++
>  tools/perf/bench/futex.h               | 1 +
>  6 files changed, 38 insertions(+)
> 
> diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
> index b71a34204b79..af9fbb409472 100644
> --- a/tools/perf/bench/futex-hash.c
> +++ b/tools/perf/bench/futex-hash.c
> @@ -20,6 +20,7 @@
>  #include <linux/kernel.h>
>  #include <linux/zalloc.h>
>  #include <sys/time.h>
> +#include <sys/mman.h>
>  #include <perf/cpumap.h>
>  
>  #include "../util/stat.h"
> @@ -55,6 +56,7 @@ static const struct option options[] = {
>  	OPT_UINTEGER('f', "futexes", &params.nfutexes, "Specify amount of futexes per threads"),
>  	OPT_BOOLEAN( 's', "silent",  &params.silent, "Silent mode: do not display data/details"),
>  	OPT_BOOLEAN( 'S', "shared",  &params.fshared, "Use shared futexes instead of private ones"),
> +	OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
>  	OPT_END()
>  };
>  
> @@ -141,6 +143,11 @@ int bench_futex_hash(int argc, const char **argv)
>  	act.sa_sigaction = toggle_done;
>  	sigaction(SIGINT, &act, NULL);
>  
> +	if (params.mlockall) {
> +		if (mlockall(MCL_CURRENT | MCL_FUTURE))
> +			err(EXIT_FAILURE, "mlockall");
> +	}
> +
>  	if (!params.nthreads) /* default to the number of CPUs */
>  		params.nthreads = cpu->nr;
>  
> diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
> index bc208edf3de3..d2927d2eb3f7 100644
> --- a/tools/perf/bench/futex-lock-pi.c
> +++ b/tools/perf/bench/futex-lock-pi.c
> @@ -21,6 +21,7 @@
>  #include <err.h>
>  #include <stdlib.h>
>  #include <sys/time.h>
> +#include <sys/mman.h>
>  
>  struct worker {
>  	int tid;
> @@ -47,6 +48,7 @@ static const struct option options[] = {
>  	OPT_BOOLEAN( 'M', "multi",   &params.multi, "Use multiple futexes"),
>  	OPT_BOOLEAN( 's', "silent",  &params.silent, "Silent mode: do not display data/details"),
>  	OPT_BOOLEAN( 'S', "shared",  &params.fshared, "Use shared futexes instead of private ones"),
> +	OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
>  	OPT_END()
>  };
>  
> @@ -164,6 +166,11 @@ int bench_futex_lock_pi(int argc, const char **argv)
>  	act.sa_sigaction = toggle_done;
>  	sigaction(SIGINT, &act, NULL);
>  
> +	if (params.mlockall) {
> +		if (mlockall(MCL_CURRENT | MCL_FUTURE))
> +			err(EXIT_FAILURE, "mlockall");
> +	}
> +
>  	if (!params.nthreads)
>  		params.nthreads = cpu->nr;
>  
> diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
> index 4001312122be..88cb7e2a6729 100644
> --- a/tools/perf/bench/futex-requeue.c
> +++ b/tools/perf/bench/futex-requeue.c
> @@ -27,6 +27,7 @@
>  #include <err.h>
>  #include <stdlib.h>
>  #include <sys/time.h>
> +#include <sys/mman.h>
>  
>  static u_int32_t futex1 = 0, futex2 = 0;
>  
> @@ -50,6 +51,7 @@ static const struct option options[] = {
>  	OPT_UINTEGER('q', "nrequeue", &params.nrequeue, "Specify amount of threads to requeue at once"),
>  	OPT_BOOLEAN( 's', "silent",   &params.silent, "Silent mode: do not display data/details"),
>  	OPT_BOOLEAN( 'S', "shared",   &params.fshared, "Use shared futexes instead of private ones"),
> +	OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
>  	OPT_END()
>  };
>  
> @@ -133,6 +135,11 @@ int bench_futex_requeue(int argc, const char **argv)
>  	act.sa_sigaction = toggle_done;
>  	sigaction(SIGINT, &act, NULL);
>  
> +	if (params.mlockall) {
> +		if (mlockall(MCL_CURRENT | MCL_FUTURE))
> +			err(EXIT_FAILURE, "mlockall");
> +	}
> +
>  	if (!params.nthreads)
>  		params.nthreads = cpu->nr;
>  
> diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
> index ea4fdea6e2f3..ef1f8237fd81 100644
> --- a/tools/perf/bench/futex-wake-parallel.c
> +++ b/tools/perf/bench/futex-wake-parallel.c
> @@ -34,6 +34,7 @@ int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe
>  #include <err.h>
>  #include <stdlib.h>
>  #include <sys/time.h>
> +#include <sys/mman.h>
>  
>  struct thread_data {
>  	pthread_t worker;
> @@ -61,6 +62,8 @@ static const struct option options[] = {
>  	OPT_UINTEGER('w', "nwakers", &params.nwakes, "Specify amount of waking threads"),
>  	OPT_BOOLEAN( 's', "silent",  &params.silent, "Silent mode: do not display data/details"),
>  	OPT_BOOLEAN( 'S', "shared",  &params.fshared, "Use shared futexes instead of private ones"),
> +	OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
> +
>  	OPT_END()
>  };
>  
> @@ -238,6 +241,11 @@ int bench_futex_wake_parallel(int argc, const char **argv)
>  	act.sa_sigaction = toggle_done;
>  	sigaction(SIGINT, &act, NULL);
>  
> +	if (params.mlockall) {
> +		if (mlockall(MCL_CURRENT | MCL_FUTURE))
> +			err(EXIT_FAILURE, "mlockall");
> +	}
> +
>  	cpu = perf_cpu_map__new(NULL);
>  	if (!cpu)
>  		err(EXIT_FAILURE, "calloc");
> diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
> index 1cf651c8ee5c..40e492c7996a 100644
> --- a/tools/perf/bench/futex-wake.c
> +++ b/tools/perf/bench/futex-wake.c
> @@ -27,6 +27,7 @@
>  #include <err.h>
>  #include <stdlib.h>
>  #include <sys/time.h>
> +#include <sys/mman.h>
>  
>  /* all threads will block on the same futex */
>  static u_int32_t futex1 = 0;
> @@ -51,6 +52,8 @@ static const struct option options[] = {
>  	OPT_UINTEGER('w', "nwakes",  &params.nwakes, "Specify amount of threads to wake at once"),
>  	OPT_BOOLEAN( 's', "silent",  &params.silent, "Silent mode: do not display data/details"),
>  	OPT_BOOLEAN( 'S', "shared",  &params.fshared, "Use shared futexes instead of private ones"),
> +	OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
> +
>  	OPT_END()
>  };
>  
> @@ -141,6 +144,11 @@ int bench_futex_wake(int argc, const char **argv)
>  	act.sa_sigaction = toggle_done;
>  	sigaction(SIGINT, &act, NULL);
>  
> +	if (params.mlockall) {
> +		if (mlockall(MCL_CURRENT | MCL_FUTURE))
> +			err(EXIT_FAILURE, "mlockall");
> +	}
> +
>  	if (!params.nthreads)
>  		params.nthreads = cpu->nr;
>  
> diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
> index f7cd22bbd677..1c8fa469993f 100644
> --- a/tools/perf/bench/futex.h
> +++ b/tools/perf/bench/futex.h
> @@ -19,6 +19,7 @@ static int futex_flag = 0;
>  struct bench_futex_parameters {
>  	bool silent;
>  	bool fshared;
> +	bool mlockall;
>  	bool multi; /* lock-pi */
>  	unsigned int runtime; /* seconds*/
>  	unsigned int nthreads;
> -- 
> 2.26.2
> 

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 5/7] perf/bench-futex, requeue: Add --broadcast option
  2021-08-09  4:32 ` [PATCH 5/7] perf/bench-futex, requeue: Add --broadcast option Davidlohr Bueso
@ 2021-08-09 14:57   ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 15+ messages in thread
From: Arnaldo Carvalho de Melo @ 2021-08-09 14:57 UTC (permalink / raw)
  To: Davidlohr Bueso
  Cc: mingo, peterz, tglx, linux-perf-users, linux-kernel, Davidlohr Bueso

Em Sun, Aug 08, 2021 at 09:32:59PM -0700, Davidlohr Bueso escreveu:
> Such that all threads are requeued to uaddr2 in a single
> futex_cmp_requeue() call, unlike the default, which requeues one
> thread at a time.

Thanks, applied.

- Arnaldo

 
> Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
> ---
>  tools/perf/bench/futex-requeue.c | 4 ++++
>  tools/perf/bench/futex.h         | 1 +
>  2 files changed, 5 insertions(+)
> 
> diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
> index 88cb7e2a6729..80f40ee92b53 100644
> --- a/tools/perf/bench/futex-requeue.c
> +++ b/tools/perf/bench/futex-requeue.c
> @@ -52,6 +52,7 @@ static const struct option options[] = {
>  	OPT_BOOLEAN( 's', "silent",   &params.silent, "Silent mode: do not display data/details"),
>  	OPT_BOOLEAN( 'S', "shared",   &params.fshared, "Use shared futexes instead of private ones"),
>  	OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
> +	OPT_BOOLEAN( 'B', "broadcast", &params.broadcast, "Requeue all threads at once"),
>  	OPT_END()
>  };
>  
> @@ -153,6 +154,9 @@ int bench_futex_requeue(int argc, const char **argv)
>  	if (params.nrequeue > params.nthreads)
>  		params.nrequeue = params.nthreads;
>  
> +	if (params.broadcast)
> +		params.nrequeue = params.nthreads;
> +
>  	printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), "
>  	       "%d at a time.\n\n",  getpid(), params.nthreads,
>  	       params.fshared ? "shared":"private", &futex1, &futex2, params.nrequeue);
> diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
> index 1c8fa469993f..36f158650edf 100644
> --- a/tools/perf/bench/futex.h
> +++ b/tools/perf/bench/futex.h
> @@ -21,6 +21,7 @@ struct bench_futex_parameters {
>  	bool fshared;
>  	bool mlockall;
>  	bool multi; /* lock-pi */
> +	bool broadcast; /* requeue */
>  	unsigned int runtime; /* seconds*/
>  	unsigned int nthreads;
>  	unsigned int nfutexes;
> -- 
> 2.26.2
> 

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 6/7] perf/bench-futex, requeue: Robustify futex_wait() handling
  2021-08-09  4:33 ` [PATCH 6/7] perf/bench-futex, requeue: Robustify futex_wait() handling Davidlohr Bueso
@ 2021-08-09 14:58   ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 15+ messages in thread
From: Arnaldo Carvalho de Melo @ 2021-08-09 14:58 UTC (permalink / raw)
  To: Davidlohr Bueso
  Cc: mingo, peterz, tglx, linux-perf-users, linux-kernel, Davidlohr Bueso

Em Sun, Aug 08, 2021 at 09:33:00PM -0700, Davidlohr Bueso escreveu:
> Do not assume success and account for EAGAIN or any other return value,
> however unlikely.

Thanks, applied.

- Arnaldo

 
> Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
> ---
>  tools/perf/bench/futex-requeue.c | 15 ++++++++++++++-
>  1 file changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
> index 80f40ee92b53..e4892ba6864f 100644
> --- a/tools/perf/bench/futex-requeue.c
> +++ b/tools/perf/bench/futex-requeue.c
> @@ -76,6 +76,8 @@ static void print_summary(void)
>  
>  static void *workerfn(void *arg __maybe_unused)
>  {
> +	int ret;
> +
>  	pthread_mutex_lock(&thread_lock);
>  	threads_starting--;
>  	if (!threads_starting)
> @@ -83,7 +85,18 @@ static void *workerfn(void *arg __maybe_unused)
>  	pthread_cond_wait(&thread_worker, &thread_lock);
>  	pthread_mutex_unlock(&thread_lock);
>  
> -	futex_wait(&futex1, 0, NULL, futex_flag);
> +	while (1) {
> +		ret = futex_wait(&futex1, 0, NULL, futex_flag);
> +		if (!ret)
> +			break;
> +
> +		if (ret && errno != EAGAIN) {
> +			if (!params.silent)
> +				warn("futex_wait");
> +			break;
> +		}
> +	}
> +
>  	return NULL;
>  }
>  
> -- 
> 2.26.2
> 

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 7/7] perf/bench-futex, requeue: Add --pi parameter
  2021-08-09  4:33 ` [PATCH 7/7] perf/bench-futex, requeue: Add --pi parameter Davidlohr Bueso
@ 2021-08-09 15:01   ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 15+ messages in thread
From: Arnaldo Carvalho de Melo @ 2021-08-09 15:01 UTC (permalink / raw)
  To: Davidlohr Bueso
  Cc: mingo, peterz, tglx, linux-perf-users, linux-kernel, Davidlohr Bueso

Em Sun, Aug 08, 2021 at 09:33:01PM -0700, Davidlohr Bueso escreveu:
> This extends the program to measure WAIT_REQUEUE_PI+CMP_REQUEUE_PI
> pairs, which are the underlying machinery behind priority-inheritance
> aware condition variables. The defaults are the same as with the regular
> non-pi version, requeueing one task at a time, with the exception that
> PI will always wakeup the first waiter.

Thanks, applied.

- Arnaldo

 
> Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
> ---
>  tools/perf/bench/futex-requeue.c | 100 +++++++++++++++++++++++--------
>  tools/perf/bench/futex.h         |  37 +++++++++++-
>  2 files changed, 111 insertions(+), 26 deletions(-)
> 
> diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
> index e4892ba6864f..03a05814d45e 100644
> --- a/tools/perf/bench/futex-requeue.c
> +++ b/tools/perf/bench/futex-requeue.c
> @@ -6,7 +6,8 @@
>   *                on futex2, N at a time.
>   *
>   * This program is particularly useful to measure the latency of nthread
> - * requeues without waking up any tasks -- thus mimicking a regular futex_wait.
> + * requeues without waking up any tasks (in the non-pi case) -- thus
> + * mimicking a regular futex_wait.
>   */
>  
>  /* For the CLR_() macros */
> @@ -53,6 +54,8 @@ static const struct option options[] = {
>  	OPT_BOOLEAN( 'S', "shared",   &params.fshared, "Use shared futexes instead of private ones"),
>  	OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
>  	OPT_BOOLEAN( 'B', "broadcast", &params.broadcast, "Requeue all threads at once"),
> +	OPT_BOOLEAN( 'p', "pi", &params.pi, "Use PI-aware variants of FUTEX_CMP_REQUEUE"),
> +
>  	OPT_END()
>  };
>  
> @@ -86,14 +89,30 @@ static void *workerfn(void *arg __maybe_unused)
>  	pthread_mutex_unlock(&thread_lock);
>  
>  	while (1) {
> -		ret = futex_wait(&futex1, 0, NULL, futex_flag);
> -		if (!ret)
> -			break;
> -
> -		if (ret && errno != EAGAIN) {
> -			if (!params.silent)
> -				warn("futex_wait");
> -			break;
> +		if (!params.pi) {
> +			ret = futex_wait(&futex1, 0, NULL, futex_flag);
> +			if (!ret)
> +				break;
> +
> +			if (ret && errno != EAGAIN) {
> +				if (!params.silent)
> +					warnx("futex_wait");
> +				break;
> +			}
> +		} else {
> +			ret = futex_wait_requeue_pi(&futex1, 0, &futex2,
> +						    NULL, futex_flag);
> +			if (!ret) {
> +				/* got the lock at futex2 */
> +				futex_unlock_pi(&futex2, futex_flag);
> +				break;
> +			}
> +
> +			if (ret && errno != EAGAIN) {
> +				if (!params.silent)
> +					warnx("futex_wait_requeue_pi");
> +				break;
> +			}
>  		}
>  	}
>  
> @@ -170,9 +189,10 @@ int bench_futex_requeue(int argc, const char **argv)
>  	if (params.broadcast)
>  		params.nrequeue = params.nthreads;
>  
> -	printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), "
> +	printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %s%p), "
>  	       "%d at a time.\n\n",  getpid(), params.nthreads,
> -	       params.fshared ? "shared":"private", &futex1, &futex2, params.nrequeue);
> +	       params.fshared ? "shared":"private", &futex1,
> +	       params.pi ? "PI ": "", &futex2, params.nrequeue);
>  
>  	init_stats(&requeued_stats);
>  	init_stats(&requeuetime_stats);
> @@ -182,7 +202,7 @@ int bench_futex_requeue(int argc, const char **argv)
>  	pthread_cond_init(&thread_worker, NULL);
>  
>  	for (j = 0; j < bench_repeat && !done; j++) {
> -		unsigned int nrequeued = 0;
> +		unsigned int nrequeued = 0, wakeups = 0;
>  		struct timeval start, end, runtime;
>  
>  		/* create, launch & block all threads */
> @@ -200,13 +220,30 @@ int bench_futex_requeue(int argc, const char **argv)
>  		/* Ok, all threads are patiently blocked, start requeueing */
>  		gettimeofday(&start, NULL);
>  		while (nrequeued < params.nthreads) {
> +			int r;
> +
>  			/*
> -			 * Do not wakeup any tasks blocked on futex1, allowing
> -			 * us to really measure futex_wait functionality.
> +			 * For the regular non-pi case, do not wakeup any tasks
> +			 * blocked on futex1, allowing us to really measure
> +			 * futex_wait functionality. For the PI case the first
> +			 * waiter is always awoken.
>  			 */
> -			nrequeued += futex_cmp_requeue(&futex1, 0, &futex2, 0,
> -						       params.nrequeue,
> -						       futex_flag);
> +			if (!params.pi) {
> +				r = futex_cmp_requeue(&futex1, 0, &futex2, 0,
> +						      params.nrequeue,
> +						      futex_flag);
> +			} else {
> +				r = futex_cmp_requeue_pi(&futex1, 0, &futex2,
> +							 params.nrequeue,
> +							 futex_flag);
> +				wakeups++; /* assume no error */
> +			}
> +
> +			if (r < 0)
> +				err(EXIT_FAILURE, "couldn't requeue from %p to %p",
> +				    &futex1, &futex2);
> +
> +			nrequeued += r;
>  		}
>  
>  		gettimeofday(&end, NULL);
> @@ -216,16 +253,29 @@ int bench_futex_requeue(int argc, const char **argv)
>  		update_stats(&requeuetime_stats, runtime.tv_usec);
>  
>  		if (!params.silent) {
> -			printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n",
> -			       j + 1, nrequeued, params.nthreads,
> -			       runtime.tv_usec / (double)USEC_PER_MSEC);
> +			if (!params.pi)
> +				printf("[Run %d]: Requeued %d of %d threads in "
> +				       "%.4f ms\n", j + 1, nrequeued,
> +				       params.nthreads,
> +				       runtime.tv_usec / (double)USEC_PER_MSEC);
> +			else {
> +				nrequeued -= wakeups;
> +				printf("[Run %d]: Awoke and Requeued (%d+%d) of "
> +				       "%d threads in %.4f ms\n",
> +				       j + 1, wakeups, nrequeued,
> +				       params.nthreads,
> +				       runtime.tv_usec / (double)USEC_PER_MSEC);
> +			}
> +
>  		}
>  
> -		/* everybody should be blocked on futex2, wake'em up */
> -		nrequeued = futex_wake(&futex2, nrequeued, futex_flag);
> -		if (params.nthreads != nrequeued)
> -			warnx("couldn't wakeup all tasks (%d/%d)",
> -			      nrequeued, params.nthreads);
> +		if (!params.pi) {
> +			/* everybody should be blocked on futex2, wake'em up */
> +			nrequeued = futex_wake(&futex2, nrequeued, futex_flag);
> +			if (params.nthreads != nrequeued)
> +				warnx("couldn't wakeup all tasks (%d/%d)",
> +				      nrequeued, params.nthreads);
> +		}
>  
>  		for (i = 0; i < params.nthreads; i++) {
>  			ret = pthread_join(worker[i], NULL);
> diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
> index 36f158650edf..f0ad6d988349 100644
> --- a/tools/perf/bench/futex.h
> +++ b/tools/perf/bench/futex.h
> @@ -21,6 +21,7 @@ struct bench_futex_parameters {
>  	bool fshared;
>  	bool mlockall;
>  	bool multi; /* lock-pi */
> +	bool pi; /* requeue-pi */
>  	bool broadcast; /* requeue */
>  	unsigned int runtime; /* seconds*/
>  	unsigned int nthreads;
> @@ -93,7 +94,7 @@ futex_unlock_pi(u_int32_t *uaddr, int opflags)
>  /**
>  * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2
>  * @nr_wake:        wake up to this many tasks
> -* @nr_requeue:        requeue up to this many tasks
> +* @nr_requeue:     requeue up to this many tasks
>  */
>  static inline int
>  futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wake,
> @@ -102,4 +103,38 @@ futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wak
>  	return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2,
>  		 val, opflags);
>  }
> +
> +/**
> + * futex_wait_requeue_pi() - block on uaddr and prepare to requeue to uaddr2
> + * @uaddr:	non-PI futex source
> + * @uaddr2:	PI futex target
> + *
> + * This is the first half of the requeue_pi mechanism. It shall always be
> + * paired with futex_cmp_requeue_pi().
> + */
> +static inline int
> +futex_wait_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2,
> +		      struct timespec *timeout, int opflags)
> +{
> +	return futex(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0,
> +		     opflags);
> +}
> +
> +/**
> + * futex_cmp_requeue_pi() - requeue tasks from uaddr to uaddr2
> + * @uaddr:	non-PI futex source
> + * @uaddr2:	PI futex target
> + * @nr_requeue:	requeue up to this many tasks
> + *
> + * This is the second half of the requeue_pi mechanism. It shall always be
> + * paired with futex_wait_requeue_pi(). The first waiter is always awoken.
> + */
> +static inline int
> +futex_cmp_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2,
> +		     int nr_requeue, int opflags)
> +{
> +	return futex(uaddr, FUTEX_CMP_REQUEUE_PI, 1, nr_requeue, uaddr2,
> +		     val, opflags);
> +}
> +
>  #endif /* _FUTEX_H */
> -- 
> 2.26.2
> 

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/7] perf/bench-futex: Add --mlockall parameter
  2021-08-09 14:56   ` Arnaldo Carvalho de Melo
@ 2021-08-09 16:11     ` Davidlohr Bueso
  0 siblings, 0 replies; 15+ messages in thread
From: Davidlohr Bueso @ 2021-08-09 16:11 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: mingo, peterz, tglx, linux-perf-users, linux-kernel, Davidlohr Bueso

On Mon, 09 Aug 2021, Arnaldo Carvalho de Melo wrote:

>Applied.

Thanks!

>
>At some point these options could be handled in a common
>futex_parse_options() function that would consume argv[] and then the
>specific futex benchmarks would continue from where the common function
>left off.

I agree. I'll see about a follow up to this series in the future, there
are a few other things we can share among the programs.

Thanks,
Davidlohr

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/7] perf/bench-futex: Factor out futex_flag
  2021-08-09 14:54   ` Arnaldo Carvalho de Melo
@ 2021-08-09 16:30     ` Davidlohr Bueso
  0 siblings, 0 replies; 15+ messages in thread
From: Davidlohr Bueso @ 2021-08-09 16:30 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: mingo, peterz, tglx, linux-perf-users, linux-kernel, Davidlohr Bueso

On Mon, 09 Aug 2021, Arnaldo Carvalho de Melo wrote:
>Wouldn't it be better to have it defined in one place and then in futex.h
>just have an 'extern int futex_flag;' declaration?

Sure.

>
>I applied the first two patches already.

Ok, just so I'm clear: this one won't get picked up right now, then.
(You don't want me to respin this patch in the form of a v3, right?)

Thanks,
Davidlohr

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2021-08-09 16:31 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-09  4:32 [PATCH -tip v2 0/7] perf/bench-futex: Misc updates Davidlohr Bueso
2021-08-09  4:32 ` [PATCH 1/7] perf/bench-futex: Group test parameters cleanup Davidlohr Bueso
2021-08-09  4:32 ` [PATCH 2/7] perf/bench-futex: Remove bogus backslash from comment Davidlohr Bueso
2021-08-09  4:32 ` [PATCH 3/7] perf/bench-futex: Factor out futex_flag Davidlohr Bueso
2021-08-09 14:54   ` Arnaldo Carvalho de Melo
2021-08-09 16:30     ` Davidlohr Bueso
2021-08-09  4:32 ` [PATCH 4/7] perf/bench-futex: Add --mlockall parameter Davidlohr Bueso
2021-08-09 14:56   ` Arnaldo Carvalho de Melo
2021-08-09 16:11     ` Davidlohr Bueso
2021-08-09  4:32 ` [PATCH 5/7] perf/bench-futex, requeue: Add --broadcast option Davidlohr Bueso
2021-08-09 14:57   ` Arnaldo Carvalho de Melo
2021-08-09  4:33 ` [PATCH 6/7] perf/bench-futex, requeue: Robustify futex_wait() handling Davidlohr Bueso
2021-08-09 14:58   ` Arnaldo Carvalho de Melo
2021-08-09  4:33 ` [PATCH 7/7] perf/bench-futex, requeue: Add --pi parameter Davidlohr Bueso
2021-08-09 15:01   ` Arnaldo Carvalho de Melo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.