From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1757359AbcEEUqU (ORCPT ); Thu, 5 May 2016 16:46:20 -0400 Received: from www.linutronix.de ([62.245.132.108]:56801 "EHLO Galois.linutronix.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1757056AbcEEUqB (ORCPT ); Thu, 5 May 2016 16:46:01 -0400 Message-Id: <20160505204354.224426158@linutronix.de> User-Agent: quilt/0.63-1 Date: Thu, 05 May 2016 20:44:06 -0000 From: Thomas Gleixner To: LKML Cc: Sebastian Andrzej Siewior , Linus Torvalds , Darren Hart , Peter Zijlstra , Ingo Molnar , Michael Kerrisk , Davidlohr Bueso , Chris Mason , "Carlos O'Donell" , Torvald Riegel , Eric Dumazet Subject: [patch V2 5/7] perf/bench/futex-hash: Support NUMA References: <20160505204230.932454245@linutronix.de> MIME-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-15 Content-Disposition: inline; filename=perf-bench-futex-hash-Support-NUMA.patch X-Linutronix-Spam-Score: -1.0 X-Linutronix-Spam-Level: - X-Linutronix-Spam-Status: No , -1.0 points, 5.0 required, ALL_TRUSTED=-1,SHORTCIRCUIT=-0.0001,URIBL_BLOCKED=0.001 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org

This adds a new option to tell perf on which NUMA node the hash benchmark
should run. If set, then:

 - The test is bound to the node
 - Memory is allocated on the local NUMA node
 - The threads are bound to the CPUs on the node

The NUMA node can be specified by the -n argument.

Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner --- tools/perf/bench/Build | 4 ++ tools/perf/bench/futex-hash.c | 89 +++++++++++++++++++++++++++++++++++++------ 2 files changed, 82 insertions(+), 11 deletions(-) diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index 60bf119..9e6e518 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -1,3 +1,7 @@ +ifdef CONFIG_NUMA +CFLAGS_futex-hash.o += -DCONFIG_NUMA=1 +endif + perf-y += sched-messaging.o perf-y += sched-pipe.o perf-y += mem-functions.o diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 0999ac5..a1c6ee9 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -20,6 +20,9 @@ #include #include #include +#ifdef CONFIG_NUMA +#include +#endif static unsigned int nthreads = 0; static unsigned int nsecs = 10; @@ -27,6 +30,7 @@ static unsigned int nsecs = 10; static unsigned int nfutexes = 1024; static bool fshared = false, done = false, silent = false; static int futex_flag = 0; +static int numa_node = -1; struct timeval start, end, runtime; static pthread_mutex_t thread_lock; @@ -39,7 +43,7 @@ struct worker { u_int32_t *futex; pthread_t thread; unsigned long ops; -}; +} __attribute__((aligned(128))); static const struct option options[] = { OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), @@ -47,9 +51,28 @@ static const struct option options[] = { OPT_UINTEGER('f', "futexes", &nfutexes, "Specify amount of futexes per threads"), OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), +#ifdef CONFIG_NUMA + OPT_INTEGER( 'n', "numa", &numa_node, "Specify the NUMA node"), +#endif OPT_END() }; +#ifndef CONFIG_NUMA +static int numa_run_on_node(int node __maybe_unused) { return 0; } +static int numa_node_of_cpu(int node __maybe_unused) { return 0; } +static void *numa_alloc_local(size_t size) 
{ return malloc(size); } +static void numa_free(void *p, size_t size __maybe_unused) { return free(p); } +#endif + +static bool cpu_is_local(int cpu) +{ + if (numa_node < 0) + return true; + if (numa_node_of_cpu(cpu) == numa_node) + return true; + return false; +} + static const char * const bench_futex_hash_usage[] = { "perf bench futex hash ", NULL @@ -115,6 +138,8 @@ int bench_futex_hash(int argc, const char **argv, unsigned int i, ncpus; pthread_attr_t thread_attr; struct worker *worker = NULL; + char *node_str = NULL; + unsigned int cpunum; argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0); if (argc) { @@ -128,18 +153,50 @@ int bench_futex_hash(int argc, const char **argv, act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); - if (!nthreads) /* default to the number of CPUs */ - nthreads = ncpus; + if (!nthreads) { + /* default to the number of CPUs per NUMA node */ + if (numa_node < 0) { + nthreads = ncpus; + } else { + for (i = 0; i < ncpus; i++) { + if (cpu_is_local(i)) + nthreads++; + } + if (!nthreads) + err(EXIT_FAILURE, "No online CPUs for this node"); + } + } else { + int cpu_available = 0; + + for (i = 0; i < ncpus && !cpu_available; i++) { + if (cpu_is_local(i)) + cpu_available = 1; + } + if (!cpu_available) + err(EXIT_FAILURE, "No online CPUs for this node"); + } + + if (numa_node >= 0) { + ret = numa_run_on_node(numa_node); + if (ret < 0) + err(EXIT_FAILURE, "numa_run_on_node"); + ret = asprintf(&node_str, " on node %d", numa_node); + if (ret < 0) + err(EXIT_FAILURE, "numa_node, asprintf"); + } - worker = calloc(nthreads, sizeof(*worker)); + worker = numa_alloc_local(nthreads * sizeof(*worker)); if (!worker) goto errmem; if (!fshared) futex_flag = FUTEX_PRIVATE_FLAG; - printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n", - getpid(), nthreads, nfutexes, fshared ? 
"shared":"private", nsecs); + printf("Run summary [PID %d]: %d threads%s, each operating on %d [%s] futexes for %d secs.\n\n", + getpid(), nthreads, + node_str ? : "", + nfutexes, fshared ? "shared":"private", + nsecs); init_stats(&throughput_stats); pthread_mutex_init(&thread_lock, NULL); @@ -149,14 +206,24 @@ int bench_futex_hash(int argc, const char **argv, threads_starting = nthreads; pthread_attr_init(&thread_attr); gettimeofday(&start, NULL); - for (i = 0; i < nthreads; i++) { + for (cpunum = 0, i = 0; i < nthreads; i++, cpunum++) { + + do { + if (cpu_is_local(cpunum)) + break; + cpunum++; + if (cpunum > ncpus) + cpunum = 0; + } while (1); + worker[i].tid = i; - worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex)); + worker[i].futex = numa_alloc_local(nfutexes * + sizeof(*worker[i].futex)); if (!worker[i].futex) goto errmem; CPU_ZERO(&cpu); - CPU_SET(i % ncpus, &cpu); + CPU_SET(cpunum % ncpus, &cpu); ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu); if (ret) @@ -203,12 +270,12 @@ int bench_futex_hash(int argc, const char **argv, &worker[i].futex[nfutexes-1], t); } - free(worker[i].futex); + numa_free(worker[i].futex, nfutexes * sizeof(*worker[i].futex)); } print_summary(); - free(worker); + numa_free(worker, nthreads * sizeof(*worker)); return ret; errmem: err(EXIT_FAILURE, "calloc"); -- 2.1.4