From: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
To: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: acme@kernel.org, jolsa@kernel.org, disgoel@linux.vnet.ibm.com,
mpe@ellerman.id.au, linux-perf-users@vger.kernel.org,
linuxppc-dev@lists.ozlabs.org, maddy@linux.vnet.ibm.com,
rnsastry@linux.ibm.com, kjain@linux.ibm.com,
linux-kernel@vger.kernel.org, irogers@google.com
Subject: Re: [PATCH v2 3/4] tools/perf: Fix perf numa bench to fix usage of affinity for machines with #CPUs > 1K
Date: Fri, 8 Apr 2022 17:58:43 +0530 [thread overview]
Message-ID: <20220408122843.GG568950@linux.vnet.ibm.com> (raw)
In-Reply-To: <20220406175113.87881-4-atrajeev@linux.vnet.ibm.com>
* Athira Rajeev <atrajeev@linux.vnet.ibm.com> [2022-04-06 23:21:12]:
> perf bench numa testcase fails on systems with CPU's
> more than 1K.
>
> Testcase: perf bench numa mem -p 1 -t 3 -P 512 -s 100 -zZ0qcm --thp 1
> Snippet of code:
> <<>>
> perf: bench/numa.c:302: bind_to_node: Assertion `!(ret)' failed.
> Aborted (core dumped)
> <<>>
>
> bind_to_node function uses "sched_getaffinity" to save the original
> cpumask and this call is returning EINVAL ((invalid argument).
> This happens because the default mask size in glibc is 1024.
> To overcome this 1024 CPUs mask size limitation of cpu_set_t,
> change the mask size using the CPU_*_S macros ie, use CPU_ALLOC to
> allocate cpumask, CPU_ALLOC_SIZE for size. Apart from fixing this
> for "orig_mask", apply same logic to "mask" as well which is used to
> setaffinity so that mask size is large enough to represent number
> of possible CPU's in the system.
>
> sched_getaffinity is used in one more place in perf numa bench. It
> is in "bind_to_cpu" function. Apply the same logic there also. Though
> currently no failure is reported from there, it is ideal to change
> getaffinity to work with such system configurations having CPU's more
> than default mask size supported by glibc.
>
> Also fix "sched_setaffinity" to use mask size which is large enough
> to represent number of possible CPU's in the system.
>
> Fixed all places where "bind_cpumask" which is part of "struct
> thread_data" is used such that bind_cpumask works in all configuration.
>
> Tested-by: Disha Goel <disgoel@linux.vnet.ibm.com>
> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
> Reported-by: Disha Goel <disgoel@linux.vnet.ibm.com>
Looks good to me.
Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
> ---
> tools/perf/bench/numa.c | 97 ++++++++++++++++++++++++++++++-----------
> 1 file changed, 71 insertions(+), 26 deletions(-)
>
> diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
> index f2640179ada9..29e41e32bd88 100644
> --- a/tools/perf/bench/numa.c
> +++ b/tools/perf/bench/numa.c
> @@ -54,7 +54,7 @@
>
> struct thread_data {
> int curr_cpu;
> - cpu_set_t bind_cpumask;
> + cpu_set_t *bind_cpumask;
> int bind_node;
> u8 *process_data;
> int process_nr;
> @@ -266,46 +266,71 @@ static bool node_has_cpus(int node)
> return ret;
> }
>
> -static cpu_set_t bind_to_cpu(int target_cpu)
> +static cpu_set_t *bind_to_cpu(int target_cpu)
> {
> - cpu_set_t orig_mask, mask;
> + int nrcpus = numa_num_possible_cpus();
> + cpu_set_t *orig_mask, *mask;
> + size_t size;
> int ret;
>
> - ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
> - BUG_ON(ret);
> + orig_mask = CPU_ALLOC(nrcpus);
> + BUG_ON(!orig_mask);
> + size = CPU_ALLOC_SIZE(nrcpus);
> + CPU_ZERO_S(size, orig_mask);
> +
> + ret = sched_getaffinity(0, size, orig_mask);
> + if (ret) {
> + CPU_FREE(orig_mask);
> + BUG_ON(ret);
> + }
>
> - CPU_ZERO(&mask);
> + mask = CPU_ALLOC(nrcpus);
> + BUG_ON(!mask);
> + CPU_ZERO_S(size, mask);
>
> if (target_cpu == -1) {
> int cpu;
>
> for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
> - CPU_SET(cpu, &mask);
> + CPU_SET_S(cpu, size, mask);
> } else {
> BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
> - CPU_SET(target_cpu, &mask);
> + CPU_SET_S(target_cpu, size, mask);
> }
>
> - ret = sched_setaffinity(0, sizeof(mask), &mask);
> + ret = sched_setaffinity(0, size, mask);
> + CPU_FREE(mask);
> BUG_ON(ret);
>
> return orig_mask;
> }
>
> -static cpu_set_t bind_to_node(int target_node)
> +static cpu_set_t *bind_to_node(int target_node)
> {
> - cpu_set_t orig_mask, mask;
> + int nrcpus = numa_num_possible_cpus();
> + cpu_set_t *orig_mask, *mask;
> + size_t size;
> int cpu;
> int ret;
>
> - ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
> - BUG_ON(ret);
> + orig_mask = CPU_ALLOC(nrcpus);
> + BUG_ON(!orig_mask);
> + size = CPU_ALLOC_SIZE(nrcpus);
> + CPU_ZERO_S(size, orig_mask);
> +
> + ret = sched_getaffinity(0, size, orig_mask);
> + if (ret) {
> + CPU_FREE(orig_mask);
> + BUG_ON(ret);
> + }
>
> - CPU_ZERO(&mask);
> + mask = CPU_ALLOC(nrcpus);
> + BUG_ON(!mask);
> + CPU_ZERO_S(size, mask);
>
> if (target_node == NUMA_NO_NODE) {
> for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
> - CPU_SET(cpu, &mask);
> + CPU_SET_S(cpu, size, mask);
> } else {
> struct bitmask *cpumask = numa_allocate_cpumask();
>
> @@ -313,24 +338,29 @@ static cpu_set_t bind_to_node(int target_node)
> if (!numa_node_to_cpus(target_node, cpumask)) {
> for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
> if (numa_bitmask_isbitset(cpumask, cpu))
> - CPU_SET(cpu, &mask);
> + CPU_SET_S(cpu, size, mask);
> }
> }
> numa_free_cpumask(cpumask);
> }
>
> - ret = sched_setaffinity(0, sizeof(mask), &mask);
> + ret = sched_setaffinity(0, size, mask);
> + CPU_FREE(mask);
> BUG_ON(ret);
>
> return orig_mask;
> }
>
> -static void bind_to_cpumask(cpu_set_t mask)
> +static void bind_to_cpumask(cpu_set_t *mask)
> {
> int ret;
> + size_t size = CPU_ALLOC_SIZE(numa_num_possible_cpus());
>
> - ret = sched_setaffinity(0, sizeof(mask), &mask);
> - BUG_ON(ret);
> + ret = sched_setaffinity(0, size, mask);
> + if (ret) {
> + CPU_FREE(mask);
> + BUG_ON(ret);
> + }
> }
>
> static void mempol_restore(void)
> @@ -376,7 +406,7 @@ do { \
> static u8 *alloc_data(ssize_t bytes0, int map_flags,
> int init_zero, int init_cpu0, int thp, int init_random)
> {
> - cpu_set_t orig_mask;
> + cpu_set_t *orig_mask;
> ssize_t bytes;
> u8 *buf;
> int ret;
> @@ -434,6 +464,7 @@ static u8 *alloc_data(ssize_t bytes0, int map_flags,
> /* Restore affinity: */
> if (init_cpu0) {
> bind_to_cpumask(orig_mask);
> + CPU_FREE(orig_mask);
> mempol_restore();
> }
>
> @@ -589,6 +620,7 @@ static int parse_setup_cpu_list(void)
> BUG_ON(bind_cpu_0 > bind_cpu_1);
>
> for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
> + size_t size = CPU_ALLOC_SIZE(g->p.nr_cpus);
> int i;
>
> for (i = 0; i < mul; i++) {
> @@ -608,10 +640,12 @@ static int parse_setup_cpu_list(void)
> tprintf("%2d", bind_cpu);
> }
>
> - CPU_ZERO(&td->bind_cpumask);
> + td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus);
> + BUG_ON(!td->bind_cpumask);
> + CPU_ZERO_S(size, td->bind_cpumask);
> for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) {
> BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus);
> - CPU_SET(cpu, &td->bind_cpumask);
> + CPU_SET_S(cpu, size, td->bind_cpumask);
> }
> t++;
> }
> @@ -1241,7 +1275,7 @@ static void *worker_thread(void *__tdata)
> * by migrating to CPU#0:
> */
> if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) {
> - cpu_set_t orig_mask;
> + cpu_set_t *orig_mask;
> int target_cpu;
> int this_cpu;
>
> @@ -1265,6 +1299,7 @@ static void *worker_thread(void *__tdata)
> printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu);
>
> bind_to_cpumask(orig_mask);
> + CPU_FREE(orig_mask);
> }
>
> if (details >= 3) {
> @@ -1398,21 +1433,31 @@ static void init_thread_data(void)
>
> for (t = 0; t < g->p.nr_tasks; t++) {
> struct thread_data *td = g->threads + t;
> + size_t cpuset_size = CPU_ALLOC_SIZE(g->p.nr_cpus);
> int cpu;
>
> /* Allow all nodes by default: */
> td->bind_node = NUMA_NO_NODE;
>
> /* Allow all CPUs by default: */
> - CPU_ZERO(&td->bind_cpumask);
> + td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus);
> + BUG_ON(!td->bind_cpumask);
> + CPU_ZERO_S(cpuset_size, td->bind_cpumask);
> for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
> - CPU_SET(cpu, &td->bind_cpumask);
> + CPU_SET_S(cpu, cpuset_size, td->bind_cpumask);
> }
> }
>
> static void deinit_thread_data(void)
> {
> ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
> + int t;
> +
> + /* Free the bind_cpumask allocated for thread_data */
> + for (t = 0; t < g->p.nr_tasks; t++) {
> + struct thread_data *td = g->threads + t;
> + CPU_FREE(td->bind_cpumask);
> + }
>
> free_data(g->threads, size);
> }
> --
> 2.35.1
>
next prev parent reply other threads:[~2022-04-08 12:29 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-04-06 17:51 [PATCH v2 0/4] Fix perf bench numa, futex and epoll to work with machines having #CPUs > 1K Athira Rajeev
2022-04-06 17:51 ` [PATCH v2 1/4] tools/perf: Fix perf bench futex to correct usage of affinity for machines with " Athira Rajeev
2022-04-08 12:27 ` Srikar Dronamraju
2022-04-06 17:51 ` [PATCH v2 2/4] tools/perf: Fix perf bench epoll " Athira Rajeev
2022-04-06 17:51 ` [PATCH v2 3/4] tools/perf: Fix perf numa bench to fix " Athira Rajeev
2022-04-08 12:28 ` Srikar Dronamraju [this message]
2022-04-06 17:51 ` [PATCH v2 4/4] tools/perf: Fix perf bench numa testcase to check if CPU used to bind task is online Athira Rajeev
2022-04-08 12:26 ` Srikar Dronamraju
2022-04-09 6:29 ` Athira Rajeev
2022-04-09 15:20 ` Arnaldo Carvalho de Melo
2022-04-11 13:54 ` Athira Rajeev
2022-04-07 0:35 ` [PATCH v2 0/4] Fix perf bench numa, futex and epoll to work with machines having #CPUs > 1K Ian Rogers
2022-04-07 4:31 ` Athira Rajeev
2022-04-09 15:28 ` Arnaldo Carvalho de Melo
2022-04-09 17:18 ` Arnaldo Carvalho de Melo
2022-04-12 16:33 ` Athira Rajeev
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220408122843.GG568950@linux.vnet.ibm.com \
--to=srikar@linux.vnet.ibm.com \
--cc=acme@kernel.org \
--cc=atrajeev@linux.vnet.ibm.com \
--cc=disgoel@linux.vnet.ibm.com \
--cc=irogers@google.com \
--cc=jolsa@kernel.org \
--cc=kjain@linux.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=maddy@linux.vnet.ibm.com \
--cc=mpe@ellerman.id.au \
--cc=rnsastry@linux.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).