All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/3] perf record: Fix per-thread option.
@ 2022-04-14  1:46 Ian Rogers
  2022-04-14  1:46 ` [PATCH v2 2/3] perf cpumap: Switch to using perf_cpu_map API Ian Rogers
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Ian Rogers @ 2022-04-14  1:46 UTC (permalink / raw)
  To: Alexey Bayduraev, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Mark Rutland, Alexander Shishkin,
	Jiri Olsa, Namhyung Kim, Ian Rogers, Alexey Bayduraev,
	Andi Kleen, Riccardo Mancini, linux-kernel, linux-perf-users
  Cc: Stephane Eranian

From: Alexey Bayduraev <alexey.bayduraev@gmail.com>

Per-thread mode doesn't have specific CPUs for events, so add checks for
this case.

Minor fix to a pr_debug by Ian Rogers <irogers@google.com> to avoid an
out-of-bounds array access.

Reported-by: Ian Rogers <irogers@google.com>
Fixes: 7954f71689f9 ("perf record: Introduce thread affinity and mmap masks")
Signed-off-by: Ian Rogers <irogers@google.com>
Signed-off-by: Alexey Bayduraev <alexey.bayduraev@gmail.com>
---
 tools/perf/builtin-record.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index ba74fab02e62..069825c48d40 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -989,8 +989,11 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru
 	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
 	struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
 
-	thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
-					      thread_data->mask->maps.nbits);
+	if (cpu_map__is_dummy(cpus))
+		thread_data->nr_mmaps = nr_mmaps;
+	else
+		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
+						      thread_data->mask->maps.nbits);
 	if (mmap) {
 		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
 		if (!thread_data->maps)
@@ -1007,16 +1010,17 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru
 		 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
 
 	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
-		if (test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) {
+		if (cpu_map__is_dummy(cpus) ||
+		    test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) {
 			if (thread_data->maps) {
 				thread_data->maps[tm] = &mmap[m];
 				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
-					  thread_data, cpus->map[m].cpu, tm, m);
+					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
 			}
 			if (thread_data->overwrite_maps) {
 				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
 				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
-					  thread_data, cpus->map[m].cpu, tm, m);
+					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
 			}
 			tm++;
 		}
@@ -3329,6 +3333,9 @@ static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_c
 {
 	int c;
 
+	if (cpu_map__is_dummy(cpus))
+		return;
+
 	for (c = 0; c < cpus->nr; c++)
 		set_bit(cpus->map[c].cpu, mask->bits);
 }
@@ -3680,6 +3687,11 @@ static int record__init_thread_masks(struct record *rec)
 	if (!record__threads_enabled(rec))
 		return record__init_thread_default_masks(rec, cpus);
 
+	if (cpu_map__is_dummy(cpus)) {
+		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
+		return -EINVAL;
+	}
+
 	switch (rec->opts.threads_spec) {
 	case THREAD_SPEC__CPU:
 		ret = record__init_thread_cpu_masks(rec, cpus);
-- 
2.36.0.rc0.470.gd361397f0d-goog


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v2 2/3] perf cpumap: Switch to using perf_cpu_map API
  2022-04-14  1:46 [PATCH v2 1/3] perf record: Fix per-thread option Ian Rogers
@ 2022-04-14  1:46 ` Ian Rogers
  2022-04-14 12:09   ` Arnaldo Carvalho de Melo
  2022-04-14  1:46 ` [PATCH v2 3/3] perf test: Add basic perf record tests Ian Rogers
  2022-04-14 12:09 ` [PATCH v2 1/3] perf record: Fix per-thread option Arnaldo Carvalho de Melo
  2 siblings, 1 reply; 6+ messages in thread
From: Ian Rogers @ 2022-04-14  1:46 UTC (permalink / raw)
  To: Alexey Bayduraev, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Mark Rutland, Alexander Shishkin,
	Jiri Olsa, Namhyung Kim, Ian Rogers, Alexey Bayduraev,
	Andi Kleen, Riccardo Mancini, linux-kernel, linux-perf-users
  Cc: Stephane Eranian

Switch some raw accesses to the cpu map to using the library API. This
can help with reference count checking.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/builtin-record.c          | 13 ++++++------
 tools/perf/util/bpf_counter_cgroup.c | 31 ++++++++++++++--------------
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 069825c48d40..a5cf6a99d67f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1011,7 +1011,7 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru
 
 	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
 		if (cpu_map__is_dummy(cpus) ||
-		    test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) {
+		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
 			if (thread_data->maps) {
 				thread_data->maps[tm] = &mmap[m];
 				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
@@ -3331,13 +3331,14 @@ struct option *record_options = __record_options;
 
 static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
 {
-	int c;
+	struct perf_cpu cpu;
+	int idx;
 
 	if (cpu_map__is_dummy(cpus))
 		return;
 
-	for (c = 0; c < cpus->nr; c++)
-		set_bit(cpus->map[c].cpu, mask->bits);
+	perf_cpu_map__for_each_cpu(cpu, idx, cpus)
+		set_bit(cpu.cpu, mask->bits);
 }
 
 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
@@ -3404,8 +3405,8 @@ static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map
 	pr_debug("nr_threads: %d\n", rec->nr_threads);
 
 	for (t = 0; t < rec->nr_threads; t++) {
-		set_bit(cpus->map[t].cpu, rec->thread_masks[t].maps.bits);
-		set_bit(cpus->map[t].cpu, rec->thread_masks[t].affinity.bits);
+		set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
+		set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
 		if (verbose) {
 			pr_debug("thread_masks[%d]: ", t);
 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c
index ac60c08e8e2a..a4b676920da0 100644
--- a/tools/perf/util/bpf_counter_cgroup.c
+++ b/tools/perf/util/bpf_counter_cgroup.c
@@ -47,7 +47,7 @@ static int bperf_load_program(struct evlist *evlist)
 	struct evsel *evsel;
 	struct cgroup *cgrp, *leader_cgrp;
 	__u32 i, cpu;
-	__u32 nr_cpus = evlist->core.all_cpus->nr;
+	__u32 nr_cpus = perf_cpu_map__nr(evlist->core.all_cpus);
 	int total_cpus = cpu__max_cpu().cpu;
 	int map_size, map_fd;
 	int prog_fd, err;
@@ -125,7 +125,7 @@ static int bperf_load_program(struct evlist *evlist)
 			for (cpu = 0; cpu < nr_cpus; cpu++) {
 				int fd = FD(evsel, cpu);
 				__u32 idx = evsel->core.idx * total_cpus +
-					evlist->core.all_cpus->map[cpu].cpu;
+					perf_cpu_map__cpu(evlist->core.all_cpus, cpu).cpu;
 
 				err = bpf_map_update_elem(map_fd, &idx, &fd,
 							  BPF_ANY);
@@ -207,13 +207,13 @@ static int bperf_cgrp__install_pe(struct evsel *evsel __maybe_unused,
  */
 static int bperf_cgrp__sync_counters(struct evlist *evlist)
 {
-	int i, cpu;
-	int nr_cpus = evlist->core.all_cpus->nr;
+	struct perf_cpu cpu;
+	int idx;
 	int prog_fd = bpf_program__fd(skel->progs.trigger_read);
 
-	for (i = 0; i < nr_cpus; i++) {
-		cpu = evlist->core.all_cpus->map[i].cpu;
-		bperf_trigger_reading(prog_fd, cpu);
+	perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+		cpu = perf_cpu_map__cpu(evlist->core.all_cpus, i);
+		bperf_trigger_reading(prog_fd, cpu.cpu);
 	}
 
 	return 0;
@@ -244,12 +244,10 @@ static int bperf_cgrp__disable(struct evsel *evsel)
 static int bperf_cgrp__read(struct evsel *evsel)
 {
 	struct evlist *evlist = evsel->evlist;
-	int i, cpu, nr_cpus = evlist->core.all_cpus->nr;
 	int total_cpus = cpu__max_cpu().cpu;
 	struct perf_counts_values *counts;
 	struct bpf_perf_event_value *values;
 	int reading_map_fd, err = 0;
-	__u32 idx;
 
 	if (evsel->core.idx)
 		return 0;
@@ -263,7 +261,10 @@ static int bperf_cgrp__read(struct evsel *evsel)
 	reading_map_fd = bpf_map__fd(skel->maps.cgrp_readings);
 
 	evlist__for_each_entry(evlist, evsel) {
-		idx = evsel->core.idx;
+		__u32 idx = evsel->core.idx;
+		int i;
+		struct perf_cpu_map cpu;
+
 		err = bpf_map_lookup_elem(reading_map_fd, &idx, values);
 		if (err) {
 			pr_err("bpf map lookup failed: idx=%u, event=%s, cgrp=%s\n",
@@ -271,13 +272,11 @@ static int bperf_cgrp__read(struct evsel *evsel)
 			goto out;
 		}
 
-		for (i = 0; i < nr_cpus; i++) {
-			cpu = evlist->core.all_cpus->map[i].cpu;
-
+		perf_cpu_map__for_each_cpu(cpu, i, evlist->core.all_cpu) {
 			counts = perf_counts(evsel->counts, i, 0);
-			counts->val = values[cpu].counter;
-			counts->ena = values[cpu].enabled;
-			counts->run = values[cpu].running;
+			counts->val = values[cpu.cpu].counter;
+			counts->ena = values[cpu.cpu].enabled;
+			counts->run = values[cpu.cpu].running;
 		}
 	}
 
-- 
2.36.0.rc0.470.gd361397f0d-goog


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v2 3/3] perf test: Add basic perf record tests
  2022-04-14  1:46 [PATCH v2 1/3] perf record: Fix per-thread option Ian Rogers
  2022-04-14  1:46 ` [PATCH v2 2/3] perf cpumap: Switch to using perf_cpu_map API Ian Rogers
@ 2022-04-14  1:46 ` Ian Rogers
  2022-04-14 12:10   ` Arnaldo Carvalho de Melo
  2022-04-14 12:09 ` [PATCH v2 1/3] perf record: Fix per-thread option Arnaldo Carvalho de Melo
  2 siblings, 1 reply; 6+ messages in thread
From: Ian Rogers @ 2022-04-14  1:46 UTC (permalink / raw)
  To: Alexey Bayduraev, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Mark Rutland, Alexander Shishkin,
	Jiri Olsa, Namhyung Kim, Ian Rogers, Alexey Bayduraev,
	Andi Kleen, Riccardo Mancini, linux-kernel, linux-perf-users
  Cc: Stephane Eranian

Test the --per-thread flag.
Test Intel machine state capturing.

Suggested-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/tests/shell/record.sh | 42 ++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100755 tools/perf/tests/shell/record.sh

diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
new file mode 100755
index 000000000000..cd1cf14259b8
--- /dev/null
+++ b/tools/perf/tests/shell/record.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+# perf record tests
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+err=0
+test_per_thread() {
+  echo "Basic --per-thread mode test"
+  perf record -e instructions:u --per-thread -o- true 2> /dev/null \
+    | perf report -i- -q \
+    | egrep -q true
+  echo "Basic --per-thread mode test [Success]"
+}
+
+test_register_capture() {
+  echo "Register capture test"
+  if ! perf list | egrep -q 'br_inst_retired.near_call'
+  then
+    echo "Register capture test [Skipped missing instruction]"
+    return
+  fi
+  if ! perf record --intr-regs=\? 2>&1 | egrep -q 'available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15'
+  then
+    echo "Register capture test [Skipped missing registers]"
+    return
+  fi
+  if ! perf record -o - --intr-regs=di,r8,dx,cx -e cpu/br_inst_retired.near_call/p \
+    -c 1000 --per-thread true 2> /dev/null \
+    | perf script -F ip,sym,iregs -i - 2> /dev/null \
+    | egrep -q "DI:"
+  then
+    echo "Register capture test [Failed missing output]"
+    err=1
+    return
+  fi
+  echo "Register capture test [Success]"
+}
+
+test_per_thread
+test_register_capture
+exit $err
-- 
2.36.0.rc0.470.gd361397f0d-goog


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 1/3] perf record: Fix per-thread option.
  2022-04-14  1:46 [PATCH v2 1/3] perf record: Fix per-thread option Ian Rogers
  2022-04-14  1:46 ` [PATCH v2 2/3] perf cpumap: Switch to using perf_cpu_map API Ian Rogers
  2022-04-14  1:46 ` [PATCH v2 3/3] perf test: Add basic perf record tests Ian Rogers
@ 2022-04-14 12:09 ` Arnaldo Carvalho de Melo
  2 siblings, 0 replies; 6+ messages in thread
From: Arnaldo Carvalho de Melo @ 2022-04-14 12:09 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Alexey Bayduraev, Peter Zijlstra, Ingo Molnar, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Namhyung Kim, Alexey Bayduraev,
	Andi Kleen, Riccardo Mancini, linux-kernel, linux-perf-users,
	Stephane Eranian

Em Wed, Apr 13, 2022 at 06:46:40PM -0700, Ian Rogers escreveu:
> From: Alexey Bayduraev <alexey.bayduraev@gmail.com>
> 
> Per-thread mode doesn't have specific CPUs for events, so add checks for
> this case.
> 
> Minor fix to a pr_debug by Ian Rogers <irogers@google.com> to avoid an
> out-of-bounds array access.

Thanks, applied to perf/urgent.

- Arnaldo

 
> Reported-by: Ian Rogers <irogers@google.com>
> Fixes: 7954f71689f9 ("perf record: Introduce thread affinity and mmap masks")
> Signed-off-by: Ian Rogers <irogers@google.com>
> Signed-off-by: Alexey Bayduraev <alexey.bayduraev@gmail.com>
> ---
>  tools/perf/builtin-record.c | 22 +++++++++++++++++-----
>  1 file changed, 17 insertions(+), 5 deletions(-)
> 
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index ba74fab02e62..069825c48d40 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -989,8 +989,11 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru
>  	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
>  	struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
>  
> -	thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
> -					      thread_data->mask->maps.nbits);
> +	if (cpu_map__is_dummy(cpus))
> +		thread_data->nr_mmaps = nr_mmaps;
> +	else
> +		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
> +						      thread_data->mask->maps.nbits);
>  	if (mmap) {
>  		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
>  		if (!thread_data->maps)
> @@ -1007,16 +1010,17 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru
>  		 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
>  
>  	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
> -		if (test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) {
> +		if (cpu_map__is_dummy(cpus) ||
> +		    test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) {
>  			if (thread_data->maps) {
>  				thread_data->maps[tm] = &mmap[m];
>  				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
> -					  thread_data, cpus->map[m].cpu, tm, m);
> +					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
>  			}
>  			if (thread_data->overwrite_maps) {
>  				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
>  				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
> -					  thread_data, cpus->map[m].cpu, tm, m);
> +					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
>  			}
>  			tm++;
>  		}
> @@ -3329,6 +3333,9 @@ static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_c
>  {
>  	int c;
>  
> +	if (cpu_map__is_dummy(cpus))
> +		return;
> +
>  	for (c = 0; c < cpus->nr; c++)
>  		set_bit(cpus->map[c].cpu, mask->bits);
>  }
> @@ -3680,6 +3687,11 @@ static int record__init_thread_masks(struct record *rec)
>  	if (!record__threads_enabled(rec))
>  		return record__init_thread_default_masks(rec, cpus);
>  
> +	if (cpu_map__is_dummy(cpus)) {
> +		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
> +		return -EINVAL;
> +	}
> +
>  	switch (rec->opts.threads_spec) {
>  	case THREAD_SPEC__CPU:
>  		ret = record__init_thread_cpu_masks(rec, cpus);
> -- 
> 2.36.0.rc0.470.gd361397f0d-goog

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 2/3] perf cpumap: Switch to using perf_cpu_map API
  2022-04-14  1:46 ` [PATCH v2 2/3] perf cpumap: Switch to using perf_cpu_map API Ian Rogers
@ 2022-04-14 12:09   ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 6+ messages in thread
From: Arnaldo Carvalho de Melo @ 2022-04-14 12:09 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Alexey Bayduraev, Peter Zijlstra, Ingo Molnar, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Namhyung Kim, Alexey Bayduraev,
	Andi Kleen, Riccardo Mancini, linux-kernel, linux-perf-users,
	Stephane Eranian

Em Wed, Apr 13, 2022 at 06:46:41PM -0700, Ian Rogers escreveu:
> Switch some raw accesses to the cpu map to using the library API. This
> can help with reference count checking.

Thanks, applying to perf/core after what is in perf/urgent, which this
patch depends on, gets merged there.

- Arnaldo
 
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/builtin-record.c          | 13 ++++++------
>  tools/perf/util/bpf_counter_cgroup.c | 31 ++++++++++++++--------------
>  2 files changed, 22 insertions(+), 22 deletions(-)
> 
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 069825c48d40..a5cf6a99d67f 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -1011,7 +1011,7 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru
>  
>  	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
>  		if (cpu_map__is_dummy(cpus) ||
> -		    test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) {
> +		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
>  			if (thread_data->maps) {
>  				thread_data->maps[tm] = &mmap[m];
>  				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
> @@ -3331,13 +3331,14 @@ struct option *record_options = __record_options;
>  
>  static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
>  {
> -	int c;
> +	struct perf_cpu cpu;
> +	int idx;
>  
>  	if (cpu_map__is_dummy(cpus))
>  		return;
>  
> -	for (c = 0; c < cpus->nr; c++)
> -		set_bit(cpus->map[c].cpu, mask->bits);
> +	perf_cpu_map__for_each_cpu(cpu, idx, cpus)
> +		set_bit(cpu.cpu, mask->bits);
>  }
>  
>  static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
> @@ -3404,8 +3405,8 @@ static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map
>  	pr_debug("nr_threads: %d\n", rec->nr_threads);
>  
>  	for (t = 0; t < rec->nr_threads; t++) {
> -		set_bit(cpus->map[t].cpu, rec->thread_masks[t].maps.bits);
> -		set_bit(cpus->map[t].cpu, rec->thread_masks[t].affinity.bits);
> +		set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
> +		set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
>  		if (verbose) {
>  			pr_debug("thread_masks[%d]: ", t);
>  			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
> diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c
> index ac60c08e8e2a..a4b676920da0 100644
> --- a/tools/perf/util/bpf_counter_cgroup.c
> +++ b/tools/perf/util/bpf_counter_cgroup.c
> @@ -47,7 +47,7 @@ static int bperf_load_program(struct evlist *evlist)
>  	struct evsel *evsel;
>  	struct cgroup *cgrp, *leader_cgrp;
>  	__u32 i, cpu;
> -	__u32 nr_cpus = evlist->core.all_cpus->nr;
> +	__u32 nr_cpus = perf_cpu_map__nr(evlist->core.all_cpus);
>  	int total_cpus = cpu__max_cpu().cpu;
>  	int map_size, map_fd;
>  	int prog_fd, err;
> @@ -125,7 +125,7 @@ static int bperf_load_program(struct evlist *evlist)
>  			for (cpu = 0; cpu < nr_cpus; cpu++) {
>  				int fd = FD(evsel, cpu);
>  				__u32 idx = evsel->core.idx * total_cpus +
> -					evlist->core.all_cpus->map[cpu].cpu;
> +					perf_cpu_map__cpu(evlist->core.all_cpus, cpu).cpu;
>  
>  				err = bpf_map_update_elem(map_fd, &idx, &fd,
>  							  BPF_ANY);
> @@ -207,13 +207,13 @@ static int bperf_cgrp__install_pe(struct evsel *evsel __maybe_unused,
>   */
>  static int bperf_cgrp__sync_counters(struct evlist *evlist)
>  {
> -	int i, cpu;
> -	int nr_cpus = evlist->core.all_cpus->nr;
> +	struct perf_cpu cpu;
> +	int idx;
>  	int prog_fd = bpf_program__fd(skel->progs.trigger_read);
>  
> -	for (i = 0; i < nr_cpus; i++) {
> -		cpu = evlist->core.all_cpus->map[i].cpu;
> -		bperf_trigger_reading(prog_fd, cpu);
> +	perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
> +		cpu = perf_cpu_map__cpu(evlist->core.all_cpus, i);
> +		bperf_trigger_reading(prog_fd, cpu.cpu);
>  	}
>  
>  	return 0;
> @@ -244,12 +244,10 @@ static int bperf_cgrp__disable(struct evsel *evsel)
>  static int bperf_cgrp__read(struct evsel *evsel)
>  {
>  	struct evlist *evlist = evsel->evlist;
> -	int i, cpu, nr_cpus = evlist->core.all_cpus->nr;
>  	int total_cpus = cpu__max_cpu().cpu;
>  	struct perf_counts_values *counts;
>  	struct bpf_perf_event_value *values;
>  	int reading_map_fd, err = 0;
> -	__u32 idx;
>  
>  	if (evsel->core.idx)
>  		return 0;
> @@ -263,7 +261,10 @@ static int bperf_cgrp__read(struct evsel *evsel)
>  	reading_map_fd = bpf_map__fd(skel->maps.cgrp_readings);
>  
>  	evlist__for_each_entry(evlist, evsel) {
> -		idx = evsel->core.idx;
> +		__u32 idx = evsel->core.idx;
> +		int i;
> +		struct perf_cpu_map cpu;
> +
>  		err = bpf_map_lookup_elem(reading_map_fd, &idx, values);
>  		if (err) {
>  			pr_err("bpf map lookup failed: idx=%u, event=%s, cgrp=%s\n",
> @@ -271,13 +272,11 @@ static int bperf_cgrp__read(struct evsel *evsel)
>  			goto out;
>  		}
>  
> -		for (i = 0; i < nr_cpus; i++) {
> -			cpu = evlist->core.all_cpus->map[i].cpu;
> -
> +		perf_cpu_map__for_each_cpu(cpu, i, evlist->core.all_cpu) {
>  			counts = perf_counts(evsel->counts, i, 0);
> -			counts->val = values[cpu].counter;
> -			counts->ena = values[cpu].enabled;
> -			counts->run = values[cpu].running;
> +			counts->val = values[cpu.cpu].counter;
> +			counts->ena = values[cpu.cpu].enabled;
> +			counts->run = values[cpu.cpu].running;
>  		}
>  	}
>  
> -- 
> 2.36.0.rc0.470.gd361397f0d-goog

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 3/3] perf test: Add basic perf record tests
  2022-04-14  1:46 ` [PATCH v2 3/3] perf test: Add basic perf record tests Ian Rogers
@ 2022-04-14 12:10   ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 6+ messages in thread
From: Arnaldo Carvalho de Melo @ 2022-04-14 12:10 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Alexey Bayduraev, Peter Zijlstra, Ingo Molnar, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Namhyung Kim, Alexey Bayduraev,
	Andi Kleen, Riccardo Mancini, linux-kernel, linux-perf-users,
	Stephane Eranian

Em Wed, Apr 13, 2022 at 06:46:42PM -0700, Ian Rogers escreveu:
> Test the --per-thread flag.
> Test Intel machine state capturing.

Thanks, applied to perf/core.

- Arnaldo

 
> Suggested-by: Namhyung Kim <namhyung@kernel.org>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/tests/shell/record.sh | 42 ++++++++++++++++++++++++++++++++
>  1 file changed, 42 insertions(+)
>  create mode 100755 tools/perf/tests/shell/record.sh
> 
> diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
> new file mode 100755
> index 000000000000..cd1cf14259b8
> --- /dev/null
> +++ b/tools/perf/tests/shell/record.sh
> @@ -0,0 +1,42 @@
> +#!/bin/sh
> +# perf record tests
> +# SPDX-License-Identifier: GPL-2.0
> +
> +set -e
> +
> +err=0
> +test_per_thread() {
> +  echo "Basic --per-thread mode test"
> +  perf record -e instructions:u --per-thread -o- true 2> /dev/null \
> +    | perf report -i- -q \
> +    | egrep -q true
> +  echo "Basic --per-thread mode test [Success]"
> +}
> +
> +test_register_capture() {
> +  echo "Register capture test"
> +  if ! perf list | egrep -q 'br_inst_retired.near_call'
> +  then
> +    echo "Register capture test [Skipped missing instruction]"
> +    return
> +  fi
> +  if ! perf record --intr-regs=\? 2>&1 | egrep -q 'available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15'
> +  then
> +    echo "Register capture test [Skipped missing registers]"
> +    return
> +  fi
> +  if ! perf record -o - --intr-regs=di,r8,dx,cx -e cpu/br_inst_retired.near_call/p \
> +    -c 1000 --per-thread true 2> /dev/null \
> +    | perf script -F ip,sym,iregs -i - 2> /dev/null \
> +    | egrep -q "DI:"
> +  then
> +    echo "Register capture test [Failed missing output]"
> +    err=1
> +    return
> +  fi
> +  echo "Register capture test [Success]"
> +}
> +
> +test_per_thread
> +test_register_capture
> +exit $err
> -- 
> 2.36.0.rc0.470.gd361397f0d-goog

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-04-14 12:10 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-14  1:46 [PATCH v2 1/3] perf record: Fix per-thread option Ian Rogers
2022-04-14  1:46 ` [PATCH v2 2/3] perf cpumap: Switch to using perf_cpu_map API Ian Rogers
2022-04-14 12:09   ` Arnaldo Carvalho de Melo
2022-04-14  1:46 ` [PATCH v2 3/3] perf test: Add basic perf record tests Ian Rogers
2022-04-14 12:10   ` Arnaldo Carvalho de Melo
2022-04-14 12:09 ` [PATCH v2 1/3] perf record: Fix per-thread option Arnaldo Carvalho de Melo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.