All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] perf/record: add num-synthesize-threads option
@ 2020-04-22 15:50 Ian Rogers
  2020-04-23 12:09 ` Jiri Olsa
  2020-05-08 13:05 ` [tip: perf/core] perf record: Add " tip-bot2 for Stephane Eranian
  0 siblings, 2 replies; 4+ messages in thread
From: Ian Rogers @ 2020-04-22 15:50 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
	Kan Liang, Adrian Hunter, Alexey Budankov, yuzhoujian,
	Tony Jones, linux-kernel, linux-perf-users
  Cc: Stephane Eranian, Ian Rogers

From: Stephane Eranian <eranian@google.com>

Add an option to control the degree of parallelism of the
synthesize_mmap() code, which scans /proc/PID/task/PID/maps and can be
time consuming. Mimic the way perf top handles the option.
If not specified, it defaults to 1 thread, i.e. the behavior before
this option was added.

On a desktop computer the processing of /proc/PID/task/PID/maps isn't
slow enough to warrant parallel processing and the thread creation has
some cost - hence the default of 1. On a loaded server with more than
100 cores it is possible to see synthesis times on the order of
seconds, and in this case having the option is desirable.

As the processing is a synchronization point, it is legitimate to worry if
Amdahl's law will apply to this patch. Profiling with this patch in
place:
https://lore.kernel.org/lkml/20200415054050.31645-4-irogers@google.com/
shows:
...
      - 32.59% __perf_event__synthesize_threads
         - 32.54% __event__synthesize_thread
            + 22.13% perf_event__synthesize_mmap_events
            + 6.68% perf_event__get_comm_ids.constprop.0
            + 1.49% process_synthesized_event
            + 1.29% __GI___readdir64
            + 0.60% __opendir
...
That is, the event processing (process_synthesized_event) is only 1.49%
of execution time, and there is plenty left to make parallel. This is
shown in the benchmark in this patch:
https://lore.kernel.org/lkml/20200415054050.31645-2-irogers@google.com/
Computing performance of multi threaded perf event synthesis by
synthesizing events on CPU 0:
 Number of synthesis threads: 1
   Average synthesis took: 127729.000 usec (+- 3372.880 usec)
   Average num. events: 21548.600 (+- 0.306)
   Average time per event 5.927 usec
 Number of synthesis threads: 2
   Average synthesis took: 88863.500 usec (+- 385.168 usec)
   Average num. events: 21552.800 (+- 0.327)
   Average time per event 4.123 usec
 Number of synthesis threads: 3
   Average synthesis took: 83257.400 usec (+- 348.617 usec)
   Average num. events: 21553.200 (+- 0.327)
   Average time per event 3.863 usec
 Number of synthesis threads: 4
   Average synthesis took: 75093.000 usec (+- 422.978 usec)
   Average num. events: 21554.200 (+- 0.200)
   Average time per event 3.484 usec
 Number of synthesis threads: 5
   Average synthesis took: 64896.600 usec (+- 353.348 usec)
   Average num. events: 21558.000 (+- 0.000)
   Average time per event 3.010 usec
 Number of synthesis threads: 6
   Average synthesis took: 59210.200 usec (+- 342.890 usec)
   Average num. events: 21560.000 (+- 0.000)
   Average time per event 2.746 usec
 Number of synthesis threads: 7
   Average synthesis took: 54093.900 usec (+- 306.247 usec)
   Average num. events: 21562.000 (+- 0.000)
   Average time per event 2.509 usec
 Number of synthesis threads: 8
   Average synthesis took: 48938.700 usec (+- 341.732 usec)
   Average num. events: 21564.000 (+- 0.000)
   Average time per event 2.269 usec

Here the average time per synthesized event goes from 5.927 usec with 1
thread to 2.269 usec with 8. This isn't a linear speed up, as not all of
the synthesis code has been made parallel. If the synthesis time was about
10 seconds then using 8 threads may bring this down to less than 4.

Signed-off-by: Stephane Eranian <eranian@google.com>
Reviewed-by: Ian Rogers <irogers@google.com>
---
 tools/perf/Documentation/perf-record.txt |  4 +++
 tools/perf/builtin-record.c              | 34 ++++++++++++++++++++++--
 tools/perf/util/record.h                 |  1 +
 3 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index b3f3b3f1c161..6e8b4649307c 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -596,6 +596,10 @@ Make a copy of /proc/kcore and place it into a directory with the perf data file
 Limit the sample data max size, <size> is expected to be a number with
 appended unit character - B/K/M/G
 
+--num-thread-synthesize::
+	The number of threads to run when synthesizing events for existing processes.
+	By default, the number of threads equals 1.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 1ab349abe904..2e8011f179f2 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -43,6 +43,7 @@
 #include "util/time-utils.h"
 #include "util/units.h"
 #include "util/bpf-event.h"
+#include "util/util.h"
 #include "asm/bug.h"
 #include "perf.h"
 
@@ -50,6 +51,7 @@
 #include <inttypes.h>
 #include <locale.h>
 #include <poll.h>
+#include <pthread.h>
 #include <unistd.h>
 #include <sched.h>
 #include <signal.h>
@@ -503,6 +505,20 @@ static int process_synthesized_event(struct perf_tool *tool,
 	return record__write(rec, NULL, event, event->header.size);
 }
 
+static int process_locked_synthesized_event(struct perf_tool *tool,
+				     union perf_event *event,
+				     struct perf_sample *sample __maybe_unused,
+				     struct machine *machine __maybe_unused)
+{
+	static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
+	int ret;
+
+	pthread_mutex_lock(&synth_lock);
+	ret = process_synthesized_event(tool, event, sample, machine);
+	pthread_mutex_unlock(&synth_lock);
+	return ret;
+}
+
 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
 {
 	struct record *rec = to;
@@ -1288,6 +1304,7 @@ static int record__synthesize(struct record *rec, bool tail)
 	struct perf_tool *tool = &rec->tool;
 	int fd = perf_data__fd(data);
 	int err = 0;
+	event_op f = process_synthesized_event;
 
 	if (rec->opts.tail_synthesize != tail)
 		return 0;
@@ -1402,9 +1419,18 @@ static int record__synthesize(struct record *rec, bool tail)
 	if (err < 0)
 		pr_warning("Couldn't synthesize cgroup events.\n");
 
+	if (rec->opts.nr_threads_synthesize > 1) {
+		perf_set_multithreaded();
+		f = process_locked_synthesized_event;
+	}
+
 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
-					    process_synthesized_event, opts->sample_address,
-					    1);
+					    f, opts->sample_address,
+					    rec->opts.nr_threads_synthesize);
+
+	if (rec->opts.nr_threads_synthesize > 1)
+		perf_set_singlethreaded();
+
 out:
 	return err;
 }
@@ -2232,6 +2258,7 @@ static struct record record = {
 			.default_per_cpu = true,
 		},
 		.mmap_flush          = MMAP_FLUSH_DEFAULT,
+		.nr_threads_synthesize = 1,
 	},
 	.tool = {
 		.sample		= process_sample_event,
@@ -2421,6 +2448,9 @@ static struct option __record_options[] = {
 #endif
 	OPT_CALLBACK(0, "max-size", &record.output_max_size,
 		     "size", "Limit the maximum size of the output file", parse_output_max_size),
+	OPT_UINTEGER(0, "num-thread-synthesize",
+		     &record.opts.nr_threads_synthesize,
+		     "number of threads to run for event synthesis"),
 	OPT_END()
 };
 
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index 24316458be20..923565c3b155 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -68,6 +68,7 @@ struct record_opts {
 	int	      affinity;
 	int	      mmap_flush;
 	unsigned int  comp_level;
+	unsigned int  nr_threads_synthesize;
 };
 
 extern const char * const *record_usage;
-- 
2.26.2.303.gf8c07b1a785-goog


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] perf/record: add num-synthesize-threads option
  2020-04-22 15:50 [PATCH v3] perf/record: add num-synthesize-threads option Ian Rogers
@ 2020-04-23 12:09 ` Jiri Olsa
  2020-04-23 14:11   ` Arnaldo Carvalho de Melo
  2020-05-08 13:05 ` [tip: perf/core] perf record: Add " tip-bot2 for Stephane Eranian
  1 sibling, 1 reply; 4+ messages in thread
From: Jiri Olsa @ 2020-04-23 12:09 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Namhyung Kim, Kan Liang,
	Adrian Hunter, Alexey Budankov, yuzhoujian, Tony Jones,
	linux-kernel, linux-perf-users, Stephane Eranian

On Wed, Apr 22, 2020 at 08:50:38AM -0700, Ian Rogers wrote:
> From: Stephane Eranian <eranian@google.com>
> 

SNIP

> That is the processing is 1.49% of execution time and there is plenty to
> make parallel. This is shown in the benchmark in this patch:
> https://lore.kernel.org/lkml/20200415054050.31645-2-irogers@google.com/
> Computing performance of multi threaded perf event synthesis by
> synthesizing events on CPU 0:
>  Number of synthesis threads: 1
>    Average synthesis took: 127729.000 usec (+- 3372.880 usec)
>    Average num. events: 21548.600 (+- 0.306)
>    Average time per event 5.927 usec
>  Number of synthesis threads: 2
>    Average synthesis took: 88863.500 usec (+- 385.168 usec)
>    Average num. events: 21552.800 (+- 0.327)
>    Average time per event 4.123 usec
>  Number of synthesis threads: 3
>    Average synthesis took: 83257.400 usec (+- 348.617 usec)
>    Average num. events: 21553.200 (+- 0.327)
>    Average time per event 3.863 usec
>  Number of synthesis threads: 4
>    Average synthesis took: 75093.000 usec (+- 422.978 usec)
>    Average num. events: 21554.200 (+- 0.200)
>    Average time per event 3.484 usec
>  Number of synthesis threads: 5
>    Average synthesis took: 64896.600 usec (+- 353.348 usec)
>    Average num. events: 21558.000 (+- 0.000)
>    Average time per event 3.010 usec
>  Number of synthesis threads: 6
>    Average synthesis took: 59210.200 usec (+- 342.890 usec)
>    Average num. events: 21560.000 (+- 0.000)
>    Average time per event 2.746 usec
>  Number of synthesis threads: 7
>    Average synthesis took: 54093.900 usec (+- 306.247 usec)
>    Average num. events: 21562.000 (+- 0.000)
>    Average time per event 2.509 usec
>  Number of synthesis threads: 8
>    Average synthesis took: 48938.700 usec (+- 341.732 usec)
>    Average num. events: 21564.000 (+- 0.000)
>    Average time per event 2.269 usec
> 
> Where average time per synthesized event goes from 5.927 usec with 1
> thread to 2.269 usec with 8. This isn't a linear speed up as not all of
> synthesize code has been made parallel. If the synthesis time was about
> 10 seconds then using 8 threads may bring this down to less than 4.

Acked-by: Jiri Olsa <jolsa@redhat.com>

thanks,
jirka


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] perf/record: add num-synthesize-threads option
  2020-04-23 12:09 ` Jiri Olsa
@ 2020-04-23 14:11   ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 4+ messages in thread
From: Arnaldo Carvalho de Melo @ 2020-04-23 14:11 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Ian Rogers, Peter Zijlstra, Ingo Molnar, Mark Rutland,
	Alexander Shishkin, Namhyung Kim, Kan Liang, Adrian Hunter,
	Alexey Budankov, yuzhoujian, Tony Jones, linux-kernel,
	linux-perf-users, Stephane Eranian

Em Thu, Apr 23, 2020 at 02:09:57PM +0200, Jiri Olsa escreveu:
> On Wed, Apr 22, 2020 at 08:50:38AM -0700, Ian Rogers wrote:
> > From: Stephane Eranian <eranian@google.com>
> > 
> 
> SNIP
> 
> > That is the processing is 1.49% of execution time and there is plenty to
> > make parallel. This is shown in the benchmark in this patch:
> > https://lore.kernel.org/lkml/20200415054050.31645-2-irogers@google.com/
> > Computing performance of multi threaded perf event synthesis by
> > synthesizing events on CPU 0:
> >  Number of synthesis threads: 1
> >    Average synthesis took: 127729.000 usec (+- 3372.880 usec)
> >    Average num. events: 21548.600 (+- 0.306)
> >    Average time per event 5.927 usec
> >  Number of synthesis threads: 2
> >    Average synthesis took: 88863.500 usec (+- 385.168 usec)
> >    Average num. events: 21552.800 (+- 0.327)
> >    Average time per event 4.123 usec
> >  Number of synthesis threads: 3
> >    Average synthesis took: 83257.400 usec (+- 348.617 usec)
> >    Average num. events: 21553.200 (+- 0.327)
> >    Average time per event 3.863 usec
> >  Number of synthesis threads: 4
> >    Average synthesis took: 75093.000 usec (+- 422.978 usec)
> >    Average num. events: 21554.200 (+- 0.200)
> >    Average time per event 3.484 usec
> >  Number of synthesis threads: 5
> >    Average synthesis took: 64896.600 usec (+- 353.348 usec)
> >    Average num. events: 21558.000 (+- 0.000)
> >    Average time per event 3.010 usec
> >  Number of synthesis threads: 6
> >    Average synthesis took: 59210.200 usec (+- 342.890 usec)
> >    Average num. events: 21560.000 (+- 0.000)
> >    Average time per event 2.746 usec
> >  Number of synthesis threads: 7
> >    Average synthesis took: 54093.900 usec (+- 306.247 usec)
> >    Average num. events: 21562.000 (+- 0.000)
> >    Average time per event 2.509 usec
> >  Number of synthesis threads: 8
> >    Average synthesis took: 48938.700 usec (+- 341.732 usec)
> >    Average num. events: 21564.000 (+- 0.000)
> >    Average time per event 2.269 usec
> > 
> > Where average time per synthesized event goes from 5.927 usec with 1
> > thread to 2.269 usec with 8. This isn't a linear speed up as not all of
> > synthesize code has been made parallel. If the synthesis time was about
> > 10 seconds then using 8 threads may bring this down to less than 4.
> 
> Acked-by: Jiri Olsa <jolsa@redhat.com>

Thanks, applied.

- Arnaldo

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [tip: perf/core] perf record: Add num-synthesize-threads option
  2020-04-22 15:50 [PATCH v3] perf/record: add num-synthesize-threads option Ian Rogers
  2020-04-23 12:09 ` Jiri Olsa
@ 2020-05-08 13:05 ` tip-bot2 for Stephane Eranian
  1 sibling, 0 replies; 4+ messages in thread
From: tip-bot2 for Stephane Eranian @ 2020-05-08 13:05 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Stephane Eranian, Ian Rogers, Jiri Olsa, Adrian Hunter,
	Alexander Shishkin, Alexey Budankov, Kan Liang, Mark Rutland,
	Namhyung Kim, Peter Zijlstra, Tony Jones, yuzhoujian,
	Arnaldo Carvalho de Melo, x86, LKML

The following commit has been merged into the perf/core branch of tip:

Commit-ID:     d99c22eabee45f40ca44b877a1adde028f14b6b4
Gitweb:        https://git.kernel.org/tip/d99c22eabee45f40ca44b877a1adde028f14b6b4
Author:        Stephane Eranian <eranian@google.com>
AuthorDate:    Wed, 22 Apr 2020 08:50:38 -07:00
Committer:     Arnaldo Carvalho de Melo <acme@redhat.com>
CommitterDate: Thu, 23 Apr 2020 11:10:41 -03:00

perf record: Add num-synthesize-threads option

Add an option to control the degree of parallelism of the
synthesize_mmap() code, which scans /proc/PID/task/PID/maps and can be
time consuming. Mimic the way perf top handles the option.
If not specified, it defaults to 1 thread, i.e. the behavior before
this option was added.

On a desktop computer the processing of /proc/PID/task/PID/maps isn't
slow enough to warrant parallel processing and the thread creation has
some cost - hence the default of 1. On a loaded server with more than
100 cores it is possible to see synthesis times on the order of
seconds, and in this case having the option is desirable.

As the processing is a synchronization point, it is legitimate to worry if
Amdahl's law will apply to this patch. Profiling with this patch in
place:
https://lore.kernel.org/lkml/20200415054050.31645-4-irogers@google.com/
shows:
...
      - 32.59% __perf_event__synthesize_threads
         - 32.54% __event__synthesize_thread
            + 22.13% perf_event__synthesize_mmap_events
            + 6.68% perf_event__get_comm_ids.constprop.0
            + 1.49% process_synthesized_event
            + 1.29% __GI___readdir64
            + 0.60% __opendir
...
That is, the event processing (process_synthesized_event) is only 1.49%
of execution time, and there is plenty left to make parallel. This is
shown in the benchmark in this patch:

https://lore.kernel.org/lkml/20200415054050.31645-2-irogers@google.com/

  Computing performance of multi threaded perf event synthesis by
  synthesizing events on CPU 0:
   Number of synthesis threads: 1
     Average synthesis took: 127729.000 usec (+- 3372.880 usec)
     Average num. events: 21548.600 (+- 0.306)
     Average time per event 5.927 usec
   Number of synthesis threads: 2
     Average synthesis took: 88863.500 usec (+- 385.168 usec)
     Average num. events: 21552.800 (+- 0.327)
     Average time per event 4.123 usec
   Number of synthesis threads: 3
     Average synthesis took: 83257.400 usec (+- 348.617 usec)
     Average num. events: 21553.200 (+- 0.327)
     Average time per event 3.863 usec
   Number of synthesis threads: 4
     Average synthesis took: 75093.000 usec (+- 422.978 usec)
     Average num. events: 21554.200 (+- 0.200)
     Average time per event 3.484 usec
   Number of synthesis threads: 5
     Average synthesis took: 64896.600 usec (+- 353.348 usec)
     Average num. events: 21558.000 (+- 0.000)
     Average time per event 3.010 usec
   Number of synthesis threads: 6
     Average synthesis took: 59210.200 usec (+- 342.890 usec)
     Average num. events: 21560.000 (+- 0.000)
     Average time per event 2.746 usec
   Number of synthesis threads: 7
     Average synthesis took: 54093.900 usec (+- 306.247 usec)
     Average num. events: 21562.000 (+- 0.000)
     Average time per event 2.509 usec
   Number of synthesis threads: 8
     Average synthesis took: 48938.700 usec (+- 341.732 usec)
     Average num. events: 21564.000 (+- 0.000)
     Average time per event 2.269 usec

Here the average time per synthesized event goes from 5.927 usec with 1
thread to 2.269 usec with 8. This isn't a linear speed up, as not all of
the synthesis code has been made parallel. If the synthesis time was about
10 seconds then using 8 threads may bring this down to less than 4.

Signed-off-by: Stephane Eranian <eranian@google.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tony Jones <tonyj@suse.de>
Cc: yuzhoujian <yuzhoujian@didichuxing.com>
Link: http://lore.kernel.org/lkml/20200422155038.9380-1-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-record.txt |  4 +++-
 tools/perf/builtin-record.c              | 34 +++++++++++++++++++++--
 tools/perf/util/record.h                 |  1 +-
 3 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index b3f3b3f..6e8b464 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -596,6 +596,10 @@ Make a copy of /proc/kcore and place it into a directory with the perf data file
 Limit the sample data max size, <size> is expected to be a number with
 appended unit character - B/K/M/G
 
+--num-thread-synthesize::
+	The number of threads to run when synthesizing events for existing processes.
+	By default, the number of threads equals 1.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 1ab349a..2e8011f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -43,6 +43,7 @@
 #include "util/time-utils.h"
 #include "util/units.h"
 #include "util/bpf-event.h"
+#include "util/util.h"
 #include "asm/bug.h"
 #include "perf.h"
 
@@ -50,6 +51,7 @@
 #include <inttypes.h>
 #include <locale.h>
 #include <poll.h>
+#include <pthread.h>
 #include <unistd.h>
 #include <sched.h>
 #include <signal.h>
@@ -503,6 +505,20 @@ static int process_synthesized_event(struct perf_tool *tool,
 	return record__write(rec, NULL, event, event->header.size);
 }
 
+static int process_locked_synthesized_event(struct perf_tool *tool,
+				     union perf_event *event,
+				     struct perf_sample *sample __maybe_unused,
+				     struct machine *machine __maybe_unused)
+{
+	static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
+	int ret;
+
+	pthread_mutex_lock(&synth_lock);
+	ret = process_synthesized_event(tool, event, sample, machine);
+	pthread_mutex_unlock(&synth_lock);
+	return ret;
+}
+
 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
 {
 	struct record *rec = to;
@@ -1288,6 +1304,7 @@ static int record__synthesize(struct record *rec, bool tail)
 	struct perf_tool *tool = &rec->tool;
 	int fd = perf_data__fd(data);
 	int err = 0;
+	event_op f = process_synthesized_event;
 
 	if (rec->opts.tail_synthesize != tail)
 		return 0;
@@ -1402,9 +1419,18 @@ static int record__synthesize(struct record *rec, bool tail)
 	if (err < 0)
 		pr_warning("Couldn't synthesize cgroup events.\n");
 
+	if (rec->opts.nr_threads_synthesize > 1) {
+		perf_set_multithreaded();
+		f = process_locked_synthesized_event;
+	}
+
 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
-					    process_synthesized_event, opts->sample_address,
-					    1);
+					    f, opts->sample_address,
+					    rec->opts.nr_threads_synthesize);
+
+	if (rec->opts.nr_threads_synthesize > 1)
+		perf_set_singlethreaded();
+
 out:
 	return err;
 }
@@ -2232,6 +2258,7 @@ static struct record record = {
 			.default_per_cpu = true,
 		},
 		.mmap_flush          = MMAP_FLUSH_DEFAULT,
+		.nr_threads_synthesize = 1,
 	},
 	.tool = {
 		.sample		= process_sample_event,
@@ -2421,6 +2448,9 @@ static struct option __record_options[] = {
 #endif
 	OPT_CALLBACK(0, "max-size", &record.output_max_size,
 		     "size", "Limit the maximum size of the output file", parse_output_max_size),
+	OPT_UINTEGER(0, "num-thread-synthesize",
+		     &record.opts.nr_threads_synthesize,
+		     "number of threads to run for event synthesis"),
 	OPT_END()
 };
 
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index 2431645..923565c 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -68,6 +68,7 @@ struct record_opts {
 	int	      affinity;
 	int	      mmap_flush;
 	unsigned int  comp_level;
+	unsigned int  nr_threads_synthesize;
 };
 
 extern const char * const *record_usage;

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-05-08 13:06 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-22 15:50 [PATCH v3] perf/record: add num-synthesize-threads option Ian Rogers
2020-04-23 12:09 ` Jiri Olsa
2020-04-23 14:11   ` Arnaldo Carvalho de Melo
2020-05-08 13:05 ` [tip: perf/core] perf record: Add " tip-bot2 for Stephane Eranian

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.