All of lore.kernel.org
 help / color / mirror / Atom feed
From: Namhyung Kim <namhyung@kernel.org>
To: Alexey Budankov <alexey.budankov@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>,
	Jiri Olsa <jolsa@redhat.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	linux-kernel <linux-kernel@vger.kernel.org>,
	Andi Kleen <ak@linux.intel.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com>,
	Alexander Antonov <alexander.antonov@linux.intel.com>
Subject: Re: [PATCH v3 08/12] perf record: introduce --threads=<spec> command line option
Date: Fri, 20 Nov 2020 20:09:16 +0900	[thread overview]
Message-ID: <20201120110916.GG94830@google.com> (raw)
In-Reply-To: <e9a74675-7323-14f4-ce60-32fa1e019111@linux.intel.com>

On Mon, Nov 16, 2020 at 03:20:26PM +0300, Alexey Budankov wrote:
> 
> Provide --threads option in perf record command line interface.
> The option can have a value in the form of masks that specify
> cpus to be monitored with data streaming threads and its layout
> in system topology. The masks can be filtered using cpu mask
> provided via -C option.
> 
> The specification value can be user defined list of masks. Masks
> separated by colon define cpus to be monitored by one thread and
> affinity mask of that thread is separated by slash. For example:
> <cpus mask 1>/<affinity mask 1>:<cpu mask 2>/<affinity mask 2>
> specifies parallel threads layout that consists of two threads
> with corresponding assigned cpus to be monitored.
> 
> The specification value can be a string e.g. "cpu", "core" or
> "socket" meaning creation of data streaming thread for every
> cpu or core or socket to monitor distinct cpus or cpus grouped
> by core or socket.
> 
> The option provided with no or empty value defaults to per-cpu
> parallel threads layout creating data streaming thread for every
> cpu being monitored.
> 
> Feature design and implementation are based on prototypes [1], [2].
> 
> [1] git clone https://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git -b perf/record_threads
> [2] https://lore.kernel.org/lkml/20180913125450.21342-1-jolsa@kernel.org/
> 
> Suggested-by: Jiri Olsa <jolsa@kernel.org>
> Suggested-by: Namhyung Kim <namhyung@kernel.org>
> Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
> ---
[SNIP]
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index f5e5175da6a1..fd0587d636b2 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -3097,6 +3173,17 @@ static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_c
>  		set_bit(cpus->map[c], mask->bits);
>  }
>  
> +static void record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, char *mask_spec)
> +{
> +	struct perf_cpu_map *cpus;
> +
> +	cpus = perf_cpu_map__new(mask_spec);
> +	if (cpus) {
> +		record__mmap_cpu_mask_init(mask, cpus);
> +		free(cpus);

Would be better to use perf_cpu_map__put().


> +	}
> +}
> +
>  static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
>  {
>  	int t, ret;
> @@ -3116,6 +3203,196 @@ static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr
>  
>  	return 0;
>  }
> +
> +static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
> +{
> +	int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
> +
> +	ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu());
> +	if (ret)
> +		return ret;
> +
> +	rec->nr_threads = nr_cpus;
> +	pr_debug("threads: nr_threads=%d\n", rec->nr_threads);
> +
> +	for (t = 0; t < rec->nr_threads; t++) {
> +		set_bit(cpus->map[t], rec->thread_masks[t].maps.bits);
> +		pr_debug("thread_masks[%d]: maps mask [%d]\n", t, cpus->map[t]);
> +		set_bit(cpus->map[t], rec->thread_masks[t].affinity.bits);
> +		pr_debug("thread_masks[%d]: affinity mask [%d]\n", t, cpus->map[t]);
> +	}
> +
> +	return 0;
> +}
> +
> +static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
> +					  char **maps_spec, char **affinity_spec, u32 nr_spec)
> +{
> +	u32 s;
> +	int ret, nr_threads = 0;
> +	struct mmap_cpu_mask cpus_mask;
> +	struct thread_mask thread_mask, full_mask;
> +
> +	ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu());
> +	if (ret)
> +		return ret;
> +	record__mmap_cpu_mask_init(&cpus_mask, cpus);
> +	ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu());
> +	if (ret)
> +		return ret;
> +	ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu());
> +	if (ret)
> +		return ret;
> +	record__thread_mask_clear(&full_mask);
> +
> +	for (s = 0; s < nr_spec; s++) {
> +		record__thread_mask_clear(&thread_mask);
> +
> +		record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
> +		record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
> +
> +		if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
> +				cpus_mask.bits, thread_mask.maps.nbits) ||
> +		    !bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
> +				cpus_mask.bits, thread_mask.affinity.nbits))
> +			continue;
> +
> +		ret = record__thread_mask_intersects(&thread_mask, &full_mask);
> +		if (ret)
> +			return ret;
> +		record__thread_mask_or(&full_mask, &full_mask, &thread_mask);
> +
> +		rec->thread_masks = realloc(rec->thread_masks,
> +					    (nr_threads + 1) * sizeof(struct thread_mask));
> +		if (!rec->thread_masks) {
> +			pr_err("Failed to allocate thread masks\n");
> +			return -ENOMEM;

It'll leak previous rec->thread_masks as well as cpu/thread/full masks.


> +		}
> +		rec->thread_masks[nr_threads] = thread_mask;
> +		pr_debug("thread_masks[%d]: addr=", nr_threads);
> +		mmap_cpu_mask__scnprintf(&rec->thread_masks[nr_threads].maps, "maps");
> +		pr_debug("thread_masks[%d]: addr=", nr_threads);
> +		mmap_cpu_mask__scnprintf(&rec->thread_masks[nr_threads].affinity, "affinity");
> +		nr_threads++;
> +		ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu());
> +		if (ret)
> +			return ret;
> +	}
> +
> +	rec->nr_threads = nr_threads;
> +	pr_debug("threads: nr_threads=%d\n", rec->nr_threads);
> +
> +	record__mmap_cpu_mask_free(&cpus_mask);
> +	record__thread_mask_free(&thread_mask);
> +	record__thread_mask_free(&full_mask);
> +
> +	return 0;
> +}
> +
> +static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
> +{
> +	int ret;
> +	struct cpu_topology *topo;
> +
> +	topo = cpu_topology__new();
> +	if (!topo)
> +		return -EINVAL;
> +
> +	ret = record__init_thread_masks_spec(rec, cpus, topo->thread_siblings,
> +					     topo->thread_siblings, topo->thread_sib);
> +	cpu_topology__delete(topo);
> +
> +	return ret;
> +}
> +
> +static int record__init_thread_socket_masks(struct record *rec, struct perf_cpu_map *cpus)
> +{
> +	int ret;
> +	struct cpu_topology *topo;
> +
> +	topo = cpu_topology__new();
> +	if (!topo)
> +		return -EINVAL;
> +
> +	ret = record__init_thread_masks_spec(rec, cpus, topo->core_siblings,
> +					     topo->core_siblings, topo->core_sib);
> +	cpu_topology__delete(topo);
> +
> +	return ret;
> +}
> +
> +static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
> +{
> +	u32 s;
> +	int ret;
> +	char **spec;
> +	struct numa_topology *topo;
> +
> +	topo = numa_topology__new();
> +	if (!topo)
> +		return -EINVAL;
> +	spec = zalloc(topo->nr * sizeof(char *));
> +	if (!spec)
> +		return -ENOMEM;

Will leak topo.


> +	for (s = 0; s < topo->nr; s++)
> +		spec[s] = topo->nodes[s].cpus;
> +
> +	ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
> +
> +	zfree(&spec);
> +
> +	numa_topology__delete(topo);
> +
> +	return ret;
> +}
> +
> +static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
> +{
> +	int t, ret;
> +	u32 s, nr_spec = 0;
> +	char **maps_spec = NULL, **affinity_spec = NULL;
> +	char *spec, *spec_ptr, *user_spec, *mask, *mask_ptr;
> +
> +	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ;t++, user_spec = NULL) {
> +		spec = strtok_r(user_spec, ":", &spec_ptr);
> +		if (spec == NULL)
> +			break;
> +		pr_debug(" spec[%d]: %s\n", t, spec);
> +		mask = strtok_r(spec, "/", &mask_ptr);
> +		if (mask == NULL)
> +			break;
> +		pr_debug("  maps mask: %s\n", mask);
> +		maps_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
> +		if (!maps_spec) {
> +			pr_err("Failed to realloc maps_spec\n");
> +			return -ENOMEM;

Likewise, you can use a temp variable and bail out to free all
existing specs.

> +		}
> +		maps_spec[nr_spec] = strdup(mask);
> +		mask = strtok_r(NULL, "/", &mask_ptr);
> +		if (mask == NULL)
> +			break;
> +		pr_debug("  affinity mask: %s\n", mask);
> +		affinity_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
> +		if (!maps_spec) {
> +			pr_err("Failed to realloc affinity_spec\n");
> +			return -ENOMEM;

Ditto.

Thanks,
Namhyung


> +		}
> +		affinity_spec[nr_spec] = strdup(mask);
> +		nr_spec++;
> +	}
> +
> +	ret = record__init_thread_masks_spec(rec, cpus, maps_spec, affinity_spec, nr_spec);
> +
> +	for (s = 0; s < nr_spec; s++) {
> +		free(maps_spec[s]);
> +		free(affinity_spec[s]);
> +	}
> +	free(affinity_spec);
> +	free(maps_spec);
> +
> +	return ret;
> +}
> +
>  static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
>  {
>  	int ret;
> @@ -3133,9 +3410,33 @@ static int record__init_thread_default_masks(struct record *rec, struct perf_cpu
>  
>  static int record__init_thread_masks(struct record *rec)
>  {
> +	int ret = 0;
>  	struct perf_cpu_map *cpus = rec->evlist->core.cpus;
>  
> -	return record__init_thread_default_masks(rec, cpus);
> +	if (!record__threads_enabled(rec))
> +		return record__init_thread_default_masks(rec, cpus);
> +
> +	switch (rec->opts.threads_spec) {
> +	case THREAD_SPEC__CPU:
> +		ret = record__init_thread_cpu_masks(rec, cpus);
> +		break;
> +	case THREAD_SPEC__CORE:
> +		ret = record__init_thread_core_masks(rec, cpus);
> +		break;
> +	case THREAD_SPEC__SOCKET:
> +		ret = record__init_thread_socket_masks(rec, cpus);
> +		break;
> +	case THREAD_SPEC__NUMA:
> +		ret = record__init_thread_numa_masks(rec, cpus);
> +		break;
> +	case THREAD_SPEC__USER:
> +		ret = record__init_thread_user_masks(rec, cpus);
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	return ret;
>  }
>  
>  static int record__fini_thread_masks(struct record *rec)
> @@ -3361,7 +3662,10 @@ int cmd_record(int argc, const char **argv)
>  
>  	err = record__init_thread_masks(rec);
>  	if (err) {
> -		pr_err("record__init_thread_masks failed, error %d\n", err);
> +		if (err > 0)
> +			pr_err("ERROR: parallel data streaming masks (--threads) intersect.\n");
> +		else
> +			pr_err("record__init_thread_masks failed, error %d\n", err);
>  		goto out;
>  	}
>  
> diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
> index 9c13a39cc58f..7f64ff5da2b2 100644
> --- a/tools/perf/util/record.h
> +++ b/tools/perf/util/record.h
> @@ -75,6 +75,7 @@ struct record_opts {
>  	int	      ctl_fd_ack;
>  	bool	      ctl_fd_close;
>  	int	      threads_spec;
> +	const char    *threads_user_spec;
>  };
>  
>  extern const char * const *record_usage;
> -- 
> 2.24.1
> 
> 

  reply	other threads:[~2020-11-20 11:09 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-16 12:12 [PATCH v3 00/12] Introduce threaded trace streaming for basic perf record operation Alexey Budankov
2020-11-16 12:14 ` [PATCH v3 01/12] perf record: introduce thread affinity and mmap masks Alexey Budankov
2020-11-20 10:01   ` Namhyung Kim
2020-11-16 12:15 ` [PATCH v3 02/12] perf record: introduce thread specific data array Alexey Budankov
2020-11-20 10:14   ` Namhyung Kim
2020-11-16 12:16 ` [PATCH v3 03/12] perf record: introduce thread local variable Alexey Budankov
2020-11-20 10:20   ` Namhyung Kim
2020-11-16 12:17 ` [PATCH v3 04/12] perf record: stop threads in the end of trace streaming Alexey Budankov
2020-11-16 12:18 ` [PATCH v3 05/12] perf record: start threads in the beginning " Alexey Budankov
2020-11-16 12:18 ` [PATCH v3 06/12] perf record: introduce data file at mmap buffer object Alexey Budankov
2020-11-20 10:28   ` Namhyung Kim
2020-11-16 12:19 ` [PATCH v3 07/12] perf record: init " Alexey Budankov
2020-11-20 10:49   ` Namhyung Kim
2021-03-01 11:16     ` Bayduraev, Alexey V
2021-03-01 11:44       ` Namhyung Kim
2021-03-01 13:33         ` Bayduraev, Alexey V
2021-03-01 14:20           ` Namhyung Kim
2020-11-16 12:20 ` [PATCH v3 08/12] perf record: introduce --threads=<spec> command line option Alexey Budankov
2020-11-20 11:09   ` Namhyung Kim [this message]
2020-11-16 12:21 ` [PATCH v3 09/12] perf record: document parallel data streaming mode Alexey Budankov
2020-11-16 12:22 ` [PATCH v3 10/12] perf report: output data file name in raw trace dump Alexey Budankov
2020-11-16 12:22 ` [PATCH v3 11/12] perf session: load data directory files for analysis Alexey Budankov
2020-11-16 12:25 ` [PATCH v3 12/12] perf session: use reader functions to load perf data file Alexey Budankov
2020-11-20  9:45 ` [PATCH v3 00/12] Introduce threaded trace streaming for basic perf record operation Namhyung Kim
2020-12-15 15:05   ` Alexei Budankov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201120110916.GG94830@google.com \
    --to=namhyung@kernel.org \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=ak@linux.intel.com \
    --cc=alexander.antonov@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=alexey.budankov@linux.intel.com \
    --cc=alexey.v.bayduraev@linux.intel.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.