All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alexey Budankov <alexey.budankov@linux.intel.com>
To: Ingo Molnar <mingo@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Jiri Olsa <jolsa@redhat.com>, Namhyung Kim <namhyung@kernel.org>,
	Andi Kleen <ak@linux.intel.com>,
	linux-kernel <linux-kernel@vger.kernel.org>
Subject: [PATCH v7 1/2]: perf util: map data buffer for preserving collected data
Date: Wed, 5 Sep 2018 10:19:56 +0300	[thread overview]
Message-ID: <cf6cd6ca-12dd-9ee8-8e18-ee02f289c45e@linux.intel.com> (raw)
In-Reply-To: <1fc1fc5b-a8cc-2b05-d43c-692e58855c81@linux.intel.com>


The map->data buffers are used to preserve map->base profiling data 
for writing to disk. AIO map->cblocks are used to queue corresponding 
map->data buffers for asynchronous writing. map->cblocks objects are 
located in the last page of every map->data buffer.

Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
 Changes in v7:
  - implemented handling record.aio setting from perfconfig file
 Changes in v6:
  - adjusted setting of priorities for cblocks;
 Changes in v5:
  - reshaped layout of data structures;
  - implemented --aio option;
 Changes in v4:
  - converted mmap()/munmap() to malloc()/free() for map->data buffer management
 Changes in v2:
  - converted zalloc() to calloc() for allocation of mmap_aio array,
  - fixed a typo and adjusted fallback branch code;
---
 tools/perf/builtin-record.c | 15 ++++++++++++-
 tools/perf/perf.h           |  1 +
 tools/perf/util/evlist.c    |  7 +++---
 tools/perf/util/evlist.h    |  3 ++-
 tools/perf/util/mmap.c      | 53 +++++++++++++++++++++++++++++++++++++++++++++
 tools/perf/util/mmap.h      |  6 ++++-
 6 files changed, 79 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 22ebeb92ac51..f17a6f9cb1ba 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -326,7 +326,8 @@ static int record__mmap_evlist(struct record *rec,
 
 	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
 				 opts->auxtrace_mmap_pages,
-				 opts->auxtrace_snapshot_mode) < 0) {
+				 opts->auxtrace_snapshot_mode,
+				 opts->nr_cblocks) < 0) {
 		if (errno == EPERM) {
 			pr_err("Permission error mapping pages.\n"
 			       "Consider increasing "
@@ -1287,6 +1288,8 @@ static int perf_record_config(const char *var, const char *value, void *cb)
 		var = "call-graph.record-mode";
 		return perf_default_config(var, value, cb);
 	}
+	if (!strcmp(var, "record.aio"))
+		rec->opts.nr_cblocks = strtol(value, NULL, 0);
 
 	return 0;
 }
@@ -1519,6 +1522,7 @@ static struct record record = {
 			.default_per_cpu = true,
 		},
 		.proc_map_timeout     = 500,
+		.nr_cblocks	      = 2
 	},
 	.tool = {
 		.sample		= process_sample_event,
@@ -1678,6 +1682,8 @@ static struct option __record_options[] = {
 			  "signal"),
 	OPT_BOOLEAN(0, "dry-run", &dry_run,
 		    "Parse options then exit"),
+	OPT_INTEGER(0, "aio", &record.opts.nr_cblocks,
+		    "asynchronous trace write operations (min: 1, max: 32, default: 2)"),
 	OPT_END()
 };
 
@@ -1870,6 +1876,13 @@ int cmd_record(int argc, const char **argv)
 		goto out;
 	}
 
+	if (!(1 <= rec->opts.nr_cblocks && rec->opts.nr_cblocks <= 32))
+		rec->opts.nr_cblocks = 2;
+
+	if (verbose > 0)
+		pr_info("AIO trace writes: %d\n", rec->opts.nr_cblocks);
+
+
 	err = __cmd_record(&record, argc, argv);
 out:
 	perf_evlist__delete(rec->evlist);
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 21bf7f5a3cf5..0a1ae2ae567a 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -82,6 +82,7 @@ struct record_opts {
 	bool         use_clockid;
 	clockid_t    clockid;
 	unsigned int proc_map_timeout;
+	int	     nr_cblocks;
 };
 
 struct option;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index e7a4b31a84fb..08be79650a85 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1018,7 +1018,8 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
  */
 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 			 unsigned int auxtrace_pages,
-			 bool auxtrace_overwrite)
+			 bool auxtrace_overwrite,
+			 int nr_cblocks)
 {
 	struct perf_evsel *evsel;
 	const struct cpu_map *cpus = evlist->cpus;
@@ -1028,7 +1029,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 	 * Its value is decided by evsel's write_backward.
 	 * So &mp should not be passed through const pointer.
 	 */
-	struct mmap_params mp;
+	struct mmap_params mp = { .nr_cblocks = nr_cblocks };
 
 	if (!evlist->mmap)
 		evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
@@ -1060,7 +1061,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 
 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
 {
-	return perf_evlist__mmap_ex(evlist, pages, 0, false);
+	return perf_evlist__mmap_ex(evlist, pages, 0, false, 2);
 }
 
 int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index dc66436add98..a94d3c613254 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -162,7 +162,8 @@ unsigned long perf_event_mlock_kb_in_pages(void);
 
 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 			 unsigned int auxtrace_pages,
-			 bool auxtrace_overwrite);
+			 bool auxtrace_overwrite,
+			 int nr_cblocks);
 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages);
 void perf_evlist__munmap(struct perf_evlist *evlist);
 
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index fc832676a798..384d17cd1379 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -155,6 +155,14 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb
 
 void perf_mmap__munmap(struct perf_mmap *map)
 {
+	int i;
+	if (map->data) {
+		for (i = 0; i < map->nr_cblocks; ++i)
+			zfree(&(map->data[i]));
+		zfree(&(map->data));
+	}
+	if (map->cblocks)
+		zfree(&(map->cblocks));
 	if (map->base != NULL) {
 		munmap(map->base, perf_mmap__mmap_len(map));
 		map->base = NULL;
@@ -166,6 +174,7 @@ void perf_mmap__munmap(struct perf_mmap *map)
 
 int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd)
 {
+	int i;
 	/*
 	 * The last one will be done at perf_mmap__consume(), so that we
 	 * make sure we don't prevent tools from consuming every last event in
@@ -190,6 +199,50 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd)
 		map->base = NULL;
 		return -1;
 	}
+	map->nr_cblocks = mp->nr_cblocks;
+	map->cblocks = calloc(map->nr_cblocks, sizeof(struct aiocb*));
+	if (!map->cblocks) {
+		pr_debug2("failed to allocate perf event data buffers, error %d\n",
+				errno);
+		return -1;
+	}
+	map->data = calloc(map->nr_cblocks, sizeof(void*));
+	if (map->data) {
+		int delta_max = sysconf(_SC_AIO_PRIO_DELTA_MAX);
+		for (i = 0; i < map->nr_cblocks; ++i) {
+			map->data[i] = malloc(perf_mmap__mmap_len(map));
+			if (map->data[i]) {
+				int prio;
+				unsigned char *data = map->data[i];
+				map->cblocks[i] = (struct aiocb *)&data[map->mask + 1];
+				memset(map->cblocks[i], 0, sizeof(struct aiocb));
+				/* Use cblock.aio_fildes value different from -1
+				 * to denote started aio write operation on the
+				 * cblock so it requires explicit record__aio_sync()
+				 * call prior the cblock may be reused again.
+				 */
+				map->cblocks[i]->aio_fildes = -1;
+				/* Allocate cblocks with decreasing priority to
+				 * have faster aio_write() calls because queued
+				 * requests are kept in separate per-prio queues
+				 * and adding a new request iterates thru shorter
+				 * per-prio list.
+				 */
+				prio = delta_max - i;
+				if (prio < 0)
+					prio = 0;
+				map->cblocks[i]->aio_reqprio = prio;
+			} else {
+				pr_debug2("failed to allocate perf event data buffer, error %d\n",
+						errno);
+				return -1;
+			}
+		}
+	} else {
+		pr_debug2("failed to alloc perf event data buffers, error %d\n",
+				errno);
+		return -1;
+	}
 	map->fd = fd;
 
 	if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index d82294db1295..4a9bb0ecae4f 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -6,6 +6,7 @@
 #include <linux/types.h>
 #include <asm/barrier.h>
 #include <stdbool.h>
+#include <aio.h>
 #include "auxtrace.h"
 #include "event.h"
 
@@ -25,6 +26,9 @@ struct perf_mmap {
 	bool		 overwrite;
 	struct auxtrace_mmap auxtrace_mmap;
 	char		 event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
+	void 		 **data;
+	struct aiocb	 **cblocks;
+	int 		 nr_cblocks;
 };
 
 /*
@@ -56,7 +60,7 @@ enum bkw_mmap_state {
 };
 
 struct mmap_params {
-	int			    prot, mask;
+	int			    prot, mask, nr_cblocks;
 	struct auxtrace_mmap_params auxtrace_mp;
 };
 


  reply	other threads:[~2018-09-05  7:20 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-05  7:16 [PATCH v7 0/2]: perf: reduce data loss when profiling highly parallel CPU bound workloads Alexey Budankov
2018-09-05  7:19 ` Alexey Budankov [this message]
2018-09-06 11:04   ` [PATCH v7 1/2]: perf util: map data buffer for preserving collected data Jiri Olsa
2018-09-06 11:50     ` Alexey Budankov
2018-09-06 11:04   ` Jiri Olsa
2018-09-06 11:54     ` Alexey Budankov
2018-09-05  7:39 ` [PATCH v7 2/2]: perf record: enable asynchronous trace writing Alexey Budankov
2018-09-06 11:04   ` Jiri Olsa
2018-09-06 11:57     ` Alexey Budankov
2018-09-06 11:04   ` Jiri Olsa
2018-09-06 11:58     ` Alexey Budankov
2018-09-06 11:04   ` Jiri Olsa
2018-09-06 11:59     ` Alexey Budankov
2018-09-06 11:04   ` Jiri Olsa
2018-09-06 12:09     ` Alexey Budankov
2018-09-05 11:28 ` [PATCH v7 0/2]: perf: reduce data loss when profiling highly parallel CPU bound workloads Jiri Olsa
2018-09-05 17:37   ` Alexey Budankov
2018-09-05 18:51     ` Arnaldo Carvalho de Melo
2018-09-06  6:03       ` Alexey Budankov
2018-09-06  8:14         ` Jiri Olsa
2018-09-06  8:20           ` Alexey Budankov
2018-09-06  6:59       ` Alexey Budankov
2018-09-06  6:57   ` Alexey Budankov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=cf6cd6ca-12dd-9ee8-8e18-ee02f289c45e@linux.intel.com \
    --to=alexey.budankov@linux.intel.com \
    --cc=acme@kernel.org \
    --cc=ak@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.