linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Alexey Budankov <alexey.budankov@linux.intel.com>
To: Ingo Molnar <mingo@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Jiri Olsa <jolsa@redhat.com>, Namhyung Kim <namhyung@kernel.org>,
	Andi Kleen <ak@linux.intel.com>,
	linux-kernel <linux-kernel@vger.kernel.org>
Subject: [PATCH v6 1/2]: perf util: map data buffer for preserving collected data
Date: Tue, 4 Sep 2018 14:56:50 +0300	[thread overview]
Message-ID: <2325f44b-d9bf-d8da-2fdb-8b8e87a255fc@linux.intel.com> (raw)
In-Reply-To: <01963930-68b3-ff33-6e05-acaff3efc083@linux.intel.com>


The map->data buffers are used to preserve map->base profiling data 
for writing to disk. AIO map->cblocks are used to queue corresponding 
map->data buffers for asynchronous writing. map->cblocks objects are 
located in the last page of every map->data buffer.

Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
 Changes in v6:
  - adjusted setting of priorities for cblocks;
 Changes in v5:
  - reshaped layout of data structures;
  - implemented --aio option;
 Changes in v4:
  - converted mmap()/munmap() to malloc()/free() for mmap->data buffer management 
 Changes in v2:
  - converted zalloc() to calloc() for allocation of mmap_aio array,
  - cleared typo and adjusted fallback branch code;
--- 
 tools/perf/builtin-record.c | 12 +++++++++-
 tools/perf/perf.h           |  1 +
 tools/perf/util/evlist.c    |  7 +++---
 tools/perf/util/evlist.h    |  3 ++-
 tools/perf/util/mmap.c      | 53 +++++++++++++++++++++++++++++++++++++++++++++
 tools/perf/util/mmap.h      |  6 ++++-
 6 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 22ebeb92ac51..8eb093f8d4d5 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -326,7 +326,8 @@ static int record__mmap_evlist(struct record *rec,
 
 	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
 				 opts->auxtrace_mmap_pages,
-				 opts->auxtrace_snapshot_mode) < 0) {
+				 opts->auxtrace_snapshot_mode,
+				 opts->nr_cblocks) < 0) {
 		if (errno == EPERM) {
 			pr_err("Permission error mapping pages.\n"
 			       "Consider increasing "
@@ -1519,6 +1520,7 @@ static struct record record = {
 			.default_per_cpu = true,
 		},
 		.proc_map_timeout     = 500,
+		.nr_cblocks	      = 2
 	},
 	.tool = {
 		.sample		= process_sample_event,
@@ -1678,6 +1680,8 @@ static struct option __record_options[] = {
 			  "signal"),
 	OPT_BOOLEAN(0, "dry-run", &dry_run,
 		    "Parse options then exit"),
+	OPT_INTEGER(0, "aio", &record.opts.nr_cblocks,
+		    "asynchronous trace write operations (min: 1, max: 32, default: 2)"),
 	OPT_END()
 };
 
@@ -1870,6 +1874,12 @@ int cmd_record(int argc, const char **argv)
 		goto out;
 	}
 
+	if (!(1 <= rec->opts.nr_cblocks && rec->opts.nr_cblocks <= 32)) {
+		if (verbose > 0)
+			pr_warning("Asynchronous trace writes reset to default.\n");
+		rec->opts.nr_cblocks = 2;
+	}
+
 	err = __cmd_record(&record, argc, argv);
 out:
 	perf_evlist__delete(rec->evlist);
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 21bf7f5a3cf5..0a1ae2ae567a 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -82,6 +82,7 @@ struct record_opts {
 	bool         use_clockid;
 	clockid_t    clockid;
 	unsigned int proc_map_timeout;
+	int	     nr_cblocks;
 };
 
 struct option;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index e7a4b31a84fb..08be79650a85 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1018,7 +1018,8 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
  */
 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 			 unsigned int auxtrace_pages,
-			 bool auxtrace_overwrite)
+			 bool auxtrace_overwrite,
+			 int nr_cblocks)
 {
 	struct perf_evsel *evsel;
 	const struct cpu_map *cpus = evlist->cpus;
@@ -1028,7 +1029,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 	 * Its value is decided by evsel's write_backward.
 	 * So &mp should not be passed through const pointer.
 	 */
-	struct mmap_params mp;
+	struct mmap_params mp = { .nr_cblocks = nr_cblocks };
 
 	if (!evlist->mmap)
 		evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
@@ -1060,7 +1061,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 
 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
 {
-	return perf_evlist__mmap_ex(evlist, pages, 0, false);
+	return perf_evlist__mmap_ex(evlist, pages, 0, false, 2);
 }
 
 int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index dc66436add98..a94d3c613254 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -162,7 +162,8 @@ unsigned long perf_event_mlock_kb_in_pages(void);
 
 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 			 unsigned int auxtrace_pages,
-			 bool auxtrace_overwrite);
+			 bool auxtrace_overwrite,
+			 int nr_cblocks);
 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages);
 void perf_evlist__munmap(struct perf_evlist *evlist);
 
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index fc832676a798..384d17cd1379 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -155,6 +155,14 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb
 
 void perf_mmap__munmap(struct perf_mmap *map)
 {
+	int i;
+	if (map->data) {
+		for (i = 0; i < map->nr_cblocks; ++i)
+			zfree(&(map->data[i]));
+		zfree(&(map->data));
+	}
+	if (map->cblocks)
+		zfree(&(map->cblocks));
 	if (map->base != NULL) {
 		munmap(map->base, perf_mmap__mmap_len(map));
 		map->base = NULL;
@@ -166,6 +174,7 @@ void perf_mmap__munmap(struct perf_mmap *map)
 
 int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd)
 {
+	int i;
 	/*
 	 * The last one will be done at perf_mmap__consume(), so that we
 	 * make sure we don't prevent tools from consuming every last event in
@@ -190,6 +199,50 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd)
 		map->base = NULL;
 		return -1;
 	}
+	map->nr_cblocks = mp->nr_cblocks;
+	map->cblocks = calloc(map->nr_cblocks, sizeof(struct aiocb*));
+	if (!map->cblocks) {
+		pr_debug2("failed to allocate perf event data buffers, error %d\n",
+				errno);
+		return -1;
+	}
+	map->data = calloc(map->nr_cblocks, sizeof(void*));
+	if (map->data) {
+		int delta_max = sysconf(_SC_AIO_PRIO_DELTA_MAX);
+		for (i = 0; i < map->nr_cblocks; ++i) {
+			map->data[i] = malloc(perf_mmap__mmap_len(map));
+			if (map->data[i]) {
+				int prio;
+				unsigned char *data = map->data[i];
+				map->cblocks[i] = (struct aiocb *)&data[map->mask + 1];
+				memset(map->cblocks[i], 0, sizeof(struct aiocb));
+				/* Use cblock.aio_fildes value different from -1
+				 * to denote started aio write operation on the
+				 * cblock so it requires explicit record__aio_sync()
+				 * call prior the cblock may be reused again.
+				 */
+				map->cblocks[i]->aio_fildes = -1;
+				/* Allocate cblocks with decreasing priority to
+				 * have faster aio_write() calls because queued
+				 * requests are kept in separate per-prio queues
+				 * and adding a new request iterates thru shorter
+				 * per-prio list.
+				 */
+				prio = delta_max - i;
+				if (prio < 0)
+					prio = 0;
+				map->cblocks[i]->aio_reqprio = prio;
+			} else {
+				pr_debug2("failed to allocate perf event data buffer, error %d\n",
+						errno);
+				return -1;
+			}
+		}
+	} else {
+		pr_debug2("failed to alloc perf event data buffers, error %d\n",
+				errno);
+		return -1;
+	}
 	map->fd = fd;
 
 	if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index d82294db1295..4a9bb0ecae4f 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -6,6 +6,7 @@
 #include <linux/types.h>
 #include <asm/barrier.h>
 #include <stdbool.h>
+#include <aio.h>
 #include "auxtrace.h"
 #include "event.h"
 
@@ -25,6 +26,9 @@ struct perf_mmap {
 	bool		 overwrite;
 	struct auxtrace_mmap auxtrace_mmap;
 	char		 event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
+	void 		 **data;
+	struct aiocb	 **cblocks;
+	int 		 nr_cblocks;
 };
 
 /*
@@ -56,7 +60,7 @@ enum bkw_mmap_state {
 };
 
 struct mmap_params {
-	int			    prot, mask;
+	int			    prot, mask, nr_cblocks;
 	struct auxtrace_mmap_params auxtrace_mp;
 };

       reply	other threads:[~2018-09-04 11:57 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <01963930-68b3-ff33-6e05-acaff3efc083@linux.intel.com>
2018-09-04 11:56 ` Alexey Budankov [this message]
2018-09-04 17:34   ` [PATCH v6 1/2]: perf util: map data buffer for preserving collected data Andi Kleen
2018-09-04 17:50     ` Alexey Budankov
2018-09-04 11:58 ` [PATCH v6 2/2]: perf record: enable asynchronous trace writing Alexey Budankov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2325f44b-d9bf-d8da-2fdb-8b8e87a255fc@linux.intel.com \
    --to=alexey.budankov@linux.intel.com \
    --cc=acme@kernel.org \
    --cc=ak@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).