linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Alexey Budankov <alexey.budankov@linux.intel.com>
To: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>, Namhyung Kim <namhyung@kernel.org>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	linux-kernel <linux-kernel@vger.kernel.org>,
	Andi Kleen <ak@linux.intel.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com>,
	Alexander Antonov <alexander.antonov@linux.intel.com>
Subject: [PATCH v3 11/12] perf session: load data directory files for analysis
Date: Mon, 16 Nov 2020 15:22:38 +0300	[thread overview]
Message-ID: <e788567f-4774-f165-0a93-bcbf3f237451@linux.intel.com> (raw)
In-Reply-To: <7d197a2d-56e2-896d-bf96-6de0a4db1fb8@linux.intel.com>


Introduce a decompressor into the trace reader object so that
decompression can be executed on a per data file basis, separately
for every data file located in the data directory.

Load data directory files and provide basic raw dump and aggregated
analysis support for data directories in report mode, still without
memory consumption optimizations.

Design and implementation are based on the prototype [1], [2].

[1] git clone https://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git -b perf/record_threads
[2] https://lore.kernel.org/lkml/20180913125450.21342-1-jolsa@kernel.org/

Suggested-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
 tools/perf/util/session.c | 350 +++++++++++++++++++++++++++++++++-----
 tools/perf/util/session.h |   4 +
 2 files changed, 315 insertions(+), 39 deletions(-)

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 3c2fafb3a04d..3cb30c1667c0 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -34,6 +34,55 @@
 #include "arch/common.h"
 #include <internal/lib.h>
 
+struct reader;
+/* Per-event callback type; struct reader keeps one in its 'process' member. */
+typedef s64 (*reader_cb_t)(struct perf_session *session,
+			   union perf_event *event,
+			   u64 file_offset,
+			   const char *file_path);
+
+/*
+ * On 64bit the whole data file is mapped in one go, so a single mmap slot
+ * is enough. On 32bit the file is read through 32MB slices, up to 128 slots.
+ */
+#if BITS_PER_LONG == 64
+#define MMAP_SIZE ULLONG_MAX
+#define NUM_MMAPS 1
+#else
+#define MMAP_SIZE (32 * 1024 * 1024ULL)
+#define NUM_MMAPS 128
+#endif
+
+struct reader_state {
+	char	*mmaps[NUM_MMAPS];	/* mapped windows, one slot per mmap() call */
+	size_t	 mmap_size;	/* length of every mapping */
+	int	 mmap_idx;	/* slot of mmaps[] the next mapping will use */
+	char	*mmap_cur;	/* base address of the most recent mapping */
+	u64	 file_pos;	/* file offset of the next event (file_offset + head) */
+	u64	 file_offset;	/* page-aligned file offset of the current mapping */
+	u64	 data_size;	/* file offset where the data ends (data_offset + data_size) */
+	u64	 head;	/* offset of the next event inside the current mapping */
+	bool	 eof;	/* set once file_pos has reached data_size */
+	u64	 size;	/* bytes consumed since this reader's turn began */
+};
+
+enum {
+	READER_EOF	=  0,	/* end of data, or no event left in the current mapping */
+	READER_OK	=  1,	/* a mapping was set up / an event was processed */
+};
+
+struct reader {
+	int		 fd;	/* file descriptor handed to mmap() */
+	const char	 *path;	/* file name, used in debug and error messages */
+	u64		 data_size;	/* number of data bytes to read */
+	u64		 data_offset;	/* file offset where the data starts */
+	reader_cb_t	 process;	/* per-event callback; left unset by the directory reading code */
+	struct zstd_data zstd_data;	/* decompressor state private to this file */
+	struct decomp	 *decomp;	/* first decompressed chunk of this file */
+	struct decomp	 *decomp_last;	/* most recently added decompressed chunk */
+	struct reader_state state;	/* mmap/read cursor, see struct reader_state */
+};
+
 #ifdef HAVE_ZSTD_SUPPORT
 static int perf_session__process_compressed_event(struct perf_session *session,
 						  union perf_event *event, u64 file_offset,
@@ -43,7 +92,10 @@ static int perf_session__process_compressed_event(struct perf_session *session,
 	size_t decomp_size, src_size;
 	u64 decomp_last_rem = 0;
 	size_t mmap_len, decomp_len = session->header.env.comp_mmap_len;
-	struct decomp *decomp, *decomp_last = session->decomp_last;
+	struct decomp *decomp, *decomp_last = session->active_reader ?
+		session->active_reader->decomp_last : session->decomp_last;
+	struct zstd_data *zstd_data = session->active_reader ?
+		&session->active_reader->zstd_data: &session->zstd_data;
 
 	if (decomp_last) {
 		decomp_last_rem = decomp_last->size - decomp_last->head;
@@ -71,7 +123,7 @@ static int perf_session__process_compressed_event(struct perf_session *session,
 	src = (void *)event + sizeof(struct perf_record_compressed);
 	src_size = event->pack.header.size - sizeof(struct perf_record_compressed);
 
-	decomp_size = zstd_decompress_stream(&(session->zstd_data), src, src_size,
+	decomp_size = zstd_decompress_stream(zstd_data, src, src_size,
 				&(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
 	if (!decomp_size) {
 		munmap(decomp, mmap_len);
@@ -81,12 +133,22 @@ static int perf_session__process_compressed_event(struct perf_session *session,
 
 	decomp->size += decomp_size;
 
-	if (session->decomp == NULL) {
-		session->decomp = decomp;
-		session->decomp_last = decomp;
+	if (session->active_reader) {
+		if (session->active_reader->decomp == NULL) {
+			session->active_reader->decomp = decomp;
+			session->active_reader->decomp_last = decomp;
+		} else {
+			session->active_reader->decomp_last->next = decomp;
+			session->active_reader->decomp_last = decomp;
+		}
 	} else {
-		session->decomp_last->next = decomp;
-		session->decomp_last = decomp;
+		if (session->decomp == NULL) {
+			session->decomp = decomp;
+			session->decomp_last = decomp;
+		} else {
+			session->decomp_last->next = decomp;
+			session->decomp_last = decomp;
+		}
 	}
 
 	pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size);
@@ -277,11 +339,10 @@ static void perf_session__delete_threads(struct perf_session *session)
 	machine__delete_threads(&session->machines.host);
 }
 
-static void perf_session__release_decomp_events(struct perf_session *session)
+static void perf_decomp__release_events(struct decomp *next)
 {
-	struct decomp *next, *decomp;
+	struct decomp *decomp;
 	size_t mmap_len;
-	next = session->decomp;
 	do {
 		decomp = next;
 		if (decomp == NULL)
@@ -294,13 +355,21 @@ static void perf_session__release_decomp_events(struct perf_session *session)
 
 void perf_session__delete(struct perf_session *session)
 {
+	int r;
+
 	if (session == NULL)
 		return;
 	auxtrace__free(session);
 	auxtrace_index__free(&session->auxtrace_index);
 	perf_session__destroy_kernel_maps(session);
 	perf_session__delete_threads(session);
-	perf_session__release_decomp_events(session);
+	if (session->readers) {
+		for (r = 0; r < session->nr_readers; r++)
+			perf_decomp__release_events(session->readers[r].decomp);
+		zfree(&session->readers);
+		session->nr_readers = 0;
+	}
+	perf_decomp__release_events(session->decomp);
 	perf_env__exit(&session->header.env);
 	machines__exit(&session->machines);
 	if (session->data)
@@ -2094,7 +2163,8 @@ static int __perf_session__process_decomp_events(struct perf_session *session)
 {
 	s64 skip;
 	u64 size;
-	struct decomp *decomp = session->decomp_last;
+	struct decomp *decomp = session->active_reader ?
+		session->active_reader->decomp_last : session->decomp_last;
 
 	if (!decomp)
 		return 0;
@@ -2124,33 +2194,6 @@ static int __perf_session__process_decomp_events(struct perf_session *session)
 	return 0;
 }
 
-/*
- * On 64bit we can mmap the data file in one go. No need for tiny mmap
- * slices. On 32bit we use 32MB.
- */
-#if BITS_PER_LONG == 64
-#define MMAP_SIZE ULLONG_MAX
-#define NUM_MMAPS 1
-#else
-#define MMAP_SIZE (32 * 1024 * 1024ULL)
-#define NUM_MMAPS 128
-#endif
-
-struct reader;
-
-typedef s64 (*reader_cb_t)(struct perf_session *session,
-			   union perf_event *event,
-			   u64 file_offset,
-			   const char *file_path);
-
-struct reader {
-	int		 fd;
-	const char	 *path;
-	u64		 data_size;
-	u64		 data_offset;
-	reader_cb_t	 process;
-};
-
 static int
 reader__process_events(struct reader *rd, struct perf_session *session,
 		       struct ui_progress *prog)
@@ -2308,6 +2351,232 @@ static int __perf_session__process_events(struct perf_session *session)
 	return err;
 }
 
+static int
+reader__init(struct reader *rd, bool *one_mmap)
+{
+	struct reader_state *st = &rd->state;
+	char **mmaps = st->mmaps;
+	/* Set up rd's read cursor and zstd state. Returns 0, or -1 if zstd_init() fails. */
+	pr_debug("reader processing %s\n", rd->path);
+
+	st->head = rd->data_offset;
+	/* data_size is kept as an end offset, i.e. it includes data_offset. */
+	st->data_size = rd->data_size + rd->data_offset;
+
+	st->mmap_size = MMAP_SIZE;
+	if (st->mmap_size > st->data_size) {
+		st->mmap_size = st->data_size;
+		if (one_mmap)
+			*one_mmap = true;	/* mapping length was clamped to the end of the data */
+	}
+
+	memset(mmaps, 0, sizeof(st->mmaps));
+
+	if (zstd_init(&rd->zstd_data, 0))
+		return -1;
+
+	return 0;
+}
+
+static int
+reader__mmap(struct reader *rd, struct perf_session *session)
+{
+	struct reader_state *st = &rd->state;
+	int mmap_prot, mmap_flags;
+	char *buf, **mmaps = st->mmaps;
+	u64 page_offset;
+	/* Map the next window of rd's file. Returns READER_OK, READER_EOF at end of data, or -errno. */
+	if (st->file_pos >= st->data_size) {
+		st->eof = true;
+		return READER_EOF;
+	}
+
+	mmap_prot  = PROT_READ;
+	mmap_flags = MAP_SHARED;
+	/* NOTE(review): presumably a private writable mapping lets events be byte-swapped in place - confirm. */
+	if (session->header.needs_swap) {
+		mmap_prot  |= PROT_WRITE;
+		mmap_flags = MAP_PRIVATE;
+	}
+	/* Drop whatever mapping still occupies the slot that is about to be reused. */
+	if (mmaps[st->mmap_idx]) {
+		munmap(mmaps[st->mmap_idx], st->mmap_size);
+		mmaps[st->mmap_idx] = NULL;
+	}
+	/* Move the whole pages in front of head into file_offset, keeping it page aligned. */
+	page_offset = page_size * (st->head / page_size);
+	st->file_offset += page_offset;
+	st->head -= page_offset;
+	/* Slots are cycled with a mask below, which relies on NUM_MMAPS being a power of two. */
+	buf = mmap(NULL, st->mmap_size, mmap_prot, mmap_flags, rd->fd,
+		   st->file_offset);
+	if (buf == MAP_FAILED) {
+		pr_err("failed to mmap file\n");
+		return -errno;
+	}
+	mmaps[st->mmap_idx] = st->mmap_cur = buf;
+	st->mmap_idx = (st->mmap_idx + 1) & (ARRAY_SIZE(st->mmaps) - 1);
+	st->file_pos = st->file_offset + st->head;
+	return READER_OK;
+}
+
+static int
+reader__read_event(struct reader *rd, struct perf_session *session,
+		   struct ui_progress *prog)
+{
+	struct reader_state *st = &rd->state;
+	union perf_event *event;
+	int ret = READER_OK;
+	u64 size;
+	s64 skip;
+	/* Process the event at st->head. Returns READER_OK, READER_EOF when fetch_mmaped_event() yields no event, else the failing callee's status. */
+	event = fetch_mmaped_event(st->head, st->mmap_size, st->mmap_cur, session->header.needs_swap);
+	if (IS_ERR(event))
+		return PTR_ERR(event);
+
+	if (!event)
+		return READER_EOF;
+	/* Route compressed-event handling to this reader's own decomp list and zstd state. */
+	session->active_reader = rd;
+	size = event->header.size;
+	skip = -EINVAL;	/* reported when the size check below fails */
+
+	if (size < sizeof(struct perf_event_header) ||
+	    (skip = perf_session__process_event(session, event, st->file_pos, rd->path)) < 0) {
+		pr_err("%#" PRIx64 " [%s] [%#x]: failed to process type: %d [%s]\n",
+		       st->file_offset + st->head, rd->path, event->header.size,
+		       event->header.type, strerror(-skip));
+		ret = skip;
+		goto out;
+	}
+
+	if (skip)
+		size += skip;
+
+	st->size += size;
+	st->head += size;
+	st->file_pos += size;
+	/* Deliver events unpacked into this reader's last decompressed chunk, if any. */
+	skip = __perf_session__process_decomp_events(session);
+	if (skip)
+		ret = skip;
+
+	ui_progress__update(prog, size);
+
+out:
+	session->active_reader = NULL;
+	return ret;
+}
+/*
+ * Read, merge and process the files of a data directory. Version 1 of the
+ * directory layout is assumed, where each data file holds per-cpu data
+ * already sorted by the kernel. Returns 0 on success, an error status otherwise.
+ */
+static int __perf_session__process_dir_events(struct perf_session *session)
+{
+	struct perf_data *data = session->data;
+	struct perf_tool *tool = session->tool;
+	int i, ret = 0, readers = 1;
+	struct ui_progress prog;
+	u64 total_size = perf_data__size(session->data);
+	struct reader *rd;
+
+	perf_tool__fill_defaults(tool);
+
+	ui_progress__init_size(&prog, total_size, "Sorting events...");
+	/* Slot 0 is for the main file, plus one slot per non-empty directory file. */
+	for (i = 0; i < data->dir.nr; i++) {
+		if (data->dir.files[i].size)
+			readers++;
+	}
+
+	rd = session->readers = zalloc(readers * sizeof(struct reader));
+	if (!rd)
+		return -ENOMEM;
+	session->nr_readers = readers;
+	readers = 0;
+	/* Reader 0 is the main data file; a failed init or mmap is reported via ret. */
+	rd[readers] = (struct reader) {
+		.fd		= perf_data__fd(session->data),
+		.path		= session->data->file.path,
+		.data_size	= session->header.data_size,
+		.data_offset	= session->header.data_offset,
+	};
+	ret = reader__init(&rd[readers], &session->one_mmap);
+	if (ret || (ret = reader__mmap(&rd[readers], session)) != READER_OK)
+		goto out_err;
+	readers++;
+	/* One more reader for every non-empty file of the directory. */
+	for (i = 0; i < data->dir.nr; i++) {
+		if (data->dir.files[i].size) {
+			rd[readers] = (struct reader) {
+				.fd		= data->dir.files[i].fd,
+				.path		= data->dir.files[i].path,
+				.data_size	= data->dir.files[i].size,
+				.data_offset	= 0,
+			};
+			ret = reader__init(&rd[readers], &session->one_mmap);
+			if (ret || (ret = reader__mmap(&rd[readers], session)) != READER_OK)
+				goto out_err;
+			readers++;
+		}
+	}
+	/* Round-robin over the readers until every one has reached its end of data. */
+	i = 0;
+
+	while ((ret >= 0) && readers) {
+		if (session_done())
+			break;	/* stop reading, but still flush and clean up below */
+
+		if (rd[i].state.eof) {
+			i = (i + 1) % session->nr_readers;
+			continue;
+		}
+
+		ret = reader__read_event(&rd[i], session, &prog);
+		if (ret < 0)
+			goto out_err;	/* keep the error: the final flush would overwrite ret */
+		if (ret == READER_EOF) {
+			ret = reader__mmap(&rd[i], session);
+			if (ret < 0)
+				goto out_err;
+			if (ret == READER_EOF)
+				readers--;
+		}
+
+		/*
+		 * Processing 10MBs of data from each reader in sequence,
+		 * because that's the way the ordered events sorting works
+		 * most efficiently.
+		 */
+		if (rd[i].state.size >= 10*1024*1024) {
+			rd[i].state.size = 0;
+			i = (i + 1) % session->nr_readers;
+		}
+	}
+
+	ret = ordered_events__flush(&session->ordered_events, OE_FLUSH__FINAL);
+	if (ret)
+		goto out_err;
+
+	ret = perf_session__flush_thread_stacks(session);
+out_err:
+	ui_progress__finish();
+
+	if (!tool->no_warn)
+		perf_session__warn_about_errors(session);
+
+	/*
+	 * We may be switching perf.data output, make ordered_events
+	 * reusable.
+	 */
+	ordered_events__reinit(&session->ordered_events);
+
+	session->one_mmap = false;
+
+	return ret;
+}
+
 int perf_session__process_events(struct perf_session *session)
 {
 	if (perf_session__register_idle_thread(session) < 0)
@@ -2316,6 +2585,9 @@ int perf_session__process_events(struct perf_session *session)
 	if (perf_data__is_pipe(session->data))
 		return __perf_session__process_pipe_events(session);
 
+	if (perf_data__is_dir(session->data))
+		return __perf_session__process_dir_events(session);
+
 	return __perf_session__process_events(session);
 }
 
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 378ffc3e2809..cbc54615d155 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -19,6 +19,7 @@ struct thread;
 
 struct auxtrace;
 struct itrace_synth_opts;
+struct reader;
 
 struct perf_session {
 	struct perf_header	header;
@@ -41,6 +42,9 @@ struct perf_session {
 	struct zstd_data	zstd_data;
 	struct decomp		*decomp;
 	struct decomp		*decomp_last;
+	struct reader		*readers;
+	int			nr_readers;
+	struct reader		*active_reader;
 };
 
 struct decomp {
-- 
2.24.1

  parent reply	other threads:[~2020-11-16 12:38 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-16 12:12 [PATCH v3 00/12] Introduce threaded trace streaming for basic perf record operation Alexey Budankov
2020-11-16 12:14 ` [PATCH v3 01/12] perf record: introduce thread affinity and mmap masks Alexey Budankov
2020-11-20 10:01   ` Namhyung Kim
2020-11-16 12:15 ` [PATCH v3 02/12] perf record: introduce thread specific data array Alexey Budankov
2020-11-20 10:14   ` Namhyung Kim
2020-11-16 12:16 ` [PATCH v3 03/12] perf record: introduce thread local variable Alexey Budankov
2020-11-20 10:20   ` Namhyung Kim
2020-11-16 12:17 ` [PATCH v3 04/12] perf record: stop threads in the end of trace streaming Alexey Budankov
2020-11-16 12:18 ` [PATCH v3 05/12] perf record: start threads in the beginning " Alexey Budankov
2020-11-16 12:18 ` [PATCH v3 06/12] perf record: introduce data file at mmap buffer object Alexey Budankov
2020-11-20 10:28   ` Namhyung Kim
2020-11-16 12:19 ` [PATCH v3 07/12] perf record: init " Alexey Budankov
2020-11-20 10:49   ` Namhyung Kim
2021-03-01 11:16     ` Bayduraev, Alexey V
2021-03-01 11:44       ` Namhyung Kim
2021-03-01 13:33         ` Bayduraev, Alexey V
2021-03-01 14:20           ` Namhyung Kim
2020-11-16 12:20 ` [PATCH v3 08/12] perf record: introduce --threads=<spec> command line option Alexey Budankov
2020-11-20 11:09   ` Namhyung Kim
2020-11-16 12:21 ` [PATCH v3 09/12] perf record: document parallel data streaming mode Alexey Budankov
2020-11-16 12:22 ` [PATCH v3 10/12] perf report: output data file name in raw trace dump Alexey Budankov
2020-11-16 12:22 ` Alexey Budankov [this message]
2020-11-16 12:25 ` [PATCH v3 12/12] perf session: use reader functions to load perf data file Alexey Budankov
2020-11-20  9:45 ` [PATCH v3 00/12] Introduce threaded trace streaming for basic perf record operation Namhyung Kim
2020-12-15 15:05   ` Alexei Budankov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e788567f-4774-f165-0a93-bcbf3f237451@linux.intel.com \
    --to=alexey.budankov@linux.intel.com \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=ak@linux.intel.com \
    --cc=alexander.antonov@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=alexey.v.bayduraev@linux.intel.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).