All of lore.kernel.org
 help / color / mirror / Atom feed
From: Don Zickus <dzickus@redhat.com>
To: acme@ghostprotocols.net
Cc: LKML <linux-kernel@vger.kernel.org>,
	jolsa@redhat.com, jmario@redhat.com, fowles@inreach.com,
	peterz@infradead.org, eranian@google.com, andi.kleen@intel.com,
	Arnaldo Carvalho de Melo <acme@redhat.com>,
	David Ahern <dsahern@gmail.com>, Don Zickus <dzickus@redhat.com>,
	Frederic Weisbecker <fweisbec@gmail.com>,
	Mike Galbraith <efault@gmx.de>, Paul Mackerras <paulus@samba.org>,
	Richard Fowles <rfowles@redhat.com>
Subject: [PATCH 03/15 V3] perf c2c: Shared data analyser
Date: Mon, 24 Mar 2014 15:36:54 -0400	[thread overview]
Message-ID: <1395689826-215033-4-git-send-email-dzickus@redhat.com> (raw)
In-Reply-To: <1395689826-215033-1-git-send-email-dzickus@redhat.com>

From: Arnaldo Carvalho de Melo <acme@redhat.com>

This is the start of a new perf tool that will collect information about
memory accesses and analyse it to find things like hot cachelines, etc.

This is basically an attempt to get a prototype written by Richard Fowles
rewritten using the tools/perf coding style and libraries.

Starting from 'perf sched' as a base, this patch begins the process by adding
the 'record' subcommand to collect the needed memory load and store samples.

It also has the basic 'report' skeleton, resolving the sample address
and hooking the events found in a perf.data file to methods that handle
them; right now these just print the resolved perf_sample data structure
after each event name.

[dcz: refreshed to latest upstream changes]

Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Joe Mario <jmario@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Richard Fowles <rfowles@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-c2c.c |  22 +++++
 tools/perf/Makefile.perf            |   1 +
 tools/perf/builtin-c2c.c            | 185 ++++++++++++++++++++++++++++++++++++
 tools/perf/builtin.h                |   1 +
 tools/perf/perf.c                   |   1 +
 5 files changed, 210 insertions(+)
 create mode 100644 tools/perf/Documentation/perf-c2c.c
 create mode 100644 tools/perf/builtin-c2c.c

diff --git a/tools/perf/Documentation/perf-c2c.c b/tools/perf/Documentation/perf-c2c.c
new file mode 100644
index 0000000..4d52798
--- /dev/null
+++ b/tools/perf/Documentation/perf-c2c.c
@@ -0,0 +1,22 @@
+perf-c2c(1)
+===========
+
+NAME
+----
+perf-c2c - Shared Data C2C/HITM Analyzer.
+
+SYNOPSIS
+--------
+[verse]
+'perf c2c' record
+
+DESCRIPTION
+-----------
+These are the variants of perf c2c:
+
+  'perf c2c record <command>' to record the memory accesses of an arbitrary
+  workload.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-mem[1]
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 667e85a..069bdca 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -431,6 +431,7 @@ BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o
 BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o
 BUILTIN_OBJS += $(OUTPUT)bench/futex-requeue.o
 
+BUILTIN_OBJS += $(OUTPUT)builtin-c2c.o
 BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
 BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o
 BUILTIN_OBJS += $(OUTPUT)builtin-help.o
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
new file mode 100644
index 0000000..2935484
--- /dev/null
+++ b/tools/perf/builtin-c2c.c
@@ -0,0 +1,185 @@
+#include "builtin.h"
+#include "cache.h"
+
+#include "util/evlist.h"
+#include "util/parse-options.h"
+#include "util/session.h"
+#include "util/tool.h"
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+
+struct perf_c2c {
+	struct perf_tool tool;	/* event callbacks, filled in by cmd_c2c() and handed to the perf session */
+};
+
+static int perf_sample__fprintf(struct perf_sample *sample,
+				struct perf_evsel *evsel,
+				struct addr_location *al, FILE *fp)
+{	/* one line per sample: event, pid, tid, ip, addr, weight, data_src, dso:symbol; returns fprintf()'s result */
+	return fprintf(fp, "%25.25s: %5d %5d 0x%016" PRIx64 " 0x%016" PRIx64 " %5" PRIu64 " 0x%06" PRIx64 " %s:%s\n",
+		       perf_evsel__name(evsel),
+		       sample->pid, sample->tid, sample->ip, sample->addr,	/* ip and addr are both zero-padded 16-digit hex */
+		       sample->weight, sample->data_src,
+		       al->map ? (al->map->dso ? al->map->dso->long_name : "???") : "???",	/* "???" when map or dso is missing */
+		       al->sym ? al->sym->name : "???");	/* "???" when the address did not resolve to a symbol */
+}
+
+static int perf_c2c__process_load(struct perf_evsel *evsel,
+				  struct perf_sample *sample,
+				  struct addr_location *al)
+{	/* handler registered for the mem-loads event in handlers[] */
+	perf_sample__fprintf(sample, evsel, al, stdout);	/* for now just dump the resolved sample; result is not checked */
+	return 0;	/* always reports success */
+}
+
+static int perf_c2c__process_store(struct perf_evsel *evsel,
+				   struct perf_sample *sample,
+				   struct addr_location *al)
+{	/* handler registered for the mem-stores event in handlers[] */
+	perf_sample__fprintf(sample, evsel, al, stdout);	/* for now just dump the resolved sample; result is not checked */
+	return 0;	/* always reports success */
+}
+
+static const struct perf_evsel_str_handler handlers[] = {	/* event name -> sample callback; the names are also the -e arguments given to 'record' */
+	{ "cpu/mem-loads,ldlat=30/pp", perf_c2c__process_load, },
+	{ "cpu/mem-stores/pp",	       perf_c2c__process_store, },
+};
+
+typedef int (*sample_handler)(struct perf_evsel *evsel,	/* signature of the per-event callbacks stored in evsel->handler */
+			      struct perf_sample *sample,
+			      struct addr_location *al);
+
+static int perf_c2c__process_sample(struct perf_tool *tool __maybe_unused,
+				    union perf_event *event,
+				    struct perf_sample *sample,
+				    struct perf_evsel *evsel,
+				    struct machine *machine)
+{
+	struct addr_location al;
+	sample_handler handle;
+
+	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
+		pr_err("problem processing %d event, skipping it.\n",
+		       event->header.type);
+		return -1;
+	}
+
+	/* samples from events without a registered handler are accepted and ignored */
+	handle = evsel->handler;
+	if (handle == NULL)
+		return 0;
+
+	return handle(evsel, sample, &al);
+}
+
+static int perf_c2c__read_events(struct perf_c2c *c2c)
+{	/* open input_name, attach handlers[] to matching events, replay; returns 0 or an error */
+	int err;
+	struct perf_session *session;
+	struct perf_data_file file = {
+			.path = input_name,
+			.mode = PERF_DATA_MODE_READ,
+	};
+	struct perf_evsel *evsel;
+
+	session = perf_session__new(&file, 0, &c2c->tool);
+	if (session == NULL) {
+		pr_debug("No memory for session\n");
+		return -1;	/* nothing was acquired yet, so nothing to release */
+	}
+
+	/* setup the evsel handlers for each event type */
+	evlist__for_each(session->evlist, evsel) {
+		const char *name = perf_evsel__name(evsel);
+		unsigned int i;
+
+		for (i = 0; i < ARRAY_SIZE(handlers); i++) {
+			if (!strcmp(name, handlers[i].name))
+				evsel->handler = handlers[i].handler;
+		}
+	}
+
+	err = perf_session__process_events(session, &c2c->tool);
+	if (err)
+		pr_err("Failed to process events, error %d\n", err);
+
+	perf_session__delete(session);	/* release the session on success and failure alike */
+	return err;
+}
+
+static int perf_c2c__report(struct perf_c2c *c2c)
+{
+	setup_pager();	/* NOTE(review): presumably routes the per-sample dump through the pager -- confirm */
+	return perf_c2c__read_events(c2c);	/* the result of the replay is the result of 'report' */
+}
+
+static int perf_c2c__record(int argc, const char **argv)
+{	/* build "record -W -d -a -e <ev>... <user args>" and hand it to cmd_record(); argv[0] is the subcommand */
+	unsigned int rec_argc, i, j;
+	const char **rec_argv;
+	const char * const record_args[] = {
+		"record",
+		/* "--phys-addr", */
+		"-W",
+		"-d",
+		"-a",
+	};
+
+	rec_argc = ARRAY_SIZE(record_args) + 2 * ARRAY_SIZE(handlers) + argc - 1;
+	rec_argv = calloc(rec_argc + 1, sizeof(char *));	/* zeroed, so the extra slot is the NULL terminator */
+
+	if (rec_argv == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < ARRAY_SIZE(record_args); i++)
+		rec_argv[i] = record_args[i];	/* static strings: no copy, so no NULL hole on allocation failure */
+
+	for (j = 0; j < ARRAY_SIZE(handlers); j++) {
+		rec_argv[i++] = "-e";	/* one "-e <event>" pair per handled event */
+		rec_argv[i++] = handlers[j].name;
+	}
+
+	for (j = 1; j < (unsigned int)argc; j++, i++)
+		rec_argv[i] = argv[j];	/* skip argv[0] and forward the caller's remaining arguments */
+
+	BUG_ON(i != rec_argc);
+
+	return cmd_record(i, rec_argv, NULL);
+}
+
+int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused)
+{
+	struct perf_c2c c2c = {
+		.tool = {
+			.sample		 = perf_c2c__process_sample,
+			.comm		 = perf_event__process_comm,
+			.exit		 = perf_event__process_exit,
+			.fork		 = perf_event__process_fork,
+			.lost		 = perf_event__process_lost,
+			.ordered_samples = true,
+		},
+	};
+	const struct option c2c_options[] = {
+	OPT_END()
+	};
+	const char * const c2c_usage[] = {
+		"perf c2c {record|report}",
+		NULL
+	};
+
+	argc = parse_options(argc, argv, c2c_options, c2c_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
+	if (argc == 0)
+		usage_with_options(c2c_usage, c2c_options);
+
+	/* subcommands are matched on their first three characters */
+	if (strncmp(argv[0], "rec", 3) == 0)
+		return perf_c2c__record(argc, argv);
+
+	if (strncmp(argv[0], "rep", 3) == 0)
+		return perf_c2c__report(&c2c);
+
+	usage_with_options(c2c_usage, c2c_options);
+	return 0;
+}
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index b210d62..2d0b1b5 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -17,6 +17,7 @@ extern int cmd_annotate(int argc, const char **argv, const char *prefix);
 extern int cmd_bench(int argc, const char **argv, const char *prefix);
 extern int cmd_buildid_cache(int argc, const char **argv, const char *prefix);
 extern int cmd_buildid_list(int argc, const char **argv, const char *prefix);
+extern int cmd_c2c(int argc, const char **argv, const char *prefix);
 extern int cmd_diff(int argc, const char **argv, const char *prefix);
 extern int cmd_evlist(int argc, const char **argv, const char *prefix);
 extern int cmd_help(int argc, const char **argv, const char *prefix);
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 431798a..c7012a3 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -35,6 +35,7 @@ struct cmd_struct {
 static struct cmd_struct commands[] = {
 	{ "buildid-cache", cmd_buildid_cache, 0 },
 	{ "buildid-list", cmd_buildid_list, 0 },
+	{ "c2c",	cmd_c2c,	0 },
 	{ "diff",	cmd_diff,	0 },
 	{ "evlist",	cmd_evlist,	0 },
 	{ "help",	cmd_help,	0 },
-- 
1.7.11.7


  parent reply	other threads:[~2014-03-24 19:39 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-03-24 19:36 [PATCH 00/15 V3] perf, c2c: Add new tool to analyze cacheline contention on NUMA systems Don Zickus
2014-03-24 19:36 ` [PATCH 01/15 V3] perf: Fix stddev calculation Don Zickus
2014-03-24 19:36 ` [PATCH 02/15 V3] perf, callchain: Add generic callchain print handler for stdio Don Zickus
2014-03-24 19:36 ` Don Zickus [this message]
2014-04-08  6:59   ` [PATCH 03/15 V3] perf c2c: Shared data analyser Namhyung Kim
2014-04-08 14:22     ` Don Zickus
2014-04-09  0:58       ` Namhyung Kim
2014-04-09  1:29         ` Andi Kleen
2014-04-08 14:23     ` Don Zickus
2014-03-24 19:36 ` [PATCH 04/15 V3] perf c2c: Dump raw records, decode data_src bits Don Zickus
2014-04-08  7:09   ` Namhyung Kim
2014-03-24 19:36 ` [PATCH 05/15 V3] perf, c2c: Rework setup code to prepare for features Don Zickus
2014-03-29 17:10   ` Jiri Olsa
2014-04-01  2:52     ` Don Zickus
2014-04-08  7:41     ` Namhyung Kim
2014-04-08 14:11       ` Don Zickus
2014-04-09  1:12         ` Namhyung Kim
2014-04-09  1:36           ` Don Zickus
2014-04-11 14:57             ` Jiri Olsa
2014-04-08  7:18   ` Namhyung Kim
2014-03-24 19:36 ` [PATCH 06/15 V3] perf, c2c: Add in new options to configure latency and stores Don Zickus
2014-03-29 17:11   ` Jiri Olsa
2014-04-01  2:55     ` Don Zickus
2014-04-06 13:14       ` Jiri Olsa
2014-04-07 18:16         ` Don Zickus
2014-04-09  0:17           ` Namhyung Kim
2014-04-08  7:37         ` Namhyung Kim
2014-04-08  7:31   ` Namhyung Kim
2014-03-24 19:36 ` [PATCH 07/15 V3] perf, c2c: Add in sort on physid Don Zickus
2014-04-08  7:56   ` Namhyung Kim
2014-04-08 14:17     ` Don Zickus
2014-04-09  1:30       ` Namhyung Kim
2014-04-09  1:56         ` Don Zickus
2014-03-24 19:36 ` [PATCH 08/15 V3] perf, c2c: Add stats to track data source bits and cpu to node maps Don Zickus
2014-04-08  8:05   ` Namhyung Kim
2014-03-24 19:37 ` [PATCH 09/15 V3] perf, c2c: Sort based on hottest cache line Don Zickus
2014-04-08  8:23   ` Namhyung Kim
2014-03-24 19:37 ` [PATCH 10/15 V3] perf, c2c: Display cacheline HITM analysis to stdout Don Zickus
2014-04-08  8:26   ` Namhyung Kim
2014-04-08 23:46   ` Namhyung Kim
2014-03-24 19:37 ` [PATCH 11/15 V3] perf, c2c: Add callchain support Don Zickus
2014-03-24 19:37 ` [PATCH 12/15 V3] perf, c2c: Output summary stats Don Zickus
2014-03-24 19:37 ` [PATCH 13/15 V3] perf, c2c: Dump rbtree for debugging Don Zickus
2014-03-24 19:37 ` [PATCH 14/15 V3] perf, c2c: Add symbol count table Don Zickus
2014-03-24 19:37 ` [PATCH 15/15 V3] perf, c2c: Add shared cachline summary table Don Zickus

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1395689826-215033-4-git-send-email-dzickus@redhat.com \
    --to=dzickus@redhat.com \
    --cc=acme@ghostprotocols.net \
    --cc=acme@redhat.com \
    --cc=andi.kleen@intel.com \
    --cc=dsahern@gmail.com \
    --cc=efault@gmx.de \
    --cc=eranian@google.com \
    --cc=fowles@inreach.com \
    --cc=fweisbec@gmail.com \
    --cc=jmario@redhat.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=paulus@samba.org \
    --cc=peterz@infradead.org \
    --cc=rfowles@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.