linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 0/5] perf trace pagefaults
@ 2014-06-26 16:14 Stanislav Fomichev
  2014-06-26 16:14 ` [PATCH 1/5] perf trace: add perf_event parameter to tracepoint_handler Stanislav Fomichev
                   ` (4 more replies)
  0 siblings, 5 replies; 16+ messages in thread
From: Stanislav Fomichev @ 2014-06-26 16:14 UTC (permalink / raw)
  To: a.p.zijlstra, paulus, mingo, acme, dsahern, stfomichev, jolsa
  Cc: linux-kernel

This patch series adds support for pagefaults tracing to 'perf trace' command.

v3:
  - added trace format documentation and example
  - added more detailed trace when --verbose is specified

v2:
  - added more info to the changelogs
  - reworked options (-f -> -F, --pgfaults -> --pf=[all|min|maj])
  - separated tracepoint_handler changes into additional patch
  - separated record/replay into additional patch
  - other fixes pointed out by Arnaldo Carvalho de Melo

Stanislav Fomichev (5):
  perf trace: add perf_event parameter to tracepoint_handler
  perf trace: add support for pagefault tracing
  perf trace: add pagefaults record and replay support
  perf trace: add pagefault statistics
  perf trace: add possibility to switch off syscall events

 tools/perf/Documentation/perf-trace.txt |  44 ++++++
 tools/perf/builtin-trace.c              | 248 +++++++++++++++++++++++++++-----
 2 files changed, 254 insertions(+), 38 deletions(-)

-- 
1.9.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 1/5] perf trace: add perf_event parameter to tracepoint_handler
  2014-06-26 16:14 [PATCH v3 0/5] perf trace pagefaults Stanislav Fomichev
@ 2014-06-26 16:14 ` Stanislav Fomichev
  2014-07-05 10:40   ` [tip:perf/core] perf trace: Add " tip-bot for Stanislav Fomichev
  2014-06-26 16:14 ` [PATCH 2/5] perf trace: add support for pagefault tracing Stanislav Fomichev
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 16+ messages in thread
From: Stanislav Fomichev @ 2014-06-26 16:14 UTC (permalink / raw)
  To: a.p.zijlstra, paulus, mingo, acme, dsahern, stfomichev, jolsa
  Cc: linux-kernel

It will be used by next pagefault tracing patches in the series.

Signed-off-by: Stanislav Fomichev <stfomichev@yandex-team.ru>
---
 tools/perf/builtin-trace.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index f954c26de231..a9b542918da0 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1535,6 +1535,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
 }
 
 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
+				  union perf_event *event,
 				  struct perf_sample *sample);
 
 static struct syscall *trace__syscall_info(struct trace *trace,
@@ -1607,6 +1608,7 @@ static void thread__update_stats(struct thread_trace *ttrace,
 }
 
 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
+			    union perf_event *event __maybe_unused,
 			    struct perf_sample *sample)
 {
 	char *msg;
@@ -1656,6 +1658,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 }
 
 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
+			   union perf_event *event __maybe_unused,
 			   struct perf_sample *sample)
 {
 	int ret;
@@ -1735,6 +1738,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 }
 
 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
+			      union perf_event *event __maybe_unused,
 			      struct perf_sample *sample)
 {
 	trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
@@ -1742,6 +1746,7 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
 }
 
 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
+				     union perf_event *event __maybe_unused,
 				     struct perf_sample *sample)
 {
         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
@@ -1781,7 +1786,7 @@ static bool skip_sample(struct trace *trace, struct perf_sample *sample)
 }
 
 static int trace__process_sample(struct perf_tool *tool,
-				 union perf_event *event __maybe_unused,
+				 union perf_event *event,
 				 struct perf_sample *sample,
 				 struct perf_evsel *evsel,
 				 struct machine *machine __maybe_unused)
@@ -1799,7 +1804,7 @@ static int trace__process_sample(struct perf_tool *tool,
 
 	if (handler) {
 		++trace->nr_events;
-		handler(trace, evsel, sample);
+		handler(trace, evsel, event, sample);
 	}
 
 	return err;
@@ -1990,7 +1995,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 			}
 
 			handler = evsel->handler;
-			handler(trace, evsel, &sample);
+			handler(trace, evsel, event, &sample);
 next_event:
 			perf_evlist__mmap_consume(evlist, i);
 
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/5] perf trace: add support for pagefault tracing
  2014-06-26 16:14 [PATCH v3 0/5] perf trace pagefaults Stanislav Fomichev
  2014-06-26 16:14 ` [PATCH 1/5] perf trace: add perf_event parameter to tracepoint_handler Stanislav Fomichev
@ 2014-06-26 16:14 ` Stanislav Fomichev
  2014-07-05 10:41   ` [tip:perf/core] perf trace: Add " tip-bot for Stanislav Fomichev
  2014-06-26 16:14 ` [PATCH 3/5] perf trace: add pagefaults record and replay support Stanislav Fomichev
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 16+ messages in thread
From: Stanislav Fomichev @ 2014-06-26 16:14 UTC (permalink / raw)
  To: a.p.zijlstra, paulus, mingo, acme, dsahern, stfomichev, jolsa
  Cc: linux-kernel

This patch adds optional pagefault tracing support to 'perf trace'.
Using -F/--pf option user can specify whether he wants minor, major or
all pagefault events to be traced. This patch adds only live mode,
record and replay will come in a separate patch.

Example output:
1756272.905 ( 0.000 ms): curl/5937 majfault [0x7fa7261978b6] => /usr/lib/x86_64-linux-gnu/libkrb5.so.26.0.0@0x85288 (d.)
1862866.036 ( 0.000 ms): wget/8460 majfault [__clear_user+0x3f] => 0x659cb4 (?k)

Signed-off-by: Stanislav Fomichev <stfomichev@yandex-team.ru>
---
 tools/perf/Documentation/perf-trace.txt |  37 ++++++++++
 tools/perf/builtin-trace.c              | 125 +++++++++++++++++++++++++++++++-
 2 files changed, 161 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index fae38d9a44a4..2d7e041122dc 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -107,6 +107,43 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 	Show tool stats such as number of times fd->pathname was discovered thru
 	hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc.
 
+-F=[all|min|maj]::
+--pf=[all|min|maj]::
+	Trace pagefaults. Optionally, you can specify whether you want minor,
+	major or all pagefaults. Default value is maj.
+
+PAGEFAULTS
+----------
+
+When tracing pagefaults, the format of the trace is as follows:
+<min|maj>fault [<ip.symbol>+<ip.offset>] => <addr.dso@addr.offset> (<map type><addr level>).
+
+- min/maj indicates whether fault event is minor or major;
+- ip.symbol shows symbol for instruction pointer (the code that generated the
+  fault); if no debug symbols available, perf trace will print raw IP;
+- addr.dso shows DSO for the faulted address;
+- map type is either 'd' for non-executable maps or 'x' for executable maps;
+- addr level is either 'k' for kernel dso or '.' for user dso.
+
+For symbols resolution you may need to install debugging symbols.
+Please be aware that duration is currently always 0 and doesn't reflect actual
+time it took for fault to be handled!
+
+When --verbose specified, perf trace tries to print all available information
+for both IP and fault address in the form of dso@symbol+offset.
+
+EXAMPLES
+--------
+
+Trace syscalls, major and minor pagefaults:
+
+ $ perf trace -F all
+
+  1416.547 ( 0.000 ms): python/20235 majfault [CRYPTO_push_info_+0x0] => /lib/x86_64-linux-gnu/libcrypto.so.1.0.0@0x61be0 (x.)
+
+  As you can see, there was major pagefault in python process, from
+  CRYPTO_push_info_ routine which faulted somewhere in libcrypto.so.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script[1]
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index a9b542918da0..9e6f0eb4063f 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1177,6 +1177,9 @@ static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
 	return NULL;
 }
 
+#define TRACE_PFMAJ		(1 << 0)
+#define TRACE_PFMIN		(1 << 1)
+
 struct trace {
 	struct perf_tool	tool;
 	struct {
@@ -1211,6 +1214,7 @@ struct trace {
 	bool			summary_only;
 	bool			show_comm;
 	bool			show_tool_stats;
+	int			trace_pgfaults;
 };
 
 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
@@ -1773,6 +1777,68 @@ static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evs
 	return 0;
 }
 
+static void print_location(FILE *f, struct perf_sample *sample,
+			   struct addr_location *al,
+			   bool print_dso, bool print_sym)
+{
+
+	if ((verbose || print_dso) && al->map)
+		fprintf(f, "%s@", al->map->dso->long_name);
+
+	if ((verbose || print_sym) && al->sym)
+		fprintf(f, "%s+0x%lx", al->sym->name,
+			al->addr - al->sym->start);
+	else if (al->map)
+		fprintf(f, "0x%lx", al->addr);
+	else
+		fprintf(f, "0x%lx", sample->addr);
+}
+
+static int trace__pgfault(struct trace *trace,
+			  struct perf_evsel *evsel,
+			  union perf_event *event,
+			  struct perf_sample *sample)
+{
+	struct thread *thread;
+	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+	struct addr_location al;
+	char map_type = 'd';
+
+	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+
+	thread__find_addr_location(thread, trace->host, cpumode, MAP__FUNCTION,
+			      sample->ip, &al);
+
+	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
+
+	fprintf(trace->output, "%sfault [",
+		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
+		"maj" : "min");
+
+	print_location(trace->output, sample, &al, false, true);
+
+	fprintf(trace->output, "] => ");
+
+	thread__find_addr_location(thread, trace->host, cpumode, MAP__VARIABLE,
+				   sample->addr, &al);
+
+	if (!al.map) {
+		thread__find_addr_location(thread, trace->host, cpumode,
+					   MAP__FUNCTION, sample->addr, &al);
+
+		if (al.map)
+			map_type = 'x';
+		else
+			map_type = '?';
+	}
+
+	print_location(trace->output, sample, &al, true, false);
+
+	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
+
+	return 0;
+}
+
 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
 {
 	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
@@ -1887,6 +1953,30 @@ static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
 	perf_evlist__add(evlist, evsel);
 }
 
+static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
+				    u64 config)
+{
+	struct perf_evsel *evsel;
+	struct perf_event_attr attr = {
+		.type = PERF_TYPE_SOFTWARE,
+		.mmap_data = 1,
+		.sample_period = 1,
+	};
+
+	attr.config = config;
+
+	event_attr_init(&attr);
+
+	evsel = perf_evsel__new(&attr);
+	if (!evsel)
+		return -ENOMEM;
+
+	evsel->handler = trace__pgfault;
+	perf_evlist__add(evlist, evsel);
+
+	return 0;
+}
+
 static int trace__run(struct trace *trace, int argc, const char **argv)
 {
 	struct perf_evlist *evlist = perf_evlist__new();
@@ -1907,6 +1997,14 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 
 	perf_evlist__add_vfs_getname(evlist);
 
+	if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
+	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ))
+		goto out_error_tp;
+
+	if ((trace->trace_pgfaults & TRACE_PFMIN) &&
+	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
+		goto out_error_tp;
+
 	if (trace->sched &&
 		perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
 				trace__sched_stat_runtime))
@@ -1987,7 +2085,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 				goto next_event;
 			}
 
-			if (sample.raw_data == NULL) {
+			if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
+			    sample.raw_data == NULL) {
 				fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
 				       perf_evsel__name(evsel), sample.tid,
 				       sample.cpu, sample.raw_size);
@@ -2269,6 +2368,23 @@ static int trace__open_output(struct trace *trace, const char *filename)
 	return trace->output == NULL ? -errno : 0;
 }
 
+static int parse_pagefaults(const struct option *opt, const char *str,
+			    int unset __maybe_unused)
+{
+	int *trace_pgfaults = opt->value;
+
+	if (strcmp(str, "all") == 0)
+		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
+	else if (strcmp(str, "maj") == 0)
+		*trace_pgfaults |= TRACE_PFMAJ;
+	else if (strcmp(str, "min") == 0)
+		*trace_pgfaults |= TRACE_PFMIN;
+	else
+		return -1;
+
+	return 0;
+}
+
 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	const char * const trace_usage[] = {
@@ -2335,6 +2451,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "Show only syscall summary with statistics"),
 	OPT_BOOLEAN('S', "with-summary", &trace.summary,
 		    "Show all syscalls and summary with statistics"),
+	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
+		     "Trace pagefaults", parse_pagefaults, "maj"),
 	OPT_END()
 	};
 	int err;
@@ -2349,6 +2467,11 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (trace.summary_only)
 		trace.summary = trace.summary_only;
 
+	if (trace.trace_pgfaults) {
+		trace.opts.sample_address = true;
+		trace.opts.sample_time = true;
+	}
+
 	if (output_name != NULL) {
 		err = trace__open_output(&trace, output_name);
 		if (err < 0) {
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 3/5] perf trace: add pagefaults record and replay support
  2014-06-26 16:14 [PATCH v3 0/5] perf trace pagefaults Stanislav Fomichev
  2014-06-26 16:14 ` [PATCH 1/5] perf trace: add perf_event parameter to tracepoint_handler Stanislav Fomichev
  2014-06-26 16:14 ` [PATCH 2/5] perf trace: add support for pagefault tracing Stanislav Fomichev
@ 2014-06-26 16:14 ` Stanislav Fomichev
  2014-07-05 10:41   ` [tip:perf/core] perf trace: Add " tip-bot for Stanislav Fomichev
  2014-06-26 16:14 ` [PATCH 4/5] perf trace: add pagefault statistics Stanislav Fomichev
  2014-06-26 16:14 ` [PATCH 5/5] perf trace: add possibility to switch off syscall events Stanislav Fomichev
  4 siblings, 1 reply; 16+ messages in thread
From: Stanislav Fomichev @ 2014-06-26 16:14 UTC (permalink / raw)
  To: a.p.zijlstra, paulus, mingo, acme, dsahern, stfomichev, jolsa
  Cc: linux-kernel

Previous commit added live pagefault trace support, this one adds
record and replay support.

Signed-off-by: Stanislav Fomichev <stfomichev@yandex-team.ru>
---
 tools/perf/builtin-trace.c | 63 +++++++++++++++++++++++++++++++++-------------
 1 file changed, 45 insertions(+), 18 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 9e6f0eb4063f..38f3dedcbb4a 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1897,7 +1897,7 @@ static int parse_target_str(struct trace *trace)
 	return 0;
 }
 
-static int trace__record(int argc, const char **argv)
+static int trace__record(struct trace *trace, int argc, const char **argv)
 {
 	unsigned int rec_argc, i, j;
 	const char **rec_argv;
@@ -1906,34 +1906,52 @@ static int trace__record(int argc, const char **argv)
 		"-R",
 		"-m", "1024",
 		"-c", "1",
-		"-e",
 	};
 
+	const char * const sc_args[] = { "-e", };
+	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
+	const char * const majpf_args[] = { "-e", "major-faults" };
+	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
+	const char * const minpf_args[] = { "-e", "minor-faults" };
+	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
+
 	/* +1 is for the event string below */
-	rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
+	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
+		majpf_args_nr + minpf_args_nr + argc;
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
 	if (rec_argv == NULL)
 		return -ENOMEM;
 
+	j = 0;
 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
-		rec_argv[i] = record_args[i];
+		rec_argv[j++] = record_args[i];
+
+	for (i = 0; i < sc_args_nr; i++)
+		rec_argv[j++] = sc_args[i];
 
 	/* event string may be different for older kernels - e.g., RHEL6 */
 	if (is_valid_tracepoint("raw_syscalls:sys_enter"))
-		rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
+		rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
 	else if (is_valid_tracepoint("syscalls:sys_enter"))
-		rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
+		rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
 	else {
 		pr_err("Neither raw_syscalls nor syscalls events exist.\n");
 		return -1;
 	}
-	i++;
 
-	for (j = 0; j < (unsigned int)argc; j++, i++)
-		rec_argv[i] = argv[j];
+	if (trace->trace_pgfaults & TRACE_PFMAJ)
+		for (i = 0; i < majpf_args_nr; i++)
+			rec_argv[j++] = majpf_args[i];
+
+	if (trace->trace_pgfaults & TRACE_PFMIN)
+		for (i = 0; i < minpf_args_nr; i++)
+			rec_argv[j++] = minpf_args[i];
+
+	for (i = 0; i < (unsigned int)argc; i++)
+		rec_argv[j++] = argv[i];
 
-	return cmd_record(i, rec_argv, NULL);
+	return cmd_record(j, rec_argv, NULL);
 }
 
 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
@@ -2224,6 +2242,14 @@ static int trace__replay(struct trace *trace)
 		goto out;
 	}
 
+	evlist__for_each(session->evlist, evsel) {
+		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
+		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
+		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
+		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
+			evsel->handler = trace__pgfault;
+	}
+
 	err = parse_target_str(trace);
 	if (err != 0)
 		goto out;
@@ -2458,20 +2484,21 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 	int err;
 	char bf[BUFSIZ];
 
-	if ((argc > 1) && (strcmp(argv[1], "record") == 0))
-		return trace__record(argc-2, &argv[2]);
-
-	argc = parse_options(argc, argv, trace_options, trace_usage, 0);
-
-	/* summary_only implies summary option, but don't overwrite summary if set */
-	if (trace.summary_only)
-		trace.summary = trace.summary_only;
+	argc = parse_options(argc, argv, trace_options, trace_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
 
 	if (trace.trace_pgfaults) {
 		trace.opts.sample_address = true;
 		trace.opts.sample_time = true;
 	}
 
+	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
+		return trace__record(&trace, argc-1, &argv[1]);
+
+	/* summary_only implies summary option, but don't overwrite summary if set */
+	if (trace.summary_only)
+		trace.summary = trace.summary_only;
+
 	if (output_name != NULL) {
 		err = trace__open_output(&trace, output_name);
 		if (err < 0) {
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 4/5] perf trace: add pagefault statistics
  2014-06-26 16:14 [PATCH v3 0/5] perf trace pagefaults Stanislav Fomichev
                   ` (2 preceding siblings ...)
  2014-06-26 16:14 ` [PATCH 3/5] perf trace: add pagefaults record and replay support Stanislav Fomichev
@ 2014-06-26 16:14 ` Stanislav Fomichev
  2014-06-26 19:41   ` Arnaldo Carvalho de Melo
  2014-06-26 16:14 ` [PATCH 5/5] perf trace: add possibility to switch off syscall events Stanislav Fomichev
  4 siblings, 1 reply; 16+ messages in thread
From: Stanislav Fomichev @ 2014-06-26 16:14 UTC (permalink / raw)
  To: a.p.zijlstra, paulus, mingo, acme, dsahern, stfomichev, jolsa
  Cc: linux-kernel

'perf trace' can show summary of events using -S option. This commit
also reports number of major/minor pagefault events in this summary.

Signed-off-by: Stanislav Fomichev <stfomichev@yandex-team.ru>
---
 tools/perf/builtin-trace.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 38f3dedcbb4a..73d1266e6303 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1132,6 +1132,7 @@ struct thread_trace {
 	u64		  exit_time;
 	bool		  entry_pending;
 	unsigned long	  nr_events;
+	unsigned long	  pfmaj, pfmin;
 	char		  *entry_str;
 	double		  runtime_ms;
 	struct {
@@ -1803,8 +1804,17 @@ static int trace__pgfault(struct trace *trace,
 	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 	struct addr_location al;
 	char map_type = 'd';
+	struct thread_trace *ttrace;
 
 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+	ttrace = thread__trace(thread, trace->output);
+	if (ttrace == NULL)
+		return -1;
+
+	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
+		ttrace->pfmaj++;
+	else
+		ttrace->pfmin++;
 
 	thread__find_addr_location(thread, trace->host, cpumode, MAP__FUNCTION,
 			      sample->ip, &al);
@@ -2347,6 +2357,7 @@ static int trace__fprintf_one_thread(struct thread *thread, void *priv)
 	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
 	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
 	printed += fprintf(fp, "%.1f%%", ratio);
+	printed += fprintf(fp, ", %lu:%lu faults", ttrace->pfmaj, ttrace->pfmin);
 	printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
 	printed += thread__dump_stats(ttrace, trace, fp);
 
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 5/5] perf trace: add possibility to switch off syscall events
  2014-06-26 16:14 [PATCH v3 0/5] perf trace pagefaults Stanislav Fomichev
                   ` (3 preceding siblings ...)
  2014-06-26 16:14 ` [PATCH 4/5] perf trace: add pagefault statistics Stanislav Fomichev
@ 2014-06-26 16:14 ` Stanislav Fomichev
  2014-07-05 10:41   ` [tip:perf/core] perf trace: Add " tip-bot for Stanislav Fomichev
  4 siblings, 1 reply; 16+ messages in thread
From: Stanislav Fomichev @ 2014-06-26 16:14 UTC (permalink / raw)
  To: a.p.zijlstra, paulus, mingo, acme, dsahern, stfomichev, jolsa
  Cc: linux-kernel

Currently, we may either trace syscalls or syscalls+pagefaults. We'd
like to be able to trace *only* pagefaults and this commit implements
this feature.

Signed-off-by: Stanislav Fomichev <stfomichev@yandex-team.ru>
---
 tools/perf/Documentation/perf-trace.txt |  7 ++++
 tools/perf/builtin-trace.c              | 58 ++++++++++++++++++---------------
 2 files changed, 39 insertions(+), 26 deletions(-)

diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 2d7e041122dc..42ee3361ee43 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -112,6 +112,9 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 	Trace pagefaults. Optionally, you can specify whether you want minor,
 	major or all pagefaults. Default value is maj.
 
+--syscalls::
+	Trace system calls. This option is enabled by default.
+
 PAGEFAULTS
 ----------
 
@@ -135,6 +138,10 @@ for both IP and fault address in the form of dso@symbol+offset.
 EXAMPLES
 --------
 
+Trace only major pagefaults:
+
+ $ perf trace --no-syscalls -F
+
 Trace syscalls, major and minor pagefaults:
 
  $ perf trace -F all
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 73d1266e6303..fb620ddc2230 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1215,6 +1215,7 @@ struct trace {
 	bool			summary_only;
 	bool			show_comm;
 	bool			show_tool_stats;
+	bool			trace_syscalls;
 	int			trace_pgfaults;
 };
 
@@ -1937,17 +1938,19 @@ static int trace__record(struct trace *trace, int argc, const char **argv)
 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
 		rec_argv[j++] = record_args[i];
 
-	for (i = 0; i < sc_args_nr; i++)
-		rec_argv[j++] = sc_args[i];
-
-	/* event string may be different for older kernels - e.g., RHEL6 */
-	if (is_valid_tracepoint("raw_syscalls:sys_enter"))
-		rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
-	else if (is_valid_tracepoint("syscalls:sys_enter"))
-		rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
-	else {
-		pr_err("Neither raw_syscalls nor syscalls events exist.\n");
-		return -1;
+	if (trace->trace_syscalls) {
+		for (i = 0; i < sc_args_nr; i++)
+			rec_argv[j++] = sc_args[i];
+
+		/* event string may be different for older kernels - e.g., RHEL6 */
+		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
+			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
+		else if (is_valid_tracepoint("syscalls:sys_enter"))
+			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
+		else {
+			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
+			return -1;
+		}
 	}
 
 	if (trace->trace_pgfaults & TRACE_PFMAJ)
@@ -2020,10 +2023,13 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		goto out;
 	}
 
-	if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
+	if (trace->trace_syscalls &&
+	    perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
+					   trace__sys_exit))
 		goto out_error_tp;
 
-	perf_evlist__add_vfs_getname(evlist);
+	if (trace->trace_syscalls)
+		perf_evlist__add_vfs_getname(evlist);
 
 	if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
 	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ))
@@ -2225,13 +2231,10 @@ static int trace__replay(struct trace *trace)
 	if (evsel == NULL)
 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
 							     "syscalls:sys_enter");
-	if (evsel == NULL) {
-		pr_err("Data file does not have raw_syscalls:sys_enter event\n");
-		goto out;
-	}
 
-	if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
-	    perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
+	if (evsel &&
+	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
+	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
 		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
 		goto out;
 	}
@@ -2241,13 +2244,9 @@ static int trace__replay(struct trace *trace)
 	if (evsel == NULL)
 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
 							     "syscalls:sys_exit");
-	if (evsel == NULL) {
-		pr_err("Data file does not have raw_syscalls:sys_exit event\n");
-		goto out;
-	}
-
-	if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
-	    perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
+	if (evsel &&
+	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
+	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
 		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
 		goto out;
 	}
@@ -2451,6 +2450,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 		},
 		.output = stdout,
 		.show_comm = true,
+		.trace_syscalls = true,
 	};
 	const char *output_name = NULL;
 	const char *ev_qualifier_str = NULL;
@@ -2490,6 +2490,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "Show all syscalls and summary with statistics"),
 	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
 		     "Trace pagefaults", parse_pagefaults, "maj"),
+	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
 	OPT_END()
 	};
 	int err;
@@ -2510,6 +2511,11 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (trace.summary_only)
 		trace.summary = trace.summary_only;
 
+	if (!trace.trace_syscalls && !trace.trace_pgfaults) {
+		pr_err("Please specify something to trace.\n");
+		return -1;
+	}
+
 	if (output_name != NULL) {
 		err = trace__open_output(&trace, output_name);
 		if (err < 0) {
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/5] perf trace: add pagefault statistics
  2014-06-26 16:14 ` [PATCH 4/5] perf trace: add pagefault statistics Stanislav Fomichev
@ 2014-06-26 19:41   ` Arnaldo Carvalho de Melo
  2014-06-27  8:43     ` Stanislav Fomichev
  0 siblings, 1 reply; 16+ messages in thread
From: Arnaldo Carvalho de Melo @ 2014-06-26 19:41 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: a.p.zijlstra, paulus, mingo, dsahern, jolsa, linux-kernel

Em Thu, Jun 26, 2014 at 08:14:27PM +0400, Stanislav Fomichev escreveu:
> 'perf trace' can show summary of events using -S option. This commit
> also reports number of major/minor pagefault events in this summary.
> 
> Signed-off-by: Stanislav Fomichev <stfomichev@yandex-team.ru>

Where is it?

[root@zoo /]# perf trace -S -F all -- usleep 1 | tail -30
Failed to open /etc/ld.so.cache, continuing without symbols
     1.241 ( 0.001 ms): brk(                                                                  ) = 0x24c9000
     1.245 ( 0.003 ms): brk(brk: 0x24ea000                                                    ) = 0x24ea000
     1.248 ( 0.001 ms): brk(                                                                  ) = 0x24ea000
     1.253 ( 0.000 ms): minfault [_int_malloc+0x116a] => [heap]@0x24c9008 (d.)
     1.268 ( 0.000 ms): minfault [__GI_____strtoull_l_internal+0x47] => /usr/lib64/libc-2.18.so@0x164ae3 (x.)
     1.273 ( 0.000 ms): minfault [__GI___libc_nanosleep+0x0] => /usr/lib64/libc-2.18.so@0xbc960 (x.)
     1.336 ( 0.062 ms): nanosleep(rqtp: 0x7fff30ccf640                                        ) = 0
     1.344 ( 0.000 ms): exit_group(                                                           

 Summary of events:

 usleep (19155), 151 events, 84.4%, 0:68 faults, 0.000 msec

   syscall            calls      min       avg       max      stddev
                               (msec)    (msec)    (msec)        (%)
   --------------- -------- --------- --------- ---------     ------
   read                   3     0.000     0.002     0.003     50.38%
   open                   3     0.006     0.007     0.008      8.51%
   close                  3     0.001     0.001     0.002      6.05%
   fstat                  3     0.001     0.002     0.002      7.70%
   mmap                   9     0.003     0.005     0.006      9.56%
   mprotect               6     0.004     0.007     0.010     14.12%
   munmap                 1     0.013     0.013     0.013      0.00%
   brk                    4     0.001     0.002     0.003     28.94%
   access                 1     0.007     0.007     0.007      0.00%
   nanosleep              1     0.062     0.062     0.062      0.00%
   execve                 7     0.000     0.086     0.574     94.41%
   arch_prctl             1     0.002     0.002     0.002      0.00%


[root@zoo /]# 

-----------------------------------------

There were minor page faults, where in the summary are they?

I haven't even looked at the code, first I wanted to check if just doing what
is expected would work, i.e. enable all the page fault types, run a workload,
ask for -S and check if there were any stats for major and minor page faults.

I can update the changelog comment with an example if you tell me what I am
doing wrong.

Applied and tested the previous patches.

- Arnaldo

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/5] perf trace: add pagefault statistics
  2014-06-26 19:41   ` Arnaldo Carvalho de Melo
@ 2014-06-27  8:43     ` Stanislav Fomichev
  2014-06-27 13:53       ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 16+ messages in thread
From: Stanislav Fomichev @ 2014-06-27  8:43 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: a.p.zijlstra, paulus, mingo, dsahern, jolsa, linux-kernel

> Where is it?
>  usleep (19155), 151 events, 84.4%, 0:68 faults, 0.000 msec
                                      ^^^^^^^^^^^
				      maj:min

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/5] perf trace: add pagefault statistics
  2014-06-27  8:43     ` Stanislav Fomichev
@ 2014-06-27 13:53       ` Arnaldo Carvalho de Melo
  2014-06-27 15:44         ` Stanislav Fomichev
  0 siblings, 1 reply; 16+ messages in thread
From: Arnaldo Carvalho de Melo @ 2014-06-27 13:53 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: a.p.zijlstra, paulus, mingo, dsahern, jolsa, linux-kernel

Em Fri, Jun 27, 2014 at 12:43:07PM +0400, Stanislav Fomichev escreveu:
> > Where is it?
> >  usleep (19155), 151 events, 84.4%, 0:68 faults, 0.000 msec
>                                       ^^^^^^^^^^^
> 				      maj:min

Ok, thanks for the explanation, I'll add that to the commit log and
check if the documentation mentions that in the statistics section.

- Arnaldo

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/5] perf trace: add pagefault statistics
  2014-06-27 13:53       ` Arnaldo Carvalho de Melo
@ 2014-06-27 15:44         ` Stanislav Fomichev
  2014-06-27 21:16           ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 16+ messages in thread
From: Stanislav Fomichev @ 2014-06-27 15:44 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: a.p.zijlstra, paulus, mingo, dsahern, jolsa, linux-kernel

On Fri, Jun 27, 2014 at 10:53:30AM -0300, Arnaldo Carvalho de Melo wrote:
> Em Fri, Jun 27, 2014 at 12:43:07PM +0400, Stanislav Fomichev escreveu:
> > > Where is it?
> > >  usleep (19155), 151 events, 84.4%, 0:68 faults, 0.000 msec
> >                                       ^^^^^^^^^^^
> > 				      maj:min
> 
> Ok, thanks for the explanation, I'll add that to the commit log and
> check if the documentation mentions that in the statistics section.
Currently, there is no documentation for statistics.
Maybe it makes sense to split this cryptic message into
"0 majfaults, 68 minfaults" and then we don't even need to have
documentation at all?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/5] perf trace: add pagefault statistics
  2014-06-27 15:44         ` Stanislav Fomichev
@ 2014-06-27 21:16           ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 16+ messages in thread
From: Arnaldo Carvalho de Melo @ 2014-06-27 21:16 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: a.p.zijlstra, paulus, mingo, dsahern, jolsa, linux-kernel

Em Fri, Jun 27, 2014 at 07:44:17PM +0400, Stanislav Fomichev escreveu:
> On Fri, Jun 27, 2014 at 10:53:30AM -0300, Arnaldo Carvalho de Melo wrote:
> > Em Fri, Jun 27, 2014 at 12:43:07PM +0400, Stanislav Fomichev escreveu:
> > > > Where is it?
> > > >  usleep (19155), 151 events, 84.4%, 0:68 faults, 0.000 msec
> > >                                       ^^^^^^^^^^^
> > > 				      maj:min

> > Ok, thanks for the explanation, I'll add that to the commit log and
> > check if the documentation mentions that in the statistics section.

> Currently, there is no documentation for statistics.
> Maybe it makes sense to split this cryptic message into
> "0 majfaults, 68 minfauls" and then we don't even need to have
> documentation at all?

I guess this is a good step, i.e. to add it in the same way as existing
events, the syscalls.

At some point we should add docs about the statistics.

- Arnaldo

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [tip:perf/core] perf trace: Add perf_event parameter to tracepoint_handler
  2014-06-26 16:14 ` [PATCH 1/5] perf trace: add perf_event parameter to tracepoint_handler Stanislav Fomichev
@ 2014-07-05 10:40   ` tip-bot for Stanislav Fomichev
  0 siblings, 0 replies; 16+ messages in thread
From: tip-bot for Stanislav Fomichev @ 2014-07-05 10:40 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: acme, linux-kernel, paulus, mingo, hpa, mingo, a.p.zijlstra,
	jolsa, stfomichev, dsahern, tglx

Commit-ID:  0c82adcf141935b6312593a53f87342dbb12b704
Gitweb:     http://git.kernel.org/tip/0c82adcf141935b6312593a53f87342dbb12b704
Author:     Stanislav Fomichev <stfomichev@yandex-team.ru>
AuthorDate: Thu, 26 Jun 2014 20:14:24 +0400
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Thu, 26 Jun 2014 16:00:45 -0300

perf trace: Add perf_event parameter to tracepoint_handler

It will be used by next pagefault tracing patches in the series.

Signed-off-by: Stanislav Fomichev <stfomichev@yandex-team.ru>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1403799268-1367-2-git-send-email-stfomichev@yandex-team.ru
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 5549cee..4a9e26b 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1538,6 +1538,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
 }
 
 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
+				  union perf_event *event,
 				  struct perf_sample *sample);
 
 static struct syscall *trace__syscall_info(struct trace *trace,
@@ -1610,6 +1611,7 @@ static void thread__update_stats(struct thread_trace *ttrace,
 }
 
 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
+			    union perf_event *event __maybe_unused,
 			    struct perf_sample *sample)
 {
 	char *msg;
@@ -1658,6 +1660,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 }
 
 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
+			   union perf_event *event __maybe_unused,
 			   struct perf_sample *sample)
 {
 	int ret;
@@ -1735,6 +1738,7 @@ out:
 }
 
 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
+			      union perf_event *event __maybe_unused,
 			      struct perf_sample *sample)
 {
 	trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
@@ -1742,6 +1746,7 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
 }
 
 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
+				     union perf_event *event __maybe_unused,
 				     struct perf_sample *sample)
 {
         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
@@ -1781,7 +1786,7 @@ static bool skip_sample(struct trace *trace, struct perf_sample *sample)
 }
 
 static int trace__process_sample(struct perf_tool *tool,
-				 union perf_event *event __maybe_unused,
+				 union perf_event *event,
 				 struct perf_sample *sample,
 				 struct perf_evsel *evsel,
 				 struct machine *machine __maybe_unused)
@@ -1799,7 +1804,7 @@ static int trace__process_sample(struct perf_tool *tool,
 
 	if (handler) {
 		++trace->nr_events;
-		handler(trace, evsel, sample);
+		handler(trace, evsel, event, sample);
 	}
 
 	return err;
@@ -1990,7 +1995,7 @@ again:
 			}
 
 			handler = evsel->handler;
-			handler(trace, evsel, &sample);
+			handler(trace, evsel, event, &sample);
 next_event:
 			perf_evlist__mmap_consume(evlist, i);
 

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [tip:perf/core] perf trace: Add support for pagefault tracing
  2014-06-26 16:14 ` [PATCH 2/5] perf trace: add support for pagefault tracing Stanislav Fomichev
@ 2014-07-05 10:41   ` tip-bot for Stanislav Fomichev
  0 siblings, 0 replies; 16+ messages in thread
From: tip-bot for Stanislav Fomichev @ 2014-07-05 10:41 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: acme, linux-kernel, paulus, mingo, hpa, mingo, a.p.zijlstra,
	jolsa, stfomichev, dsahern, tglx

Commit-ID:  598d02c5a07b60e5c824184cdaf697b70f3c452a
Gitweb:     http://git.kernel.org/tip/598d02c5a07b60e5c824184cdaf697b70f3c452a
Author:     Stanislav Fomichev <stfomichev@yandex-team.ru>
AuthorDate: Thu, 26 Jun 2014 20:14:25 +0400
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Thu, 26 Jun 2014 16:07:43 -0300

perf trace: Add support for pagefault tracing

This patch adds optional pagefault tracing support to 'perf trace'.

Using the -F/--pf option, the user can specify whether minor, major or
all pagefault events are to be traced. This patch adds only live mode;
record and replay will come in a separate patch.

Example output:

  1756272.905 ( 0.000 ms): curl/5937 majfault [0x7fa7261978b6] => /usr/lib/x86_64-linux-gnu/libkrb5.so.26.0.0@0x85288 (d.)
  1862866.036 ( 0.000 ms): wget/8460 majfault [__clear_user+0x3f] => 0x659cb4 (?k)

Signed-off-by: Stanislav Fomichev <stfomichev@yandex-team.ru>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1403799268-1367-3-git-send-email-stfomichev@yandex-team.ru
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-trace.txt |  39 ++++++++++
 tools/perf/builtin-trace.c              | 125 +++++++++++++++++++++++++++++++-
 2 files changed, 163 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index fae38d9..72397d9 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -107,6 +107,45 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 	Show tool stats such as number of times fd->pathname was discovered thru
 	hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc.
 
+-F=[all|min|maj]::
+--pf=[all|min|maj]::
+	Trace pagefaults. Optionally, you can specify whether you want minor,
+	major or all pagefaults. Default value is maj.
+
+PAGEFAULTS
+----------
+
+When tracing pagefaults, the format of the trace is as follows:
+
+<min|maj>fault [<ip.symbol>+<ip.offset>] => <addr.dso@addr.offset> (<map type><addr level>).
+
+- min/maj indicates whether fault event is minor or major;
+- ip.symbol shows symbol for instruction pointer (the code that generated the
+  fault); if no debug symbols available, perf trace will print raw IP;
+- addr.dso shows DSO for the faulted address;
+- map type is either 'd' for non-executable maps or 'x' for executable maps;
+- addr level is either 'k' for kernel dso or '.' for user dso.
+
+For symbols resolution you may need to install debugging symbols.
+
+Please be aware that duration is currently always 0 and doesn't reflect actual
+time it took for fault to be handled!
+
+When --verbose specified, perf trace tries to print all available information
+for both IP and fault address in the form of dso@symbol+offset.
+
+EXAMPLES
+--------
+
+Trace syscalls, major and minor pagefaults:
+
+ $ perf trace -F all
+
+  1416.547 ( 0.000 ms): python/20235 majfault [CRYPTO_push_info_+0x0] => /lib/x86_64-linux-gnu/libcrypto.so.1.0.0@0x61be0 (x.)
+
+  As you can see, there was major pagefault in python process, from
+  CRYPTO_push_info_ routine which faulted somewhere in libcrypto.so.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script[1]
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 4a9e26b..1985c3b 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1178,6 +1178,9 @@ fail:
 	return NULL;
 }
 
+#define TRACE_PFMAJ		(1 << 0)
+#define TRACE_PFMIN		(1 << 1)
+
 struct trace {
 	struct perf_tool	tool;
 	struct {
@@ -1212,6 +1215,7 @@ struct trace {
 	bool			summary_only;
 	bool			show_comm;
 	bool			show_tool_stats;
+	int			trace_pgfaults;
 };
 
 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
@@ -1773,6 +1777,68 @@ out_dump:
 	return 0;
 }
 
+static void print_location(FILE *f, struct perf_sample *sample,
+			   struct addr_location *al,
+			   bool print_dso, bool print_sym)
+{
+
+	if ((verbose || print_dso) && al->map)
+		fprintf(f, "%s@", al->map->dso->long_name);
+
+	if ((verbose || print_sym) && al->sym)
+		fprintf(f, "%s+0x%lx", al->sym->name,
+			al->addr - al->sym->start);
+	else if (al->map)
+		fprintf(f, "0x%lx", al->addr);
+	else
+		fprintf(f, "0x%lx", sample->addr);
+}
+
+static int trace__pgfault(struct trace *trace,
+			  struct perf_evsel *evsel,
+			  union perf_event *event,
+			  struct perf_sample *sample)
+{
+	struct thread *thread;
+	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+	struct addr_location al;
+	char map_type = 'd';
+
+	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+
+	thread__find_addr_location(thread, trace->host, cpumode, MAP__FUNCTION,
+			      sample->ip, &al);
+
+	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
+
+	fprintf(trace->output, "%sfault [",
+		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
+		"maj" : "min");
+
+	print_location(trace->output, sample, &al, false, true);
+
+	fprintf(trace->output, "] => ");
+
+	thread__find_addr_location(thread, trace->host, cpumode, MAP__VARIABLE,
+				   sample->addr, &al);
+
+	if (!al.map) {
+		thread__find_addr_location(thread, trace->host, cpumode,
+					   MAP__FUNCTION, sample->addr, &al);
+
+		if (al.map)
+			map_type = 'x';
+		else
+			map_type = '?';
+	}
+
+	print_location(trace->output, sample, &al, true, false);
+
+	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
+
+	return 0;
+}
+
 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
 {
 	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
@@ -1887,6 +1953,30 @@ static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
 	perf_evlist__add(evlist, evsel);
 }
 
+static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
+				    u64 config)
+{
+	struct perf_evsel *evsel;
+	struct perf_event_attr attr = {
+		.type = PERF_TYPE_SOFTWARE,
+		.mmap_data = 1,
+		.sample_period = 1,
+	};
+
+	attr.config = config;
+
+	event_attr_init(&attr);
+
+	evsel = perf_evsel__new(&attr);
+	if (!evsel)
+		return -ENOMEM;
+
+	evsel->handler = trace__pgfault;
+	perf_evlist__add(evlist, evsel);
+
+	return 0;
+}
+
 static int trace__run(struct trace *trace, int argc, const char **argv)
 {
 	struct perf_evlist *evlist = perf_evlist__new();
@@ -1907,6 +1997,14 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 
 	perf_evlist__add_vfs_getname(evlist);
 
+	if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
+	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ))
+		goto out_error_tp;
+
+	if ((trace->trace_pgfaults & TRACE_PFMIN) &&
+	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
+		goto out_error_tp;
+
 	if (trace->sched &&
 		perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
 				trace__sched_stat_runtime))
@@ -1987,7 +2085,8 @@ again:
 				goto next_event;
 			}
 
-			if (sample.raw_data == NULL) {
+			if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
+			    sample.raw_data == NULL) {
 				fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
 				       perf_evsel__name(evsel), sample.tid,
 				       sample.cpu, sample.raw_size);
@@ -2269,6 +2368,23 @@ static int trace__open_output(struct trace *trace, const char *filename)
 	return trace->output == NULL ? -errno : 0;
 }
 
+static int parse_pagefaults(const struct option *opt, const char *str,
+			    int unset __maybe_unused)
+{
+	int *trace_pgfaults = opt->value;
+
+	if (strcmp(str, "all") == 0)
+		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
+	else if (strcmp(str, "maj") == 0)
+		*trace_pgfaults |= TRACE_PFMAJ;
+	else if (strcmp(str, "min") == 0)
+		*trace_pgfaults |= TRACE_PFMIN;
+	else
+		return -1;
+
+	return 0;
+}
+
 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	const char * const trace_usage[] = {
@@ -2335,6 +2451,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "Show only syscall summary with statistics"),
 	OPT_BOOLEAN('S', "with-summary", &trace.summary,
 		    "Show all syscalls and summary with statistics"),
+	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
+		     "Trace pagefaults", parse_pagefaults, "maj"),
 	OPT_END()
 	};
 	int err;
@@ -2349,6 +2467,11 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (trace.summary_only)
 		trace.summary = trace.summary_only;
 
+	if (trace.trace_pgfaults) {
+		trace.opts.sample_address = true;
+		trace.opts.sample_time = true;
+	}
+
 	if (output_name != NULL) {
 		err = trace__open_output(&trace, output_name);
 		if (err < 0) {

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [tip:perf/core] perf trace: Add pagefaults record and replay support
  2014-06-26 16:14 ` [PATCH 3/5] perf trace: add pagefaults record and replay support Stanislav Fomichev
@ 2014-07-05 10:41   ` tip-bot for Stanislav Fomichev
  0 siblings, 0 replies; 16+ messages in thread
From: tip-bot for Stanislav Fomichev @ 2014-07-05 10:41 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: acme, linux-kernel, paulus, mingo, hpa, mingo, a.p.zijlstra,
	jolsa, stfomichev, dsahern, tglx

Commit-ID:  1e28fe0a4ff8680d5a0fb84995fd2444dac19cc4
Gitweb:     http://git.kernel.org/tip/1e28fe0a4ff8680d5a0fb84995fd2444dac19cc4
Author:     Stanislav Fomichev <stfomichev@yandex-team.ru>
AuthorDate: Thu, 26 Jun 2014 20:14:26 +0400
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Thu, 26 Jun 2014 17:43:24 -0300

perf trace: Add pagefaults record and replay support

Previous commit added live pagefault trace support, this one adds record
and replay support.

Example:

  [root@zoo /]# echo 1 > /proc/sys/vm/drop_caches ; trace -F all record -a sleep 10
  [ perf record: Woken up 0 times to write data ]
  [ perf record: Captured and wrote 1029.722 MB perf.data (~44989242 samples) ]

  [root@zoo /]# ls -la perf.data
  -rw-------. 1 root root 1083921722 Jun 26 17:44 perf.data

  [root@zoo /]# perf evlist
  raw_syscalls:sys_enter
  raw_syscalls:sys_exit
  major-faults
  minor-faults

  [root@zoo /]# trace -i perf.data | grep -v trace\/ | tail -15
     156.137 ( 0.000 ms): perl/18476 minfault [0xb4243] => 0x0 (?.)
     156.139 ( 0.000 ms): perl/18476 minfault [Perl_sv_clear+0x123] => 0x0 (?.)
     156.140 ( 0.000 ms): perl/18476 minfault [Perl_sv_clear+0xc4] => 0x0 (?.)
     156.144 ( 0.000 ms): perl/18476 minfault [_int_free+0xda] => 0x0 (?.)
     156.151 ( 0.000 ms): perl/18476 minfault [_int_free+0x1df] => 0x0 (?.)
     156.158 ( 0.000 ms): perl/18476 minfault [0xb4243] => 0x0 (?.)
     156.161 ( 0.000 ms): perl/18476 minfault [0xb4243] => 0x0 (?.)
     156.168 ( 0.000 ms): perl/18476 minfault [0xb4243] => 0x0 (?.)
     156.172 ( 0.000 ms): perl/18476 minfault [0xb4243] => 0x0 (?.)
     156.173 ( 0.000 ms): perl/18476 minfault [_int_free+0xda] => 0x0 (?.)
     156.183 ( 0.000 ms): perl/18476 minfault [Perl_hfree_next_entry+0xb4] => 0x0 (?.)
     156.197 ( 0.000 ms): perl/18476 minfault [_int_free+0x1df] => 0x0 (?.)
     156.216 ( 0.000 ms): perl/18476 minfault [Perl_sv_clear+0x123] => 0x0 (?.)
     156.221 ( 0.000 ms): perl/18476 minfault [Perl_sv_clear+0x123] => 0x0 (?.)
  [root@zoo /]#

Signed-off-by: Stanislav Fomichev <stfomichev@yandex-team.ru>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1403799268-1367-4-git-send-email-stfomichev@yandex-team.ru
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 63 +++++++++++++++++++++++++++++++++-------------
 1 file changed, 45 insertions(+), 18 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 1985c3b..0b58e24 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1897,7 +1897,7 @@ static int parse_target_str(struct trace *trace)
 	return 0;
 }
 
-static int trace__record(int argc, const char **argv)
+static int trace__record(struct trace *trace, int argc, const char **argv)
 {
 	unsigned int rec_argc, i, j;
 	const char **rec_argv;
@@ -1906,34 +1906,52 @@ static int trace__record(int argc, const char **argv)
 		"-R",
 		"-m", "1024",
 		"-c", "1",
-		"-e",
 	};
 
+	const char * const sc_args[] = { "-e", };
+	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
+	const char * const majpf_args[] = { "-e", "major-faults" };
+	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
+	const char * const minpf_args[] = { "-e", "minor-faults" };
+	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
+
 	/* +1 is for the event string below */
-	rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
+	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
+		majpf_args_nr + minpf_args_nr + argc;
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
 	if (rec_argv == NULL)
 		return -ENOMEM;
 
+	j = 0;
 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
-		rec_argv[i] = record_args[i];
+		rec_argv[j++] = record_args[i];
+
+	for (i = 0; i < sc_args_nr; i++)
+		rec_argv[j++] = sc_args[i];
 
 	/* event string may be different for older kernels - e.g., RHEL6 */
 	if (is_valid_tracepoint("raw_syscalls:sys_enter"))
-		rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
+		rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
 	else if (is_valid_tracepoint("syscalls:sys_enter"))
-		rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
+		rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
 	else {
 		pr_err("Neither raw_syscalls nor syscalls events exist.\n");
 		return -1;
 	}
-	i++;
 
-	for (j = 0; j < (unsigned int)argc; j++, i++)
-		rec_argv[i] = argv[j];
+	if (trace->trace_pgfaults & TRACE_PFMAJ)
+		for (i = 0; i < majpf_args_nr; i++)
+			rec_argv[j++] = majpf_args[i];
+
+	if (trace->trace_pgfaults & TRACE_PFMIN)
+		for (i = 0; i < minpf_args_nr; i++)
+			rec_argv[j++] = minpf_args[i];
+
+	for (i = 0; i < (unsigned int)argc; i++)
+		rec_argv[j++] = argv[i];
 
-	return cmd_record(i, rec_argv, NULL);
+	return cmd_record(j, rec_argv, NULL);
 }
 
 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
@@ -2224,6 +2242,14 @@ static int trace__replay(struct trace *trace)
 		goto out;
 	}
 
+	evlist__for_each(session->evlist, evsel) {
+		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
+		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
+		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
+		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
+			evsel->handler = trace__pgfault;
+	}
+
 	err = parse_target_str(trace);
 	if (err != 0)
 		goto out;
@@ -2458,20 +2484,21 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 	int err;
 	char bf[BUFSIZ];
 
-	if ((argc > 1) && (strcmp(argv[1], "record") == 0))
-		return trace__record(argc-2, &argv[2]);
-
-	argc = parse_options(argc, argv, trace_options, trace_usage, 0);
-
-	/* summary_only implies summary option, but don't overwrite summary if set */
-	if (trace.summary_only)
-		trace.summary = trace.summary_only;
+	argc = parse_options(argc, argv, trace_options, trace_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
 
 	if (trace.trace_pgfaults) {
 		trace.opts.sample_address = true;
 		trace.opts.sample_time = true;
 	}
 
+	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
+		return trace__record(&trace, argc-1, &argv[1]);
+
+	/* summary_only implies summary option, but don't overwrite summary if set */
+	if (trace.summary_only)
+		trace.summary = trace.summary_only;
+
 	if (output_name != NULL) {
 		err = trace__open_output(&trace, output_name);
 		if (err < 0) {

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [tip:perf/core] perf trace: Add possibility to switch off syscall events
  2014-06-26 16:14 ` [PATCH 5/5] perf trace: add possibility to switch off syscall events Stanislav Fomichev
@ 2014-07-05 10:41   ` tip-bot for Stanislav Fomichev
  0 siblings, 0 replies; 16+ messages in thread
From: tip-bot for Stanislav Fomichev @ 2014-07-05 10:41 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: acme, linux-kernel, paulus, mingo, hpa, mingo, a.p.zijlstra,
	jolsa, stfomichev, dsahern, tglx

Commit-ID:  e281a9606d7073c517f2571e83faaff029ddc1cf
Gitweb:     http://git.kernel.org/tip/e281a9606d7073c517f2571e83faaff029ddc1cf
Author:     Stanislav Fomichev <stfomichev@yandex-team.ru>
AuthorDate: Thu, 26 Jun 2014 20:14:28 +0400
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Thu, 26 Jun 2014 17:48:07 -0300

perf trace: Add possibility to switch off syscall events

Currently, we may either trace syscalls or syscalls+pagefaults.

We'd like to be able to trace *only* pagefaults and this commit
implements this feature.

Example:

  [root@zoo /]# echo 1 > /proc/sys/vm/drop_caches ; trace --no-syscalls -F -p `pidof xchat`
       0.000 ( 0.000 ms): xchat/4574 majfault [g_unichar_get_script+0x11] => /usr/lib64/libglib-2.0.so.0.3800.2@0xc403b (x.)
       0.202 ( 0.000 ms): xchat/4574 majfault [_cairo_hash_table_lookup+0x53] => 0x2280ff0 (?.)
      20.854 ( 0.000 ms): xchat/4574 majfault [gdk_cairo_set_source_pixbuf+0x110] => /usr/bin/xchat@0x6da1f (x.)
    1022.000 ( 0.000 ms): xchat/4574 majfault [__memcpy_sse2_unaligned+0x29] => 0x7ff5a8ca0400 (?.)
  ^C[root@zoo /]#

Below we can see malloc calls, 'trace' reading symbol tables in libraries to
resolve symbols, etc.

  [root@zoo /]# echo 1 > /proc/sys/vm/drop_caches ; trace --no-syscalls -F all --cpu 1 sleep 10
       0.000 ( 0.000 ms): chrome/26589 minfault [0x1b53129] => /tmp/perf-26589.map@0x33cbcbf7f000 (x.)
      96.477 ( 0.000 ms): libvirtd/947 minfault [copy_user_enhanced_fast_string+0x5] => 0x7f7685bba000 (?k)
     113.164 ( 0.000 ms): Xorg/1063 minfault [0x786da] => 0x7fce52882a3c (?.)
    7162.801 ( 0.000 ms): chrome/3747 minfault [0x8e1a89] => 0xfcaefed0008 (?.)
<SNIP>
    7773.138 ( 0.000 ms): chrome/3886 minfault [0x8e1a89] => 0xfcb0ce28008 (?.)
    7992.022 ( 0.000 ms): chrome/26574 minfault [0x1b5a708] => 0x3de7b5fc5000 (?.)
    8108.949 ( 0.000 ms): qemu-system-x8/4537 majfault [_int_malloc+0xee] => 0x7faffc466d60 (?.)
    8108.975 ( 0.000 ms): qemu-system-x8/4537 minfault [_int_malloc+0x102] => 0x7faffc466d60 (?.)
<SNIP>
    8148.174 ( 0.000 ms): qemu-system-x8/4537 minfault [_int_malloc+0x102] => 0x7faffc4eb500 (?.)
    8270.855 ( 0.000 ms): chrome/26245 minfault [do_bo_emit_reloc+0xdb] => 0x45d092bc004 (?.)
    8270.869 ( 0.000 ms): chrome/26245 minfault [do_bo_emit_reloc+0x108] => 0x45d09150000 (?.)
no symbols found in /usr/lib64/libspice-server.so.1.9.0, maybe install a debug package?
    8273.831 ( 0.000 ms): trace/20198 majfault [__memcmp_sse4_1+0xbc6] => /usr/lib64/libspice-server.so.1.9.0@0xdf000 (d.)
<SNIP>
    8275.121 ( 0.000 ms): trace/20198 minfault [dso__load+0x38] => 0x14fe756 (?.)
no symbols found in /usr/lib64/libelf-0.158.so, maybe install a debug package?
    8275.142 ( 0.000 ms): trace/20198 minfault [__memcmp_sse4_1+0xbc6] => /usr/lib64/libelf-0.158.so@0x0 (d.)
<SNIP>
  [root@zoo /]#

Signed-off-by: Stanislav Fomichev <stfomichev@yandex-team.ru>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1403799268-1367-6-git-send-email-stfomichev@yandex-team.ru
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-trace.txt |  7 ++++
 tools/perf/builtin-trace.c              | 58 ++++++++++++++++++---------------
 2 files changed, 39 insertions(+), 26 deletions(-)

diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 72397d9..02aac83 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -112,6 +112,9 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 	Trace pagefaults. Optionally, you can specify whether you want minor,
 	major or all pagefaults. Default value is maj.
 
+--syscalls::
+	Trace system calls. This options is enabled by default.
+
 PAGEFAULTS
 ----------
 
@@ -137,6 +140,10 @@ for both IP and fault address in the form of dso@symbol+offset.
 EXAMPLES
 --------
 
+Trace only major pagefaults:
+
+ $ perf trace --no-syscalls -F
+
 Trace syscalls, major and minor pagefaults:
 
  $ perf trace -F all
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 0b58e24..dc7a694 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1215,6 +1215,7 @@ struct trace {
 	bool			summary_only;
 	bool			show_comm;
 	bool			show_tool_stats;
+	bool			trace_syscalls;
 	int			trace_pgfaults;
 };
 
@@ -1927,17 +1928,19 @@ static int trace__record(struct trace *trace, int argc, const char **argv)
 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
 		rec_argv[j++] = record_args[i];
 
-	for (i = 0; i < sc_args_nr; i++)
-		rec_argv[j++] = sc_args[i];
-
-	/* event string may be different for older kernels - e.g., RHEL6 */
-	if (is_valid_tracepoint("raw_syscalls:sys_enter"))
-		rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
-	else if (is_valid_tracepoint("syscalls:sys_enter"))
-		rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
-	else {
-		pr_err("Neither raw_syscalls nor syscalls events exist.\n");
-		return -1;
+	if (trace->trace_syscalls) {
+		for (i = 0; i < sc_args_nr; i++)
+			rec_argv[j++] = sc_args[i];
+
+		/* event string may be different for older kernels - e.g., RHEL6 */
+		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
+			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
+		else if (is_valid_tracepoint("syscalls:sys_enter"))
+			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
+		else {
+			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
+			return -1;
+		}
 	}
 
 	if (trace->trace_pgfaults & TRACE_PFMAJ)
@@ -2010,10 +2013,13 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		goto out;
 	}
 
-	if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
+	if (trace->trace_syscalls &&
+	    perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
+					   trace__sys_exit))
 		goto out_error_tp;
 
-	perf_evlist__add_vfs_getname(evlist);
+	if (trace->trace_syscalls)
+		perf_evlist__add_vfs_getname(evlist);
 
 	if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
 	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ))
@@ -2215,13 +2221,10 @@ static int trace__replay(struct trace *trace)
 	if (evsel == NULL)
 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
 							     "syscalls:sys_enter");
-	if (evsel == NULL) {
-		pr_err("Data file does not have raw_syscalls:sys_enter event\n");
-		goto out;
-	}
 
-	if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
-	    perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
+	if (evsel &&
+	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
+	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
 		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
 		goto out;
 	}
@@ -2231,13 +2234,9 @@ static int trace__replay(struct trace *trace)
 	if (evsel == NULL)
 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
 							     "syscalls:sys_exit");
-	if (evsel == NULL) {
-		pr_err("Data file does not have raw_syscalls:sys_exit event\n");
-		goto out;
-	}
-
-	if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
-	    perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
+	if (evsel &&
+	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
+	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
 		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
 		goto out;
 	}
@@ -2440,6 +2439,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 		},
 		.output = stdout,
 		.show_comm = true,
+		.trace_syscalls = true,
 	};
 	const char *output_name = NULL;
 	const char *ev_qualifier_str = NULL;
@@ -2479,6 +2479,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "Show all syscalls and summary with statistics"),
 	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
 		     "Trace pagefaults", parse_pagefaults, "maj"),
+	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
 	OPT_END()
 	};
 	int err;
@@ -2499,6 +2500,11 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (trace.summary_only)
 		trace.summary = trace.summary_only;
 
+	if (!trace.trace_syscalls && !trace.trace_pgfaults) {
+		pr_err("Please specify something to trace.\n");
+		return -1;
+	}
+
 	if (output_name != NULL) {
 		err = trace__open_output(&trace, output_name);
 		if (err < 0) {

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 5/5] perf trace: Add possibility to switch off syscall events
  2014-06-26 21:01 [GIT PULL 0/5] perf/core improvements and a fix Arnaldo Carvalho de Melo
@ 2014-06-26 21:01 ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 16+ messages in thread
From: Arnaldo Carvalho de Melo @ 2014-06-26 21:01 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, Stanislav Fomichev, David Ahern, Ingo Molnar,
	Jiri Olsa, Paul Mackerras, Peter Zijlstra,
	Arnaldo Carvalho de Melo

From: Stanislav Fomichev <stfomichev@yandex-team.ru>

Currently, we may either trace syscalls or syscalls+pagefaults.

We'd like to be able to trace *only* pagefaults and this commit
implements this feature.

Example:

  [root@zoo /]# echo 1 > /proc/sys/vm/drop_caches ; trace --no-syscalls -F -p `pidof xchat`
       0.000 ( 0.000 ms): xchat/4574 majfault [g_unichar_get_script+0x11] => /usr/lib64/libglib-2.0.so.0.3800.2@0xc403b (x.)
       0.202 ( 0.000 ms): xchat/4574 majfault [_cairo_hash_table_lookup+0x53] => 0x2280ff0 (?.)
      20.854 ( 0.000 ms): xchat/4574 majfault [gdk_cairo_set_source_pixbuf+0x110] => /usr/bin/xchat@0x6da1f (x.)
    1022.000 ( 0.000 ms): xchat/4574 majfault [__memcpy_sse2_unaligned+0x29] => 0x7ff5a8ca0400 (?.)
  ^C[root@zoo /]#

Below we can see malloc calls, 'trace' reading symbol tables in libraries to
resolve symbols, etc.

  [root@zoo /]# echo 1 > /proc/sys/vm/drop_caches ; trace --no-syscalls -F all --cpu 1 sleep 10
       0.000 ( 0.000 ms): chrome/26589 minfault [0x1b53129] => /tmp/perf-26589.map@0x33cbcbf7f000 (x.)
      96.477 ( 0.000 ms): libvirtd/947 minfault [copy_user_enhanced_fast_string+0x5] => 0x7f7685bba000 (?k)
     113.164 ( 0.000 ms): Xorg/1063 minfault [0x786da] => 0x7fce52882a3c (?.)
    7162.801 ( 0.000 ms): chrome/3747 minfault [0x8e1a89] => 0xfcaefed0008 (?.)
<SNIP>
    7773.138 ( 0.000 ms): chrome/3886 minfault [0x8e1a89] => 0xfcb0ce28008 (?.)
    7992.022 ( 0.000 ms): chrome/26574 minfault [0x1b5a708] => 0x3de7b5fc5000 (?.)
    8108.949 ( 0.000 ms): qemu-system-x8/4537 majfault [_int_malloc+0xee] => 0x7faffc466d60 (?.)
    8108.975 ( 0.000 ms): qemu-system-x8/4537 minfault [_int_malloc+0x102] => 0x7faffc466d60 (?.)
<SNIP>
    8148.174 ( 0.000 ms): qemu-system-x8/4537 minfault [_int_malloc+0x102] => 0x7faffc4eb500 (?.)
    8270.855 ( 0.000 ms): chrome/26245 minfault [do_bo_emit_reloc+0xdb] => 0x45d092bc004 (?.)
    8270.869 ( 0.000 ms): chrome/26245 minfault [do_bo_emit_reloc+0x108] => 0x45d09150000 (?.)
no symbols found in /usr/lib64/libspice-server.so.1.9.0, maybe install a debug package?
    8273.831 ( 0.000 ms): trace/20198 majfault [__memcmp_sse4_1+0xbc6] => /usr/lib64/libspice-server.so.1.9.0@0xdf000 (d.)
<SNIP>
    8275.121 ( 0.000 ms): trace/20198 minfault [dso__load+0x38] => 0x14fe756 (?.)
no symbols found in /usr/lib64/libelf-0.158.so, maybe install a debug package?
    8275.142 ( 0.000 ms): trace/20198 minfault [__memcmp_sse4_1+0xbc6] => /usr/lib64/libelf-0.158.so@0x0 (d.)
<SNIP>
  [root@zoo /]#

Signed-off-by: Stanislav Fomichev <stfomichev@yandex-team.ru>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1403799268-1367-6-git-send-email-stfomichev@yandex-team.ru
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-trace.txt |  7 ++++
 tools/perf/builtin-trace.c              | 58 ++++++++++++++++++---------------
 2 files changed, 39 insertions(+), 26 deletions(-)

diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 72397d9aa2ec..02aac831bdd9 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -112,6 +112,9 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 	Trace pagefaults. Optionally, you can specify whether you want minor,
 	major or all pagefaults. Default value is maj.
 
+--syscalls::
+	Trace system calls. This option is enabled by default.
+
 PAGEFAULTS
 ----------
 
@@ -137,6 +140,10 @@ for both IP and fault address in the form of dso@symbol+offset.
 EXAMPLES
 --------
 
+Trace only major pagefaults:
+
+ $ perf trace --no-syscalls -F
+
 Trace syscalls, major and minor pagefaults:
 
  $ perf trace -F all
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 0b58e24c7ccb..dc7a694b61fe 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1215,6 +1215,7 @@ struct trace {
 	bool			summary_only;
 	bool			show_comm;
 	bool			show_tool_stats;
+	bool			trace_syscalls;
 	int			trace_pgfaults;
 };
 
@@ -1927,17 +1928,19 @@ static int trace__record(struct trace *trace, int argc, const char **argv)
 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
 		rec_argv[j++] = record_args[i];
 
-	for (i = 0; i < sc_args_nr; i++)
-		rec_argv[j++] = sc_args[i];
-
-	/* event string may be different for older kernels - e.g., RHEL6 */
-	if (is_valid_tracepoint("raw_syscalls:sys_enter"))
-		rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
-	else if (is_valid_tracepoint("syscalls:sys_enter"))
-		rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
-	else {
-		pr_err("Neither raw_syscalls nor syscalls events exist.\n");
-		return -1;
+	if (trace->trace_syscalls) {
+		for (i = 0; i < sc_args_nr; i++)
+			rec_argv[j++] = sc_args[i];
+
+		/* event string may be different for older kernels - e.g., RHEL6 */
+		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
+			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
+		else if (is_valid_tracepoint("syscalls:sys_enter"))
+			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
+		else {
+			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
+			return -1;
+		}
 	}
 
 	if (trace->trace_pgfaults & TRACE_PFMAJ)
@@ -2010,10 +2013,13 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		goto out;
 	}
 
-	if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
+	if (trace->trace_syscalls &&
+	    perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
+					   trace__sys_exit))
 		goto out_error_tp;
 
-	perf_evlist__add_vfs_getname(evlist);
+	if (trace->trace_syscalls)
+		perf_evlist__add_vfs_getname(evlist);
 
 	if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
 	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ))
@@ -2215,13 +2221,10 @@ static int trace__replay(struct trace *trace)
 	if (evsel == NULL)
 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
 							     "syscalls:sys_enter");
-	if (evsel == NULL) {
-		pr_err("Data file does not have raw_syscalls:sys_enter event\n");
-		goto out;
-	}
 
-	if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
-	    perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
+	if (evsel &&
+	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
+	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
 		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
 		goto out;
 	}
@@ -2231,13 +2234,9 @@ static int trace__replay(struct trace *trace)
 	if (evsel == NULL)
 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
 							     "syscalls:sys_exit");
-	if (evsel == NULL) {
-		pr_err("Data file does not have raw_syscalls:sys_exit event\n");
-		goto out;
-	}
-
-	if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
-	    perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
+	if (evsel &&
+	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
+	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
 		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
 		goto out;
 	}
@@ -2440,6 +2439,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 		},
 		.output = stdout,
 		.show_comm = true,
+		.trace_syscalls = true,
 	};
 	const char *output_name = NULL;
 	const char *ev_qualifier_str = NULL;
@@ -2479,6 +2479,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "Show all syscalls and summary with statistics"),
 	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
 		     "Trace pagefaults", parse_pagefaults, "maj"),
+	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
 	OPT_END()
 	};
 	int err;
@@ -2499,6 +2500,11 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (trace.summary_only)
 		trace.summary = trace.summary_only;
 
+	if (!trace.trace_syscalls && !trace.trace_pgfaults) {
+		pr_err("Please specify something to trace.\n");
+		return -1;
+	}
+
 	if (output_name != NULL) {
 		err = trace__open_output(&trace, output_name);
 		if (err < 0) {
-- 
1.9.3


^ permalink raw reply related	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2014-07-05 10:42 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-06-26 16:14 [PATCH v3 0/5] perf trace pagefaults Stanislav Fomichev
2014-06-26 16:14 ` [PATCH 1/5] perf trace: add perf_event parameter to tracepoint_handler Stanislav Fomichev
2014-07-05 10:40   ` [tip:perf/core] perf trace: Add " tip-bot for Stanislav Fomichev
2014-06-26 16:14 ` [PATCH 2/5] perf trace: add support for pagefault tracing Stanislav Fomichev
2014-07-05 10:41   ` [tip:perf/core] perf trace: Add " tip-bot for Stanislav Fomichev
2014-06-26 16:14 ` [PATCH 3/5] perf trace: add pagefaults record and replay support Stanislav Fomichev
2014-07-05 10:41   ` [tip:perf/core] perf trace: Add " tip-bot for Stanislav Fomichev
2014-06-26 16:14 ` [PATCH 4/5] perf trace: add pagefault statistics Stanislav Fomichev
2014-06-26 19:41   ` Arnaldo Carvalho de Melo
2014-06-27  8:43     ` Stanislav Fomichev
2014-06-27 13:53       ` Arnaldo Carvalho de Melo
2014-06-27 15:44         ` Stanislav Fomichev
2014-06-27 21:16           ` Arnaldo Carvalho de Melo
2014-06-26 16:14 ` [PATCH 5/5] perf trace: add possibility to switch off syscall events Stanislav Fomichev
2014-07-05 10:41   ` [tip:perf/core] perf trace: Add " tip-bot for Stanislav Fomichev
2014-06-26 21:01 [GIT PULL 0/5] perf/core improvements and a fix Arnaldo Carvalho de Melo
2014-06-26 21:01 ` [PATCH 5/5] perf trace: Add possibility to switch off syscall events Arnaldo Carvalho de Melo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).