From: Stanislav Fomichev <stfomichev@yandex-team.ru>
To: a.p.zijlstra@chello.nl, paulus@samba.org, mingo@redhat.com,
acme@kernel.org, dsahern@gmail.com, stfomichev@yandex-team.ru,
jolsa@redhat.com, xiaoguangrong@linux.vnet.ibm.com,
yangds.fnst@cn.fujitsu.com, adrian.hunter@intel.com,
namhyung@kernel.org
Cc: linux-kernel@vger.kernel.org
Subject: [PATCH 1/5] perf trace: add support for pagefault tracing
Date: Wed, 18 Jun 2014 18:59:21 +0400 [thread overview]
Message-ID: <1403103565-6388-2-git-send-email-stfomichev@yandex-team.ru> (raw)
In-Reply-To: <1403103565-6388-1-git-send-email-stfomichev@yandex-team.ru>
Signed-off-by: Stanislav Fomichev <stfomichev@yandex-team.ru>
---
tools/perf/Documentation/perf-trace.txt | 12 ++
tools/perf/builtin-trace.c | 198 ++++++++++++++++++++++++++------
2 files changed, 176 insertions(+), 34 deletions(-)
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index fae38d9a44a4..7da5f75a45f1 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -107,6 +107,18 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
Show tool stats such as number of times fd->pathname was discovered thru
hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc.
+-f::
+--pgfaults::
+ Trace major pagefaults. To also trace minor pagefaults, specify this
+ option twice.
+
+EXAMPLES
+--------
+
+Trace syscalls, major and minor pagefaults:
+
+ $ perf trace -f -f
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script[1]
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index f954c26de231..6c7ae048db59 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1211,6 +1211,7 @@ struct trace {
bool summary_only;
bool show_comm;
bool show_tool_stats;
+ int trace_pgfaults;
};
static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
@@ -1534,7 +1535,9 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
return printed;
}
-typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
+typedef int (*tracepoint_handler)(struct trace *trace,
+ union perf_event *event,
+ struct perf_evsel *evsel,
struct perf_sample *sample);
static struct syscall *trace__syscall_info(struct trace *trace,
@@ -1606,7 +1609,9 @@ static void thread__update_stats(struct thread_trace *ttrace,
update_stats(stats, duration);
}
-static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
+static int trace__sys_enter(struct trace *trace,
+ union perf_event *event __maybe_unused,
+ struct perf_evsel *evsel,
struct perf_sample *sample)
{
char *msg;
@@ -1655,7 +1660,9 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
return 0;
}
-static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
+static int trace__sys_exit(struct trace *trace,
+ union perf_event *event __maybe_unused,
+ struct perf_evsel *evsel,
struct perf_sample *sample)
{
int ret;
@@ -1734,14 +1741,18 @@ out:
return 0;
}
-static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
+static int trace__vfs_getname(struct trace *trace,
+ union perf_event *event __maybe_unused,
+ struct perf_evsel *evsel,
struct perf_sample *sample)
{
trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
return 0;
}
-static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
+static int trace__sched_stat_runtime(struct trace *trace,
+ union perf_event *event __maybe_unused,
+ struct perf_evsel *evsel,
struct perf_sample *sample)
{
u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
@@ -1768,6 +1779,69 @@ out_dump:
return 0;
}
+static bool valid_dso(struct addr_location *al, struct perf_sample *sample)
+{
+ if (al->map && al->map->dso &&
+ al->map->start <= sample->addr &&
+ al->map->end > sample->addr)
+ return true;
+ else
+ return false;
+}
+
+static int trace__pgfault(struct trace *trace,
+ union perf_event *event,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ struct thread *thread;
+ u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ struct addr_location al;
+ char map_type = 'd';
+
+ thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+
+ thread__find_addr_location(thread, trace->host, cpumode, MAP__FUNCTION,
+ sample->ip, &al);
+
+ trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
+
+ fprintf(trace->output, "%sfault ",
+ evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
+ "maj" : "min");
+
+ if (al.sym)
+ fprintf(trace->output, "[%s+0x%lx]",
+ al.sym->name, al.addr - al.sym->start);
+ else
+ fprintf(trace->output, "[0x%lx]", sample->ip);
+
+ fprintf(trace->output, " => ");
+
+ thread__find_addr_map(thread, trace->host, cpumode, MAP__VARIABLE,
+ sample->addr, &al);
+
+ if (!valid_dso(&al, sample)) {
+ thread__find_addr_map(thread, trace->host, cpumode,
+ MAP__FUNCTION, sample->addr, &al);
+
+ if (valid_dso(&al, sample))
+ map_type = 'x';
+ }
+
+ if (valid_dso(&al, sample)) {
+ fprintf(trace->output, "%s+0x%lx",
+ al.map->dso->long_name, sample->addr - al.map->start);
+ } else {
+ map_type = '?';
+ fprintf(trace->output, "0x%lx", sample->addr);
+ }
+
+ fprintf(trace->output, " (%c%c)\n", map_type, al.level);
+
+ return 0;
+}
+
static bool skip_sample(struct trace *trace, struct perf_sample *sample)
{
if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
@@ -1781,7 +1855,7 @@ static bool skip_sample(struct trace *trace, struct perf_sample *sample)
}
static int trace__process_sample(struct perf_tool *tool,
- union perf_event *event __maybe_unused,
+ union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine __maybe_unused)
@@ -1799,7 +1873,7 @@ static int trace__process_sample(struct perf_tool *tool,
if (handler) {
++trace->nr_events;
- handler(trace, evsel, sample);
+ handler(trace, event, evsel, sample);
}
return err;
@@ -1826,7 +1900,7 @@ static int parse_target_str(struct trace *trace)
return 0;
}
-static int trace__record(int argc, const char **argv)
+static int trace__record(struct trace *trace, int argc, const char **argv)
{
unsigned int rec_argc, i, j;
const char **rec_argv;
@@ -1835,34 +1909,52 @@ static int trace__record(int argc, const char **argv)
"-R",
"-m", "1024",
"-c", "1",
- "-e",
};
+ const char * const sc_args[] = { "-e", };
+ unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
+ const char * const majpf_args[] = { "-e", "major-faults" };
+ unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
+ const char * const minpf_args[] = { "-e", "minor-faults" };
+ unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
+
/* +1 is for the event string below */
- rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
+ rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
+ majpf_args_nr + minpf_args_nr + argc;
rec_argv = calloc(rec_argc + 1, sizeof(char *));
if (rec_argv == NULL)
return -ENOMEM;
+ j = 0;
for (i = 0; i < ARRAY_SIZE(record_args); i++)
- rec_argv[i] = record_args[i];
+ rec_argv[j++] = record_args[i];
+
+ for (i = 0; i < sc_args_nr; i++)
+ rec_argv[j++] = sc_args[i];
/* event string may be different for older kernels - e.g., RHEL6 */
if (is_valid_tracepoint("raw_syscalls:sys_enter"))
- rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
+ rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
else if (is_valid_tracepoint("syscalls:sys_enter"))
- rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
+ rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
else {
pr_err("Neither raw_syscalls nor syscalls events exist.\n");
return -1;
}
- i++;
- for (j = 0; j < (unsigned int)argc; j++, i++)
- rec_argv[i] = argv[j];
+ if (trace->trace_pgfaults)
+ for (i = 0; i < majpf_args_nr; i++)
+ rec_argv[j++] = majpf_args[i];
+
+ if (trace->trace_pgfaults > 1)
+ for (i = 0; i < minpf_args_nr; i++)
+ rec_argv[j++] = minpf_args[i];
- return cmd_record(i, rec_argv, NULL);
+ for (i = 0; i < (unsigned int)argc; i++)
+ rec_argv[j++] = argv[i];
+
+ return cmd_record(j, rec_argv, NULL);
}
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
@@ -1882,6 +1974,30 @@ static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
perf_evlist__add(evlist, evsel);
}
+static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
+ u64 config)
+{
+ struct perf_evsel *evsel;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .mmap_data = 1,
+ .sample_period = 1,
+ };
+
+ attr.config = config;
+
+ event_attr_init(&attr);
+
+ evsel = perf_evsel__new(&attr);
+ if (!evsel)
+ return -ENOMEM;
+
+ evsel->handler = trace__pgfault;
+ perf_evlist__add(evlist, evsel);
+
+ return 0;
+}
+
static int trace__run(struct trace *trace, int argc, const char **argv)
{
struct perf_evlist *evlist = perf_evlist__new();
@@ -1902,6 +2018,14 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
perf_evlist__add_vfs_getname(evlist);
+ if (trace->trace_pgfaults &&
+ perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ))
+ goto out_error_tp;
+
+ if (trace->trace_pgfaults > 1 &&
+ perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
+ goto out_error_tp;
+
if (trace->sched &&
perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
trace__sched_stat_runtime))
@@ -1982,7 +2106,8 @@ again:
goto next_event;
}
- if (sample.raw_data == NULL) {
+ if (evsel->attr.type != PERF_TYPE_SOFTWARE &&
+ sample.raw_data == NULL) {
fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
perf_evsel__name(evsel), sample.tid,
sample.cpu, sample.raw_size);
@@ -1990,7 +2115,7 @@ again:
}
handler = evsel->handler;
- handler(trace, evsel, &sample);
+ handler(trace, event, evsel, &sample);
next_event:
perf_evlist__mmap_consume(evlist, i);
@@ -2093,13 +2218,10 @@ static int trace__replay(struct trace *trace)
if (evsel == NULL)
evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
"syscalls:sys_enter");
- if (evsel == NULL) {
- pr_err("Data file does not have raw_syscalls:sys_enter event\n");
- goto out;
- }
- if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
- perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
+ if (evsel &&
+ (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
+ perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
pr_err("Error during initialize raw_syscalls:sys_enter event\n");
goto out;
}
@@ -2109,15 +2231,19 @@ static int trace__replay(struct trace *trace)
if (evsel == NULL)
evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
"syscalls:sys_exit");
- if (evsel == NULL) {
- pr_err("Data file does not have raw_syscalls:sys_exit event\n");
+ if (evsel &&
+ (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
+ perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
+ pr_err("Error during initialize raw_syscalls:sys_exit event\n");
goto out;
}
- if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
- perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
- pr_err("Error during initialize raw_syscalls:sys_exit event\n");
- goto out;
+ evlist__for_each(session->evlist, evsel) {
+ if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
+ (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
+ evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
+ evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
+ evsel->handler = trace__pgfault;
}
err = parse_target_str(trace);
@@ -2290,6 +2416,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
.user_interval = ULLONG_MAX,
.no_buffering = true,
.mmap_pages = 1024,
+ .sample_address = true,
+ .sample_time = true,
},
.output = stdout,
.show_comm = true,
@@ -2330,15 +2458,17 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
"Show only syscall summary with statistics"),
OPT_BOOLEAN('S', "with-summary", &trace.summary,
"Show all syscalls and summary with statistics"),
+ OPT_INCR('f', "pgfaults", &trace.trace_pgfaults, "Trace pagefaults"),
OPT_END()
};
int err;
char bf[BUFSIZ];
- if ((argc > 1) && (strcmp(argv[1], "record") == 0))
- return trace__record(argc-2, &argv[2]);
+ argc = parse_options(argc, argv, trace_options, trace_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
- argc = parse_options(argc, argv, trace_options, trace_usage, 0);
+ if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
+ return trace__record(&trace, argc-1, &argv[1]);
/* summary_only implies summary option, but don't overwrite summary if set */
if (trace.summary_only)
--
1.8.3.2
next prev parent reply other threads:[~2014-06-18 15:00 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-06-18 14:59 [RFC PATCH 0/5] perf trace pagefaults Stanislav Fomichev
2014-06-18 14:59 ` Stanislav Fomichev [this message]
2014-06-18 15:47 ` [PATCH 1/5] perf trace: add support for pagefault tracing Arnaldo Carvalho de Melo
2014-06-18 17:42 ` Stanislav Fomichev
2014-06-18 20:04 ` Arnaldo Carvalho de Melo
2014-06-19 12:06 ` Stanislav Fomichev
2014-06-25 11:33 ` Stanislav Fomichev
2014-06-18 14:59 ` [PATCH 2/5] perf trace: add pagefault statistics Stanislav Fomichev
2014-06-18 14:59 ` [PATCH 3/5] perf trace: add possibility to switch off syscall events Stanislav Fomichev
2014-06-18 14:59 ` [PATCH 4/5] perf kvm: move perf_kvm__mmap_read into session utils Stanislav Fomichev
2014-06-18 14:59 ` [PATCH 5/5] perf trace: add events cache Stanislav Fomichev
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1403103565-6388-2-git-send-email-stfomichev@yandex-team.ru \
--to=stfomichev@yandex-team.ru \
--cc=a.p.zijlstra@chello.nl \
--cc=acme@kernel.org \
--cc=adrian.hunter@intel.com \
--cc=dsahern@gmail.com \
--cc=jolsa@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=paulus@samba.org \
--cc=xiaoguangrong@linux.vnet.ibm.com \
--cc=yangds.fnst@cn.fujitsu.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).