All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alexis Berlemont <alexis.berlemont@gmail.com>
To: linux-kernel@vger.kernel.org
Cc: Alexis Berlemont <alexis.berlemont@gmail.com>,
	peterz@infradead.org, mingo@redhat.com, acme@kernel.org,
	alexander.shishkin@linux.intel.com
Subject: [PATCH 2/2] perf: add page fault duration measures in perf trace
Date: Wed, 26 Oct 2016 01:52:00 +0200	[thread overview]
Message-ID: <20161025235200.28781-3-alexis.berlemont@gmail.com> (raw)
In-Reply-To: <20161025235200.28781-1-alexis.berlemont@gmail.com>

Signed-off-by: Alexis Berlemont <alexis.berlemont@gmail.com>
---
 tools/perf/Documentation/perf-trace.txt |   4 +-
 tools/perf/builtin-trace.c              | 225 ++++++++++++++++++++++++++++----
 2 files changed, 202 insertions(+), 27 deletions(-)

diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 781b019..53c103c 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -117,7 +117,9 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 -F=[all|min|maj]::
 --pf=[all|min|maj]::
 	Trace pagefaults. Optionally, you can specify whether you want minor,
-	major or all pagefaults. Default value is maj.
+	major or all pagefaults. Default value is maj. Durations of
+	page-fault handling will be printed if possible (need for some
+	architecture-dependent tracepoints).
 
 --syscalls::
 	Trace system calls. This options is enabled by default.
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 5f45166..100c28a 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -861,6 +861,8 @@ struct thread_trace {
 	} paths;
 
 	struct intlist *syscall_stats;
+	u64	       pgfault_entry_time;
+	char           *pgfault_entry_str;
 };
 
 static struct thread_trace *thread_trace__new(void)
@@ -1797,21 +1799,56 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
 	return 0;
 }
 
-static void print_location(FILE *f, struct perf_sample *sample,
-			   struct addr_location *al,
-			   bool print_dso, bool print_sym)
+static int trace__pgfault_enter(struct trace *trace,
+				struct perf_evsel *evsel __maybe_unused,
+				union perf_event *event __maybe_unused,
+				struct perf_sample *sample)
 {
+	struct thread *thread;
+	struct thread_trace *ttrace;
+	int err = -1;
+
+	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+	if (!thread)
+		goto out;
+
+	ttrace = thread__trace(thread, trace->output);
+	if (ttrace == NULL)
+		goto out_put;
+
+	ttrace->pgfault_entry_time = sample->time;
+
+out:
+	err = 0;
+out_put:
+	thread__put(thread);
+	return err;
+}
+
+static size_t scnprintf_location(char *bf, size_t size,
+				struct perf_sample *sample,
+				struct addr_location *al,
+				bool print_dso, bool print_sym)
+{
+	size_t printed = 0;
 
 	if ((verbose || print_dso) && al->map)
-		fprintf(f, "%s@", al->map->dso->long_name);
+		printed += scnprintf(bf + printed,
+				size - printed, "%s@", al->map->dso->long_name);
 
 	if ((verbose || print_sym) && al->sym)
-		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
-			al->addr - al->sym->start);
+		printed += scnprintf(bf + printed,
+				size - printed,
+				"%s+0x%" PRIx64, al->sym->name,
+				al->addr - al->sym->start);
 	else if (al->map)
-		fprintf(f, "0x%" PRIx64, al->addr);
+		printed += scnprintf(bf + printed,
+				size - printed, "0x%" PRIx64, al->addr);
 	else
-		fprintf(f, "0x%" PRIx64, sample->addr);
+		printed += scnprintf(bf + printed,
+				size - printed, "0x%" PRIx64, sample->addr);
+
+	return printed;
 }
 
 static int trace__pgfault(struct trace *trace,
@@ -1823,13 +1860,22 @@ static int trace__pgfault(struct trace *trace,
 	struct addr_location al;
 	char map_type = 'd';
 	struct thread_trace *ttrace;
+	size_t printed = 0;
 	int err = -1;
 	int callchain_ret = 0;
 
 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+	if (!thread)
+		goto out;
 
-	if (sample->callchain) {
-		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+	ttrace = thread__trace(thread, trace->output);
+	if (ttrace == NULL)
+		goto out_put;
+
+	if (!ttrace->pgfault_entry_time && sample->callchain) {
+		callchain_ret =
+			trace__resolve_callchain(trace, evsel,
+						sample, &callchain_cursor);
 		if (callchain_ret == 0) {
 			if (callchain_cursor.nr < trace->min_stack)
 				goto out_put;
@@ -1837,10 +1883,6 @@ static int trace__pgfault(struct trace *trace,
 		}
 	}
 
-	ttrace = thread__trace(thread, trace->output);
-	if (ttrace == NULL)
-		goto out_put;
-
 	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
 		ttrace->pfmaj++;
 	else
@@ -1849,18 +1891,27 @@ static int trace__pgfault(struct trace *trace,
 	if (trace->summary_only)
 		goto out;
 
+	if (ttrace->pgfault_entry_str == NULL) {
+		ttrace->pgfault_entry_str = malloc(trace__entry_str_size);
+		if (!ttrace->pgfault_entry_str)
+			goto out_put;
+		ttrace->pgfault_entry_str[0] = '\0';
+	}
+
 	thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
 			      sample->ip, &al);
 
-	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
-
-	fprintf(trace->output, "%sfault [",
-		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
-		"maj" : "min");
+	printed += scnprintf(ttrace->pgfault_entry_str + printed,
+			trace__entry_str_size - printed, "%sfault [",
+			evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
+			"maj" : "min");
 
-	print_location(trace->output, sample, &al, false, true);
+	printed += scnprintf_location(ttrace->pgfault_entry_str + printed,
+				trace__entry_str_size - printed,
+				sample, &al, false, true);
 
-	fprintf(trace->output, "] => ");
+	printed += scnprintf(ttrace->pgfault_entry_str + printed,
+			trace__entry_str_size - printed, "] => ");
 
 	thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
 				   sample->addr, &al);
@@ -1875,14 +1926,97 @@ static int trace__pgfault(struct trace *trace,
 			map_type = '?';
 	}
 
-	print_location(trace->output, sample, &al, true, false);
+	printed += scnprintf_location(ttrace->pgfault_entry_str + printed,
+				trace__entry_str_size - printed,
+				sample, &al, true, false);
 
-	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
+	printed += scnprintf(ttrace->pgfault_entry_str + printed,
+			trace__entry_str_size - printed,
+			" (%c%c)", map_type, al.level);
+
+	if (!ttrace->pgfault_entry_time) {
+		trace__fprintf_entry_head(trace, thread,
+					0, sample->time, trace->output);
+		fprintf(trace->output, "%-70s\n", ttrace->pgfault_entry_str);
+
+		if (callchain_ret > 0)
+			trace__fprintf_callchain(trace, sample);
+		else if (callchain_ret < 0)
+			pr_err("Problem processing %s callchain, skipping...\n",
+				perf_evsel__name(evsel));
+	}
+
+out:
+	err = 0;
+out_put:
+	thread__put(thread);
+	return err;
+}
+
+static int trace__pgfault_exit(struct trace *trace, struct perf_evsel *evsel,
+			      union perf_event *event __maybe_unused,
+			      struct perf_sample *sample)
+{
+	struct thread *thread;
+	struct thread_trace *ttrace;
+	u64 duration = 0;
+	int err = -1, callchain_ret = 0;
+
+	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+
+	ttrace = thread__priv(thread);
+	if (!ttrace)
+		goto out_put;
+
+	if (!ttrace->pgfault_entry_time)
+		goto out;
+
+	/*
+	 * The check below is necessary for a specific case: it is
+	 * possible to enable only major (or minor) page faults
+	 * software events but it is impossible to filter page-fault
+	 * related tracepoints according to the major / minor
+	 * characteristic.
+	 */
+
+	if (!ttrace->pgfault_entry_str ||
+		strlen(ttrace->pgfault_entry_str) == 0)
+		goto out;
+
+	if (sample->callchain) {
+		callchain_ret =
+			trace__resolve_callchain(trace, evsel,
+						sample, &callchain_cursor);
+		if (callchain_ret == 0) {
+			if (callchain_cursor.nr < trace->min_stack)
+				goto out;
+			callchain_ret = 1;
+		}
+	}
+
+	if (ttrace->pgfault_entry_time) {
+		duration = sample->time - ttrace->pgfault_entry_time;
+		if (trace__filter_duration(trace, duration))
+			goto out;
+	}
+
+	trace__fprintf_entry_head(trace, thread,
+				duration, sample->time, trace->output);
+
+	fprintf(trace->output, "%-70s\n", ttrace->pgfault_entry_str);
+
+	/*
+	 * Once the string is printed; clear it just in case the next
+	 * software events is filtered (because of major / minor.
+	 */
+	ttrace->pgfault_entry_str[0] = '\0';
 
 	if (callchain_ret > 0)
 		trace__fprintf_callchain(trace, sample);
 	else if (callchain_ret < 0)
-		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+		pr_err("Problem processing %s callchain, skipping...\n",
+			perf_evsel__name(evsel));
+
 out:
 	err = 0;
 out_put:
@@ -2062,6 +2196,37 @@ static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
 	return evsel;
 }
 
+static int trace__add_pgfault_newtp(struct trace *trace)
+{
+	int err = -1;
+	struct perf_evlist *evlist = trace->evlist;
+
+	/*
+	 * The tracepoints exceptions::page_fault_* are not available
+	 * on all architecture.; so, we consider it is not an error if
+	 * the 1st one's initialization is KO...
+	 */
+	err = perf_evlist__add_newtp(evlist, "exceptions",
+				"page_fault_exit", trace__pgfault_exit);
+	if (err < 0)
+		return 0;
+
+	/*
+	 * ...however, if the 2nd or the 3rd init is KO; there is
+	 * definitely something wrong not related with the absence of
+	 * tracepoints
+	 */
+	err = perf_evlist__add_newtp(evlist, "exceptions",
+				"page_fault_kernel", trace__pgfault_enter);
+	if (err < 0)
+		return -1;
+
+	err = perf_evlist__add_newtp(evlist, "exceptions",
+				"page_fault_user", trace__pgfault_enter);
+
+	return err;
+}
+
 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
 {
 	const u32 type = event->header.type;
@@ -2193,9 +2358,12 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		perf_evlist__add(evlist, pgfault_min);
 	}
 
+	if (trace->trace_pgfaults && trace__add_pgfault_newtp(trace))
+		goto out_error_pgfault_tp;
+
 	if (trace->sched &&
-	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
-				   trace__sched_stat_runtime))
+		perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
+				trace__sched_stat_runtime))
 		goto out_error_sched_stat_runtime;
 
 	err = perf_evlist__create_maps(evlist, &trace->opts.target);
@@ -2399,6 +2567,11 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
 	goto out_error;
 
+out_error_pgfault_tp:
+	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf),
+				"exceptions", "page_fault_(kernel|user)");
+	goto out_error;
+
 out_error_mmap:
 	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
 	goto out_error;
-- 
2.10.1

  parent reply	other threads:[~2016-10-25 23:56 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-10-25 23:51 [PATCH 0/2] perf: measure page fault duration in perf trace Alexis Berlemont
2016-10-25 23:51 ` [PATCH 1/2] perf, x86-mm: Add exit-fault tracing Alexis Berlemont
2016-10-26  8:51   ` Peter Zijlstra
2016-10-26 18:53     ` Arnaldo Carvalho de Melo
2016-10-27  8:37       ` Peter Zijlstra
2016-10-27 23:31         ` [PATCH v2] perf, x86-mm: declare page-faults tracepoints like irq-vectors ones Alexis Berlemont
2016-10-27 23:31         ` Alexis Berlemont
2016-11-18  0:21         ` [PATCH v3 0/2] perf: measure page fault duration in perf trace Alexis Berlemont
2016-12-05 23:40           ` Alexis Berlemont
2016-12-06 14:46             ` Arnaldo Carvalho de Melo
2016-11-18  0:21         ` [PATCH v3 1/2] perf, x86-mm: declare page-faults tracepoints like irq-vectors ones Alexis Berlemont
2016-11-18  0:21         ` [PATCH v3 2/2] perf: add page fault duration measures in perf trace Alexis Berlemont
2016-10-27  8:38       ` [PATCH 1/2] perf, x86-mm: Add exit-fault tracing Peter Zijlstra
2016-10-25 23:52 ` Alexis Berlemont [this message]
2016-10-26  8:46 ` [PATCH 0/2] perf: measure page fault duration in perf trace Peter Zijlstra
2016-10-26 18:47   ` Arnaldo Carvalho de Melo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161025235200.28781-3-alexis.berlemont@gmail.com \
    --to=alexis.berlemont@gmail.com \
    --cc=acme@kernel.org \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.