linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Namhyung Kim <namhyung@kernel.org>
To: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Jiri Olsa <jolsa@redhat.com>, LKML <linux-kernel@vger.kernel.org>,
	David Ahern <dsahern@gmail.com>, Minchan Kim <minchan@kernel.org>,
	Joonsoo Kim <js1304@gmail.com>
Subject: [PATCH 5/6] perf kmem: Implement stat --page --caller
Date: Thu, 12 Mar 2015 16:32:50 +0900	[thread overview]
Message-ID: <1426145571-3065-6-git-send-email-namhyung@kernel.org> (raw)
In-Reply-To: <1426145571-3065-1-git-send-email-namhyung@kernel.org>

Make perf kmem support caller statistics for pages.  Unlike the slab
case, the tracepoints in the page allocator don't provide callsite info.
So it records with callchains and extracts the callsite info from them.

Note that the callchain contains several memory allocation functions
which have no meaning for users.  So skip those functions to get the
proper callsites.  I used the following regex pattern to skip the
allocator functions:

  ^_?_?(alloc|get_free|get_zeroed)_pages?

This gave me the following list of functions:

  # perf kmem record --page sleep 3
  # perf kmem stat --page -v
  ...
  alloc func: __get_free_pages
  alloc func: get_zeroed_page
  alloc func: alloc_pages_exact
  alloc func: __alloc_pages_direct_compact
  alloc func: __alloc_pages_nodemask
  alloc func: alloc_page_interleave
  alloc func: alloc_pages_current
  alloc func: alloc_pages_vma
  alloc func: alloc_page_buffers
  alloc func: alloc_pages_exact_nid
  ...

The output looks mostly the same as --alloc (I also added a callsite
column to that) but groups entries by callsite.  Currently, the order,
migrate type and GFP flag info are from the last allocation and are not
guaranteed to be the same for all allocations from the callsite.

  --------------------------------------------------------------------------------
  Total_alloc/Per | Hit      | Order | Migrate type | GFP flag | Callsite
  --------------------------------------------------------------------------------
    1089536/4096  |      266 |     0 |    UNMOVABLE | 000000d0 | __pollwait
      53248/4096  |       13 |     0 |    UNMOVABLE | 002084d0 | pte_alloc_one
      45056/4096  |       11 |     0 |      MOVABLE | 000280da | handle_mm_fault
      20480/4096  |        5 |     0 |      MOVABLE | 000200da | do_cow_fault
      20480/4096  |        5 |     0 |      MOVABLE | 000200da | do_wp_page
      16384/4096  |        4 |     0 |    UNMOVABLE | 000084d0 | __pmd_alloc
      16384/4096  |        4 |     0 |    UNMOVABLE | 00000200 | __tlb_remove_page
      12288/4096  |        3 |     0 |    UNMOVABLE | 000084d0 | __pud_alloc
       8192/4096  |        2 |     0 |    UNMOVABLE | 00000010 | bio_copy_user_iov
       4096/4096  |        1 |     0 |    UNMOVABLE | 000200d2 | pipe_write
       4096/4096  |        1 |     0 |      MOVABLE | 000280da | do_wp_page
       4096/4096  |        1 |     0 |    UNMOVABLE | 002084d0 | pgd_alloc
  --------------------------------------------------------------------------------

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/builtin-kmem.c | 299 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 275 insertions(+), 24 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 0807183e63ae..59475bd3d6d4 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -10,6 +10,7 @@
 #include "util/header.h"
 #include "util/session.h"
 #include "util/tool.h"
+#include "util/callchain.h"
 
 #include "util/parse-options.h"
 #include "util/trace-event.h"
@@ -20,6 +21,7 @@
 
 #include <linux/rbtree.h>
 #include <linux/string.h>
+#include <regex.h>
 
 static int	kmem_slab;
 static int	kmem_page;
@@ -237,6 +239,8 @@ static unsigned long nr_page_frees;
 static unsigned long nr_page_fails;
 static unsigned long nr_page_nomatch;
 
+static struct perf_session *kmem_session;
+
 #define MAX_MIGRATE_TYPES  6
 #define MAX_PAGE_ORDER     11
 
@@ -245,6 +249,7 @@ static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES];
 struct page_stat {
 	struct rb_node 	node;
 	u64 		page;
+	u64 		callsite;
 	int 		order;
 	unsigned 	gfp_flags;
 	unsigned 	migrate_type;
@@ -254,12 +259,142 @@ struct page_stat {
 	int 		nr_free;
 };
 
-static struct rb_root page_tree;
+static struct rb_root page_alloc_tree;
 static struct rb_root page_alloc_sorted;
+static struct rb_root page_caller_tree;
+static struct rb_root page_caller_sorted;
+
+struct alloc_func {
+	u64 start;
+	u64 end;
+	char *name;
+};
+
+static int nr_alloc_funcs;
+static struct alloc_func *alloc_func_list;
+
+static int funcmp(const void *a, const void *b)
+{
+	const struct alloc_func *fa = a;
+	const struct alloc_func *fb = b;
+
+	if (fa->start > fb->start)
+		return 1;
+	else
+		return -1;
+}
+
+static int callcmp(const void *a, const void *b)
+{
+	const struct alloc_func *fa = a;
+	const struct alloc_func *fb = b;
+
+	if (fb->start <= fa->start && fa->end < fb->end)
+		return 0;
+
+	if (fa->start > fb->start)
+		return 1;
+	else
+		return -1;
+}
+
+static int build_alloc_func_list(void)
+{
+	int ret;
+	struct map *kernel_map;
+	struct symbol *sym;
+	struct rb_node *node;
+	struct alloc_func *func;
+	struct machine *machine = &kmem_session->machines.host;
+
+	regex_t alloc_func_regex;
+	const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?";
+
+	ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED);
+	if (ret) {
+		char err[BUFSIZ];
+
+		regerror(ret, &alloc_func_regex, err, sizeof(err));
+		pr_err("Invalid regex: %s\n%s", pattern, err);
+		return -EINVAL;
+	}
+
+	kernel_map = machine->vmlinux_maps[MAP__FUNCTION];
+	map__load(kernel_map, NULL);
+
+	map__for_each_symbol(kernel_map, sym, node) {
+		if (regexec(&alloc_func_regex, sym->name, 0, NULL, 0))
+			continue;
+
+		func = realloc(alloc_func_list,
+			       (nr_alloc_funcs + 1) * sizeof(*func));
+		if (func == NULL)
+			return -ENOMEM;
+
+		pr_debug("alloc func: %s\n", sym->name);
+		func[nr_alloc_funcs].start = sym->start;
+		func[nr_alloc_funcs].end   = sym->end;
+		func[nr_alloc_funcs].name  = sym->name;
+
+		alloc_func_list = func;
+		nr_alloc_funcs++;
+	}
+
+	qsort(alloc_func_list, nr_alloc_funcs, sizeof(*func), funcmp);
+
+	regfree(&alloc_func_regex);
+	return 0;
+}
+
+/*
+ * Find first non-memory allocation function from callchain.
+ * The allocation functions are in the 'alloc_func_list'.
+ */
+static u64 find_callsite(struct perf_evsel *evsel, struct perf_sample *sample)
+{
+	struct addr_location al;
+	struct machine *machine = &kmem_session->machines.host;
+	struct callchain_cursor_node *node;
+
+	if (alloc_func_list == NULL)
+		build_alloc_func_list();
+
+	al.thread = machine__findnew_thread(machine, sample->pid, sample->tid);
+	sample__resolve_callchain(sample, NULL, evsel, &al, 16);
+
+	callchain_cursor_commit(&callchain_cursor);
+	while (true) {
+		struct alloc_func key, *caller;
+		u64 addr;
+
+		node = callchain_cursor_current(&callchain_cursor);
+		if (node == NULL)
+			break;
+
+		key.start = key.end = node->ip;
+		caller = bsearch(&key, alloc_func_list, nr_alloc_funcs,
+				 sizeof(key), callcmp);
+		if (!caller) {
+			/* found */
+			if (node->map)
+				addr = map__unmap_ip(node->map, node->ip);
+			else
+				addr = node->ip;
+
+			return addr;
+		} else
+			pr_debug3("skipping alloc function: %s\n", caller->name);
 
-static struct page_stat *search_page_stat(unsigned long page, bool create)
+		callchain_cursor_advance(&callchain_cursor);
+	}
+
+	pr_debug2("unknown callsite: %"PRIx64, sample->ip);
+	return sample->ip;
+}
+
+static struct page_stat *search_page_alloc_stat(u64 page, bool create)
 {
-	struct rb_node **node = &page_tree.rb_node;
+	struct rb_node **node = &page_alloc_tree.rb_node;
 	struct rb_node *parent = NULL;
 	struct page_stat *data;
 
@@ -286,7 +421,42 @@ static struct page_stat *search_page_stat(unsigned long page, bool create)
 		data->page = page;
 
 		rb_link_node(&data->node, parent, node);
-		rb_insert_color(&data->node, &page_tree);
+		rb_insert_color(&data->node, &page_alloc_tree);
+	}
+
+	return data;
+}
+
+static struct page_stat *search_page_caller_stat(u64 callsite, bool create)
+{
+	struct rb_node **node = &page_caller_tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct page_stat *data;
+
+	while (*node) {
+		s64 cmp;
+
+		parent = *node;
+		data = rb_entry(*node, struct page_stat, node);
+
+		cmp = data->callsite - callsite;
+		if (cmp < 0)
+			node = &parent->rb_left;
+		else if (cmp > 0)
+			node = &parent->rb_right;
+		else
+			return data;
+	}
+
+	if (!create)
+		return NULL;
+
+	data = zalloc(sizeof(*data));
+	if (data != NULL) {
+		data->callsite = callsite;
+
+		rb_link_node(&data->node, parent, node);
+		rb_insert_color(&data->node, &page_caller_tree);
 	}
 
 	return data;
@@ -301,6 +471,7 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
 	unsigned int migrate_type = perf_evsel__intval(evsel, sample,
 						       "migratetype");
 	u64 bytes = kmem_page_size << order;
+	u64 callsite;
 	struct page_stat *stat;
 
 	if (page == 0) {
@@ -308,22 +479,39 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
 		return 0;
 	}
 
+	callsite = find_callsite(evsel, sample);
+
 	/*
 	 * XXX: We'd better to use PFN instead of page pointer to deal
 	 * with things like partial freeing.  But AFAIK there's no way
 	 * to convert a pointer to struct page into PFN in userspace.
 	 */
-	stat = search_page_stat(page, true);
-	if (stat == NULL)
+	stat = search_page_alloc_stat(page, true);
+	if (stat == NULL) {
+		pr_err("cannot create page alloc stat\n");
 		return -1;
+	}
 
 	stat->order = order;
 	stat->gfp_flags = gfp_flags;
 	stat->migrate_type = migrate_type;
+	stat->callsite = callsite;
+	stat->nr_alloc++;
+	stat->alloc_bytes += bytes;
+
+	stat = search_page_caller_stat(callsite, true);
+	if (stat == NULL) {
+		pr_err("cannot create page caller stat\n");
+		return -1;
+	}
 
+	stat->order = order;
+	stat->gfp_flags = gfp_flags;
+	stat->migrate_type = migrate_type;
 	stat->nr_alloc++;
-	nr_page_allocs++;
 	stat->alloc_bytes += bytes;
+
+	nr_page_allocs++;
 	total_page_alloc_bytes += bytes;
 
 	order_stats[order][migrate_type]++;
@@ -342,7 +530,7 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
 	nr_page_frees++;
 	total_page_free_bytes += bytes;
 
-	stat = search_page_stat(page, false);
+	stat = search_page_alloc_stat(page, false);
 	if (stat == NULL) {
 		pr_debug2("missing free at page %"PRIx64" (order: %d)\n",
 			  page, order);
@@ -353,6 +541,12 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
 	stat->nr_free++;
 	stat->free_bytes += bytes;
 
+	stat = search_page_caller_stat(stat->callsite, false);
+	if (stat != NULL) {
+		stat->nr_free++;
+		stat->free_bytes += bytes;
+	}
+
 	return 0;
 }
 
@@ -463,36 +657,85 @@ static const char * const migrate_type_str[] = {
 	"UNKNOWN",
 };
 
-static void __print_page_result(struct rb_root *root,
-				struct perf_session *session __maybe_unused,
-				int n_lines)
+static void __print_page_alloc_result(struct perf_session *session, int n_lines)
 {
-	struct rb_node *next = rb_first(root);
+	struct rb_node *next = rb_first(&page_alloc_sorted);
+	struct machine *machine = &session->machines.host;
 
-	printf("\n%.80s\n", graph_dotted_line);
-	printf(" Page             | Total_alloc/Per | Hit      | Order | Migrate type | GFP flag\n");
-	printf("%.80s\n", graph_dotted_line);
+	printf("\n%.92s\n", graph_dotted_line);
+	printf(" Page             | Total_alloc/Per | Hit      | Order | Migrate type | GFP flag | Callsite\n");
+	printf("%.92s\n", graph_dotted_line);
 
 	while (next && n_lines--) {
 		struct page_stat *data;
+		struct symbol *sym;
+		struct map *map;
+		char buf[32];
+		char *caller = buf;
 
 		data = rb_entry(next, struct page_stat, node);
+		sym = machine__find_kernel_function(machine, data->callsite,
+						    &map, NULL);
+		if (sym && sym->name)
+			caller = sym->name;
+		else
+			scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
 
-		printf(" %016llx | %9llu/%-5lu | %8d | %5d | %12s | %08lx\n",
+		printf(" %016llx | %9llu/%-5lu | %8d | %5d | %12s | %08lx | %s\n",
 		       (unsigned long long)data->page,
 		       (unsigned long long)data->alloc_bytes,
 		       (unsigned long)data->alloc_bytes / data->nr_alloc,
 		       data->nr_alloc, data->order,
 		       migrate_type_str[data->migrate_type],
-		       (unsigned long)data->gfp_flags);
+		       (unsigned long)data->gfp_flags, caller);
+
+		next = rb_next(next);
+	}
+
+	if (n_lines == -1)
+		printf(" ...              | ...             | ...      | ...   | ...          | ...      | ...\n");
+
+	printf("%.92s\n", graph_dotted_line);
+}
+
+static void __print_page_caller_result(struct perf_session *session, int n_lines)
+{
+	struct rb_node *next = rb_first(&page_caller_sorted);
+	struct machine *machine = &session->machines.host;
+
+	printf("\n%.92s\n", graph_dotted_line);
+	printf(" Total_alloc/Per | Hit      | Order | Migrate type | GFP flag | Callsite\n");
+	printf("%.92s\n", graph_dotted_line);
+
+	while (next && n_lines--) {
+		struct page_stat *data;
+		struct symbol *sym;
+		struct map *map;
+		char buf[32];
+		char *caller = buf;
+
+		data = rb_entry(next, struct page_stat, node);
+		sym = machine__find_kernel_function(machine, data->callsite,
+						    &map, NULL);
+		if (sym && sym->name)
+			caller = sym->name;
+		else
+			scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
+
+		printf(" %9llu/%-5lu | %8d | %5d | %12s | %08lx | %s\n",
+		       (unsigned long long)data->alloc_bytes,
+		       (unsigned long)data->alloc_bytes / data->nr_alloc,
+		       data->nr_alloc, data->order,
+		       migrate_type_str[data->migrate_type],
+		       (unsigned long)data->gfp_flags, caller);
 
 		next = rb_next(next);
 	}
 
 	if (n_lines == -1)
-		printf(" ...              | ...             | ...      | ...   | ...          | ...     \n");
+		printf(" ...             | ...      | ...   | ...          | ...      | ...\n");
 
-	printf("%.80s\n", graph_dotted_line);
+	printf("%.92s\n", graph_dotted_line);
 }
 
 static void print_slab_summary(void)
@@ -547,8 +790,10 @@ static void print_slab_result(struct perf_session *session)
 
 static void print_page_result(struct perf_session *session)
 {
+	if (caller_flag)
+		__print_page_caller_result(session, caller_lines);
 	if (alloc_flag)
-		__print_page_result(&page_alloc_sorted, session, alloc_lines);
+		__print_page_alloc_result(session, alloc_lines);
 	print_page_summary();
 }
 
@@ -666,7 +911,8 @@ static void sort_result(void)
 				   &caller_sort);
 	}
 	if (kmem_page) {
-		__sort_page_result(&page_tree, &page_alloc_sorted);
+		__sort_page_result(&page_alloc_tree, &page_alloc_sorted);
+		__sort_page_result(&page_caller_tree, &page_caller_sorted);
 	}
 }
 
@@ -696,8 +942,10 @@ static int __cmd_kmem(struct perf_session *session)
 
 	setup_pager();
 	err = perf_session__process_events(session, &perf_kmem);
-	if (err != 0)
+	if (err != 0) {
+		pr_err("error during process events: %d\n", err);
 		goto out;
+	}
 	sort_result();
 	print_result(session);
 out:
@@ -938,7 +1186,7 @@ static int __cmd_record(int argc, const char **argv)
 	if (kmem_slab)
 		rec_argc += ARRAY_SIZE(slab_events);
 	if (kmem_page)
-		rec_argc += ARRAY_SIZE(page_events);
+		rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */
 
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
@@ -953,6 +1201,8 @@ static int __cmd_record(int argc, const char **argv)
 			rec_argv[i] = strdup(slab_events[j]);
 	}
 	if (kmem_page) {
+		rec_argv[i++] = strdup("-g");
+
 		for (j = 0; j < ARRAY_SIZE(page_events); j++, i++)
 			rec_argv[i] = strdup(page_events[j]);
 	}
@@ -1011,7 +1261,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 		return __cmd_record(argc, argv);
 	}
 
-	session = perf_session__new(&file, false, &perf_kmem);
+	kmem_session = session = perf_session__new(&file, false, &perf_kmem);
 	if (session == NULL)
 		return -1;
 
@@ -1024,6 +1274,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 		}
 
 		kmem_page_size = pevent_get_page_size(evsel->tp_format->pevent);
+		symbol_conf.use_callchain = true;
 	}
 
 	symbol__init(&session->header.env);
-- 
2.3.1


  parent reply	other threads:[~2015-03-12  7:38 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-03-12  7:32 [RFC/PATCHSET 0/6] perf kmem: Implement page allocation analysis (v1) Namhyung Kim
2015-03-12  7:32 ` [PATCH 1/6] perf kmem: Fix segfault when invalid sort key is given Namhyung Kim
2015-03-14  7:06   ` [tip:perf/core] " tip-bot for Namhyung Kim
2015-03-12  7:32 ` [PATCH 2/6] perf kmem: Allow -v option Namhyung Kim
2015-03-14  7:06   ` [tip:perf/core] " tip-bot for Namhyung Kim
2015-03-12  7:32 ` [PATCH 3/6] perf kmem: Fix alignment of slab result table Namhyung Kim
2015-03-14  7:07   ` [tip:perf/core] " tip-bot for Namhyung Kim
2015-03-12  7:32 ` [PATCH 4/6] perf kmem: Analyze page allocator events also Namhyung Kim
2015-03-12 11:01   ` Jiri Olsa
2015-03-12 15:11     ` Namhyung Kim
2015-03-12  7:32 ` Namhyung Kim [this message]
2015-03-12  7:32 ` [PATCH 6/6] perf kmem: Support sort keys on page analysis Namhyung Kim
2015-03-12 10:41 ` [RFC/PATCHSET 0/6] perf kmem: Implement page allocation analysis (v1) Ingo Molnar
2015-03-12 14:58   ` Namhyung Kim
2015-03-12 15:54     ` Ingo Molnar
2015-03-13  8:19       ` Namhyung Kim
2015-03-13 12:44         ` Ingo Molnar
2015-03-16  2:06           ` Namhyung Kim
2015-03-16  2:10     ` Namhyung Kim
2015-03-16  8:26       ` Ingo Molnar
2015-03-16  8:35         ` Namhyung Kim
2015-03-16  8:43           ` Ingo Molnar
2015-03-12 19:07   ` Arnaldo Carvalho de Melo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1426145571-3065-6-git-send-email-namhyung@kernel.org \
    --to=namhyung@kernel.org \
    --cc=a.p.zijlstra@chello.nl \
    --cc=acme@kernel.org \
    --cc=dsahern@gmail.com \
    --cc=jolsa@redhat.com \
    --cc=js1304@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=minchan@kernel.org \
    --cc=mingo@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).