All of lore.kernel.org
 help / color / mirror / Atom feed
From: Namhyung Kim <namhyung@kernel.org>
To: Arnaldo Carvalho de Melo <acme@kernel.org>,
	Jiri Olsa <jolsa@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>
Cc: Ian Rogers <irogers@google.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	Ingo Molnar <mingo@kernel.org>,
	LKML <linux-kernel@vger.kernel.org>,
	linux-perf-users@vger.kernel.org,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Stephane Eranian <eranian@google.com>,
	Masami Hiramatsu <mhiramat@kernel.org>,
	linux-toolchains@vger.kernel.org,
	linux-trace-devel@vger.kernel.org
Subject: [PATCH 03/17] perf annotate-data: Add find_data_type()
Date: Tue, 12 Dec 2023 16:13:09 -0800	[thread overview]
Message-ID: <20231213001323.718046-4-namhyung@kernel.org> (raw)
In-Reply-To: <20231213001323.718046-1-namhyung@kernel.org>

The find_data_type() is to get a data type from the memory access at the
given address (IP) using a register and an offset.  It requires DWARF
debug info in the DSO and searches the list of variables and function
parameters in the scope.

In a pseudo code, it does basically the following:

  find_data_type(dso, ip, reg, offset)
  {
      pc = map__rip_2objdump(ip);
      CU = dwarf_addrdie(dso->dwarf, pc);
      scopes = die_get_scopes(CU, pc);
      for_each_scope(S, scopes) {
          V = die_find_variable_by_reg(S, pc, reg);
          if (V && V.type == pointer_type) {
              T = die_get_real_type(V);
              if (offset < T.size)
                  return T;
          }
      }
      return NULL;
  }

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/Build           |   1 +
 tools/perf/util/annotate-data.c | 163 ++++++++++++++++++++++++++++++++
 tools/perf/util/annotate-data.h |  40 ++++++++
 3 files changed, 204 insertions(+)
 create mode 100644 tools/perf/util/annotate-data.c
 create mode 100644 tools/perf/util/annotate-data.h

diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 73e3f194f949..5cf000302080 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -196,6 +196,7 @@ perf-$(CONFIG_DWARF) += probe-finder.o
 perf-$(CONFIG_DWARF) += dwarf-aux.o
 perf-$(CONFIG_DWARF) += dwarf-regs.o
 perf-$(CONFIG_DWARF) += debuginfo.o
+perf-$(CONFIG_DWARF) += annotate-data.o
 
 perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
 perf-$(CONFIG_LOCAL_LIBUNWIND)    += unwind-libunwind-local.o
diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
new file mode 100644
index 000000000000..1ddec786721c
--- /dev/null
+++ b/tools/perf/util/annotate-data.c
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Convert sample address to data type using DWARF debug info.
+ *
+ * Written by Namhyung Kim <namhyung@kernel.org>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "annotate-data.h"
+#include "debuginfo.h"
+#include "debug.h"
+#include "dso.h"
+#include "map.h"
+#include "map_symbol.h"
+#include "strbuf.h"
+#include "symbol.h"
+
+static bool find_cu_die(struct debuginfo *di, u64 pc, Dwarf_Die *cu_die)
+{
+	Dwarf_Off off, next_off;
+	size_t header_size;
+
+	if (dwarf_addrdie(di->dbg, pc, cu_die) != NULL)
+		return cu_die;
+
+	/*
+	 * There are some kernels don't have full aranges and contain only a few
+	 * aranges entries.  Fallback to iterate all CU entries in .debug_info
+	 * in case it's missing.
+	 */
+	off = 0;
+	while (dwarf_nextcu(di->dbg, off, &next_off, &header_size,
+			    NULL, NULL, NULL) == 0) {
+		if (dwarf_offdie(di->dbg, off + header_size, cu_die) &&
+		    dwarf_haspc(cu_die, pc))
+			return true;
+
+		off = next_off;
+	}
+	return false;
+}
+
+/* The type info will be saved in @type_die */
+static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset)
+{
+	Dwarf_Word size;
+
+	/* Get the type of the variable */
+	if (die_get_real_type(var_die, type_die) == NULL) {
+		pr_debug("variable has no type\n");
+		return -1;
+	}
+
+	/*
+	 * It expects a pointer type for a memory access.
+	 * Convert to a real type it points to.
+	 */
+	if (dwarf_tag(type_die) != DW_TAG_pointer_type ||
+	    die_get_real_type(type_die, type_die) == NULL) {
+		pr_debug("no pointer or no type\n");
+		return -1;
+	}
+
+	/* Get the size of the actual type */
+	if (dwarf_aggregate_size(type_die, &size) < 0) {
+		pr_debug("type size is unknown\n");
+		return -1;
+	}
+
+	/* Minimal sanity check */
+	if ((unsigned)offset >= size) {
+		pr_debug("offset: %d is bigger than size: %lu\n", offset, size);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* The result will be saved in @type_die */
+static int find_data_type_die(struct debuginfo *di, u64 pc,
+			      int reg, int offset, Dwarf_Die *type_die)
+{
+	Dwarf_Die cu_die, var_die;
+	Dwarf_Die *scopes = NULL;
+	int ret = -1;
+	int i, nr_scopes;
+
+	/* Get a compile_unit for this address */
+	if (!find_cu_die(di, pc, &cu_die)) {
+		pr_debug("cannot find CU for address %lx\n", pc);
+		return -1;
+	}
+
+	/* Get a list of nested scopes - i.e. (inlined) functions and blocks. */
+	nr_scopes = die_get_scopes(&cu_die, pc, &scopes);
+
+	/* Search from the inner-most scope to the outer */
+	for (i = nr_scopes - 1; i >= 0; i--) {
+		/* Look up variables/parameters in this scope */
+		if (!die_find_variable_by_reg(&scopes[i], pc, reg, &var_die))
+			continue;
+
+		/* Found a variable, see if it's correct */
+		ret = check_variable(&var_die, type_die, offset);
+		break;
+	}
+
+	free(scopes);
+	return ret;
+}
+
+/**
+ * find_data_type - Return a data type at the location
+ * @ms: map and symbol at the location
+ * @ip: instruction address of the memory access
+ * @reg: register that holds the base address
+ * @offset: offset from the base address
+ *
+ * This functions searches the debug information of the binary to get the data
+ * type it accesses.  The exact location is expressed by (ip, reg, offset).
+ * It return %NULL if not found.
+ */
+struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip,
+					   int reg, int offset)
+{
+	struct annotated_data_type *result = NULL;
+	struct dso *dso = map__dso(ms->map);
+	struct debuginfo *di;
+	Dwarf_Die type_die;
+	struct strbuf sb;
+	u64 pc;
+
+	di = debuginfo__new(dso->long_name);
+	if (di == NULL) {
+		pr_debug("cannot get the debug info\n");
+		return NULL;
+	}
+
+	/*
+	 * IP is a relative instruction address from the start of the map, as
+	 * it can be randomized/relocated, it needs to translate to PC which is
+	 * a file address for DWARF processing.
+	 */
+	pc = map__rip_2objdump(ms->map, ip);
+	if (find_data_type_die(di, pc, reg, offset, &type_die) < 0)
+		goto out;
+
+	result = zalloc(sizeof(*result));
+	if (result == NULL)
+		goto out;
+
+	strbuf_init(&sb, 32);
+	if (die_get_typename_from_type(&type_die, &sb) < 0)
+		strbuf_add(&sb, "(unknown type)", 14);
+
+	result->type_name = strbuf_detach(&sb, NULL);
+
+out:
+	debuginfo__delete(di);
+	return result;
+}
diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h
new file mode 100644
index 000000000000..633147f78ca5
--- /dev/null
+++ b/tools/perf/util/annotate-data.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_ANNOTATE_DATA_H
+#define _PERF_ANNOTATE_DATA_H
+
+#include <errno.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+struct map_symbol;
+
+/**
+ * struct annotated_data_type - Data type to profile
+ * @type_name: Name of the data type
+ * @type_size: Size of the data type
+ *
+ * This represents a data type accessed by samples in the profile data.
+ */
+struct annotated_data_type {
+	char *type_name;
+	int type_size;
+};
+
+#ifdef HAVE_DWARF_SUPPORT
+
+/* Returns data type at the location (ip, reg, offset) */
+struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip,
+					   int reg, int offset);
+
+#else /* HAVE_DWARF_SUPPORT */
+
+static inline struct annotated_data_type *
+find_data_type(struct map_symbol *ms __maybe_unused, u64 ip __maybe_unused,
+	       int reg __maybe_unused, int offset __maybe_unused)
+{
+	return NULL;
+}
+
+#endif /* HAVE_DWARF_SUPPORT */
+
+#endif /* _PERF_ANNOTATE_DATA_H */
-- 
2.43.0.472.g3155946c3a-goog


  parent reply	other threads:[~2023-12-13  0:13 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-13  0:13 [PATCHSET 00/17] perf tools: Introduce data type profiling (v3) Namhyung Kim
2023-12-13  0:13 ` [PATCH 01/17] perf dwarf-aux: Factor out die_get_typename_from_type() Namhyung Kim
2023-12-13  0:13 ` [PATCH 02/17] perf dwarf-regs: Add get_dwarf_regnum() Namhyung Kim
2023-12-13  0:13 ` Namhyung Kim [this message]
2023-12-13  0:13 ` [PATCH 04/17] perf annotate-data: Add dso->data_types tree Namhyung Kim
2023-12-13  0:13 ` [PATCH 05/17] perf annotate: Factor out evsel__get_arch() Namhyung Kim
2023-12-13  0:13 ` [PATCH 06/17] perf annotate: Add annotate_get_insn_location() Namhyung Kim
2023-12-13  0:13 ` [PATCH 07/17] perf annotate: Implement hist_entry__get_data_type() Namhyung Kim
2023-12-13  0:13 ` [PATCH 08/17] perf report: Add 'type' sort key Namhyung Kim
2023-12-13  0:13 ` [PATCH 09/17] perf report: Support data type profiling Namhyung Kim
2023-12-13  0:13 ` [PATCH 10/17] perf annotate-data: Add member field in the data type Namhyung Kim
2023-12-13  0:13 ` [PATCH 11/17] perf annotate-data: Update sample histogram for type Namhyung Kim
2023-12-13  0:13 ` [PATCH 12/17] perf report: Add 'typeoff' sort key Namhyung Kim
2023-12-13  0:13 ` [PATCH 13/17] perf report: Add 'symoff' " Namhyung Kim
2023-12-13  0:13 ` [PATCH 14/17] perf annotate: Add --data-type option Namhyung Kim
2023-12-13  0:13 ` [PATCH 15/17] perf annotate: Support event group display Namhyung Kim
2023-12-13  0:13 ` [PATCH 16/17] perf annotate: Add --type-stat option for debugging Namhyung Kim
2023-12-13  0:13 ` [PATCH 17/17] perf annotate: Add --insn-stat " Namhyung Kim

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231213001323.718046-4-namhyung@kernel.org \
    --to=namhyung@kernel.org \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=eranian@google.com \
    --cc=irogers@google.com \
    --cc=jolsa@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=linux-toolchains@vger.kernel.org \
    --cc=linux-trace-devel@vger.kernel.org \
    --cc=mhiramat@kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.