linux-trace-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/4][WiP] trace-cmd: Add "perf" sub command
@ 2020-08-07 12:06 Tzvetomir Stoyanov (VMware)
  2020-08-07 12:06 ` [PATCH 1/4] trace-cmd: Internal refactoring of pid address map logic Tzvetomir Stoyanov (VMware)
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Tzvetomir Stoyanov (VMware) @ 2020-08-07 12:06 UTC (permalink / raw)
  To: rostedt; +Cc: linux-trace-devel

A new "trace-cmd perf" sub command is implemented, to collect performance
information about a user space program, using perf. The command has one
mandatory argument, used to specify the PID of a running user space process:
 trace-cmd perf --pid <PID>
This is a work in progress, not completed yet.

What is implemented:
 - Initial infrastructure for configuring perf, using its kernel interface,
   and running per CPU threads for reading collected perf data. Perf is used
   to periodically collect CALLCHAINs (values of the IP register) of the
   requested PID.
 - trace-cmd internal APIs, wrapper around libbfd, for parsing an ELF header.
 - Resolving collected IP values to function names, using the binary file of
   the running application and all libraries that it uses.
 - New trace.dat file option for storing address to function name mapping
   and a new API for address to name resolving using stored information.

What is not yet implemented:
 - There are problems reading the perf mmap pages with collected data,
   currently only the first few records from each CPU page are read correctly.
 - Assemble the trace.dat file with all mandatory headers. Think about a new
   type of trace data in the file and its format. It should be designed to be
   flexible, able to store various perf data, not only CALLCHAINs.
 - Add support in "trace-cmd report" for reporting the new data.
 - Add APIs in libtracecmd for reading the new data.
 - Enable collecting of kernel functions in the CALLCHAIN and implement
   resolving of kernel address to a kernel function name.
 - Formatting and beautifying the code.
 - A lot of testing and bug fixing.

Tzvetomir Stoyanov (VMware) (4):
  trace-cmd: Internal refactoring of pid address map logic
  trace-cmd: New internal APIs for reading ELF header
  trace-cmd: Add a new option in trace.dat file for the address to
    function name mapping
  trace-cmd: Add new subcomand "trace-cmd perf"

 Makefile                       |  10 +
 include/trace-cmd/trace-cmd.h  |  13 +
 lib/trace-cmd/trace-input.c    | 144 ++++++
 tracecmd/Makefile              |   2 +
 tracecmd/include/trace-local.h |  33 +-
 tracecmd/trace-cmd.c           |   1 +
 tracecmd/trace-dump.c          |   3 +
 tracecmd/trace-obj-debug.c     | 770 +++++++++++++++++++++++++++++++++
 tracecmd/trace-perf.c          | 540 +++++++++++++++++++++++
 tracecmd/trace-record.c        | 107 +----
 10 files changed, 1510 insertions(+), 113 deletions(-)
 create mode 100644 tracecmd/trace-obj-debug.c
 create mode 100644 tracecmd/trace-perf.c

-- 
2.26.2


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/4] trace-cmd: Internal refactoring of pid address map logic
  2020-08-07 12:06 [PATCH 0/4][WiP] trace-cmd: Add "perf" sub command Tzvetomir Stoyanov (VMware)
@ 2020-08-07 12:06 ` Tzvetomir Stoyanov (VMware)
  2020-08-07 12:06 ` [PATCH 2/4] trace-cmd: New internal APIs for reading ELF header Tzvetomir Stoyanov (VMware)
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Tzvetomir Stoyanov (VMware) @ 2020-08-07 12:06 UTC (permalink / raw)
  To: rostedt; +Cc: linux-trace-devel

Make the functions for collecting the file to memory map of a PID non static,
so they can be reused in the trace-cmd application context. A new file,
trace-obj-debug.c, is created to hold these functions.

Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
---
 tracecmd/Makefile              |   1 +
 tracecmd/include/trace-local.h |  19 +++--
 tracecmd/trace-obj-debug.c     | 138 +++++++++++++++++++++++++++++++++
 tracecmd/trace-record.c        | 107 +------------------------
 4 files changed, 152 insertions(+), 113 deletions(-)
 create mode 100644 tracecmd/trace-obj-debug.c

diff --git a/tracecmd/Makefile b/tracecmd/Makefile
index 5e59adf8..f9435558 100644
--- a/tracecmd/Makefile
+++ b/tracecmd/Makefile
@@ -31,6 +31,7 @@ TRACE_CMD_OBJS += trace-show.o
 TRACE_CMD_OBJS += trace-list.o
 TRACE_CMD_OBJS += trace-usage.o
 TRACE_CMD_OBJS += trace-dump.o
+TRACE_CMD_OBJS += trace-obj-debug.o
 ifeq ($(VSOCK_DEFINED), 1)
 TRACE_CMD_OBJS += trace-tsync.o
 endif
diff --git a/tracecmd/include/trace-local.h b/tracecmd/include/trace-local.h
index d148aa16..c5c225e0 100644
--- a/tracecmd/include/trace-local.h
+++ b/tracecmd/include/trace-local.h
@@ -178,14 +178,6 @@ struct func_list {
 	const char *mod;
 };
 
-struct pid_addr_maps {
-	struct pid_addr_maps		*next;
-	struct tracecmd_proc_addr_map	*lib_maps;
-	unsigned int			nr_lib_maps;
-	char				*proc_name;
-	int				pid;
-};
-
 struct opt_list {
 	struct opt_list *next;
 	const char	*option;
@@ -314,4 +306,15 @@ void *malloc_or_die(unsigned int size); /* Can be overridden */
 void __noreturn __die(const char *fmt, ...);
 void __noreturn _vdie(const char *fmt, va_list ap);
 
+/* --- Debug symbols--- */
+struct pid_addr_maps {
+	struct pid_addr_maps	*next;
+	struct tracecmd_proc_addr_map	*lib_maps;
+	unsigned int			nr_lib_maps;
+	char				*proc_name;
+	int				pid;
+};
+int trace_debug_get_filemap(struct pid_addr_maps **file_maps, int pid);
+void trace_debug_free_filemap(struct pid_addr_maps *maps);
+
 #endif /* __TRACE_LOCAL_H */
diff --git a/tracecmd/trace-obj-debug.c b/tracecmd/trace-obj-debug.c
new file mode 100644
index 00000000..9aa9baae
--- /dev/null
+++ b/tracecmd/trace-obj-debug.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2020, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
+ *
+ */
+#include <stdlib.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include "trace-local.h"
+
+#define _STRINGIFY(x) #x
+#define STRINGIFY(x) _STRINGIFY(x)
+int trace_debug_get_filemap(struct pid_addr_maps **pid_maps, int pid)
+{
+	struct pid_addr_maps *maps = *pid_maps;
+	struct tracecmd_proc_addr_map *map;
+	unsigned long long begin, end;
+	struct pid_addr_maps *m;
+	char mapname[PATH_MAX+1];
+	char fname[PATH_MAX+1];
+	char buf[PATH_MAX+100];
+	FILE *f;
+	int ret;
+	int res;
+	int i;
+
+	sprintf(fname, "/proc/%d/exe", pid);
+	ret = readlink(fname, mapname, PATH_MAX);
+	if (ret >= PATH_MAX || ret < 0)
+		return -ENOENT;
+	mapname[ret] = 0;
+
+	sprintf(fname, "/proc/%d/maps", pid);
+	f = fopen(fname, "r");
+	if (!f)
+		return -ENOENT;
+
+	while (maps) {
+		if (pid == maps->pid)
+			break;
+		maps = maps->next;
+	}
+
+	ret = -ENOMEM;
+	if (!maps) {
+		maps = calloc(1, sizeof(*maps));
+		if (!maps)
+			goto out_fail;
+		maps->pid = pid;
+		maps->next = *pid_maps;
+		*pid_maps = maps;
+	} else {
+		for (i = 0; i < maps->nr_lib_maps; i++)
+			free(maps->lib_maps[i].lib_name);
+		free(maps->lib_maps);
+		maps->lib_maps = NULL;
+		maps->nr_lib_maps = 0;
+		free(maps->proc_name);
+	}
+
+	maps->proc_name = strdup(mapname);
+	if (!maps->proc_name)
+		goto out;
+
+	while (fgets(buf, sizeof(buf), f)) {
+		mapname[0] = '\0';
+		res = sscanf(buf, "%llx-%llx %*s %*x %*s %*d %"STRINGIFY(PATH_MAX)"s",
+			     &begin, &end, mapname);
+		if (res == 3 && mapname[0] != '\0') {
+			map = realloc(maps->lib_maps,
+				      (maps->nr_lib_maps + 1) * sizeof(*map));
+			if (!map)
+				goto out_fail;
+			map[maps->nr_lib_maps].end = end;
+			map[maps->nr_lib_maps].start = begin;
+			map[maps->nr_lib_maps].lib_name = strdup(mapname);
+			if (!map[maps->nr_lib_maps].lib_name)
+				goto out_fail;
+			maps->lib_maps = map;
+			maps->nr_lib_maps++;
+		}
+	}
+out:
+	fclose(f);
+	return 0;
+
+out_fail:
+	fclose(f);
+	if (maps) {
+		for (i = 0; i < maps->nr_lib_maps; i++)
+			free(maps->lib_maps[i].lib_name);
+		if (*pid_maps != maps) {
+			m = *pid_maps;
+			while (m) {
+				if (m->next == maps) {
+					m->next = maps->next;
+					break;
+				}
+				m = m->next;
+			}
+		} else
+			*pid_maps = maps->next;
+		free(maps->lib_maps);
+		maps->lib_maps = NULL;
+		maps->nr_lib_maps = 0;
+		free(maps->proc_name);
+		maps->proc_name = NULL;
+		free(maps);
+	}
+	return ret;
+}
+
+static void procmap_free(struct pid_addr_maps *maps)
+{
+	int i;
+
+	if (!maps)
+		return;
+	if (maps->lib_maps) {
+		for (i = 0; i < maps->nr_lib_maps; i++)
+			free(maps->lib_maps[i].lib_name);
+		free(maps->lib_maps);
+	}
+	free(maps->proc_name);
+	free(maps);
+}
+
+void trace_debug_free_filemap(struct pid_addr_maps *maps)
+{
+	struct pid_addr_maps *del;
+
+	while (maps) {
+		del = maps;
+		maps = maps->next;
+		procmap_free(del);
+	}
+}
diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
index bd004574..a48475b3 100644
--- a/tracecmd/trace-record.c
+++ b/tracecmd/trace-record.c
@@ -1080,109 +1080,6 @@ static char *make_pid_filter(struct buffer_instance *instance,
 	return filter;
 }
 
-#define _STRINGIFY(x) #x
-#define STRINGIFY(x) _STRINGIFY(x)
-
-static int get_pid_addr_maps(struct buffer_instance *instance, int pid)
-{
-	struct pid_addr_maps *maps = instance->pid_maps;
-	struct tracecmd_proc_addr_map *map;
-	unsigned long long begin, end;
-	struct pid_addr_maps *m;
-	char mapname[PATH_MAX+1];
-	char fname[PATH_MAX+1];
-	char buf[PATH_MAX+100];
-	FILE *f;
-	int ret;
-	int res;
-	int i;
-
-	sprintf(fname, "/proc/%d/exe", pid);
-	ret = readlink(fname, mapname, PATH_MAX);
-	if (ret >= PATH_MAX || ret < 0)
-		return -ENOENT;
-	mapname[ret] = 0;
-
-	sprintf(fname, "/proc/%d/maps", pid);
-	f = fopen(fname, "r");
-	if (!f)
-		return -ENOENT;
-
-	while (maps) {
-		if (pid == maps->pid)
-			break;
-		maps = maps->next;
-	}
-
-	ret = -ENOMEM;
-	if (!maps) {
-		maps = calloc(1, sizeof(*maps));
-		if (!maps)
-			goto out_fail;
-		maps->pid = pid;
-		maps->next = instance->pid_maps;
-		instance->pid_maps = maps;
-	} else {
-		for (i = 0; i < maps->nr_lib_maps; i++)
-			free(maps->lib_maps[i].lib_name);
-		free(maps->lib_maps);
-		maps->lib_maps = NULL;
-		maps->nr_lib_maps = 0;
-		free(maps->proc_name);
-	}
-
-	maps->proc_name = strdup(mapname);
-	if (!maps->proc_name)
-		goto out;
-
-	while (fgets(buf, sizeof(buf), f)) {
-		mapname[0] = '\0';
-		res = sscanf(buf, "%llx-%llx %*s %*x %*s %*d %"STRINGIFY(PATH_MAX)"s",
-			     &begin, &end, mapname);
-		if (res == 3 && mapname[0] != '\0') {
-			map = realloc(maps->lib_maps,
-				      (maps->nr_lib_maps + 1) * sizeof(*map));
-			if (!map)
-				goto out_fail;
-			map[maps->nr_lib_maps].end = end;
-			map[maps->nr_lib_maps].start = begin;
-			map[maps->nr_lib_maps].lib_name = strdup(mapname);
-			if (!map[maps->nr_lib_maps].lib_name)
-				goto out_fail;
-			maps->lib_maps = map;
-			maps->nr_lib_maps++;
-		}
-	}
-out:
-	fclose(f);
-	return 0;
-
-out_fail:
-	fclose(f);
-	if (maps) {
-		for (i = 0; i < maps->nr_lib_maps; i++)
-			free(maps->lib_maps[i].lib_name);
-		if (instance->pid_maps != maps) {
-			m = instance->pid_maps;
-			while (m) {
-				if (m->next == maps) {
-					m->next = maps->next;
-					break;
-				}
-				m = m->next;
-			}
-		} else
-			instance->pid_maps = maps->next;
-		free(maps->lib_maps);
-		maps->lib_maps = NULL;
-		maps->nr_lib_maps = 0;
-		free(maps->proc_name);
-		maps->proc_name = NULL;
-		free(maps);
-	}
-	return ret;
-}
-
 static void get_filter_pid_maps(void)
 {
 	struct buffer_instance *instance;
@@ -1194,7 +1091,7 @@ static void get_filter_pid_maps(void)
 		for (p = instance->filter_pids; p; p = p->next) {
 			if (p->exclude)
 				continue;
-			get_pid_addr_maps(instance, p->pid);
+			trace_debug_get_filemap(&instance->pid_maps, p->pid);
 		}
 	}
 }
@@ -1524,7 +1421,7 @@ static void ptrace_wait(enum trace_type type)
 			case PTRACE_EVENT_EXIT:
 				instance = get_intance_fpid(pid);
 				if (instance && instance->get_procmap)
-					get_pid_addr_maps(instance, pid);
+					trace_debug_get_filemap(&instance->pid_maps, pid);
 				ptrace(PTRACE_GETEVENTMSG, pid, NULL, &cstatus);
 				ptrace(PTRACE_DETACH, pid, NULL, NULL);
 				break;
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/4] trace-cmd: New internal APIs for reading ELF header
  2020-08-07 12:06 [PATCH 0/4][WiP] trace-cmd: Add "perf" sub command Tzvetomir Stoyanov (VMware)
  2020-08-07 12:06 ` [PATCH 1/4] trace-cmd: Internal refactoring of pid address map logic Tzvetomir Stoyanov (VMware)
@ 2020-08-07 12:06 ` Tzvetomir Stoyanov (VMware)
  2020-08-07 12:06 ` [PATCH 3/4] trace-cmd: Add a new option in trace.dat file for the address to function name mapping Tzvetomir Stoyanov (VMware)
  2020-08-07 12:06 ` [PATCH 4/4] trace-cmd: Add new subcomand "trace-cmd perf" Tzvetomir Stoyanov (VMware)
  3 siblings, 0 replies; 5+ messages in thread
From: Tzvetomir Stoyanov (VMware) @ 2020-08-07 12:06 UTC (permalink / raw)
  To: rostedt; +Cc: linux-trace-devel

Implemented new trace-cmd internal APIs for parsing an ELF header and
resolving a VMA to a function name, and a function name to a VMA and a file
offset. The bfd library is used to read and parse the binary file. The new
APIs are part of the trace-cmd application and are visible only inside its
context.

Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
---
 Makefile                       |  10 +
 include/trace-cmd/trace-cmd.h  |   8 +
 tracecmd/include/trace-local.h |  12 +
 tracecmd/trace-obj-debug.c     | 632 +++++++++++++++++++++++++++++++++
 4 files changed, 662 insertions(+)

diff --git a/Makefile b/Makefile
index b0340427..0d657969 100644
--- a/Makefile
+++ b/Makefile
@@ -245,6 +245,16 @@ endif
 CUNIT_INSTALLED := $(shell if (printf "$(pound)include <CUnit/Basic.h>\n void main(){CU_initialize_registry();}" | $(CC) -x c - -lcunit >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi)
 export CUNIT_INSTALLED
 
+BFD_INSTALLED := $(shell if (echo -e "\#include <bfd.h>\n void main(){bfd_init();}" | $(CC) -xc  - -lbfd >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi)
+
+export BFD_INSTALLED
+ifeq ($(BFD_INSTALLED), 1)
+CFLAGS += -DBFD_INSTALLED
+LIBS += -lbfd
+else
+$(warning libbfd is not installed)
+endif
+
 export CFLAGS
 export INCLUDES
 
diff --git a/include/trace-cmd/trace-cmd.h b/include/trace-cmd/trace-cmd.h
index f3c95f30..5ebd076e 100644
--- a/include/trace-cmd/trace-cmd.h
+++ b/include/trace-cmd/trace-cmd.h
@@ -135,6 +135,14 @@ struct tracecmd_proc_addr_map {
 	char			*lib_name;
 };
 
+struct tracecmd_debug_symbols {
+	char *name;			/* symbol's name */
+	char *fname;			/* symbol's file */
+	unsigned long long vma_start;	/* symbol's start VMA */
+	unsigned long long vma_near;	/* symbol's requested VMA */
+	unsigned long long foffset;	/* symbol's offset in the binary file*/
+};
+
 typedef void (*tracecmd_show_data_func)(struct tracecmd_input *handle,
 					struct tep_record *record);
 typedef void (*tracecmd_handle_init_func)(struct tracecmd_input *handle,
diff --git a/tracecmd/include/trace-local.h b/tracecmd/include/trace-local.h
index c5c225e0..ccae61d4 100644
--- a/tracecmd/include/trace-local.h
+++ b/tracecmd/include/trace-local.h
@@ -317,4 +317,16 @@ struct pid_addr_maps {
 int trace_debug_get_filemap(struct pid_addr_maps **file_maps, int pid);
 void trace_debug_free_filemap(struct pid_addr_maps *maps);
 
+struct trace_debug_object;
+struct trace_debug_object *trace_debug_obj_create_file(char *file);
+struct trace_debug_object *trace_debug_obj_create_pid(int pid);
+void trace_debug_obj_destroy(struct trace_debug_object *debug);
+
+int trace_debug_resolve_symbols(struct trace_debug_object *obj);
+int trace_debug_add_resolve_symbol(struct trace_debug_object *obj,
+				   unsigned long long vma, char *name);
+
+void trace_debug_walk_resolved_symbols(struct trace_debug_object *obj,
+				       int (*callback)(struct tracecmd_debug_symbols *, void *),
+				       void *context);
 #endif /* __TRACE_LOCAL_H */
diff --git a/tracecmd/trace-obj-debug.c b/tracecmd/trace-obj-debug.c
index 9aa9baae..b3bdc486 100644
--- a/tracecmd/trace-obj-debug.c
+++ b/tracecmd/trace-obj-debug.c
@@ -4,10 +4,642 @@
  *
  */
 #include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
 #include <errno.h>
+#include <bfd.h>
 #include <unistd.h>
 
 #include "trace-local.h"
+#include "trace-cmd.h"
+
+#ifdef BFD_INSTALLED
+
+struct trace_debug_handle {
+	bfd *bfd;
+	unsigned long long addr_offset;
+};
+
+struct trace_debug_vmem_range {
+	struct trace_debug_vmem_range	*next;
+	unsigned long long		start;
+	unsigned long long		end;
+};
+
+struct debug_symbols {
+	struct debug_symbols		*next;
+	struct tracecmd_debug_symbols	symbol;
+};
+
+struct trace_debug_file {
+	struct trace_debug_file		*next;
+	char				*file_name;
+	struct trace_debug_vmem_range	*vmem;
+	struct trace_debug_handle	*dbg;
+	int				sym_count;
+	struct debug_symbols		*sym; /* symbols to resolve,
+					       * look in this file only
+					       */
+};
+
+struct trace_debug_object {
+	int				pid;
+	char				*fname;
+	struct pid_addr_maps		*fmaps;
+	int				sym_count;
+	struct debug_symbols		*sym;	/* symbols to resolve,
+						 * look into all files
+						 */
+	struct trace_debug_file		*files;
+};
+
+#define RESOLVE_NAME		(1 << 0)
+#define RESOLVE_VMA		(1 << 1)
+#define RESOLVE_FOFFSET		(1 << 2)
+struct trace_obj_job {
+	unsigned int flags;
+	unsigned long long addr_offset;
+	struct debug_symbols *symbols;
+};
+
+struct dwarf_bfd_context {
+	asymbol **table;
+	struct trace_obj_job *job;
+};
+
+static void bfd_dwarf_section(bfd *abfd, asection *section, void *param)
+{
+	struct dwarf_bfd_context *context = (struct dwarf_bfd_context *)param;
+	unsigned int discriminator;
+	const char *functionname;
+	struct debug_symbols *s;
+	unsigned long long vma;
+	const char *filename;
+	unsigned int line;
+	bfd_boolean found;
+
+	if (!(section->flags & SEC_CODE))
+		return;
+
+	for (s = context->job->symbols; s; s = s->next) {
+		if (!s->symbol.vma_near)
+			continue;
+		if (abfd->flags & DYNAMIC)
+			vma = s->symbol.vma_near - context->job->addr_offset;
+		else
+			vma = s->symbol.vma_near;
+		if (vma && section->vma <= vma &&
+		    (section->vma + section->size) > vma) {
+			if (!s->symbol.fname)
+				s->symbol.fname = strdup(abfd->filename);
+			if (context->job->flags & RESOLVE_FOFFSET)
+				s->symbol.foffset = section->filepos + (vma - section->vma);
+			if (!s->symbol.name && (context->job->flags & RESOLVE_NAME)) {
+				found = bfd_find_nearest_line_discriminator(abfd, section, context->table,
+									    vma - section->vma, &filename,
+									    &functionname, &line, &discriminator);
+				if (found)
+					s->symbol.name = strdup(functionname);
+			}
+		}
+	}
+}
+
+static asymbol **get_sym_table(bfd *handle)
+{
+	bfd_boolean dyn = FALSE;
+	asymbol **symtable;
+	long count;
+	long size;
+
+	if ((bfd_get_file_flags(handle) & HAS_SYMS) == 0)
+		return NULL;
+	size = bfd_get_symtab_upper_bound(handle);
+	if (size == 0) {
+		size = bfd_get_dynamic_symtab_upper_bound(handle);
+		dyn = TRUE;
+	}
+	if (size <= 0)
+		return NULL;
+
+	symtable = (asymbol **) calloc(1, size);
+	if (!symtable)
+		return NULL;
+	if (dyn)
+		count = bfd_canonicalize_dynamic_symtab(handle, symtable);
+	else
+		count = bfd_canonicalize_symtab(handle, symtable);
+	if (count <= 0) {
+		free(symtable);
+		return NULL;
+	}
+/*
+ *	 alloc = bfd_demangle(cur_bfd, name, demangle_flags);
+ */
+
+	return symtable;
+}
+
+static int bfd_symlookup(struct dwarf_bfd_context *context)
+{
+	struct debug_symbols *s;
+	asymbol **sp;
+	int res = 0;
+
+	for (sp = context->table; *sp != NULL; sp++) {
+		if (!((*sp)->flags & BSF_FUNCTION))
+			continue;
+		for (s = context->job->symbols; s; s = s->next) {
+			if (!s->symbol.name ||
+			    (strlen(s->symbol.name) != strlen((*sp)->name)))
+				continue;
+			if (!strcmp(s->symbol.name, (*sp)->name)) {
+				s->symbol.vma_start = (*sp)->value + (*sp)->section->vma;
+				if ((*sp)->the_bfd->flags & DYNAMIC)
+					s->symbol.vma_start += context->job->addr_offset;
+				res++;
+			}
+		}
+	}
+	return res;
+}
+
+static int bfd_process_object(bfd *abfd, struct trace_obj_job *job)
+{
+	struct dwarf_bfd_context context;
+	int ret = 0;
+
+	memset(&context, 0, sizeof(context));
+	context.job = job;
+
+	if (bfd_check_format_matches(abfd, bfd_object, NULL) ||
+	    bfd_check_format_matches(abfd, bfd_core, NULL)) {
+		context.table = get_sym_table(abfd);
+		if (job->flags & RESOLVE_VMA)
+			bfd_symlookup(&context);
+		if ((job->flags & RESOLVE_NAME) || (job->flags & RESOLVE_FOFFSET))
+			bfd_map_over_sections(abfd, bfd_dwarf_section, &context);
+		free(context.table);
+	} else {
+		ret = -1;
+	}
+
+	return ret;
+}
+
+static int bfd_read_all(bfd *abfd, struct trace_obj_job *job)
+{
+	bfd *last_arfile = NULL;
+	bfd *arfile = NULL;
+	int ret = 0;
+
+	if (bfd_check_format(abfd, bfd_archive)) {
+		for (;;) {
+			bfd_set_error(bfd_error_no_error);
+			arfile = bfd_openr_next_archived_file(abfd, arfile);
+			if (!arfile) {
+				if (bfd_get_error() != bfd_error_no_more_archived_files)
+					break;
+			}
+			ret = bfd_read_all(arfile, job);
+			if (last_arfile)
+				bfd_close(last_arfile);
+			last_arfile = arfile;
+		}
+		if (last_arfile)
+			bfd_close(last_arfile);
+	} else
+		ret = bfd_process_object(abfd, job);
+
+	return ret;
+}
+
+/**
+ * resolve_symbol_vma - name -> (vma, file offset) resolving
+ * @obj - pointer to object, returned by debug_handle_create()
+ * @symbols - link list with desired symbols, with given name
+ *
+ * Get VMA and file offset of the symbols with given name
+ * Return 0 on success, -1 on error
+ */
+static int resolve_symbol_vma(struct trace_debug_handle *obj,
+			      struct debug_symbols *symbols)
+{
+	struct trace_obj_job job;
+	int ret;
+
+	memset(&job, 0, sizeof(job));
+	job.flags |= RESOLVE_VMA;
+	job.flags |= RESOLVE_FOFFSET;
+	job.symbols = symbols;
+	job.addr_offset = obj->addr_offset;
+	ret = bfd_read_all(obj->bfd, &job);
+
+	return ret;
+}
+
+/**
+ * resolve_symbol_name - vma -> name resolving
+ * @obj - pointer to object, returned by debug_handle_create()
+ * @symbols - link list with desired symbols, with given VMA
+ *
+ * Get names of the symbols with given VMA, look for nearest symbol to that VMA
+ * Return 0 on success, -1 on error
+ */
+static int resolve_symbol_name(struct trace_debug_handle *obj,
+			       struct debug_symbols *symbols)
+{
+	struct trace_obj_job job;
+
+	if (!obj || !obj->bfd)
+		return -1;
+	memset(&job, 0, sizeof(job));
+	job.flags |= RESOLVE_NAME;
+	job.addr_offset = obj->addr_offset;
+	job.symbols = symbols;
+	return bfd_read_all(obj->bfd, &job);
+}
+
+/**
+ * debug_handle_destroy - Close file opened with debug_handle_create()
+ * @obj - pointer to object, returned by debug_handle_create()
+ *
+ * Close the file and free any allocated resources, related to file's debug
+ * information
+ */
+static void debug_handle_destroy(struct trace_debug_handle *obj)
+{
+	if (obj && obj->bfd)
+		bfd_close(obj->bfd);
+	free(obj);
+}
+
+/**
+ * debug_handle_create - Open binary file for parsing ELF and DWARF information
+ * @file: Name of the binary ELF file.
+ *
+ * Return pointer to struct trace_debug_handle, that can be passed to other APIs
+ * for extracting debug information from the file. NULL in case of an error.
+ */
+static struct trace_debug_handle *debug_handle_create(char *file)
+{
+	struct trace_debug_handle *obj = NULL;
+
+	obj = calloc(1, sizeof(*obj));
+	if (!obj)
+		return NULL;
+
+	bfd_init();
+	obj->bfd = bfd_openr(file, NULL);
+	if (!obj->bfd)
+		goto error;
+	obj->bfd->flags |= BFD_DECOMPRESS;
+
+	return obj;
+
+error:
+	debug_handle_destroy(obj);
+	return NULL;
+}
+
+static void set_vma_offset(struct trace_debug_handle *obj,
+				unsigned long long addr_offset)
+{
+	if (obj)
+		obj->addr_offset = addr_offset;
+}
+
+static char *get_full_name(int pid)
+{
+	char mapname[PATH_MAX+1];
+	char fname[PATH_MAX+1];
+	int ret;
+
+	sprintf(fname, "/proc/%d/exe", pid);
+	ret = readlink(fname, mapname, PATH_MAX);
+	if (ret >= PATH_MAX || ret < 0)
+		return NULL;
+	mapname[ret] = 0;
+
+	return strdup(mapname);
+}
+
+static struct trace_debug_file *get_mapped_file(struct trace_debug_object *dbg, char *fname)
+{
+	struct trace_debug_file *file = dbg->files;
+
+	while (file) {
+		if (!strcmp(fname, file->file_name))
+			break;
+		file = file->next;
+	}
+	if (file)
+		return file;
+
+	file = calloc(1, sizeof(*file));
+	if (!file)
+		return NULL;
+	file->file_name = strdup(fname);
+	file->dbg = debug_handle_create(fname);
+	file->next = dbg->files;
+	dbg->files = file;
+	return file;
+}
+
+void trace_debug_obj_destroy(struct trace_debug_object *dbg)
+{
+	struct trace_debug_vmem_range *mdel;
+	struct trace_debug_file *fdel;
+	struct debug_symbols *sdel;
+
+	while (dbg->sym) {
+		sdel = dbg->sym;
+		dbg->sym = dbg->sym->next;
+		free(sdel->symbol.name);
+		free(sdel->symbol.fname);
+		free(sdel);
+	}
+	while (dbg->files) {
+		fdel = dbg->files;
+		dbg->files = dbg->files->next;
+		debug_handle_destroy(fdel->dbg);
+		while (fdel->sym) {
+			sdel = fdel->sym;
+			fdel->sym = fdel->sym->next;
+			free(sdel->symbol.name);
+			free(sdel->symbol.fname);
+			free(sdel);
+		}
+		while (fdel->vmem) {
+			mdel = fdel->vmem;
+			fdel->vmem = fdel->vmem->next;
+			free(mdel);
+		}
+		free(fdel);
+	}
+
+	free(dbg->fname);
+	trace_debug_free_filemap(dbg->fmaps);
+	free(dbg);
+}
+
+struct trace_debug_object *trace_debug_obj_create_pid(int pid)
+{
+	struct trace_debug_vmem_range *mem;
+	struct trace_debug_object *dbg;
+	struct trace_debug_file *file;
+	int i;
+
+	dbg = calloc(1, sizeof(*dbg));
+	if (!dbg)
+		return NULL;
+
+	dbg->pid = pid;
+	dbg->fname = get_full_name(pid);
+	trace_debug_get_filemap(&dbg->fmaps, pid);
+
+	for (i = 0; i < dbg->fmaps->nr_lib_maps; i++) {
+		file = get_mapped_file(dbg, dbg->fmaps->lib_maps[i].lib_name);
+		if (!file)
+			goto error;
+		if (file->vmem && file->vmem->end == dbg->fmaps->lib_maps[i].start) {
+			file->vmem->end = dbg->fmaps->lib_maps[i].end;
+		} else {
+			mem = calloc(1, sizeof(*mem));
+			if (!mem)
+				goto error;
+			mem->start = dbg->fmaps->lib_maps[i].start;
+			mem->end = dbg->fmaps->lib_maps[i].end;
+			mem->next = file->vmem;
+			file->vmem = mem;
+			set_vma_offset(file->dbg, mem->start);
+		}
+	}
+	return dbg;
+
+error:
+	trace_debug_obj_destroy(dbg);
+	return NULL;
+}
+
+struct trace_debug_object *trace_debug_obj_create_file(char *fname)
+{
+	struct trace_debug_object *dbg;
+	struct trace_debug_file *file;
+
+	dbg = calloc(1, sizeof(*dbg));
+	if (!dbg)
+		return NULL;
+
+	dbg->fname = strdup(fname);
+	file = get_mapped_file(dbg, fname);
+	if (!file)
+		goto error;
+
+	return dbg;
+
+error:
+	trace_debug_obj_destroy(dbg);
+	return NULL;
+}
+
+static void set_unknown(struct debug_symbols *sym, char *file)
+{
+	while (sym) {
+		if (!sym->symbol.fname)
+			sym->symbol.fname = strdup(file);
+		sym = sym->next;
+	}
+}
+
+int trace_debug_resolve_symbols(struct trace_debug_object *obj)
+{
+	struct trace_debug_file *file;
+
+	for (file = obj->files; file; file = file->next) {
+		if (!file->dbg) {
+			set_unknown(file->sym, file->file_name);
+			continue;
+		}
+		/* near VMA -> name resolving */
+		resolve_symbol_name(file->dbg, file->sym);
+		/* name -> exact VMA resolving */
+		resolve_symbol_vma(file->dbg, file->sym);
+		resolve_symbol_vma(file->dbg, obj->sym);
+	}
+
+	return 0;
+}
+
+static int add_resolve_vma2name(struct trace_debug_object *obj,
+				unsigned long long vma)
+{
+	struct trace_debug_vmem_range *vmem;
+	struct debug_symbols *s = NULL;
+	struct trace_debug_file *file;
+
+	file = obj->files;
+	while (file) {
+		if (!file->vmem)
+			break;
+		vmem = file->vmem;
+		while (vmem) {
+			if (vma >= vmem->start && vma <= vmem->end)
+				break;
+			vmem = vmem->next;
+		}
+		if (vmem)
+			break;
+		file = file->next;
+	}
+	if (file) {
+		s = file->sym;
+		while (s) {
+			if (s->symbol.vma_near == vma)
+				break;
+			s = s->next;
+		}
+		if (!s) {
+			s = calloc(1, sizeof(*s));
+			if (!s)
+				return -1;
+			s->symbol.vma_near = vma;
+			s->next = file->sym;
+			file->sym = s;
+			file->sym_count++;
+		}
+	}
+
+	if (s)
+		return 0;
+	return -1;
+}
+
+static int add_resolve_name2vma(struct trace_debug_object *obj, char *name)
+{
+	struct debug_symbols *s = NULL;
+
+	s = obj->sym;
+	while (s) {
+		if (s->symbol.name && !strcmp(name, s->symbol.name))
+			break;
+		s = s->next;
+	}
+	if (!s) {
+		s = calloc(1, sizeof(*s));
+		if (!s)
+			return -1;
+		s->symbol.name = strdup(name);
+		if (!s->symbol.name) {
+			free(s);
+			return -1;
+		}
+		s->next = obj->sym;
+		obj->sym = s;
+		obj->sym_count++;
+	}
+
+	return 0;
+}
+
+int trace_debug_add_resolve_symbol(struct trace_debug_object *obj,
+				   unsigned long long vma, char *name)
+{
+	int ret = -1;
+
+	if (!obj)
+		return -1;
+
+	if (!name && vma) /* vma -> name resolving */
+		ret = add_resolve_vma2name(obj, vma);
+	else if (name) /* name -> vma resolving */
+		ret = add_resolve_name2vma(obj, name);
+
+	return ret;
+}
+
+static int walk_symbols(struct debug_symbols *sym,
+			int (*callback)(struct tracecmd_debug_symbols *, void *),
+			void *context)
+{
+	while (sym) {
+		if (callback(&sym->symbol, context))
+			return -1;
+		sym = sym->next;
+	}
+
+	return 0;
+}
+
+void trace_debug_walk_resolved_symbols(struct trace_debug_object *obj,
+				       int (*callback)(struct tracecmd_debug_symbols *, void *),
+				       void *context)
+{
+	struct trace_debug_file *file;
+
+	walk_symbols(obj->sym, callback, context);
+	file = obj->files;
+	while (file) {
+		walk_symbols(file->sym, callback, context);
+		file = file->next;
+	}
+}
+
+
+void trace_debug_free_symbols(struct tracecmd_debug_symbols *symbols, int count)
+{
+	int i;
+
+	if (!symbols)
+		return;
+
+	for (i = 0; i < count; i++) {
+		free(symbols[i].name);
+		free(symbols[i].fname);
+	}
+	free(symbols);
+
+}
+#else
+int trace_debug_resolve_symbols(struct trace_debug_object *obj)
+{
+	return -1;
+}
+
+int trace_debug_add_resolve_symbol(struct trace_debug_object *obj,
+				   unsigned long long vma, char *name)
+{
+	return -1;
+}
+
+void trace_debug_walk_resolved_symbols(struct trace_debug_object *obj,
+				       int (*callback)(struct tracecmd_debug_symbols *, void *),
+				       void *context)
+{
+
+}
+
+void trace_debug_free_symbols(struct tracecmd_debug_symbols *symbols, int count)
+{
+
+}
+
+void trace_debug_obj_destroy(struct trace_debug_object *debug)
+{
+
+}
+
+struct trace_debug_object *trace_debug_obj_create_file(char *file)
+{
+	return NULL;
+}
+struct trace_debug_object *trace_debug_obj_create_pid(int pid)
+{
+	return NULL;
+}
+
+#endif
 
 #define _STRINGIFY(x) #x
 #define STRINGIFY(x) _STRINGIFY(x)
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 3/4] trace-cmd: Add a new option in trace.dat file for the address to function name mapping
  2020-08-07 12:06 [PATCH 0/4][WiP] trace-cmd: Add "perf" sub command Tzvetomir Stoyanov (VMware)
  2020-08-07 12:06 ` [PATCH 1/4] trace-cmd: Internal refactoring of pid address map logic Tzvetomir Stoyanov (VMware)
  2020-08-07 12:06 ` [PATCH 2/4] trace-cmd: New internal APIs for reading ELF header Tzvetomir Stoyanov (VMware)
@ 2020-08-07 12:06 ` Tzvetomir Stoyanov (VMware)
  2020-08-07 12:06 ` [PATCH 4/4] trace-cmd: Add new subcomand "trace-cmd perf" Tzvetomir Stoyanov (VMware)
  3 siblings, 0 replies; 5+ messages in thread
From: Tzvetomir Stoyanov (VMware) @ 2020-08-07 12:06 UTC (permalink / raw)
  To: rostedt; +Cc: linux-trace-devel

There are cases where a user space address recorded in the trace file has
to be resolved to a function name when the file is visualised. This mapping
is available during the trace. A new trace.dat file option
TRACECMD_OPTION_PID_SYMBOLS is added, for storing the mapping in the
file. A new API is introduced, to resolve an address to a name, using the
information stored in the TRACECMD_OPTION_PID_SYMBOLS option:

 struct tracecmd_debug_symbols *
 tracecmd_search_task_symbol(struct tracecmd_input *handle,
			     int pid, unsigned long long addr);

Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
---
 include/trace-cmd/trace-cmd.h |   5 ++
 lib/trace-cmd/trace-input.c   | 144 ++++++++++++++++++++++++++++++++++
 tracecmd/trace-dump.c         |   3 +
 3 files changed, 152 insertions(+)

diff --git a/include/trace-cmd/trace-cmd.h b/include/trace-cmd/trace-cmd.h
index 5ebd076e..f47f6a19 100644
--- a/include/trace-cmd/trace-cmd.h
+++ b/include/trace-cmd/trace-cmd.h
@@ -112,6 +112,7 @@ enum {
 	TRACECMD_OPTION_TRACEID,
 	TRACECMD_OPTION_TIME_SHIFT,
 	TRACECMD_OPTION_GUEST,
+	TRACECMD_OPTION_PID_SYMBOLS,
 };
 
 enum {
@@ -262,6 +263,10 @@ unsigned int tracecmd_record_ts_delta(struct tracecmd_input *handle,
 struct tracecmd_proc_addr_map *
 tracecmd_search_task_map(struct tracecmd_input *handle,
 			 int pid, unsigned long long addr);
+
+struct tracecmd_debug_symbols *
+tracecmd_search_task_symbol(struct tracecmd_input *handle,
+			    int pid, unsigned long long addr);
 #ifndef SWIG
 /* hack for function graph work around */
 extern __thread struct tracecmd_input *tracecmd_curr_thread_handle;
diff --git a/lib/trace-cmd/trace-input.c b/lib/trace-cmd/trace-input.c
index af97883e..39da0cb8 100644
--- a/lib/trace-cmd/trace-input.c
+++ b/lib/trace-cmd/trace-input.c
@@ -19,6 +19,8 @@
 #include "trace-local.h"
 #include "kbuffer.h"
 #include "list.h"
+#include "trace-hash.h"
+#include "trace-hash-local.h"
 
 #define _STRINGIFY(x) #x
 #define STRINGIFY(x) _STRINGIFY(x)
@@ -98,6 +100,18 @@ struct host_trace_info {
 	struct ts_offset_sample	*ts_samples;
 };
 
+/*
+ * One symbol of a traced PID, stored as an entry of a trace_hash table.
+ * @hash has to stay the first member: the code casts between
+ * struct trace_hash_item * and struct pid_function_map *.
+ */
+struct pid_function_map {
+	struct trace_hash_item hash;	/* key is trace_hash() of symb.vma_near */
+	struct tracecmd_debug_symbols symb;	/* names and addresses of the symbol */
+};
+
+/*
+ * Symbols loaded from one TRACECMD_OPTION_PID_SYMBOLS option.
+ * The maps of all PIDs are chained in a singly linked list.
+ */
+struct pid_symbol_maps {
+	struct pid_symbol_maps *next;	/* next map in the list, NULL at the end */
+	int pid;	/* PID parsed from the option header line */
+	int symb_count;	/* symbol count parsed from the option header line */
+	struct trace_hash symbols;	/* struct pid_function_map entries */
+};
+
 struct tracecmd_input {
 	struct tep_handle	*pevent;
 	struct tep_plugin_list	*plugin_list;
@@ -130,6 +144,7 @@ struct tracecmd_input {
 
 	struct hook_list	*hooks;
 	struct pid_addr_maps	*pid_maps;
+	struct pid_symbol_maps	*pid_symbols;
 	/* file information */
 	size_t			header_files_start;
 	size_t			ftrace_files_start;
@@ -2430,6 +2445,92 @@ static int trace_pid_map_search(const void *a, const void *b)
 	return 0;
 }
 
+/*
+ * Release a list of per PID symbol maps: every hashed symbol together with
+ * its name strings, each hash table and the list nodes themselves.
+ * @maps: head of the list, may be NULL
+ */
+static void trace_pid_symbols_free(struct pid_symbol_maps *maps)
+{
+	struct pid_symbol_maps *del;
+	struct pid_function_map *fmap;
+	struct trace_hash_item **bucket;
+	struct trace_hash_item *item;
+
+	while (maps) {
+		del = maps;
+		maps = maps->next;
+		trace_hash_for_each_bucket(bucket, &del->symbols) {
+			trace_hash_while_item(item, bucket) {
+				trace_hash_del(item);
+				fmap = (struct pid_function_map *)item;
+				free(fmap->symb.name);
+				free(fmap->symb.fname);
+				free(fmap);
+			}
+		}
+		trace_hash_free(&del->symbols);
+		/* The node itself is calloc()'ed by the loader and must go too */
+		free(del);
+	}
+}
+
+#define MAX_FUNC_NAME	256
+/*
+ * Parse the payload of a TRACECMD_OPTION_PID_SYMBOLS option and link the
+ * resulting symbol map into @handle.
+ * @handle: input handle that receives the new map
+ * @buf: NUL terminated option text, modified in place while parsing. The
+ *	 first line is "<pid in hex> <symbol count>", followed by one
+ *	 "<file> <function> <start in hex> <address in hex>" line per symbol.
+ *
+ * Parsing stops quietly at the first missing or malformed symbol line; the
+ * symbols read up to that point are kept.
+ *
+ * Returns 0 on success, -EINVAL if the header line is malformed and -ENOMEM
+ * on allocation failure.
+ */
+static int trace_pid_symbols_load(struct tracecmd_input *handle, char *buf)
+{
+	struct pid_symbol_maps *maps = NULL;
+	struct pid_function_map *s;
+	/* A "%Ns" conversion stores up to N characters plus the closing NUL */
+	char file[PATH_MAX + 1];
+	char fname[MAX_FUNC_NAME + 1];
+	char *line;
+	int res;
+	int ret;
+	int i;
+
+	maps = calloc(1, sizeof(*maps));
+	if (!maps)
+		return -ENOMEM;
+
+	ret  = -EINVAL;
+	line = strchr(buf, '\n');
+	if (!line)
+		goto out_fail;
+
+	*line = '\0';
+	res = sscanf(buf, "%x %d", &maps->pid, &maps->symb_count);
+	if (res != 2)
+		goto out_fail;
+
+	ret  = -ENOMEM;
+	if (trace_hash_init(&maps->symbols, 64) < 0)
+		goto out_fail;
+
+	buf = line + 1;
+	line = strchr(buf, '\n');
+	for (i = 0; i < maps->symb_count; i++) {
+		if (!line)
+			break;
+		*line = '\0';
+		s = calloc(1, sizeof(*s));
+		if (!s)
+			goto out_fail;
+		res = sscanf(buf, "%"STRINGIFY(PATH_MAX)"s %"STRINGIFY(MAX_FUNC_NAME)"s %llx %llx",
+				file, fname, &s->symb.vma_start, &s->symb.vma_near);
+		if (res != 4) {
+			/* Not in the hash yet, so nobody else would free it */
+			free(s);
+			break;
+		}
+		s->symb.fname = strdup(file);
+		s->symb.name = strdup(fname);
+		if (!s->symb.fname || !s->symb.name) {
+			free(s->symb.fname);
+			free(s->symb.name);
+			free(s);
+			goto out_fail;
+		}
+		s->hash.key = trace_hash(s->symb.vma_near);
+		trace_hash_add(&maps->symbols, (struct trace_hash_item *)s);
+		buf = line + 1;
+		line = strchr(buf, '\n');
+	}
+
+	maps->next = handle->pid_symbols;
+	handle->pid_symbols = maps;
+
+	return 0;
+
+out_fail:
+	trace_pid_symbols_free(maps);
+	return ret;
+}
+
 /**
  * tracecmd_search_task_map - Search task memory address map
  * @handle: input handle to the trace.dat file
@@ -2469,6 +2570,43 @@ tracecmd_search_task_map(struct tracecmd_input *handle,
 	return lib;
 }
 
+/**
+ * tracecmd_search_task_symbol - Resolve address to function name
+ * @handle: input handle to the trace.dat file
+ * @pid: pid of the task whose function information is stored in the trace.dat
+ * @addr: address to look up
+ *
+ * The trace.dat file can carry, per traced PID, a table that maps addresses
+ * to function names; it is saved by the "perf" sub command. This API finds
+ * the table of @pid and looks up the entry whose key equals the hash of @addr.
+ *
+ * Returns a pointer to the struct tracecmd_debug_symbols stored for the
+ * requested @addr (function name, start address and name of the binary file
+ * that contains the function), or NULL if there is no table for @pid or no
+ * matching entry. The returned entry is released by tracecmd_close().
+ *
+ * NOTE(review): the lookup passes no match callback, so only the hash key is
+ * compared - confirm that colliding addresses cannot return a wrong symbol.
+ */
+struct tracecmd_debug_symbols *
+tracecmd_search_task_symbol(struct tracecmd_input *handle,
+			    int pid, unsigned long long addr)
+{
+	struct pid_symbol_maps *map = handle->pid_symbols;
+	struct trace_hash_item *item;
+
+	while (map && map->pid != pid)
+		map = map->next;
+	if (!map)
+		return NULL;
+
+	item = trace_hash_find(&map->symbols, trace_hash(addr), NULL, NULL);
+	if (!item)
+		return NULL;
+
+	/* The hash item is the first member of struct pid_function_map */
+	return &((struct pid_function_map *)item)->symb;
+}
+
 static int handle_options(struct tracecmd_input *handle)
 {
 	long long offset;
@@ -2610,6 +2748,10 @@ static int handle_options(struct tracecmd_input *handle)
 		case TRACECMD_OPTION_GUEST:
 			trace_guest_load(handle, buf, size);
 			break;
+		case TRACECMD_OPTION_PID_SYMBOLS:
+			if (buf[size-1] == '\0')
+				trace_pid_symbols_load(handle, buf);
+			break;
 		default:
 			warning("unknown option %d", option);
 			break;
@@ -3352,6 +3494,8 @@ void tracecmd_close(struct tracecmd_input *handle)
 
 	trace_pid_map_free(handle->pid_maps);
 	handle->pid_maps = NULL;
+	trace_pid_symbols_free(handle->pid_symbols);
+	handle->pid_symbols = NULL;
 
 	trace_tsync_offset_free(&handle->host);
 	trace_guests_free(handle);
diff --git a/tracecmd/trace-dump.c b/tracecmd/trace-dump.c
index 5642f12a..5c6992c8 100644
--- a/tracecmd/trace-dump.c
+++ b/tracecmd/trace-dump.c
@@ -530,6 +530,9 @@ static void dump_options(int fd)
 		case TRACECMD_OPTION_GUEST:
 			dump_option_guest(fd, size);
 			break;
+		case TRACECMD_OPTION_PID_SYMBOLS:
+			dump_option_string(fd, size, "PIDSYMBOLS");
+			break;
 		default:
 			do_print(OPTIONS, " %d %d\t[Unknown option, size - skipping]\n",
 				 option, size);
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 4/4] trace-cmd: Add new subcomand "trace-cmd perf"
  2020-08-07 12:06 [PATCH 0/4][WiP] trace-cmd: Add "perf" sub command Tzvetomir Stoyanov (VMware)
                   ` (2 preceding siblings ...)
  2020-08-07 12:06 ` [PATCH 3/4] trace-cmd: Add a new option in trace.dat file for the address to function name mapping Tzvetomir Stoyanov (VMware)
@ 2020-08-07 12:06 ` Tzvetomir Stoyanov (VMware)
  3 siblings, 0 replies; 5+ messages in thread
From: Tzvetomir Stoyanov (VMware) @ 2020-08-07 12:06 UTC (permalink / raw)
  To: rostedt; +Cc: linux-trace-devel

The new sub command uses perf to collect performance information of the
selected process. It has one mandatory argument:
trace-cmd perf --pid <PID>

Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
---
 tracecmd/Makefile              |   1 +
 tracecmd/include/trace-local.h |   2 +
 tracecmd/trace-cmd.c           |   1 +
 tracecmd/trace-perf.c          | 540 +++++++++++++++++++++++++++++++++
 4 files changed, 544 insertions(+)
 create mode 100644 tracecmd/trace-perf.c

diff --git a/tracecmd/Makefile b/tracecmd/Makefile
index f9435558..64cd7430 100644
--- a/tracecmd/Makefile
+++ b/tracecmd/Makefile
@@ -32,6 +32,7 @@ TRACE_CMD_OBJS += trace-list.o
 TRACE_CMD_OBJS += trace-usage.o
 TRACE_CMD_OBJS += trace-dump.o
 TRACE_CMD_OBJS += trace-obj-debug.o
+TRACE_CMD_OBJS += trace-perf.o
 ifeq ($(VSOCK_DEFINED), 1)
 TRACE_CMD_OBJS += trace-tsync.o
 endif
diff --git a/tracecmd/include/trace-local.h b/tracecmd/include/trace-local.h
index ccae61d4..2ab915ac 100644
--- a/tracecmd/include/trace-local.h
+++ b/tracecmd/include/trace-local.h
@@ -101,6 +101,8 @@ void trace_usage(int argc, char **argv);
 
 void trace_dump(int argc, char **argv);
 
+void trace_perf(int argc, char **argv);
+
 int trace_record_agent(struct tracecmd_msg_handle *msg_handle,
 		       int cpus, int *fds,
 		       int argc, char **argv, bool use_fifos,
diff --git a/tracecmd/trace-cmd.c b/tracecmd/trace-cmd.c
index 7376c5a5..0c5b324f 100644
--- a/tracecmd/trace-cmd.c
+++ b/tracecmd/trace-cmd.c
@@ -104,6 +104,7 @@ struct command commands[] = {
 	{"list", trace_list},
 	{"help", trace_usage},
 	{"dump", trace_dump},
+	{"perf", trace_perf},
 	{"-h", trace_usage},
 };
 
diff --git a/tracecmd/trace-perf.c b/tracecmd/trace-perf.c
new file mode 100644
index 00000000..0b100aa0
--- /dev/null
+++ b/tracecmd/trace-perf.c
@@ -0,0 +1,540 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2019, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
+ *
+ */
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <getopt.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <pthread.h>
+#include <linux/perf_event.h>
+#include <asm/unistd.h>
+#include <sys/mman.h>
+
+
+#include "trace-local.h"
+#include "trace-cmd-local.h"
+
+#define BUF_SHIFT	8	/* data pages per perf mmap buffer: a page count, not a shift */
+
+/*
+ * Buffer filled by read() on a perf event file descriptor.
+ * NOTE(review): the field order has to match the read_format requested in
+ * perf_init_pe() (total time enabled, total time running, id) - verify
+ * against the perf_event_open(2) read_format layout.
+ */
+struct perf_stat {
+	__u64 count;
+	__u64 time_enabled;
+	__u64 time_running;
+	__u64 id;
+};
+
+/* Per CPU state of a perf session. */
+struct perf_cpu {
+	int cpu;	/* CPU index passed to perf_event_open() */
+	int fd;		/* perf event file descriptor, -1 while not opened */
+	char *file;	/* name of the temporary file holding the raw samples */
+	pthread_t tid;	/* reader thread started by perf_run_readers() */
+	struct perf_event_mmap_page *mpage;	/* start of the mmap'ed perf buffer */
+};
+
+/* State of one "trace-cmd perf" run. */
+struct perf_session {
+	int pid;	/* PID of the profiled process */
+	int cpu_count;	/* number of entries in @cpus */
+	struct perf_cpu *cpus;	/* one entry per CPU */
+	struct trace_debug_object *fdebug;	/* symbol resolving context of @pid */
+};
+
+/*
+ * Invoke the perf_event_open system call; all arguments are handed over
+ * unchanged and the result of syscall() is returned as is.
+ */
+static long perf_event_open(struct perf_event_attr *event, pid_t pid,
+			    int cpu, int group_fd, unsigned long flags)
+{
+	long ret;
+
+	ret = syscall(__NR_perf_event_open, event, pid, cpu, group_fd, flags);
+
+	return ret;
+}
+
+/*
+ * struct {
+ *	struct perf_event_header	header;
+ *
+ *	#
+ *	# Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID.
+ *	# The advantage of PERF_SAMPLE_IDENTIFIER is that its position
+ *	# is fixed relative to header.
+ *	#
+ *
+ *	{ u64			id;	  } && PERF_SAMPLE_IDENTIFIER
+ *	{ u64			ip;	  } && PERF_SAMPLE_IP
+ *	{ u32			pid, tid; } && PERF_SAMPLE_TID
+ *	{ u64			time;     } && PERF_SAMPLE_TIME
+ *	{ u64			addr;     } && PERF_SAMPLE_ADDR
+ *	{ u64			id;	  } && PERF_SAMPLE_ID
+ *	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
+ *	{ u32			cpu, res; } && PERF_SAMPLE_CPU
+ *	{ u64			period;   } && PERF_SAMPLE_PERIOD
+ *
+ *	{ struct read_format	values;	  } && PERF_SAMPLE_READ
+ *
+ *	{ u64			nr,
+ *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
+ *
+ *	#
+ *	# The RAW record below is opaque data wrt the ABI
+ *	#
+ *	# That is, the ABI doesn't make any promises wrt to
+ *	# the stability of its content, it may vary depending
+ *	# on event, hardware, kernel version and phase of
+ *	# the moon.
+ *	#
+ *	# In other words, PERF_SAMPLE_RAW contents are not an ABI.
+ *	#
+ *
+ *	{ u32			size;
+ *	  char                  data[size];}&& PERF_SAMPLE_RAW
+ *
+ *	{ u64                   nr;
+ *	  { u64	hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
+ *        { u64 from, to, flags } lbr[nr];
+ *      } && PERF_SAMPLE_BRANCH_STACK
+ *
+ *	{ u64			abi; # enum perf_sample_regs_abi
+ *	  u64			regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
+ *
+ *	{ u64			size;
+ *	  char			data[size];
+ *	  u64			dyn_size; } && PERF_SAMPLE_STACK_USER
+ *
+ *	{ u64			weight;   } && PERF_SAMPLE_WEIGHT
+ *	{ u64			data_src; } && PERF_SAMPLE_DATA_SRC
+ *	{ u64			transaction; } && PERF_SAMPLE_TRANSACTION
+ *	{ u64			abi; # enum perf_sample_regs_abi
+ *	  u64			regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
+ *	{ u64			phys_addr;} && PERF_SAMPLE_PHYS_ADDR
+ *	{ u64			size;
+ *	  char			data[size]; } && PERF_SAMPLE_AUX
+ * };
+ */
+
+/*
+ * Layout of a PERF_RECORD_SAMPLE record for the sample_type that
+ * perf_init_pe() requests. The struct is packed and the members are listed
+ * in record order, so a record can be accessed through a plain cast.
+ */
+struct perf_event_sample {
+	struct perf_event_header head;
+	uint64_t id; //PERF_SAMPLE_IDENTIFIER
+	uint32_t pid, tid; //PERF_SAMPLE_TID
+	uint64_t time; //PERF_SAMPLE_TIME
+	uint32_t cpu, res; // PERF_SAMPLE_CPU
+	uint64_t period; // PERF_SAMPLE_PERIOD
+	uint64_t nr;
+	uint64_t ips[]; // PERF_SAMPLE_CALLCHAIN
+} __attribute__((packed));
+
+/*
+ * Append one sample to the per CPU temporary file as a sequence of uint64_t
+ * values: the timestamp, the number of callchain entries, then the entries.
+ * @sample: the sample record
+ * @cpu: CPU the sample was read from (not used yet)
+ * @fd: descriptor of the temporary file
+ *
+ * The record is abandoned at the first failed or short write.
+ */
+static void handle_event(struct perf_event_sample *sample,
+			 struct perf_cpu *cpu, int fd)
+{
+	const ssize_t len = sizeof(uint64_t);
+	uint64_t i;	/* same width as sample->nr, so the index cannot overflow */
+
+	if (write(fd, &sample->time, sizeof(uint64_t)) != len)
+		return;
+	if (write(fd, &sample->nr, sizeof(uint64_t)) != len)
+		return;
+	for (i = 0; i < sample->nr; i++) {
+		if (write(fd, &sample->ips[i], sizeof(uint64_t)) != len)
+			return;
+	}
+}
+
+/*
+ * Drain the perf ring buffer of @cpu and hand every PERF_RECORD_SAMPLE
+ * record to handle_event().
+ * @cpu: per CPU context with the mmap'ed perf buffer
+ * @fd: descriptor of the temporary file the samples are written to
+ *
+ * data_head and data_tail are treated as free running byte counters: they
+ * are never wrapped, only the offset used to index the data area is. A
+ * record that crosses the end of the data area is copied into a bounce
+ * buffer, so that it is always handled as one contiguous block.
+ */
+static void perf_event_read(struct perf_cpu *cpu, int fd)
+{
+	struct perf_event_mmap_page *mpage = cpu->mpage;
+	struct perf_event_header *header;
+	uint64_t size = (uint64_t)BUF_SHIFT * getpagesize();
+	unsigned char *data = (unsigned char *)mpage + getpagesize();
+	unsigned char *copy = NULL;
+	uint64_t head, tail, offset, chunk;
+
+	/* Read data_head first; the records must not be read ahead of it */
+	head = __atomic_load_n(&mpage->data_head, __ATOMIC_ACQUIRE);
+	tail = mpage->data_tail;
+
+	while (tail != head) {
+		offset = tail % size;
+		header = (struct perf_event_header *)(data + offset);
+		if (header->size == 0 || header->size > head - tail) {
+			/* Broken or overwritten record: drop what is left */
+			tail = head;
+			break;
+		}
+
+		if (offset + header->size > size) {
+			/* header->size is 16 bit wide, UINT16_MAX fits any record */
+			if (!copy)
+				copy = malloc(UINT16_MAX);
+			if (!copy) {
+				tail = head;
+				break;
+			}
+			chunk = size - offset;
+			memcpy(copy, data + offset, chunk);
+			memcpy(copy + chunk, data, header->size - chunk);
+			header = (struct perf_event_header *)copy;
+		}
+
+		tail += header->size;
+		if (header->type == PERF_RECORD_SAMPLE)
+			handle_event((struct perf_event_sample *)header, cpu, fd);
+	}
+	free(copy);
+
+	/* Give the space back only after the records have been consumed */
+	__atomic_store_n(&mpage->data_tail, tail, __ATOMIC_RELEASE);
+}
+
+/*
+ * Fill @pe with the event configuration shared by all CPUs: CPU cycles of
+ * user space code, sampled by frequency, each sample carrying a callchain.
+ */
+static void perf_init_pe(struct perf_event_attr *pe)
+{
+	memset(pe, 0, sizeof(*pe));
+	pe->size = sizeof(*pe);
+
+	/* What is counted and how often it is sampled */
+	pe->type = PERF_TYPE_HARDWARE;
+	pe->config = PERF_COUNT_HW_CPU_CYCLES;
+	pe->freq = 1;
+	pe->sample_freq = 1000;
+	pe->precise_ip = 1;
+
+	/* Content of a sample, struct perf_event_sample mirrors this set */
+	pe->sample_type = PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_TID |
+			  PERF_SAMPLE_TIME | PERF_SAMPLE_CPU |
+			  PERF_SAMPLE_PERIOD | PERF_SAMPLE_CALLCHAIN;
+	pe->sample_id_all = 1;
+	pe->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+			  PERF_FORMAT_TOTAL_TIME_RUNNING |
+			  PERF_FORMAT_ID;
+
+	/* Created disabled, perf_enable() starts the event */
+	pe->disabled = 1;
+	pe->exclude_kernel = 1;
+	pe->inherit = 1;
+
+	/* Additional record types; mmap2 is not requested */
+	pe->mmap = 1;
+	pe->comm = 1;
+	pe->comm_exec = 1;
+	pe->task = 1;
+	pe->ksymbol = 1;
+	pe->bpf_event = 1;
+}
+
+/*
+ * Start or stop the perf event of one CPU.
+ * @perf: per CPU context holding the event file descriptor
+ * @enable: true resets the counter and enables the event, false disables it
+ *
+ * Returns the result of the last ioctl() issued, -1 if any of them failed.
+ */
+static int perf_enable(struct perf_cpu *perf, bool enable)
+{
+	if (!enable)
+		return ioctl(perf->fd, PERF_EVENT_IOC_DISABLE, 0);
+
+	/* A successful enable must not hide a failed reset */
+	if (ioctl(perf->fd, PERF_EVENT_IOC_RESET, 0) < 0)
+		return -1;
+
+	return ioctl(perf->fd, PERF_EVENT_IOC_ENABLE, 0);
+}
+
+/*
+ * Map the perf buffer of one CPU: one control page followed by BUF_SHIFT
+ * data pages.
+ * @perf: per CPU context; on success @perf->mpage points to the mapping
+ *
+ * Returns 0 on success. On failure -1 is returned and @perf->mpage is NULL,
+ * so that the "if (mpage)" test of the cleanup code stays valid.
+ */
+static int perf_mmap(struct perf_cpu *perf)
+{
+	/* associate a buffer with the file */
+	perf->mpage = mmap(NULL, (BUF_SHIFT + 1)*getpagesize(),
+			PROT_READ | PROT_WRITE, MAP_SHARED, perf->fd, 0);
+	if (perf->mpage == MAP_FAILED) {
+		perf->mpage = NULL;
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Read the counter of one CPU from its perf event descriptor and print it.
+ * Nothing is printed if the read fails or returns no data.
+ */
+static void perf_dump_stat(struct perf_cpu *perf)
+{
+	struct perf_stat stat;
+
+	if (read(perf->fd, &stat, sizeof(stat)) <= 0)
+		return;
+
+	printf("cpu %d: %lld %lld %lld %lld\n",
+	       perf->cpu, stat.count, stat.time_enabled,
+	       stat.time_running, stat.id);
+}
+
+/*
+ * Release everything owned by a perf session: the per CPU descriptors,
+ * mappings and temporary files, the symbol resolving context and the
+ * session itself.
+ * @perf: session to destroy, must not be NULL
+ */
+static void perf_session_destroy(struct perf_session *perf)
+{
+	struct perf_cpu *cpu;
+	int i;
+
+	for (i = 0; perf->cpus && i < perf->cpu_count; i++) {
+		cpu = &perf->cpus[i];
+		if (cpu->fd >= 0)
+			close(cpu->fd);
+		/* A failed mmap() leaves MAP_FAILED behind, not NULL */
+		if (cpu->mpage && cpu->mpage != MAP_FAILED)
+			munmap(cpu->mpage, (BUF_SHIFT + 1)*getpagesize());
+		if (cpu->file) {
+			remove(cpu->file);
+			free(cpu->file);
+		}
+	}
+	/* Free the array even when cpu_count has not been set yet */
+	free(perf->cpus);
+
+	if (perf->fdebug)
+		trace_debug_obj_destroy(perf->fdebug);
+
+	free(perf);
+}
+
+#define TMP_FILE "/tmp/perf_temp_data.XXXXXX"
+/*
+ * Create a perf session for @pid: a symbol resolving context for the
+ * process and, for every CPU, a perf event with its mmap'ed buffer and an
+ * empty temporary file for the raw samples.
+ * @pid: PID of the process to profile
+ *
+ * Returns the new session, to be released with perf_session_destroy(), or
+ * NULL on failure.
+ */
+static struct perf_session *perf_session_new(int pid)
+{
+	int cpus = tracecmd_count_cpus();
+	struct perf_event_attr pe;
+	struct perf_session *perf;
+	char template[] = TMP_FILE;
+	int tmp_fd;
+	int i;
+
+	/* There is one session only, the per CPU data lives in perf->cpus */
+	perf = calloc(1, sizeof(*perf));
+	if (!perf)
+		return NULL;
+	perf->fdebug = trace_debug_obj_create_pid(pid);
+	if (!perf->fdebug)
+		goto error;
+	perf->cpus = calloc(cpus, sizeof(struct perf_cpu));
+	if (!perf->cpus)
+		goto error;
+	for (i = 0; i < cpus; i++)
+		perf->cpus[i].fd = -1;
+	/* Set the count now, so that the error path cleans up every CPU */
+	perf->cpu_count = cpus;
+	perf->pid = pid;
+
+	perf_init_pe(&pe);
+	for (i = 0; i < cpus; i++) {
+		perf->cpus[i].fd = perf_event_open(&pe, pid, i, -1, 0);
+		if (perf->cpus[i].fd < 0)
+			goto error;
+		fcntl(perf->cpus[i].fd, F_SETFL, O_NONBLOCK);
+		perf->cpus[i].cpu = i;
+		if (perf_mmap(&perf->cpus[i]) < 0) {
+			/* Keep MAP_FAILED away from the munmap() of the cleanup */
+			perf->cpus[i].mpage = NULL;
+			goto error;
+		}
+		/* mkstemp() creates the file atomically, unlike mktemp() */
+		strcpy(template, TMP_FILE);
+		tmp_fd = mkstemp(template);
+		if (tmp_fd < 0)
+			goto error;
+		close(tmp_fd);
+		perf->cpus[i].file = strdup(template);
+		if (!perf->cpus[i].file) {
+			remove(template);
+			goto error;
+		}
+	}
+	return perf;
+error:
+	perf_session_destroy(perf);
+	return NULL;
+}
+
+/*
+ * Reader thread of one CPU.
+ * @context: the struct perf_cpu this thread serves
+ *
+ * Opens the temporary file of the CPU, enables the perf event and then
+ * keeps copying samples from the perf buffer into the file. The loop has no
+ * exit condition of its own; perf_stop_readers() ends the thread with
+ * pthread_cancel(). Returns NULL if the temporary file cannot be opened.
+ *
+ * NOTE(review): the descriptor opened here is not closed when the thread is
+ * cancelled, and @rset is only referenced by the commented out select() code.
+ */
+void *perf_read_thread(void *context)
+{
+	struct perf_cpu *cpu = (struct perf_cpu *)context;
+	struct perf_stat stat;
+	fd_set rset;
+	int ret;
+	int fd;
+
+	fd = open(cpu->file, O_WRONLY|O_CREAT|O_TRUNC, 0600);
+	if (fd < 0)
+		return NULL;
+
+	perf_enable(cpu, true);
+
+	while (1) {
+//		FD_ZERO(&rset);
+//		FD_SET(cpu->fd, &rset);
+//		ret = select(cpu->fd + 1, &rset, NULL, NULL, NULL);
+		/* The result of this read is not checked, the buffer is drained anyway */
+		ret = read(cpu->fd, &stat, sizeof(stat));
+//		if ( ret > 0 && stat.count > 0) {
+//			printf("Got %lld on CPU %d\n", stat.count, cpu->cpu);
+			perf_event_read(cpu, fd);
+//		}
+	}
+	return NULL;
+}
+
+#define PERF_READER_SCHEDULER SCHED_RR
+/*
+ * Start one reader thread per CPU of the session.
+ * @perf: the perf session
+ *
+ * Returns 0 when all readers run. If a thread cannot be created, the
+ * readers started so far are cancelled and joined, and -1 is returned.
+ */
+static int perf_run_readers(struct perf_session *perf)
+{
+	struct sched_param sched;
+	pthread_attr_t attr;
+	int ret = 0;
+	int i;
+
+	sched.sched_priority = sched_get_priority_max(PERF_READER_SCHEDULER);
+	pthread_attr_init(&attr);
+	pthread_attr_setschedpolicy(&attr, PERF_READER_SCHEDULER);
+	pthread_attr_setschedparam(&attr, &sched);
+	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+
+	for (i = 0; i < perf->cpu_count; i++) {
+		if (pthread_create(&perf->cpus[i].tid, &attr,
+				   perf_read_thread, &perf->cpus[i])) {
+			ret = -1;
+			break;
+		}
+	}
+	pthread_attr_destroy(&attr);
+
+	if (ret < 0) {
+		/* No reader may outlive a session the caller is going to drop */
+		while (--i >= 0) {
+			pthread_cancel(perf->cpus[i].tid);
+			pthread_join(perf->cpus[i].tid, NULL);
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Stop the reader threads of all CPUs: request the cancellation of every
+ * thread first, then wait for each of them. Always returns 0.
+ */
+static int perf_stop_readers(struct perf_session *perf)
+{
+	int cpu = 0;
+
+	while (cpu < perf->cpu_count) {
+		pthread_cancel(perf->cpus[cpu].tid);
+		cpu++;
+	}
+
+	cpu = 0;
+	while (cpu < perf->cpu_count) {
+		pthread_join(perf->cpus[cpu].tid, NULL);
+		cpu++;
+	}
+
+	return 0;
+}
+
+/*
+ * Print the samples stored in the temporary file of one CPU and register
+ * every callchain address for symbol resolving.
+ * @perf: the perf session
+ * @cpu: index of the CPU whose file is processed
+ *
+ * Each record in the file is a timestamp, an entry count and that many
+ * addresses. Processing stops at the end of the file or at the first
+ * incomplete record.
+ */
+static void perf_collect_results(struct perf_session *perf, int cpu)
+{
+	unsigned long long stamp, nr, addr;
+	int idx;
+	int in;
+
+	in = open(perf->cpus[cpu].file, O_RDONLY, 0600);
+	if (in < 0)
+		return;
+
+	for (;;) {
+		if (read(in, &stamp, sizeof(stamp)) != sizeof(stamp))
+			break;
+		if (read(in, &nr, sizeof(nr)) != sizeof(nr))
+			break;
+
+		printf(" %lld %lld: ", stamp, nr);
+		for (idx = 0; idx < nr; idx++) {
+			if (read(in, &addr, sizeof(addr)) != sizeof(addr))
+				break;
+			if (trace_debug_add_resolve_symbol(perf->fdebug, addr, NULL) < 0)
+				printf("0x%llx(unknown) ", addr);
+			else
+				printf("0x%llx ", addr);
+		}
+		printf("\n");
+		/* The address loop ended early: the record is truncated */
+		if (idx < nr)
+			break;
+	}
+	close(in);
+}
+
+/*
+ * Run the collection: start the readers, let them sample for two seconds,
+ * disable the events, stop the readers and print the per CPU results.
+ * Returns 0 on success, -1 if the readers cannot be started.
+ */
+static int perf_collect(struct perf_session *perf)
+{
+	int cpu;
+
+	if (perf_run_readers(perf) != 0)
+		return -1;
+
+	printf("Collecting callstack of pid %d\n", perf->pid);
+	sleep(2);
+
+	cpu = 0;
+	while (cpu < perf->cpu_count)
+		perf_enable(&perf->cpus[cpu++], false);
+
+	perf_stop_readers(perf);
+
+	for (cpu = 0; cpu < perf->cpu_count; cpu++) {
+		struct perf_cpu *entry = &perf->cpus[cpu];
+
+		perf_dump_stat(entry);
+		perf_collect_results(perf, cpu);
+	}
+
+	return 0;
+}
+
+/* State handed to perf_symbols_walk() for every resolved symbol. */
+struct symb_walk_context {
+	struct trace_seq *s;	/* receives one text line per symbol */
+	int count;	/* number of symbols written so far */
+};
+
+/*
+ * Symbol walk callback: append "<file> <function> <start> <address>" for
+ * @symb to the sequence of the walk context and count the symbol.
+ * @symb: the symbol to write
+ * @data: the struct symb_walk_context of the walk
+ *
+ * Always returns 0.
+ */
+static int perf_symbols_walk(struct tracecmd_debug_symbols *symb, void *data)
+{
+	struct symb_walk_context *walk = data;
+
+	trace_seq_printf(walk->s, "%s %s %llx %llx\n",
+			 symb->fname, symb->name,
+			 symb->vma_start, symb->vma_near);
+	walk->count += 1;
+
+	return 0;
+}
+
+/*
+ * Build the text of the TRACECMD_OPTION_PID_SYMBOLS option, a
+ * "<pid in hex> <symbol count>" line followed by one line per resolved
+ * symbol, and store it in the output file.
+ * @handle: output handle, may be NULL
+ * @perf: the perf session whose resolved symbols are written
+ *
+ * If @handle is NULL or no symbol was resolved, the text is printed instead.
+ */
+static void
+add_pid_symbols(struct tracecmd_output *handle, struct perf_session *perf)
+{
+	struct symb_walk_context context;
+	struct trace_seq body, head;
+
+	trace_seq_init(&body);
+	trace_seq_init(&head);
+	context.s = &body;
+	context.count = 0;
+	trace_debug_walk_resolved_symbols(perf->fdebug, perf_symbols_walk, &context);
+	trace_seq_printf(&head, "%x %d\n", perf->pid, context.count);
+	trace_seq_terminate(&body);
+	trace_seq_puts(&head, body.buffer);
+	/*
+	 * Terminate after the body is appended: the option is stored with
+	 * head.len + 1 bytes and the reader only accepts it when the last
+	 * byte is NUL.
+	 */
+	trace_seq_terminate(&head);
+	if (handle && context.count)
+		tracecmd_add_option(handle, TRACECMD_OPTION_PID_SYMBOLS,
+				    head.len + 1, head.buffer);
+	else
+		trace_seq_do_printf(&head);
+	trace_seq_destroy(&body);
+	trace_seq_destroy(&head);
+}
+
+/*
+ * Resolve the collected addresses to symbols and emit the resulting table.
+ * No output handle is passed on, so the table is printed. Always returns 0.
+ */
+static int perf_resolve_trace(struct perf_session *perf)
+{
+	struct tracecmd_output *no_file = NULL;
+
+	trace_debug_resolve_symbols(perf->fdebug);
+	add_pid_symbols(no_file, perf);
+
+	return 0;
+}
+
+/*
+ * Profile the process @pid: set up a perf session, collect callchains,
+ * resolve them to symbols and release the session.
+ * @pid: PID of the process to profile
+ *
+ * Returns 0 on success, -1 if the session cannot be created or the
+ * collection fails.
+ */
+int perf_run(int pid)
+{
+	struct perf_session *perf;
+	int ret;
+
+	perf = perf_session_new(pid);
+	if (perf == NULL)
+		return -1;
+
+	/* Nothing to resolve if no data was collected */
+	ret = perf_collect(perf);
+	if (ret == 0)
+		ret = perf_resolve_trace(perf);
+
+	perf_session_destroy(perf);
+	return ret;
+}
+
+/* Values returned by getopt_long() for the long options of "trace-cmd perf" */
+enum {
+	OPT_pid	= 255,
+};
+
+/*
+ * Entry point of the "trace-cmd perf" sub command.
+ * @argc: number of command line arguments
+ * @argv: command line arguments, argv[1] is expected to be "perf"
+ *
+ * Options:
+ *   --pid <PID>, -p <PID>   PID of the process to profile, mandatory
+ *   --help, -h              print the usage
+ *
+ * usage() is called for a missing or invalid PID and for unknown options.
+ */
+void trace_perf(int argc, char **argv)
+{
+	long pid = 0;
+	char *end;
+	int c;
+
+
+	if (argc < 2 || strcmp(argv[1], "perf") != 0) {
+		usage(argv);
+		return;
+	}
+	for (;;) {
+		int option_index = 0;
+		static struct option long_options[] = {
+			{"pid", required_argument, NULL, OPT_pid},
+			{"help", no_argument, NULL, '?'},
+			{NULL, 0, NULL, 0}
+		};
+
+		c = getopt_long (argc-1, argv+1, "+hp:",
+				long_options, &option_index);
+		if (c == -1)
+			break;
+		switch (c) {
+
+		case OPT_pid:
+		case 'p':
+			/* Unlike atoi(), strtol() lets bad input be rejected */
+			errno = 0;
+			pid = strtol(optarg, &end, 10);
+			if (errno || end == optarg || *end != '\0' ||
+			    pid <= 0 || pid > INT32_MAX) {
+				usage(argv);
+				return;
+			}
+			break;
+		case 'h':
+		case '?':
+		default:
+			usage(argv);
+			return;
+		}
+	}
+	/* Without a PID perf would be attached to trace-cmd itself */
+	if (pid <= 0) {
+		usage(argv);
+		return;
+	}
+	if (perf_run(pid) < 0)
+		fprintf(stderr, "Failed to collect perf data of pid %ld\n", pid);
+}
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-08-07 12:07 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-07 12:06 [PATCH 0/4][WiP] trace-cmd: Add "perf" sub command Tzvetomir Stoyanov (VMware)
2020-08-07 12:06 ` [PATCH 1/4] trace-cmd: Internal refactoring of pid address map logic Tzvetomir Stoyanov (VMware)
2020-08-07 12:06 ` [PATCH 2/4] trace-cmd: New internal APIs for reading ELF header Tzvetomir Stoyanov (VMware)
2020-08-07 12:06 ` [PATCH 3/4] trace-cmd: Add a new option in trace.dat file for the address to function name mapping Tzvetomir Stoyanov (VMware)
2020-08-07 12:06 ` [PATCH 4/4] trace-cmd: Add new subcomand "trace-cmd perf" Tzvetomir Stoyanov (VMware)

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).