All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH v2 0/3] trace-cruncher: Initial support for perf
@ 2022-04-08 10:03 Tzvetomir Stoyanov (VMware)
  2022-04-08 10:03 ` [RFC PATCH v2 1/3] trace-cruncher: Logic for resolving address to function name Tzvetomir Stoyanov (VMware)
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Tzvetomir Stoyanov (VMware) @ 2022-04-08 10:03 UTC (permalink / raw)
  To: y.karadz; +Cc: rostedt, linux-trace-devel

Two major functionalities are introduced by this patch set:
 - VMA <-> function name resolving, using bfd library.
 - Support for Linux kernel perf framework, using perf library.

v2 changes:
 - Renamed a lot of APIs and internal functions, to have more consistent
   names and prefixes.
 - Added support for starting an application that is going to be traced.
 - Added support for setting the time duration of collecting the traces.
 - Handle ctrl-c into the C library instead of the user application.
 - Coding style fixes.

Tzvetomir Stoyanov (VMware) (3):
  trace-cruncher: Logic for resolving address to function name
  trace-cruncher: Support for perf
  trace-cruncher: perf example

 examples/perf_sampling.py |  53 ++
 setup.py                  |   9 +-
 src/perfpy-utils.c        | 896 ++++++++++++++++++++++++++++++++++
 src/perfpy-utils.h        |  43 ++
 src/perfpy.c              | 141 ++++++
 src/trace-obj-debug.c     | 982 ++++++++++++++++++++++++++++++++++++++
 src/trace-obj-debug.h     |  54 +++
 7 files changed, 2176 insertions(+), 2 deletions(-)
 create mode 100755 examples/perf_sampling.py
 create mode 100644 src/perfpy-utils.c
 create mode 100644 src/perfpy-utils.h
 create mode 100644 src/perfpy.c
 create mode 100644 src/trace-obj-debug.c
 create mode 100644 src/trace-obj-debug.h

-- 
2.35.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [RFC PATCH v2 1/3] trace-cruncher: Logic for resolving address to function name
  2022-04-08 10:03 [RFC PATCH v2 0/3] trace-cruncher: Initial support for perf Tzvetomir Stoyanov (VMware)
@ 2022-04-08 10:03 ` Tzvetomir Stoyanov (VMware)
  2022-04-08 10:03 ` [RFC PATCH v2 2/3] trace-cruncher: Support for perf Tzvetomir Stoyanov (VMware)
  2022-04-08 10:03 ` [RFC PATCH v2 3/3] trace-cruncher: perf example Tzvetomir Stoyanov (VMware)
  2 siblings, 0 replies; 6+ messages in thread
From: Tzvetomir Stoyanov (VMware) @ 2022-04-08 10:03 UTC (permalink / raw)
  To: y.karadz; +Cc: rostedt, linux-trace-devel

Resolving virtual address to function name and vice versa is useful
functionality for a trace application. Trace-cruncher can use it in two
use cases:
 - Resolving VMA to function name, when collecting user application
   performance traces with perf.
 - Resolving function name to VMA, when using ftrace uprobe dynamic
   events.

Proposed implementation uses the bfd library to parse the binary files
and read the symbol table. This information is available only if the
files are not stripped.

Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
---
 src/trace-obj-debug.c | 982 ++++++++++++++++++++++++++++++++++++++++++
 src/trace-obj-debug.h |  54 +++
 2 files changed, 1036 insertions(+)
 create mode 100644 src/trace-obj-debug.c
 create mode 100644 src/trace-obj-debug.h

diff --git a/src/trace-obj-debug.c b/src/trace-obj-debug.c
new file mode 100644
index 0000000..a6da22f
--- /dev/null
+++ b/src/trace-obj-debug.c
@@ -0,0 +1,982 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2020, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
+ *
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+#include <bfd.h>
+#include <unistd.h>
+#include <dlfcn.h>
+#include <fnmatch.h>
+#include <ctype.h>
+
+#include "trace-obj-debug.h"
+
+//#define DEBUG_INTERNALS
+
+/* Got from demangle.h */
+#define DMGL_AUTO (1 << 8)
+
+/* Per-file bfd library context. */
+struct debug_bfd_handle {
+	bfd *bfd;
+	/* Offset between addresses in the file and VMAs in the process memory. */
+	unsigned long long addr_offset;
+};
+
+/* How a requested symbol name is matched against the symbol table. */
+enum match_type {
+	MATCH_EXACT	= 0,
+	/* NOTE(review): "MATH" looks like a typo for "MATCH"; internal only. */
+	MATH_WILDCARD	= 1,
+};
+
+/* A single symbol resolving request, linked in a list. */
+struct debug_symbols {
+	struct debug_symbols		*next;
+	struct dbg_trace_symbols	symbol;
+	enum match_type			match;
+};
+
+struct debug_file {
+	struct debug_file		*next;
+	char				*file_name;
+
+	/* Start and end address, where this file is mapped into the memory of the process. */
+	unsigned long long		vmem_start;
+	unsigned long long		vmem_end;
+
+	/* bfd library context for this file. */
+	struct debug_bfd_handle	*dbg;
+
+	/* Symbols to resolve, search in this file only. */
+	int				sym_count;
+	struct debug_symbols		*sym;
+};
+
+struct dbg_trace_context {
+	/* PID or name of the process. */
+	int				pid;
+	char				*fname;
+
+	/* List of all libraries and object files, mapped in the memory of the process. */
+	struct dbg_trace_pid_maps	*fmaps;
+
+	/* Symbols to resolve, search in all files. */
+	int				sym_count;
+	struct debug_symbols		*sym;
+
+	/* List of all libraries and object files, opened from bfd library for processing. */
+	struct debug_file		*files;
+};
+
+/* Bitmask of what a resolving job should produce. */
+#define RESOLVE_NAME		(1 << 0)
+#define RESOLVE_VMA		(1 << 1)
+#define RESOLVE_FOFFSET		(1 << 2)
+
+/* A resolving job, passed to the bfd processing routines. */
+struct debug_obj_job {
+	unsigned int flags;		/* RESOLVE_* bitmask */
+	unsigned long long addr_offset;	/* VMA adjustment for dynamic objects */
+	struct debug_symbols *symbols;	/* list of requests to resolve */
+};
+
+/* Symbol table plus the current job, passed to the bfd section hooks. */
+struct debug_dwarf_bfd_context {
+	asymbol **table;
+	struct debug_obj_job *job;
+};
+
+/*
+ * A hook, called from bfd library for each section of the file.
+ * The logic is used for symbol name and file offset resolving from given symbol VMA.
+ */
+static void process_bfd_section(bfd *abfd, asection *section, void *param)
+{
+	struct debug_dwarf_bfd_context *context = (struct debug_dwarf_bfd_context *)param;
+	unsigned int discriminator;
+	const char *functionname;
+	struct debug_symbols *s;
+	unsigned long long vma;
+	const char *filename;
+	unsigned int line;
+	bfd_boolean found;
+
+	/* Skip sections that have no code. */
+	if (!(section->flags & SEC_CODE))
+		return;
+
+	/* Loop through all symbols, that have to be resolved. */
+	for (s = context->job->symbols; s; s = s->next) {
+		if (s->symbol.vma_near)
+			vma = s->symbol.vma_near;
+		else if (s->symbol.vma_start)
+			vma = s->symbol.vma_start;
+		else
+			continue;
+
+		/* Adjust the symbol's VMA, if this section is dynamically loaded. */
+		if (abfd->flags & DYNAMIC)
+			vma -=  context->job->addr_offset;
+
+		/* Check if requested symbol's vma is within the current section. */
+		if (vma && section->vma <= vma &&
+		    (section->vma + section->size) > vma) {
+
+			/* Found the file, where the symbol is defined. */
+			if (!s->symbol.fname)
+				s->symbol.fname = strdup(abfd->filename);
+
+			/* Found the offset of the symbol within the file. */
+			if (context->job->flags & RESOLVE_FOFFSET)
+				s->symbol.foffset = section->filepos + (vma - section->vma);
+
+			/* Look for the nearest function. */
+			if (!s->symbol.name && (context->job->flags & RESOLVE_NAME)) {
+				found = bfd_find_nearest_line_discriminator(abfd, section, context->table,
+									    vma - section->vma, &filename,
+									    &functionname, &line, &discriminator);
+#ifdef DEBUG_INTERNALS
+				/* Use %llx: the arguments are unsigned long long, %X is undefined for them. */
+				printf("Find addr near 0x%llx, offset 0x%llx - > vma - 0x%llx in %s, found %s\n\r",
+					s->symbol.vma_near, context->job->addr_offset, vma, abfd->filename,
+					found ? functionname : "NA");
+#endif
+				if (found) {
+					/* Demangle the name of the function. */
+					s->symbol.name = bfd_demangle(abfd, functionname, DMGL_AUTO);
+					/* Found the name of the symbol. */
+					if (!s->symbol.name)
+						s->symbol.name = strdup(functionname);
+				}
+			}
+		}
+	}
+}
+
+/* Load the symbol table from the file.
+ * Returns a NULL-terminated array of symbols, to be freed by the caller,
+ * or NULL if the file has no usable symbols.
+ */
+static asymbol **get_sym_table(bfd *handle)
+{
+	long size, ssize, dsize;
+	asymbol **symtable;
+	long count, dcount;
+
+	if ((bfd_get_file_flags(handle) & HAS_SYMS) == 0)
+		return NULL;
+
+	/* Size the buffer for both the static and the dynamic symbol tables. */
+	dsize = bfd_get_dynamic_symtab_upper_bound(handle);
+	size = dsize > 0 ? dsize : 0;
+
+	ssize = bfd_get_symtab_upper_bound(handle);
+	size += ssize > 0 ? ssize : 0;
+
+	if (size <= 0)
+		return NULL;
+
+	symtable = calloc(1, size);
+	if (!symtable)
+		return NULL;
+
+	/* Treat negative (error) return values as empty, so a bfd error cannot
+	 * produce a negative index into the table. */
+	count = bfd_canonicalize_symtab(handle, symtable);
+	if (count < 0)
+		count = 0;
+	dcount = bfd_canonicalize_dynamic_symtab(handle, symtable + count);
+	if (dcount > 0)
+		count += dcount;
+	if (count <= 0) {
+		free(symtable);
+		return NULL;
+	}
+
+	return symtable;
+}
+
+/* Match the requested name to the name of the symbol. Handle a wildcard match. */
+static bool symbol_match(char *pattern, enum match_type match, const char *symbol)
+{
+	switch (match) {
+	case MATCH_EXACT:
+		return strlen(pattern) == strlen(symbol) &&
+		       strcmp(pattern, symbol) == 0;
+	case MATH_WILDCARD:
+		return fnmatch(pattern, symbol, 0) == 0;
+	}
+
+	return false;
+}
+
+/* Lookup in the file's symbol table.
+ * The logic is used for symbol VMA resolving from given symbol name.
+ * Returns the number of matched symbols.
+ */
+static int lookup_bfd_sym(struct debug_dwarf_bfd_context *context)
+{
+	struct debug_symbols *s, *last = NULL;
+	struct debug_symbols *new, *new_list = NULL;
+	unsigned long long vma;
+	asymbol **sp;
+	int res = 0;
+
+	for (sp = context->table; *sp != NULL; sp++) {
+		/* Skip the symbol, if it is not a function. */
+		if (!((*sp)->flags & BSF_FUNCTION))
+			continue;
+		/* Loop through all symbols that should be resolved. */
+		for (s = context->job->symbols; s; s = s->next) {
+			if (!s->symbol.name)
+				continue;
+			/* Remember the last visited named request, the wildcard
+			 * results collected in new_list are appended after it below. */
+			last = s;
+			if (!symbol_match(s->symbol.name, s->match, (*sp)->name))
+				continue;
+#ifdef DEBUG_INTERNALS
+			printf("Matched %s, pattern %s\n\r", (*sp)->name, s->symbol.name);
+#endif
+			vma = (*sp)->value + (*sp)->section->vma;
+			/* Adjust the VMA, if the section is dynamically loaded. */
+			if ((*sp)->the_bfd->flags & DYNAMIC)
+				vma += context->job->addr_offset;
+			if (s->match == MATCH_EXACT) {
+				/* Exact match, update the VMA. */
+				s->symbol.vma_start = vma;
+			} else if (s->match == MATH_WILDCARD) {
+				/* Wildcard pattern match, create a new symbol. */
+				new = calloc(1, sizeof(struct debug_symbols));
+				if (!new)
+					break;
+				new->symbol.name = strdup((*sp)->name);
+				new->symbol.vma_start = vma;
+				new->symbol.vma_near = s->symbol.vma_near;
+				new->symbol.foffset = s->symbol.foffset;
+				new->symbol.cookie = s->symbol.cookie;
+				if (s->symbol.fname)
+					new->symbol.fname = strdup(s->symbol.fname);
+				new->next = new_list;
+				new_list = new;
+			}
+			res++;
+		}
+	}
+	/* Append the wildcard results to the request list.
+	 * NOTE(review): if the last visited named request is not the list tail
+	 * (an unnamed request follows it), new_list is silently dropped — verify.
+	 */
+	if (last && !last->next)
+		last->next = new_list;
+
+	return res;
+}
+
+/* Process a bfd object from the file: run the requested resolving job on it.
+ * Returns 0 on success, -1 if the bfd is not a usable object or has no symbols.
+ */
+static int process_bfd_object(bfd *abfd, struct debug_obj_job *job)
+{
+	struct debug_dwarf_bfd_context context = { .job = job };
+
+	/* Only object files and core dumps can be processed. */
+	if (!bfd_check_format_matches(abfd, bfd_object, NULL) &&
+	    !bfd_check_format_matches(abfd, bfd_core, NULL))
+		return -1;
+
+	context.table = get_sym_table(abfd);
+	if (!context.table)
+		return -1;
+
+	/* Resolve VMA from the symbol table. */
+	if (job->flags & RESOLVE_VMA)
+		lookup_bfd_sym(&context);
+
+	/* Resolve symbol name and file offset from file's sections. */
+	if (job->flags & (RESOLVE_NAME | RESOLVE_FOFFSET))
+		bfd_map_over_sections(abfd, process_bfd_section, &context);
+
+	free(context.table);
+	return 0;
+}
+
+/* Open a bfd archive file and read all objects.
+ * For a plain object file, process it directly.
+ * Returns the result of the last processed object, or -1 on archive errors.
+ */
+static int read_all_bfd(bfd *abfd, struct debug_obj_job *job)
+{
+	bfd *last_arfile = NULL;
+	bfd *arfile = NULL;
+	int ret = 0;
+
+	if (bfd_check_format(abfd, bfd_archive)) {
+		for (;;) {
+			bfd_set_error(bfd_error_no_error);
+			arfile = bfd_openr_next_archived_file(abfd, arfile);
+			if (!arfile) {
+				/* End of archive is expected; anything else is an error.
+				 * Always stop here: recursing with a NULL bfd would crash. */
+				if (bfd_get_error() != bfd_error_no_more_archived_files)
+					ret = -1;
+				break;
+			}
+			ret = read_all_bfd(arfile, job);
+			/* Close the previous member only after the next one is open. */
+			if (last_arfile)
+				bfd_close(last_arfile);
+			last_arfile = arfile;
+		}
+		if (last_arfile)
+			bfd_close(last_arfile);
+	} else
+		ret = process_bfd_object(abfd, job);
+
+	return ret;
+}
+
+/**
+ * resolve_symbol_vma - name -> (vma, file offset) resolving
+ * @obj - pointer to object, returned by debug_handle_create()
+ * @symbols - link list with desired symbols, with given name
+ *
+ * Get VMA and file offset of the symbols with given name.
+ * Return 0 on success, -1 on error.
+ */
+static int resolve_symbol_vma(struct debug_bfd_handle *obj,
+			      struct debug_symbols *symbols)
+{
+	struct debug_obj_job job;
+
+	/* Guard against a missing bfd handle, same as resolve_symbol_name(). */
+	if (!obj || !obj->bfd)
+		return -1;
+
+	memset(&job, 0, sizeof(job));
+	job.flags |= RESOLVE_VMA;
+	job.flags |= RESOLVE_FOFFSET;
+	job.symbols = symbols;
+	job.addr_offset = obj->addr_offset;
+	return read_all_bfd(obj->bfd, &job);
+}
+
+/**
+ * resolve_symbol_name - vma -> name resolving
+ * @obj - pointer to object, returned by debug_handle_create()
+ * @symbols - link list with desired symbols, with given VMA
+ *
+ * Get names of the symbols with given VMA, look for nearest symbol to that VMA.
+ * Return 0 on success, -1 on error.
+ */
+static int resolve_symbol_name(struct debug_bfd_handle *obj,
+			       struct debug_symbols *symbols)
+{
+	struct debug_obj_job job = {
+		.flags = RESOLVE_NAME,
+	};
+
+	if (!obj || !obj->bfd)
+		return -1;
+
+	job.addr_offset = obj->addr_offset;
+	job.symbols = symbols;
+	return read_all_bfd(obj->bfd, &job);
+}
+
+/**
+ * debug_handle_destroy - Close file opened with debug_handle_create()
+ * @obj - pointer to object, returned by debug_handle_create()
+ *
+ * Close the file and free any allocated resources, related to file's debug
+ * information. Safe to call with NULL.
+ */
+static void debug_handle_destroy(struct debug_bfd_handle *obj)
+{
+	if (!obj)
+		return;
+	if (obj->bfd)
+		bfd_close(obj->bfd);
+	free(obj);
+}
+
+/**
+ * debug_handle_create - Open binary file for parsing ELF and DWARF information
+ * @file: Name of the binary ELF file.
+ *
+ * Return pointer to debug_bfd_handle structure, that can be passed to other APIs
+ * for extracting debug information from the file. NULL in case of an error.
+ */
+static struct debug_bfd_handle *debug_handle_create(char *file)
+{
+	struct debug_bfd_handle *obj = NULL;
+
+	obj = calloc(1, sizeof(*obj));
+	if (!obj)
+		return NULL;
+
+	/* NOTE(review): called on every open — presumably safe to call repeatedly; verify. */
+	bfd_init();
+	obj->bfd = bfd_openr(file, NULL);
+	if (!obj->bfd)
+		goto error;
+	/* Ask bfd to transparently decompress compressed debug sections. */
+	obj->bfd->flags |= BFD_DECOMPRESS;
+
+	return obj;
+
+error:
+	debug_handle_destroy(obj);
+	return NULL;
+}
+
+/* Get the full path of process's executable, using the /proc fs.
+ * Returns a newly allocated string, to be freed by the caller, or NULL on error.
+ */
+static char *get_full_name(int pid)
+{
+	char link[PATH_MAX+1];
+	char path[PATH_MAX+1];
+	ssize_t len;
+
+	snprintf(link, sizeof(link), "/proc/%d/exe", pid);
+	len = readlink(link, path, PATH_MAX);
+	if (len < 0 || len >= PATH_MAX)
+		return NULL;
+	/* readlink() does not terminate the string. */
+	path[len] = 0;
+
+	return strdup(path);
+}
+
+/* Get or create a bfd debug context for an object file. */
+static struct debug_file *get_mapped_file(struct dbg_trace_context *dbg,
+					  char *fname,
+					  unsigned long long vmem_start)
+{
+	struct debug_file *file = dbg->files;
+
+	/* Search if the file is already added. Match only an entry whose mapping
+	 * ends exactly where the new one starts.
+	 * NOTE(review): when @vmem_start is 0 the check never matches, so a
+	 * duplicate entry is created for an already-known file — confirm intended.
+	 */
+	while (file) {
+		if (!strcmp(fname, file->file_name) &&
+		    vmem_start && file->vmem_end == vmem_start)
+			return file;
+		file = file->next;
+	}
+
+	file = calloc(1, sizeof(*file));
+	if (!file)
+		return NULL;
+	file->file_name = strdup(fname);
+	if (!file->file_name)
+		goto error;
+	/* A failed debug handle is tolerated; file->dbg stays NULL. */
+	file->dbg = debug_handle_create(fname);
+	file->next = dbg->files;
+	dbg->files = file;
+	return file;
+
+error:
+	free(file->file_name);
+	debug_handle_destroy(file->dbg);
+	free(file);
+	return NULL;
+}
+
+/* Free a linked list of symbol resolving requests. */
+static void free_symbols(struct debug_symbols *sym)
+{
+	struct debug_symbols *del;
+
+	while (sym) {
+		del = sym;
+		sym = sym->next;
+		free(del->symbol.name);
+		free(del->symbol.fname);
+		free(del);
+	}
+}
+
+/* Destroy a bfd debug context: all files, symbols and the file map. */
+void dbg_trace_context_destroy(struct dbg_trace_context *dbg)
+{
+	struct debug_file *del;
+
+	free_symbols(dbg->sym);
+	while (dbg->files) {
+		del = dbg->files;
+		dbg->files = del->next;
+		debug_handle_destroy(del->dbg);
+		free_symbols(del->sym);
+		free(del);
+	}
+
+	free(dbg->fname);
+	dbg_trace_free_filemap(dbg->fmaps);
+	free(dbg);
+}
+
+/* Add an object file, mapped to specific memory of the process.
+ * Returns 0 on success, -1 on error.
+ */
+int dbg_trace_context_add_file(struct dbg_trace_context *dbg, char *file_name,
+			       unsigned long long vmem_start,
+			       unsigned long long vmem_end,
+			       unsigned long long pgoff)
+{
+	struct debug_file *file;
+
+	file = get_mapped_file(dbg, file_name, vmem_start);
+	if (!file)
+		return -1;
+	if (file->vmem_end == vmem_start) {
+		/* The new mapping is contiguous with the known one; extend it. */
+		file->vmem_end = vmem_end;
+	} else {
+		/* A fresh mapping of the file. */
+		file->vmem_start = vmem_start;
+		file->vmem_end = vmem_end;
+		if (file->dbg)
+			/* Offset between file addresses and process VMAs. */
+			file->dbg->addr_offset = vmem_start - pgoff;
+	}
+
+	return 0;
+}
+
+/**
+ * dbg_trace_context_create_pid - create debug context for given PID
+ * @pid - ID of running process
+ * @libs - if true: inspect also all libraries, used by the given process.
+ *
+ * Returns a pointer to allocated debug context, or NULL in case of an error.
+ */
+struct dbg_trace_context *dbg_trace_context_create_pid(int pid, bool libs)
+{
+	struct dbg_trace_context *dbg;
+	unsigned int i;
+
+	dbg = calloc(1, sizeof(*dbg));
+	if (!dbg)
+		return NULL;
+
+	dbg->pid = pid;
+	/* Get the full path of the process executable. */
+	dbg->fname = get_full_name(pid);
+	if (!dbg->fname) {
+		free(dbg);
+		return NULL;
+	}
+
+	/* Get the memory map of all libraries, linked to the process.
+	 * Without the map, nothing below can be inspected. */
+	if (dbg_trace_get_filemap(&dbg->fmaps, pid) < 0 || !dbg->fmaps) {
+		free(dbg->fname);
+		free(dbg);
+		return NULL;
+	}
+
+	for (i = 0; i < dbg->fmaps->nr_lib_maps; i++) {
+		if (!libs && strcmp(dbg->fname, dbg->fmaps->lib_maps[i].lib_name))
+			continue;
+		/* Create a bfd debug object for each file. */
+		dbg_trace_context_add_file(dbg, dbg->fmaps->lib_maps[i].lib_name,
+					   dbg->fmaps->lib_maps[i].start,
+					   dbg->fmaps->lib_maps[i].end, 0);
+	}
+
+	return dbg;
+}
+
+/* Get the full path of a library.
+ * Returns a newly allocated string, to be freed by the caller, or NULL on error.
+ */
+static char *get_lib_full_path(char *libname)
+{
+	/* Load the library in a new namespace, only to query its origin directory. */
+	void *h = dlmopen(LM_ID_NEWLM, libname, RTLD_LAZY);
+	char dldir[PATH_MAX+1];
+	char *fname = NULL;
+	int ret;
+
+	if (!h)
+		return NULL;
+
+	ret = dlinfo(h, RTLD_DI_ORIGIN, dldir);
+	dlclose(h);
+
+	if (ret || asprintf(&fname, "%s/%s", dldir, libname) <= 0)
+		return NULL;
+
+	return fname;
+}
+
+/* Get the memory map of all libraries, linked to an executable file.
+ * Returns 0 on success, -1 if the file cannot be inspected.
+ */
+static int debug_obj_file_add_libs(struct dbg_trace_context *dbg,
+				   struct debug_file *file)
+{
+	char line[PATH_MAX];
+	char *libname;
+	char *trimmed;
+	char *fullname;
+	FILE *fp = NULL;
+	int ret = -1;
+
+	/* Make the dynamic linker list the libraries, instead of running the binary. */
+	setenv("LD_TRACE_LOADED_OBJECTS", "1", 1);
+	fp = popen(file->file_name, "r");
+	if (!fp)
+		goto out;
+
+	while (fgets(line, sizeof(line), fp) != NULL) {
+		libname = strchr(line, ' ');
+		trimmed = line;
+		if (libname) {
+			*libname = '\0';
+			/* Cast to unsigned char: isspace() on a negative char is UB. */
+			while (isspace((unsigned char)*trimmed))
+				trimmed++;
+			if (*trimmed != '/') {
+				/* Relative name, resolve the full path of the library. */
+				fullname = get_lib_full_path(trimmed);
+				if (fullname) {
+					get_mapped_file(dbg, fullname, 0);
+					free(fullname);
+				}
+			} else {
+				get_mapped_file(dbg, trimmed, 0);
+			}
+		}
+	}
+	/* Fix: report success; the original always returned -1 here. */
+	ret = 0;
+
+out:
+	unsetenv("LD_TRACE_LOADED_OBJECTS");
+	if (fp)
+		pclose(fp);
+	return ret;
+}
+
+/**
+ * dbg_trace_context_create_file - create debug context for given executable file
+ * @fname - full path to an executable file
+ * @libs - if true: inspect also all libraries, used by the given file.
+ *
+ * Returns a pointer to allocated debug context, or NULL in case of an error.
+ */
+struct dbg_trace_context *dbg_trace_context_create_file(char *fname, bool libs)
+{
+	struct dbg_trace_context *dbg;
+	struct debug_file *file;
+
+	dbg = calloc(1, sizeof(*dbg));
+	if (!dbg)
+		return NULL;
+
+	dbg->fname = strdup(fname);
+	/* Fix: the strdup() result was not checked. */
+	if (!dbg->fname)
+		goto error;
+	file = get_mapped_file(dbg, fname, 0);
+	if (!file)
+		goto error;
+	if (libs)
+		debug_obj_file_add_libs(dbg, file);
+
+#ifdef DEBUG_INTERNALS
+	printf("Created debug object for %s:\n\r", dbg->fname);
+	file = dbg->files;
+	while (file) {
+		printf("\t%s\n\r", file->file_name);
+		file = file->next;
+	}
+#endif
+	return dbg;
+
+error:
+	dbg_trace_context_destroy(dbg);
+	return NULL;
+}
+
+static void set_unknown(struct debug_symbols *sym, char *file)
+{
+	while (sym) {
+		if (!sym->symbol.fname)
+			sym->symbol.fname = strdup(file);
+		sym = sym->next;
+	}
+}
+
+/* Perform the requested symbols resolving, using the bfd library.
+ * Always returns 0; per-file resolving failures are not reported.
+ */
+int dbg_trace_resolve_symbols(struct dbg_trace_context *obj)
+{
+	struct debug_file *file;
+
+	for (file = obj->files; file; file = file->next) {
+		if (!file->dbg) {
+			/* No debug info; only record the file name of the symbols. */
+			set_unknown(file->sym, file->file_name);
+			continue;
+		}
+		/* Resolve near VMA -> name. */
+		resolve_symbol_name(file->dbg, file->sym);
+		/* Resolve name -> exact VMA. */
+		resolve_symbol_vma(file->dbg, file->sym);
+		/* Context-wide name requests are matched against every file. */
+		resolve_symbol_vma(file->dbg, obj->sym);
+	}
+
+	return 0;
+}
+
+/* Add VMA -> name resolving request.
+ * Returns 0 on success, -1 if no file covers @vma or on allocation failure.
+ */
+static int add_resolve_vma2name(struct dbg_trace_context *obj,
+				unsigned long long vma, int cookie)
+{
+	struct debug_symbols *s = NULL;
+	struct debug_file *file;
+
+	file = obj->files;
+	while (file) {
+		/* Find the file, where the requested VMA is. */
+		if (vma >= file->vmem_start && vma <= file->vmem_end)
+			break;
+		file = file->next;
+	}
+
+	if (file) {
+		s = file->sym;
+		while (s) {
+			/* Check if the given VMA is already added for resolving. */
+			if (s->symbol.vma_near == vma)
+				break;
+			s = s->next;
+		}
+		if (!s) {
+			/* Queue a new request on this file's list. */
+			s = calloc(1, sizeof(*s));
+			if (!s)
+				return -1;
+			s->symbol.cookie = cookie;
+			s->symbol.vma_near = vma;
+			s->symbol.fname = strdup(file->file_name);
+			if (!s->symbol.fname)
+				goto error;
+			s->next = file->sym;
+			file->sym = s;
+			file->sym_count++;
+		}
+	}
+
+	/* Success: a request was queued, or an existing one was found. */
+	if (s)
+		return 0;
+error:
+	/* Also reached by fall-through when no file covers @vma (s == NULL). */
+	if (s) {
+		free(s->symbol.fname);
+		free(s);
+	}
+	return -1;
+}
+
+/* Add name - VMA resolving request, The @name can have wildcards. */
+static int add_resolve_name2vma(struct dbg_trace_context *obj, char *name, int cookie)
+{
+	struct debug_symbols *s;
+
+	/* Do not queue the same name twice. */
+	for (s = obj->sym; s; s = s->next) {
+		if (s->symbol.name && strcmp(name, s->symbol.name) == 0)
+			return 0;
+	}
+
+	s = calloc(1, sizeof(*s));
+	if (!s)
+		return -1;
+	s->symbol.cookie = cookie;
+	s->symbol.name = strdup(name);
+	if (!s->symbol.name) {
+		free(s);
+		return -1;
+	}
+	/* Wildcard characters in the name select pattern matching. */
+	if (strchr(name, '*') || strchr(name, '?'))
+		s->match = MATH_WILDCARD;
+
+	s->next = obj->sym;
+	obj->sym = s;
+	obj->sym_count++;
+
+	return 0;
+}
+
+/**
+ * dbg_trace_add_resolve_symbol - add new resolving request
+ * @obj - debug object context
+ * @vma - VMA->name resolving, if @vma is not 0
+ * @name - name-VMA resolving, if @name is not NULL
+ * @cookie - a cookie, attached to each successful resolving from this request
+ *
+ * Returns 0 if the request is added successfully, or -1 in case of an error.
+ */
+int dbg_trace_add_resolve_symbol(struct dbg_trace_context *obj,
+				 unsigned long long vma, char *name, int cookie)
+{
+	if (!obj)
+		return -1;
+
+	/* A name request takes precedence; a VMA request needs no name. */
+	if (name)
+		return add_resolve_name2vma(obj, name, cookie);
+	if (vma)
+		return add_resolve_vma2name(obj, vma, cookie);
+
+	return -1;
+}
+
+/* Invoke @callback for each symbol in the list; stop on non-zero return. */
+static int walk_symbols(struct debug_symbols *sym,
+			int (*callback)(struct dbg_trace_symbols *, void *),
+			void *context)
+{
+	struct debug_symbols *s;
+
+	for (s = sym; s; s = s->next) {
+		if (callback(&s->symbol, context))
+			return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * dbg_trace_walk_resolved_symbols - walk through all resolved symbols
+ * @obj - debug object context
+ * @callback - a callback hook, called for each resolved symbol.
+ *		If the callback returns non-zero, the walk of that list stops.
+ * @context - a user specified context, passed to the callback
+ */
+void dbg_trace_walk_resolved_symbols(struct dbg_trace_context *obj,
+				     int (*callback)(struct dbg_trace_symbols *, void *),
+				     void *context)
+{
+	struct debug_file *file;
+
+	/* First the context-wide requests, then the per-file ones. */
+	walk_symbols(obj->sym, callback, context);
+	for (file = obj->files; file; file = file->next)
+		walk_symbols(file->sym, callback, context);
+}
+
+/**
+ * trace_debug_free_symbols - free array of debug symbols
+ * @symbols - array with debug symbols
+ * @count - count of the @symbols array
+ *
+ * NOTE(review): every other exported API uses the dbg_trace_ prefix;
+ * this one keeps its name for compatibility.
+ */
+void trace_debug_free_symbols(struct dbg_trace_symbols *symbols, int count)
+{
+	int i;
+
+	if (!symbols)
+		return;
+
+	for (i = 0; i < count; i++) {
+		free(symbols[i].name);
+		free(symbols[i].fname);
+	}
+	free(symbols);
+}
+
+#define _STRINGIFY(x) #x
+#define STRINGIFY(x) _STRINGIFY(x)
+/**
+ * dbg_trace_get_filemap - get a memory map of a process, using /proc fs
+ * @pid_maps - return: list of files, mapped into the process memory
+ * @pid - id of a process
+ *
+ * Returns 0 on success, a negative errno number in case of an error.
+ */
+int dbg_trace_get_filemap(struct dbg_trace_pid_maps **pid_maps, int pid)
+{
+	struct dbg_trace_pid_maps *maps = *pid_maps;
+	struct dbg_trace_proc_addr_map *map;
+	unsigned long long begin, end;
+	struct dbg_trace_pid_maps *m;
+	char mapname[PATH_MAX+1];
+	char fname[PATH_MAX+1];
+	char buf[PATH_MAX+100];
+	unsigned int i;
+	FILE *f;
+	int ret;
+	int res;
+
+	/* Get the full path of the process executable. */
+	sprintf(fname, "/proc/%d/exe", pid);
+	ret = readlink(fname, mapname, PATH_MAX);
+	if (ret >= PATH_MAX || ret < 0)
+		return -ENOENT;
+	mapname[ret] = 0;
+
+	sprintf(fname, "/proc/%d/maps", pid);
+	f = fopen(fname, "r");
+	if (!f)
+		return -ENOENT;
+
+	/* Reuse the entry of this PID, if it is already in the list. */
+	while (maps) {
+		if (pid == maps->pid)
+			break;
+		maps = maps->next;
+	}
+
+	ret = -ENOMEM;
+	if (!maps) {
+		maps = calloc(1, sizeof(*maps));
+		if (!maps)
+			goto out_fail;
+		maps->pid = pid;
+		maps->next = *pid_maps;
+		*pid_maps = maps;
+	} else {
+		/* Drop the stale file maps of the existing entry. */
+		for (i = 0; i < maps->nr_lib_maps; i++)
+			free(maps->lib_maps[i].lib_name);
+		free(maps->lib_maps);
+		maps->lib_maps = NULL;
+		maps->nr_lib_maps = 0;
+		free(maps->proc_name);
+	}
+
+	maps->proc_name = strdup(mapname);
+	if (!maps->proc_name)
+		goto out_fail;
+
+	/* Parse each maps line: "<begin>-<end> <perm> <off> <dev> <inode> <path>". */
+	while (fgets(buf, sizeof(buf), f)) {
+		mapname[0] = '\0';
+		res = sscanf(buf, "%llx-%llx %*s %*x %*s %*d %"STRINGIFY(PATH_MAX)"s",
+			     &begin, &end, mapname);
+		if (res == 3 && mapname[0] != '\0') {
+			/* Grow the array with one entry for this mapping. */
+			map = realloc(maps->lib_maps,
+				      (maps->nr_lib_maps + 1) * sizeof(*map));
+			if (!map)
+				goto out_fail;
+			map[maps->nr_lib_maps].end = end;
+			map[maps->nr_lib_maps].start = begin;
+			map[maps->nr_lib_maps].lib_name = strdup(mapname);
+			if (!map[maps->nr_lib_maps].lib_name)
+				goto out_fail;
+			maps->lib_maps = map;
+			maps->nr_lib_maps++;
+		}
+	}
+
+	fclose(f);
+	return 0;
+
+out_fail:
+	fclose(f);
+	if (maps) {
+		for (i = 0; i < maps->nr_lib_maps; i++)
+			free(maps->lib_maps[i].lib_name);
+		/* Unlink the entry from the list, before freeing it. */
+		if (*pid_maps != maps) {
+			m = *pid_maps;
+			while (m) {
+				if (m->next == maps) {
+					m->next = maps->next;
+					break;
+				}
+				m = m->next;
+			}
+		} else
+			*pid_maps = maps->next;
+		free(maps->lib_maps);
+		maps->lib_maps = NULL;
+		maps->nr_lib_maps = 0;
+		free(maps->proc_name);
+		maps->proc_name = NULL;
+		free(maps);
+	}
+	return ret;
+}
+
+/* Free a single process map entry and all its resources. */
+static void procmap_free(struct dbg_trace_pid_maps *maps)
+{
+	struct dbg_trace_proc_addr_map *m;
+
+	if (!maps)
+		return;
+	if (maps->lib_maps) {
+		for (m = maps->lib_maps; m < maps->lib_maps + maps->nr_lib_maps; m++)
+			free(m->lib_name);
+		free(maps->lib_maps);
+	}
+	free(maps->proc_name);
+	free(maps);
+}
+
+/**
+ * dbg_trace_free_filemap - Free list of files, associated with given process
+ * @maps - list of files, returned by dbg_trace_get_filemap()
+ */
+void dbg_trace_free_filemap(struct dbg_trace_pid_maps *maps)
+{
+	struct dbg_trace_pid_maps *next;
+
+	for (; maps; maps = next) {
+		next = maps->next;
+		procmap_free(maps);
+	}
+}
diff --git a/src/trace-obj-debug.h b/src/trace-obj-debug.h
new file mode 100644
index 0000000..19091ad
--- /dev/null
+++ b/src/trace-obj-debug.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+
+/*
+ * Copyright 2022 VMware Inc, Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
+ */
+
+#ifndef _TC_TRACE_DEBUG_UTILS_
+#define _TC_TRACE_DEBUG_UTILS_
+
+/* --- Debug symbols--- */
+struct dbg_trace_pid_maps {
+	struct dbg_trace_pid_maps	*next;
+	struct dbg_trace_proc_addr_map	*lib_maps;
+	unsigned int			nr_lib_maps;
+	char				*proc_name;
+	int				pid;
+};
+int dbg_trace_get_filemap(struct dbg_trace_pid_maps **file_maps, int pid);
+void dbg_trace_free_filemap(struct dbg_trace_pid_maps *maps);
+
+/* A symbol resolving request and its result. */
+struct dbg_trace_symbols {
+	char *name;			/* symbol's name */
+	char *fname;			/* symbol's file */
+	int cookie;			/* user cookie, attached to the request */
+	unsigned long long vma_start;	/* symbol's start VMA */
+	unsigned long long vma_near;	/* symbol's requested VMA */
+	unsigned long long foffset;	/* symbol's offset in the binary file */
+};
+
+/* A single memory mapping of a file within a process address space. */
+struct dbg_trace_proc_addr_map {
+	unsigned long long	start;
+	unsigned long long	end;
+	char			*lib_name;	/* full path of the mapped file */
+};
+
+struct dbg_trace_context;
+/* Create a debug context from an executable file, or from a running process. */
+struct dbg_trace_context *dbg_trace_context_create_file(char *file, bool libs);
+struct dbg_trace_context *dbg_trace_context_create_pid(int pid, bool libs);
+
+void dbg_trace_context_destroy(struct dbg_trace_context *debug);
+/* Register a memory mapping of an object file within the traced process. */
+int dbg_trace_context_add_file(struct dbg_trace_context *dbg, char *file_name,
+			       unsigned long long vmem_start,
+			       unsigned long long vmem_end,
+			       unsigned long long pgoff);
+
+/* Resolve all queued requests; queue a new resolving request. */
+int dbg_trace_resolve_symbols(struct dbg_trace_context *obj);
+int dbg_trace_add_resolve_symbol(struct dbg_trace_context *obj,
+				 unsigned long long vma, char *name, int cookie);
+
+/* Walk all resolved symbols, calling @callback for each one. */
+void dbg_trace_walk_resolved_symbols(struct dbg_trace_context *obj,
+				     int (*callback)(struct dbg_trace_symbols *, void *),
+				     void *context);
+
+#endif /* _TC_TRACE_DEBUG_UTILS_ */
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [RFC PATCH v2 2/3] trace-cruncher: Support for perf
  2022-04-08 10:03 [RFC PATCH v2 0/3] trace-cruncher: Initial support for perf Tzvetomir Stoyanov (VMware)
  2022-04-08 10:03 ` [RFC PATCH v2 1/3] trace-cruncher: Logic for resolving address to function name Tzvetomir Stoyanov (VMware)
@ 2022-04-08 10:03 ` Tzvetomir Stoyanov (VMware)
  2022-04-14 12:58   ` Yordan Karadzhov
  2022-04-08 10:03 ` [RFC PATCH v2 3/3] trace-cruncher: perf example Tzvetomir Stoyanov (VMware)
  2 siblings, 1 reply; 6+ messages in thread
From: Tzvetomir Stoyanov (VMware) @ 2022-04-08 10:03 UTC (permalink / raw)
  To: y.karadz; +Cc: rostedt, linux-trace-devel

Initial perf support for trace-cruncher, using libperf. As a first
stage, collecting of stack trace samples of given process is supported.

Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
---
 setup.py           |   9 +-
 src/perfpy-utils.c | 896 +++++++++++++++++++++++++++++++++++++++++++++
 src/perfpy-utils.h |  43 +++
 src/perfpy.c       | 141 +++++++
 4 files changed, 1087 insertions(+), 2 deletions(-)
 create mode 100644 src/perfpy-utils.c
 create mode 100644 src/perfpy-utils.h
 create mode 100644 src/perfpy.c

diff --git a/setup.py b/setup.py
index 21c627f..8f9d006 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,8 @@ def third_party_paths():
     include_dirs = [np.get_include()]
     libs_required = [('libtraceevent', '1.5.0'),
                      ('libtracefs',    '1.3.0'),
-                     ('libkshark',     '2.0.1')]
+                     ('libkshark',     '2.0.1'),
+                     ('libperf',     '0.0.1')]
     libs_found = []
 
     for lib in libs_required:
@@ -83,6 +84,10 @@ def main():
                           sources=['src/ksharkpy.c', 'src/ksharkpy-utils.c'],
                           libraries=['kshark'])
 
+    module_perf = extension(name='tracecruncher.perfpy',
+                            sources=['src/perfpy.c', 'src/perfpy-utils.c', 'src/trace-obj-debug.c'],
+                            libraries=['traceevent', 'perf', 'bfd'])
+
     setup(name='tracecruncher',
           version='0.2.0',
           description='Interface for accessing Linux tracing data in Python.',
@@ -91,7 +96,7 @@ def main():
           url='https://github.com/vmware/trace-cruncher',
           license='LGPL-2.1',
           packages=find_packages(),
-          ext_modules=[module_ft, module_data, module_ks],
+          ext_modules=[module_ft, module_data, module_ks, module_perf],
           classifiers=[
               'Development Status :: 4 - Beta',
               'Programming Language :: Python :: 3',
diff --git a/src/perfpy-utils.c b/src/perfpy-utils.c
new file mode 100644
index 0000000..4d30596
--- /dev/null
+++ b/src/perfpy-utils.c
@@ -0,0 +1,896 @@
+// SPDX-License-Identifier: LGPL-2.1
+
+/*
+ * Copyright 2022 VMware Inc, Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
+ */
+
+#ifndef _GNU_SOURCE
+/** Use GNU C Library. */
+#define _GNU_SOURCE
+#endif // _GNU_SOURCE
+
// C
#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/wait.h>
+
+// libperf
+#include <linux/perf_event.h>
+#include <perf/evlist.h>
+#include <perf/evsel.h>
+#include <perf/cpumap.h>
+#include <perf/threadmap.h>
+#include <perf/mmap.h>
+#include <perf/core.h>
+#include <perf/event.h>
+
+// trace-cruncher
+#include "perfpy-utils.h"
+#include "trace-obj-debug.h"
+
+PyObject * PERF_ERROR;
+
+#define TMP_FILE "/tmp/perf_temp_data.XXXXXX"
+
+struct perf_scan_thread {
+	uint32_t tid;
+	char *comm;
+	struct perf_counts_values count;
+};
+
+struct perf_handle {
+	bool running;
+	bool debug_resolved;
+	pthread_t reader;
+	int fd;
+	int thr_count;
+	uint32_t pid;
+	uint32_t trace_time; /* in msec */
+	char *command;
+	char **argv;
+	struct perf_scan_thread *thr_map;
+	struct perf_evlist *evlist;
+	struct perf_evsel *evsel;
+	struct perf_event_attr attr;
+	struct perf_cpu_map *cpus;
+	struct dbg_trace_context *debug;
+	struct perf_thread_map *threads;
+};
+
+struct event_sample {
+	uint64_t		id;		/* PERF_SAMPLE_IDENTIFIER */
+	uint64_t		ip;		/* PERF_SAMPLE_IP */
+	uint32_t		pid, tid;	/* PERF_SAMPLE_TID */
+	uint64_t		time;		/* PERF_SAMPLE_TIME */
+	uint32_t		cpu, res;	/* PERF_SAMPLE_CPU */
+	uint64_t		nr;
+	uint64_t		ips[];		/* PERF_SAMPLE_CALLCHAIN */
+} __attribute__((packed));
+
+struct perf_event_sample {
+	struct event_sample	data;
+	struct perf_handle	*perf;
+	char			*ip;
+	char			*ips[];
+} __attribute__((packed));
+
+int py_perf_handle_destroy(struct perf_handle *perf)
+{
+	if (!perf || !perf->running)
+		return 0;
+
+	perf->running = false;
+	pthread_join(perf->reader, NULL);
+	fsync(perf->fd);
+	if (perf->command && perf->pid > 0) {
+		kill(perf->pid, SIGINT);
+		perf->pid = 0;
+	}
+
+	return 0;
+}
+
+void py_perf_handle_free(struct perf_handle *perf)
+{
+	int i;
+
+	if (!perf)
+		return;
+
+	if (perf->evlist)
+		perf_evlist__delete(perf->evlist);
+	if (perf->fd >= 0)
+		close(perf->fd);
+	if (perf->debug)
+		dbg_trace_context_destroy(perf->debug);
+	if (perf->cpus)
+		perf_cpu_map__put(perf->cpus);
+	if (perf->threads)
+		perf_thread_map__put(perf->threads);
+	if (perf->thr_map) {
+		for (i = 0; i < perf->thr_count; i++)
+			free(perf->thr_map[i].comm);
+		free(perf->thr_map);
+	}
+	if (perf->argv) {
+		i = 0;
+		while (perf->argv[i])
+			free(perf->argv[i++]);
+		free(perf->argv);
+	}
+	free(perf->command);
+
+	free(perf);
+}
+
+void py_perf_sample_free(struct perf_event_sample *sample)
+{
+	unsigned int i;
+
+	if (sample) {
+		free(sample->ip);
+		for (i = 0; i < sample->data.nr; i++)
+			free((char *)(sample->ips[i]));
+	}
+	free(sample);
+}
+
+static int pid_filter(const struct dirent *dir)
+{
+	const char *dname = dir->d_name;
+
+	if (!dname || dname[0] == '.')
+		return 0;
+
+	while (*dname) {
+		if (!isdigit(*dname))
+			return 0;
+		dname++;
+	}
+
+	return 1;
+}
+
+static  int str_read_file(const char *file, char **buffer)
+{
+	char stbuf[BUFSIZ];
+	char *buf = NULL;
+	int size = 0;
+	char *nbuf;
+	int fd;
+	int r;
+
+	fd = open(file, O_RDONLY);
+	if (fd < 0)
+		return -1;
+
+	do {
+		r = read(fd, stbuf, BUFSIZ);
+		if (r <= 0)
+			continue;
+		nbuf = realloc(buf, size+r+1);
+		if (!nbuf) {
+			size = -1;
+			break;
+		}
+		buf = nbuf;
+		memcpy(buf+size, stbuf, r);
+		size += r;
+	} while (r > 0);
+
+	close(fd);
+	if (r == 0 && size > 0) {
+		buf[size] = '\0';
+		*buffer = buf;
+	} else
+		free(buf);
+
+	return size;
+}
+
+static void strip_control_chars(char *str)
+{
+	while (*str) {
+		if (iscntrl(*str)) {
+			*str = '\0';
+			break;
+		}
+		str++;
+	}
+}
+
+static struct perf_thread_map *create_thread_map(struct perf_handle *perf)
+{
+	struct perf_thread_map *tmap = NULL;
+	struct dirent **pids = NULL;
+	char path[PATH_MAX];
+	int i, count;
+
+	snprintf(path, PATH_MAX, "/proc/%d/task", perf->pid);
+	count = scandir(path, &pids, pid_filter, NULL);
+	if (count < 1)
+		goto out;
+
+	tmap = perf_thread_map__new_array(count, NULL);
+	if (!tmap)
+		goto out;
+	free(perf->thr_map);
+	perf->thr_map = calloc(count, sizeof(struct perf_scan_thread));
+	if (!perf->thr_map)
+		goto out;
+	perf->thr_count = count;
+
+	for (i = 0; i < count; i++) {
+		perf->thr_map[i].tid = atoi(pids[i]->d_name);
+		perf_thread_map__set_pid(tmap, i, perf->thr_map[i].tid);
+		snprintf(path, PATH_MAX, "/proc/%d/task/%s/comm", perf->pid, pids[i]->d_name);
+		str_read_file(path, &perf->thr_map[i].comm);
+		strip_control_chars(perf->thr_map[i].comm);
+	}
+
+out:
+	if (pids) {
+		for (i = 0; i < count; i++)
+			free(pids[i]);
+		free(pids);
+	}
+
+	return tmap;
+}
+
+static struct perf_handle *new_perf_sampling_handle(int freq, pid_t pid, char *command, char **argv)
+{
+	struct perf_handle *perf = NULL;
+	char *tmp_file = NULL;
+
+	perf = calloc(1, sizeof(*perf));
+	if (!perf)
+		return NULL;
+
+	perf->fd = -1;
+	perf->attr.type        = PERF_TYPE_HARDWARE;
+	perf->attr.config      = PERF_COUNT_HW_CPU_CYCLES;
+	perf->attr.disabled    = 1;
+	perf->attr.freq        = 1;
+	perf->attr.sample_freq = freq;
+	perf->attr.exclude_kernel = 1;
+	perf->attr.exclude_idle = 1;
+	perf->attr.exclude_callchain_kernel = 1;
+	perf->attr.comm = 1;
+	perf->attr.mmap2 = 1;
+	perf->attr.task = 1;
+	perf->attr.precise_ip = 0;
+	perf->attr.inherit = 1;
+	perf->attr.task = 1;
+	perf->attr.inherit_stat = 1;
+	perf->attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+				 PERF_FORMAT_TOTAL_TIME_RUNNING;
+	perf->attr.sample_type = PERF_SAMPLE_IDENTIFIER |
+				 PERF_SAMPLE_IP |
+				 PERF_SAMPLE_TID |
+				 PERF_SAMPLE_TIME |
+				 PERF_SAMPLE_CPU |
+				 PERF_SAMPLE_CALLCHAIN;
+
+	/* trace all CPUs in the system */
+	perf->cpus = perf_cpu_map__new(NULL);
+	if (!perf->cpus)
+		goto error;
+
+	if (command) {
+		perf->command = strdup(command);
+		if (!perf->command)
+			goto error;
+		perf->argv = argv;
+		perf->debug = dbg_trace_context_create_file(command, true);
+	} else {
+		perf->pid = pid;
+		perf->debug = dbg_trace_context_create_pid(pid, true);
+	}
+	if (!perf->debug)
+		goto error;
+	perf->debug_resolved = false;
+
+	perf->evlist = perf_evlist__new();
+	if (!perf->evlist)
+		goto error;
+
+	tmp_file = strdup(TMP_FILE);
+	if (!tmp_file)
+		goto error;
+
+	mktemp(tmp_file);
+	perf->fd = open(tmp_file, O_RDWR|O_CREAT|O_TRUNC, 0600);
+	unlink(tmp_file);
+	if (perf->fd < 0)
+		goto error;
+
+	perf->evsel = perf_evsel__new(&perf->attr);
+	if (!perf->evsel)
+		goto error;
+
+	perf_evlist__add(perf->evlist, perf->evsel);
+
+	free(tmp_file);
+	return perf;
+
+error:
+	py_perf_handle_free(perf);
+	free(tmp_file);
+	return NULL;
+}
+
+PyObject *PyPerfSampler_new(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+	char *kwlist[] = {"pid", "command", "freq", "argv", NULL};
+	PyObject *py_perf, *py_arg, *py_args = NULL;
+	struct perf_handle *perf = NULL;
+	int freq = 10, pid = 0;
+	char *command = NULL;
+	char **argv = NULL;
+	int i, argc;
+
+	if (!PyArg_ParseTupleAndKeywords(args,
+					 kwargs,
+					 "|isiO",
+					 kwlist,
+					 &pid,
+					 &command,
+					 &freq,
+					 &py_args
+					 )) {
+		return NULL;
+	}
+
+	if (pid == 0 && !command) {
+		PyErr_Format(PERF_ERROR, "PID or command must be specified");
+		return NULL;
+	}
+
+	if (command && py_args) {
+		if (!PyList_CheckExact(py_args)) {
+			PyErr_SetString(PERF_ERROR, "Failed to parse argv list");
+			return NULL;
+		}
+		argc = PyList_Size(py_args);
+		argv = calloc(argc + 1, sizeof(char *));
+		for (i = 0; i < argc; i++) {
+			py_arg = PyList_GetItem(py_args, i);
+			if (!PyUnicode_Check(py_arg))
+				continue;
+			argv[i] = strdup(PyUnicode_DATA(py_arg));
+			if (!argv[i])
+				return NULL;
+		}
+		argv[i] = NULL;
+	}
+
+	perf = new_perf_sampling_handle(freq, pid, command, argv);
+
+	if (!perf) {
+		PyErr_SetString(PERF_ERROR, "Failed create new perf context");
+		return NULL;
+	}
+
+	py_perf = PyPerf_New(perf);
+
+	return py_perf;
+}
+
+static void perf_read_sample(struct perf_handle *perf, struct perf_record_sample *event)
+{
+	struct event_sample *sample;
+	uint64_t i;
+
+	sample = (struct event_sample *)(event->array);
+
+	/* check if the sample is for our PID */
+	if (sample->pid != perf->pid)
+		return;
+
+	if (perf->debug)
+		dbg_trace_add_resolve_symbol(perf->debug, sample->ip, NULL, 0);
+
+	if (write(perf->fd, sample, sizeof(*sample)) != sizeof(*sample))
+		return;
+
+	for (i = 0; i < sample->nr; i++) {
+		if (write(perf->fd, &sample->ips[i], sizeof(uint64_t)) != sizeof(uint64_t))
+			return;
+		if (perf->debug)
+			dbg_trace_add_resolve_symbol(perf->debug, sample->ips[i], NULL, 0);
+	}
+}
+
+/* A new memory is mapped to traced process */
+static void perf_read_mmap2(struct perf_handle *perf, struct perf_record_mmap2 *mmap)
+{
+	/* check if mmap is for our PID */
+	if (perf->pid != mmap->pid)
+		return;
+
+	/* check if executable memory is mapped */
+	if (mmap->header.misc & PERF_RECORD_MISC_MMAP_DATA)
+		return;
+
+	/*
+	 * A new dynamic library is dlopen() by the traced process,
+	 * store it for vma -> name resolving
+	 */
+	dbg_trace_context_add_file(perf->debug, mmap->filename,
+				   mmap->start, mmap->start + mmap->len, mmap->pgoff);
+}
+
+/* A new thread is started */
+static void perf_read_comm(struct perf_handle *perf, struct perf_record_comm *comm)
+{
+	struct perf_scan_thread *tmp;
+	int i;
+
+	/* check if the thread is started by PID */
+	if (perf->pid != comm->pid)
+		return;
+
+	for (i = 0; i < perf->thr_count; i++) {
+		if (perf->thr_map[i].tid == comm->tid) {
+			free(perf->thr_map[i].comm);
+			perf->thr_map[i].comm = strdup(comm->comm);
+			return;
+		}
+	}
+
+	tmp = realloc(perf->thr_map, (perf->thr_count + 1) * sizeof(struct perf_scan_thread));
+	if (!tmp)
+		return;
+
+	perf->thr_map = tmp;
+	perf->thr_map[perf->thr_count].tid = comm->tid;
+	perf->thr_map[perf->thr_count].comm = strdup(comm->comm);
+	perf->thr_count++;
+}
+
+static void *perf_reader_thread(void *data)
+{
+	struct perf_handle *perf = data;
+	struct perf_mmap *map;
+	union perf_event *event;
+
+	perf_evlist__enable(perf->evlist);
+
+	while (true) {
+		if (!perf->running)
+			break;
+		perf_evlist__for_each_mmap(perf->evlist, map, false) {
+			if (perf_mmap__read_init(map) < 0)
+				continue;
+
+			while ((event = perf_mmap__read_event(map)) != NULL) {
+
+				switch (event->sample.header.type) {
+				case PERF_RECORD_SAMPLE:
+					perf_read_sample(perf, (struct perf_record_sample *)event);
+					break;
+				case PERF_RECORD_COMM:
+					perf_read_comm(perf, (struct perf_record_comm *)event);
+					break;
+				case PERF_RECORD_MMAP2:
+					perf_read_mmap2(perf, (struct perf_record_mmap2 *)event);
+					break;
+				}
+
+				perf_mmap__consume(map);
+			}
+
+			perf_mmap__read_done(map);
+		}
+	}
+	perf_evlist__disable(perf->evlist);
+	pthread_exit(0);
+}
+
+static int increase_file_limit(void)
+{
+	struct rlimit lim;
+
+	if (getrlimit(RLIMIT_NOFILE, &lim))
+		return -1;
+
+	if (lim.rlim_cur < lim.rlim_max) {
+		lim.rlim_cur = lim.rlim_max;
+	} else {
+		lim.rlim_cur += 100;
+		lim.rlim_max += 100;
+	}
+
+	return setrlimit(RLIMIT_NOFILE, &lim);
+}
+
+static int perf_maps_init(struct perf_handle *perf)
+{
+	int ret;
+
+	if (!perf->threads)
+		perf->threads = create_thread_map(perf);
+	if (!perf->threads)
+		return -1;
+
+	perf_evlist__set_maps(perf->evlist, perf->cpus, perf->threads);
+	do {
+		ret = perf_evlist__open(perf->evlist);
+		if (!ret)
+			break;
+		if (ret != -EMFILE)
+			goto out;
+		ret = increase_file_limit();
+		if (ret)
+			goto out;
+	} while (ret);
+
+	ret = perf_evlist__mmap(perf->evlist, 4);
+out:
+	if (ret)
+		perf_evlist__close(perf->evlist);
+	return ret;
+}
+
+static int perf_reader_start(struct perf_handle *perf)
+{
+	pthread_attr_t attrib;
+	int ret;
+
+	pthread_attr_init(&attrib);
+	pthread_attr_setdetachstate(&attrib, PTHREAD_CREATE_JOINABLE);
+	ret = pthread_create(&perf->reader, &attrib, perf_reader_thread, perf);
+	if (!ret)
+		perf->running = true;
+
+	pthread_attr_destroy(&attrib);
+	return ret;
+}
+
+#define PERF_EXEC_SYNC	"/TC_PERF_SYNC_XXXXXX"
+static int perf_run_cmd(struct perf_handle *perf)
+{
+	char *envp[] = {NULL};
+	char sname[strlen(PERF_EXEC_SYNC) + 1];
+	sem_t *sem;
+	pid_t pid;
+	int ret;
+
+	strcpy(sname, PERF_EXEC_SYNC);
+	mktemp(sname);
+	sem = sem_open(sname, O_CREAT | O_EXCL, 0644, 0);
+	sem_unlink(sname);
+
+	pid = fork();
+	if (pid < 0)
+		return -1;
+	if (pid == 0) {
+		sem_wait(sem);
+		execvpe(perf->command, perf->argv, envp);
+	} else {
+		perf->pid = pid;
+		ret = perf_maps_init(perf);
+		if (!ret)
+			ret = perf_reader_start(perf);
+		sem_post(sem);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int perf_start_sampling(struct perf_handle *perf)
+{
+	int ret;
+
+	if (perf->running)
+		return 0;
+
+	if (perf->command)
+		return perf_run_cmd(perf);
+
+	ret = perf_maps_init(perf);
+	if (!ret)
+		ret = perf_reader_start(perf);
+
+	return ret;
+}
+
+static bool sampling_run;
+
+static void sampling_stop(int sig)
+{
+	sampling_run = false;
+}
+
+static void sampling_timer(int sig, siginfo_t *si, void *uc)
+{
+	sampling_run = false;
+}
+
+#define PID_WAIT_CHECK_USEC	500000
+#define TIMER_SEC_NANO		1000000000LL
+static int perf_wait_pid(struct perf_handle *perf)
+{
+	struct itimerspec tperiod = {0};
+	struct sigaction saction = {0};
+	struct sigevent stime = {0};
+	timer_t timer_id;
+
+	if (perf->pid == 0)
+		return -1;
+
+	sampling_run = true;
+	signal(SIGINT, sampling_stop);
+
+	if (perf->trace_time) {
+		stime.sigev_notify = SIGEV_SIGNAL;
+		stime.sigev_signo = SIGRTMIN;
+		if (timer_create(CLOCK_MONOTONIC, &stime, &timer_id))
+			return -1;
+		saction.sa_flags = SA_SIGINFO;
+		saction.sa_sigaction = sampling_timer;
+		sigemptyset(&saction.sa_mask);
+		if (sigaction(SIGRTMIN, &saction, NULL)) {
+			timer_delete(timer_id);
+			return -1;
+		}
+		/* covert trace_time from msec to sec, nsec */
+		tperiod.it_value.tv_nsec = ((unsigned long long)perf->trace_time * 1000000LL);
+		if (tperiod.it_value.tv_nsec >= TIMER_SEC_NANO) {
+			tperiod.it_value.tv_sec = tperiod.it_value.tv_nsec / TIMER_SEC_NANO;
+			tperiod.it_value.tv_nsec %= TIMER_SEC_NANO;
+		}
+		if (timer_settime(timer_id, 0, &tperiod, NULL))
+			return -1;
+	}
+
+	do {
+		if (perf->command) { /* wait for a child */
+			if (waitpid(perf->pid, NULL, WNOHANG) == (int)perf->pid) {
+				perf->pid = 0;
+				sampling_run = false;
+			}
+		} else { /* not a child, check if still exist */
+			if (kill(perf->pid, 0) == -1 && errno == ESRCH) {
+				perf->pid = 0;
+				sampling_run = false;
+			}
+		}
+		usleep(PID_WAIT_CHECK_USEC);
+	} while (sampling_run);
+
+	if (perf->trace_time)
+		timer_delete(timer_id);
+
+	signal(SIGINT, SIG_DFL);
+
+	return 0;
+}
+
+PyObject *PyPerf_enable(PyPerf *self, PyObject *args, PyObject *kwargs)
+{
+	char *kwlist[] = {"wait", "time", NULL};
+	struct perf_handle *perf = self->ptrObj;
+	int wait = false;
+	int ret;
+
+	if (perf->running) {
+		PyErr_Format(PERF_ERROR, "Perf sampling is already started");
+		return NULL;
+	}
+
+	perf->trace_time = 0;
+	if (!PyArg_ParseTupleAndKeywords(args,
+					 kwargs,
+					 "|pi",
+					 kwlist,
+					 &wait,
+					 &perf->trace_time)) {
+		return NULL;
+	}
+
+	ret = perf_start_sampling(perf);
+	if (ret) {
+		PyErr_Format(PERF_ERROR,
+			     "Failed to start perf sampling - %s", strerror(-ret));
+		return NULL;
+	}
+
+	if (wait) {
+		perf_wait_pid(perf);
+		py_perf_handle_destroy(perf);
+	}
+
+	Py_RETURN_NONE;
+}
+
+PyObject *PyPerf_disable(PyPerf *self)
+{
+	struct perf_handle *perf = self->ptrObj;
+
+	if (!perf->running) {
+		PyErr_Format(PERF_ERROR, "Perf reader is not started");
+		return NULL;
+	}
+
+	py_perf_handle_destroy(perf);
+
+	Py_RETURN_NONE;
+}
+
+struct symb_walk {
+	uint64_t ip;
+	char *name;
+};
+
+static int sym_get(struct dbg_trace_symbols *symb, void *data)
+{
+	struct symb_walk *s = (struct symb_walk *)data;
+
+	if (s->ip == symb->vma_near) {
+		if (symb->name)
+			asprintf(&s->name, "%s @ %s", symb->name, symb->fname);
+		else
+			asprintf(&s->name, "(0x%llX) @ %s", symb->vma_near, symb->fname);
+		return 1;
+	}
+	return 0;
+}
+
+static char *ip_name(struct perf_handle *perf, uint64_t ip)
+{
+	struct symb_walk symb;
+
+	symb.ip = ip;
+	symb.name = NULL;
+	if (perf && perf->debug)
+		dbg_trace_walk_resolved_symbols(perf->debug, sym_get, &symb);
+
+	if (!symb.name)
+		asprintf(&symb.name, "0x%lX", ip);
+
+	return symb.name;
+}
+
+PyObject *PyPerf_getSamples(PyPerf *self)
+{
+	struct perf_handle *perf = self->ptrObj;
+	struct event_sample sample;
+	struct perf_event_sample *store;
+	PyObject *slist, *sobject;
+	uint64_t i, ip;
+	int ca = 0, cs = 0;
+
+	if (perf->running) {
+		PyErr_Format(PERF_ERROR, "Perf reader is running");
+		return NULL;
+	}
+
+	if (perf->debug && !perf->debug_resolved) {
+		dbg_trace_resolve_symbols(perf->debug);
+		perf->debug_resolved = true;
+	}
+
+	if (lseek(perf->fd, 0, SEEK_SET) == (off_t)-1) {
+		PyErr_Format(PERF_ERROR, "No samples");
+		return NULL;
+	}
+
+	slist = PyList_New(0);
+	do {
+		if (read(perf->fd, &sample, sizeof(sample)) != sizeof(sample))
+			break;
+		ca++;
+		store = calloc(1, sizeof(*store) + (sample.nr * sizeof(char *)));
+		if (!store)
+			break;
+		memcpy(&store->data, &sample, sizeof(sample));
+		store->perf = perf;
+		store->ip = ip_name(perf, store->data.ip);
+		for (i = 0; i < sample.nr; i++) {
+			if (read(perf->fd, &ip, sizeof(uint64_t)) != sizeof(uint64_t))
+				break;
+			store->ips[i] = ip_name(perf, ip);
+		}
+		cs += sample.nr;
+		if (i < sample.nr)
+			break;
+		sobject = PyPerfEventSampler_New(store);
+		PyList_Append(slist, sobject);
+	} while (true);
+	ftruncate(perf->fd, 0);
+	return slist;
+}
+
+PyObject *PyPerfSampler_id(PyPerfEventSampler *self)
+{
+	struct perf_event_sample *sample = self->ptrObj;
+
+	return PyLong_FromUnsignedLongLong(sample->data.id);
+}
+
+PyObject *PyPerfSampler_pid(PyPerfEventSampler *self)
+{
+	struct perf_event_sample *sample = self->ptrObj;
+
+	return PyLong_FromUnsignedLong(sample->data.pid);
+}
+
+PyObject *PyPerfSampler_tid(PyPerfEventSampler *self)
+{
+	struct perf_event_sample *sample = self->ptrObj;
+
+	return PyLong_FromUnsignedLong(sample->data.tid);
+}
+
+PyObject *PyPerfSampler_time(PyPerfEventSampler *self)
+{
+	struct perf_event_sample *sample = self->ptrObj;
+
+	return PyLong_FromUnsignedLongLong(sample->data.time);
+}
+
+PyObject *PyPerfSampler_cpu(PyPerfEventSampler *self)
+{
+	struct perf_event_sample *sample = self->ptrObj;
+
+	return PyLong_FromUnsignedLong(sample->data.cpu);
+}
+
+PyObject *PyPerfSampler_nr(PyPerfEventSampler *self)
+{
+	struct perf_event_sample *sample = self->ptrObj;
+
+	return PyLong_FromUnsignedLongLong(sample->data.nr);
+}
+
+PyObject *PyPerfSampler_ip(PyPerfEventSampler *self)
+{
+	struct perf_event_sample *sample = self->ptrObj;
+
+	return PyUnicode_FromString(sample->ip);
+}
+
+PyObject *PyPerfSampler_tid_comm(PyPerfEventSampler *self, PyObject *args, PyObject *kwargs)
+{
+	struct perf_event_sample *sample = self->ptrObj;
+	char *name = NULL;
+	int i;
+
+	if (!sample->perf || !sample->perf->thr_count || !sample->perf->thr_map)
+		Py_RETURN_NONE;
+
+	for (i = 0; i < sample->perf->thr_count; i++)
+		if (sample->perf->thr_map[i].tid == sample->data.tid)
+			break;
+
+	if (i < sample->perf->thr_count && sample->perf->thr_map[i].comm)
+		name = sample->perf->thr_map[i].comm;
+
+	if (name)
+		return PyUnicode_FromString(name);
+
+	Py_RETURN_NONE;
+}
+
+PyObject *PyPerfSampler_ips(PyPerfEventSampler *self)
+{
+	struct perf_event_sample *sample = self->ptrObj;
+	PyObject *slist;
+	unsigned int i;
+
+	slist = PyList_New(0);
+	for (i = 0 ; i < sample->data.nr; i++)
+		PyList_Append(slist, PyUnicode_FromString((char *)sample->ips[i]));
+
+	return slist;
+}
diff --git a/src/perfpy-utils.h b/src/perfpy-utils.h
new file mode 100644
index 0000000..648a8ce
--- /dev/null
+++ b/src/perfpy-utils.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+
+/*
+ * Copyright 2022 VMware Inc, Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
+ */
+
+#ifndef _TC_PERF_PY_UTILS
+#define _TC_PERF_PY_UTILS
+
+// Python
+#include <Python.h>
+
+// trace-cruncher
+#include "common.h"
+
+struct perf_handle;
+struct perf_event_sample;
+
+C_OBJECT_WRAPPER_DECLARE(perf_handle, PyPerf);
+C_OBJECT_WRAPPER_DECLARE(perf_event_sample, PyPerfEventSampler);
+
+PyObject *PyPerfSampler_new(PyObject *self, PyObject *args, PyObject *kwargs);
+
+PyObject *PyPerf_enable(PyPerf *self, PyObject *args, PyObject *kwargs);
+PyObject *PyPerf_disable(PyPerf *self);
+PyObject *PyPerf_getSamples(PyPerf *self);
+
+PyObject *PyPerfSampler_id(PyPerfEventSampler *self);
+PyObject *PyPerfSampler_ip(PyPerfEventSampler *self);
+PyObject *PyPerfSampler_pid(PyPerfEventSampler *self);
+PyObject *PyPerfSampler_tid(PyPerfEventSampler *self);
+PyObject *PyPerfSampler_tid_comm(PyPerfEventSampler *self, PyObject *args, PyObject *kwargs);
+PyObject *PyPerfSampler_time(PyPerfEventSampler *self);
+PyObject *PyPerfSampler_cpu(PyPerfEventSampler *self);
+PyObject *PyPerfSampler_nr(PyPerfEventSampler *self);
+PyObject *PyPerfSampler_ips(PyPerfEventSampler *self);
+
+void py_perf_handle_free(struct perf_handle *perf);
+int py_perf_handle_destroy(struct perf_handle *perf);
+
+void py_perf_sample_free(struct perf_event_sample *sample);
+
+#endif
diff --git a/src/perfpy.c b/src/perfpy.c
new file mode 100644
index 0000000..a6b2042
--- /dev/null
+++ b/src/perfpy.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: LGPL-2.1
+
+/*
+ * Copyright 2022 VMware Inc, Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
+ */
+
+// Python
+#include <Python.h>
+
+// libperf
+#include <perf/core.h>
+#include <perf/evsel.h>
+#include <perf/mmap.h>
+#include <perf/event.h>
+
+// trace-cruncher
+#include "common.h"
+#include "perfpy-utils.h"
+
+extern PyObject *PERF_ERROR;
+
+static PyMethodDef PyPerf_methods[] = {
+	{"enable",
+	 (PyCFunction) PyPerf_enable,
+	 METH_VARARGS | METH_KEYWORDS,
+	 "start sampling"
+	},
+	{"disable",
+	 (PyCFunction) PyPerf_disable,
+	 METH_NOARGS,
+	 "stop sampling"
+	},
+	{"get_samples",
+	 (PyCFunction) PyPerf_getSamples,
+	 METH_NOARGS,
+	 "get recorded samples"
+	},
+	{NULL}
+};
+C_OBJECT_WRAPPER(perf_handle, PyPerf, py_perf_handle_destroy, py_perf_handle_free);
+
+static PyMethodDef PyPerfEventSampler_methods[] = {
+	{"id",
+	 (PyCFunction) PyPerfSampler_id,
+	 METH_NOARGS,
+	 "get sample id"
+	},
+	{"ip",
+	 (PyCFunction) PyPerfSampler_ip,
+	 METH_NOARGS,
+	 "get sample ip"
+	},
+	{"pid",
+	 (PyCFunction) PyPerfSampler_pid,
+	 METH_NOARGS,
+	 "get sample pid"
+	},
+	{"tid",
+	 (PyCFunction) PyPerfSampler_tid,
+	 METH_NOARGS,
+	 "get sample tid"
+	},
+	{"tid_comm",
+	 (PyCFunction) PyPerfSampler_tid_comm,
+	 METH_VARARGS | METH_KEYWORDS,
+	 "get sample tid"
+	},
+	{"time",
+	 (PyCFunction) PyPerfSampler_time,
+	 METH_NOARGS,
+	 "get sample timestamp"
+	},
+	{"cpu",
+	 (PyCFunction) PyPerfSampler_cpu,
+	 METH_NOARGS,
+	 "get sample cpu"
+	},
+	{"stack_count",
+	 (PyCFunction) PyPerfSampler_nr,
+	 METH_NOARGS,
+	 "get sample stack count"
+	},
+	{"stack",
+	 (PyCFunction) PyPerfSampler_ips,
+	 METH_NOARGS,
+	 "get sample stack"
+	},
+	{NULL}
+};
+C_OBJECT_WRAPPER(perf_event_sample, PyPerfEventSampler, NO_DESTROY, py_perf_sample_free);
+
+static PyMethodDef perfpy_methods[] = {
+	{"sampler_instance",
+	 (PyCFunction) PyPerfSampler_new,
+	 METH_VARARGS | METH_KEYWORDS,
+	 "Allocate new perf sampler instance"
+	},
+	{NULL}
+};
+
+static int perf_error_print(enum libperf_print_level level,
+			    const char *fmt, va_list ap)
+{
+	return vfprintf(stderr, fmt, ap);
+}
+
+static struct PyModuleDef perfpy_module = {
+	PyModuleDef_HEAD_INIT,
+	"perfpy",
+	"Python interface for Perf.",
+	-1,
+	perfpy_methods
+};
+
+PyMODINIT_FUNC PyInit_perfpy(void)
+{
+
+	if (!PyPerfTypeInit())
+		return NULL;
+	if (!PyPerfEventSamplerTypeInit())
+		return NULL;
+
+	PERF_ERROR = PyErr_NewException("tracecruncher.perfpy.perf_error",
+					NULL, NULL);
+
+	PyObject *module = PyModule_Create(&perfpy_module);
+
+	PyModule_AddObject(module, "perf_error", PERF_ERROR);
+	PyModule_AddObject(module, "perf_handle", (PyObject *) &PyPerfType);
+	PyModule_AddObject(module, "perf_event_sample", (PyObject *) &PyPerfEventSamplerType);
+
+	if (geteuid() != 0) {
+		PyErr_SetString(PERF_ERROR,
+				"Permission denied. Root privileges are required.");
+		return NULL;
+	}
+
+	libperf_init(perf_error_print);
+
+	return module;
+}
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [RFC PATCH v2 3/3] trace-cruncher: perf example
  2022-04-08 10:03 [RFC PATCH v2 0/3] trace-cruncher: Initial support for perf Tzvetomir Stoyanov (VMware)
  2022-04-08 10:03 ` [RFC PATCH v2 1/3] trace-cruncher: Logic for resolving address to function name Tzvetomir Stoyanov (VMware)
  2022-04-08 10:03 ` [RFC PATCH v2 2/3] trace-cruncher: Support for perf Tzvetomir Stoyanov (VMware)
@ 2022-04-08 10:03 ` Tzvetomir Stoyanov (VMware)
  2 siblings, 0 replies; 6+ messages in thread
From: Tzvetomir Stoyanov (VMware) @ 2022-04-08 10:03 UTC (permalink / raw)
  To: y.karadz; +Cc: rostedt, linux-trace-devel

Example Python program that uses trace-cruncher to collect performance
statistics of a given process.

Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
---
 examples/perf_sampling.py | 53 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100755 examples/perf_sampling.py

diff --git a/examples/perf_sampling.py b/examples/perf_sampling.py
new file mode 100755
index 0000000..6219bff
--- /dev/null
+++ b/examples/perf_sampling.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+
+"""
+SPDX-License-Identifier: CC-BY-4.0
+
+Copyright 2022 VMware Inc, Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
+"""
+
+import sys
+import time
+import shutil
+
+import tracecruncher.perfpy as perf
+
+def SortKey(sample):
+    return sample.time()
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print('Usage: ', sys.argv[0], ' [PROCESS]')
+        sys.exit(1)
+
+    # Create perf sample object for the given process
+    if sys.argv[1].isdigit():
+        p = perf.sampler_instance(pid=int(sys.argv[1]), freq=99)
+    else:
+        file = shutil.which(sys.argv[1])
+        if file is None:
+            print('Cannot find ', sys.argv[1], ' in the system')
+            sys.exit(1)
+        p = perf.sampler_instance(command=file, argv=sys.argv[1:], freq=99)
+
+    print('Start collecting performance data, press ctrl+c  to stop')
+    # Start collecting performance traces
+    p.enable(wait=True)
+
+    # Get collected samples
+    samples = p.get_samples()
+    # Sort the list based on the timestamp
+    samples.sort(key=SortKey)
+    time = 0
+    ip_count = 0
+    for s in samples:
+        # Print PID, TID, time and trace depth of each sample
+        if time == 0:
+            time = s.time()
+        print("{0} {1} ({2}), +{3}:".format(s.ip(), s.tid(), s.tid_comm(), s.time() - time))
+        ips = s.stack()
+        ip_count += len(ips)
+        for ip in reversed(ips):
+            # Print stack trace of the sample
+            print("\t{0}".format(ip))
+    print("\nCollected {0} samples, {1} ip traces".format(len(samples), ip_count))
\ No newline at end of file
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH v2 2/3] trace-cruncher: Support for perf
  2022-04-08 10:03 ` [RFC PATCH v2 2/3] trace-cruncher: Support for perf Tzvetomir Stoyanov (VMware)
@ 2022-04-14 12:58   ` Yordan Karadzhov
  2022-04-15 11:35     ` Tzvetomir Stoyanov
  0 siblings, 1 reply; 6+ messages in thread
From: Yordan Karadzhov @ 2022-04-14 12:58 UTC (permalink / raw)
  To: Tzvetomir Stoyanov (VMware); +Cc: rostedt, linux-trace-devel



On 8.04.22 г. 13:03 ч., Tzvetomir Stoyanov (VMware) wrote:
> Initial perf support for trace-cruncher, using libperf. As a first
> stage, collecting of stack trace samples of given process is supported.
> 
> Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
> ---
>   setup.py           |   9 +-
>   src/perfpy-utils.c | 896 +++++++++++++++++++++++++++++++++++++++++++++
>   src/perfpy-utils.h |  43 +++
>   src/perfpy.c       | 141 +++++++
>   4 files changed, 1087 insertions(+), 2 deletions(-)
>   create mode 100644 src/perfpy-utils.c
>   create mode 100644 src/perfpy-utils.h
>   create mode 100644 src/perfpy.c
> 
> diff --git a/setup.py b/setup.py
> index 21c627f..8f9d006 100644
> --- a/setup.py
> +++ b/setup.py
> @@ -41,7 +41,8 @@ def third_party_paths():
>       include_dirs = [np.get_include()]
>       libs_required = [('libtraceevent', '1.5.0'),
>                        ('libtracefs',    '1.3.0'),
> -                     ('libkshark',     '2.0.1')]
> +                     ('libkshark',     '2.0.1'),
> +                     ('libperf',     '0.0.1')]
>       libs_found = []
>   
>       for lib in libs_required:
> @@ -83,6 +84,10 @@ def main():
>                             sources=['src/ksharkpy.c', 'src/ksharkpy-utils.c'],
>                             libraries=['kshark'])
>   
> +    module_perf = extension(name='tracecruncher.perfpy',
> +                            sources=['src/perfpy.c', 'src/perfpy-utils.c', 'src/trace-obj-debug.c'],
> +                            libraries=['traceevent', 'perf', 'bfd'])
> +
>       setup(name='tracecruncher',
>             version='0.2.0',
>             description='Interface for accessing Linux tracing data in Python.',
> @@ -91,7 +96,7 @@ def main():
>             url='https://github.com/vmware/trace-cruncher',
>             license='LGPL-2.1',
>             packages=find_packages(),
> -          ext_modules=[module_ft, module_data, module_ks],
> +          ext_modules=[module_ft, module_data, module_ks, module_perf],
>             classifiers=[
>                 'Development Status :: 4 - Beta',
>                 'Programming Language :: Python :: 3',
> diff --git a/src/perfpy-utils.c b/src/perfpy-utils.c
> new file mode 100644
> index 0000000..4d30596
> --- /dev/null
> +++ b/src/perfpy-utils.c
> @@ -0,0 +1,896 @@
> +// SPDX-License-Identifier: LGPL-2.1
> +
> +/*
> + * Copyright 2022 VMware Inc, Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
> + */
> +
> +#ifndef _GNU_SOURCE
> +/** Use GNU C Library. */
> +#define _GNU_SOURCE
> +#endif // _GNU_SOURCE
> +
> +// C
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <pthread.h>
> +#include <sys/stat.h>
> +#include <sys/wait.h>
> +#include <fcntl.h>
> +#include <dirent.h>
> +#include <sys/resource.h>
> +#include <semaphore.h>
> +#include <signal.h>
> +
> +// libperf
> +#include <linux/perf_event.h>
> +#include <perf/evlist.h>
> +#include <perf/evsel.h>
> +#include <perf/cpumap.h>
> +#include <perf/threadmap.h>
> +#include <perf/mmap.h>
> +#include <perf/core.h>
> +#include <perf/event.h>
> +
> +// trace-cruncher
> +#include "perfpy-utils.h"
> +#include "trace-obj-debug.h"
> +
> +PyObject * PERF_ERROR;
> +
> +#define TMP_FILE "/tmp/perf_temp_data.XXXXXX"
> +
> +struct perf_scan_thread {
> +	uint32_t tid;
> +	char *comm;
> +	struct perf_counts_values count;
> +};
> +
> +struct perf_handle {
> +	bool running;
> +	bool debug_resolved;
> +	pthread_t reader;
> +	int fd;
> +	int thr_count;
> +	uint32_t pid;
> +	uint32_t trace_time; /* in msec */
> +	char *command;
> +	char **argv;
> +	struct perf_scan_thread *thr_map;
> +	struct perf_evlist *evlist;
> +	struct perf_evsel *evsel;
> +	struct perf_event_attr attr;
> +	struct perf_cpu_map *cpus;
> +	struct dbg_trace_context *debug;
> +	struct perf_thread_map *threads;
> +};
> +
> +struct event_sample {
> +	uint64_t		id;		/* PERF_SAMPLE_IDENTIFIER */
> +	uint64_t		ip;		/* PERF_SAMPLE_IP */
> +	uint32_t		pid, tid;	/* PERF_SAMPLE_TID */
> +	uint64_t		time;		/* PERF_SAMPLE_TIME */
> +	uint32_t		cpu, res;	/* PERF_SAMPLE_CPU */
> +	uint64_t		nr;
> +	uint64_t		ips[];		/* PERF_SAMPLE_CALLCHAIN */
> +} __attribute__((packed));
> +
> +struct perf_event_sample {
> +	struct event_sample	data;
> +	struct perf_handle	*perf;
> +	char			*ip;
> +	char			*ips[];
> +} __attribute__((packed));
> +
> +int py_perf_handle_destroy(struct perf_handle *perf)
> +{
> +	if (!perf || !perf->running)
> +		return 0;
> +
> +	perf->running = false;
> +	pthread_join(perf->reader, NULL);
> +	fsync(perf->fd);
> +	if (perf->command && perf->pid > 0) {
> +		kill(perf->pid, SIGINT);
> +		perf->pid = 0;

Maybe we can free 'perf->command' and set it to NULL here.

> +	}
> +
> +	return 0;
> +}
> +
> +void py_perf_handle_free(struct perf_handle *perf)
> +{
> +	int i;
> +
> +	if (!perf)
> +		return;
> +
> +	if (perf->evlist)
> +		perf_evlist__delete(perf->evlist);
> +	if (perf->fd >= 0)
> +		close(perf->fd);
> +	if (perf->debug)
> +		dbg_trace_context_destroy(perf->debug);
> +	if (perf->cpus)
> +		perf_cpu_map__put(perf->cpus);
> +	if (perf->threads)
> +		perf_thread_map__put(perf->threads);
> +	if (perf->thr_map) {
> +		for (i = 0; i < perf->thr_count; i++)
> +			free(perf->thr_map[i].comm);
> +		free(perf->thr_map);
> +	}
> +	if (perf->argv) {
> +		i = 0;
> +		while (perf->argv[i])
> +			free(perf->argv[i++]);
> +		free(perf->argv);
> +	}
> +	free(perf->command);
> +
> +	free(perf);
> +}
> +
> +void py_perf_sample_free(struct perf_event_sample *sample)
> +{
> +	unsigned int i;
> +
> +	if (sample) {
> +		free(sample->ip);
> +		for (i = 0; i < sample->data.nr; i++)
> +			free((char *)(sample->ips[i]));
> +	}
> +	free(sample);
> +}
> +
> +static int pid_filter(const struct dirent *dir)
> +{
> +	const char *dname = dir->d_name;
> +
> +	if (!dname || dname[0] == '.')
> +		return 0;
> +
> +	while (*dname) {
> +		if (!isdigit(*dname))
> +			return 0;
> +		dname++;
> +	}
> +
> +	return 1;
> +}
> +
> +static  int str_read_file(const char *file, char **buffer)
> +{
> +	char stbuf[BUFSIZ];
> +	char *buf = NULL;
> +	int size = 0;
> +	char *nbuf;
> +	int fd;
> +	int r;
> +
> +	fd = open(file, O_RDONLY);
> +	if (fd < 0)
> +		return -1;
> +
> +	do {
> +		r = read(fd, stbuf, BUFSIZ);
> +		if (r <= 0)
> +			continue;

I wonder why you use 'continue' instead of 'break'?
If you use 'break', we don't even need to have 'do {} while()'.
We can use just while(1) or for(;;) for the loop.

> +		nbuf = realloc(buf, size+r+1);
> +		if (!nbuf) {
> +			size = -1;
> +			break;
> +		}
> +		buf = nbuf;
> +		memcpy(buf+size, stbuf, r);
> +		size += r;
> +	} while (r > 0);
> +
> +	close(fd);
> +	if (r == 0 && size > 0) {
> +		buf[size] = '\0';
> +		*buffer = buf;
> +	} else
> +		free(buf);
> +
> +	return size;
> +}
> +
> +static void strip_control_chars(char *str)
> +{
> +	while (*str) {
> +		if (iscntrl(*str)) {
> +			*str = '\0';
> +			break;
> +		}
> +		str++;
> +	}
> +}
> +
> +static struct perf_thread_map *create_thread_map(struct perf_handle *perf)
> +{
> +	struct perf_thread_map *tmap = NULL;
> +	struct dirent **pids = NULL;
> +	char path[PATH_MAX];
> +	int i, count;
> +
> +	snprintf(path, PATH_MAX, "/proc/%d/task", perf->pid);
> +	count = scandir(path, &pids, pid_filter, NULL);
> +	if (count < 1)
> +		goto out;
> +
> +	tmap = perf_thread_map__new_array(count, NULL);
> +	if (!tmap)
> +		goto out;
> +	free(perf->thr_map);
> +	perf->thr_map = calloc(count, sizeof(struct perf_scan_thread));
> +	if (!perf->thr_map)

Isn't this an error? Maybe we have to free 'tmap' and set it to NULL.

> +		goto out;
> +	perf->thr_count = count;
> +
> +	for (i = 0; i < count; i++) {
> +		perf->thr_map[i].tid = atoi(pids[i]->d_name);
> +		perf_thread_map__set_pid(tmap, i, perf->thr_map[i].tid);
> +		snprintf(path, PATH_MAX, "/proc/%d/task/%s/comm", perf->pid, pids[i]->d_name);
> +		str_read_file(path, &perf->thr_map[i].comm);

Check the return value for error.

> +		strip_control_chars(perf->thr_map[i].comm);
> +	}
> +
> +out:
> +	if (pids) {
> +		for (i = 0; i < count; i++)
> +			free(pids[i]);
> +		free(pids);
> +	}
> +
> +	return tmap;
> +}
> +
> +static struct perf_handle *new_perf_sampling_handle(int freq, pid_t pid, char *command, char **argv)
> +{
> +	struct perf_handle *perf = NULL;
> +	char *tmp_file = NULL;
> +
> +	perf = calloc(1, sizeof(*perf));
> +	if (!perf)
> +		return NULL;
> +
> +	perf->fd = -1;
> +	perf->attr.type        = PERF_TYPE_HARDWARE;
> +	perf->attr.config      = PERF_COUNT_HW_CPU_CYCLES;
> +	perf->attr.disabled    = 1;
> +	perf->attr.freq        = 1;
> +	perf->attr.sample_freq = freq;
> +	perf->attr.exclude_kernel = 1;
> +	perf->attr.exclude_idle = 1;
> +	perf->attr.exclude_callchain_kernel = 1;
> +	perf->attr.comm = 1;
> +	perf->attr.mmap2 = 1;
> +	perf->attr.task = 1;
> +	perf->attr.precise_ip = 0;
> +	perf->attr.inherit = 1;
> +	perf->attr.task = 1;
> +	perf->attr.inherit_stat = 1;
> +	perf->attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
> +				 PERF_FORMAT_TOTAL_TIME_RUNNING;
> +	perf->attr.sample_type = PERF_SAMPLE_IDENTIFIER |
> +				 PERF_SAMPLE_IP |
> +				 PERF_SAMPLE_TID |
> +				 PERF_SAMPLE_TIME |
> +				 PERF_SAMPLE_CPU |
> +				 PERF_SAMPLE_CALLCHAIN;
> +
> +	/* trace all CPUs in the system */

	/* Trace all CPUs in the system. */
> +	perf->cpus = perf_cpu_map__new(NULL);
> +	if (!perf->cpus)
> +		goto error;
> +
> +	if (command) {
> +		perf->command = strdup(command);
> +		if (!perf->command)
> +			goto error;
> +		perf->argv = argv;
> +		perf->debug = dbg_trace_context_create_file(command, true);
> +	} else {
> +		perf->pid = pid;
> +		perf->debug = dbg_trace_context_create_pid(pid, true);
> +	}
> +	if (!perf->debug)
> +		goto error;
> +	perf->debug_resolved = false;
> +
> +	perf->evlist = perf_evlist__new();
> +	if (!perf->evlist)
> +		goto error;
> +
> +	tmp_file = strdup(TMP_FILE);
> +	if (!tmp_file)
> +		goto error;
> +
> +	mktemp(tmp_file);
> +	perf->fd = open(tmp_file, O_RDWR|O_CREAT|O_TRUNC, 0600);
> +	unlink(tmp_file);
> +	if (perf->fd < 0)
> +		goto error;
> +
> +	perf->evsel = perf_evsel__new(&perf->attr);
> +	if (!perf->evsel)
> +		goto error;
> +
> +	perf_evlist__add(perf->evlist, perf->evsel);
> +
> +	free(tmp_file);
> +	return perf;
> +
> +error:
> +	py_perf_handle_free(perf);
> +	free(tmp_file);
> +	return NULL;
> +}
> +
> +PyObject *PyPerfSampler_new(PyObject *self, PyObject *args, PyObject *kwargs)

The name of this function has to be 'PyPerf_sampler_instance'
> +{
> +	char *kwlist[] = {"pid", "command", "freq", "argv", NULL};

The 'command' argument looks redundant. It is the same as argv[0].

> +	PyObject *py_perf, *py_arg, *py_args = NULL;
> +	struct perf_handle *perf = NULL;
> +	int freq = 10, pid = 0;
> +	char *command = NULL;
> +	char **argv = NULL;
> +	int i, argc;
> +
> +	if (!PyArg_ParseTupleAndKeywords(args,
> +					 kwargs,
> +					 "|isiO",
> +					 kwlist,
> +					 &pid,
> +					 &command,
> +					 &freq,
> +					 &py_args
> +					 )) {
> +		return NULL;
> +	}
> +
> +	if (pid == 0 && !command) {

We also have to handle the case when both 'pid' and 'command' (argv) are provided,
as well as the case when 'pid' is negative.

> +		PyErr_Format(PERF_ERROR, "PID or command must be specified");
> +		return NULL;
> +	}
> +
> +	if (command && py_args) {
> +		if (!PyList_CheckExact(py_args)) {
> +			PyErr_SetString(PERF_ERROR, "Failed to parse argv list");
> +			return NULL;
> +		}
> +		argc = PyList_Size(py_args);
> +		argv = calloc(argc + 1, sizeof(char *));
> +		for (i = 0; i < argc; i++) {
> +			py_arg = PyList_GetItem(py_args, i);
> +			if (!PyUnicode_Check(py_arg))
> +				continue;
> +			argv[i] = strdup(PyUnicode_DATA(py_arg));
> +			if (!argv[i])
> +				return NULL;
> +		}
> +		argv[i] = NULL;
This was allocated using calloc(). No need to set it to NULL.

> +	}
> +
> +	perf = new_perf_sampling_handle(freq, pid, command, argv);
> +
> +	if (!perf) {
> +		PyErr_SetString(PERF_ERROR, "Failed create new perf context");
> +		return NULL;
> +	}
> +
> +	py_perf = PyPerf_New(perf);
> +
> +	return py_perf;
> +}
> +
> +static void perf_read_sample(struct perf_handle *perf, struct perf_record_sample *event)
> +{
> +	struct event_sample *sample;
> +	uint64_t i;
> +
> +	sample = (struct event_sample *)(event->array);
> +
> +	/* check if the sample is for our PID */
> +	if (sample->pid != perf->pid)
> +		return;
> +
> +	if (perf->debug)
> +		dbg_trace_add_resolve_symbol(perf->debug, sample->ip, NULL, 0);
> +
> +	if (write(perf->fd, sample, sizeof(*sample)) != sizeof(*sample))
> +		return;
> +
> +	for (i = 0; i < sample->nr; i++) {
> +		if (write(perf->fd, &sample->ips[i], sizeof(uint64_t)) != sizeof(uint64_t))
> +			return;
> +		if (perf->debug)
> +			dbg_trace_add_resolve_symbol(perf->debug, sample->ips[i], NULL, 0);
> +	}
> +}
> +
> +/* A new memory is mapped to traced process */
> +static void perf_read_mmap2(struct perf_handle *perf, struct perf_record_mmap2 *mmap)
> +{
> +	/* check if mmap is for our PID */
> +	if (perf->pid != mmap->pid)
> +		return;
> +
> +	/* check if executable memory is mapped */
> +	if (mmap->header.misc & PERF_RECORD_MISC_MMAP_DATA)
> +		return;
> +
> +	/*
> +	 * A new dynamic library is dlopen() by the traced process,
> +	 * store it for vma -> name resolving
> +	 */
> +	dbg_trace_context_add_file(perf->debug, mmap->filename,
> +				   mmap->start, mmap->start + mmap->len, mmap->pgoff);
> +}
> +
> +/* A new thread is started */
> +static void perf_read_comm(struct perf_handle *perf, struct perf_record_comm *comm)
> +{
> +	struct perf_scan_thread *tmp;
> +	int i;
> +
> +	/* check if the thread is started by PID */
> +	if (perf->pid != comm->pid)
> +		return;
> +
> +	for (i = 0; i < perf->thr_count; i++) {
> +		if (perf->thr_map[i].tid == comm->tid) {
> +			free(perf->thr_map[i].comm);
> +			perf->thr_map[i].comm = strdup(comm->comm);
> +			return;
> +		}
> +	}
> +
> +	tmp = realloc(perf->thr_map, (perf->thr_count + 1) * sizeof(struct perf_scan_thread));
> +	if (!tmp)
> +		return;
> +
> +	perf->thr_map = tmp;
> +	perf->thr_map[perf->thr_count].tid = comm->tid;
> +	perf->thr_map[perf->thr_count].comm = strdup(comm->comm);
> +	perf->thr_count++;
> +}
> +
> +static void *perf_reader_thread(void *data)
> +{
> +	struct perf_handle *perf = data;
> +	struct perf_mmap *map;
> +	union perf_event *event;
> +
> +	perf_evlist__enable(perf->evlist);
> +
> +	while (true) {
> +		if (!perf->running)
> +			break;
> +		perf_evlist__for_each_mmap(perf->evlist, map, false) {
> +			if (perf_mmap__read_init(map) < 0)
> +				continue;
> +
> +			while ((event = perf_mmap__read_event(map)) != NULL) {
> +
empty line^

> +				switch (event->sample.header.type) {
> +				case PERF_RECORD_SAMPLE:
> +					perf_read_sample(perf, (struct perf_record_sample *)event);
> +					break;
> +				case PERF_RECORD_COMM:
> +					perf_read_comm(perf, (struct perf_record_comm *)event);
> +					break;
> +				case PERF_RECORD_MMAP2:
> +					perf_read_mmap2(perf, (struct perf_record_mmap2 *)event);
> +					break;
> +				}
> +
> +				perf_mmap__consume(map);
> +			}
> +
> +			perf_mmap__read_done(map);
> +		}
> +	}
> +	perf_evlist__disable(perf->evlist);
> +	pthread_exit(0);
> +}
> +
> +static int increase_file_limit(void)
> +{
> +	struct rlimit lim;
> +
> +	if (getrlimit(RLIMIT_NOFILE, &lim))
> +		return -1;
> +
> +	if (lim.rlim_cur < lim.rlim_max) {
> +		lim.rlim_cur = lim.rlim_max;
> +	} else {
> +		lim.rlim_cur += 100;
> +		lim.rlim_max += 100;

I wonder where this number 100 comes from?

> +	}
> +
> +	return setrlimit(RLIMIT_NOFILE, &lim);
> +}
> +
> +static int perf_maps_init(struct perf_handle *perf)
> +{
> +	int ret;
> +
> +	if (!perf->threads)
> +		perf->threads = create_thread_map(perf);
> +	if (!perf->threads)
> +		return -1;
> +
> +	perf_evlist__set_maps(perf->evlist, perf->cpus, perf->threads);
> +	do {
> +		ret = perf_evlist__open(perf->evlist);
> +		if (!ret)
> +			break;
> +		if (ret != -EMFILE)
> +			goto out;
> +		ret = increase_file_limit();
> +		if (ret)
> +			goto out;
> +	} while (ret);
> +

This loop seems over-complicated. Something like this reads easier (at least for me):

	for (;;) {
		ret = perf_evlist__open(perf->evlist);
		if (ret == 0)
			break;

		if (ret != -EMFILE)
			goto out;

		if (increase_file_limit() != 0)
			goto out;
	}

> +	ret = perf_evlist__mmap(perf->evlist, 4);
> +out:
> +	if (ret)
> +		perf_evlist__close(perf->evlist);
> +	return ret;
> +}
> +
> +static int perf_reader_start(struct perf_handle *perf)
> +{
> +	pthread_attr_t attrib;
> +	int ret;
> +
> +	pthread_attr_init(&attrib);
> +	pthread_attr_setdetachstate(&attrib, PTHREAD_CREATE_JOINABLE);
> +	ret = pthread_create(&perf->reader, &attrib, perf_reader_thread, perf);
> +	if (!ret)
> +		perf->running = true;
> +
> +	pthread_attr_destroy(&attrib);
> +	return ret;
> +}
> +
> +#define PERF_EXEC_SYNC	"/TC_PERF_SYNC_XXXXXX"
> +static int perf_run_cmd(struct perf_handle *perf)
> +{
> +	char *envp[] = {NULL};
> +	char sname[strlen(PERF_EXEC_SYNC) + 1];
> +	sem_t *sem;
> +	pid_t pid;
> +	int ret;
> +
> +	strcpy(sname, PERF_EXEC_SYNC);
> +	mktemp(sname);
> +	sem = sem_open(sname, O_CREAT | O_EXCL, 0644, 0);
> +	sem_unlink(sname);
> +
> +	pid = fork();
> +	if (pid < 0)
> +		return -1;
> +	if (pid == 0) {
> +		sem_wait(sem);
> +		execvpe(perf->command, perf->argv, envp);
> +	} else {
> +		perf->pid = pid;
> +		ret = perf_maps_init(perf);
> +		if (!ret)
> +			ret = perf_reader_start(perf);
> +		sem_post(sem);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static int perf_start_sampling(struct perf_handle *perf)
> +{
> +	int ret;
> +
> +	if (perf->running)
> +		return 0;
> +
> +	if (perf->command)
> +		return perf_run_cmd(perf);
> +
> +	ret = perf_maps_init(perf);
> +	if (!ret)
> +		ret = perf_reader_start(perf);
> +
> +	return ret;
> +}
> +
> +static bool sampling_run;
> +
> +static void sampling_stop(int sig)
> +{
> +	sampling_run = false;
> +}
> +
> +static void sampling_timer(int sig, siginfo_t *si, void *uc)
> +{
> +	sampling_run = false;
> +}
> +
> +#define PID_WAIT_CHECK_USEC	500000
> +#define TIMER_SEC_NANO		1000000000LL
> +static int perf_wait_pid(struct perf_handle *perf)
> +{
> +	struct itimerspec tperiod = {0};
> +	struct sigaction saction = {0};
> +	struct sigevent stime = {0};
> +	timer_t timer_id;
> +
> +	if (perf->pid == 0)
> +		return -1;
> +
> +	sampling_run = true;
> +	signal(SIGINT, sampling_stop);
> +
> +	if (perf->trace_time) {
> +		stime.sigev_notify = SIGEV_SIGNAL;
> +		stime.sigev_signo = SIGRTMIN;
> +		if (timer_create(CLOCK_MONOTONIC, &stime, &timer_id))
> +			return -1;
> +		saction.sa_flags = SA_SIGINFO;
> +		saction.sa_sigaction = sampling_timer;
> +		sigemptyset(&saction.sa_mask);
> +		if (sigaction(SIGRTMIN, &saction, NULL)) {
> +			timer_delete(timer_id);
> +			return -1;
> +		}
> +		/* covert trace_time from msec to sec, nsec */
> +		tperiod.it_value.tv_nsec = ((unsigned long long)perf->trace_time * 1000000LL);
> +		if (tperiod.it_value.tv_nsec >= TIMER_SEC_NANO) {
> +			tperiod.it_value.tv_sec = tperiod.it_value.tv_nsec / TIMER_SEC_NANO;
> +			tperiod.it_value.tv_nsec %= TIMER_SEC_NANO;
> +		}
> +		if (timer_settime(timer_id, 0, &tperiod, NULL))
> +			return -1;
> +	}
> +
> +	do {
> +		if (perf->command) { /* wait for a child */
> +			if (waitpid(perf->pid, NULL, WNOHANG) == (int)perf->pid) {
> +				perf->pid = 0;
> +				sampling_run = false;
> +			}
> +		} else { /* not a child, check if still exist */
> +			if (kill(perf->pid, 0) == -1 && errno == ESRCH) {
> +				perf->pid = 0;
> +				sampling_run = false;
> +			}
> +		}
> +		usleep(PID_WAIT_CHECK_USEC);
> +	} while (sampling_run);
> +
> +	if (perf->trace_time)
> +		timer_delete(timer_id);
> +
> +	signal(SIGINT, SIG_DFL);
> +
> +	return 0;
> +}
> +
> +PyObject *PyPerf_enable(PyPerf *self, PyObject *args, PyObject *kwargs)
> +{
> +	char *kwlist[] = {"wait", "time", NULL};
> +	struct perf_handle *perf = self->ptrObj;
> +	int wait = false;
> +	int ret;
> +
> +	if (perf->running) {
> +		PyErr_Format(PERF_ERROR, "Perf sampling is already started");
> +		return NULL;
> +	}
> +
> +	perf->trace_time = 0;
> +	if (!PyArg_ParseTupleAndKeywords(args,
> +					 kwargs,
> +					 "|pi",
> +					 kwlist,
> +					 &wait,
> +					 &perf->trace_time)) {
> +		return NULL;
> +	}
> +
> +	ret = perf_start_sampling(perf);
> +	if (ret) {
> +		PyErr_Format(PERF_ERROR,
> +			     "Failed to start perf sampling - %s", strerror(-ret));
> +		return NULL;
> +	}
> +
> +	if (wait) {
> +		perf_wait_pid(perf);
> +		py_perf_handle_destroy(perf);
> +	}
> +
> +	Py_RETURN_NONE;
> +}
> +
> +PyObject *PyPerf_disable(PyPerf *self)
> +{
> +	struct perf_handle *perf = self->ptrObj;
> +
> +	if (!perf->running) {
> +		PyErr_Format(PERF_ERROR, "Perf reader is not started");
> +		return NULL;
> +	}
> +
> +	py_perf_handle_destroy(perf);
> +
> +	Py_RETURN_NONE;
> +}
> +
> +struct symb_walk {
> +	uint64_t ip;
> +	char *name;
> +};
> +
> +static int sym_get(struct dbg_trace_symbols *symb, void *data)
> +{
> +	struct symb_walk *s = (struct symb_walk *)data;
> +
> +	if (s->ip == symb->vma_near) {
> +		if (symb->name)
> +			asprintf(&s->name, "%s @ %s", symb->name, symb->fname);
> +		else
> +			asprintf(&s->name, "(0x%llX) @ %s", symb->vma_near, symb->fname);
> +		return 1;
> +	}
> +	return 0;
> +}
> +
> +static char *ip_name(struct perf_handle *perf, uint64_t ip)
> +{
> +	struct symb_walk symb;
> +
> +	symb.ip = ip;
> +	symb.name = NULL;
> +	if (perf && perf->debug)
> +		dbg_trace_walk_resolved_symbols(perf->debug, sym_get, &symb);
> +
> +	if (!symb.name)
> +		asprintf(&symb.name, "0x%lX", ip);
> +
> +	return symb.name;
> +}
> +
> +PyObject *PyPerf_getSamples(PyPerf *self)
> +{
> +	struct perf_handle *perf = self->ptrObj;
> +	struct event_sample sample;
> +	struct perf_event_sample *store;
> +	PyObject *slist, *sobject;
> +	uint64_t i, ip;
> +	int ca = 0, cs = 0;
> +
> +	if (perf->running) {
> +		PyErr_Format(PERF_ERROR, "Perf reader is running");
> +		return NULL;
> +	}
> +
> +	if (perf->debug && !perf->debug_resolved) {
> +		dbg_trace_resolve_symbols(perf->debug);
> +		perf->debug_resolved = true;
> +	}
> +
> +	if (lseek(perf->fd, 0, SEEK_SET) == (off_t)-1) {
> +		PyErr_Format(PERF_ERROR, "No samples");
> +		return NULL;
> +	}
> +
> +	slist = PyList_New(0);
> +	do {
> +		if (read(perf->fd, &sample, sizeof(sample)) != sizeof(sample))
> +			break;
> +		ca++;
> +		store = calloc(1, sizeof(*store) + (sample.nr * sizeof(char *)));
> +		if (!store)
> +			break;
> +		memcpy(&store->data, &sample, sizeof(sample));
> +		store->perf = perf;
> +		store->ip = ip_name(perf, store->data.ip);
> +		for (i = 0; i < sample.nr; i++) {
> +			if (read(perf->fd, &ip, sizeof(uint64_t)) != sizeof(uint64_t))
> +				break;
> +			store->ips[i] = ip_name(perf, ip);
> +		}
> +		cs += sample.nr;
> +		if (i < sample.nr)
> +			break;
> +		sobject = PyPerfEventSampler_New(store);
> +		PyList_Append(slist, sobject);
> +	} while (true);
> +	ftruncate(perf->fd, 0);
> +	return slist;
> +}
> +
> +PyObject *PyPerfSampler_id(PyPerfEventSampler *self)
> +{
> +	struct perf_event_sample *sample = self->ptrObj;
> +
> +	return PyLong_FromUnsignedLongLong(sample->data.id);
> +}
> +
> +PyObject *PyPerfSampler_pid(PyPerfEventSampler *self)
> +{
> +	struct perf_event_sample *sample = self->ptrObj;
> +
> +	return PyLong_FromUnsignedLong(sample->data.pid);
> +}
> +
> +PyObject *PyPerfSampler_tid(PyPerfEventSampler *self)
> +{
> +	struct perf_event_sample *sample = self->ptrObj;
> +
> +	return PyLong_FromUnsignedLong(sample->data.tid);
> +}
> +
> +PyObject *PyPerfSampler_time(PyPerfEventSampler *self)
> +{
> +	struct perf_event_sample *sample = self->ptrObj;
> +
> +	return PyLong_FromUnsignedLongLong(sample->data.time);
> +}
> +
> +PyObject *PyPerfSampler_cpu(PyPerfEventSampler *self)
> +{
> +	struct perf_event_sample *sample = self->ptrObj;
> +
> +	return PyLong_FromUnsignedLong(sample->data.cpu);
> +}
> +
> +PyObject *PyPerfSampler_nr(PyPerfEventSampler *self)
> +{
> +	struct perf_event_sample *sample = self->ptrObj;
> +
> +	return PyLong_FromUnsignedLongLong(sample->data.nr);
> +}
> +
> +PyObject *PyPerfSampler_ip(PyPerfEventSampler *self)
> +{
> +	struct perf_event_sample *sample = self->ptrObj;
> +
> +	return PyUnicode_FromString(sample->ip);
> +}
> +
> +PyObject *PyPerfSampler_tid_comm(PyPerfEventSampler *self, PyObject *args, PyObject *kwargs)
> +{
> +	struct perf_event_sample *sample = self->ptrObj;
> +	char *name = NULL;
> +	int i;
> +
> +	if (!sample->perf || !sample->perf->thr_count || !sample->perf->thr_map)
> +		Py_RETURN_NONE;
> +
> +	for (i = 0; i < sample->perf->thr_count; i++)
> +		if (sample->perf->thr_map[i].tid == sample->data.tid)
> +			break;
> +
> +	if (i < sample->perf->thr_count && sample->perf->thr_map[i].comm)
> +		name = sample->perf->thr_map[i].comm;
> +
> +	if (name)
> +		return PyUnicode_FromString(name);
> +
> +	Py_RETURN_NONE;
> +}
> +
> +PyObject *PyPerfSampler_ips(PyPerfEventSampler *self)
> +{
> +	struct perf_event_sample *sample = self->ptrObj;
> +	PyObject *slist;
> +	unsigned int i;
> +
> +	slist = PyList_New(0);
> +	for (i = 0 ; i < sample->data.nr; i++)
> +		PyList_Append(slist, PyUnicode_FromString((char *)sample->ips[i]));
> +
> +	return slist;
> +}
> diff --git a/src/perfpy-utils.h b/src/perfpy-utils.h
> new file mode 100644
> index 0000000..648a8ce
> --- /dev/null
> +++ b/src/perfpy-utils.h
> @@ -0,0 +1,43 @@
> +/* SPDX-License-Identifier: LGPL-2.1 */
> +
> +/*
> + * Copyright 2022 VMware Inc, Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
> + */
> +
> +#ifndef _TC_PERF_PY_UTILS
> +#define _TC_PERF_PY_UTILS
> +
> +// Python
> +#include <Python.h>
> +
> +// trace-cruncher
> +#include "common.h"
> +
> +struct perf_handle;
> +struct perf_event_sample;
> +
> +C_OBJECT_WRAPPER_DECLARE(perf_handle, PyPerf);
> +C_OBJECT_WRAPPER_DECLARE(perf_event_sample, PyPerfEventSampler);
> +
> +PyObject *PyPerfSampler_new(PyObject *self, PyObject *args, PyObject *kwargs);
> +
> +PyObject *PyPerf_enable(PyPerf *self, PyObject *args, PyObject *kwargs);
> +PyObject *PyPerf_disable(PyPerf *self);
> +PyObject *PyPerf_getSamples(PyPerf *self);
> +
> +PyObject *PyPerfSampler_id(PyPerfEventSampler *self);
> +PyObject *PyPerfSampler_ip(PyPerfEventSampler *self);
> +PyObject *PyPerfSampler_pid(PyPerfEventSampler *self);
> +PyObject *PyPerfSampler_tid(PyPerfEventSampler *self);
> +PyObject *PyPerfSampler_tid_comm(PyPerfEventSampler *self, PyObject *args, PyObject *kwargs);
> +PyObject *PyPerfSampler_time(PyPerfEventSampler *self);
> +PyObject *PyPerfSampler_cpu(PyPerfEventSampler *self);
> +PyObject *PyPerfSampler_nr(PyPerfEventSampler *self);
> +PyObject *PyPerfSampler_ips(PyPerfEventSampler *self);
> +
> +void py_perf_handle_free(struct perf_handle *perf);
> +int py_perf_handle_destroy(struct perf_handle *perf);
> +
> +void py_perf_sample_free(struct perf_event_sample *sample);
> +
> +#endif
> diff --git a/src/perfpy.c b/src/perfpy.c
> new file mode 100644
> index 0000000..a6b2042
> --- /dev/null
> +++ b/src/perfpy.c
> @@ -0,0 +1,141 @@
> +// SPDX-License-Identifier: LGPL-2.1
> +
> +/*
> + * Copyright 2022 VMware Inc, Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
> + */
> +
> +// Python
> +#include <Python.h>
> +
> +// libperf
> +#include <perf/core.h>
> +#include <perf/evsel.h>
> +#include <perf/mmap.h>
> +#include <perf/event.h>
> +
> +// trace-cruncher
> +#include "common.h"
> +#include "perfpy-utils.h"
> +
> +extern PyObject *PERF_ERROR;
> +
> +static PyMethodDef PyPerf_methods[] = {
> +	{"enable",
> +	 (PyCFunction) PyPerf_enable,
> +	 METH_VARARGS | METH_KEYWORDS,
> +	 "start sampling"
> +	},
> +	{"disable",
> +	 (PyCFunction) PyPerf_disable,
> +	 METH_NOARGS,
> +	 "stop sampling"
> +	},
> +	{"get_samples",
> +	 (PyCFunction) PyPerf_getSamples,
> +	 METH_NOARGS,
> +	 "get recorded samples"
> +	},
> +	{NULL}
> +};
> +C_OBJECT_WRAPPER(perf_handle, PyPerf, py_perf_handle_destroy, py_perf_handle_free);
> +
> +static PyMethodDef PyPerfEventSampler_methods[] = {
> +	{"id",
> +	 (PyCFunction) PyPerfSampler_id,
> +	 METH_NOARGS,
> +	 "get sample id"
> +	},
> +	{"ip",
> +	 (PyCFunction) PyPerfSampler_ip,
> +	 METH_NOARGS,
> +	 "get sample ip"
> +	},
> +	{"pid",
> +	 (PyCFunction) PyPerfSampler_pid,
> +	 METH_NOARGS,
> +	 "get sample pid"
> +	},
> +	{"tid",
> +	 (PyCFunction) PyPerfSampler_tid,
> +	 METH_NOARGS,
> +	 "get sample tid"
> +	},
> +	{"tid_comm",
> +	 (PyCFunction) PyPerfSampler_tid_comm,
> +	 METH_VARARGS | METH_KEYWORDS,
> +	 "get sample tid"
> +	},
> +	{"time",
> +	 (PyCFunction) PyPerfSampler_time,
> +	 METH_NOARGS,
> +	 "get sample timestamp"
> +	},
> +	{"cpu",
> +	 (PyCFunction) PyPerfSampler_cpu,
> +	 METH_NOARGS,
> +	 "get sample cpu"
> +	},
> +	{"stack_count",
> +	 (PyCFunction) PyPerfSampler_nr,
> +	 METH_NOARGS,
> +	 "get sample stack count"
> +	},
> +	{"stack",
> +	 (PyCFunction) PyPerfSampler_ips,
> +	 METH_NOARGS,
> +	 "get sample stack"
> +	},
> +	{NULL}

So far I've been using as a convention that the 'C' function that implements a method of the Python module has a name 
that starts with the type of the module (or object from the module) as a prefix, followed by the name of the method itself.

For example the name of the function that implements 'stack()' must be 'PyPerfEventSampler_stack()'.

Thanks!
Yordan

> +};
> +C_OBJECT_WRAPPER(perf_event_sample, PyPerfEventSampler, NO_DESTROY, py_perf_sample_free);
> +
> +static PyMethodDef perfpy_methods[] = {
> +	{"sampler_instance",
> +	 (PyCFunction) PyPerfSampler_new,
> +	 METH_VARARGS | METH_KEYWORDS,
> +	 "Allocate new perf sampler instance"
> +	},
> +	{NULL}
> +};
> +
> +static int perf_error_print(enum libperf_print_level level,
> +			    const char *fmt, va_list ap)
> +{
> +	return vfprintf(stderr, fmt, ap);
> +}
> +
> +static struct PyModuleDef perfpy_module = {
> +	PyModuleDef_HEAD_INIT,
> +	"perfpy",
> +	"Python interface for Perf.",
> +	-1,
> +	perfpy_methods
> +};
> +
> +PyMODINIT_FUNC PyInit_perfpy(void)
> +{
> +
> +	if (!PyPerfTypeInit())
> +		return NULL;
> +	if (!PyPerfEventSamplerTypeInit())
> +		return NULL;
> +
> +	PERF_ERROR = PyErr_NewException("tracecruncher.perfpy.perf_error",
> +					NULL, NULL);
> +
> +	PyObject *module = PyModule_Create(&perfpy_module);
> +
> +	PyModule_AddObject(module, "perf_error", PERF_ERROR);
> +	PyModule_AddObject(module, "perf_handle", (PyObject *) &PyPerfType);
> +	PyModule_AddObject(module, "perf_event_sample", (PyObject *) &PyPerfEventSamplerType);
> +
> +	if (geteuid() != 0) {
> +		PyErr_SetString(PERF_ERROR,
> +				"Permission denied. Root privileges are required.");
> +		return NULL;
> +	}
> +
> +	libperf_init(perf_error_print);
> +
> +	return module;
> +}

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH v2 2/3] trace-cruncher: Support for perf
  2022-04-14 12:58   ` Yordan Karadzhov
@ 2022-04-15 11:35     ` Tzvetomir Stoyanov
  0 siblings, 0 replies; 6+ messages in thread
From: Tzvetomir Stoyanov @ 2022-04-15 11:35 UTC (permalink / raw)
  To: Yordan Karadzhov; +Cc: Steven Rostedt, Linux Trace Devel

On Thu, Apr 14, 2022 at 3:58 PM Yordan Karadzhov <y.karadz@gmail.com> wrote:
>
>
>
> On 8.04.22 г. 13:03 ч., Tzvetomir Stoyanov (VMware) wrote:
> > Initial perf support for trace-cruncher, using libperf. As a first
> > stage, collecting of stack trace samples of given process is supported.
> >
> > Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
> > ---
> >   setup.py           |   9 +-
> >   src/perfpy-utils.c | 896 +++++++++++++++++++++++++++++++++++++++++++++
> >   src/perfpy-utils.h |  43 +++
> >   src/perfpy.c       | 141 +++++++
> >   4 files changed, 1087 insertions(+), 2 deletions(-)
> >   create mode 100644 src/perfpy-utils.c
> >   create mode 100644 src/perfpy-utils.h
> >   create mode 100644 src/perfpy.c
> >
> [  .. ]
> > +int py_perf_handle_destroy(struct perf_handle *perf)
> > +{
> > +     if (!perf || !perf->running)
> > +             return 0;
> > +
> > +     perf->running = false;
> > +     pthread_join(perf->reader, NULL);
> > +     fsync(perf->fd);
> > +     if (perf->command && perf->pid > 0) {
> > +             kill(perf->pid, SIGINT);
> > +             perf->pid = 0;
>
> Maybe we can free 'perf->command' and set it to NULL here.

Freeing the command string should be done in py_perf_handle_free(); that
seems more logical. The py_perf_handle_destroy() function only disables the
currently running perf sampling. It should be possible to run sampling again
on the same command, by calling PyPerf_enable().

>
> > +     }
> > +
> > +     return 0;
> > +}
> > +
[ ... ]
> > +
> > +PyObject *PyPerfSampler_new(PyObject *self, PyObject *args, PyObject *kwargs)
>
> The name of this function has to be 'PyPerf_sampler_instance'
> > +{
> > +     char *kwlist[] = {"pid", "command", "freq", "argv", NULL};
>
> 'command' argument look redundant. It is the same as argv[0]

There is a small difference between 'command' and argv[0]. The
'command' argument is supposed to contain the full path to the
application file that should be traced, while argv[0] could be just
the file name, without the path. The full path of the file is resolved
in the Python code and then passed to the API. For example, in:
  ./perf_sampling.py ls -al
'command' is '/usr/bin/ls'
argv[0] is 'ls'

>
> > +     PyObject *py_perf, *py_arg, *py_args = NULL;
> > +     struct perf_handle *perf = NULL;
> > +     int freq = 10, pid = 0;
> > +     char *command = NULL;
> > +     char **argv = NULL;
> > +     int i, argc;
> > +
> > +     if (!PyArg_ParseTupleAndKeywords(args,
> > +                                      kwargs,
> > +                                      "|isiO",
> > +                                      kwlist,
> > +                                      &pid,
> > +                                      &command,
> > +                                      &freq,
> > +                                      &py_args
> > +                                      )) {
> > +             return NULL;
> > +     }
> > +
> > +     if (pid == 0 && !command) {
>
> We have to handle also the case when both 'pid' and 'command' (argv) are provided.
> Also the case when 'pid' is negative.

I'll add a check for negative pid. In case both 'pid' and 'command'
are provided, 'command' takes priority - that logic is in
new_perf_sampling_handle().

>
> > +             PyErr_Format(PERF_ERROR, "PID or command must be specified");
> > +             return NULL;
> > +     }
> > +
> > +     if (command && py_args) {
> > +             if (!PyList_CheckExact(py_args)) {
> > +                     PyErr_SetString(PERF_ERROR, "Failed to parse argv list");
> > +                     return NULL;
> > +             }
> > +             argc = PyList_Size(py_args);
> > +             argv = calloc(argc + 1, sizeof(char *));
> > +             for (i = 0; i < argc; i++) {
> > +                     py_arg = PyList_GetItem(py_args, i);
> > +                     if (!PyUnicode_Check(py_arg))
> > +                             continue;
> > +                     argv[i] = strdup(PyUnicode_DATA(py_arg));
> > +                     if (!argv[i])
> > +                             return NULL;
> > +             }
> > +             argv[i] = NULL;
> This was allocated using calloc(). No need to set it to NULL.
>
> > +     }
> > +
> > +     perf = new_perf_sampling_handle(freq, pid, command, argv);
> > +
> > +     if (!perf) {
> > +             PyErr_SetString(PERF_ERROR, "Failed create new perf context");
> > +             return NULL;
> > +     }
> > +
> > +     py_perf = PyPerf_New(perf);
> > +
> > +     return py_perf;
> > +}
[ ... ]
> > +static int increase_file_limit(void)
> > +{
> > +     struct rlimit lim;
> > +
> > +     if (getrlimit(RLIMIT_NOFILE, &lim))
> > +             return -1;
> > +
> > +     if (lim.rlim_cur < lim.rlim_max) {
> > +             lim.rlim_cur = lim.rlim_max;
> > +     } else {
> > +             lim.rlim_cur += 100;
> > +             lim.rlim_max += 100;
>
> I wonder where this number 100 comes from?

It is just a safe step by which to increase the limit. Perf can use a lot of
file descriptors; on some systems the default limit is not enough.

>
> > +     }
> > +
> > +     return setrlimit(RLIMIT_NOFILE, &lim);
> > +}
> > +
> > +static int perf_maps_init(struct perf_handle *perf)
> > +{
> > +     int ret;
> > +
> > +     if (!perf->threads)
> > +             perf->threads = create_thread_map(perf);
> > +     if (!perf->threads)
> > +             return -1;
> > +
> > +     perf_evlist__set_maps(perf->evlist, perf->cpus, perf->threads);
> > +     do {
> > +             ret = perf_evlist__open(perf->evlist);
> > +             if (!ret)
> > +                     break;
> > +             if (ret != -EMFILE)
> > +                     goto out;
> > +             ret = increase_file_limit();
> > +             if (ret)
> > +                     goto out;
> > +     } while (ret);
> > +
>
> This loops seems over-complicated. Something like this reads easier (at least for me)
>
>         for(;;) {
>                 if (perf_evlist__open(perf->evlist) == 0)
>                         break;
>
>                 if (ret != -EMFILE)
>                         goto out;
>
>                 if (increase_file_limit() != 0)
>                         goto out;
>         }
>

The return code must be preserved, because it is returned in case of
an error. Also, the "not enough file descriptors" error (-EMFILE)
should be handled, which the suggested loop does not do.

> > +     ret = perf_evlist__mmap(perf->evlist, 4);
> > +out:
> > +     if (ret)
> > +             perf_evlist__close(perf->evlist);
> > +     return ret;
> > +}
> > +
[  ... ]
> > +static PyMethodDef PyPerfEventSampler_methods[] = {
> > +     {"id",
> > +      (PyCFunction) PyPerfSampler_id,
> > +      METH_NOARGS,
> > +      "get sample id"
> > +     },
> > +     {"ip",
> > +      (PyCFunction) PyPerfSampler_ip,
> > +      METH_NOARGS,
> > +      "get sample ip"
> > +     },
> > +     {"pid",
> > +      (PyCFunction) PyPerfSampler_pid,
> > +      METH_NOARGS,
> > +      "get sample pid"
> > +     },
> > +     {"tid",
> > +      (PyCFunction) PyPerfSampler_tid,
> > +      METH_NOARGS,
> > +      "get sample tid"
> > +     },
> > +     {"tid_comm",
> > +      (PyCFunction) PyPerfSampler_tid_comm,
> > +      METH_VARARGS | METH_KEYWORDS,
> > +      "get sample tid"
> > +     },
> > +     {"time",
> > +      (PyCFunction) PyPerfSampler_time,
> > +      METH_NOARGS,
> > +      "get sample timestamp"
> > +     },
> > +     {"cpu",
> > +      (PyCFunction) PyPerfSampler_cpu,
> > +      METH_NOARGS,
> > +      "get sample cpu"
> > +     },
> > +     {"stack_count",
> > +      (PyCFunction) PyPerfSampler_nr,
> > +      METH_NOARGS,
> > +      "get sample stack count"
> > +     },
> > +     {"stack",
> > +      (PyCFunction) PyPerfSampler_ips,
> > +      METH_NOARGS,
> > +      "get sample stack"
> > +     },
> > +     {NULL}
>
> So far I've been using as a convention that the 'C' function that implements a method of the Python module has a name
> that starts with the type of the module (or object from the module) as a prefix, followed by the name of the method itself.
>
> For example the name of the function that implements 'stack()' must be 'PyPerfEventSampler_stack()'.
>

Thanks for this clarification, I'll check the names of the methods.

> Thanks!
> Yordan
>

Thank you for the review! I'll address your comments in v3.

> > +};
> > +C_OBJECT_WRAPPER(perf_event_sample, PyPerfEventSampler, NO_DESTROY, py_perf_sample_free);
> > +
> > +static PyMethodDef perfpy_methods[] = {
> > +     {"sampler_instance",
> > +      (PyCFunction) PyPerfSampler_new,
> > +      METH_VARARGS | METH_KEYWORDS,
> > +      "Allocate new perf sampler instance"
> > +     },
> > +     {NULL}
> > +};
> > +
> > +static int perf_error_print(enum libperf_print_level level,
> > +                         const char *fmt, va_list ap)
> > +{
> > +     return vfprintf(stderr, fmt, ap);
> > +}
> > +
> > +static struct PyModuleDef perfpy_module = {
> > +     PyModuleDef_HEAD_INIT,
> > +     "perfpy",
> > +     "Python interface for Perf.",
> > +     -1,
> > +     perfpy_methods
> > +};
> > +
> > +PyMODINIT_FUNC PyInit_perfpy(void)
> > +{
> > +
> > +     if (!PyPerfTypeInit())
> > +             return NULL;
> > +     if (!PyPerfEventSamplerTypeInit())
> > +             return NULL;
> > +
> > +     PERF_ERROR = PyErr_NewException("tracecruncher.perfpy.perf_error",
> > +                                     NULL, NULL);
> > +
> > +     PyObject *module = PyModule_Create(&perfpy_module);
> > +
> > +     PyModule_AddObject(module, "perf_error", PERF_ERROR);
> > +     PyModule_AddObject(module, "perf_handle", (PyObject *) &PyPerfType);
> > +     PyModule_AddObject(module, "perf_event_sample", (PyObject *) &PyPerfEventSamplerType);
> > +
> > +     if (geteuid() != 0) {
> > +             PyErr_SetString(PERF_ERROR,
> > +                             "Permission denied. Root privileges are required.");
> > +             return NULL;
> > +     }
> > +
> > +     libperf_init(perf_error_print);
> > +
> > +     return module;
> > +}



-- 
Tzvetomir (Ceco) Stoyanov
VMware Open Source Technology Center

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-04-15 11:35 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-08 10:03 [RFC PATCH v2 0/3] trace-cruncher: Initial support for perf Tzvetomir Stoyanov (VMware)
2022-04-08 10:03 ` [RFC PATCH v2 1/3] trace-cruncher: Logic for resolving address to function name Tzvetomir Stoyanov (VMware)
2022-04-08 10:03 ` [RFC PATCH v2 2/3] trace-cruncher: Support for perf Tzvetomir Stoyanov (VMware)
2022-04-14 12:58   ` Yordan Karadzhov
2022-04-15 11:35     ` Tzvetomir Stoyanov
2022-04-08 10:03 ` [RFC PATCH v2 3/3] trace-cruncher: perf example Tzvetomir Stoyanov (VMware)

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.