All of lore.kernel.org
 help / color / mirror / Atom feed
From: Beau Belgrave <beaub@linux.microsoft.com>
To: rostedt@goodmis.org
Cc: linux-trace-devel@vger.kernel.org, beaub@linux.microsoft.com
Subject: [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources
Date: Fri, 18 Feb 2022 14:50:56 -0800	[thread overview]
Message-ID: <20220218225058.12701-2-beaub@linux.microsoft.com> (raw)
In-Reply-To: <20220218225058.12701-1-beaub@linux.microsoft.com>

Adds the required APIs to libtracefs to create, manage and write out
data to trace events via the user_events kernel mechanism.

Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
---
 Makefile                 |   8 +
 include/tracefs-local.h  |  24 ++
 include/tracefs.h        |  60 +++++
 src/Makefile             |   4 +
 src/tracefs-userevents.c | 545 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 641 insertions(+)
 create mode 100644 src/tracefs-userevents.c

diff --git a/Makefile b/Makefile
index 544684c..a4598b4 100644
--- a/Makefile
+++ b/Makefile
@@ -154,6 +154,14 @@ CFLAGS ?= -g -Wall
 CPPFLAGS ?=
 LDFLAGS ?=
 
+USEREVENTS_INSTALLED := $(shell if (echo "$(pound)include <linux/user_events.h>" | $(CC) -E - >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi)
+export USEREVENTS_INSTALLED
+ifeq ($(USEREVENTS_INSTALLED), 1)
+CFLAGS += -DUSEREVENTS
+else
+$(warning user_events.h not installed, skipping)
+endif
+
 CUNIT_INSTALLED := $(shell if (printf "$(pound)include <CUnit/Basic.h>\n void main(){CU_initialize_registry();}" | $(CC) -x c - -lcunit -o /dev/null >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi)
 export CUNIT_INSTALLED
 
diff --git a/include/tracefs-local.h b/include/tracefs-local.h
index bf157e1..e768cba 100644
--- a/include/tracefs-local.h
+++ b/include/tracefs-local.h
@@ -119,4 +119,28 @@ int trace_rescan_events(struct tep_handle *tep,
 struct tep_event *get_tep_event(struct tep_handle *tep,
 				const char *system, const char *name);
 
+/* Internal interface for ftrace user events */
+
+struct tracefs_user_event_group;
+
+struct tracefs_user_event
+{
+	int write_index;
+	int status_index;
+	int iovecs;
+	int rels;
+	int len;
+	struct tracefs_user_event_group *group;
+	struct tracefs_user_event *next;
+};
+
+struct tracefs_user_event_group
+{
+	int fd;
+	int mmap_len;
+	char *mmap;
+	pthread_mutex_t lock;
+	struct tracefs_user_event *events;
+};
+
 #endif /* _TRACE_FS_LOCAL_H */
diff --git a/include/tracefs.h b/include/tracefs.h
index 1848ad0..7871dfe 100644
--- a/include/tracefs.h
+++ b/include/tracefs.h
@@ -571,4 +571,64 @@ struct tracefs_synth *tracefs_sql(struct tep_handle *tep, const char *name,
 struct tep_event *
 tracefs_synth_get_event(struct tep_handle *tep, struct tracefs_synth *synth);
 
+/* User events */
+enum tracefs_uevent_type {
+	TRACEFS_UEVENT_END,
+	TRACEFS_UEVENT_u8,
+	TRACEFS_UEVENT_s8,
+	TRACEFS_UEVENT_u16,
+	TRACEFS_UEVENT_s16,
+	TRACEFS_UEVENT_u32,
+	TRACEFS_UEVENT_s32,
+	TRACEFS_UEVENT_u64,
+	TRACEFS_UEVENT_s64,
+	TRACEFS_UEVENT_string,
+	TRACEFS_UEVENT_struct,
+	TRACEFS_UEVENT_varray,
+	TRACEFS_UEVENT_vstring,
+};
+
+enum tracefs_uevent_flags {
+	/* None */
+	TRACEFS_UEVENT_FLAG_NONE = 0,
+
+	/* When BPF is attached, use iterator/no copy */
+	TRACEFS_UEVENT_FLAG_bpf_iter = 1 << 0,
+};
+
+struct tracefs_uevent_item {
+	/* Type of item */
+	enum tracefs_uevent_type type;
+
+	/* Length of data, optional during register */
+	int len;
+
+	union {
+		/* Used during write */
+		const void *data;
+
+		/* Used during register */
+		const char *name;
+	};
+};
+
+struct tracefs_user_event;
+struct tracefs_user_event_group;
+
+struct tracefs_user_event_group *tracefs_user_event_group_create(void);
+
+void tracefs_user_event_group_close(struct tracefs_user_event_group *group);
+
+int tracefs_user_event_delete(const char *name);
+
+struct tracefs_user_event *
+tracefs_user_event_register(struct tracefs_user_event_group *group,
+			    const char *name, enum tracefs_uevent_flags flags,
+			    struct tracefs_uevent_item *items);
+
+bool tracefs_user_event_test(struct tracefs_user_event *event);
+
+int tracefs_user_event_write(struct tracefs_user_event *event,
+			     struct tracefs_uevent_item *items);
+
 #endif /* _TRACE_FS_H */
diff --git a/src/Makefile b/src/Makefile
index e8afab5..984e8cf 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -14,6 +14,10 @@ OBJS += tracefs-filter.o
 OBJS += tracefs-dynevents.o
 OBJS += tracefs-eprobes.o
 
+ifeq ($(USEREVENTS_INSTALLED), 1)
+OBJS += tracefs-userevents.o
+endif
+
 # Order matters for the the three below
 OBJS += sqlhist-lex.o
 OBJS += sqlhist.tab.o
diff --git a/src/tracefs-userevents.c b/src/tracefs-userevents.c
new file mode 100644
index 0000000..4d64fd8
--- /dev/null
+++ b/src/tracefs-userevents.c
@@ -0,0 +1,545 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2022 Microsoft Corporation.
+ *
+ * Authors:
+ *   Beau Belgrave <beaub@linux.microsoft.com>
+ */
+
+#include <alloca.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/uio.h>
+#include <linux/user_events.h>
+
+#include "tracefs.h"
+#include "tracefs-local.h"
+
+#define STAT_FILE "user_events_status"
+#define DATA_FILE "user_events_data"
+
+static void free_user_events(struct tracefs_user_event *event)
+{
+	struct tracefs_user_event *next;
+
+	while (event) {
+		next = event->next;
+		free(event);
+		event = next;
+	}
+}
+
+#define LEN_OR_ZERO (len ? len - pos : 0)
+static int append_field(struct tracefs_uevent_item *item, char *buf,
+			int len, int offset, int index)
+{
+	int pos = offset;
+
+	if (index != 0)
+		pos += snprintf(buf + pos, LEN_OR_ZERO, ";");
+
+	switch (item->type) {
+	case TRACEFS_UEVENT_u8:
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" u8 %s", item->name);
+		break;
+
+	case TRACEFS_UEVENT_s8:
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" s8 %s", item->name);
+		break;
+
+	case TRACEFS_UEVENT_u16:
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" u16 %s", item->name);
+		break;
+
+	case TRACEFS_UEVENT_s16:
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" s16 %s", item->name);
+		break;
+
+	case TRACEFS_UEVENT_u32:
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" u32 %s", item->name);
+		break;
+
+	case TRACEFS_UEVENT_s32:
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" s32 %s", item->name);
+		break;
+
+	case TRACEFS_UEVENT_u64:
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" u64 %s", item->name);
+		break;
+
+	case TRACEFS_UEVENT_s64:
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" s64 %s", item->name);
+		break;
+
+	case TRACEFS_UEVENT_string:
+		if (item->len <= 0) {
+			errno = EINVAL;
+			return -1;
+		}
+
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" char[%d] %s", item->len, item->name);
+		break;
+
+	case TRACEFS_UEVENT_struct:
+		/*
+		 * struct must have 2 strings, do simple check
+		 * in user, kernel will fully validate
+		 */
+		if (!strchr(item->name, ' ')) {
+			errno = EINVAL;
+			return -1;
+		}
+
+		if (item->len <= 0) {
+			errno = EINVAL;
+			return -1;
+		}
+
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" struct %s %d", item->name, item->len);
+		break;
+
+	case TRACEFS_UEVENT_varray:
+		/* Variable length array */
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" __rel_loc u8[] %s", item->name);
+		break;
+
+	case TRACEFS_UEVENT_vstring:
+		/* Variable length string */
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" __rel_loc char[] %s", item->name);
+		break;
+
+	default:
+		/* Unknown */
+		errno = ENOENT;
+		return -1;
+	}
+
+	return pos;
+}
+
+static int create_reg_cmd(const char *name, enum tracefs_uevent_flags flags,
+			  struct tracefs_uevent_item *items, char *buf, int len)
+{
+	int pos = 0;
+	int index = 0;
+
+	pos += snprintf(buf + pos, LEN_OR_ZERO, "%s", name);
+
+	if (flags & TRACEFS_UEVENT_FLAG_bpf_iter)
+		pos += snprintf(buf + pos, LEN_OR_ZERO, ":BPF_ITER");
+
+	while (items->type != TRACEFS_UEVENT_END) {
+		pos = append_field(items, buf, len, pos, index++);
+
+		if (pos < 0)
+			return pos;
+
+		items++;
+	}
+
+	return pos + 1;
+}
+#undef LEN_OR_ZERO
+
+static int get_write_counts(struct tracefs_user_event *event,
+			    struct tracefs_uevent_item *item)
+{
+	event->rels = 0;
+	event->len = 0;
+
+	/* Start at 1, need iovec for write_index */
+	event->iovecs = 1;
+
+	while (item->type != TRACEFS_UEVENT_END) {
+		switch (item->type) {
+		case TRACEFS_UEVENT_u8:
+		case TRACEFS_UEVENT_s8:
+			event->len += sizeof(__u8);
+			break;
+
+		case TRACEFS_UEVENT_u16:
+		case TRACEFS_UEVENT_s16:
+			event->len += sizeof(__u16);
+			break;
+
+		case TRACEFS_UEVENT_u32:
+		case TRACEFS_UEVENT_s32:
+			event->len += sizeof(__u32);
+			break;
+
+		case TRACEFS_UEVENT_u64:
+		case TRACEFS_UEVENT_s64:
+			event->len += sizeof(__u64);
+			break;
+
+		case TRACEFS_UEVENT_string:
+		case TRACEFS_UEVENT_struct:
+			event->len += item->len;
+			break;
+
+		case TRACEFS_UEVENT_varray:
+		case TRACEFS_UEVENT_vstring:
+			/* Requires a rel loc entry */
+			event->len += sizeof(__u32);
+			event->rels++;
+			break;
+
+		default:
+			/* Unknown */
+			errno = ENOENT;
+			return -1;
+		}
+
+		event->iovecs++;
+		item++;
+	}
+
+	return 0;
+}
+
+/**
+ * tracefs_user_event_group_create - Create a new group to use for user events
+ *
+ * Returns a pointer to a group to use for user events. The pointer is valid until
+ * tracefs_user_event_group_close() is called. In case of an error NULL is
+ * returned.
+ */
+struct tracefs_user_event_group *tracefs_user_event_group_create(void)
+{
+	int stat, write, page_size, i;
+	struct tracefs_user_event_group *group;
+
+	stat = tracefs_instance_file_open(NULL, STAT_FILE, O_RDWR);
+
+	if (stat < 0)
+		return NULL;
+
+	write = tracefs_instance_file_open(NULL, DATA_FILE, O_RDWR);
+
+	if (write < 0)
+		goto put_stat;
+
+	group = malloc(sizeof(*group));
+
+	if (!group)
+		goto put_write;
+
+	if (pthread_mutex_init(&group->lock, NULL) < 0)
+		goto put_group;
+
+	/* Scale up to 16-bit max user events a page at a time */
+	page_size = sysconf(_SC_PAGESIZE);
+	group->mmap_len = page_size;
+
+	for (i = 0; i < 16; ++i) {
+		group->mmap = mmap(NULL, group->mmap_len,
+				   PROT_READ, MAP_SHARED, stat, 0);
+
+		if (group->mmap == MAP_FAILED && errno == EINVAL) {
+			/* Increase by page size and try again */
+			group->mmap_len += page_size;
+			continue;
+		}
+
+		break;
+	}
+
+	if (group->mmap == MAP_FAILED)
+		goto put_group;
+
+	group->fd = write;
+	group->events = NULL;
+
+	/* Status fd no longer needed */
+	close(stat);
+
+	return group;
+
+put_group:
+	free(group);
+put_write:
+	close(write);
+put_stat:
+	close(stat);
+
+	return NULL;
+}
+
+/**
+ * tracefs_user_event_delete - Deletes a user event from the system
+ * @name: Name of the event to delete
+ *
+ * Deletes the event from the system if it is not used.
+ */
+int tracefs_user_event_delete(const char *name)
+{
+	int ret, write;
+       
+	write = tracefs_instance_file_open(NULL, DATA_FILE, O_RDWR);
+
+	if (write < 0)
+		return write;
+
+	ret = ioctl(write, DIAG_IOCSDEL, name);
+
+	close(write);
+
+	return ret;
+}
+
+/**
+ * tracefs_user_event_group_close - Closes a group containing user events
+ * @group: Group to close
+ *
+ * Closes a group and all the user events within it. Any user event that has
+ * been added to the group is no longer valid and cannot be used.
+ */
+void tracefs_user_event_group_close(struct tracefs_user_event_group *group)
+{
+	if (!group)
+		return;
+
+	if (group->mmap != MAP_FAILED)
+		munmap(group->mmap, group->mmap_len);
+
+	if (group->fd != -1)
+		close(group->fd);
+
+	free_user_events(group->events);
+	free(group);
+}
+
+/**
+ * tracefs_user_event_register - Registers a user event with the system
+ * @group: Group to add the user event to
+ * @name: Name of the event to register
+ * @flags: Flags to use
+ * @items: Array of items that the event contains
+ *
+ * Allocates and registers a user event with the system. The user event will be
+ * added to the @group. The lifetime of the event is bound to the @group. When
+ * the @group is closed via tracefs_user_event_group_close() the event will no
+ * longer exist and should not be used.
+ *
+ * The @items are processed in order and the final item type must be set to
+ * TRACEFS_UEVENT_END to mark the last item. Each item must have the type
+ * and name defined. The string and struct type also require the len to be set
+ * for the item.
+ *
+ * Return a pointer to a user event on success, or NULL or error.
+ *
+ * errno will be set to EINVAL if @group is null or unexpected @items.
+ */
+struct tracefs_user_event *
+tracefs_user_event_register(struct tracefs_user_event_group *group,
+			    const char *name, enum tracefs_uevent_flags flags,
+			    struct tracefs_uevent_item *items)
+{
+	struct tracefs_user_event *event = NULL;
+	struct user_reg reg = {0};
+	char *cmd = NULL;
+	int len;
+
+	if (!group || !items) {
+		errno = EINVAL;
+		return NULL;
+	}
+
+	/* Determine length of cmd */
+	len = create_reg_cmd(name, flags, items, cmd, 0);
+
+	if (len < 0) {
+		errno = EINVAL;
+		return NULL;
+	}
+
+	/* Allocate and fill cmd */
+	cmd = malloc(len);
+
+	if (!cmd)
+		return NULL;
+
+	create_reg_cmd(name, flags, items, cmd, len);
+
+	event = malloc(sizeof(*event));
+
+	if (!event)
+		goto put_cmd;
+
+	reg.size = sizeof(reg);
+	reg.name_args = (__u64)cmd;
+
+	/* Register event with kernel */
+	if (ioctl(group->fd, DIAG_IOCSREG, &reg) == -1)
+		goto put_event;
+
+	/* Sanity check bounds returned */
+	if (reg.status_index >= group->mmap_len) {
+		errno = EINVAL;
+		goto put_event;
+	}
+
+	if (get_write_counts(event, items))
+		goto put_event;
+
+	event->write_index = reg.write_index;
+	event->status_index = reg.status_index;
+	event->group = group;
+
+	/* Add event into the group under lock */
+	pthread_mutex_lock(&group->lock);
+	event->next = group->events;
+	group->events = event->next;
+	pthread_mutex_unlock(&group->lock);
+
+	free(cmd);
+
+	return event;
+put_event:
+	free(event);
+put_cmd:
+	free(cmd);
+
+	return NULL;
+}
+
+/**
+ * tracefs_user_event_test - Tests if an event is currently enabled
+ * @event: User event to test
+ *
+ * Tests if the @event is valid and currently enabled on the system.
+ *
+ * Return true if enabled, false otherwise.
+ */
+bool tracefs_user_event_test(struct tracefs_user_event *event)
+{
+	return event && event->group->mmap[event->status_index] != 0;
+}
+
+/**
+ * tracefs_user_event_write - Writes data out to an event
+ * @event: User event to write data about
+ * @items: Items to write for the event
+ *
+ * Writes out items for the event. Callers should check if the cost of writing
+ * should be performed by calling tracefs_user_event_test(). Items are checked
+ * to ensure they fit within the described items during register. Each item
+ * must specify the length of the item being written.
+ *
+ * Return the number of bytes written or -1 upon error.
+ *
+ * errno will be set to EINVAL if @event or @items is null or @items contains
+ * an item with a length of less than or equal to 0.
+ * errno will be set to E2BIG if @items contains more items than previously
+ * registered for the event.
+ */
+int tracefs_user_event_write(struct tracefs_user_event *event,
+			     struct tracefs_uevent_item *items)
+{
+	struct iovec *head, *io, *relio, *io_end;
+	__u32 *rel, *rel_end;
+	int len, rel_offset, data_offset, used;
+
+	if (!event || !items) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	head = io = alloca(sizeof(*io) * (event->iovecs + event->rels));
+	rel = alloca(sizeof(*rel) * event->rels);
+
+	io_end = head + (event->iovecs + event->rels);
+	rel_end = rel + event->rels;
+
+	/* Relative offset starts at end of static data */
+	relio = io + event->iovecs;
+	rel_offset = event->len;
+	data_offset = 0;
+
+	/* Write index must be first */
+	io->iov_base = &event->write_index;
+	io->iov_len = sizeof(event->write_index);
+	io++;
+	used = 1;
+
+	while (items->type != TRACEFS_UEVENT_END) {
+		len = items->len;
+
+		if (len <= 0)
+			goto bad_length;
+
+		if (io >= io_end)
+			goto bad_count;
+
+		switch (items->type) {
+		case TRACEFS_UEVENT_varray:
+		case TRACEFS_UEVENT_vstring:
+			/* Dual vectors */
+			used += 2;
+
+			if (rel >= rel_end || relio >= io_end)
+				goto bad_count;
+
+			/* __rel_loc types */
+			relio->iov_base = (void *)items->data;
+			relio->iov_len = len;
+			relio++;
+
+			io->iov_base = (void *)rel;
+			io->iov_len = sizeof(*rel);
+			io++;
+			rel_offset -= sizeof(*rel);
+
+			/* Fill in rel loc data */
+			*rel = DYN_LOC(rel_offset + data_offset, len);
+			data_offset += len;
+			rel++;
+
+			break;
+
+		default:
+			/* Single vector */
+			used++;
+
+			/* Direct types */
+			io->iov_base = (void *)items->data;
+			io->iov_len = len;
+			io++;
+			rel_offset -= len;
+
+			break;
+		}
+
+		items++;
+	}
+
+	return writev(event->group->fd, head, used);
+
+bad_length:
+	fprintf(stderr, "Bad user_event item length at index %d\n",
+		used - 1);
+	errno = EINVAL;
+	return -1;
+
+bad_count:
+	fprintf(stderr, "Too many user_event items passed\n");
+	errno = E2BIG;
+	return -1;
+}
-- 
2.17.1


  reply	other threads:[~2022-02-18 22:51 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-18 22:50 [PATCH v1 0/3] libtracefs: Add APIs for user_events to libtracefs Beau Belgrave
2022-02-18 22:50 ` Beau Belgrave [this message]
2022-02-21 11:34   ` [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources Yordan Karadzhov
2022-02-21 17:57     ` Beau Belgrave
2022-02-21 19:16       ` Steven Rostedt
2022-02-22  6:27         ` Yordan Karadzhov
2022-02-22 14:00           ` Steven Rostedt
2022-02-22 14:25             ` Yordan Karadzhov
2022-02-22 15:41               ` Steven Rostedt
2022-02-22 16:59             ` Beau Belgrave
2022-02-22 17:10               ` Steven Rostedt
2022-02-22 17:08   ` Steven Rostedt
2022-02-22 17:31   ` Steven Rostedt
2022-02-22 17:39     ` Steven Rostedt
2022-02-22 17:46     ` Beau Belgrave
2022-02-22 18:59       ` Steven Rostedt
2022-02-18 22:50 ` [PATCH v1 2/3] libtracefs: Add documentation and sample code for user_events Beau Belgrave
2022-02-18 22:50 ` [PATCH v1 3/3] libtracefs: Add unit tests " Beau Belgrave
2022-02-22 17:20   ` Steven Rostedt
2022-02-22 17:34 ` [PATCH v1 0/3] libtracefs: Add APIs for user_events to libtracefs Steven Rostedt
2022-02-22 17:50   ` Beau Belgrave
2022-02-22 18:20     ` Steven Rostedt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220218225058.12701-2-beaub@linux.microsoft.com \
    --to=beaub@linux.microsoft.com \
    --cc=linux-trace-devel@vger.kernel.org \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.