All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v1 0/3] libtracefs: Add APIs for user_events to libtracefs
@ 2022-02-18 22:50 Beau Belgrave
  2022-02-18 22:50 ` [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources Beau Belgrave
                   ` (3 more replies)
  0 siblings, 4 replies; 22+ messages in thread
From: Beau Belgrave @ 2022-02-18 22:50 UTC (permalink / raw)
  To: rostedt; +Cc: linux-trace-devel, beaub

New APIs added:
struct tracefs_user_event_group *tracefs_user_event_group_create(void);

void tracefs_user_event_group_close(struct tracefs_user_event_group *group);

int tracefs_user_event_delete(const char *name);

struct tracefs_user_event *
tracefs_user_event_register(struct tracefs_user_event_group *group,
                            const char *name, enum tracefs_uevent_flags flags,
                            struct tracefs_uevent_item *items);

bool tracefs_user_event_test(struct tracefs_user_event *event);

int tracefs_user_event_write(struct tracefs_user_event *event,
                             struct tracefs_uevent_item *items);

Documentation updates in this series describes the various APIs and the reason
for a group.

Items are described with a struct to better ensure contracts for things like
custom structs or static length strings. Items are allowed to be mixed/different
than what they were when registered. This allows callers the ability to describe
events verbosely, but write out via packed structs in their own code if needed.

Beau Belgrave (3):
  libtracefs: Add user_events to libtracefs sources
  libtracefs: Add documentation and sample code for user_events
  libtracefs: Add unit tests for user_events

 Documentation/libtracefs-userevents.txt | 258 +++++++++++
 Makefile                                |   8 +
 include/tracefs-local.h                 |  24 ++
 include/tracefs.h                       |  60 +++
 samples/Makefile                        |   4 +
 src/Makefile                            |   4 +
 src/tracefs-userevents.c                | 545 ++++++++++++++++++++++++
 utest/tracefs-utest.c                   | 233 ++++++++++
 8 files changed, 1136 insertions(+)
 create mode 100644 Documentation/libtracefs-userevents.txt
 create mode 100644 src/tracefs-userevents.c


base-commit: e579ba38ff6bc07cd2278faf9d3ac08c15d4e9e8
-- 
2.17.1


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources
  2022-02-18 22:50 [PATCH v1 0/3] libtracefs: Add APIs for user_events to libtracefs Beau Belgrave
@ 2022-02-18 22:50 ` Beau Belgrave
  2022-02-21 11:34   ` Yordan Karadzhov
                     ` (2 more replies)
  2022-02-18 22:50 ` [PATCH v1 2/3] libtracefs: Add documentation and sample code for user_events Beau Belgrave
                   ` (2 subsequent siblings)
  3 siblings, 3 replies; 22+ messages in thread
From: Beau Belgrave @ 2022-02-18 22:50 UTC (permalink / raw)
  To: rostedt; +Cc: linux-trace-devel, beaub

Adds the required APIs to libtracefs to create, manage and write out
data to trace events via the user_events kernel mechanism.

Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
---
 Makefile                 |   8 +
 include/tracefs-local.h  |  24 ++
 include/tracefs.h        |  60 +++++
 src/Makefile             |   4 +
 src/tracefs-userevents.c | 545 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 641 insertions(+)
 create mode 100644 src/tracefs-userevents.c

diff --git a/Makefile b/Makefile
index 544684c..a4598b4 100644
--- a/Makefile
+++ b/Makefile
@@ -154,6 +154,14 @@ CFLAGS ?= -g -Wall
 CPPFLAGS ?=
 LDFLAGS ?=
 
+USEREVENTS_INSTALLED := $(shell if (echo "$(pound)include <linux/user_events.h>" | $(CC) -E - >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi)
+export USEREVENTS_INSTALLED
+ifeq ($(USEREVENTS_INSTALLED), 1)
+CFLAGS += -DUSEREVENTS
+else
+$(warning user_events.h not installed, skipping)
+endif
+
 CUNIT_INSTALLED := $(shell if (printf "$(pound)include <CUnit/Basic.h>\n void main(){CU_initialize_registry();}" | $(CC) -x c - -lcunit -o /dev/null >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi)
 export CUNIT_INSTALLED
 
diff --git a/include/tracefs-local.h b/include/tracefs-local.h
index bf157e1..e768cba 100644
--- a/include/tracefs-local.h
+++ b/include/tracefs-local.h
@@ -119,4 +119,28 @@ int trace_rescan_events(struct tep_handle *tep,
 struct tep_event *get_tep_event(struct tep_handle *tep,
 				const char *system, const char *name);
 
+/* Internal interface for ftrace user events */
+
+struct tracefs_user_event_group;
+
+struct tracefs_user_event
+{
+	int write_index;
+	int status_index;
+	int iovecs;
+	int rels;
+	int len;
+	struct tracefs_user_event_group *group;
+	struct tracefs_user_event *next;
+};
+
+struct tracefs_user_event_group
+{
+	int fd;
+	int mmap_len;
+	char *mmap;
+	pthread_mutex_t lock;
+	struct tracefs_user_event *events;
+};
+
 #endif /* _TRACE_FS_LOCAL_H */
diff --git a/include/tracefs.h b/include/tracefs.h
index 1848ad0..7871dfe 100644
--- a/include/tracefs.h
+++ b/include/tracefs.h
@@ -571,4 +571,64 @@ struct tracefs_synth *tracefs_sql(struct tep_handle *tep, const char *name,
 struct tep_event *
 tracefs_synth_get_event(struct tep_handle *tep, struct tracefs_synth *synth);
 
+/* User events */
+enum tracefs_uevent_type {
+	TRACEFS_UEVENT_END,
+	TRACEFS_UEVENT_u8,
+	TRACEFS_UEVENT_s8,
+	TRACEFS_UEVENT_u16,
+	TRACEFS_UEVENT_s16,
+	TRACEFS_UEVENT_u32,
+	TRACEFS_UEVENT_s32,
+	TRACEFS_UEVENT_u64,
+	TRACEFS_UEVENT_s64,
+	TRACEFS_UEVENT_string,
+	TRACEFS_UEVENT_struct,
+	TRACEFS_UEVENT_varray,
+	TRACEFS_UEVENT_vstring,
+};
+
+enum tracefs_uevent_flags {
+	/* None */
+	TRACEFS_UEVENT_FLAG_NONE = 0,
+
+	/* When BPF is attached, use iterator/no copy */
+	TRACEFS_UEVENT_FLAG_bpf_iter = 1 << 0,
+};
+
+struct tracefs_uevent_item {
+	/* Type of item */
+	enum tracefs_uevent_type type;
+
+	/* Length of data, optional during register */
+	int len;
+
+	union {
+		/* Used during write */
+		const void *data;
+
+		/* Used during register */
+		const char *name;
+	};
+};
+
+struct tracefs_user_event;
+struct tracefs_user_event_group;
+
+struct tracefs_user_event_group *tracefs_user_event_group_create(void);
+
+void tracefs_user_event_group_close(struct tracefs_user_event_group *group);
+
+int tracefs_user_event_delete(const char *name);
+
+struct tracefs_user_event *
+tracefs_user_event_register(struct tracefs_user_event_group *group,
+			    const char *name, enum tracefs_uevent_flags flags,
+			    struct tracefs_uevent_item *items);
+
+bool tracefs_user_event_test(struct tracefs_user_event *event);
+
+int tracefs_user_event_write(struct tracefs_user_event *event,
+			     struct tracefs_uevent_item *items);
+
 #endif /* _TRACE_FS_H */
diff --git a/src/Makefile b/src/Makefile
index e8afab5..984e8cf 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -14,6 +14,10 @@ OBJS += tracefs-filter.o
 OBJS += tracefs-dynevents.o
 OBJS += tracefs-eprobes.o
 
+ifeq ($(USEREVENTS_INSTALLED), 1)
+OBJS += tracefs-userevents.o
+endif
+
 # Order matters for the the three below
 OBJS += sqlhist-lex.o
 OBJS += sqlhist.tab.o
diff --git a/src/tracefs-userevents.c b/src/tracefs-userevents.c
new file mode 100644
index 0000000..4d64fd8
--- /dev/null
+++ b/src/tracefs-userevents.c
@@ -0,0 +1,545 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2022 Microsoft Corporation.
+ *
+ * Authors:
+ *   Beau Belgrave <beaub@linux.microsoft.com>
+ */
+
+#include <alloca.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/uio.h>
+#include <linux/user_events.h>
+
+#include "tracefs.h"
+#include "tracefs-local.h"
+
+#define STAT_FILE "user_events_status"
+#define DATA_FILE "user_events_data"
+
+static void free_user_events(struct tracefs_user_event *event)
+{
+	struct tracefs_user_event *next;
+
+	while (event) {
+		next = event->next;
+		free(event);
+		event = next;
+	}
+}
+
+#define LEN_OR_ZERO (len ? len - pos : 0)
+static int append_field(struct tracefs_uevent_item *item, char *buf,
+			int len, int offset, int index)
+{
+	int pos = offset;
+
+	if (index != 0)
+		pos += snprintf(buf + pos, LEN_OR_ZERO, ";");
+
+	switch (item->type) {
+	case TRACEFS_UEVENT_u8:
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" u8 %s", item->name);
+		break;
+
+	case TRACEFS_UEVENT_s8:
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" s8 %s", item->name);
+		break;
+
+	case TRACEFS_UEVENT_u16:
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" u16 %s", item->name);
+		break;
+
+	case TRACEFS_UEVENT_s16:
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" s16 %s", item->name);
+		break;
+
+	case TRACEFS_UEVENT_u32:
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" u32 %s", item->name);
+		break;
+
+	case TRACEFS_UEVENT_s32:
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" s32 %s", item->name);
+		break;
+
+	case TRACEFS_UEVENT_u64:
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" u64 %s", item->name);
+		break;
+
+	case TRACEFS_UEVENT_s64:
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" s64 %s", item->name);
+		break;
+
+	case TRACEFS_UEVENT_string:
+		if (item->len <= 0) {
+			errno = EINVAL;
+			return -1;
+		}
+
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" char[%d] %s", item->len, item->name);
+		break;
+
+	case TRACEFS_UEVENT_struct:
+		/*
+		 * struct must have 2 strings, do simple check
+		 * in user, kernel will fully validate
+		 */
+		if (!strchr(item->name, ' ')) {
+			errno = EINVAL;
+			return -1;
+		}
+
+		if (item->len <= 0) {
+			errno = EINVAL;
+			return -1;
+		}
+
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" struct %s %d", item->name, item->len);
+		break;
+
+	case TRACEFS_UEVENT_varray:
+		/* Variable length array */
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" __rel_loc u8[] %s", item->name);
+		break;
+
+	case TRACEFS_UEVENT_vstring:
+		/* Variable length string */
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				" __rel_loc char[] %s", item->name);
+		break;
+
+	default:
+		/* Unknown */
+		errno = ENOENT;
+		return -1;
+	}
+
+	return pos;
+}
+
+static int create_reg_cmd(const char *name, enum tracefs_uevent_flags flags,
+			  struct tracefs_uevent_item *items, char *buf, int len)
+{
+	int pos = 0;
+	int index = 0;
+
+	pos += snprintf(buf + pos, LEN_OR_ZERO, "%s", name);
+
+	if (flags & TRACEFS_UEVENT_FLAG_bpf_iter)
+		pos += snprintf(buf + pos, LEN_OR_ZERO, ":BPF_ITER");
+
+	while (items->type != TRACEFS_UEVENT_END) {
+		pos = append_field(items, buf, len, pos, index++);
+
+		if (pos < 0)
+			return pos;
+
+		items++;
+	}
+
+	return pos + 1;
+}
+#undef LEN_OR_ZERO
+
+static int get_write_counts(struct tracefs_user_event *event,
+			    struct tracefs_uevent_item *item)
+{
+	event->rels = 0;
+	event->len = 0;
+
+	/* Start at 1, need iovec for write_index */
+	event->iovecs = 1;
+
+	while (item->type != TRACEFS_UEVENT_END) {
+		switch (item->type) {
+		case TRACEFS_UEVENT_u8:
+		case TRACEFS_UEVENT_s8:
+			event->len += sizeof(__u8);
+			break;
+
+		case TRACEFS_UEVENT_u16:
+		case TRACEFS_UEVENT_s16:
+			event->len += sizeof(__u16);
+			break;
+
+		case TRACEFS_UEVENT_u32:
+		case TRACEFS_UEVENT_s32:
+			event->len += sizeof(__u32);
+			break;
+
+		case TRACEFS_UEVENT_u64:
+		case TRACEFS_UEVENT_s64:
+			event->len += sizeof(__u64);
+			break;
+
+		case TRACEFS_UEVENT_string:
+		case TRACEFS_UEVENT_struct:
+			event->len += item->len;
+			break;
+
+		case TRACEFS_UEVENT_varray:
+		case TRACEFS_UEVENT_vstring:
+			/* Requires a rel loc entry */
+			event->len += sizeof(__u32);
+			event->rels++;
+			break;
+
+		default:
+			/* Unknown */
+			errno = ENOENT;
+			return -1;
+		}
+
+		event->iovecs++;
+		item++;
+	}
+
+	return 0;
+}
+
+/**
+ * tracefs_user_event_group_create - Create a new group to use for user events
+ *
+ * Returns a pointer to a group to use for user events. The pointer is valid until
+ * tracefs_user_event_group_close() is called. In case of an error NULL is
+ * returned.
+ */
+struct tracefs_user_event_group *tracefs_user_event_group_create(void)
+{
+	int stat, write, page_size, i;
+	struct tracefs_user_event_group *group;
+
+	stat = tracefs_instance_file_open(NULL, STAT_FILE, O_RDWR);
+
+	if (stat < 0)
+		return NULL;
+
+	write = tracefs_instance_file_open(NULL, DATA_FILE, O_RDWR);
+
+	if (write < 0)
+		goto put_stat;
+
+	group = malloc(sizeof(*group));
+
+	if (!group)
+		goto put_write;
+
+	if (pthread_mutex_init(&group->lock, NULL) < 0)
+		goto put_group;
+
+	/* Scale up to 16-bit max user events a page at a time */
+	page_size = sysconf(_SC_PAGESIZE);
+	group->mmap_len = page_size;
+
+	for (i = 0; i < 16; ++i) {
+		group->mmap = mmap(NULL, group->mmap_len,
+				   PROT_READ, MAP_SHARED, stat, 0);
+
+		if (group->mmap == MAP_FAILED && errno == EINVAL) {
+			/* Increase by page size and try again */
+			group->mmap_len += page_size;
+			continue;
+		}
+
+		break;
+	}
+
+	if (group->mmap == MAP_FAILED)
+		goto put_group;
+
+	group->fd = write;
+	group->events = NULL;
+
+	/* Status fd no longer needed */
+	close(stat);
+
+	return group;
+
+put_group:
+	free(group);
+put_write:
+	close(write);
+put_stat:
+	close(stat);
+
+	return NULL;
+}
+
+/**
+ * tracefs_user_event_delete - Deletes a user event from the system
+ * @name: Name of the event to delete
+ *
+ * Deletes the event from the system if it is not used.
+ */
+int tracefs_user_event_delete(const char *name)
+{
+	int ret, write;
+       
+	write = tracefs_instance_file_open(NULL, DATA_FILE, O_RDWR);
+
+	if (write < 0)
+		return write;
+
+	ret = ioctl(write, DIAG_IOCSDEL, name);
+
+	close(write);
+
+	return ret;
+}
+
+/**
+ * tracefs_user_event_group_close - Closes a group containing user events
+ * @group: Group to close
+ *
+ * Closes a group and all the user events within it. Any user event that has
+ * been added to the group is no longer valid and cannot be used.
+ */
+void tracefs_user_event_group_close(struct tracefs_user_event_group *group)
+{
+	if (!group)
+		return;
+
+	if (group->mmap != MAP_FAILED)
+		munmap(group->mmap, group->mmap_len);
+
+	if (group->fd != -1)
+		close(group->fd);
+
+	free_user_events(group->events);
+	free(group);
+}
+
+/**
+ * tracefs_user_event_register - Registers a user event with the system
+ * @group: Group to add the user event to
+ * @name: Name of the event to register
+ * @flags: Flags to use
+ * @items: Array of items that the event contains
+ *
+ * Allocates and registers a user event with the system. The user event will be
+ * added to the @group. The lifetime of the event is bound to the @group. When
+ * the @group is closed via tracefs_user_event_group_close() the event will no
+ * longer exist and should not be used.
+ *
+ * The @items are processed in order and the final item type must be set to
+ * TRACEFS_UEVENT_END to mark the last item. Each item must have the type
+ * and name defined. The string and struct type also require the len to be set
+ * for the item.
+ *
+ * Return a pointer to a user event on success, or NULL or error.
+ *
+ * errno will be set to EINVAL if @group is null or unexpected @items.
+ */
+struct tracefs_user_event *
+tracefs_user_event_register(struct tracefs_user_event_group *group,
+			    const char *name, enum tracefs_uevent_flags flags,
+			    struct tracefs_uevent_item *items)
+{
+	struct tracefs_user_event *event = NULL;
+	struct user_reg reg = {0};
+	char *cmd = NULL;
+	int len;
+
+	if (!group || !items) {
+		errno = EINVAL;
+		return NULL;
+	}
+
+	/* Determine length of cmd */
+	len = create_reg_cmd(name, flags, items, cmd, 0);
+
+	if (len < 0) {
+		errno = EINVAL;
+		return NULL;
+	}
+
+	/* Allocate and fill cmd */
+	cmd = malloc(len);
+
+	if (!cmd)
+		return NULL;
+
+	create_reg_cmd(name, flags, items, cmd, len);
+
+	event = malloc(sizeof(*event));
+
+	if (!event)
+		goto put_cmd;
+
+	reg.size = sizeof(reg);
+	reg.name_args = (__u64)cmd;
+
+	/* Register event with kernel */
+	if (ioctl(group->fd, DIAG_IOCSREG, &reg) == -1)
+		goto put_event;
+
+	/* Sanity check bounds returned */
+	if (reg.status_index >= group->mmap_len) {
+		errno = EINVAL;
+		goto put_event;
+	}
+
+	if (get_write_counts(event, items))
+		goto put_event;
+
+	event->write_index = reg.write_index;
+	event->status_index = reg.status_index;
+	event->group = group;
+
+	/* Add event into the group under lock */
+	pthread_mutex_lock(&group->lock);
+	event->next = group->events;
+	group->events = event->next;
+	pthread_mutex_unlock(&group->lock);
+
+	free(cmd);
+
+	return event;
+put_event:
+	free(event);
+put_cmd:
+	free(cmd);
+
+	return NULL;
+}
+
+/**
+ * tracefs_user_event_test - Tests if an event is currently enabled
+ * @event: User event to test
+ *
+ * Tests if the @event is valid and currently enabled on the system.
+ *
+ * Return true if enabled, false otherwise.
+ */
+bool tracefs_user_event_test(struct tracefs_user_event *event)
+{
+	return event && event->group->mmap[event->status_index] != 0;
+}
+
+/**
+ * tracefs_user_event_write - Writes data out to an event
+ * @event: User event to write data about
+ * @items: Items to write for the event
+ *
+ * Writes out items for the event. Callers should check if the cost of writing
+ * should be performed by calling tracefs_user_event_test(). Items are checked
+ * to ensure they fit within the described items during register. Each item
+ * must specify the length of the item being written.
+ *
+ * Return the number of bytes written or -1 upon error.
+ *
+ * errno will be set to EINVAL if @event or @items is null or @items contains
+ * an item with a length of less than or equal to 0.
+ * errno will be set to E2BIG if @items contains more items than previously
+ * registered for the event.
+ */
+int tracefs_user_event_write(struct tracefs_user_event *event,
+			     struct tracefs_uevent_item *items)
+{
+	struct iovec *head, *io, *relio, *io_end;
+	__u32 *rel, *rel_end;
+	int len, rel_offset, data_offset, used;
+
+	if (!event || !items) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	head = io = alloca(sizeof(*io) * (event->iovecs + event->rels));
+	rel = alloca(sizeof(*rel) * event->rels);
+
+	io_end = head + (event->iovecs + event->rels);
+	rel_end = rel + event->rels;
+
+	/* Relative offset starts at end of static data */
+	relio = io + event->iovecs;
+	rel_offset = event->len;
+	data_offset = 0;
+
+	/* Write index must be first */
+	io->iov_base = &event->write_index;
+	io->iov_len = sizeof(event->write_index);
+	io++;
+	used = 1;
+
+	while (items->type != TRACEFS_UEVENT_END) {
+		len = items->len;
+
+		if (len <= 0)
+			goto bad_length;
+
+		if (io >= io_end)
+			goto bad_count;
+
+		switch (items->type) {
+		case TRACEFS_UEVENT_varray:
+		case TRACEFS_UEVENT_vstring:
+			/* Dual vectors */
+			used += 2;
+
+			if (rel >= rel_end || relio >= io_end)
+				goto bad_count;
+
+			/* __rel_loc types */
+			relio->iov_base = (void *)items->data;
+			relio->iov_len = len;
+			relio++;
+
+			io->iov_base = (void *)rel;
+			io->iov_len = sizeof(*rel);
+			io++;
+			rel_offset -= sizeof(*rel);
+
+			/* Fill in rel loc data */
+			*rel = DYN_LOC(rel_offset + data_offset, len);
+			data_offset += len;
+			rel++;
+
+			break;
+
+		default:
+			/* Single vector */
+			used++;
+
+			/* Direct types */
+			io->iov_base = (void *)items->data;
+			io->iov_len = len;
+			io++;
+			rel_offset -= len;
+
+			break;
+		}
+
+		items++;
+	}
+
+	return writev(event->group->fd, head, used);
+
+bad_length:
+	fprintf(stderr, "Bad user_event item length at index %d\n",
+		used - 1);
+	errno = EINVAL;
+	return -1;
+
+bad_count:
+	fprintf(stderr, "Too many user_event items passed\n");
+	errno = E2BIG;
+	return -1;
+}
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v1 2/3] libtracefs: Add documentation and sample code for user_events
  2022-02-18 22:50 [PATCH v1 0/3] libtracefs: Add APIs for user_events to libtracefs Beau Belgrave
  2022-02-18 22:50 ` [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources Beau Belgrave
@ 2022-02-18 22:50 ` Beau Belgrave
  2022-02-18 22:50 ` [PATCH v1 3/3] libtracefs: Add unit tests " Beau Belgrave
  2022-02-22 17:34 ` [PATCH v1 0/3] libtracefs: Add APIs for user_events to libtracefs Steven Rostedt
  3 siblings, 0 replies; 22+ messages in thread
From: Beau Belgrave @ 2022-02-18 22:50 UTC (permalink / raw)
  To: rostedt; +Cc: linux-trace-devel, beaub

Adds the initial documentation file for user_event APIs within
libtracefs. Adds sample code on how to use these new APIs.

Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
---
 Documentation/libtracefs-userevents.txt | 258 ++++++++++++++++++++++++
 samples/Makefile                        |   4 +
 2 files changed, 262 insertions(+)
 create mode 100644 Documentation/libtracefs-userevents.txt

diff --git a/Documentation/libtracefs-userevents.txt b/Documentation/libtracefs-userevents.txt
new file mode 100644
index 0000000..3d43ed7
--- /dev/null
+++ b/Documentation/libtracefs-userevents.txt
@@ -0,0 +1,258 @@
+libtracefs(3)
+=============
+
+NAME
+----
+tracefs_user_event_group_create, tracefs_user_event_group_close, tracefs_user_event_register,
+tracefs_user_event_test, tracefs_user_event_write, tracefs_user_event_delete,
+- Creation and management of a user event group, event, and data
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <tracefs.h>*
+
+struct tracefs_user_event_group pass:[*]*tracefs_user_event_group_create*(void);
+void *tracefs_user_event_group_close*(struct tracefs_user_event_group pass:[*]_group_);
+int *tracefs_user_event_delete*(const char pass:[*]_name_);
+struct tracefs_user_event pass:[*]*tracefs_user_event_register*(struct tracefs_user_event_group pass:[*]_group_,
+				const char pass:[*]_name_,
+				enum tracefs_uevent_flags _flags_,
+				struct tracefs_uevent_item pass:[*]_items_);
+bool *tracefs_user_event_test*(struct tracefs_user_event pass:[*]_event_);
+int *tracefs_user_event_write*(struct tracefs_user_event pass:[*]_event_,
+				struct tracefs_uevent_item pass:[*]_items_);
+--
+
+DESCRIPTION
+-----------
+User events allow user applications to create trace events from their process.
+The status of each event that is created is exposed to the application, which
+enables quick checking if the event is being traced out or not. This enables
+a way to always have the events on with minimal impact to the performance of
+the process. The *tracefs_user_event_test*() function should be used for this.
+When the event is to be traced out with data the *tracefs_user_event_write*()
+function is to be used. Processes should delay any work related to the write
+calls until after *tracefs_user_event_test*() returns true. This ensures minimal
+performance impacts until tracing is enabled on the system for each event.
+
+To reduce the number of resources, and to manage lifetimes of the events, the
+concept of a group exists within this implementation. Groups keep track of all
+the events that have been registered within it and ensure that once the group
+is closed that all the events are cleaned up properly. All events in a group
+share system resources used for writing. Groups and events are thread safe and do
+not require additional locking when operating in a multi-threaded process.
+Once a group has been closed the events within it are no longer valid to use.
+When groups are closed their resources go away, callers need to ensure that a
+group is closed only once it's no longer needed by any of the events.
+
+*tracefs_user_event_group_create*() allocates and initializes a group to be used.
+Group creation can fail if user_events are not installed, the user doesn't have
+permissions to access tracefs or memory allocation errors. The returned group is
+then used to register events.
+
+*tracefs_user_event_group_close*() closes a group and any events that have been
+registered within the group. Events within the group are no longer valid.
+
+*tracefs_user_event_register*() registers an event within the group and the system.
+The name and items passed are translated to show up in tracefs format file.
+The last item passed to this function must have the type of *TRACEFS_UEVENT_END*.
+While events do not require any items, it's expected that each item that will
+be written out should have a matching item during registration.
+
+Flags passed during registration affect the behavior of the event:
+
+*TRACEFS_UEVENT_FLAG_NONE* - No affect
+
+*TRACEFS_UEVENT_FLAG_bpf_iter* - Force BPF programs to receive iovecs instead
+of a copy of the data. Enables higher performance under some conditions for
+events that plan to use BPF programs and require very little overhead when writing.
+
+Types of items passed during the registration:
+
+*TRACEFS_UEVENT_END* - Indicates the last item.
+
+*TRACEFS_UEVENT_u8* - u8 item.
+
+*TRACEFS_UEVENT_s8* - s8 item.
+
+*TRACEFS_UEVENT_u16* - u16 item.
+
+*TRACEFS_UEVENT_s16* - s16 item.
+
+*TRACEFS_UEVENT_u32* - u32 item.
+
+*TRACEFS_UEVENT_s32* - s32 item.
+
+*TRACEFS_UEVENT_u64* - u64 item.
+
+*TRACEFS_UEVENT_s64* - s64 item.
+
+*TRACEFS_UEVENT_string* - Specific length array of char data.
+
+*TRACEFS_UEVENT_struct* - Specific sized struct by name and type (When used the
+name of the item must include both the type and name of the struct).
+
+*TRACEFS_UEVENT_varray* - Variable length array of u8 data.
+
+*TRACEFS_UEVENT_vstring* - Variable length array of char data.
+
+Each item has a type, length, and name during registration. When items are being
+used for writing each item has a type, length, and the data to write.
+
+When items are being registered, length is required for theses types:
+
+*TRACEFS_UEVENT_string*
+
+*TRACEFS_UEVENT_struct*
+
+If lengths are not set for the above types the register will fail. All other
+types have the length determined automatically.
+
+When items are being written, all items must indicate their length. If lengths
+are not set the write will fail. Automatic length sets are not performed.
+
+*tracefs_user_event_test*() checks if an event is being traced at that moment.
+This check is quick and should always be used before writing or calculating
+data required for writing out the event data.
+
+*tracefs_user_event_write*() writes data out for the event. This causes an entry
+to be made in the tracing system that has been attached (perf, ftrace, BPF, etc.).
+
+All items passed must set their length, even for common data types (u8, u16, etc.).
+If required, callers may mix types from what has been registered as long as it
+fits within the allocated resources. A common scenario for this is having an
+internal struct that is packed and contains several consecutive registered types.
+Passing the struct directly instead of each individual item might be beneficial.
+While mixing types is allowed, it's a bug to pass more items than have been
+registered. If this occurs the write will fail.
+
+When the type *TRACEFS_UEVENT_vstring* is being used, the length of the data must
+include the null character of a string. If the length does not include the null
+character the write will fail.
+
+*tracefs_user_event_delete*() deletes the event from the system. Delete only works
+if the event is not being used by anything, include the calling program.
+
+RETURN VALUE
+------------
+*tracefs_user_event_group_create*() returns an allocated struct tracefs_user_event_group
+on success or NULL on error.
+
+*tracefs_user_event_register*() returns an allocated struct tracefs_user_event
+on success or NULL on error.
+
+*tracefs_user_event_test*() returns true if the event is currently being traced.
+
+*tracefs_user_event_write*() reuturns the number of bytes written out on success
+or -1 on error.
+
+*tracefs_user_event_delete*() returns 0 on success or -1 on error.
+
+ERRORS
+------
+The following errors are for all the above calls:
+
+*EPERM* Not run as root user when required.
+
+*EINVAL* Either a parameter is not valid (NULL when it should not be)
+  or a item that is missing an attribute.
+
+*E2BIG* Too many items passed.
+
+*ENOMEM* not enough memory is available.
+
+And more errors may have happened from the system calls to the system.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <stdlib.h>
+#include <tracefs.h>
+
+static struct tracefs_user_event_group *group;
+static struct tracefs_user_event *event;
+
+static void make_event(void)
+{
+	struct tracefs_uevent_item items[] = {
+		{ TRACEFS_UEVENT_vstring, .name = "message" },
+		{ TRACEFS_UEVENT_END },
+	};
+
+	group = tracefs_user_event_group_create();
+
+	event = tracefs_user_event_register(group, "example",
+					    TRACEFS_UEVENT_FLAG_NONE, items);
+}
+
+int main (int argc, char **argv)
+{
+	make_event();
+
+	if (argc != 2) {
+		printf("Usage: %s <message>\n", argv[0]);
+		return 1;
+	}
+
+	if (tracefs_user_event_test(event)) {
+		const char *msg = argv[1];
+
+		struct tracefs_uevent_item items[] = {
+			{ TRACEFS_UEVENT_vstring, strlen(msg)+1, .data = msg },
+			{ TRACEFS_UEVENT_END },
+		};
+
+		tracefs_user_event_write(event, items);
+
+		printf("Event enabled, wrote '%s'\n", msg);
+	} else {
+		printf("Event user_events/example not enabled, enable via tracefs\n");
+	}
+
+	tracefs_user_event_group_close(group);
+
+	return 0;
+}
+--
+
+FILES
+-----
+[verse]
+--
+*tracefs.h*
+	Header file to include in order to have access to the library APIs.
+*-ltracefs*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+*libtracefs*(3),
+*libtraceevent*(3),
+*trace-cmd*(1)
+
+AUTHOR
+------
+[verse]
+--
+*Beau Belgrave* <beaub@linux.microsoft.com>
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtracefs is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git/
+
+COPYING
+-------
+Copyright \(C) 2022 Microsoft Corporation Free use of this software is granted under
+the terms of the GNU Public License (GPL).
diff --git a/samples/Makefile b/samples/Makefile
index f03aff6..169c106 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -21,6 +21,10 @@ EXAMPLES += tracer
 EXAMPLES += stream
 EXAMPLES += instances-affinity
 
+ifeq ($(USEREVENTS_INSTALLED), 1)
+EXAMPLES += userevents
+endif
+
 TARGETS :=
 TARGETS += sqlhist
 TARGETS += $(EXAMPLES)
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v1 3/3] libtracefs: Add unit tests for user_events
  2022-02-18 22:50 [PATCH v1 0/3] libtracefs: Add APIs for user_events to libtracefs Beau Belgrave
  2022-02-18 22:50 ` [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources Beau Belgrave
  2022-02-18 22:50 ` [PATCH v1 2/3] libtracefs: Add documentation and sample code for user_events Beau Belgrave
@ 2022-02-18 22:50 ` Beau Belgrave
  2022-02-22 17:20   ` Steven Rostedt
  2022-02-22 17:34 ` [PATCH v1 0/3] libtracefs: Add APIs for user_events to libtracefs Steven Rostedt
  3 siblings, 1 reply; 22+ messages in thread
From: Beau Belgrave @ 2022-02-18 22:50 UTC (permalink / raw)
  To: rostedt; +Cc: linux-trace-devel, beaub

Adds unit tests for user_events when available. Ensures APIs are working
correctly and appropriate errors are being returned.

Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
---
 utest/tracefs-utest.c | 233 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 233 insertions(+)

diff --git a/utest/tracefs-utest.c b/utest/tracefs-utest.c
index e8d5c69..0c4fd1e 100644
--- a/utest/tracefs-utest.c
+++ b/utest/tracefs-utest.c
@@ -11,6 +11,7 @@
 #include <time.h>
 #include <dirent.h>
 #include <ftw.h>
+#include <linux/types.h>
 
 #include <CUnit/CUnit.h>
 #include <CUnit/Basic.h>
@@ -871,6 +872,235 @@ static void test_eprobes(void)
 	test_eprobes_instance(test_instance);
 }
 
+#ifdef USEREVENTS
+struct user_test_context {
+	int seen;
+	int failed;
+};
+static int user_callback(struct tep_event *event, struct tep_record *record,
+			  int cpu, void *context)
+{
+	struct tep_format_field *field;
+	struct user_test_context *user_context;
+	__u32 *rel, size, offset;
+       
+	user_context = (struct user_test_context *)context;
+	user_context->seen++;
+
+	field = tep_find_field(event, "u8");
+	if (!field || *(__u8 *)(record->data + field->offset) != 1)
+	{
+		user_context->failed = 1;
+		return -1;
+	}
+
+	field = tep_find_field(event, "s8");
+	if (!field || *(__s8 *)(record->data + field->offset) != 2)
+	{
+		user_context->failed = 2;
+		return -1;
+	}
+
+	field = tep_find_field(event, "u16");
+	if (!field || *(__u16 *)(record->data + field->offset) != 3)
+	{
+		user_context->failed = 3;
+		return -1;
+	}
+
+	field = tep_find_field(event, "s16");
+	if (!field || *(__s16 *)(record->data + field->offset) != 4)
+	{
+		user_context->failed = 4;
+		return -1;
+	}
+
+	field = tep_find_field(event, "u32");
+	if (!field || *(__u32 *)(record->data + field->offset) != 5)
+	{
+		user_context->failed = 5;
+		return -1;
+	}
+
+	field = tep_find_field(event, "s32");
+	if (!field || *(__s32 *)(record->data + field->offset) != 6)
+	{
+		user_context->failed = 6;
+		return -1;
+	}
+
+	field = tep_find_field(event, "u64");
+	if (!field || *(__u64 *)(record->data + field->offset) != 7)
+	{
+		user_context->failed = 7;
+		return -1;
+	}
+
+	field = tep_find_field(event, "s64");
+	if (!field || *(__s64 *)(record->data + field->offset) != 8)
+	{
+		user_context->failed = 8;
+		return -1;
+	}
+
+	field = tep_find_field(event, "string");
+	if (!field || memcmp(record->data + field->offset, "12345678", 8))
+	{
+		user_context->failed = 9;
+		return -1;
+	}
+
+	field = tep_find_field(event, "struct");
+	if (!field || *(__u64 *)(record->data + field->offset) != 9)
+	{
+		user_context->failed = 10;
+		return -1;
+	}
+
+	field = tep_find_field(event, "varray");
+	if (!field) {
+		user_context->failed = 11;
+		return -1;
+	}
+
+	rel = (__u32 *)(record->data + field->offset);
+	offset = *rel & 0xffff;
+	size = *rel >> 16;
+	rel++;
+
+	if (memcmp((void *)(rel) + offset, "Array", size)) {
+		user_context->failed = 12;
+		return -1;
+	}
+
+	field = tep_find_field(event, "vstring");
+	if (!field) {
+		user_context->failed = 13;
+		return -1;
+	}
+
+	rel = (__u32 *)(record->data + field->offset);
+	offset = *rel & 0xffff;
+	size = *rel >> 16;
+	rel++;
+
+	if (memcmp((void *)(rel) + offset, "Variable", size)) {
+		user_context->failed = 14;
+		return -1;
+	}
+
+	return 0;
+}
+
+static void test_userevents_instance(struct tracefs_instance *instance)
+{
+	struct tracefs_user_event_group *group;
+	struct tracefs_user_event *event;
+	struct tep_handle *user_tep;
+	enum tracefs_uevent_flags flags = TRACEFS_UEVENT_FLAG_NONE;
+	const char *systems[] = { "user_events", NULL };
+	const char *name = "libtracefs_utest";
+	const char *system = "user_events";
+	const char *test_string = "12345678";
+	const char *test_array = "Array";
+	const char *test_vstring = "Variable";
+	__u8 a = 1;
+	__s8 b = 2;
+	__u16 c = 3;
+	__s16 d = 4;
+	__u32 e = 5;
+	__s32 f = 6;
+	__u64 g = 7;
+	__s64 h = 8;
+	__u64 i = 9;
+	struct tracefs_uevent_item all_items[] = {
+		{ TRACEFS_UEVENT_u8, .name = "u8" },
+		{ TRACEFS_UEVENT_s8, .name = "s8" },
+		{ TRACEFS_UEVENT_u16, .name = "u16" },
+		{ TRACEFS_UEVENT_s16, .name = "s16" },
+		{ TRACEFS_UEVENT_u32, .name = "u32" },
+		{ TRACEFS_UEVENT_s32, .name = "s32" },
+		{ TRACEFS_UEVENT_u64, .name = "u64" },
+		{ TRACEFS_UEVENT_s64, .name = "s64" },
+		{ TRACEFS_UEVENT_string, .name = "string", .len = 8 },
+		{ TRACEFS_UEVENT_struct, .name = "test struct", .len = 8 },
+		{ TRACEFS_UEVENT_varray, .name = "varray" },
+		{ TRACEFS_UEVENT_vstring, .name = "vstring" },
+		{ TRACEFS_UEVENT_END },
+	};
+	struct tracefs_uevent_item write_items[] = {
+		{ TRACEFS_UEVENT_u8, .data = &a, .len = sizeof(a) },
+		{ TRACEFS_UEVENT_s8, .data = &b, .len = sizeof(b) },
+		{ TRACEFS_UEVENT_u16, .data = &c, .len = sizeof(c) },
+		{ TRACEFS_UEVENT_s16, .data = &d, .len = sizeof(d) },
+		{ TRACEFS_UEVENT_u32, .data = &e, .len = sizeof(e) },
+		{ TRACEFS_UEVENT_s32, .data = &f, .len = sizeof(f) },
+		{ TRACEFS_UEVENT_u64, .data = &g, .len = sizeof(g) },
+		{ TRACEFS_UEVENT_s64, .data = &h, .len = sizeof(h) },
+		{ TRACEFS_UEVENT_string, .data = test_string,
+					 .len = strlen(test_string) },
+		{ TRACEFS_UEVENT_struct, .data = &i, .len = sizeof(i) },
+		{ TRACEFS_UEVENT_varray, .data = test_array,
+					 .len = strlen(test_array) },
+		{ TRACEFS_UEVENT_vstring, .data = test_vstring,
+					  .len = strlen(test_vstring)+1 },
+		{ TRACEFS_UEVENT_END },
+	};
+	struct user_test_context context;
+	int ret;
+
+	/* Delete if it already exists */
+	tracefs_user_event_delete(name);
+
+	group = tracefs_user_event_group_create();
+	CU_TEST(group != NULL);
+
+	event = tracefs_user_event_register(group, name, flags, all_items);
+	CU_TEST(event != NULL);
+
+	/* Test enable and status */
+	CU_TEST(!tracefs_user_event_test(event));
+	CU_TEST(tracefs_event_enable(instance, system, name) == 0);
+	CU_TEST(tracefs_user_event_test(event));
+
+	/* Correct write should work */
+	CU_TEST(tracefs_user_event_write(event, write_items) > 0);
+
+	/* Ensure write output correctly */
+	user_tep = tracefs_local_events_system(NULL, systems);
+	CU_TEST(user_tep != NULL);
+
+	memset(&context, 0, sizeof(context));
+	ret = tracefs_iterate_raw_events(user_tep, instance, NULL, 0,
+					 user_callback, &context);
+	tep_free(user_tep);
+
+	CU_TEST(ret == 0);
+	CU_TEST(context.seen == 1);
+	CU_TEST(context.failed == 0);
+
+	/* Simulate bad length */
+	write_items[0].len = 0;
+	CU_TEST(tracefs_user_event_write(event, write_items) == -1);
+
+	/* Simulate bad pointer */
+	write_items[0].len = sizeof(a);
+	write_items[0].data = NULL;
+	CU_TEST(tracefs_user_event_write(event, write_items) == -1);
+
+	tracefs_user_event_group_close(group);
+
+	/* Disable and deletion must work */
+	CU_TEST(tracefs_event_disable(instance, system, name) == 0);
+	CU_TEST(tracefs_user_event_delete(name) == 0);
+}
+
+static void test_userevents(void)
+{
+	test_userevents_instance(test_instance);
+}
+#endif
+
 static void test_instance_file(void)
 {
 	struct tracefs_instance *instance = NULL;
@@ -1706,4 +1936,7 @@ void test_tracefs_lib(void)
 	CU_add_test(suite, "kprobes", test_kprobes);
 	CU_add_test(suite, "syntetic events", test_synthetic);
 	CU_add_test(suite, "eprobes", test_eprobes);
+#ifdef USEREVENTS
+	CU_add_test(suite, "user events", test_userevents);
+#endif
 }
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources
  2022-02-18 22:50 ` [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources Beau Belgrave
@ 2022-02-21 11:34   ` Yordan Karadzhov
  2022-02-21 17:57     ` Beau Belgrave
  2022-02-22 17:08   ` Steven Rostedt
  2022-02-22 17:31   ` Steven Rostedt
  2 siblings, 1 reply; 22+ messages in thread
From: Yordan Karadzhov @ 2022-02-21 11:34 UTC (permalink / raw)
  To: Beau Belgrave, rostedt; +Cc: linux-trace-devel



On 19.02.22 г. 0:50 ч., Beau Belgrave wrote:
> diff --git a/include/tracefs-local.h b/include/tracefs-local.h
> index bf157e1..e768cba 100644
> --- a/include/tracefs-local.h
> +++ b/include/tracefs-local.h
> @@ -119,4 +119,28 @@ int trace_rescan_events(struct tep_handle *tep,
>   struct tep_event *get_tep_event(struct tep_handle *tep,
>   				const char *system, const char *name);
>   
> +/* Internal interface for ftrace user events */
> +
> +struct tracefs_user_event_group;
> +
> +struct tracefs_user_event
> +{
> +	int write_index;
> +	int status_index;
> +	int iovecs;
> +	int rels;
> +	int len;
> +	struct tracefs_user_event_group *group;
> +	struct tracefs_user_event *next;
> +};
> +
> +struct tracefs_user_event_group
> +{
> +	int fd;
> +	int mmap_len;
> +	char *mmap;
> +	pthread_mutex_t lock;
> +	struct tracefs_user_event *events;
> +};
> +
>   #endif /* _TRACE_FS_LOCAL_H */
> diff --git a/include/tracefs.h b/include/tracefs.h
> index 1848ad0..7871dfe 100644
> --- a/include/tracefs.h
> +++ b/include/tracefs.h
> @@ -571,4 +571,64 @@ struct tracefs_synth *tracefs_sql(struct tep_handle *tep, const char *name,
>   struct tep_event *
>   tracefs_synth_get_event(struct tep_handle *tep, struct tracefs_synth *synth);
>   
> +/* User events */
> +enum tracefs_uevent_type {
> +	TRACEFS_UEVENT_END,
> +	TRACEFS_UEVENT_u8,
> +	TRACEFS_UEVENT_s8,
> +	TRACEFS_UEVENT_u16,
> +	TRACEFS_UEVENT_s16,
> +	TRACEFS_UEVENT_u32,
> +	TRACEFS_UEVENT_s32,
> +	TRACEFS_UEVENT_u64,
> +	TRACEFS_UEVENT_s64,
> +	TRACEFS_UEVENT_string,
> +	TRACEFS_UEVENT_struct,
> +	TRACEFS_UEVENT_varray,
> +	TRACEFS_UEVENT_vstring,
> +};
> +
> +enum tracefs_uevent_flags {
> +	/* None */
> +	TRACEFS_UEVENT_FLAG_NONE = 0,
> +
> +	/* When BPF is attached, use iterator/no copy */
> +	TRACEFS_UEVENT_FLAG_bpf_iter = 1 << 0,
> +};
> +
> +struct tracefs_uevent_item {
> +	/* Type of item */
> +	enum tracefs_uevent_type type;
> +
> +	/* Length of data, optional during register */
> +	int len;
> +
> +	union {
> +		/* Used during write */
> +		const void *data;
> +
> +		/* Used during register */
> +		const char *name;
> +	};
> +};
> +
> +struct tracefs_user_event;
> +struct tracefs_user_event_group;
> +

We've been trying to follow certain naming convention for the APIs and to provide similar usage patterns for all types 
of trace events that are supported by the library so far (dynamic, synthetic and histograms). If 'XXX' is the type of 
the event ('user_event' in your case), the pattern looks like this:

tracefs_XXX_alloc() - this constructor just allocates memory and initializes the descriptor object without modifying 
anything on the system. We allow for multiple constructor function, in the case when your objects has to many possible 
configurations and it is hard to do everything in a single API. Looking into your implementation, such constructor can 
do half of the work done in 'tracefs_user_event_group_create()'

int tracefs_XXX_create(struct tracefs_XXX *evt) - This is the API that actually adds your event on the system. Note that 
it takes just one argument that is the object itself. Again, looking into your implementation, this will combine the 
other half of tracefs_user_event_group_create() and tracefs_user_event_register().

int tracefs_XXX_destroy(struct tracefs_XXX *evt) This API removes your event from the system. The first argument is 
again the object. If needed, here you can use a second argument that is 'bool force'.

int tracefs_XXX_free(struct tracefs_XXX *evt) - just to free the memory


> +struct tracefs_user_event_group *tracefs_user_event_group_create(void);
> +
> +void tracefs_user_event_group_close(struct tracefs_user_event_group *group);
> +
> +int tracefs_user_event_delete(const char *name);
> +
> +struct tracefs_user_event *
> +tracefs_user_event_register(struct tracefs_user_event_group *group,
> +			    const char *name, enum tracefs_uevent_flags flags,
> +			    struct tracefs_uevent_item *items);
> +
> +bool tracefs_user_event_test(struct tracefs_user_event *event);

And maybe "test" is redundant. You can do the check in tracefs_XXX_create() and return an error if it fails.

> +
> +int tracefs_user_event_write(struct tracefs_user_event *event,
> +			     struct tracefs_uevent_item *items);

The "write" is OK.

Maybe we can change 'tracefs_user_event' to 'tracefs_usrevent' since we already use 'tracefs_dynevent' for dynamic events.

What do you think?

Thanks!
Yordan


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources
  2022-02-21 11:34   ` Yordan Karadzhov
@ 2022-02-21 17:57     ` Beau Belgrave
  2022-02-21 19:16       ` Steven Rostedt
  0 siblings, 1 reply; 22+ messages in thread
From: Beau Belgrave @ 2022-02-21 17:57 UTC (permalink / raw)
  To: Yordan Karadzhov; +Cc: rostedt, linux-trace-devel

On Mon, Feb 21, 2022 at 01:34:00PM +0200, Yordan Karadzhov wrote:
> 
> 
> On 19.02.22 г. 0:50 ч., Beau Belgrave wrote:
> > diff --git a/include/tracefs-local.h b/include/tracefs-local.h
> > index bf157e1..e768cba 100644
> > --- a/include/tracefs-local.h
> > +++ b/include/tracefs-local.h
> > @@ -119,4 +119,28 @@ int trace_rescan_events(struct tep_handle *tep,
> >   struct tep_event *get_tep_event(struct tep_handle *tep,
> >   				const char *system, const char *name);
> > +/* Internal interface for ftrace user events */
> > +
> > +struct tracefs_user_event_group;
> > +
> > +struct tracefs_user_event
> > +{
> > +	int write_index;
> > +	int status_index;
> > +	int iovecs;
> > +	int rels;
> > +	int len;
> > +	struct tracefs_user_event_group *group;
> > +	struct tracefs_user_event *next;
> > +};
> > +
> > +struct tracefs_user_event_group
> > +{
> > +	int fd;
> > +	int mmap_len;
> > +	char *mmap;
> > +	pthread_mutex_t lock;
> > +	struct tracefs_user_event *events;
> > +};
> > +
> >   #endif /* _TRACE_FS_LOCAL_H */
> > diff --git a/include/tracefs.h b/include/tracefs.h
> > index 1848ad0..7871dfe 100644
> > --- a/include/tracefs.h
> > +++ b/include/tracefs.h
> > @@ -571,4 +571,64 @@ struct tracefs_synth *tracefs_sql(struct tep_handle *tep, const char *name,
> >   struct tep_event *
> >   tracefs_synth_get_event(struct tep_handle *tep, struct tracefs_synth *synth);
> > +/* User events */
> > +enum tracefs_uevent_type {
> > +	TRACEFS_UEVENT_END,
> > +	TRACEFS_UEVENT_u8,
> > +	TRACEFS_UEVENT_s8,
> > +	TRACEFS_UEVENT_u16,
> > +	TRACEFS_UEVENT_s16,
> > +	TRACEFS_UEVENT_u32,
> > +	TRACEFS_UEVENT_s32,
> > +	TRACEFS_UEVENT_u64,
> > +	TRACEFS_UEVENT_s64,
> > +	TRACEFS_UEVENT_string,
> > +	TRACEFS_UEVENT_struct,
> > +	TRACEFS_UEVENT_varray,
> > +	TRACEFS_UEVENT_vstring,
> > +};
> > +
> > +enum tracefs_uevent_flags {
> > +	/* None */
> > +	TRACEFS_UEVENT_FLAG_NONE = 0,
> > +
> > +	/* When BPF is attached, use iterator/no copy */
> > +	TRACEFS_UEVENT_FLAG_bpf_iter = 1 << 0,
> > +};
> > +
> > +struct tracefs_uevent_item {
> > +	/* Type of item */
> > +	enum tracefs_uevent_type type;
> > +
> > +	/* Length of data, optional during register */
> > +	int len;
> > +
> > +	union {
> > +		/* Used during write */
> > +		const void *data;
> > +
> > +		/* Used during register */
> > +		const char *name;
> > +	};
> > +};
> > +
> > +struct tracefs_user_event;
> > +struct tracefs_user_event_group;
> > +
> 
> We've been trying to follow certain naming convention for the APIs and to
> provide similar usage patterns for all types of trace events that are
> supported by the library so far (dynamic, synthetic and histograms). If
> 'XXX' is the type of the event ('user_event' in your case), the pattern
> looks like this:
> 
> tracefs_XXX_alloc() - this constructor just allocates memory and initializes
> the descriptor object without modifying anything on the system. We allow for
> multiple constructor function, in the case when your objects has to many
> possible configurations and it is hard to do everything in a single API.
> Looking into your implementation, such constructor can do half of the work
> done in 'tracefs_user_event_group_create()'
> 
> int tracefs_XXX_create(struct tracefs_XXX *evt) - This is the API that
> actually adds your event on the system. Note that it takes just one argument
> that is the object itself. Again, looking into your implementation, this
> will combine the other half of tracefs_user_event_group_create() and
> tracefs_user_event_register().

Are you and Steven aligned on this convention?

The request looked slightly different:
See https://lore.kernel.org/linux-trace-devel/20220121192833.GA3128@kbox/T/#m2bcf53c373fbeaba2c46d1a053b3174171167e4e

> 
> int tracefs_XXX_destroy(struct tracefs_XXX *evt) This API removes your event
> from the system. The first argument is again the object. If needed, here you
> can use a second argument that is 'bool force'.
> 
> int tracefs_XXX_free(struct tracefs_XXX *evt) - just to free the memory
> 
> 
> > +struct tracefs_user_event_group *tracefs_user_event_group_create(void);
> > +
> > +void tracefs_user_event_group_close(struct tracefs_user_event_group *group);
> > +
> > +int tracefs_user_event_delete(const char *name);
> > +
> > +struct tracefs_user_event *
> > +tracefs_user_event_register(struct tracefs_user_event_group *group,
> > +			    const char *name, enum tracefs_uevent_flags flags,
> > +			    struct tracefs_uevent_item *items);
> > +
> > +bool tracefs_user_event_test(struct tracefs_user_event *event);
> 
> And maybe "test" is redundant. You can do the check in tracefs_XXX_create() and return an error if it fails.
> 

Test is required so user programs can tell when write should be called.
Otherwise excessive calculations and stack data are pushed for no good
reason.

> > +
> > +int tracefs_user_event_write(struct tracefs_user_event *event,
> > +			     struct tracefs_uevent_item *items);
> 
> The "write" is OK.
> 
> Maybe we can change 'tracefs_user_event' to 'tracefs_usrevent' since we already use 'tracefs_dynevent' for dynamic events.
> 
> What do you think?
> 

I'm happy to do whatever, I just want to ensure you and Steven are
aligned.

> Thanks!
> Yordan

Thanks,
-Beau

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources
  2022-02-21 17:57     ` Beau Belgrave
@ 2022-02-21 19:16       ` Steven Rostedt
  2022-02-22  6:27         ` Yordan Karadzhov
  0 siblings, 1 reply; 22+ messages in thread
From: Steven Rostedt @ 2022-02-21 19:16 UTC (permalink / raw)
  To: Beau Belgrave; +Cc: Yordan Karadzhov, linux-trace-devel

On Mon, 21 Feb 2022 09:57:20 -0800
Beau Belgrave <beaub@linux.microsoft.com> wrote:

> > We've been trying to follow certain naming convention for the APIs and to
> > provide similar usage patterns for all types of trace events that are
> > supported by the library so far (dynamic, synthetic and histograms). If
> > 'XXX' is the type of the event ('user_event' in your case), the pattern
> > looks like this:
> > 
> > tracefs_XXX_alloc() - this constructor just allocates memory and initializes
> > the descriptor object without modifying anything on the system. We allow for
> > multiple constructor function, in the case when your objects has to many
> > possible configurations and it is hard to do everything in a single API.
> > Looking into your implementation, such constructor can do half of the work
> > done in 'tracefs_user_event_group_create()'
> > 
> > int tracefs_XXX_create(struct tracefs_XXX *evt) - This is the API that
> > actually adds your event on the system. Note that it takes just one argument
> > that is the object itself. Again, looking into your implementation, this
> > will combine the other half of tracefs_user_event_group_create() and
> > tracefs_user_event_register().  
> 
> Are you and Steven aligned on this convention?

I would say you are both correct ;-)

The problem is, this doesn't really match the other events. The other
events are created in the kernel, and become a normal event. The big
difference here is that the event is tested in user space. It will be
hard to make it match the same criteria as kprobes, dynamic events, and
eprobes.

> 
> The request looked slightly different:
> See https://lore.kernel.org/linux-trace-devel/20220121192833.GA3128@kbox/T/#m2bcf53c373fbeaba2c46d1a053b3174171167e4e
> 
> > 
> > int tracefs_XXX_destroy(struct tracefs_XXX *evt) This API removes your event
> > from the system. The first argument is again the object. If needed, here you
> > can use a second argument that is 'bool force'.
> > 
> > int tracefs_XXX_free(struct tracefs_XXX *evt) - just to free the memory
> > 
> >   
> > > +struct tracefs_user_event_group *tracefs_user_event_group_create(void);
> > > +
> > > +void tracefs_user_event_group_close(struct tracefs_user_event_group *group);
> > > +
> > > +int tracefs_user_event_delete(const char *name);
> > > +
> > > +struct tracefs_user_event *
> > > +tracefs_user_event_register(struct tracefs_user_event_group *group,
> > > +			    const char *name, enum tracefs_uevent_flags flags,
> > > +			    struct tracefs_uevent_item *items);
> > > +
> > > +bool tracefs_user_event_test(struct tracefs_user_event *event);  
> > 
> > And maybe "test" is redundant. You can do the check in tracefs_XXX_create() and return an error if it fails.
> >   
> 
> Test is required so user programs can tell when write should be called.
> Otherwise excessive calculations and stack data are pushed for no good
> reason.

This is the part I'm talking about.  The "user_event" here is actually
executed in the application, and this API is to handle it. This is
where the user events diverge from the other events.

The user events create the event like any other dynamic event (we could
look at keeping that part similar with the other APIs). But then
everything is different.

When the event is created, the application is given a location on a
special mmapped page that represents if the event is enabled or not.
Then the application is to read it *at the event site* to know if it
should record the event into the ring buffer or not.

That's what the test is. It's equivalent to the "static_branch" that
tracepoints use in the kernel, except this isn't a static branch
(although we could possibly do something like that in the future!).

The application will:

	if (tracefs_user_event_test(event)) {
		tracefs_user_event_write(event, ...);
	}

Where if the event is enabled (by an external program), then it is to
record the events. This is the API that does that.

And test is not redundant at all.

> 
> > > +
> > > +int tracefs_user_event_write(struct tracefs_user_event *event,
> > > +			     struct tracefs_uevent_item *items);  
> > 
> > The "write" is OK.
> > 
> > Maybe we can change 'tracefs_user_event' to 'tracefs_usrevent' since we already use 'tracefs_dynevent' for dynamic events.
> > 
> > What do you think?

Not really. This is closer to tracefs_printf(). But thinking about this
more, perhaps "write" is a bit confusing as we use it to write to
tracefs files.

What about:  tracefs_user_event_record() ?

As this is exactly what it is doing. It's recording the event.


> >   
> 
> I'm happy to do whatever, I just want to ensure you and Steven are
> aligned.

Thanks!

-- Steve

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources
  2022-02-21 19:16       ` Steven Rostedt
@ 2022-02-22  6:27         ` Yordan Karadzhov
  2022-02-22 14:00           ` Steven Rostedt
  0 siblings, 1 reply; 22+ messages in thread
From: Yordan Karadzhov @ 2022-02-22  6:27 UTC (permalink / raw)
  To: Steven Rostedt, Beau Belgrave; +Cc: linux-trace-devel



On 21.02.22 г. 21:16 ч., Steven Rostedt wrote:
> On Mon, 21 Feb 2022 09:57:20 -0800
> Beau Belgrave <beaub@linux.microsoft.com> wrote:
> 
>>> We've been trying to follow certain naming convention for the APIs and to
>>> provide similar usage patterns for all types of trace events that are
>>> supported by the library so far (dynamic, synthetic and histograms). If
>>> 'XXX' is the type of the event ('user_event' in your case), the pattern
>>> looks like this:
>>>
>>> tracefs_XXX_alloc() - this constructor just allocates memory and initializes
>>> the descriptor object without modifying anything on the system. We allow for
>>> multiple constructor function, in the case when your objects has to many
>>> possible configurations and it is hard to do everything in a single API.
>>> Looking into your implementation, such constructor can do half of the work
>>> done in 'tracefs_user_event_group_create()'
>>>
>>> int tracefs_XXX_create(struct tracefs_XXX *evt) - This is the API that
>>> actually adds your event on the system. Note that it takes just one argument
>>> that is the object itself. Again, looking into your implementation, this
>>> will combine the other half of tracefs_user_event_group_create() and
>>> tracefs_user_event_register().
>>
>> Are you and Steven aligned on this convention?
> 
> I would say you are both correct ;-)
> 
> The problem is, this doesn't really match the other events. The other
> events are created in the kernel, and become a normal event. The big
> difference here is that the event is tested in user space. It will be
> hard to make it match the same criteria as kprobes, dynamic events, and
> eprobes.
> 
>>
>> The request looked slightly different:
>> See https://lore.kernel.org/linux-trace-devel/20220121192833.GA3128@kbox/T/#m2bcf53c373fbeaba2c46d1a053b3174171167e4e
>>
>>>
>>> int tracefs_XXX_destroy(struct tracefs_XXX *evt) This API removes your event
>>> from the system. The first argument is again the object. If needed, here you
>>> can use a second argument that is 'bool force'.
>>>
>>> int tracefs_XXX_free(struct tracefs_XXX *evt) - just to free the memory
>>>
>>>    
>>>> +struct tracefs_user_event_group *tracefs_user_event_group_create(void);
>>>> +
>>>> +void tracefs_user_event_group_close(struct tracefs_user_event_group *group);
>>>> +
>>>> +int tracefs_user_event_delete(const char *name);
>>>> +
>>>> +struct tracefs_user_event *
>>>> +tracefs_user_event_register(struct tracefs_user_event_group *group,
>>>> +			    const char *name, enum tracefs_uevent_flags flags,
>>>> +			    struct tracefs_uevent_item *items);
>>>> +
>>>> +bool tracefs_user_event_test(struct tracefs_user_event *event);
>>>
>>> And maybe "test" is redundant. You can do the check in tracefs_XXX_create() and return an error if it fails.
>>>    
>>
>> Test is required so user programs can tell when write should be called.
>> Otherwise excessive calculations and stack data are pushed for no good
>> reason.
> 
> This is the part I'm talking about.  The "user_event" here is actually
> executed in the application, and this API is to handle it. This is
> where the user events diverge from the other events.
> 
> The user events create the event like any other dynamic event (we could
> look at keeping that part similar with the other APIs). But then
> everything is different.
> 
> When the event is created, the application is given a location on a
> special mmapped page that represents if the event is enabled or not.
> Then the application is to read it *at the event site* to know if it
> should record the event into the ring buffer or not.
> 
> That's what the test is. It's equivalent to the "static_branch" that
> tracepoints use in the kernel, except this isn't a static branch
> (although we could possibly do something like that in the future!).
> 
> The application will:
> 
> 	if (tracefs_user_event_test(event)) {
> 		tracefs_user_event_write(event, ...);
> 	}
> 
> Where if the event is enabled (by an external program), then it is to
> record the events. This is the API that does that.
> 
> And test is not redundant at all.

Thanks a lot for clarifying this and sorry about my confusion!

I have one last question. Do you consider as a valid use case that the library must support, someone to do a just 'test" 
without writing after this, or to "write" without testing first?

thanks!
Yordan

> 
>>
>>>> +
>>>> +int tracefs_user_event_write(struct tracefs_user_event *event,
>>>> +			     struct tracefs_uevent_item *items);
>>>
>>> The "write" is OK.
>>>
>>> Maybe we can change 'tracefs_user_event' to 'tracefs_usrevent' since we already use 'tracefs_dynevent' for dynamic events.
>>>
>>> What do you think?
> 
> Not really. This is closer to tracefs_printf(). But thinking about this
> more, perhaps "write" is a bit confusing as we use it to write to
> tracefs files.
> 
> What about:  tracefs_user_event_record() ?
> 
> As this is exactly what it is doing. It's recording the event.
> 
> 
>>>    
>>
>> I'm happy to do whatever, I just want to ensure you and Steven are
>> aligned.
> 
> Thanks!
> 
> -- Steve

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources
  2022-02-22  6:27         ` Yordan Karadzhov
@ 2022-02-22 14:00           ` Steven Rostedt
  2022-02-22 14:25             ` Yordan Karadzhov
  2022-02-22 16:59             ` Beau Belgrave
  0 siblings, 2 replies; 22+ messages in thread
From: Steven Rostedt @ 2022-02-22 14:00 UTC (permalink / raw)
  To: Yordan Karadzhov; +Cc: Beau Belgrave, linux-trace-devel

On Tue, 22 Feb 2022 08:27:31 +0200
Yordan Karadzhov <y.karadz@gmail.com> wrote:

> I have one last question. Do you consider as a valid use case that
> the library must support, someone to do a just 'test" without writing
> after this, or to "write" without testing first?

Actually, that's a very good point.

I was thinking that I didn't like the "test" name, and was thinking of
having it be:

	if (tracefs_user_event_enabled(event)) {
		tracefs_user_event_record(event, ...);
	}

But I think you have a good point. Perhaps we should just have:

	tracefs_user_event_trace(event, ...);

and it do all the work. But it would need to be a macro, that does:

#define tracefs_user_event_trace(event, ...)			\
	do {							\
		if (tracefs_user_event_enabled(event)) {	\
			tracefs_user_event_record(event, ##__VA_ARGS); \
		}						\
	} while (0)

Because we do not want to have the compiler process the arguments when
the event is not enabled. That would be too much overhead.

But as a macro, it would work.

Thoughts?

-- Steve

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources
  2022-02-22 14:00           ` Steven Rostedt
@ 2022-02-22 14:25             ` Yordan Karadzhov
  2022-02-22 15:41               ` Steven Rostedt
  2022-02-22 16:59             ` Beau Belgrave
  1 sibling, 1 reply; 22+ messages in thread
From: Yordan Karadzhov @ 2022-02-22 14:25 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Beau Belgrave, linux-trace-devel



On 22.02.22 г. 16:00 ч., Steven Rostedt wrote:
> On Tue, 22 Feb 2022 08:27:31 +0200
> Yordan Karadzhov <y.karadz@gmail.com> wrote:
> 
>> I have one last question. Do you consider as a valid use case that
>> the library must support, someone to do a just 'test" without writing
>> after this, or to "write" without testing first?
> 
> Actually, that's a very good point.
> 
> I was thinking that I didn't like the "test" name, and was thinking of
> having it be:
> 
> 	if (tracefs_user_event_enabled(event)) {
> 		tracefs_user_event_record(event, ...);
> 	}
> 
> But I think you have a good point. Perhaps we should just have:
> 
> 	tracefs_user_event_trace(event, ...);
> 
> and it do all the work. But it would need to be a macro, that does:
> 
> #define tracefs_user_event_trace(event, ...)			\

I personally think that, using variadic arguments in library APIs is a not a good idea, because this enforces that the 
caller must know the number of those arguments at compile time.

For me the original solution that uses an array of items is better.
Or maybe we can have both as 2 different APIs.

Thanks!
Y.

> 	do {							\
> 		if (tracefs_user_event_enabled(event)) {	\
> 			tracefs_user_event_record(event, ##__VA_ARGS); \
> 		}						\
> 	} while (0)
> 
> Because we do not want to have the compiler process the arguments when
> the event is not enabled. That would be too much overhead.
> 
> But as a macro, it would work.
> 
> Thoughts?
> 
> -- Steve

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources
  2022-02-22 14:25             ` Yordan Karadzhov
@ 2022-02-22 15:41               ` Steven Rostedt
  0 siblings, 0 replies; 22+ messages in thread
From: Steven Rostedt @ 2022-02-22 15:41 UTC (permalink / raw)
  To: Yordan Karadzhov; +Cc: Beau Belgrave, linux-trace-devel

On Tue, 22 Feb 2022 16:25:35 +0200
Yordan Karadzhov <y.karadz@gmail.com> wrote:

> I personally think that, using variadic arguments in library APIs is a
> not a good idea, because this enforces that the caller must know the
> number of those arguments at compile time.
> 
> For me the original solution that uses an array of items is better.
> Or maybe we can have both as 2 different APIs.

Actually, I was using the va_args as an example. I really didn't care about
the implementation of the arguments, except that they need to be added
after the enable check.

For now, lets just keep the two functions to check for the event being
enabled and recording. I think the last names were the way to go:

	tracefs_user_event_enabled();
	tracefs_user_event_record();

And keep the record using the array. If we want a macro, we could do:

#define tracefs_user_event_trace(event, ...)				\
	do {								\
		if (tracefs_user_event_enabled(event)) {		\
			struct tracefs_uevent_item items[] = {		\
				##__VA_ARGS__,				\
				{ TRACEFS_UEVENT_END },			\
			}						\
			tracefs_user_event_record(event, items);	\
		}							\
	} while (0)

And the user could have:

	tracefs_user_event_trace(event,
		{ TRACEFS_UEVENT_vstring, strlen(msg)+1, .data = msg });

Again, for those that do not want compile time knowledge of the arguments,
you just use the normal interface, and those that want the helper, to use
the macro.

-- Steve


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources
  2022-02-22 14:00           ` Steven Rostedt
  2022-02-22 14:25             ` Yordan Karadzhov
@ 2022-02-22 16:59             ` Beau Belgrave
  2022-02-22 17:10               ` Steven Rostedt
  1 sibling, 1 reply; 22+ messages in thread
From: Beau Belgrave @ 2022-02-22 16:59 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Yordan Karadzhov, linux-trace-devel

On Tue, Feb 22, 2022 at 09:00:21AM -0500, Steven Rostedt wrote:
> On Tue, 22 Feb 2022 08:27:31 +0200
> Yordan Karadzhov <y.karadz@gmail.com> wrote:
> 
> > I have one last question. Do you consider as a valid use case that
> > the library must support, someone to do a just 'test" without writing
> > after this, or to "write" without testing first?
> 
> Actually, that's a very good point.
> 
> I was thinking that I didn't like the "test" name, and was thinking of
> having it be:
> 
> 	if (tracefs_user_event_enabled(event)) {
> 		tracefs_user_event_record(event, ...);
> 	}

I do like the renaming of test to enabled and write to record.

> 
> But I think you have a good point. Perhaps we should just have:
> 
> 	tracefs_user_event_trace(event, ...);
> 
> and it do all the work. But it would need to be a macro, that does:
> 
> #define tracefs_user_event_trace(event, ...)			\
> 	do {							\
> 		if (tracefs_user_event_enabled(event)) {	\
> 			tracefs_user_event_record(event, ##__VA_ARGS); \
> 		}						\
> 	} while (0)
> 
> Because we do not want to have the compiler process the arguments when
> the event is not enabled. That would be too much overhead.
> 
> But as a macro, it would work.
> 
> Thoughts?

Many times we have found that we have to do work that should only be
performed if the event is enabled. As long as there is a still clear
ways to check without the macro, this would be fine with me.

IE: strlen(message)

We should not walk strings unless the event is enabled.

Thanks,
-Beau

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources
  2022-02-18 22:50 ` [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources Beau Belgrave
  2022-02-21 11:34   ` Yordan Karadzhov
@ 2022-02-22 17:08   ` Steven Rostedt
  2022-02-22 17:31   ` Steven Rostedt
  2 siblings, 0 replies; 22+ messages in thread
From: Steven Rostedt @ 2022-02-22 17:08 UTC (permalink / raw)
  To: Beau Belgrave; +Cc: linux-trace-devel

On Fri, 18 Feb 2022 14:50:56 -0800
Beau Belgrave <beaub@linux.microsoft.com> wrote:

> Adds the required APIs to libtracefs to create, manage and write out
> data to trace events via the user_events kernel mechanism.

Just to be more explicit here. I would add a link to the conversation that
you shared with Yordan.

Link:
https://lore.kernel.org/linux-trace-devel/20220121192833.GA3128@kbox/T/#m2bcf53c373fbeaba2c46d1a053b3174171167e4e

With a little introduction:

   The user events are scheduled to be included into Linux 5.18, which
   register a special mmapped page to denote when the user event is enabled
   (from an external source). This API adds a wrapper to the kernel
   interface that makes it easy to register user events and test if they
   are enabled and to record the event when it is.

> 
> Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
> ---
>  Makefile                 |   8 +
>  include/tracefs-local.h  |  24 ++
>  include/tracefs.h        |  60 +++++
>  src/Makefile             |   4 +
>  src/tracefs-userevents.c | 545 +++++++++++++++++++++++++++++++++++++++
>  5 files changed, 641 insertions(+)
>  create mode 100644 src/tracefs-userevents.c
> 
> diff --git a/Makefile b/Makefile
> index 544684c..a4598b4 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -154,6 +154,14 @@ CFLAGS ?= -g -Wall
>  CPPFLAGS ?=
>  LDFLAGS ?=
>  
> +USEREVENTS_INSTALLED := $(shell if (echo "$(pound)include <linux/user_events.h>" | $(CC) -E - >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi)
> +export USEREVENTS_INSTALLED
> +ifeq ($(USEREVENTS_INSTALLED), 1)
> +CFLAGS += -DUSEREVENTS
> +else
> +$(warning user_events.h not installed, skipping)
> +endif
> +
>  CUNIT_INSTALLED := $(shell if (printf "$(pound)include <CUnit/Basic.h>\n void main(){CU_initialize_registry();}" | $(CC) -x c - -lcunit -o /dev/null >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi)
>  export CUNIT_INSTALLED
>  
> diff --git a/include/tracefs-local.h b/include/tracefs-local.h
> index bf157e1..e768cba 100644
> --- a/include/tracefs-local.h
> +++ b/include/tracefs-local.h
> @@ -119,4 +119,28 @@ int trace_rescan_events(struct tep_handle *tep,
>  struct tep_event *get_tep_event(struct tep_handle *tep,
>  				const char *system, const char *name);
>  
> +/* Internal interface for ftrace user events */
> +
> +struct tracefs_user_event_group;
> +
> +struct tracefs_user_event
> +{
> +	int write_index;
> +	int status_index;
> +	int iovecs;
> +	int rels;
> +	int len;
> +	struct tracefs_user_event_group *group;
> +	struct tracefs_user_event *next;
> +};
> +
> +struct tracefs_user_event_group
> +{
> +	int fd;
> +	int mmap_len;
> +	char *mmap;
> +	pthread_mutex_t lock;
> +	struct tracefs_user_event *events;
> +};

Nit, but can you indent the fields like we do in the kernel.

struct tracefs_user_event
{
	int				write_index;
	int				status_index;
	int				iovecs;
	int				rels;
	int				len;
	struct tracefs_user_event_group	*group;
	struct tracefs_user_event	*next;
};

struct tracefs_user_event_group
{
	int				fd;
	int				mmap_len;
	char				*mmap;
	pthread_mutex_t			lock;
	struct tracefs_user_event	*events;
};

It's just easier to read and see the fields.

> +
>  #endif /* _TRACE_FS_LOCAL_H */
> diff --git a/include/tracefs.h b/include/tracefs.h
> index 1848ad0..7871dfe 100644
> --- a/include/tracefs.h
> +++ b/include/tracefs.h
> @@ -571,4 +571,64 @@ struct tracefs_synth *tracefs_sql(struct tep_handle *tep, const char *name,
>  struct tep_event *
>  tracefs_synth_get_event(struct tep_handle *tep, struct tracefs_synth *synth);
>  
> +/* User events */
> +enum tracefs_uevent_type {
> +	TRACEFS_UEVENT_END,
> +	TRACEFS_UEVENT_u8,
> +	TRACEFS_UEVENT_s8,
> +	TRACEFS_UEVENT_u16,
> +	TRACEFS_UEVENT_s16,
> +	TRACEFS_UEVENT_u32,
> +	TRACEFS_UEVENT_s32,
> +	TRACEFS_UEVENT_u64,
> +	TRACEFS_UEVENT_s64,
> +	TRACEFS_UEVENT_string,
> +	TRACEFS_UEVENT_struct,
> +	TRACEFS_UEVENT_varray,
> +	TRACEFS_UEVENT_vstring,
> +};
> +
> +enum tracefs_uevent_flags {
> +	/* None */
> +	TRACEFS_UEVENT_FLAG_NONE = 0,
> +
> +	/* When BPF is attached, use iterator/no copy */
> +	TRACEFS_UEVENT_FLAG_bpf_iter = 1 << 0,
> +};
> +
> +struct tracefs_uevent_item {
> +	/* Type of item */
> +	enum tracefs_uevent_type type;
> +
> +	/* Length of data, optional during register */
> +	int len;
> +
> +	union {
> +		/* Used during write */
> +		const void *data;
> +
> +		/* Used during register */
> +		const char *name;
> +	};
> +};

Same with the above:

struct tracefs_uevent_item {
	/* Type of item */
	enum tracefs_uevent_type		type;

	/* Length of data, optional during register */
	int					len;

	union {
		/* Used during write */
		const void			*data;

		/* Used during register */
		const char			*name;
	};
};


> +
> +struct tracefs_user_event;
> +struct tracefs_user_event_group;
> +
> +struct tracefs_user_event_group *tracefs_user_event_group_create(void);

Naming is hard :-(

Yordan has a point that we use the term "create" in the other APIs to mean
"create the event on the file system". Where as this isn't doing that. But
it is not just allocating either as it is doing work on the filesystem.
Perhaps a better term is "init" as it is initializing the group?

	tracefs_user_event_group_init() ?

> +
> +void tracefs_user_event_group_close(struct tracefs_user_event_group *group);

Or we could call it "open" as then it would match the "close" here?

	tracefs_user_event_group_open() ?

Is that a better name for the create function?

Hmm, I'm thinking calling it "open" may be the best, as it then matches the
close.

  (Just writing out my brain thoughts here)

> +
> +int tracefs_user_event_delete(const char *name);
> +
> +struct tracefs_user_event *
> +tracefs_user_event_register(struct tracefs_user_event_group *group,
> +			    const char *name, enum tracefs_uevent_flags flags,
> +			    struct tracefs_uevent_item *items);
> +
> +bool tracefs_user_event_test(struct tracefs_user_event *event);

Like I said before. I think tracefs_user_event_enabled() may be a better
name. I know I'm the one that suggested "test" but as I stated up front,
naming is hard ;-)

> +
> +int tracefs_user_event_write(struct tracefs_user_event *event,
> +			     struct tracefs_uevent_item *items);

And this to be tracefs_user_event_record(), that way it will not be
confused to mean something similar to what the other API "write" functions
do.

> +
>  #endif /* _TRACE_FS_H */
> diff --git a/src/Makefile b/src/Makefile
> index e8afab5..984e8cf 100644
> --- a/src/Makefile
> +++ b/src/Makefile
> @@ -14,6 +14,10 @@ OBJS += tracefs-filter.o
>  OBJS += tracefs-dynevents.o
>  OBJS += tracefs-eprobes.o
>  
> +ifeq ($(USEREVENTS_INSTALLED), 1)
> +OBJS += tracefs-userevents.o
> +endif
> +
>  # Order matters for the the three below
>  OBJS += sqlhist-lex.o
>  OBJS += sqlhist.tab.o
> diff --git a/src/tracefs-userevents.c b/src/tracefs-userevents.c
> new file mode 100644
> index 0000000..4d64fd8
> --- /dev/null
> +++ b/src/tracefs-userevents.c
> @@ -0,0 +1,545 @@
> +// SPDX-License-Identifier: LGPL-2.1
> +/*
> + * Copyright (C) 2022 Microsoft Corporation.
> + *
> + * Authors:
> + *   Beau Belgrave <beaub@linux.microsoft.com>
> + */
> +
> +#include <alloca.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <pthread.h>
> +#include <sys/mman.h>
> +#include <sys/ioctl.h>
> +#include <sys/uio.h>
> +#include <linux/user_events.h>
> +
> +#include "tracefs.h"
> +#include "tracefs-local.h"
> +
> +#define STAT_FILE "user_events_status"
> +#define DATA_FILE "user_events_data"
> +
> +static void free_user_events(struct tracefs_user_event *event)
> +{
> +	struct tracefs_user_event *next;
> +
> +	while (event) {
> +		next = event->next;
> +		free(event);
> +		event = next;
> +	}
> +}
> +
> +#define LEN_OR_ZERO (len ? len - pos : 0)
> +static int append_field(struct tracefs_uevent_item *item, char *buf,
> +			int len, int offset, int index)
> +{
> +	int pos = offset;
> +
> +	if (index != 0)
> +		pos += snprintf(buf + pos, LEN_OR_ZERO, ";");

You know, you can use the trace_seq functionality here, which might make
things easier.

In tracefs_user_event_reg()
	trace_seq seq;

	trace_seq_init(&seq);

then pass the &seq into the other functions where here we would have:

static int append_field(struct tracefs_uevent_item *item,
			struct trace_seq *s, int len, int offset, int index)

> +
> +	switch (item->type) {
> +	case TRACEFS_UEVENT_u8:
> +		pos += snprintf(buf + pos, LEN_OR_ZERO,
> +				" u8 %s", item->name);

		trace_seq_printf(s, " u8 %s", item->name);

> +		break;
> +
> +	case TRACEFS_UEVENT_s8:
> +		pos += snprintf(buf + pos, LEN_OR_ZERO,
> +				" s8 %s", item->name);

And the same for all these.

Then at the end, you can check for failed allocations with:

	if (seq.state) {
		/* failed allocation */


And you can get the access to the buffer with:

	trace_seq_terminate(&seq); // adds '\0' to the buffer

	printf("%s", seq.buffer);

Then clean it up with:

	trace_seq_destroy(&seq);

trace_seq is part of libtraceevent which tracefs depends on.

It will make a lot of this code simpler.

> +		break;
> +
> +	case TRACEFS_UEVENT_u16:
> +		pos += snprintf(buf + pos, LEN_OR_ZERO,
> +				" u16 %s", item->name);
> +		break;
> +
> +	case TRACEFS_UEVENT_s16:
> +		pos += snprintf(buf + pos, LEN_OR_ZERO,
> +				" s16 %s", item->name);
> +		break;
> +
> +	case TRACEFS_UEVENT_u32:
> +		pos += snprintf(buf + pos, LEN_OR_ZERO,
> +				" u32 %s", item->name);
> +		break;
> +
> +	case TRACEFS_UEVENT_s32:
> +		pos += snprintf(buf + pos, LEN_OR_ZERO,
> +				" s32 %s", item->name);
> +		break;
> +
> +	case TRACEFS_UEVENT_u64:
> +		pos += snprintf(buf + pos, LEN_OR_ZERO,
> +				" u64 %s", item->name);
> +		break;
> +
> +	case TRACEFS_UEVENT_s64:
> +		pos += snprintf(buf + pos, LEN_OR_ZERO,
> +				" s64 %s", item->name);
> +		break;
> +
> +	case TRACEFS_UEVENT_string:
> +		if (item->len <= 0) {
> +			errno = EINVAL;
> +			return -1;
> +		}
> +
> +		pos += snprintf(buf + pos, LEN_OR_ZERO,
> +				" char[%d] %s", item->len, item->name);
> +		break;
> +
> +	case TRACEFS_UEVENT_struct:
> +		/*
> +		 * struct must have 2 strings, do simple check
> +		 * in user, kernel will fully validate
> +		 */
> +		if (!strchr(item->name, ' ')) {
> +			errno = EINVAL;
> +			return -1;
> +		}
> +
> +		if (item->len <= 0) {
> +			errno = EINVAL;
> +			return -1;
> +		}
> +
> +		pos += snprintf(buf + pos, LEN_OR_ZERO,
> +				" struct %s %d", item->name, item->len);
> +		break;
> +
> +	case TRACEFS_UEVENT_varray:
> +		/* Variable length array */
> +		pos += snprintf(buf + pos, LEN_OR_ZERO,
> +				" __rel_loc u8[] %s", item->name);
> +		break;
> +
> +	case TRACEFS_UEVENT_vstring:
> +		/* Variable length string */
> +		pos += snprintf(buf + pos, LEN_OR_ZERO,
> +				" __rel_loc char[] %s", item->name);
> +		break;
> +
> +	default:
> +		/* Unknown */
> +		errno = ENOENT;
> +		return -1;
> +	}
> +
> +	return pos;
> +}
> +
> +static int create_reg_cmd(const char *name, enum tracefs_uevent_flags flags,
> +			  struct tracefs_uevent_item *items, char *buf, int len)
> +{
> +	int pos = 0;
> +	int index = 0;
> +
> +	pos += snprintf(buf + pos, LEN_OR_ZERO, "%s", name);
> +
> +	if (flags & TRACEFS_UEVENT_FLAG_bpf_iter)
> +		pos += snprintf(buf + pos, LEN_OR_ZERO, ":BPF_ITER");
> +
> +	while (items->type != TRACEFS_UEVENT_END) {
> +		pos = append_field(items, buf, len, pos, index++);
> +
> +		if (pos < 0)
> +			return pos;
> +
> +		items++;
> +	}
> +
> +	return pos + 1;
> +}
> +#undef LEN_OR_ZERO
> +
> +static int get_write_counts(struct tracefs_user_event *event,
> +			    struct tracefs_uevent_item *item)
> +{
> +	event->rels = 0;
> +	event->len = 0;
> +
> +	/* Start at 1, need iovec for write_index */
> +	event->iovecs = 1;
> +
> +	while (item->type != TRACEFS_UEVENT_END) {
> +		switch (item->type) {
> +		case TRACEFS_UEVENT_u8:
> +		case TRACEFS_UEVENT_s8:
> +			event->len += sizeof(__u8);
> +			break;
> +
> +		case TRACEFS_UEVENT_u16:
> +		case TRACEFS_UEVENT_s16:
> +			event->len += sizeof(__u16);
> +			break;
> +
> +		case TRACEFS_UEVENT_u32:
> +		case TRACEFS_UEVENT_s32:
> +			event->len += sizeof(__u32);
> +			break;
> +
> +		case TRACEFS_UEVENT_u64:
> +		case TRACEFS_UEVENT_s64:
> +			event->len += sizeof(__u64);
> +			break;
> +
> +		case TRACEFS_UEVENT_string:
> +		case TRACEFS_UEVENT_struct:
> +			event->len += item->len;
> +			break;
> +
> +		case TRACEFS_UEVENT_varray:
> +		case TRACEFS_UEVENT_vstring:
> +			/* Requires a rel loc entry */
> +			event->len += sizeof(__u32);
> +			event->rels++;
> +			break;
> +
> +		default:
> +			/* Unknown */
> +			errno = ENOENT;
> +			return -1;
> +		}
> +
> +		event->iovecs++;
> +		item++;
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * tracefs_user_event_group_create - Create a new group to use for user events
> + *
> + * Returns a pointer to a group to use for user events. The pointer is valid until
> + * tracefs_user_event_group_close() is called. In case of an error NULL is
> + * returned.
> + */
> +struct tracefs_user_event_group *tracefs_user_event_group_create(void)
> +{
> +	int stat, write, page_size, i;
> +	struct tracefs_user_event_group *group;
> +
> +	stat = tracefs_instance_file_open(NULL, STAT_FILE, O_RDWR);
> +
> +	if (stat < 0)
> +		return NULL;
> +
> +	write = tracefs_instance_file_open(NULL, DATA_FILE, O_RDWR);
> +
> +	if (write < 0)
> +		goto put_stat;
> +
> +	group = malloc(sizeof(*group));
> +
> +	if (!group)
> +		goto put_write;
> +
> +	if (pthread_mutex_init(&group->lock, NULL) < 0)
> +		goto put_group;
> +
> +	/* Scale up to 16-bit max user events a page at a time */
> +	page_size = sysconf(_SC_PAGESIZE);
> +	group->mmap_len = page_size;
> +
> +	for (i = 0; i < 16; ++i) {
> +		group->mmap = mmap(NULL, group->mmap_len,
> +				   PROT_READ, MAP_SHARED, stat, 0);
> +
> +		if (group->mmap == MAP_FAILED && errno == EINVAL) {
> +			/* Increase by page size and try again */
> +			group->mmap_len += page_size;
> +			continue;
> +		}
> +
> +		break;
> +	}
> +
> +	if (group->mmap == MAP_FAILED)
> +		goto put_group;
> +
> +	group->fd = write;
> +	group->events = NULL;
> +
> +	/* Status fd no longer needed */
> +	close(stat);
> +
> +	return group;
> +
> +put_group:
> +	free(group);
> +put_write:
> +	close(write);
> +put_stat:
> +	close(stat);
> +
> +	return NULL;
> +}
> +
> +/**
> + * tracefs_user_event_delete - Deletes a user event from the system
> + * @name: Name of the event to delete
> + *
> + * Deletes the event from the system if it is not used.
> + */
> +int tracefs_user_event_delete(const char *name)
> +{
> +	int ret, write;
> +       

The above line has white space at the end of it.

> +	write = tracefs_instance_file_open(NULL, DATA_FILE, O_RDWR);
> +
> +	if (write < 0)
> +		return write;
> +
> +	ret = ioctl(write, DIAG_IOCSDEL, name);
> +
> +	close(write);
> +
> +	return ret;
> +}
> +
> +/**
> + * tracefs_user_event_group_close - Closes a group containing user events
> + * @group: Group to close
> + *
> + * Closes a group and all the user events within it. Any user event that has
> + * been added to the group is no longer valid and cannot be used.
> + */
> +void tracefs_user_event_group_close(struct tracefs_user_event_group *group)
> +{
> +	if (!group)
> +		return;
> +
> +	if (group->mmap != MAP_FAILED)
> +		munmap(group->mmap, group->mmap_len);
> +
> +	if (group->fd != -1)
> +		close(group->fd);
> +
> +	free_user_events(group->events);
> +	free(group);
> +}
> +
> +/**
> + * tracefs_user_event_register - Registers a user event with the system
> + * @group: Group to add the user event to
> + * @name: Name of the event to register
> + * @flags: Flags to use
> + * @items: Array of items that the event contains
> + *
> + * Allocates and registers a user event with the system. The user event will be
> + * added to the @group. The lifetime of the event is bound to the @group. When
> + * the @group is closed via tracefs_user_event_group_close() the event will no
> + * longer exist and should not be used.
> + *
> + * The @items are processed in order and the final item type must be set to
> + * TRACEFS_UEVENT_END to mark the last item. Each item must have the type
> + * and name defined. The string and struct type also require the len to be set
> + * for the item.
> + *
> + * Return a pointer to a user event on success, or NULL or error.
> + *
> + * errno will be set to EINVAL if @group is null or unexpected @items.
> + */
> +struct tracefs_user_event *
> +tracefs_user_event_register(struct tracefs_user_event_group *group,
> +			    const char *name, enum tracefs_uevent_flags flags,
> +			    struct tracefs_uevent_item *items)
> +{
> +	struct tracefs_user_event *event = NULL;
> +	struct user_reg reg = {0};
> +	char *cmd = NULL;
> +	int len;
> +
> +	if (!group || !items) {
> +		errno = EINVAL;
> +		return NULL;
> +	}
> +
> +	/* Determine length of cmd */
> +	len = create_reg_cmd(name, flags, items, cmd, 0);
> +
> +	if (len < 0) {
> +		errno = EINVAL;
> +		return NULL;
> +	}
> +
> +	/* Allocate and fill cmd */
> +	cmd = malloc(len);
> +
> +	if (!cmd)
> +		return NULL;
> +
> +	create_reg_cmd(name, flags, items, cmd, len);
> +
> +	event = malloc(sizeof(*event));
> +
> +	if (!event)
> +		goto put_cmd;
> +
> +	reg.size = sizeof(reg);
> +	reg.name_args = (__u64)cmd;
> +
> +	/* Register event with kernel */
> +	if (ioctl(group->fd, DIAG_IOCSREG, &reg) == -1)
> +		goto put_event;
> +
> +	/* Sanity check bounds returned */
> +	if (reg.status_index >= group->mmap_len) {
> +		errno = EINVAL;
> +		goto put_event;
> +	}
> +
> +	if (get_write_counts(event, items))
> +		goto put_event;
> +
> +	event->write_index = reg.write_index;
> +	event->status_index = reg.status_index;
> +	event->group = group;
> +
> +	/* Add event into the group under lock */
> +	pthread_mutex_lock(&group->lock);
> +	event->next = group->events;
> +	group->events = event->next;
> +	pthread_mutex_unlock(&group->lock);
> +
> +	free(cmd);
> +
> +	return event;
> +put_event:
> +	free(event);
> +put_cmd:
> +	free(cmd);
> +
> +	return NULL;
> +}
> +
> +/**
> + * tracefs_user_event_test - Tests if an event is currently enabled
> + * @event: User event to test
> + *
> + * Tests if the @event is valid and currently enabled on the system.
> + *
> + * Return true if enabled, false otherwise.
> + */
> +bool tracefs_user_event_test(struct tracefs_user_event *event)
> +{
> +	return event && event->group->mmap[event->status_index] != 0;
> +}
> +
> +/**
> + * tracefs_user_event_write - Writes data out to an event
> + * @event: User event to write data about
> + * @items: Items to write for the event
> + *
> + * Writes out items for the event. Callers should check if the cost of writing
> + * should be performed by calling tracefs_user_event_test(). Items are checked
> + * to ensure they fit within the described items during register. Each item
> + * must specify the length of the item being written.
> + *
> + * Return the number of bytes written or -1 upon error.
> + *
> + * errno will be set to EINVAL if @event or @items is null or @items contains
> + * an item with a length of less than or equal to 0.
> + * errno will be set to E2BIG if @items contains more items than previously
> + * registered for the event.
> + */
> +int tracefs_user_event_write(struct tracefs_user_event *event,
> +			     struct tracefs_uevent_item *items)
> +{
> +	struct iovec *head, *io, *relio, *io_end;
> +	__u32 *rel, *rel_end;
> +	int len, rel_offset, data_offset, used;
> +
> +	if (!event || !items) {
> +		errno = EINVAL;
> +		return -1;
> +	}
> +
> +	head = io = alloca(sizeof(*io) * (event->iovecs + event->rels));
> +	rel = alloca(sizeof(*rel) * event->rels);

Heh, I forgot about "alloca". I haven't seen that in a long time!

> +
> +	io_end = head + (event->iovecs + event->rels);
> +	rel_end = rel + event->rels;
> +
> +	/* Relative offset starts at end of static data */
> +	relio = io + event->iovecs;
> +	rel_offset = event->len;
> +	data_offset = 0;
> +
> +	/* Write index must be first */
> +	io->iov_base = &event->write_index;
> +	io->iov_len = sizeof(event->write_index);
> +	io++;
> +	used = 1;
> +
> +	while (items->type != TRACEFS_UEVENT_END) {
> +		len = items->len;
> +
> +		if (len <= 0)
> +			goto bad_length;
> +
> +		if (io >= io_end)
> +			goto bad_count;
> +
> +		switch (items->type) {
> +		case TRACEFS_UEVENT_varray:
> +		case TRACEFS_UEVENT_vstring:
> +			/* Dual vectors */
> +			used += 2;
> +
> +			if (rel >= rel_end || relio >= io_end)
> +				goto bad_count;
> +
> +			/* __rel_loc types */
> +			relio->iov_base = (void *)items->data;
> +			relio->iov_len = len;
> +			relio++;
> +
> +			io->iov_base = (void *)rel;
> +			io->iov_len = sizeof(*rel);
> +			io++;
> +			rel_offset -= sizeof(*rel);
> +
> +			/* Fill in rel loc data */
> +			*rel = DYN_LOC(rel_offset + data_offset, len);
> +			data_offset += len;
> +			rel++;
> +
> +			break;
> +
> +		default:
> +			/* Single vector */
> +			used++;
> +
> +			/* Direct types */
> +			io->iov_base = (void *)items->data;
> +			io->iov_len = len;
> +			io++;
> +			rel_offset -= len;
> +
> +			break;
> +		}
> +
> +		items++;
> +	}
> +
> +	return writev(event->group->fd, head, used);

This is fine for now, but I wonder if we want to invest time in the future
to create macros for user space that are similar to the TRACE_EVENT()
macros in the kernel, that would make the above a direct one to one mapping
of the array to the event. That way we wouldn't use a loop, but instead
would just write directly into the iovec that will be passed into the
kernel.

But that can be an optimization for another time.

This all looks good.

Thanks Beau!

-- Steve



> +
> +bad_length:
> +	fprintf(stderr, "Bad user_event item length at index %d\n",
> +		used - 1);
> +	errno = EINVAL;
> +	return -1;
> +
> +bad_count:
> +	fprintf(stderr, "Too many user_event items passed\n");
> +	errno = E2BIG;
> +	return -1;
> +}


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources
  2022-02-22 16:59             ` Beau Belgrave
@ 2022-02-22 17:10               ` Steven Rostedt
  0 siblings, 0 replies; 22+ messages in thread
From: Steven Rostedt @ 2022-02-22 17:10 UTC (permalink / raw)
  To: Beau Belgrave; +Cc: Yordan Karadzhov, linux-trace-devel

On Tue, 22 Feb 2022 08:59:46 -0800
Beau Belgrave <beaub@linux.microsoft.com> wrote:

> > Thoughts?  
> 
> Many times we have found that we have to do work that should only be
> performed if the event is enabled. As long as there is a still clear
> ways to check without the macro, this would be fine with me.
> 
> IE: strlen(message)
> 
> We should not walk strings unless the event is enabled.

Right. The macro would only be a helper macro. I would even add it as a
separate patch at the end and not part of the main patch.

-- Steve

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v1 3/3] libtracefs: Add unit tests for user_events
  2022-02-18 22:50 ` [PATCH v1 3/3] libtracefs: Add unit tests " Beau Belgrave
@ 2022-02-22 17:20   ` Steven Rostedt
  0 siblings, 0 replies; 22+ messages in thread
From: Steven Rostedt @ 2022-02-22 17:20 UTC (permalink / raw)
  To: Beau Belgrave; +Cc: linux-trace-devel

On Fri, 18 Feb 2022 14:50:58 -0800
Beau Belgrave <beaub@linux.microsoft.com> wrote:

> @@ -871,6 +872,235 @@ static void test_eprobes(void)
>  	test_eprobes_instance(test_instance);
>  }
>  
> +#ifdef USEREVENTS
> +struct user_test_context {
> +	int seen;
> +	int failed;
> +};
> +static int user_callback(struct tep_event *event, struct tep_record *record,
> +			  int cpu, void *context)
> +{
> +	struct tep_format_field *field;
> +	struct user_test_context *user_context;
> +	__u32 *rel, size, offset;
> +       

The above line has extra white space.

-- Steve

> +	user_context = (struct user_test_context *)context;
> +	user_context->seen++;
> +
> +	field = tep_find_field(event, "u8");
> +	if (!field || *(__u8 *)(record->data + field->offset) != 1)
> +	{
> +		user_context->failed = 1;
> +		return -1;
> +	}

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources
  2022-02-18 22:50 ` [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources Beau Belgrave
  2022-02-21 11:34   ` Yordan Karadzhov
  2022-02-22 17:08   ` Steven Rostedt
@ 2022-02-22 17:31   ` Steven Rostedt
  2022-02-22 17:39     ` Steven Rostedt
  2022-02-22 17:46     ` Beau Belgrave
  2 siblings, 2 replies; 22+ messages in thread
From: Steven Rostedt @ 2022-02-22 17:31 UTC (permalink / raw)
  To: Beau Belgrave; +Cc: linux-trace-devel

On Fri, 18 Feb 2022 14:50:56 -0800
Beau Belgrave <beaub@linux.microsoft.com> wrote:

> diff --git a/include/tracefs-local.h b/include/tracefs-local.h
> index bf157e1..e768cba 100644
> --- a/include/tracefs-local.h
> +++ b/include/tracefs-local.h
> @@ -119,4 +119,28 @@ int trace_rescan_events(struct tep_handle *tep,
>  struct tep_event *get_tep_event(struct tep_handle *tep,
>  				const char *system, const char *name);
>  
> +/* Internal interface for ftrace user events */
> +
> +struct tracefs_user_event_group;
> +
> +struct tracefs_user_event
> +{
> +	int write_index;
> +	int status_index;
> +	int iovecs;
> +	int rels;
> +	int len;
> +	struct tracefs_user_event_group *group;
> +	struct tracefs_user_event *next;
> +};
> +
> +struct tracefs_user_event_group
> +{
> +	int fd;
> +	int mmap_len;
> +	char *mmap;
> +	pthread_mutex_t lock;
> +	struct tracefs_user_event *events;
> +};
> +
>  #endif /* _TRACE_FS_LOCAL_H */
>



> +/**
> + * tracefs_user_event_test - Tests if an event is currently enabled
> + * @event: User event to test
> + *
> + * Tests if the @event is valid and currently enabled on the system.
> + *
> + * Return true if enabled, false otherwise.
> + */
> +bool tracefs_user_event_test(struct tracefs_user_event *event)
> +{
> +	return event && event->group->mmap[event->status_index] != 0;
> +}
> +

I was thinking we could even make the above faster by making it a static
inline in the tracefs.h header file.

In tracefs.h:

struct tracefs_user_event {
	char			*enable;
};

static inline bool tracefs_user_event_test(struct tracefs_user_event *event)
{
	return event && event->enable[0] != 0;
}

Then in tracefs-local.h:

struct tracefs_user_event_internal {
	struct tracefs_user_event	event_external;
	[...]
};

Then have in tracefs_user_event_register():

	event->write_index = reg.write_index;
	event->status_index = reg.status_index;
	event->group = group;

	event->event_external.enable = &event->group->mmap[event->status_index];

	[..]

	return &event->event_external;

All the other functions wouldn't even have to do a container_of() call, as
the event_external will be the first field in the struct it needs.

	struct tracefs_user_event_internal *event = (struct tracefs_user_event_internal *)event_external;

Or make the above a helper function:

#define INTERNAL_EVENT(e) ((struct tracefs_user_event_internal *)e)

	struct tracefs_user_event_internal *event = INTERNAL_EVENT(event_external);


Then we save on the function call, and allow the code to do the test inline.

-- Steve

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v1 0/3] libtracefs: Add APIs for user_events to libtracefs
  2022-02-18 22:50 [PATCH v1 0/3] libtracefs: Add APIs for user_events to libtracefs Beau Belgrave
                   ` (2 preceding siblings ...)
  2022-02-18 22:50 ` [PATCH v1 3/3] libtracefs: Add unit tests " Beau Belgrave
@ 2022-02-22 17:34 ` Steven Rostedt
  2022-02-22 17:50   ` Beau Belgrave
  3 siblings, 1 reply; 22+ messages in thread
From: Steven Rostedt @ 2022-02-22 17:34 UTC (permalink / raw)
  To: Beau Belgrave; +Cc: linux-trace-devel

On Fri, 18 Feb 2022 14:50:55 -0800
Beau Belgrave <beaub@linux.microsoft.com> wrote:

> New APIs added:

Beau,

I'm not sure what you work load is, but if you can get out a v2 today (or
tomorrow), that would be great. I want to release libtracefs 1.3 in the
next couple of days, and I'm only holding it off to include these APIs.

At the moment, libtracefs looks good to go!

Thanks,

-- Steve

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources
  2022-02-22 17:31   ` Steven Rostedt
@ 2022-02-22 17:39     ` Steven Rostedt
  2022-02-22 17:46     ` Beau Belgrave
  1 sibling, 0 replies; 22+ messages in thread
From: Steven Rostedt @ 2022-02-22 17:39 UTC (permalink / raw)
  To: Beau Belgrave; +Cc: linux-trace-devel

On Tue, 22 Feb 2022 12:31:43 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:

> static inline bool tracefs_user_event_test(struct tracefs_user_event *event)
> {
> 	return event && event->enable[0] != 0;
> }

I wonder if we need to add:

	return event && ((volatile char *)event->enable)[0] != 0;

to prevent the compiler from optimizing it.

-- Steve


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources
  2022-02-22 17:31   ` Steven Rostedt
  2022-02-22 17:39     ` Steven Rostedt
@ 2022-02-22 17:46     ` Beau Belgrave
  2022-02-22 18:59       ` Steven Rostedt
  1 sibling, 1 reply; 22+ messages in thread
From: Beau Belgrave @ 2022-02-22 17:46 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: linux-trace-devel

On Tue, Feb 22, 2022 at 12:31:43PM -0500, Steven Rostedt wrote:
> On Fri, 18 Feb 2022 14:50:56 -0800
> Beau Belgrave <beaub@linux.microsoft.com> wrote:
> 
> > diff --git a/include/tracefs-local.h b/include/tracefs-local.h
> > index bf157e1..e768cba 100644
> > --- a/include/tracefs-local.h
> > +++ b/include/tracefs-local.h
> > @@ -119,4 +119,28 @@ int trace_rescan_events(struct tep_handle *tep,
> >  struct tep_event *get_tep_event(struct tep_handle *tep,
> >  				const char *system, const char *name);
> >  
> > +/* Internal interface for ftrace user events */
> > +
> > +struct tracefs_user_event_group;
> > +
> > +struct tracefs_user_event
> > +{
> > +	int write_index;
> > +	int status_index;
> > +	int iovecs;
> > +	int rels;
> > +	int len;
> > +	struct tracefs_user_event_group *group;
> > +	struct tracefs_user_event *next;
> > +};
> > +
> > +struct tracefs_user_event_group
> > +{
> > +	int fd;
> > +	int mmap_len;
> > +	char *mmap;
> > +	pthread_mutex_t lock;
> > +	struct tracefs_user_event *events;
> > +};
> > +
> >  #endif /* _TRACE_FS_LOCAL_H */
> >
> 
> 
> 
> > +/**
> > + * tracefs_user_event_test - Tests if an event is currently enabled
> > + * @event: User event to test
> > + *
> > + * Tests if the @event is valid and currently enabled on the system.
> > + *
> > + * Return true if enabled, false otherwise.
> > + */
> > +bool tracefs_user_event_test(struct tracefs_user_event *event)
> > +{
> > +	return event && event->group->mmap[event->status_index] != 0;
> > +}
> > +
> 
> I was thinking we could even make the above faster by making it a static
> inline in the tracefs.h header file.
> 
> In tracefs.h:
> 
> struct tracefs_user_event {
> 	char			*enable;
> };
> 
> static inline bool tracefs_user_event_test(struct tracefs_user_event *event)
> {
> 	return event && event->enable[0] != 0;
> }
> 
> Then in tracefs-local.h:
> 
> struct tracefs_user_event_internal {
> 	struct tracefs_user_event	event_external;
> 	[...]
> };
> 
> Then have in tracefs_user_event_register():
> 
> 	event->write_index = reg.write_index;
> 	event->status_index = reg.status_index;
> 	event->group = group;
> 
> 	event->event_external.enable = &event->group->mmap[event->status_index];
> 
> 	[..]
> 
> 	return &event->event_external;
> 
> All the other functions wouldn't even have to do a container_of() call, as
> the event_external will be the first field in the struct it needs.
> 
> 	struct tracefs_user_event_internal *event = (struct tracefs_user_event_internal *)event_external;
> 
> Or make the above a helper function:
> 
> #define INTERNAL_EVENT(e) ((struct tracefs_user_event_internal *)e)
> 
> 	struct tracefs_user_event_internal *event = INTERNAL_EVENT(event_external);
> 
> 
> Then we save on the function call, and allow the code to do the test inline.

Yes, I was thinking along the same lines. I saw how things were split
between private / public in the headers and wasn't sure how to
accomplish this.

What you have above seems like a great way to accomplish this without exposing
too much out.

Thanks,
-Beau

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v1 0/3] libtracefs: Add APIs for user_events to libtracefs
  2022-02-22 17:34 ` [PATCH v1 0/3] libtracefs: Add APIs for user_events to libtracefs Steven Rostedt
@ 2022-02-22 17:50   ` Beau Belgrave
  2022-02-22 18:20     ` Steven Rostedt
  0 siblings, 1 reply; 22+ messages in thread
From: Beau Belgrave @ 2022-02-22 17:50 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: linux-trace-devel

On Tue, Feb 22, 2022 at 12:34:47PM -0500, Steven Rostedt wrote:
> On Fri, 18 Feb 2022 14:50:55 -0800
> Beau Belgrave <beaub@linux.microsoft.com> wrote:
> 
> > New APIs added:
> 
> Beau,
> 
> I'm not sure what you work load is, but if you can get out a v2 today (or
> tomorrow), that would be great. I want to release libtracefs 1.3 in the
> next couple of days, and I'm only holding it off to include these APIs.
> 

Yeah, I can get a v2 today I think. We keep expanding what to change in
the thread (which is great) so I'll do my best to capture it all out in
the v2. I plan to start in a few hours, with what we've talked about so
far.

Thanks,
-Beau

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v1 0/3] libtracefs: Add APIs for user_events to libtracefs
  2022-02-22 17:50   ` Beau Belgrave
@ 2022-02-22 18:20     ` Steven Rostedt
  0 siblings, 0 replies; 22+ messages in thread
From: Steven Rostedt @ 2022-02-22 18:20 UTC (permalink / raw)
  To: Beau Belgrave; +Cc: linux-trace-devel

On Tue, 22 Feb 2022 09:50:45 -0800
Beau Belgrave <beaub@linux.microsoft.com> wrote:

> Yeah, I can get a v2 today I think. We keep expanding what to change in
> the thread (which is great) so I'll do my best to capture it all out in
> the v2. I plan to start in a few hours, with what we've talked about so
> far.

My only concern is that we will get an API that we want to change. But
currently I'm pretty confident that this is the API that we'll keep. We can
always add more, but once a release is out, we are stuck with that API for
good.

Thanks a lot for doing this!

-- Steve

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources
  2022-02-22 17:46     ` Beau Belgrave
@ 2022-02-22 18:59       ` Steven Rostedt
  0 siblings, 0 replies; 22+ messages in thread
From: Steven Rostedt @ 2022-02-22 18:59 UTC (permalink / raw)
  To: Beau Belgrave; +Cc: linux-trace-devel

On Tue, 22 Feb 2022 09:46:34 -0800
Beau Belgrave <beaub@linux.microsoft.com> wrote:

> Yes, I was thinking along the same lines. I saw how things were split
> between private / public in the headers and wasn't sure how to
> accomplish this.
> 
> What you have above seems like a great way to accomplish this without exposing
> too much out.

Just in case we ever want to extend the user view, I wonder if we should
also expose the size as the next argument (or the first).

struct tracefs_user_event {
	unsigned int		size;
	char			*enable;
};

and set size to sizeof(struct tracefs_user_event).

Then if we add another field, we can differentiate it from new additions,
without breaking forward or backward API.

-- Steve

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2022-02-22 18:59 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-18 22:50 [PATCH v1 0/3] libtracefs: Add APIs for user_events to libtracefs Beau Belgrave
2022-02-18 22:50 ` [PATCH v1 1/3] libtracefs: Add user_events to libtracefs sources Beau Belgrave
2022-02-21 11:34   ` Yordan Karadzhov
2022-02-21 17:57     ` Beau Belgrave
2022-02-21 19:16       ` Steven Rostedt
2022-02-22  6:27         ` Yordan Karadzhov
2022-02-22 14:00           ` Steven Rostedt
2022-02-22 14:25             ` Yordan Karadzhov
2022-02-22 15:41               ` Steven Rostedt
2022-02-22 16:59             ` Beau Belgrave
2022-02-22 17:10               ` Steven Rostedt
2022-02-22 17:08   ` Steven Rostedt
2022-02-22 17:31   ` Steven Rostedt
2022-02-22 17:39     ` Steven Rostedt
2022-02-22 17:46     ` Beau Belgrave
2022-02-22 18:59       ` Steven Rostedt
2022-02-18 22:50 ` [PATCH v1 2/3] libtracefs: Add documentation and sample code for user_events Beau Belgrave
2022-02-18 22:50 ` [PATCH v1 3/3] libtracefs: Add unit tests " Beau Belgrave
2022-02-22 17:20   ` Steven Rostedt
2022-02-22 17:34 ` [PATCH v1 0/3] libtracefs: Add APIs for user_events to libtracefs Steven Rostedt
2022-02-22 17:50   ` Beau Belgrave
2022-02-22 18:20     ` Steven Rostedt

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.