All of lore.kernel.org
 help / color / mirror / Atom feed
From: Beau Belgrave <beaub@linux.microsoft.com>
To: rostedt@goodmis.org, mhiramat@kernel.org, mathieu.desnoyers@efficios.com
Cc: linux-trace-devel@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-arch@vger.kernel.org, beaub@linux.microsoft.com
Subject: [PATCH 6/7] tracing/user_events: Use bits vs bytes for enabled status page data
Date: Fri,  1 Apr 2022 16:43:08 -0700	[thread overview]
Message-ID: <20220401234309.21252-7-beaub@linux.microsoft.com> (raw)
In-Reply-To: <20220401234309.21252-1-beaub@linux.microsoft.com>

User processes may require many events and when they do the cache
performance of a byte index status check is less ideal than a bit index.
The previous event limit per-page was 4096, the new limit is 32,768.

This change adds a mask property to the user_reg struct. Programs check
that the byte at status_index has a bit set by ANDing the status_mask.

Link: https://lore.kernel.org/all/2059213643.196683.1648499088753.JavaMail.zimbra@efficios.com/

Suggested-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
---
 include/linux/user_events.h                   | 19 +++---
 kernel/trace/trace_events_user.c              | 58 ++++++++++++++++---
 samples/user_events/example.c                 | 12 ++--
 .../selftests/user_events/ftrace_test.c       | 16 ++---
 .../testing/selftests/user_events/perf_test.c |  6 +-
 5 files changed, 77 insertions(+), 34 deletions(-)

diff --git a/include/linux/user_events.h b/include/linux/user_events.h
index 736e05603463..c5051fee26c6 100644
--- a/include/linux/user_events.h
+++ b/include/linux/user_events.h
@@ -20,15 +20,6 @@
 #define USER_EVENTS_SYSTEM "user_events"
 #define USER_EVENTS_PREFIX "u:"
 
-/* Bits 0-6 are for known probe types, Bit 7 is for unknown probes */
-#define EVENT_BIT_FTRACE 0
-#define EVENT_BIT_PERF 1
-#define EVENT_BIT_OTHER 7
-
-#define EVENT_STATUS_FTRACE (1 << EVENT_BIT_FTRACE)
-#define EVENT_STATUS_PERF (1 << EVENT_BIT_PERF)
-#define EVENT_STATUS_OTHER (1 << EVENT_BIT_OTHER)
-
 /* Create dynamic location entry within a 32-bit value */
 #define DYN_LOC(offset, size) ((size) << 16 | (offset))
 
@@ -48,9 +39,17 @@ struct user_reg {
 	/* Output: Byte index of the event within the status page */
 	__u32 status_index;
 
+	/* Output: Mask for the event within the status page byte */
+	__u32 status_mask;
+
 	/* Output: Index of the event to use when writing data */
 	__u32 write_index;
-};
+} __attribute__((__packed__));
+
+static inline int user_event_enabled(void *status_data, int index, int mask)
+{
+	return status_data && (((const char *)status_data)[index] & mask);
+}
 
 #define DIAG_IOC_MAGIC '*'
 
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index 2bcae7abfa81..d960b5ea76c4 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -40,17 +40,26 @@
  */
 #define MAX_PAGE_ORDER 0
 #define MAX_PAGES (1 << MAX_PAGE_ORDER)
-#define MAX_EVENTS (MAX_PAGES * PAGE_SIZE)
+#define MAX_BYTES (MAX_PAGES * PAGE_SIZE)
+#define MAX_EVENTS (MAX_BYTES * 8)
 
 /* Limit how long of an event name plus args within the subsystem. */
 #define MAX_EVENT_DESC 512
 #define EVENT_NAME(user_event) ((user_event)->tracepoint.name)
 #define MAX_FIELD_ARRAY_SIZE 1024
 
+#define STATUS_BYTE(bit) ((bit) >> 3)
+#define STATUS_MASK(bit) (1 << ((bit) & 7))
+
+/* Internal bits to keep track of connected probes */
+#define EVENT_STATUS_FTRACE (1 << 0)
+#define EVENT_STATUS_PERF (1 << 1)
+#define EVENT_STATUS_OTHER (1 << 7)
+
 static char *register_page_data;
 
 static DEFINE_MUTEX(reg_mutex);
-static DEFINE_HASHTABLE(register_table, 4);
+static DEFINE_HASHTABLE(register_table, 8);
 static DECLARE_BITMAP(page_bitmap, MAX_EVENTS);
 
 /*
@@ -72,6 +81,7 @@ struct user_event {
 	int index;
 	int flags;
 	int min_size;
+	char status;
 };
 
 /*
@@ -106,6 +116,22 @@ static u32 user_event_key(char *name)
 	return jhash(name, strlen(name), 0);
 }
 
+static __always_inline
+void user_event_register_set(struct user_event *user)
+{
+	int i = user->index;
+
+	register_page_data[STATUS_BYTE(i)] |= STATUS_MASK(i);
+}
+
+static __always_inline
+void user_event_register_clear(struct user_event *user)
+{
+	int i = user->index;
+
+	register_page_data[STATUS_BYTE(i)] &= ~STATUS_MASK(i);
+}
+
 static __always_inline __must_check
 bool user_event_last_ref(struct user_event *user)
 {
@@ -648,7 +674,7 @@ static int destroy_user_event(struct user_event *user)
 
 	dyn_event_remove(&user->devent);
 
-	register_page_data[user->index] = 0;
+	user_event_register_clear(user);
 	clear_bit(user->index, page_bitmap);
 	hash_del(&user->node);
 
@@ -827,7 +853,12 @@ static void update_reg_page_for(struct user_event *user)
 		rcu_read_unlock_sched();
 	}
 
-	register_page_data[user->index] = status;
+	if (status)
+		user_event_register_set(user);
+	else
+		user_event_register_clear(user);
+
+	user->status = status;
 }
 
 /*
@@ -1332,7 +1363,17 @@ static long user_reg_get(struct user_reg __user *ureg, struct user_reg *kreg)
 	if (size > PAGE_SIZE)
 		return -E2BIG;
 
-	return copy_struct_from_user(kreg, sizeof(*kreg), ureg, size);
+	if (size < offsetofend(struct user_reg, write_index))
+		return -EINVAL;
+
+	ret = copy_struct_from_user(kreg, sizeof(*kreg), ureg, size);
+
+	if (ret)
+		return ret;
+
+	kreg->size = size;
+
+	return 0;
 }
 
 /*
@@ -1376,7 +1417,8 @@ static long user_events_ioctl_reg(struct file *file, unsigned long uarg)
 		return ret;
 
 	put_user((u32)ret, &ureg->write_index);
-	put_user(user->index, &ureg->status_index);
+	put_user(STATUS_BYTE(user->index), &ureg->status_index);
+	put_user(STATUS_MASK(user->index), &ureg->status_mask);
 
 	return 0;
 }
@@ -1485,7 +1527,7 @@ static int user_status_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	unsigned long size = vma->vm_end - vma->vm_start;
 
-	if (size != MAX_EVENTS)
+	if (size != MAX_BYTES)
 		return -EINVAL;
 
 	return remap_pfn_range(vma, vma->vm_start,
@@ -1520,7 +1562,7 @@ static int user_seq_show(struct seq_file *m, void *p)
 	mutex_lock(&reg_mutex);
 
 	hash_for_each(register_table, i, user, node) {
-		status = register_page_data[user->index];
+		status = user->status;
 		flags = user->flags;
 
 		seq_printf(m, "%d:%s", user->index, EVENT_NAME(user));
diff --git a/samples/user_events/example.c b/samples/user_events/example.c
index 4f5778e441c0..e72260bf6e49 100644
--- a/samples/user_events/example.c
+++ b/samples/user_events/example.c
@@ -33,7 +33,8 @@ static int event_status(char **status)
 	return 0;
 }
 
-static int event_reg(int fd, const char *command, int *status, int *write)
+static int event_reg(int fd, const char *command, int *index, int *mask,
+		     int *write)
 {
 	struct user_reg reg = {0};
 
@@ -43,7 +44,8 @@ static int event_reg(int fd, const char *command, int *status, int *write)
 	if (ioctl(fd, DIAG_IOCSREG, &reg) == -1)
 		return -1;
 
-	*status = reg.status_index;
+	*index = reg.status_index;
+	*mask = reg.status_mask;
 	*write = reg.write_index;
 
 	return 0;
@@ -51,7 +53,7 @@ static int event_reg(int fd, const char *command, int *status, int *write)
 
 int main(int argc, char **argv)
 {
-	int data_fd, status, write;
+	int data_fd, index, mask, write;
 	char *status_page;
 	struct iovec io[2];
 	__u32 count = 0;
@@ -61,7 +63,7 @@ int main(int argc, char **argv)
 
 	data_fd = open(data_file, O_RDWR);
 
-	if (event_reg(data_fd, "test u32 count", &status, &write) == -1)
+	if (event_reg(data_fd, "test u32 count", &index, &mask, &write) == -1)
 		return errno;
 
 	/* Setup iovec */
@@ -75,7 +77,7 @@ int main(int argc, char **argv)
 	getchar();
 
 	/* Check if anyone is listening */
-	if (status_page[status]) {
+	if (user_event_enabled(status_page, index, mask)) {
 		/* Yep, trace out our data */
 		writev(data_fd, (const struct iovec *)io, 2);
 
diff --git a/tools/testing/selftests/user_events/ftrace_test.c b/tools/testing/selftests/user_events/ftrace_test.c
index a80fb5ef61d5..ba7a2757dcbd 100644
--- a/tools/testing/selftests/user_events/ftrace_test.c
+++ b/tools/testing/selftests/user_events/ftrace_test.c
@@ -197,12 +197,12 @@ TEST_F(user, register_events) {
 	/* Register should work */
 	ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
 	ASSERT_EQ(0, reg.write_index);
-	ASSERT_NE(0, reg.status_index);
+	ASSERT_EQ(0, reg.status_index == 0 && reg.status_mask == 1);
 
 	/* Multiple registers should result in same index */
 	ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
 	ASSERT_EQ(0, reg.write_index);
-	ASSERT_NE(0, reg.status_index);
+	ASSERT_EQ(0, reg.status_index == 0 && reg.status_mask == 1);
 
 	/* Ensure disabled */
 	self->enable_fd = open(enable_file, O_RDWR);
@@ -212,15 +212,15 @@ TEST_F(user, register_events) {
 	/* MMAP should work and be zero'd */
 	ASSERT_NE(MAP_FAILED, status_page);
 	ASSERT_NE(NULL, status_page);
-	ASSERT_EQ(0, status_page[reg.status_index]);
+	ASSERT_EQ(0, status_page[reg.status_index] & reg.status_mask);
 
 	/* Enable event and ensure bits updated in status */
 	ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1")))
-	ASSERT_EQ(EVENT_STATUS_FTRACE, status_page[reg.status_index]);
+	ASSERT_NE(0, status_page[reg.status_index] & reg.status_mask);
 
 	/* Disable event and ensure bits updated in status */
 	ASSERT_NE(-1, write(self->enable_fd, "0", sizeof("0")))
-	ASSERT_EQ(0, status_page[reg.status_index]);
+	ASSERT_EQ(0, status_page[reg.status_index] & reg.status_mask);
 
 	/* File still open should return -EBUSY for delete */
 	ASSERT_EQ(-1, ioctl(self->data_fd, DIAG_IOCSDEL, "__test_event"));
@@ -257,7 +257,7 @@ TEST_F(user, write_events) {
 	/* Register should work */
 	ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
 	ASSERT_EQ(0, reg.write_index);
-	ASSERT_NE(0, reg.status_index);
+	ASSERT_EQ(0, reg.status_index == 0 && reg.status_mask == 1);
 
 	/* Write should fail on invalid slot with ENOENT */
 	io[0].iov_base = &field2;
@@ -298,7 +298,7 @@ TEST_F(user, write_fault) {
 	/* Register should work */
 	ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
 	ASSERT_EQ(0, reg.write_index);
-	ASSERT_NE(0, reg.status_index);
+	ASSERT_EQ(0, reg.status_index == 0 && reg.status_mask == 1);
 
 	/* Write should work normally */
 	ASSERT_NE(-1, writev(self->data_fd, (const struct iovec *)io, 2));
@@ -322,7 +322,7 @@ TEST_F(user, write_validator) {
 	/* Register should work */
 	ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
 	ASSERT_EQ(0, reg.write_index);
-	ASSERT_NE(0, reg.status_index);
+	ASSERT_EQ(0, reg.status_index == 0 && reg.status_mask == 1);
 
 	io[0].iov_base = &reg.write_index;
 	io[0].iov_len = sizeof(reg.write_index);
diff --git a/tools/testing/selftests/user_events/perf_test.c b/tools/testing/selftests/user_events/perf_test.c
index 26851d51d6bb..81ceaf71e364 100644
--- a/tools/testing/selftests/user_events/perf_test.c
+++ b/tools/testing/selftests/user_events/perf_test.c
@@ -120,8 +120,8 @@ TEST_F(user, perf_write) {
 	/* Register should work */
 	ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
 	ASSERT_EQ(0, reg.write_index);
-	ASSERT_NE(0, reg.status_index);
-	ASSERT_EQ(0, status_page[reg.status_index]);
+	ASSERT_EQ(0, reg.status_index == 0 && reg.status_mask == 1);
+	ASSERT_EQ(0, status_page[reg.status_index] & reg.status_mask);
 
 	/* Id should be there */
 	id = get_id();
@@ -144,7 +144,7 @@ TEST_F(user, perf_write) {
 	ASSERT_NE(MAP_FAILED, perf_page);
 
 	/* Status should be updated */
-	ASSERT_EQ(EVENT_STATUS_PERF, status_page[reg.status_index]);
+	ASSERT_NE(0, status_page[reg.status_index] & reg.status_mask);
 
 	event.index = reg.write_index;
 	event.field1 = 0xc001;
-- 
2.25.1


  parent reply	other threads:[~2022-04-01 23:43 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-01 23:43 [PATCH 0/7] tracing/user_events: Update user_events ABI from Beau Belgrave
2022-04-01 23:43 ` [PATCH 1/7] tracing/user_events: Fix repeated word in comments Beau Belgrave
2022-04-01 23:43 ` [PATCH 2/7] tracing/user_events: Use NULL for strstr checks Beau Belgrave
2022-04-01 23:43 ` [PATCH 3/7] tracing/user_events: Use WRITE instead of READ for io vector import Beau Belgrave
2022-04-01 23:43 ` [PATCH 4/7] tracing/user_events: Ensure user provided strings are safely formatted Beau Belgrave
2022-04-01 23:43 ` [PATCH 5/7] tracing/user_events: Use refcount instead of atomic for ref tracking Beau Belgrave
2022-04-01 23:43 ` Beau Belgrave [this message]
2022-04-19 14:35   ` [PATCH 6/7] tracing/user_events: Use bits vs bytes for enabled status page data Mathieu Desnoyers
2022-04-19 18:57     ` Beau Belgrave
2022-04-19 21:26       ` Mathieu Desnoyers
2022-04-19 23:48         ` Beau Belgrave
2022-04-20 17:53           ` Mathieu Desnoyers
2022-04-20 20:12             ` Beau Belgrave
2022-04-20 20:21               ` Mathieu Desnoyers
2022-04-01 23:43 ` [PATCH 7/7] tracing/user_events: Update ABI documentation to align to bits vs bytes Beau Belgrave

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220401234309.21252-7-beaub@linux.microsoft.com \
    --to=beaub@linux.microsoft.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-trace-devel@vger.kernel.org \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=mhiramat@kernel.org \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.