linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sohil Mehta <sohil.mehta@intel.com>
To: x86@kernel.org
Cc: Sohil Mehta <sohil.mehta@intel.com>,
	Tony Luck <tony.luck@intel.com>,
	Dave Hansen <dave.hansen@intel.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	"H . Peter Anvin" <hpa@zytor.com>,
	Andy Lutomirski <luto@kernel.org>, Jens Axboe <axboe@kernel.dk>,
	Christian Brauner <christian@brauner.io>,
	Peter Zijlstra <peterz@infradead.org>,
	Shuah Khan <shuah@kernel.org>, Arnd Bergmann <arnd@arndb.de>,
	Jonathan Corbet <corbet@lwn.net>, Ashok Raj <ashok.raj@intel.com>,
	Jacob Pan <jacob.jun.pan@linux.intel.com>,
	Gayatri Kammela <gayatri.kammela@intel.com>,
	Zeng Guang <guang.zeng@intel.com>,
	Dan Williams <dan.j.williams@intel.com>,
	Randy E Witt <randy.e.witt@intel.com>,
	Ravi V Shankar <ravi.v.shankar@intel.com>,
	Ramesh Thomas <ramesh.thomas@intel.com>,
	linux-api@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-kselftest@vger.kernel.org
Subject: [RFC PATCH 09/13] x86/uintr: Introduce vector registration and uintr_fd syscall
Date: Mon, 13 Sep 2021 13:01:28 -0700	[thread overview]
Message-ID: <20210913200132.3396598-10-sohil.mehta@intel.com> (raw)
In-Reply-To: <20210913200132.3396598-1-sohil.mehta@intel.com>

Each receiving task has its own interrupt vector space of 64 vectors.
For each vector registered by a task create a uintr_fd. Only tasks that
have previously registered a user interrupt handler can register a
vector.

The sender for the user interrupt could be another userspace
application, kernel or an external source (like a device). Any sender
that wants to generate a user interrupt needs access to receiver's
vector number and UPID.  uintr_fd abstracts that information and allows
a sender with access to uintr_fd to connect and generate a user
interrupt. Upon interrupt delivery, the interrupt handler would be
invoked with the associated vector number pushed onto the stack.

Using an FD abstraction automatically provides a secure mechanism to
connect with a receiver. It also makes the tracking and management of
the interrupt vector resource easier for userspace.

uintr_fd can be useful in some of the usages where eventfd is used for
userspace event notifications. Though uintr_fd is nowhere close to a
drop-in replacement, the semantics are meant to be somewhat similar to
an eventfd or the write end of a pipe.

Access to uintr_fd can be achieved in the following ways:
- direct access if the task is part of the same thread group (process)
- inherited by a child process.
- explicitly shared using any of the FD sharing mechanisms.

If the sender is another userspace task, it can use the uintr_fd to send
user IPIs to the receiver. This works in conjunction with the SENDUIPI
instruction. The details related to this are covered later.

The exact APIs for the sender being a kernel or another external source
are still being worked upon. The general idea is that the receiver would
pass the uintr_fd to the kernel by extending some existing API (like
io_uring).

The vector associated with uintr_fd can be unregistered by closing all
references to the uintr_fd.

Signed-off-by: Sohil Mehta <sohil.mehta@intel.com>
---
 arch/x86/include/asm/uintr.h |  14 ++++
 arch/x86/kernel/uintr_core.c | 129 +++++++++++++++++++++++++++++++++--
 arch/x86/kernel/uintr_fd.c   |  94 +++++++++++++++++++++++++
 3 files changed, 232 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/uintr.h b/arch/x86/include/asm/uintr.h
index cef4dd81d40e..1f00e2a63da4 100644
--- a/arch/x86/include/asm/uintr.h
+++ b/arch/x86/include/asm/uintr.h
@@ -4,9 +4,23 @@
 
 #ifdef CONFIG_X86_USER_INTERRUPTS
 
+struct uintr_upid_ctx {
+	struct task_struct *task;	/* Receiver task */
+	struct uintr_upid *upid;
+	refcount_t refs;
+};
+
+struct uintr_receiver_info {
+	struct uintr_upid_ctx *upid_ctx;	/* UPID context */
+	struct callback_head twork;		/* Task work head */
+	u64 uvec;				/* Vector number */
+};
+
 bool uintr_arch_enabled(void);
 int do_uintr_register_handler(u64 handler);
 int do_uintr_unregister_handler(void);
+int do_uintr_register_vector(struct uintr_receiver_info *r_info);
+void do_uintr_unregister_vector(struct uintr_receiver_info *r_info);
 
 void uintr_free(struct task_struct *task);
 
diff --git a/arch/x86/kernel/uintr_core.c b/arch/x86/kernel/uintr_core.c
index a2a13f890139..9dcb9f60e5bc 100644
--- a/arch/x86/kernel/uintr_core.c
+++ b/arch/x86/kernel/uintr_core.c
@@ -11,6 +11,7 @@
 #include <linux/sched.h>
 #include <linux/sched/task.h>
 #include <linux/slab.h>
+#include <linux/task_work.h>
 #include <linux/uaccess.h>
 
 #include <asm/apic.h>
@@ -20,6 +21,8 @@
 #include <asm/msr-index.h>
 #include <asm/uintr.h>
 
+#define UINTR_MAX_UVEC_NR 64
+
 /* User Posted Interrupt Descriptor (UPID) */
 struct uintr_upid {
 	struct {
@@ -36,13 +39,9 @@ struct uintr_upid {
 #define UPID_ON		0x0	/* Outstanding notification */
 #define UPID_SN		0x1	/* Suppressed notification */
 
-struct uintr_upid_ctx {
-	struct uintr_upid *upid;
-	refcount_t refs;
-};
-
 struct uintr_receiver {
 	struct uintr_upid_ctx *upid_ctx;
+	u64 uvec_mask;	/* track active vector per bit */
 };
 
 inline bool uintr_arch_enabled(void)
@@ -69,6 +68,7 @@ static inline u32 cpu_to_ndst(int cpu)
 
 static void free_upid(struct uintr_upid_ctx *upid_ctx)
 {
+	put_task_struct(upid_ctx->task);
 	kfree(upid_ctx->upid);
 	upid_ctx->upid = NULL;
 	kfree(upid_ctx);
@@ -93,6 +93,7 @@ static struct uintr_upid_ctx *alloc_upid(void)
 
 	upid_ctx->upid = upid;
 	refcount_set(&upid_ctx->refs, 1);
+	upid_ctx->task = get_task_struct(current);
 
 	return upid_ctx;
 }
@@ -103,6 +104,77 @@ static void put_upid_ref(struct uintr_upid_ctx *upid_ctx)
 		free_upid(upid_ctx);
 }
 
+static struct uintr_upid_ctx *get_upid_ref(struct uintr_upid_ctx *upid_ctx)
+{
+	refcount_inc(&upid_ctx->refs);
+	return upid_ctx;
+}
+
+static void __clear_vector_from_upid(u64 uvec, struct uintr_upid *upid)
+{
+	clear_bit(uvec, (unsigned long *)&upid->puir);
+}
+
+static void __clear_vector_from_task(u64 uvec)
+{
+	struct task_struct *t = current;
+
+	pr_debug("recv: task=%d free vector %llu\n", t->pid, uvec);
+
+	if (!(BIT_ULL(uvec) & t->thread.ui_recv->uvec_mask))
+		return;
+
+	clear_bit(uvec, (unsigned long *)&t->thread.ui_recv->uvec_mask);
+
+	if (!t->thread.ui_recv->uvec_mask)
+		pr_debug("recv: task=%d unregistered all user vectors\n", t->pid);
+}
+
+/* Callback to clear the vector structures when a vector is unregistered. */
+static void receiver_clear_uvec(struct callback_head *head)
+{
+	struct uintr_receiver_info *r_info;
+	struct uintr_upid_ctx *upid_ctx;
+	struct task_struct *t = current;
+	u64 uvec;
+
+	r_info = container_of(head, struct uintr_receiver_info, twork);
+	uvec = r_info->uvec;
+	upid_ctx = r_info->upid_ctx;
+
+	/*
+	 * If a task has unregistered the interrupt handler the vector
+	 * structures would have already been cleared.
+	 */
+	if (is_uintr_receiver(t)) {
+		/*
+		 * The UPID context in the callback might differ from the one
+		 * on the task if the task unregistered its interrupt handler
+		 * and then registered itself again. The vector structures
+		 * related to the previous UPID would have already been cleared
+		 * in that case.
+		 */
+		if (t->thread.ui_recv->upid_ctx != upid_ctx) {
+			pr_debug("recv: task %d is now using a different UPID\n",
+				 t->pid);
+			goto out_free;
+		}
+
+		/*
+		 * If the vector has been recognized in the UIRR don't modify
+		 * it. We need to disable User Interrupts before modifying the
+		 * UIRR. It might be better to just let that interrupt get
+		 * delivered.
+		 */
+		__clear_vector_from_upid(uvec, upid_ctx->upid);
+		__clear_vector_from_task(uvec);
+	}
+
+out_free:
+	put_upid_ref(upid_ctx);
+	kfree(r_info);
+}
+
 int do_uintr_unregister_handler(void)
 {
 	struct task_struct *t = current;
@@ -239,6 +311,53 @@ int do_uintr_register_handler(u64 handler)
 	return 0;
 }
 
+void do_uintr_unregister_vector(struct uintr_receiver_info *r_info)
+{
+	int ret;
+
+	pr_debug("recv: Adding task work to clear vector %llu added for task=%d\n",
+		 r_info->uvec, r_info->upid_ctx->task->pid);
+
+	init_task_work(&r_info->twork, receiver_clear_uvec);
+	ret = task_work_add(r_info->upid_ctx->task, &r_info->twork, true);
+	if (ret) {
+		pr_debug("recv: Clear vector task=%d has already exited\n",
+			 r_info->upid_ctx->task->pid);
+		put_upid_ref(r_info->upid_ctx);
+		kfree(r_info);
+		return;
+	}
+}
+
+int do_uintr_register_vector(struct uintr_receiver_info *r_info)
+{
+	struct uintr_receiver *ui_recv;
+	struct task_struct *t = current;
+
+	/*
+	 * A vector should only be registered by a task that
+	 * has an interrupt handler registered.
+	 */
+	if (!is_uintr_receiver(t))
+		return -EINVAL;
+
+	if (r_info->uvec >= UINTR_MAX_UVEC_NR)
+		return -ENOSPC;
+
+	ui_recv = t->thread.ui_recv;
+
+	if (ui_recv->uvec_mask & BIT_ULL(r_info->uvec))
+		return -EBUSY;
+
+	ui_recv->uvec_mask |= BIT_ULL(r_info->uvec);
+	pr_debug("recv: task %d new uvec=%llu, new mask %llx\n",
+		 t->pid, r_info->uvec, ui_recv->uvec_mask);
+
+	r_info->upid_ctx = get_upid_ref(ui_recv->upid_ctx);
+
+	return 0;
+}
+
 /* Suppress notifications since this task is being context switched out */
 void switch_uintr_prepare(struct task_struct *prev)
 {
diff --git a/arch/x86/kernel/uintr_fd.c b/arch/x86/kernel/uintr_fd.c
index a1a9c105fdab..f0548bbac776 100644
--- a/arch/x86/kernel/uintr_fd.c
+++ b/arch/x86/kernel/uintr_fd.c
@@ -6,11 +6,105 @@
  */
 #define pr_fmt(fmt)	"uintr: " fmt
 
+#include <linux/anon_inodes.h>
+#include <linux/fdtable.h>
 #include <linux/sched.h>
 #include <linux/syscalls.h>
 
 #include <asm/uintr.h>
 
+struct uintrfd_ctx {
+	struct uintr_receiver_info *r_info;
+};
+
+#ifdef CONFIG_PROC_FS
+static void uintrfd_show_fdinfo(struct seq_file *m, struct file *file)
+{
+	struct uintrfd_ctx *uintrfd_ctx = file->private_data;
+
+	/* Check: Should we print the receiver and sender info here? */
+	seq_printf(m, "user_vector:%llu\n", uintrfd_ctx->r_info->uvec);
+}
+#endif
+
+static int uintrfd_release(struct inode *inode, struct file *file)
+{
+	struct uintrfd_ctx *uintrfd_ctx = file->private_data;
+
+	pr_debug("recv: Release uintrfd for r_task %d uvec %llu\n",
+		 uintrfd_ctx->r_info->upid_ctx->task->pid,
+		 uintrfd_ctx->r_info->uvec);
+
+	do_uintr_unregister_vector(uintrfd_ctx->r_info);
+	kfree(uintrfd_ctx);
+
+	return 0;
+}
+
+static const struct file_operations uintrfd_fops = {
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo	= uintrfd_show_fdinfo,
+#endif
+	.release	= uintrfd_release,
+	.llseek		= noop_llseek,
+};
+
+/*
+ * sys_uintr_create_fd - Create a uintr_fd for the registered interrupt vector.
+ */
+SYSCALL_DEFINE2(uintr_create_fd, u64, vector, unsigned int, flags)
+{
+	struct uintrfd_ctx *uintrfd_ctx;
+	int uintrfd;
+	int ret;
+
+	if (!uintr_arch_enabled())
+		return -EOPNOTSUPP;
+
+	if (flags)
+		return -EINVAL;
+
+	uintrfd_ctx = kzalloc(sizeof(*uintrfd_ctx), GFP_KERNEL);
+	if (!uintrfd_ctx)
+		return -ENOMEM;
+
+	uintrfd_ctx->r_info = kzalloc(sizeof(*uintrfd_ctx->r_info), GFP_KERNEL);
+	if (!uintrfd_ctx->r_info) {
+		ret = -ENOMEM;
+		goto out_free_ctx;
+	}
+
+	uintrfd_ctx->r_info->uvec = vector;
+	ret = do_uintr_register_vector(uintrfd_ctx->r_info);
+	if (ret) {
+		kfree(uintrfd_ctx->r_info);
+		goto out_free_ctx;
+	}
+
+	/* TODO: Get user input for flags - UFD_CLOEXEC */
+	/* Check: Do we need O_NONBLOCK? */
+	uintrfd = anon_inode_getfd("[uintrfd]", &uintrfd_fops, uintrfd_ctx,
+				   O_RDONLY | O_CLOEXEC | O_NONBLOCK);
+
+	if (uintrfd < 0) {
+		ret = uintrfd;
+		goto out_free_uvec;
+	}
+
+	pr_debug("recv: Alloc vector success uintrfd %d uvec %llu for task=%d\n",
+		 uintrfd, uintrfd_ctx->r_info->uvec, current->pid);
+
+	return uintrfd;
+
+out_free_uvec:
+	do_uintr_unregister_vector(uintrfd_ctx->r_info);
+out_free_ctx:
+	kfree(uintrfd_ctx);
+	pr_debug("recv: Alloc vector failed for task=%d ret %d\n",
+		 current->pid, ret);
+	return ret;
+}
+
 /*
  * sys_uintr_register_handler - setup user interrupt handler for receiver.
  */
-- 
2.33.0


  parent reply	other threads:[~2021-09-13 20:05 UTC|newest]

Thread overview: 87+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-13 20:01 [RFC PATCH 00/13] x86 User Interrupts support Sohil Mehta
2021-09-13 20:01 ` [RFC PATCH 01/13] x86/uintr/man-page: Include man pages draft for reference Sohil Mehta
2021-09-13 20:01 ` [RFC PATCH 02/13] Documentation/x86: Add documentation for User Interrupts Sohil Mehta
2021-09-13 20:01 ` [RFC PATCH 03/13] x86/cpu: Enumerate User Interrupts support Sohil Mehta
2021-09-23 22:24   ` Thomas Gleixner
2021-09-24 19:59     ` Sohil Mehta
2021-09-27 20:42     ` Sohil Mehta
2021-09-13 20:01 ` [RFC PATCH 04/13] x86/fpu/xstate: Enumerate User Interrupts supervisor state Sohil Mehta
2021-09-23 22:34   ` Thomas Gleixner
2021-09-27 22:25     ` Sohil Mehta
2021-09-13 20:01 ` [RFC PATCH 05/13] x86/irq: Reserve a user IPI notification vector Sohil Mehta
2021-09-23 23:07   ` Thomas Gleixner
2021-09-25 13:30     ` Thomas Gleixner
2021-09-26 12:39       ` Thomas Gleixner
2021-09-27 19:07         ` Sohil Mehta
2021-09-28  8:11           ` Thomas Gleixner
2021-09-27 19:26     ` Sohil Mehta
2021-09-13 20:01 ` [RFC PATCH 06/13] x86/uintr: Introduce uintr receiver syscalls Sohil Mehta
2021-09-23 12:26   ` Greg KH
2021-09-24  0:05     ` Thomas Gleixner
2021-09-27 23:20     ` Sohil Mehta
2021-09-28  4:39       ` Greg KH
2021-09-28 16:47         ` Sohil Mehta
2021-09-23 23:52   ` Thomas Gleixner
2021-09-27 23:57     ` Sohil Mehta
2021-09-13 20:01 ` [RFC PATCH 07/13] x86/process/64: Add uintr task context switch support Sohil Mehta
2021-09-24  0:41   ` Thomas Gleixner
2021-09-28  0:30     ` Sohil Mehta
2021-09-13 20:01 ` [RFC PATCH 08/13] x86/process/64: Clean up uintr task fork and exit paths Sohil Mehta
2021-09-24  1:02   ` Thomas Gleixner
2021-09-28  1:23     ` Sohil Mehta
2021-09-13 20:01 ` Sohil Mehta [this message]
2021-09-24 10:33   ` [RFC PATCH 09/13] x86/uintr: Introduce vector registration and uintr_fd syscall Thomas Gleixner
2021-09-28 20:40     ` Sohil Mehta
2021-09-13 20:01 ` [RFC PATCH 10/13] x86/uintr: Introduce user IPI sender syscalls Sohil Mehta
2021-09-23 12:28   ` Greg KH
2021-09-28 18:01     ` Sohil Mehta
2021-09-29  7:04       ` Greg KH
2021-09-29 14:27         ` Sohil Mehta
2021-09-24 10:54   ` Thomas Gleixner
2021-09-13 20:01 ` [RFC PATCH 11/13] x86/uintr: Introduce uintr_wait() syscall Sohil Mehta
2021-09-24 11:04   ` Thomas Gleixner
2021-09-25 12:08     ` Thomas Gleixner
2021-09-28 23:13       ` Sohil Mehta
2021-09-28 23:08     ` Sohil Mehta
2021-09-26 14:41   ` Thomas Gleixner
2021-09-29  1:09     ` Sohil Mehta
2021-09-29  3:30   ` Andy Lutomirski
2021-09-29  4:56     ` Sohil Mehta
2021-09-30 18:08       ` Andy Lutomirski
2021-09-30 19:29         ` Thomas Gleixner
2021-09-30 22:01           ` Andy Lutomirski
2021-10-01  0:01             ` Thomas Gleixner
2021-10-01  4:41               ` Andy Lutomirski
2021-10-01  9:56                 ` Thomas Gleixner
2021-10-01 15:13                   ` Andy Lutomirski
2021-10-01 18:04                     ` Sohil Mehta
2021-10-01 21:29                     ` Thomas Gleixner
2021-10-01 23:00                       ` Sohil Mehta
2021-10-01 23:04                       ` Andy Lutomirski
2021-09-13 20:01 ` [RFC PATCH 12/13] x86/uintr: Wire up the user interrupt syscalls Sohil Mehta
2021-09-13 20:01 ` [RFC PATCH 13/13] selftests/x86: Add basic tests for User IPI Sohil Mehta
2021-09-13 20:27 ` [RFC PATCH 00/13] x86 User Interrupts support Dave Hansen
2021-09-14 19:03   ` Mehta, Sohil
2021-09-23 12:19     ` Greg KH
2021-09-23 14:09       ` Greg KH
2021-09-23 14:46         ` Dave Hansen
2021-09-23 15:07           ` Greg KH
2021-09-23 23:24         ` Sohil Mehta
2021-09-23 23:09       ` Sohil Mehta
2021-09-24  0:17       ` Sohil Mehta
2021-09-23 14:39 ` Jens Axboe
2021-09-29  4:31 ` Andy Lutomirski
2021-09-30 16:30   ` Stefan Hajnoczi
2021-09-30 17:24     ` Sohil Mehta
2021-09-30 17:26       ` Andy Lutomirski
2021-10-01 16:35       ` Stefan Hajnoczi
2021-10-01 16:41         ` Richard Henderson
2021-09-30 16:26 ` Stefan Hajnoczi
2021-10-01  0:40   ` Sohil Mehta
2021-10-01  8:19 ` Pavel Machek
2021-11-18 22:19   ` Sohil Mehta
2021-11-16  3:49 ` Prakash Sangappa
2021-11-18 21:44   ` Sohil Mehta
2021-12-22 16:17 ` Chrisma Pakha
2022-01-07  2:08   ` Sohil Mehta
2022-01-17  1:14     ` Chrisma Pakha

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210913200132.3396598-10-sohil.mehta@intel.com \
    --to=sohil.mehta@intel.com \
    --cc=arnd@arndb.de \
    --cc=ashok.raj@intel.com \
    --cc=axboe@kernel.dk \
    --cc=bp@alien8.de \
    --cc=christian@brauner.io \
    --cc=corbet@lwn.net \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@intel.com \
    --cc=gayatri.kammela@intel.com \
    --cc=guang.zeng@intel.com \
    --cc=hpa@zytor.com \
    --cc=jacob.jun.pan@linux.intel.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=ramesh.thomas@intel.com \
    --cc=randy.e.witt@intel.com \
    --cc=ravi.v.shankar@intel.com \
    --cc=shuah@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=tony.luck@intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).