From: Elena Afanasova <eafanasova@gmail.com>
To: kvm@vger.kernel.org
Cc: stefanha@redhat.com, jag.raman@oracle.com,
	elena.ufimtseva@oracle.com, pbonzini@redhat.com,
	jasowang@redhat.com, mst@redhat.com, cohuck@redhat.com,
	john.levon@nutanix.com, Elena Afanasova <eafanasova@gmail.com>
Subject: [RFC v3 4/5] KVM: add ioregionfd context
Date: Sun, 21 Feb 2021 15:04:40 +0300
Message-ID: <4436ef071e55d88ff3996b134cc2303053581242.1613828727.git.eafanasova@gmail.com>
In-Reply-To: <cover.1613828726.git.eafanasova@gmail.com>

Add support for serializing ioregionfd commands and replies. Ioregions
that share the same read file descriptor now reference a common
ioregionfd context, so only one vCPU thread at a time sends a command
and consumes the corresponding reply on that descriptor.

Signed-off-by: Elena Afanasova <eafanasova@gmail.com>
---
v3:
 - add a comment describing the ioregionfd serialization scheme
 - drop kvm_io_bus_finish/prepare()
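
Not part of the patch, just for illustration: a minimal userspace sketch
of the setup this serialization covers, i.e. two doorbell regions
registered with the same reply descriptor so that a single struct
ioregionfd serializes their traffic. It assumes the uapi from patches 1
and 3 of this series is applied; the rfd/wfd field names of struct
kvm_ioregion follow my reading of patch 1, and the addresses and helper
functions below are made up for the example.

/*
 * Illustration only (assumes the uapi from patches 1 and 3 of this
 * series): register two MMIO doorbell regions that share one reply
 * descriptor. Because both regions pass the same rfd, the kernel links
 * them to a single struct ioregionfd and serializes their
 * ioregionfd_cmd/ioregionfd_resp traffic.
 */
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int set_ioregion(int vm_fd, uint64_t gpa, uint64_t size,
			uint64_t token, int rfd, int wfd)
{
	struct kvm_ioregion region;	/* rfd/wfd names assumed from patch 1 */

	memset(&region, 0, sizeof(region));
	region.guest_paddr = gpa;	/* guest physical address */
	region.memory_size = size;	/* size in bytes */
	region.user_data   = token;	/* echoed back in each command */
	region.rfd = rfd;		/* kernel reads ioregionfd_resp here */
	region.wfd = wfd;		/* kernel writes ioregionfd_cmd here */
	region.flags = 0;		/* or KVM_IOREGION_POSTED_WRITES */

	return ioctl(vm_fd, KVM_SET_IOREGION, &region);
}

int register_shared_doorbells(int vm_fd)
{
	int cmd_pipe[2], resp_pipe[2];

	/*
	 * The kernel writes commands to cmd_pipe[1], the device emulation
	 * thread reads them from cmd_pipe[0], writes replies to
	 * resp_pipe[1], and the kernel reads them back from resp_pipe[0].
	 */
	if (pipe(cmd_pipe) < 0 || pipe(resp_pipe) < 0)
		return -1;

	/* Both regions share resp_pipe[0], so they share one ioregionfd. */
	if (set_ioregion(vm_fd, 0xfe000000, 0x1000, 1,
			 resp_pipe[0], cmd_pipe[1]) < 0)
		return -1;
	return set_ioregion(vm_fd, 0xfe001000, 0x1000, 2,
			    resp_pipe[0], cmd_pipe[1]);
}

As the comment added by this patch notes, best performance comes from the
opposite layout: a dedicated pipe pair per doorbell register, so no two
vCPU threads ever contend on one context.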

 virt/kvm/ioregion.c | 164 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 135 insertions(+), 29 deletions(-)

diff --git a/virt/kvm/ioregion.c b/virt/kvm/ioregion.c
index 1e1c7772d274..d53e3d1cd2ff 100644
--- a/virt/kvm/ioregion.c
+++ b/virt/kvm/ioregion.c
@@ -1,10 +1,39 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include <linux/kvm_host.h>
-#include <linux/fs.h>
+#include <linux/wait.h>
 #include <kvm/iodev.h>
 #include "eventfd.h"
 #include <uapi/linux/ioregion.h>
 
+/* ioregions that share the same rfd are serialized so that only one vCPU
+ * thread sends a struct ioregionfd_cmd to userspace at a time. This
+ * ensures that the struct ioregionfd_resp received from userspace will
+ * be processed by the one and only vCPU thread that sent it.
+ *
+ * A waitqueue is used to wake up waiting vCPU threads in order. Most of
+ * the time the waitqueue is unused and the lock is not contended.
+ * For best performance userspace should set up ioregionfds so that there
+ * is no contention (e.g. dedicated ioregionfds for queue doorbell
+ * registers on multi-queue devices).
+ */
+struct ioregionfd {
+	wait_queue_head_t	  wq;
+	struct file		 *rf;
+	struct kref		  kref;
+	bool			  busy;
+};
+
+struct ioregion {
+	struct list_head	  list;
+	u64			  paddr;   /* guest physical address */
+	u64			  size;    /* size in bytes */
+	struct file		 *wf;
+	u64			  user_data; /* opaque token used by userspace */
+	struct kvm_io_device	  dev;
+	bool			  posted_writes;
+	struct ioregionfd	 *ctx;
+};
+
 void
 kvm_ioregionfd_init(struct kvm *kvm)
 {
@@ -13,29 +42,28 @@ kvm_ioregionfd_init(struct kvm *kvm)
 	INIT_LIST_HEAD(&kvm->ioregions_pio);
 }
 
-struct ioregion {
-	struct list_head     list;
-	u64                  paddr;  /* guest physical address */
-	u64                  size;   /* size in bytes */
-	struct file         *rf;
-	struct file         *wf;
-	u64                  user_data; /* opaque token used by userspace */
-	struct kvm_io_device dev;
-	bool                 posted_writes;
-};
-
 static inline struct ioregion *
 to_ioregion(struct kvm_io_device *dev)
 {
 	return container_of(dev, struct ioregion, dev);
 }
 
+/* assumes kvm->slots_lock held */
+static void ctx_free(struct kref *kref)
+{
+	struct ioregionfd *ctx = container_of(kref, struct ioregionfd, kref);
+
+	kfree(ctx);
+}
+
 /* assumes kvm->slots_lock held */
 static void
 ioregion_release(struct ioregion *p)
 {
-	if (p->rf)
-		fput(p->rf);
+	if (p->ctx) {
+		fput(p->ctx->rf);
+		kref_put(&p->ctx->kref, ctx_free);
+	}
 	fput(p->wf);
 	list_del(&p->list);
 	kfree(p);
@@ -90,6 +118,30 @@ ioregion_save_ctx(struct kvm_vcpu *vcpu, bool in, gpa_t addr, u8 state, void *va
 	vcpu->ioregion_ctx.in = in;
 }
 
+static inline void
+ioregion_lock_ctx(struct ioregionfd *ctx)
+{
+	if (!ctx)
+		return;
+
+	spin_lock(&ctx->wq.lock);
+	wait_event_interruptible_exclusive_locked(ctx->wq, !ctx->busy);
+	ctx->busy = true;
+	spin_unlock(&ctx->wq.lock);
+}
+
+static inline void
+ioregion_unlock_ctx(struct ioregionfd *ctx)
+{
+	if (!ctx)
+		return;
+
+	spin_lock(&ctx->wq.lock);
+	ctx->busy = false;
+	wake_up_locked(&ctx->wq);
+	spin_unlock(&ctx->wq.lock);
+}
+
 static int
 ioregion_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
 	      int len, void *val)
@@ -115,11 +167,15 @@ ioregion_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
 		}
 	}
 
+	ioregion_lock_ctx(p->ctx);
+
 send_cmd:
 	memset(&buf, 0, sizeof(buf));
 	if (!pack_cmd(&buf.cmd, addr - p->paddr, len, IOREGIONFD_CMD_READ,
-		      1, p->user_data, NULL))
-		return -EOPNOTSUPP;
+		      1, p->user_data, NULL)) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
 
 	ret = kernel_write(p->wf, &buf.cmd, sizeof(buf.cmd), 0);
 	state = (ret == sizeof(buf.cmd)) ? GET_REPLY : SEND_CMD;
@@ -129,14 +185,15 @@ ioregion_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
 	}
 	if (ret != sizeof(buf.cmd)) {
 		ret = (ret < 0) ? ret : -EIO;
-		return (ret == -EAGAIN || ret == -EWOULDBLOCK) ? -EINVAL : ret;
+		ret = (ret == -EAGAIN || ret == -EWOULDBLOCK) ? -EINVAL : ret;
+		goto out;
 	}
-	if (!p->rf)
+	if (!p->ctx)
 		return 0;
 
 get_repl:
 	memset(&buf, 0, sizeof(buf));
-	ret = kernel_read(p->rf, &buf.resp, sizeof(buf.resp), 0);
+	ret = kernel_read(p->ctx->rf, &buf.resp, sizeof(buf.resp), 0);
 	state = (ret == sizeof(buf.resp)) ? COMPLETE : GET_REPLY;
 	if (signal_pending(current) && state == GET_REPLY) {
 		ioregion_save_ctx(vcpu, 1, addr, state, val);
@@ -144,12 +201,17 @@ ioregion_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
 	}
 	if (ret != sizeof(buf.resp)) {
 		ret = (ret < 0) ? ret : -EIO;
-		return (ret == -EAGAIN || ret == -EWOULDBLOCK) ? -EINVAL : ret;
+		ret = (ret == -EAGAIN || ret == -EWOULDBLOCK) ? -EINVAL : ret;
+		goto out;
 	}
 
 	memcpy(val, &buf.resp.data, len);
+	ret = 0;
 
-	return 0;
+out:
+	ioregion_unlock_ctx(p->ctx);
+
+	return ret;
 }
 
 static int
@@ -177,11 +239,15 @@ ioregion_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
 		}
 	}
 
+	ioregion_lock_ctx(p->ctx);
+
 send_cmd:
 	memset(&buf, 0, sizeof(buf));
 	if (!pack_cmd(&buf.cmd, addr - p->paddr, len, IOREGIONFD_CMD_WRITE,
-		      p->posted_writes ? 0 : 1, p->user_data, val))
-		return -EOPNOTSUPP;
+		      p->posted_writes ? 0 : 1, p->user_data, val)) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
 
 	ret = kernel_write(p->wf, &buf.cmd, sizeof(buf.cmd), 0);
 	state = (ret == sizeof(buf.cmd)) ? GET_REPLY : SEND_CMD;
@@ -191,13 +257,14 @@ ioregion_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
 	}
 	if (ret != sizeof(buf.cmd)) {
 		ret = (ret < 0) ? ret : -EIO;
-		return (ret == -EAGAIN || ret == -EWOULDBLOCK) ? -EINVAL : ret;
+		ret = (ret == -EAGAIN || ret == -EWOULDBLOCK) ? -EINVAL : ret;
+		goto out;
 	}
 
 get_repl:
 	if (!p->posted_writes) {
 		memset(&buf, 0, sizeof(buf));
-		ret = kernel_read(p->rf, &buf.resp, sizeof(buf.resp), 0);
+		ret = kernel_read(p->ctx->rf, &buf.resp, sizeof(buf.resp), 0);
 		state = (ret == sizeof(buf.resp)) ? COMPLETE : GET_REPLY;
 		if (signal_pending(current) && state == GET_REPLY) {
 			ioregion_save_ctx(vcpu, 0, addr, state, (void *)val);
@@ -205,11 +272,16 @@ ioregion_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
 		}
 		if (ret != sizeof(buf.resp)) {
 			ret = (ret < 0) ? ret : -EIO;
-			return (ret == -EAGAIN || ret == -EWOULDBLOCK) ? -EINVAL : ret;
+			ret = (ret == -EAGAIN || ret == -EWOULDBLOCK) ? -EINVAL : ret;
+			goto out;
 		}
 	}
+	ret = 0;
 
-	return 0;
+out:
+	ioregion_unlock_ctx(p->ctx);
+
+	return ret;
 }
 
 /*
@@ -285,6 +357,33 @@ get_bus_from_flags(__u32 flags)
 	return KVM_MMIO_BUS;
 }
 
+/* assumes kvm->slots_lock held */
+static bool
+ioregion_get_ctx(struct kvm *kvm, struct ioregion *p, struct file *rf, int bus_idx)
+{
+	struct ioregion *_p;
+	struct list_head *ioregions;
+
+	ioregions = get_ioregion_list(kvm, bus_idx);
+	list_for_each_entry(_p, ioregions, list)
+		if (file_inode(_p->ctx->rf)->i_ino == file_inode(rf)->i_ino) {
+			p->ctx = _p->ctx;
+			kref_get(&p->ctx->kref);
+			return true;
+		}
+
+	p->ctx = kzalloc(sizeof(*p->ctx), GFP_KERNEL_ACCOUNT);
+	if (!p->ctx)
+		return false;
+
+	p->ctx->rf = rf;
+	p->ctx->busy = false;
+	init_waitqueue_head(&p->ctx->wq);
+	kref_get(&p->ctx->kref);
+
+	return true;
+}
+
 int
 kvm_set_ioregion_idx(struct kvm *kvm, struct kvm_ioregion *args, enum kvm_bus bus_idx)
 {
@@ -309,11 +408,10 @@ kvm_set_ioregion_idx(struct kvm *kvm, struct kvm_ioregion *args, enum kvm_bus bu
 	}
 
 	INIT_LIST_HEAD(&p->list);
+	p->wf = wfile;
 	p->paddr = args->guest_paddr;
 	p->size = args->memory_size;
 	p->user_data = args->user_data;
-	p->rf = rfile;
-	p->wf = wfile;
 	p->posted_writes = args->flags & KVM_IOREGION_POSTED_WRITES;
 
 	mutex_lock(&kvm->slots_lock);
@@ -322,6 +420,12 @@ kvm_set_ioregion_idx(struct kvm *kvm, struct kvm_ioregion *args, enum kvm_bus bu
 		ret = -EEXIST;
 		goto unlock_fail;
 	}
+
+	if (rfile && !ioregion_get_ctx(kvm, p, rfile, bus_idx)) {
+		ret = -ENOMEM;
+		goto unlock_fail;
+	}
+
 	kvm_iodevice_init(&p->dev, &ioregion_ops);
 	ret = kvm_io_bus_register_dev(kvm, bus_idx, p->paddr, p->size,
 				      &p->dev);
@@ -335,6 +439,8 @@ kvm_set_ioregion_idx(struct kvm *kvm, struct kvm_ioregion *args, enum kvm_bus bu
 
 unlock_fail:
 	mutex_unlock(&kvm->slots_lock);
+	if (p->ctx)
+		kref_put(&p->ctx->kref, ctx_free);
 	kfree(p);
 fail:
 	if (rfile)
-- 
2.25.1

