From: Elena Afanasova <eafanasova@gmail.com>
To: kvm@vger.kernel.org
Cc: stefanha@redhat.com, jag.raman@oracle.com,
elena.ufimtseva@oracle.com, pbonzini@redhat.com,
jasowang@redhat.com, mst@redhat.com, cohuck@redhat.com,
john.levon@nutanix.com, Elena Afanasova <eafanasova@gmail.com>
Subject: [RFC v3 4/5] KVM: add ioregionfd context
Date: Sun, 21 Feb 2021 15:04:40 +0300 [thread overview]
Message-ID: <4436ef071e55d88ff3996b134cc2303053581242.1613828727.git.eafanasova@gmail.com> (raw)
In-Reply-To: <cover.1613828726.git.eafanasova@gmail.com>
Add support for ioregionfd cmds/replies serialization.
Signed-off-by: Elena Afanasova <eafanasova@gmail.com>
---
v3:
- add comment
- drop kvm_io_bus_finish/prepare()
virt/kvm/ioregion.c | 164 ++++++++++++++++++++++++++++++++++++--------
1 file changed, 135 insertions(+), 29 deletions(-)
diff --git a/virt/kvm/ioregion.c b/virt/kvm/ioregion.c
index 1e1c7772d274..d53e3d1cd2ff 100644
--- a/virt/kvm/ioregion.c
+++ b/virt/kvm/ioregion.c
@@ -1,10 +1,39 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kvm_host.h>
-#include <linux/fs.h>
+#include <linux/wait.h>
#include <kvm/iodev.h>
#include "eventfd.h"
#include <uapi/linux/ioregion.h>
+/* ioregions that share the same rfd are serialized so that only one vCPU
+ * thread sends a struct ioregionfd_cmd to userspace at a time. This
+ * ensures that the struct ioregionfd_resp received from userspace will
+ * be processed by the one and only vCPU thread that sent the
+ * corresponding command.
+ *
+ * A waitqueue is used to wake up waiting vCPU threads in order. Most of
+ * the time the waitqueue is unused and the lock is not contended.
+ * For best performance userspace should set up ioregionfds so that there
+ * is no contention (e.g. dedicated ioregionfds for queue doorbell
+ * registers on multi-queue devices).
+ */
+struct ioregionfd {
+ wait_queue_head_t wq; /* threads waiting for !busy; wq.lock guards busy */
+ struct file *rf; /* file that struct ioregionfd_resp is read from */
+ struct kref kref; /* reference count; released through ctx_free() */
+ bool busy; /* true while a vCPU thread owns this context */
+};
+
+/* A guest address range whose accesses are sent to userspace as commands. */
+struct ioregion {
+ struct list_head list; /* link in the list from get_ioregion_list() */
+ u64 paddr; /* guest physical address */
+ u64 size; /* size in bytes */
+ struct file *wf; /* file that struct ioregionfd_cmd is written to */
+ u64 user_data; /* opaque token used by userspace */
+ struct kvm_io_device dev; /* embedded device; see to_ioregion() */
+ bool posted_writes; /* if set, writes do not read back a response */
+ struct ioregionfd *ctx; /* may be NULL; set only when an rfd is supplied */
+};
+
void
kvm_ioregionfd_init(struct kvm *kvm)
{
@@ -13,29 +42,28 @@ kvm_ioregionfd_init(struct kvm *kvm)
INIT_LIST_HEAD(&kvm->ioregions_pio);
}
-struct ioregion {
- struct list_head list;
- u64 paddr; /* guest physical address */
- u64 size; /* size in bytes */
- struct file *rf;
- struct file *wf;
- u64 user_data; /* opaque token used by userspace */
- struct kvm_io_device dev;
- bool posted_writes;
-};
-
static inline struct ioregion *
to_ioregion(struct kvm_io_device *dev)
{
return container_of(dev, struct ioregion, dev);
}
+/*
+ * kref release callback: frees the ioregionfd context that embeds @kref.
+ * The rf file reference is not dropped here.
+ * Assumes kvm->slots_lock held.
+ */
+static void ctx_free(struct kref *kref)
+{
+	kfree(container_of(kref, struct ioregionfd, kref));
+}
+
/* assumes kvm->slots_lock held */
static void
ioregion_release(struct ioregion *p)
{
- if (p->rf)
- fput(p->rf);
+ if (p->ctx) {
+ fput(p->ctx->rf);
+ kref_put(&p->ctx->kref, ctx_free);
+ }
fput(p->wf);
list_del(&p->list);
kfree(p);
@@ -90,6 +118,30 @@ ioregion_save_ctx(struct kvm_vcpu *vcpu, bool in, gpa_t addr, u8 state, void *va
vcpu->ioregion_ctx.in = in;
}
+/*
+ * Take exclusive ownership of @ctx: sleep on ctx->wq until no other
+ * thread has it marked busy, then mark it busy. A NULL ctx is ignored.
+ *
+ * NOTE(review): the return value of
+ * wait_event_interruptible_exclusive_locked() is not checked. If the wait
+ * can end early because of a pending signal, busy is set here while
+ * another thread may still own the context - confirm, and consider
+ * returning the error to the callers.
+ */
+static inline void
+ioregion_lock_ctx(struct ioregionfd *ctx)
+{
+ if (!ctx)
+ return;
+
+ /* busy is only read and written under wq.lock */
+ spin_lock(&ctx->wq.lock);
+ wait_event_interruptible_exclusive_locked(ctx->wq, !ctx->busy);
+ ctx->busy = true;
+ spin_unlock(&ctx->wq.lock);
+}
+
+/*
+ * Give up ownership of @ctx taken by ioregion_lock_ctx(): clear busy and
+ * wake up the waitqueue, both under wq.lock. A NULL ctx is ignored.
+ */
+static inline void
+ioregion_unlock_ctx(struct ioregionfd *ctx)
+{
+	if (ctx) {
+		spin_lock(&ctx->wq.lock);
+		ctx->busy = false;
+		wake_up_locked(&ctx->wq);
+		spin_unlock(&ctx->wq.lock);
+	}
+}
+
static int
ioregion_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
int len, void *val)
@@ -115,11 +167,15 @@ ioregion_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
}
}
+ ioregion_lock_ctx(p->ctx);
+
send_cmd:
memset(&buf, 0, sizeof(buf));
if (!pack_cmd(&buf.cmd, addr - p->paddr, len, IOREGIONFD_CMD_READ,
- 1, p->user_data, NULL))
- return -EOPNOTSUPP;
+ 1, p->user_data, NULL)) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
ret = kernel_write(p->wf, &buf.cmd, sizeof(buf.cmd), 0);
state = (ret == sizeof(buf.cmd)) ? GET_REPLY : SEND_CMD;
@@ -129,14 +185,15 @@ ioregion_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
}
if (ret != sizeof(buf.cmd)) {
ret = (ret < 0) ? ret : -EIO;
- return (ret == -EAGAIN || ret == -EWOULDBLOCK) ? -EINVAL : ret;
+ ret = (ret == -EAGAIN || ret == -EWOULDBLOCK) ? -EINVAL : ret;
+ goto out;
}
- if (!p->rf)
+ if (!p->ctx)
return 0;
get_repl:
memset(&buf, 0, sizeof(buf));
- ret = kernel_read(p->rf, &buf.resp, sizeof(buf.resp), 0);
+ ret = kernel_read(p->ctx->rf, &buf.resp, sizeof(buf.resp), 0);
state = (ret == sizeof(buf.resp)) ? COMPLETE : GET_REPLY;
if (signal_pending(current) && state == GET_REPLY) {
ioregion_save_ctx(vcpu, 1, addr, state, val);
@@ -144,12 +201,17 @@ ioregion_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
}
if (ret != sizeof(buf.resp)) {
ret = (ret < 0) ? ret : -EIO;
- return (ret == -EAGAIN || ret == -EWOULDBLOCK) ? -EINVAL : ret;
+ ret = (ret == -EAGAIN || ret == -EWOULDBLOCK) ? -EINVAL : ret;
+ goto out;
}
memcpy(val, &buf.resp.data, len);
+ ret = 0;
- return 0;
+out:
+ ioregion_unlock_ctx(p->ctx);
+
+ return ret;
}
static int
@@ -177,11 +239,15 @@ ioregion_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
}
}
+ ioregion_lock_ctx(p->ctx);
+
send_cmd:
memset(&buf, 0, sizeof(buf));
if (!pack_cmd(&buf.cmd, addr - p->paddr, len, IOREGIONFD_CMD_WRITE,
- p->posted_writes ? 0 : 1, p->user_data, val))
- return -EOPNOTSUPP;
+ p->posted_writes ? 0 : 1, p->user_data, val)) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
ret = kernel_write(p->wf, &buf.cmd, sizeof(buf.cmd), 0);
state = (ret == sizeof(buf.cmd)) ? GET_REPLY : SEND_CMD;
@@ -191,13 +257,14 @@ ioregion_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
}
if (ret != sizeof(buf.cmd)) {
ret = (ret < 0) ? ret : -EIO;
- return (ret == -EAGAIN || ret == -EWOULDBLOCK) ? -EINVAL : ret;
+ ret = (ret == -EAGAIN || ret == -EWOULDBLOCK) ? -EINVAL : ret;
+ goto out;
}
get_repl:
if (!p->posted_writes) {
memset(&buf, 0, sizeof(buf));
- ret = kernel_read(p->rf, &buf.resp, sizeof(buf.resp), 0);
+ ret = kernel_read(p->ctx->rf, &buf.resp, sizeof(buf.resp), 0);
state = (ret == sizeof(buf.resp)) ? COMPLETE : GET_REPLY;
if (signal_pending(current) && state == GET_REPLY) {
ioregion_save_ctx(vcpu, 0, addr, state, (void *)val);
@@ -205,11 +272,16 @@ ioregion_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
}
if (ret != sizeof(buf.resp)) {
ret = (ret < 0) ? ret : -EIO;
- return (ret == -EAGAIN || ret == -EWOULDBLOCK) ? -EINVAL : ret;
+ ret = (ret == -EAGAIN || ret == -EWOULDBLOCK) ? -EINVAL : ret;
+ goto out;
}
}
+ ret = 0;
- return 0;
+out:
+ ioregion_unlock_ctx(p->ctx);
+
+ return ret;
}
/*
@@ -285,6 +357,33 @@ get_bus_from_flags(__u32 flags)
return KVM_MMIO_BUS;
}
+/*
+ * Attach a response context to @p for the read file @rf.
+ *
+ * If an ioregion already on the list for @bus_idx reads its responses from
+ * the same inode as @rf, its context is shared and an extra reference is
+ * taken on it. Otherwise a new context is allocated with a single
+ * reference owned by @p.
+ *
+ * Returns true on success, false if the allocation failed (p->ctx is
+ * NULL in that case).
+ *
+ * Assumes kvm->slots_lock held.
+ *
+ * NOTE(review): only the list for @bus_idx is searched, so the same rfd
+ * registered on another bus presumably gets a separate context - confirm
+ * whether serialization across busses is required.
+ */
+static bool
+ioregion_get_ctx(struct kvm *kvm, struct ioregion *p, struct file *rf, int bus_idx)
+{
+	struct ioregion *_p;
+	struct list_head *ioregions;
+
+	ioregions = get_ioregion_list(kvm, bus_idx);
+	list_for_each_entry(_p, ioregions, list) {
+		/* regions registered without an rfd have no context */
+		if (!_p->ctx)
+			continue;
+		/*
+		 * Compare the inode objects rather than i_ino: inode
+		 * numbers are only unique within one filesystem.
+		 */
+		if (file_inode(_p->ctx->rf) == file_inode(rf)) {
+			p->ctx = _p->ctx;
+			kref_get(&p->ctx->kref);
+			return true;
+		}
+	}
+
+	p->ctx = kzalloc(sizeof(*p->ctx), GFP_KERNEL_ACCOUNT);
+	if (!p->ctx)
+		return false;
+
+	p->ctx->rf = rf;
+	p->ctx->busy = false;
+	init_waitqueue_head(&p->ctx->wq);
+	/* a new kref must start at one; kref_get() is only for live objects */
+	kref_init(&p->ctx->kref);
+
+	return true;
+}
+
int
kvm_set_ioregion_idx(struct kvm *kvm, struct kvm_ioregion *args, enum kvm_bus bus_idx)
{
@@ -309,11 +408,10 @@ kvm_set_ioregion_idx(struct kvm *kvm, struct kvm_ioregion *args, enum kvm_bus bu
}
INIT_LIST_HEAD(&p->list);
+ p->wf = wfile;
p->paddr = args->guest_paddr;
p->size = args->memory_size;
p->user_data = args->user_data;
- p->rf = rfile;
- p->wf = wfile;
p->posted_writes = args->flags & KVM_IOREGION_POSTED_WRITES;
mutex_lock(&kvm->slots_lock);
@@ -322,6 +420,12 @@ kvm_set_ioregion_idx(struct kvm *kvm, struct kvm_ioregion *args, enum kvm_bus bu
ret = -EEXIST;
goto unlock_fail;
}
+
+ if (rfile && !ioregion_get_ctx(kvm, p, rfile, bus_idx)) {
+ ret = -ENOMEM;
+ goto unlock_fail;
+ }
+
kvm_iodevice_init(&p->dev, &ioregion_ops);
ret = kvm_io_bus_register_dev(kvm, bus_idx, p->paddr, p->size,
&p->dev);
@@ -335,6 +439,8 @@ kvm_set_ioregion_idx(struct kvm *kvm, struct kvm_ioregion *args, enum kvm_bus bu
unlock_fail:
mutex_unlock(&kvm->slots_lock);
+ if (p->ctx)
+ kref_put(&p->ctx->kref, ctx_free);
kfree(p);
fail:
if (rfile)
--
2.25.1
next prev parent reply other threads:[~2021-02-21 12:12 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-02-21 12:04 [RFC v3 0/5] Introduce MMIO/PIO dispatch file descriptors (ioregionfd) Elena Afanasova
2021-02-21 12:04 ` [RFC v3 1/5] KVM: add initial support for KVM_SET_IOREGION Elena Afanasova
2021-02-24 10:06 ` Stefan Hajnoczi
2021-03-05 13:09 ` Cornelia Huck
2021-03-09 5:26 ` Jason Wang
2021-03-22 9:57 ` Stefan Hajnoczi
2021-02-21 12:04 ` [RFC v3 2/5] KVM: x86: add support for ioregionfd signal handling Elena Afanasova
2021-02-24 10:42 ` Stefan Hajnoczi
2021-03-09 5:51 ` Jason Wang
2021-03-17 14:19 ` Elena Afanasova
2021-03-26 6:00 ` Jason Wang
2021-02-21 12:04 ` [RFC v3 3/5] KVM: implement wire protocol Elena Afanasova
2021-02-24 11:02 ` Stefan Hajnoczi
2021-03-09 6:19 ` Jason Wang
2021-03-17 13:08 ` Elena Afanasova
2021-03-26 6:21 ` Jason Wang
2021-03-29 16:17 ` Stefan Hajnoczi
2021-02-21 12:04 ` Elena Afanasova [this message]
2021-02-24 11:27 ` [RFC v3 4/5] KVM: add ioregionfd context Stefan Hajnoczi
2021-03-09 7:54 ` Jason Wang
2021-03-09 8:01 ` Paolo Bonzini
2021-03-10 13:20 ` Elena Afanasova
2021-03-10 14:11 ` Paolo Bonzini
2021-03-10 16:41 ` Elena Afanasova
[not found] ` <6ff79d0b-3b6a-73d3-ffbd-e4af9758735f@redhat.com>
2021-03-17 10:46 ` Elena Afanasova
2021-03-26 6:47 ` Jason Wang
2021-02-21 12:04 ` [RFC v3 5/5] KVM: enforce NR_IOBUS_DEVS limit if kmemcg is disabled Elena Afanasova
2021-02-21 17:06 ` [RFC v3 0/5] Introduce MMIO/PIO dispatch file descriptors (ioregionfd) Paolo Bonzini
2021-02-22 16:40 ` Elena Afanasova
2021-02-24 11:34 ` Stefan Hajnoczi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4436ef071e55d88ff3996b134cc2303053581242.1613828727.git.eafanasova@gmail.com \
--to=eafanasova@gmail.com \
--cc=cohuck@redhat.com \
--cc=elena.ufimtseva@oracle.com \
--cc=jag.raman@oracle.com \
--cc=jasowang@redhat.com \
--cc=john.levon@nutanix.com \
--cc=kvm@vger.kernel.org \
--cc=mst@redhat.com \
--cc=pbonzini@redhat.com \
--cc=stefanha@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).