From: Elena Ufimtseva <elena.ufimtseva@oracle.com>
To: qemu-devel@nongnu.org
Cc: elena.ufimtseva@oracle.com, john.g.johnson@oracle.com,
jag.raman@oracle.com, swapnil.ingle@nutanix.com,
john.levon@nutanix.com, alex.williamson@redhat.com,
stefanha@redhat.com
Subject: [PATCH RFC 03/19] vfio-user: define VFIO Proxy and communication functions
Date: Sun, 18 Jul 2021 23:27:42 -0700 [thread overview]
Message-ID: <cd9d5d6214d957db61120d9c3cbdc99e799a3baa.1626675354.git.elena.ufimtseva@oracle.com> (raw)
In-Reply-To: <cover.1626675354.git.elena.ufimtseva@oracle.com>
From: John G Johnson <john.g.johnson@oracle.com>
Add user.c and user.h files for vfio-user with the basic
send and receive functions.
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
hw/vfio/user.h | 120 ++++++++++++++
include/hw/vfio/vfio-common.h | 2 +
hw/vfio/user.c | 286 ++++++++++++++++++++++++++++++++++
MAINTAINERS | 4 +
hw/vfio/meson.build | 1 +
5 files changed, 413 insertions(+)
create mode 100644 hw/vfio/user.h
create mode 100644 hw/vfio/user.c
diff --git a/hw/vfio/user.h b/hw/vfio/user.h
new file mode 100644
index 0000000000..cdbc074579
--- /dev/null
+++ b/hw/vfio/user.h
@@ -0,0 +1,120 @@
+#ifndef VFIO_USER_H
+#define VFIO_USER_H
+
+/*
+ * vfio protocol over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Each message has a standard header that describes the command
+ * being sent, which is almost always a VFIO ioctl().
+ *
+ * The header may be followed by command-specific data, such as the
+ * region and offset info for read and write commands.
+ */
+
+/* commands: values naming the command that a message header describes */
+enum vfio_user_command {
+ VFIO_USER_VERSION = 1,
+ VFIO_USER_DMA_MAP = 2,
+ VFIO_USER_DMA_UNMAP = 3,
+ VFIO_USER_DEVICE_GET_INFO = 4,
+ VFIO_USER_DEVICE_GET_REGION_INFO = 5,
+ VFIO_USER_DEVICE_GET_REGION_IO_FDS = 6,
+ VFIO_USER_DEVICE_GET_IRQ_INFO = 7,
+ VFIO_USER_DEVICE_SET_IRQS = 8,
+ VFIO_USER_REGION_READ = 9,
+ VFIO_USER_REGION_WRITE = 10,
+ VFIO_USER_DMA_READ = 11,
+ VFIO_USER_DMA_WRITE = 12,
+ VFIO_USER_DEVICE_RESET = 13,
+ VFIO_USER_DIRTY_PAGES = 14,
+ VFIO_USER_MAX, /* one past the last defined command (evaluates to 15) */
+};
+
+/* flags: bit definitions for the flags field of vfio_user_hdr_t */
+#define VFIO_USER_REQUEST 0x0 /* message type: request */
+#define VFIO_USER_REPLY 0x1 /* message type: reply */
+#define VFIO_USER_TYPE 0xF /* mask that selects the message type bits */
+
+#define VFIO_USER_NO_REPLY 0x10 /* on a request: vfio_user_recv() sends no error reply for it */
+#define VFIO_USER_ERROR 0x20 /* set together with an error number in error_reply */
+
+typedef struct vfio_user_hdr { /* standard header at the start of every message */
+ uint16_t id; /* compared with VFIOUserReply.id to pair a reply with its request */
+ uint16_t command; /* presumably an enum vfio_user_command value -- confirm against callers */
+ uint32_t size; /* total message size in bytes, this header included */
+ uint32_t flags; /* VFIO_USER_* flag bits */
+ uint32_t error_reply; /* error number; filled in when VFIO_USER_ERROR is set */
+} vfio_user_hdr_t;
+
+/*
+ * VFIO_USER_VERSION
+ */
+#define VFIO_USER_MAJOR_VER 0
+#define VFIO_USER_MINOR_VER 0
+
+struct vfio_user_version { /* message layout for the VFIO_USER_VERSION command */
+ vfio_user_hdr_t hdr; /* standard header comes first */
+ uint16_t major; /* presumably checked against VFIO_USER_MAJOR_VER -- confirm */
+ uint16_t minor; /* presumably checked against VFIO_USER_MINOR_VER -- confirm */
+ char capabilities[]; /* flexible array member: variable-length data after the fixed fields */
+};
+
+#define VFIO_USER_DEF_MAX_FDS 8 /* NOTE(review): DEF/MAX naming suggests default vs. upper limit -- confirm with users */
+#define VFIO_USER_MAX_MAX_FDS 16
+
+#define VFIO_USER_DEF_MAX_XFER (1024 * 1024) /* 1 MiB; initial value of max_xfer_size in user.c */
+#define VFIO_USER_MAX_MAX_XFER (64 * 1024 * 1024) /* 64 MiB */
+
+typedef struct VFIOUserFDs { /* file descriptors that accompany a message */
+ int send_fds; /* number of entries of fds[] to send; 0 sends none */
+ int recv_fds; /* for a reply: capacity of fds[] before the receive, count received afterwards */
+ int *fds; /* descriptor array */
+} VFIOUserFDs;
+
+typedef struct VFIOUserReply { /* bookkeeping for one reply that vfio_user_recv() may deliver */
+ QTAILQ_ENTRY(VFIOUserReply) next; /* queue linkage (proxy->pending is walked through it) */
+ vfio_user_hdr_t *msg; /* buffer the reply message is received into */
+ VFIOUserFDs *fds; /* destination for received descriptors; may be NULL */
+ int rsize; /* size limit for the reply; a larger reply is rejected */
+ uint32_t id; /* compared with the id of each incoming reply header */
+ QemuCond cv; /* signalled when the reply has arrived or has failed */
+ uint8_t complete; /* set to 1 by vfio_user_recv() before it signals cv */
+} VFIOUserReply;
+
+enum proxy_state {
+ CONNECTED = 1, /* the only state in which vfio_user_send_locked() transmits */
+ RECV_ERROR = 2, /* set by vfio_user_recv() after a fatal receive error */
+ CLOSING = 3, /* vfio_user_recv() returns without reading in this state */
+ CLOSED = 4, /* not referenced by the code in user.c shown with this header */
+};
+
+typedef struct VFIOProxy { /* per-connection state for exchanging vfio-user messages */
+ QLIST_ENTRY(VFIOProxy) next;
+ char *sockname;
+ struct QIOChannel *ioc; /* channel used for all message reads and writes */
+ int (*request)(void *opaque, char *buf, VFIOUserFDs *fds); /* handler called for each incoming request */
+ void *reqarg; /* passed as 'opaque' to request() */
+ int flags; /* VFIO_PROXY_* bits */
+ QemuCond close_cv;
+
+ /*
+ * above only changed when iolock is held
+ * below are protected by per-proxy lock
+ */
+ QemuMutex lock;
+ QTAILQ_HEAD(, VFIOUserReply) free;
+ QTAILQ_HEAD(, VFIOUserReply) pending; /* replies being waited for; searched by message id */
+ enum proxy_state state;
+ int close_wait;
+} VFIOProxy;
+
+#define VFIO_PROXY_CLIENT 0x1 /* VFIOProxy.flags bit: incoming requests carrying FDs are rejected */
+
+void vfio_user_recv(void *opaque); /* opaque is used as a VFIODevice pointer */
+void vfio_user_send_reply(VFIOProxy *proxy, char *buf, int ret); /* ret > 0: reply size; ret < 0: negated error number */
+#endif /* VFIO_USER_H */
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 8af11b0a76..f43dc6e5d0 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -75,6 +75,7 @@ typedef struct VFIOAddressSpace {
} VFIOAddressSpace;
struct VFIOGroup;
+typedef struct VFIOProxy VFIOProxy;
typedef struct VFIOContainer {
VFIOAddressSpace *space;
@@ -143,6 +144,7 @@ typedef struct VFIODevice {
VFIOMigration *migration;
Error *migration_blocker;
OnOffAuto pre_copy_dirty_page_tracking;
+ VFIOProxy *proxy;
} VFIODevice;
struct VFIODeviceOps {
diff --git a/hw/vfio/user.c b/hw/vfio/user.c
new file mode 100644
index 0000000000..021d5540e0
--- /dev/null
+++ b/hw/vfio/user.c
@@ -0,0 +1,286 @@
+/*
+ * vfio protocol over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <linux/vfio.h>
+#include <sys/ioctl.h>
+
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qemu/main-loop.h"
+#include "hw/hw.h"
+#include "hw/vfio/vfio-common.h"
+#include "hw/vfio/vfio.h"
+#include "qemu/sockets.h"
+#include "io/channel.h"
+#include "io/channel-util.h"
+#include "sysemu/iothread.h"
+#include "user.h"
+
+static uint64_t max_xfer_size = VFIO_USER_DEF_MAX_XFER; /* largest request size vfio_user_recv() accepts */
+static IOThread *vfio_user_iothread; /* NOTE(review): not assigned anywhere in the code shown here */
+static void vfio_user_send_locked(VFIOProxy *proxy, vfio_user_hdr_t *msg,
+ VFIOUserFDs *fds);
+static void vfio_user_send(VFIOProxy *proxy, vfio_user_hdr_t *msg,
+ VFIOUserFDs *fds);
+static void vfio_user_shutdown(VFIOProxy *proxy); /* redundant: the definition follows immediately */
+
+/*
+ * Stop receiving on the proxy's channel: shut down the read side and
+ * clear the handlers registered for it in the vfio-user iothread's
+ * AioContext.  A failure of the shutdown call is ignored.
+ */
+static void vfio_user_shutdown(VFIOProxy *proxy)
+{
+    struct QIOChannel *chan = proxy->ioc;
+
+    qio_channel_shutdown(chan, QIO_CHANNEL_SHUTDOWN_READ, NULL);
+
+    /* all three handler arguments NULL */
+    qio_channel_set_aio_fd_handler(
+        chan, iothread_get_aio_context(vfio_user_iothread),
+        NULL, NULL, NULL);
+}
+
+/*
+ * Turn the request held in @buf into its reply and send it.
+ *
+ * @proxy: connection to send on
+ * @buf:   message buffer; it starts with a vfio_user_hdr_t that is
+ *         rewritten in place
+ * @ret:   > 0 is the size of the reply, < 0 is a negated error number,
+ *         0 leaves the size field as it is
+ */
+void vfio_user_send_reply(VFIOProxy *proxy, char *buf, int ret)
+{
+    vfio_user_hdr_t *reply_hdr = (vfio_user_hdr_t *)buf;
+
+    reply_hdr->flags = VFIO_USER_REPLY;
+
+    if (ret < 0) {
+        /* error replies consist of the header only */
+        reply_hdr->flags |= VFIO_USER_ERROR;
+        reply_hdr->error_reply = -ret;
+        reply_hdr->size = sizeof(*reply_hdr);
+    } else if (ret > 0) {
+        reply_hdr->size = ret;
+    }
+
+    vfio_user_send(proxy, reply_hdr, NULL);
+}
+
+/*
+ * Receive one message from the proxy's channel.
+ *
+ * @opaque: the VFIODevice whose proxy is read
+ *
+ * A reply is copied into the buffer of the matching entry on
+ * proxy->pending and that entry's condition variable is signalled.
+ * A request is read into a temporary buffer and passed to
+ * proxy->request() with the iothread lock held; a negative return from
+ * the handler is sent back as an error reply unless the request carried
+ * VFIO_USER_NO_REPLY.
+ *
+ * Errors after which the byte stream cannot be trusted shut the channel
+ * down and set the proxy state to RECV_ERROR.  On every error path the
+ * received FDs are closed and a matched waiter is woken with
+ * VFIO_USER_ERROR / EINVAL.
+ */
+void vfio_user_recv(void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOProxy *proxy = vbasedev->proxy;
+    VFIOUserReply *reply = NULL;
+    g_autofree int *fdp = NULL;
+    VFIOUserFDs reqfds = { 0, 0, NULL };
+    vfio_user_hdr_t msg;
+    struct iovec iov = {
+        .iov_base = &msg,
+        .iov_len = sizeof(msg),
+    };
+    int isreply, ret;
+    size_t i, msgleft, numfds = 0;
+    char *data = NULL;
+    g_autofree char *buf = NULL;
+    Error *local_err = NULL;
+
+    qemu_mutex_lock(&proxy->lock);
+    if (proxy->state == CLOSING) {
+        qemu_mutex_unlock(&proxy->lock);
+        return;
+    }
+
+    ret = qio_channel_readv_full(proxy->ioc, &iov, 1, &fdp, &numfds,
+                                 &local_err);
+    if (ret <= 0) {
+        /*
+         * read error or other side closed connection
+         *
+         * The channel may already have filled in local_err; setting an
+         * Error that is already set is not allowed, so add context to
+         * it instead.  errno is not meaningful here, in particular not
+         * for ret == 0.
+         */
+        if (local_err != NULL) {
+            error_prepend(&local_err, "vfio_user_recv read error: ");
+        } else {
+            error_setg(&local_err, "vfio_user_recv read error: %s",
+                       ret == 0 ? "connection closed" : "no data read");
+        }
+        goto fatal;
+    }
+
+    if ((size_t)ret < sizeof(msg)) {
+        error_setg(&local_err, "vfio_user_recv short read of header");
+        goto err;
+    }
+
+    /*
+     * For replies, find the matching pending request
+     */
+    switch (msg.flags & VFIO_USER_TYPE) {
+    case VFIO_USER_REQUEST:
+        isreply = 0;
+        break;
+    case VFIO_USER_REPLY:
+        isreply = 1;
+        break;
+    default:
+        error_setg(&local_err, "vfio_user_recv unknown message type");
+        goto err;
+    }
+
+    if (isreply) {
+        QTAILQ_FOREACH(reply, &proxy->pending, next) {
+            if (msg.id == reply->id) {
+                break;
+            }
+        }
+        if (reply == NULL) {
+            error_setg(&local_err, "vfio_user_recv unexpected reply");
+            goto err;
+        }
+        QTAILQ_REMOVE(&proxy->pending, reply, next);
+
+        /*
+         * Process any received FDs
+         */
+        if (numfds != 0) {
+            if (reply->fds == NULL || reply->fds->recv_fds < numfds) {
+                error_setg(&local_err, "vfio_user_recv unexpected FDs");
+                goto err;
+            }
+            reply->fds->recv_fds = numfds;
+            memcpy(reply->fds->fds, fdp, numfds * sizeof(int));
+        }
+
+    } else {
+        /*
+         * The client doesn't expect any FDs in requests, but
+         * they will be expected on the server
+         */
+        if (numfds != 0 && (proxy->flags & VFIO_PROXY_CLIENT)) {
+            error_setg(&local_err, "vfio_user_recv fd in client reply");
+            goto err;
+        }
+        reqfds.recv_fds = numfds;
+        /* fdp is only valid after the read, so it is captured here */
+        reqfds.fds = fdp;
+    }
+
+    /*
+     * The size field counts the header itself.  A smaller value would
+     * make msgleft wrap around and the copies below overrun their
+     * buffers, so reject it before any size arithmetic.
+     */
+    if (msg.size < sizeof(msg)) {
+        error_setg(&local_err, "vfio_user_recv bad header size");
+        goto fatal;
+    }
+
+    /*
+     * put the whole message into a single buffer
+     */
+    msgleft = msg.size - sizeof(msg);
+    if (isreply) {
+        if (msg.size > reply->rsize) {
+            error_setg(&local_err,
+                       "vfio_user_recv reply larger than recv buffer");
+            goto fatal;
+        }
+        *reply->msg = msg;
+        data = (char *)reply->msg + sizeof(msg);
+    } else {
+        if (msg.size > max_xfer_size) {
+            error_setg(&local_err, "vfio_user_recv request larger than max");
+            goto fatal;
+        }
+        buf = g_malloc0(msg.size);
+        memcpy(buf, &msg, sizeof(msg));
+        data = buf + sizeof(msg);
+    }
+
+    if (msgleft != 0) {
+        /*
+         * NOTE(review): a single read may return fewer bytes than asked
+         * for on a stream socket; that is treated as an error here.
+         */
+        ret = qio_channel_read(proxy->ioc, data, msgleft, &local_err);
+        if (ret < 0) {
+            goto fatal;
+        }
+        if ((size_t)ret != msgleft) {
+            error_setg(&local_err, "vfio_user_recv short read of msg body");
+            goto err;
+        }
+    }
+
+    /*
+     * Replies signal a waiter, requests get processed by vfio code
+     * that may assume the iothread lock is held.
+     */
+    qemu_mutex_unlock(&proxy->lock);
+    if (isreply) {
+        reply->complete = 1;
+        qemu_cond_signal(&reply->cv);
+    } else {
+        qemu_mutex_lock_iothread();
+        /*
+         * make sure proxy wasn't closed while we waited
+         * checking without holding the proxy lock is safe
+         * since state is only set to CLOSING when iolock is held
+         */
+        if (proxy->state != CLOSING) {
+            ret = proxy->request(proxy->reqarg, buf, &reqfds);
+            if (ret < 0 && !(msg.flags & VFIO_USER_NO_REPLY)) {
+                vfio_user_send_reply(proxy, buf, ret);
+            }
+        }
+        qemu_mutex_unlock_iothread();
+    }
+
+    return;
+
+fatal:
+    vfio_user_shutdown(proxy);
+    proxy->state = RECV_ERROR;
+
+err:
+    qemu_mutex_unlock(&proxy->lock);
+    for (i = 0; i < numfds; i++) {
+        close(fdp[i]);
+    }
+    if (reply != NULL) {
+        /* force an error to keep sending thread from hanging */
+        reply->msg->flags |= VFIO_USER_ERROR;
+        reply->msg->error_reply = EINVAL;
+        reply->complete = 1;
+        qemu_cond_signal(&reply->cv);
+    }
+    /* a failed read may return without having set an Error */
+    if (local_err != NULL) {
+        error_report_err(local_err);
+    } else {
+        error_report("vfio_user_recv read error");
+    }
+}
+
+/*
+ * Send @msg, and any FDs listed in @fds, on the proxy's channel.
+ * vfio_user_send() calls this with proxy->lock held.
+ *
+ * @proxy: connection to send on
+ * @msg:   complete message; msg->size bytes are written
+ * @fds:   descriptors to pass with the message; may be NULL
+ *
+ * There is no return value.  Failure is reported to the caller by
+ * setting VFIO_USER_ERROR in msg->flags and an error number in
+ * msg->error_reply: ECONNRESET if the proxy is not CONNECTED, EIO if a
+ * write fails.
+ */
+static void vfio_user_send_locked(VFIOProxy *proxy, vfio_user_hdr_t *msg,
+                                  VFIOUserFDs *fds)
+{
+    struct iovec iov = {
+        .iov_base = msg,
+        .iov_len = msg->size,
+    };
+    size_t numfds = 0;
+    int msgleft, ret, *fdp = NULL;
+    char *buf;
+    Error *local_err = NULL;
+
+    if (proxy->state != CONNECTED) {
+        msg->flags |= VFIO_USER_ERROR;
+        msg->error_reply = ECONNRESET;
+        return;
+    }
+
+    if (fds != NULL && fds->send_fds != 0) {
+        numfds = fds->send_fds;
+        fdp = fds->fds;
+    }
+    ret = qio_channel_writev_full(proxy->ioc, &iov, 1, fdp, numfds, &local_err);
+    if (ret < 0) {
+        goto err;
+    }
+    if (ret == msg->size) {
+        return;
+    }
+
+    /* partial write: push out the rest; the FDs went with the first write */
+    buf = (char *)iov.iov_base + ret;
+    msgleft = iov.iov_len - ret;
+    do {
+        ret = qio_channel_write(proxy->ioc, buf, msgleft, &local_err);
+        if (ret < 0) {
+            goto err;
+        }
+        buf += ret;
+        msgleft -= ret;
+    } while (msgleft != 0);
+    return;
+
+err:
+    /* flag the failure in the message, as the not-connected case does */
+    msg->flags |= VFIO_USER_ERROR;
+    msg->error_reply = EIO;
+    /* a failed write may return without having set an Error */
+    if (local_err != NULL) {
+        error_report_err(local_err);
+    } else {
+        error_report("vfio_user_send write error");
+    }
+}
+
+/*
+ * Send @msg (with optional @fds) while holding the proxy lock.
+ *
+ * If the caller holds the iothread lock, it is released before the
+ * proxy lock is taken and re-acquired after the proxy lock has been
+ * dropped, so the caller's lock state is the same on return.
+ */
+static void vfio_user_send(VFIOProxy *proxy, vfio_user_hdr_t *msg,
+                           VFIOUserFDs *fds)
+{
+    const bool held_iothread_lock = qemu_mutex_iothread_locked();
+
+    if (held_iothread_lock) {
+        qemu_mutex_unlock_iothread();
+    }
+
+    qemu_mutex_lock(&proxy->lock);
+    vfio_user_send_locked(proxy, msg, fds);
+    qemu_mutex_unlock(&proxy->lock);
+
+    if (held_iothread_lock) {
+        qemu_mutex_lock_iothread();
+    }
+}
diff --git a/MAINTAINERS b/MAINTAINERS
index 12d69f3a45..aa4df6c418 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1883,8 +1883,12 @@ L: qemu-s390x@nongnu.org
vfio-user
M: John G Johnson <john.g.johnson@oracle.com>
M: Thanos Makatos <thanos.makatos@nutanix.com>
+M: Elena Ufimtseva <elena.ufimtseva@oracle.com>
+M: Jagannathan Raman <jag.raman@oracle.com>
S: Supported
F: docs/devel/vfio-user.rst
+F: hw/vfio/user.c
+F: hw/vfio/user.h
vhost
M: Michael S. Tsirkin <mst@redhat.com>
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
index da9af297a0..739b30be73 100644
--- a/hw/vfio/meson.build
+++ b/hw/vfio/meson.build
@@ -8,6 +8,7 @@ vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
'display.c',
'pci-quirks.c',
'pci.c',
+ 'user.c',
))
vfio_ss.add(when: 'CONFIG_VFIO_CCW', if_true: files('ccw.c'))
vfio_ss.add(when: 'CONFIG_VFIO_PLATFORM', if_true: files('platform.c'))
--
2.25.1
next prev parent reply other threads:[~2021-07-19 6:29 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-07-19 6:27 [PATCH RFC 00/19] vfio-user implementation Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 01/19] vfio-user: introduce vfio-user protocol specification Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 02/19] vfio-user: add VFIO base abstract class Elena Ufimtseva
2021-07-19 6:27 ` Elena Ufimtseva [this message]
2021-07-27 16:34 ` [PATCH RFC 03/19] vfio-user: define VFIO Proxy and communication functions Stefan Hajnoczi
2021-07-28 18:08 ` John Johnson
2021-07-29 8:06 ` Stefan Hajnoczi
2021-07-19 6:27 ` [PATCH RFC 04/19] vfio-user: Define type vfio_user_pci_dev_info Elena Ufimtseva
2021-07-28 10:16 ` Stefan Hajnoczi
2021-07-29 0:55 ` John Johnson
2021-07-29 8:22 ` Stefan Hajnoczi
2021-07-19 6:27 ` [PATCH RFC 05/19] vfio-user: connect vfio proxy to remote server Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 06/19] vfio-user: negotiate protocol with " Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 07/19] vfio-user: define vfio-user pci ops Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 08/19] vfio-user: VFIO container setup & teardown Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 09/19] vfio-user: get device info and get irq info Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 10/19] vfio-user: device region read/write Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 11/19] vfio-user: get region and DMA map/unmap operations Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 12/19] vfio-user: probe remote device's BARs Elena Ufimtseva
2021-07-19 22:59 ` Alex Williamson
2021-07-20 1:39 ` John Johnson
2021-07-20 3:01 ` Alex Williamson
2021-07-19 6:27 ` [PATCH RFC 13/19] vfio-user: respond to remote DMA read/write requests Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 14/19] vfio_user: setup MSI/X interrupts and PCI config operations Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 15/19] vfio-user: vfio user device realize Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 16/19] vfio-user: pci reset Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 17/19] vfio-user: probe remote device ROM BAR Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 18/19] vfio-user: migration support Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 19/19] vfio-user: add migration cli options and version negotiation Elena Ufimtseva
2021-07-19 20:00 ` [PATCH RFC server 00/11] vfio-user server in QEMU Jagannathan Raman
2021-07-19 20:00 ` [PATCH RFC server 01/11] vfio-user: build library Jagannathan Raman
2021-07-19 20:24 ` John Levon
2021-07-20 12:06 ` Jag Raman
2021-07-20 12:20 ` Marc-André Lureau
2021-07-20 13:09 ` John Levon
2021-07-19 20:00 ` [PATCH RFC server 02/11] vfio-user: define vfio-user object Jagannathan Raman
2021-07-19 20:00 ` [PATCH RFC server 03/11] vfio-user: instantiate vfio-user context Jagannathan Raman
2021-07-19 20:00 ` [PATCH RFC server 04/11] vfio-user: find and init PCI device Jagannathan Raman
2021-07-26 15:05 ` John Levon
2021-07-28 17:08 ` Jag Raman
2021-07-19 20:00 ` [PATCH RFC server 05/11] vfio-user: run vfio-user context Jagannathan Raman
2021-07-20 14:17 ` Thanos Makatos
2021-08-13 14:51 ` Jag Raman
2021-08-16 12:52 ` John Levon
2021-08-16 14:10 ` Jag Raman
2021-07-19 20:00 ` [PATCH RFC server 06/11] vfio-user: handle PCI config space accesses Jagannathan Raman
2021-07-26 15:10 ` John Levon
2021-07-19 20:00 ` [PATCH RFC server 07/11] vfio-user: handle DMA mappings Jagannathan Raman
2021-07-20 14:38 ` Thanos Makatos
2021-07-19 20:00 ` [PATCH RFC server 08/11] vfio-user: handle PCI BAR accesses Jagannathan Raman
2021-07-19 20:00 ` [PATCH RFC server 09/11] vfio-user: handle device interrupts Jagannathan Raman
2021-07-19 20:00 ` [PATCH RFC server 10/11] vfio-user: register handlers to facilitate migration Jagannathan Raman
2021-07-20 14:05 ` Thanos Makatos
2021-07-19 20:00 ` [PATCH RFC server 11/11] vfio-user: acceptance test Jagannathan Raman
2021-07-20 16:12 ` Thanos Makatos
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=cd9d5d6214d957db61120d9c3cbdc99e799a3baa.1626675354.git.elena.ufimtseva@oracle.com \
--to=elena.ufimtseva@oracle.com \
--cc=alex.williamson@redhat.com \
--cc=jag.raman@oracle.com \
--cc=john.g.johnson@oracle.com \
--cc=john.levon@nutanix.com \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
--cc=swapnil.ingle@nutanix.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).