qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Elena Ufimtseva <elena.ufimtseva@oracle.com>
To: qemu-devel@nongnu.org
Cc: elena.ufimtseva@oracle.com, john.g.johnson@oracle.com,
	jag.raman@oracle.com, swapnil.ingle@nutanix.com,
	john.levon@nutanix.com, alex.williamson@redhat.com,
	stefanha@redhat.com
Subject: [PATCH RFC 03/19] vfio-user: define VFIO Proxy and communication functions
Date: Sun, 18 Jul 2021 23:27:42 -0700	[thread overview]
Message-ID: <cd9d5d6214d957db61120d9c3cbdc99e799a3baa.1626675354.git.elena.ufimtseva@oracle.com> (raw)
In-Reply-To: <cover.1626675354.git.elena.ufimtseva@oracle.com>

From: John G Johnson <john.g.johnson@oracle.com>

Add user.c and user.h files for vfio-user with the basic
send and receive functions.

Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
 hw/vfio/user.h                | 120 ++++++++++++++
 include/hw/vfio/vfio-common.h |   2 +
 hw/vfio/user.c                | 286 ++++++++++++++++++++++++++++++++++
 MAINTAINERS                   |   4 +
 hw/vfio/meson.build           |   1 +
 5 files changed, 413 insertions(+)
 create mode 100644 hw/vfio/user.h
 create mode 100644 hw/vfio/user.c

diff --git a/hw/vfio/user.h b/hw/vfio/user.h
new file mode 100644
index 0000000000..cdbc074579
--- /dev/null
+++ b/hw/vfio/user.h
@@ -0,0 +1,120 @@
+#ifndef VFIO_USER_H
+#define VFIO_USER_H
+
+/*
+ * vfio protocol over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Each message has a standard header that describes the command
+ * being sent, which is almost always a VFIO ioctl().
+ *
+ * The header may be followed by command-specfic data, such as the
+ * region and offset info for read and write commands.
+ */
+
+/* commands */
+enum vfio_user_command {
+    VFIO_USER_VERSION                   = 1,
+    VFIO_USER_DMA_MAP                   = 2,
+    VFIO_USER_DMA_UNMAP                 = 3,
+    VFIO_USER_DEVICE_GET_INFO           = 4,
+    VFIO_USER_DEVICE_GET_REGION_INFO    = 5,
+    VFIO_USER_DEVICE_GET_REGION_IO_FDS  = 6,
+    VFIO_USER_DEVICE_GET_IRQ_INFO       = 7,
+    VFIO_USER_DEVICE_SET_IRQS           = 8,
+    VFIO_USER_REGION_READ               = 9,
+    VFIO_USER_REGION_WRITE              = 10,
+    VFIO_USER_DMA_READ                  = 11,
+    VFIO_USER_DMA_WRITE                 = 12,
+    VFIO_USER_DEVICE_RESET              = 13,
+    VFIO_USER_DIRTY_PAGES               = 14,
+    VFIO_USER_MAX,
+};
+
+/* flags */
+#define VFIO_USER_REQUEST       0x0
+#define VFIO_USER_REPLY         0x1
+#define VFIO_USER_TYPE          0xF
+
+#define VFIO_USER_NO_REPLY      0x10
+#define VFIO_USER_ERROR         0x20
+
+typedef struct vfio_user_hdr {
+    uint16_t id;
+    uint16_t command;
+    uint32_t size;
+    uint32_t flags;
+    uint32_t error_reply;
+} vfio_user_hdr_t;
+
+/*
+ * VFIO_USER_VERSION
+ */
+#define VFIO_USER_MAJOR_VER     0
+#define VFIO_USER_MINOR_VER     0
+
+struct vfio_user_version {
+    vfio_user_hdr_t hdr;
+    uint16_t major;
+    uint16_t minor;
+    char capabilities[];
+};
+
+#define VFIO_USER_DEF_MAX_FDS   8
+#define VFIO_USER_MAX_MAX_FDS   16
+
+#define VFIO_USER_DEF_MAX_XFER  (1024 * 1024)
+#define VFIO_USER_MAX_MAX_XFER  (64 * 1024 * 1024)
+
+typedef struct VFIOUserFDs {
+    int send_fds;
+    int recv_fds;
+    int *fds;
+} VFIOUserFDs;
+
+typedef struct VFIOUserReply {
+    QTAILQ_ENTRY(VFIOUserReply) next;
+    vfio_user_hdr_t *msg;
+    VFIOUserFDs *fds;
+    int rsize;
+    uint32_t id;
+    QemuCond cv;
+    uint8_t complete;
+} VFIOUserReply;
+
+enum proxy_state {
+    CONNECTED = 1,
+    RECV_ERROR = 2,
+    CLOSING = 3,
+    CLOSED = 4,
+};
+
+typedef struct VFIOProxy {
+    QLIST_ENTRY(VFIOProxy) next;
+    char *sockname;
+    struct QIOChannel *ioc;
+    int (*request)(void *opaque, char *buf, VFIOUserFDs *fds);
+    void *reqarg;
+    int flags;
+    QemuCond close_cv;
+
+    /*
+     * above only changed when iolock is held
+     * below are protected by per-proxy lock
+     */
+    QemuMutex lock;
+    QTAILQ_HEAD(, VFIOUserReply) free;
+    QTAILQ_HEAD(, VFIOUserReply) pending;
+    enum proxy_state state;
+    int close_wait;
+} VFIOProxy;
+
+#define VFIO_PROXY_CLIENT       0x1
+
+void vfio_user_recv(void *opaque);
+void vfio_user_send_reply(VFIOProxy *proxy, char *buf, int ret);
+#endif /* VFIO_USER_H */
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 8af11b0a76..f43dc6e5d0 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -75,6 +75,7 @@ typedef struct VFIOAddressSpace {
 } VFIOAddressSpace;
 
 struct VFIOGroup;
+typedef struct VFIOProxy VFIOProxy;
 
 typedef struct VFIOContainer {
     VFIOAddressSpace *space;
@@ -143,6 +144,7 @@ typedef struct VFIODevice {
     VFIOMigration *migration;
     Error *migration_blocker;
     OnOffAuto pre_copy_dirty_page_tracking;
+    VFIOProxy *proxy;
 } VFIODevice;
 
 struct VFIODeviceOps {
diff --git a/hw/vfio/user.c b/hw/vfio/user.c
new file mode 100644
index 0000000000..021d5540e0
--- /dev/null
+++ b/hw/vfio/user.c
@@ -0,0 +1,286 @@
+/*
+ * vfio protocol over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <linux/vfio.h>
+#include <sys/ioctl.h>
+
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qemu/main-loop.h"
+#include "hw/hw.h"
+#include "hw/vfio/vfio-common.h"
+#include "hw/vfio/vfio.h"
+#include "qemu/sockets.h"
+#include "io/channel.h"
+#include "io/channel-util.h"
+#include "sysemu/iothread.h"
+#include "user.h"
+
+static uint64_t max_xfer_size = VFIO_USER_DEF_MAX_XFER;
+static IOThread *vfio_user_iothread;
+static void vfio_user_send_locked(VFIOProxy *proxy, vfio_user_hdr_t *msg,
+                                  VFIOUserFDs *fds);
+static void vfio_user_send(VFIOProxy *proxy, vfio_user_hdr_t *msg,
+                           VFIOUserFDs *fds);
+static void vfio_user_shutdown(VFIOProxy *proxy);
+
+static void vfio_user_shutdown(VFIOProxy *proxy)
+{
+    qio_channel_shutdown(proxy->ioc, QIO_CHANNEL_SHUTDOWN_READ, NULL);
+    qio_channel_set_aio_fd_handler(proxy->ioc,
+                                   iothread_get_aio_context(vfio_user_iothread),
+                                   NULL, NULL, NULL);
+}
+
+void vfio_user_send_reply(VFIOProxy *proxy, char *buf, int ret)
+{
+    vfio_user_hdr_t *hdr = (vfio_user_hdr_t *)buf;
+
+    /*
+     * convert header to associated reply
+     * positive ret is reply size, negative is error code
+     */
+    hdr->flags = VFIO_USER_REPLY;
+    if (ret > 0) {
+        hdr->size = ret;
+    } else if (ret < 0) {
+        hdr->flags |= VFIO_USER_ERROR;
+        hdr->error_reply = -ret;
+        hdr->size = sizeof(*hdr);
+    }
+    vfio_user_send(proxy, hdr, NULL);
+}
+
+void vfio_user_recv(void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOProxy *proxy = vbasedev->proxy;
+    VFIOUserReply *reply = NULL;
+    g_autofree int *fdp = NULL;
+    VFIOUserFDs reqfds = { 0, 0, fdp };
+    vfio_user_hdr_t msg;
+    struct iovec iov = {
+        .iov_base = &msg,
+        .iov_len = sizeof(msg),
+    };
+    int isreply, i, ret;
+    size_t msgleft, numfds = 0;
+    char *data = NULL;
+    g_autofree char *buf = NULL;
+    Error *local_err = NULL;
+
+    qemu_mutex_lock(&proxy->lock);
+    if (proxy->state == CLOSING) {
+        qemu_mutex_unlock(&proxy->lock);
+        return;
+    }
+
+    ret = qio_channel_readv_full(proxy->ioc, &iov, 1, &fdp, &numfds,
+                                 &local_err);
+    if (ret <= 0) {
+        /* read error or other side closed connection */
+        error_setg_errno(&local_err, errno, "vfio_user_recv read error");
+        goto fatal;
+    }
+
+    if (ret < sizeof(msg)) {
+        error_setg(&local_err, "vfio_user_recv short read of header");
+        goto err;
+    }
+
+    /*
+     * For replies, find the matching pending request
+     */
+    switch (msg.flags & VFIO_USER_TYPE) {
+    case VFIO_USER_REQUEST:
+        isreply = 0;
+        break;
+    case VFIO_USER_REPLY:
+        isreply = 1;
+        break;
+    default:
+        error_setg(&local_err, "vfio_user_recv unknown message type");
+        goto err;
+    }
+
+    if (isreply) {
+        QTAILQ_FOREACH(reply, &proxy->pending, next) {
+            if (msg.id == reply->id) {
+                break;
+            }
+        }
+        if (reply == NULL) {
+            error_setg(&local_err, "vfio_user_recv unexpected reply");
+            goto err;
+        }
+        QTAILQ_REMOVE(&proxy->pending, reply, next);
+
+        /*
+         * Process any received FDs
+         */
+        if (numfds != 0) {
+            if (reply->fds == NULL || reply->fds->recv_fds < numfds) {
+                error_setg(&local_err, "vfio_user_recv unexpected FDs");
+                goto err;
+            }
+            reply->fds->recv_fds = numfds;
+            memcpy(reply->fds->fds, fdp, numfds * sizeof(int));
+        }
+
+    } else {
+        /*
+         * The client doesn't expect any FDs in requests, but
+         * they will be expected on the server
+         */
+        if (numfds != 0 && (proxy->flags & VFIO_PROXY_CLIENT)) {
+            error_setg(&local_err, "vfio_user_recv fd in client reply");
+            goto err;
+        }
+        reqfds.recv_fds = numfds;
+    }
+
+    /*
+     * put the whole message into a single buffer
+     */
+    msgleft = msg.size - sizeof(msg);
+    if (isreply) {
+        if (msg.size > reply->rsize) {
+            error_setg(&local_err,
+                       "vfio_user_recv reply larger than recv buffer");
+            goto fatal;
+        }
+        *reply->msg = msg;
+        data = (char *)reply->msg + sizeof(msg);
+    } else {
+        if (msg.size > max_xfer_size) {
+            error_setg(&local_err, "vfio_user_recv request larger than max");
+            goto fatal;
+        }
+        buf = g_malloc0(msg.size);
+        memcpy(buf, &msg, sizeof(msg));
+        data = buf + sizeof(msg);
+    }
+
+    if (msgleft != 0) {
+        ret = qio_channel_read(proxy->ioc, data, msgleft, &local_err);
+        if (ret < 0) {
+            goto fatal;
+        }
+        if (ret != msgleft) {
+            error_setg(&local_err, "vfio_user_recv short read of msg body");
+            goto err;
+        }
+    }
+
+    /*
+     * Replies signal a waiter, requests get processed by vfio code
+     * that may assume the iothread lock is held.
+     */
+    qemu_mutex_unlock(&proxy->lock);
+    if (isreply) {
+        reply->complete = 1;
+        qemu_cond_signal(&reply->cv);
+    } else {
+        qemu_mutex_lock_iothread();
+        /*
+         * make sure proxy wasn't closed while we waited
+         * checking without holding the proxy lock is safe
+         * since state is only set to CLOSING when iolock is held
+         */
+        if (proxy->state != CLOSING) {
+            ret = proxy->request(proxy->reqarg, buf, &reqfds);
+            if (ret < 0 && !(msg.flags & VFIO_USER_NO_REPLY)) {
+                vfio_user_send_reply(proxy, buf, ret);
+            }
+        }
+        qemu_mutex_unlock_iothread();
+    }
+
+    return;
+ fatal:
+    vfio_user_shutdown(proxy);
+    proxy->state = RECV_ERROR;
+
+ err:
+    qemu_mutex_unlock(&proxy->lock);
+    for (i = 0; i < numfds; i++) {
+        close(fdp[i]);
+    }
+    if (reply != NULL) {
+        /* force an error to keep sending thread from hanging */
+        reply->msg->flags |= VFIO_USER_ERROR;
+        reply->msg->error_reply = EINVAL;
+        reply->complete = 1;
+        qemu_cond_signal(&reply->cv);
+    }
+    error_report_err(local_err);
+}
+
+static void vfio_user_send_locked(VFIOProxy *proxy, vfio_user_hdr_t *msg,
+                                  VFIOUserFDs *fds)
+{
+    struct iovec iov = {
+        .iov_base = msg,
+        .iov_len = msg->size,
+    };
+    size_t numfds = 0;
+    int msgleft, ret, *fdp = NULL;
+    char *buf;
+    Error *local_err = NULL;
+
+    if (proxy->state != CONNECTED) {
+        msg->flags |= VFIO_USER_ERROR;
+        msg->error_reply = ECONNRESET;
+        return;
+    }
+
+    if (fds != NULL && fds->send_fds != 0) {
+        numfds = fds->send_fds;
+        fdp = fds->fds;
+    }
+    ret = qio_channel_writev_full(proxy->ioc, &iov, 1, fdp, numfds, &local_err);
+    if (ret < 0) {
+        goto err;
+    }
+    if (ret == msg->size) {
+        return;
+    }
+
+    buf = iov.iov_base + ret;
+    msgleft = iov.iov_len - ret;
+    do {
+        ret = qio_channel_write(proxy->ioc, buf, msgleft, &local_err);
+        if (ret < 0) {
+            goto err;
+        }
+        buf += ret, msgleft -= ret;
+    } while (msgleft != 0);
+    return;
+
+ err:
+    error_report_err(local_err);
+}
+
+static void vfio_user_send(VFIOProxy *proxy, vfio_user_hdr_t *msg,
+                           VFIOUserFDs *fds)
+{
+    bool iolock = qemu_mutex_iothread_locked();
+
+    if (iolock) {
+        qemu_mutex_unlock_iothread();
+    }
+    qemu_mutex_lock(&proxy->lock);
+    vfio_user_send_locked(proxy, msg, fds);
+    qemu_mutex_unlock(&proxy->lock);
+    if (iolock) {
+        qemu_mutex_lock_iothread();
+    }
+}
diff --git a/MAINTAINERS b/MAINTAINERS
index 12d69f3a45..aa4df6c418 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1883,8 +1883,12 @@ L: qemu-s390x@nongnu.org
 vfio-user
 M: John G Johnson <john.g.johnson@oracle.com>
 M: Thanos Makatos <thanos.makatos@nutanix.com>
+M: Elena Ufimtseva <elena.ufimtseva@oracle.com>
+M: Jagannathan Raman <jag.raman@oracle.com>
 S: Supported
 F: docs/devel/vfio-user.rst
+F: hw/vfio/user.c
+F: hw/vfio/user.h
 
 vhost
 M: Michael S. Tsirkin <mst@redhat.com>
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
index da9af297a0..739b30be73 100644
--- a/hw/vfio/meson.build
+++ b/hw/vfio/meson.build
@@ -8,6 +8,7 @@ vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
   'display.c',
   'pci-quirks.c',
   'pci.c',
+  'user.c',
 ))
 vfio_ss.add(when: 'CONFIG_VFIO_CCW', if_true: files('ccw.c'))
 vfio_ss.add(when: 'CONFIG_VFIO_PLATFORM', if_true: files('platform.c'))
-- 
2.25.1



  parent reply	other threads:[~2021-07-19  6:29 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-19  6:27 [PATCH RFC 00/19] vfio-user implementation Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 01/19] vfio-user: introduce vfio-user protocol specification Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 02/19] vfio-user: add VFIO base abstract class Elena Ufimtseva
2021-07-19  6:27 ` Elena Ufimtseva [this message]
2021-07-27 16:34   ` [PATCH RFC 03/19] vfio-user: define VFIO Proxy and communication functions Stefan Hajnoczi
2021-07-28 18:08     ` John Johnson
2021-07-29  8:06       ` Stefan Hajnoczi
2021-07-19  6:27 ` [PATCH RFC 04/19] vfio-user: Define type vfio_user_pci_dev_info Elena Ufimtseva
2021-07-28 10:16   ` Stefan Hajnoczi
2021-07-29  0:55     ` John Johnson
2021-07-29  8:22       ` Stefan Hajnoczi
2021-07-19  6:27 ` [PATCH RFC 05/19] vfio-user: connect vfio proxy to remote server Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 06/19] vfio-user: negotiate protocol with " Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 07/19] vfio-user: define vfio-user pci ops Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 08/19] vfio-user: VFIO container setup & teardown Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 09/19] vfio-user: get device info and get irq info Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 10/19] vfio-user: device region read/write Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 11/19] vfio-user: get region and DMA map/unmap operations Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 12/19] vfio-user: probe remote device's BARs Elena Ufimtseva
2021-07-19 22:59   ` Alex Williamson
2021-07-20  1:39     ` John Johnson
2021-07-20  3:01       ` Alex Williamson
2021-07-19  6:27 ` [PATCH RFC 13/19] vfio-user: respond to remote DMA read/write requests Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 14/19] vfio_user: setup MSI/X interrupts and PCI config operations Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 15/19] vfio-user: vfio user device realize Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 16/19] vfio-user: pci reset Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 17/19] vfio-user: probe remote device ROM BAR Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 18/19] vfio-user: migration support Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 19/19] vfio-user: add migration cli options and version negotiation Elena Ufimtseva
2021-07-19 20:00 ` [PATCH RFC server 00/11] vfio-user server in QEMU Jagannathan Raman
2021-07-19 20:00   ` [PATCH RFC server 01/11] vfio-user: build library Jagannathan Raman
2021-07-19 20:24     ` John Levon
2021-07-20 12:06       ` Jag Raman
2021-07-20 12:20         ` Marc-André Lureau
2021-07-20 13:09           ` John Levon
2021-07-19 20:00   ` [PATCH RFC server 02/11] vfio-user: define vfio-user object Jagannathan Raman
2021-07-19 20:00   ` [PATCH RFC server 03/11] vfio-user: instantiate vfio-user context Jagannathan Raman
2021-07-19 20:00   ` [PATCH RFC server 04/11] vfio-user: find and init PCI device Jagannathan Raman
2021-07-26 15:05     ` John Levon
2021-07-28 17:08       ` Jag Raman
2021-07-19 20:00   ` [PATCH RFC server 05/11] vfio-user: run vfio-user context Jagannathan Raman
2021-07-20 14:17     ` Thanos Makatos
2021-08-13 14:51       ` Jag Raman
2021-08-16 12:52         ` John Levon
2021-08-16 14:10           ` Jag Raman
2021-07-19 20:00   ` [PATCH RFC server 06/11] vfio-user: handle PCI config space accesses Jagannathan Raman
2021-07-26 15:10     ` John Levon
2021-07-19 20:00   ` [PATCH RFC server 07/11] vfio-user: handle DMA mappings Jagannathan Raman
2021-07-20 14:38     ` Thanos Makatos
2021-07-19 20:00   ` [PATCH RFC server 08/11] vfio-user: handle PCI BAR accesses Jagannathan Raman
2021-07-19 20:00   ` [PATCH RFC server 09/11] vfio-user: handle device interrupts Jagannathan Raman
2021-07-19 20:00   ` [PATCH RFC server 10/11] vfio-user: register handlers to facilitate migration Jagannathan Raman
2021-07-20 14:05     ` Thanos Makatos
2021-07-19 20:00   ` [PATCH RFC server 11/11] vfio-user: acceptance test Jagannathan Raman
2021-07-20 16:12     ` Thanos Makatos

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=cd9d5d6214d957db61120d9c3cbdc99e799a3baa.1626675354.git.elena.ufimtseva@oracle.com \
    --to=elena.ufimtseva@oracle.com \
    --cc=alex.williamson@redhat.com \
    --cc=jag.raman@oracle.com \
    --cc=john.g.johnson@oracle.com \
    --cc=john.levon@nutanix.com \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    --cc=swapnil.ingle@nutanix.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).