All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jason Wang <jasowang@redhat.com>
To: "Eugenio Pérez" <eperezma@redhat.com>, qemu-devel@nongnu.org
Cc: Laurent Vivier <lvivier@redhat.com>,
	Parav Pandit <parav@mellanox.com>, Cindy Lu <lulu@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	Richard Henderson <richard.henderson@linaro.org>,
	Markus Armbruster <armbru@redhat.com>,
	Gautam Dawar <gdawar@xilinx.com>,
	virtualization@lists.linux-foundation.org,
	Eduardo Habkost <ehabkost@redhat.com>,
	Harpreet Singh Anand <hanand@xilinx.com>,
	Xiao W Wang <xiao.w.wang@intel.com>,
	Stefan Hajnoczi <stefanha@redhat.com>,
	Eli Cohen <eli@mellanox.com>, Paolo Bonzini <pbonzini@redhat.com>,
	Zhu Lingshan <lingshan.zhu@intel.com>,
	Eric Blake <eblake@redhat.com>
Subject: Re: [PATCH 18/31] vhost: Shadow virtqueue buffers forwarding
Date: Sun, 30 Jan 2022 12:42:44 +0800	[thread overview]
Message-ID: <6c98f4e8-8695-ab83-ae37-2d6293a1fafa@redhat.com> (raw)
In-Reply-To: <20220121202733.404989-19-eperezma@redhat.com>


在 2022/1/22 上午4:27, Eugenio Pérez 写道:
> Initial version of shadow virtqueue that actually forward buffers. There
> is no iommu support at the moment, and that will be addressed in future
> patches of this series. Since all vhost-vdpa devices use forced IOMMU,
> this means that SVQ is not usable at this point of the series on any
> device.
>
> For simplicity it only supports modern devices, that expects vring
> in little endian, with split ring and no event idx or indirect
> descriptors. Support for them will not be added in this series.
>
> It reuses the VirtQueue code for the device part. The driver part is
> based on Linux's virtio_ring driver, but with stripped functionality
> and optimizations so it's easier to review.
>
> However, forwarding buffers have some particular pieces: One of the most
> unexpected ones is that a guest's buffer can expand through more than
> one descriptor in SVQ. While this is handled gracefully by qemu's
> emulated virtio devices, it may cause unexpected SVQ queue full. This
> patch also solves it by checking for this condition at both guest's
> kicks and device's calls. The code may be more elegant in the future if
> SVQ code runs in its own iocontext.
>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> ---
>   hw/virtio/vhost-shadow-virtqueue.h |   2 +
>   hw/virtio/vhost-shadow-virtqueue.c | 365 ++++++++++++++++++++++++++++-
>   hw/virtio/vhost-vdpa.c             | 111 ++++++++-
>   3 files changed, 462 insertions(+), 16 deletions(-)
>
> diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
> index 39aef5ffdf..19c934af49 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.h
> +++ b/hw/virtio/vhost-shadow-virtqueue.h
> @@ -33,6 +33,8 @@ uint16_t vhost_svq_get_num(const VhostShadowVirtqueue *svq);
>   size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq);
>   size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq);
>   
> +void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
> +                     VirtQueue *vq);
>   void vhost_svq_stop(VhostShadowVirtqueue *svq);
>   
>   VhostShadowVirtqueue *vhost_svq_new(uint16_t qsize);
> diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> index 7c168075d7..a1a404f68f 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.c
> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> @@ -9,6 +9,8 @@
>   
>   #include "qemu/osdep.h"
>   #include "hw/virtio/vhost-shadow-virtqueue.h"
> +#include "hw/virtio/vhost.h"
> +#include "hw/virtio/virtio-access.h"
>   #include "standard-headers/linux/vhost_types.h"
>   
>   #include "qemu/error-report.h"
> @@ -36,6 +38,33 @@ typedef struct VhostShadowVirtqueue {
>   
>       /* Guest's call notifier, where SVQ calls guest. */
>       EventNotifier svq_call;
> +
> +    /* Virtio queue shadowing */
> +    VirtQueue *vq;
> +
> +    /* Virtio device */
> +    VirtIODevice *vdev;
> +
> +    /* Map for returning guest's descriptors */
> +    VirtQueueElement **ring_id_maps;
> +
> +    /* Next VirtQueue element that guest made available */
> +    VirtQueueElement *next_guest_avail_elem;
> +
> +    /* Next head to expose to device */
> +    uint16_t avail_idx_shadow;
> +
> +    /* Next free descriptor */
> +    uint16_t free_head;
> +
> +    /* Last seen used idx */
> +    uint16_t shadow_used_idx;
> +
> +    /* Next head to consume from device */
> +    uint16_t last_used_idx;
> +
> +    /* Cache for the exposed notification flag */
> +    bool notification;
>   } VhostShadowVirtqueue;
>   
>   #define INVALID_SVQ_KICK_FD -1
> @@ -148,30 +177,294 @@ bool vhost_svq_ack_guest_features(uint64_t dev_features,
>       return true;
>   }
>   
> -/* Forward guest notifications */
> -static void vhost_handle_guest_kick(EventNotifier *n)
> +/**
> + * Number of descriptors that SVQ can make available from the guest.
> + *
> + * @svq   The svq
> + */
> +static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
>   {
> -    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
> -                                             svq_kick);
> +    return svq->vring.num - (svq->avail_idx_shadow - svq->shadow_used_idx);
> +}
> +
> +static void vhost_svq_set_notification(VhostShadowVirtqueue *svq, bool enable)
> +{
> +    uint16_t notification_flag;
>   
> -    if (unlikely(!event_notifier_test_and_clear(n))) {
> +    if (svq->notification == enable) {
> +        return;
> +    }
> +
> +    notification_flag = cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
> +
> +    svq->notification = enable;
> +    if (enable) {
> +        svq->vring.avail->flags &= ~notification_flag;
> +    } else {
> +        svq->vring.avail->flags |= notification_flag;
> +    }
> +}
> +
> +static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
> +                                    const struct iovec *iovec,
> +                                    size_t num, bool more_descs, bool write)
> +{
> +    uint16_t i = svq->free_head, last = svq->free_head;
> +    unsigned n;
> +    uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
> +    vring_desc_t *descs = svq->vring.desc;
> +
> +    if (num == 0) {
> +        return;
> +    }
> +
> +    for (n = 0; n < num; n++) {
> +        if (more_descs || (n + 1 < num)) {
> +            descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
> +        } else {
> +            descs[i].flags = flags;
> +        }
> +        descs[i].addr = cpu_to_le64((hwaddr)iovec[n].iov_base);
> +        descs[i].len = cpu_to_le32(iovec[n].iov_len);
> +
> +        last = i;
> +        i = cpu_to_le16(descs[i].next);
> +    }
> +
> +    svq->free_head = le16_to_cpu(descs[last].next);
> +}
> +
> +static unsigned vhost_svq_add_split(VhostShadowVirtqueue *svq,
> +                                    VirtQueueElement *elem)
> +{
> +    int head;
> +    unsigned avail_idx;
> +    vring_avail_t *avail = svq->vring.avail;
> +
> +    head = svq->free_head;
> +
> +    /* We need some descriptors here */
> +    assert(elem->out_num || elem->in_num);


Looks like this could be triggered by guest, we need fail instead assert 
here.


> +
> +    vhost_vring_write_descs(svq, elem->out_sg, elem->out_num,
> +                            elem->in_num > 0, false);
> +    vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
> +
> +    /*
> +     * Put entry in available array (but don't update avail->idx until they
> +     * do sync).
> +     */
> +    avail_idx = svq->avail_idx_shadow & (svq->vring.num - 1);
> +    avail->ring[avail_idx] = cpu_to_le16(head);
> +    svq->avail_idx_shadow++;
> +
> +    /* Update avail index after the descriptor is wrote */
> +    smp_wmb();
> +    avail->idx = cpu_to_le16(svq->avail_idx_shadow);
> +
> +    return head;
> +}
> +
> +static void vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
> +{
> +    unsigned qemu_head = vhost_svq_add_split(svq, elem);
> +
> +    svq->ring_id_maps[qemu_head] = elem;
> +}
> +
> +static void vhost_svq_kick(VhostShadowVirtqueue *svq)
> +{
> +    /* We need to expose available array entries before checking used flags */
> +    smp_mb();
> +    if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
>           return;
>       }
>   
>       event_notifier_set(&svq->hdev_kick);
>   }
>   
> -/* Forward vhost notifications */
> +/**
> + * Forward available buffers.
> + *
> + * @svq Shadow VirtQueue
> + *
> + * Note that this function does not guarantee that all guest's available
> + * buffers are available to the device in SVQ avail ring. The guest may have
> + * exposed a GPA / GIOVA congiuous buffer, but it may not be contiguous in qemu
> + * vaddr.
> + *
> + * If that happens, guest's kick notifications will be disabled until device
> + * makes some buffers used.
> + */
> +static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
> +{
> +    /* Clear event notifier */
> +    event_notifier_test_and_clear(&svq->svq_kick);
> +
> +    /* Make available as many buffers as possible */
> +    do {
> +        if (virtio_queue_get_notification(svq->vq)) {
> +            virtio_queue_set_notification(svq->vq, false);


This looks like an optimization the should belong to 
virtio_queue_set_notification() itself.


> +        }
> +
> +        while (true) {
> +            VirtQueueElement *elem;
> +
> +            if (svq->next_guest_avail_elem) {
> +                elem = g_steal_pointer(&svq->next_guest_avail_elem);
> +            } else {
> +                elem = virtqueue_pop(svq->vq, sizeof(*elem));
> +            }
> +
> +            if (!elem) {
> +                break;
> +            }
> +
> +            if (elem->out_num + elem->in_num >
> +                vhost_svq_available_slots(svq)) {
> +                /*
> +                 * This condition is possible since a contiguous buffer in GPA
> +                 * does not imply a contiguous buffer in qemu's VA
> +                 * scatter-gather segments. If that happen, the buffer exposed
> +                 * to the device needs to be a chain of descriptors at this
> +                 * moment.
> +                 *
> +                 * SVQ cannot hold more available buffers if we are here:
> +                 * queue the current guest descriptor and ignore further kicks
> +                 * until some elements are used.
> +                 */
> +                svq->next_guest_avail_elem = elem;
> +                return;
> +            }
> +
> +            vhost_svq_add(svq, elem);
> +            vhost_svq_kick(svq);
> +        }
> +
> +        virtio_queue_set_notification(svq->vq, true);
> +    } while (!virtio_queue_empty(svq->vq));
> +}
> +
> +/**
> + * Handle guest's kick.
> + *
> + * @n guest kick event notifier, the one that guest set to notify svq.
> + */
> +static void vhost_handle_guest_kick_notifier(EventNotifier *n)
> +{
> +    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
> +                                             svq_kick);
> +    vhost_handle_guest_kick(svq);
> +}
> +
> +static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
> +{
> +    if (svq->last_used_idx != svq->shadow_used_idx) {
> +        return true;
> +    }
> +
> +    svq->shadow_used_idx = cpu_to_le16(svq->vring.used->idx);
> +
> +    return svq->last_used_idx != svq->shadow_used_idx;
> +}
> +
> +static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq)
> +{
> +    vring_desc_t *descs = svq->vring.desc;
> +    const vring_used_t *used = svq->vring.used;
> +    vring_used_elem_t used_elem;
> +    uint16_t last_used;
> +
> +    if (!vhost_svq_more_used(svq)) {
> +        return NULL;
> +    }
> +
> +    /* Only get used array entries after they have been exposed by dev */
> +    smp_rmb();
> +    last_used = svq->last_used_idx & (svq->vring.num - 1);
> +    used_elem.id = le32_to_cpu(used->ring[last_used].id);
> +    used_elem.len = le32_to_cpu(used->ring[last_used].len);
> +
> +    svq->last_used_idx++;
> +    if (unlikely(used_elem.id >= svq->vring.num)) {
> +        error_report("Device %s says index %u is used", svq->vdev->name,
> +                     used_elem.id);
> +        return NULL;
> +    }
> +
> +    if (unlikely(!svq->ring_id_maps[used_elem.id])) {
> +        error_report(
> +            "Device %s says index %u is used, but it was not available",
> +            svq->vdev->name, used_elem.id);
> +        return NULL;
> +    }
> +
> +    descs[used_elem.id].next = svq->free_head;
> +    svq->free_head = used_elem.id;
> +
> +    svq->ring_id_maps[used_elem.id]->len = used_elem.len;
> +    return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
> +}
> +
> +static void vhost_svq_flush(VhostShadowVirtqueue *svq,
> +                            bool check_for_avail_queue)
> +{
> +    VirtQueue *vq = svq->vq;
> +
> +    /* Make as many buffers as possible used. */
> +    do {
> +        unsigned i = 0;
> +
> +        vhost_svq_set_notification(svq, false);
> +        while (true) {
> +            g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq);
> +            if (!elem) {
> +                break;
> +            }
> +
> +            if (unlikely(i >= svq->vring.num)) {
> +                virtio_error(svq->vdev,
> +                         "More than %u used buffers obtained in a %u size SVQ",
> +                         i, svq->vring.num);
> +                virtqueue_fill(vq, elem, elem->len, i);
> +                virtqueue_flush(vq, i);


Let's simply use virtqueue_push() here?


> +                i = 0;


Do we need to bail out here?


> +            }
> +            virtqueue_fill(vq, elem, elem->len, i++);
> +        }
> +
> +        virtqueue_flush(vq, i);
> +        event_notifier_set(&svq->svq_call);
> +
> +        if (check_for_avail_queue && svq->next_guest_avail_elem) {
> +            /*
> +             * Avail ring was full when vhost_svq_flush was called, so it's a
> +             * good moment to make more descriptors available if possible
> +             */
> +            vhost_handle_guest_kick(svq);


Is there better to have a similar check as vhost_handle_guest_kick() did?

             if (elem->out_num + elem->in_num >
                 vhost_svq_available_slots(svq)) {


> +        }
> +
> +        vhost_svq_set_notification(svq, true);


A mb() is needed here? Otherwise we may lost a call here (where 
vhost_svq_more_used() is run before vhost_svq_set_notification()).


> +    } while (vhost_svq_more_used(svq));
> +}
> +
> +/**
> + * Forward used buffers.
> + *
> + * @n hdev call event notifier, the one that device set to notify svq.
> + *
> + * Note that we are not making any buffers available in the loop, there is no
> + * way that it runs more than virtqueue size times.
> + */
>   static void vhost_svq_handle_call(EventNotifier *n)
>   {
>       VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
>                                                hdev_call);
>   
> -    if (unlikely(!event_notifier_test_and_clear(n))) {
> -        return;
> -    }
> +    /* Clear event notifier */
> +    event_notifier_test_and_clear(n);


Any reason that we remove the above check?


>   
> -    event_notifier_set(&svq->svq_call);
> +    vhost_svq_flush(svq, true);
>   }
>   
>   /**
> @@ -258,13 +551,38 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
>        * need to explicitely check for them.
>        */
>       event_notifier_init_fd(&svq->svq_kick, svq_kick_fd);
> -    event_notifier_set_handler(&svq->svq_kick, vhost_handle_guest_kick);
> +    event_notifier_set_handler(&svq->svq_kick,
> +                               vhost_handle_guest_kick_notifier);
>   
>       if (!check_old || event_notifier_test_and_clear(&tmp)) {
>           event_notifier_set(&svq->hdev_kick);
>       }
>   }
>   
> +/**
> + * Start shadow virtqueue operation.
> + *
> + * @svq Shadow Virtqueue
> + * @vdev        VirtIO device
> + * @vq          Virtqueue to shadow
> + */
> +void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
> +                     VirtQueue *vq)
> +{
> +    svq->next_guest_avail_elem = NULL;
> +    svq->avail_idx_shadow = 0;
> +    svq->shadow_used_idx = 0;
> +    svq->last_used_idx = 0;
> +    svq->vdev = vdev;
> +    svq->vq = vq;
> +
> +    memset(svq->vring.avail, 0, sizeof(*svq->vring.avail));
> +    memset(svq->vring.used, 0, sizeof(*svq->vring.avail));
> +    for (unsigned i = 0; i < svq->vring.num - 1; i++) {
> +        svq->vring.desc[i].next = cpu_to_le16(i + 1);
> +    }
> +}
> +
>   /**
>    * Stop shadow virtqueue operation.
>    * @svq Shadow Virtqueue
> @@ -272,6 +590,28 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
>   void vhost_svq_stop(VhostShadowVirtqueue *svq)
>   {
>       event_notifier_set_handler(&svq->svq_kick, NULL);
> +    g_autofree VirtQueueElement *next_avail_elem = NULL;
> +
> +    if (!svq->vq) {
> +        return;
> +    }
> +
> +    /* Send all pending used descriptors to guest */
> +    vhost_svq_flush(svq, false);
> +
> +    for (unsigned i = 0; i < svq->vring.num; ++i) {
> +        g_autofree VirtQueueElement *elem = NULL;
> +        elem = g_steal_pointer(&svq->ring_id_maps[i]);
> +        if (elem) {
> +            virtqueue_detach_element(svq->vq, elem, elem->len);
> +        }
> +    }
> +
> +    next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem);
> +    if (next_avail_elem) {
> +        virtqueue_detach_element(svq->vq, next_avail_elem,
> +                                 next_avail_elem->len);
> +    }
>   }
>   
>   /**
> @@ -316,7 +656,7 @@ VhostShadowVirtqueue *vhost_svq_new(uint16_t qsize)
>       memset(svq->vring.desc, 0, driver_size);
>       svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size);
>       memset(svq->vring.used, 0, device_size);
> -
> +    svq->ring_id_maps = g_new0(VirtQueueElement *, qsize);
>       event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
>       return g_steal_pointer(&svq);
>   
> @@ -335,6 +675,7 @@ void vhost_svq_free(VhostShadowVirtqueue *vq)
>       event_notifier_cleanup(&vq->hdev_kick);
>       event_notifier_set_handler(&vq->hdev_call, NULL);
>       event_notifier_cleanup(&vq->hdev_call);
> +    g_free(vq->ring_id_maps);
>       qemu_vfree(vq->vring.desc);
>       qemu_vfree(vq->vring.used);
>       g_free(vq);
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 53e14bafa0..0e5c00ed7e 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -752,9 +752,9 @@ static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
>    * Note that this function does not rewind kick file descriptor if cannot set
>    * call one.
>    */
> -static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
> -                                VhostShadowVirtqueue *svq,
> -                                unsigned idx)
> +static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
> +                                  VhostShadowVirtqueue *svq,
> +                                  unsigned idx)
>   {
>       struct vhost_vring_file file = {
>           .index = dev->vq_index + idx,
> @@ -767,7 +767,7 @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
>       r = vhost_vdpa_set_vring_dev_kick(dev, &file);
>       if (unlikely(r != 0)) {
>           error_report("Can't set device kick fd (%d)", -r);
> -        return false;
> +        return r;
>       }
>   
>       event_notifier = vhost_svq_get_svq_call_notifier(svq);
> @@ -777,6 +777,99 @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
>           error_report("Can't set device call fd (%d)", -r);
>       }
>   
> +    return r;
> +}
> +
> +/**
> + * Unmap SVQ area in the device
> + */
> +static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr iova,
> +                                      hwaddr size)
> +{
> +    int r;
> +
> +    size = ROUND_UP(size, qemu_real_host_page_size);
> +    r = vhost_vdpa_dma_unmap(v, iova, size);
> +    return r == 0;
> +}
> +
> +static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
> +                                       const VhostShadowVirtqueue *svq)
> +{
> +    struct vhost_vdpa *v = dev->opaque;
> +    struct vhost_vring_addr svq_addr;
> +    size_t device_size = vhost_svq_device_area_size(svq);
> +    size_t driver_size = vhost_svq_driver_area_size(svq);
> +    bool ok;
> +
> +    vhost_svq_get_vring_addr(svq, &svq_addr);
> +
> +    ok = vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr, driver_size);
> +    if (unlikely(!ok)) {
> +        return false;
> +    }
> +
> +    return vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr, device_size);
> +}
> +
> +/**
> + * Map shadow virtqueue rings in device
> + *
> + * @dev   The vhost device
> + * @svq   The shadow virtqueue
> + */
> +static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
> +                                     const VhostShadowVirtqueue *svq)
> +{
> +    struct vhost_vdpa *v = dev->opaque;
> +    struct vhost_vring_addr svq_addr;
> +    size_t device_size = vhost_svq_device_area_size(svq);
> +    size_t driver_size = vhost_svq_driver_area_size(svq);
> +    int r;
> +
> +    vhost_svq_get_vring_addr(svq, &svq_addr);
> +
> +    r = vhost_vdpa_dma_map(v, svq_addr.desc_user_addr, driver_size,
> +                           (void *)svq_addr.desc_user_addr, true);
> +    if (unlikely(r != 0)) {
> +        return false;
> +    }
> +
> +    r = vhost_vdpa_dma_map(v, svq_addr.used_user_addr, device_size,
> +                           (void *)svq_addr.used_user_addr, false);


Do we need unmap the driver area if we fail here?

Thanks


> +    return r == 0;
> +}
> +
> +static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
> +                                VhostShadowVirtqueue *svq,
> +                                unsigned idx)
> +{
> +    uint16_t vq_index = dev->vq_index + idx;
> +    struct vhost_vring_state s = {
> +        .index = vq_index,
> +    };
> +    int r;
> +    bool ok;
> +
> +    r = vhost_vdpa_set_dev_vring_base(dev, &s);
> +    if (unlikely(r)) {
> +        error_report("Can't set vring base (%d)", r);
> +        return false;
> +    }
> +
> +    s.num = vhost_svq_get_num(svq);
> +    r = vhost_vdpa_set_dev_vring_num(dev, &s);
> +    if (unlikely(r)) {
> +        error_report("Can't set vring num (%d)", r);
> +        return false;
> +    }
> +
> +    ok = vhost_vdpa_svq_map_rings(dev, svq);
> +    if (unlikely(!ok)) {
> +        return false;
> +    }
> +
> +    r = vhost_vdpa_svq_set_fds(dev, svq, idx);
>       return r == 0;
>   }
>   
> @@ -788,14 +881,24 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
>       if (started) {
>           vhost_vdpa_host_notifiers_init(dev);
>           for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
> +            VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
>               VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
>               bool ok = vhost_vdpa_svq_setup(dev, svq, i);
>               if (unlikely(!ok)) {
>                   return -1;
>               }
> +            vhost_svq_start(svq, dev->vdev, vq);
>           }
>           vhost_vdpa_set_vring_ready(dev);
>       } else {
> +        for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
> +            VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs,
> +                                                          i);
> +            bool ok = vhost_vdpa_svq_unmap_rings(dev, svq);
> +            if (unlikely(!ok)) {
> +                return -1;
> +            }
> +        }
>           vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
>       }
>   

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

WARNING: multiple messages have this Message-ID (diff)
From: Jason Wang <jasowang@redhat.com>
To: "Eugenio Pérez" <eperezma@redhat.com>, qemu-devel@nongnu.org
Cc: Laurent Vivier <lvivier@redhat.com>,
	Parav Pandit <parav@mellanox.com>, Cindy Lu <lulu@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	Juan Quintela <quintela@redhat.com>,
	Richard Henderson <richard.henderson@linaro.org>,
	Markus Armbruster <armbru@redhat.com>,
	Gautam Dawar <gdawar@xilinx.com>,
	virtualization@lists.linux-foundation.org,
	Eduardo Habkost <ehabkost@redhat.com>,
	Harpreet Singh Anand <hanand@xilinx.com>,
	Xiao W Wang <xiao.w.wang@intel.com>, Peter Xu <peterx@redhat.com>,
	Stefan Hajnoczi <stefanha@redhat.com>,
	Eli Cohen <eli@mellanox.com>, Paolo Bonzini <pbonzini@redhat.com>,
	Zhu Lingshan <lingshan.zhu@intel.com>,
	Eric Blake <eblake@redhat.com>,
	Stefano Garzarella <sgarzare@redhat.com>
Subject: Re: [PATCH 18/31] vhost: Shadow virtqueue buffers forwarding
Date: Sun, 30 Jan 2022 12:42:44 +0800	[thread overview]
Message-ID: <6c98f4e8-8695-ab83-ae37-2d6293a1fafa@redhat.com> (raw)
In-Reply-To: <20220121202733.404989-19-eperezma@redhat.com>


在 2022/1/22 上午4:27, Eugenio Pérez 写道:
> Initial version of shadow virtqueue that actually forward buffers. There
> is no iommu support at the moment, and that will be addressed in future
> patches of this series. Since all vhost-vdpa devices use forced IOMMU,
> this means that SVQ is not usable at this point of the series on any
> device.
>
> For simplicity it only supports modern devices, that expects vring
> in little endian, with split ring and no event idx or indirect
> descriptors. Support for them will not be added in this series.
>
> It reuses the VirtQueue code for the device part. The driver part is
> based on Linux's virtio_ring driver, but with stripped functionality
> and optimizations so it's easier to review.
>
> However, forwarding buffers have some particular pieces: One of the most
> unexpected ones is that a guest's buffer can expand through more than
> one descriptor in SVQ. While this is handled gracefully by qemu's
> emulated virtio devices, it may cause unexpected SVQ queue full. This
> patch also solves it by checking for this condition at both guest's
> kicks and device's calls. The code may be more elegant in the future if
> SVQ code runs in its own iocontext.
>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> ---
>   hw/virtio/vhost-shadow-virtqueue.h |   2 +
>   hw/virtio/vhost-shadow-virtqueue.c | 365 ++++++++++++++++++++++++++++-
>   hw/virtio/vhost-vdpa.c             | 111 ++++++++-
>   3 files changed, 462 insertions(+), 16 deletions(-)
>
> diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
> index 39aef5ffdf..19c934af49 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.h
> +++ b/hw/virtio/vhost-shadow-virtqueue.h
> @@ -33,6 +33,8 @@ uint16_t vhost_svq_get_num(const VhostShadowVirtqueue *svq);
>   size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq);
>   size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq);
>   
> +void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
> +                     VirtQueue *vq);
>   void vhost_svq_stop(VhostShadowVirtqueue *svq);
>   
>   VhostShadowVirtqueue *vhost_svq_new(uint16_t qsize);
> diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> index 7c168075d7..a1a404f68f 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.c
> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> @@ -9,6 +9,8 @@
>   
>   #include "qemu/osdep.h"
>   #include "hw/virtio/vhost-shadow-virtqueue.h"
> +#include "hw/virtio/vhost.h"
> +#include "hw/virtio/virtio-access.h"
>   #include "standard-headers/linux/vhost_types.h"
>   
>   #include "qemu/error-report.h"
> @@ -36,6 +38,33 @@ typedef struct VhostShadowVirtqueue {
>   
>       /* Guest's call notifier, where SVQ calls guest. */
>       EventNotifier svq_call;
> +
> +    /* Virtio queue shadowing */
> +    VirtQueue *vq;
> +
> +    /* Virtio device */
> +    VirtIODevice *vdev;
> +
> +    /* Map for returning guest's descriptors */
> +    VirtQueueElement **ring_id_maps;
> +
> +    /* Next VirtQueue element that guest made available */
> +    VirtQueueElement *next_guest_avail_elem;
> +
> +    /* Next head to expose to device */
> +    uint16_t avail_idx_shadow;
> +
> +    /* Next free descriptor */
> +    uint16_t free_head;
> +
> +    /* Last seen used idx */
> +    uint16_t shadow_used_idx;
> +
> +    /* Next head to consume from device */
> +    uint16_t last_used_idx;
> +
> +    /* Cache for the exposed notification flag */
> +    bool notification;
>   } VhostShadowVirtqueue;
>   
>   #define INVALID_SVQ_KICK_FD -1
> @@ -148,30 +177,294 @@ bool vhost_svq_ack_guest_features(uint64_t dev_features,
>       return true;
>   }
>   
> -/* Forward guest notifications */
> -static void vhost_handle_guest_kick(EventNotifier *n)
> +/**
> + * Number of descriptors that SVQ can make available from the guest.
> + *
> + * @svq   The svq
> + */
> +static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
>   {
> -    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
> -                                             svq_kick);
> +    return svq->vring.num - (svq->avail_idx_shadow - svq->shadow_used_idx);
> +}
> +
> +static void vhost_svq_set_notification(VhostShadowVirtqueue *svq, bool enable)
> +{
> +    uint16_t notification_flag;
>   
> -    if (unlikely(!event_notifier_test_and_clear(n))) {
> +    if (svq->notification == enable) {
> +        return;
> +    }
> +
> +    notification_flag = cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
> +
> +    svq->notification = enable;
> +    if (enable) {
> +        svq->vring.avail->flags &= ~notification_flag;
> +    } else {
> +        svq->vring.avail->flags |= notification_flag;
> +    }
> +}
> +
> +static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
> +                                    const struct iovec *iovec,
> +                                    size_t num, bool more_descs, bool write)
> +{
> +    uint16_t i = svq->free_head, last = svq->free_head;
> +    unsigned n;
> +    uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
> +    vring_desc_t *descs = svq->vring.desc;
> +
> +    if (num == 0) {
> +        return;
> +    }
> +
> +    for (n = 0; n < num; n++) {
> +        if (more_descs || (n + 1 < num)) {
> +            descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
> +        } else {
> +            descs[i].flags = flags;
> +        }
> +        descs[i].addr = cpu_to_le64((hwaddr)iovec[n].iov_base);
> +        descs[i].len = cpu_to_le32(iovec[n].iov_len);
> +
> +        last = i;
> +        i = cpu_to_le16(descs[i].next);
> +    }
> +
> +    svq->free_head = le16_to_cpu(descs[last].next);
> +}
> +
> +static unsigned vhost_svq_add_split(VhostShadowVirtqueue *svq,
> +                                    VirtQueueElement *elem)
> +{
> +    int head;
> +    unsigned avail_idx;
> +    vring_avail_t *avail = svq->vring.avail;
> +
> +    head = svq->free_head;
> +
> +    /* We need some descriptors here */
> +    assert(elem->out_num || elem->in_num);


Looks like this could be triggered by guest, we need fail instead assert 
here.


> +
> +    vhost_vring_write_descs(svq, elem->out_sg, elem->out_num,
> +                            elem->in_num > 0, false);
> +    vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
> +
> +    /*
> +     * Put entry in available array (but don't update avail->idx until they
> +     * do sync).
> +     */
> +    avail_idx = svq->avail_idx_shadow & (svq->vring.num - 1);
> +    avail->ring[avail_idx] = cpu_to_le16(head);
> +    svq->avail_idx_shadow++;
> +
> +    /* Update avail index after the descriptor is wrote */
> +    smp_wmb();
> +    avail->idx = cpu_to_le16(svq->avail_idx_shadow);
> +
> +    return head;
> +}
> +
> +static void vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
> +{
> +    unsigned qemu_head = vhost_svq_add_split(svq, elem);
> +
> +    svq->ring_id_maps[qemu_head] = elem;
> +}
> +
> +static void vhost_svq_kick(VhostShadowVirtqueue *svq)
> +{
> +    /* We need to expose available array entries before checking used flags */
> +    smp_mb();
> +    if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
>           return;
>       }
>   
>       event_notifier_set(&svq->hdev_kick);
>   }
>   
> -/* Forward vhost notifications */
> +/**
> + * Forward available buffers.
> + *
> + * @svq Shadow VirtQueue
> + *
> + * Note that this function does not guarantee that all guest's available
> + * buffers are available to the device in SVQ avail ring. The guest may have
> + * exposed a GPA / GIOVA congiuous buffer, but it may not be contiguous in qemu
> + * vaddr.
> + *
> + * If that happens, guest's kick notifications will be disabled until device
> + * makes some buffers used.
> + */
> +static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
> +{
> +    /* Clear event notifier */
> +    event_notifier_test_and_clear(&svq->svq_kick);
> +
> +    /* Make available as many buffers as possible */
> +    do {
> +        if (virtio_queue_get_notification(svq->vq)) {
> +            virtio_queue_set_notification(svq->vq, false);


This looks like an optimization the should belong to 
virtio_queue_set_notification() itself.


> +        }
> +
> +        while (true) {
> +            VirtQueueElement *elem;
> +
> +            if (svq->next_guest_avail_elem) {
> +                elem = g_steal_pointer(&svq->next_guest_avail_elem);
> +            } else {
> +                elem = virtqueue_pop(svq->vq, sizeof(*elem));
> +            }
> +
> +            if (!elem) {
> +                break;
> +            }
> +
> +            if (elem->out_num + elem->in_num >
> +                vhost_svq_available_slots(svq)) {
> +                /*
> +                 * This condition is possible since a contiguous buffer in GPA
> +                 * does not imply a contiguous buffer in qemu's VA
> +                 * scatter-gather segments. If that happen, the buffer exposed
> +                 * to the device needs to be a chain of descriptors at this
> +                 * moment.
> +                 *
> +                 * SVQ cannot hold more available buffers if we are here:
> +                 * queue the current guest descriptor and ignore further kicks
> +                 * until some elements are used.
> +                 */
> +                svq->next_guest_avail_elem = elem;
> +                return;
> +            }
> +
> +            vhost_svq_add(svq, elem);
> +            vhost_svq_kick(svq);
> +        }
> +
> +        virtio_queue_set_notification(svq->vq, true);
> +    } while (!virtio_queue_empty(svq->vq));
> +}
> +
> +/**
> + * Handle guest's kick.
> + *
> + * @n guest kick event notifier, the one that guest set to notify svq.
> + */
> +static void vhost_handle_guest_kick_notifier(EventNotifier *n)
> +{
> +    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
> +                                             svq_kick);
> +    vhost_handle_guest_kick(svq);
> +}
> +
> +static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
> +{
> +    if (svq->last_used_idx != svq->shadow_used_idx) {
> +        return true;
> +    }
> +
> +    svq->shadow_used_idx = cpu_to_le16(svq->vring.used->idx);
> +
> +    return svq->last_used_idx != svq->shadow_used_idx;
> +}
> +
> +static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq)
> +{
> +    vring_desc_t *descs = svq->vring.desc;
> +    const vring_used_t *used = svq->vring.used;
> +    vring_used_elem_t used_elem;
> +    uint16_t last_used;
> +
> +    if (!vhost_svq_more_used(svq)) {
> +        return NULL;
> +    }
> +
> +    /* Only get used array entries after they have been exposed by dev */
> +    smp_rmb();
> +    last_used = svq->last_used_idx & (svq->vring.num - 1);
> +    used_elem.id = le32_to_cpu(used->ring[last_used].id);
> +    used_elem.len = le32_to_cpu(used->ring[last_used].len);
> +
> +    svq->last_used_idx++;
> +    if (unlikely(used_elem.id >= svq->vring.num)) {
> +        error_report("Device %s says index %u is used", svq->vdev->name,
> +                     used_elem.id);
> +        return NULL;
> +    }
> +
> +    if (unlikely(!svq->ring_id_maps[used_elem.id])) {
> +        error_report(
> +            "Device %s says index %u is used, but it was not available",
> +            svq->vdev->name, used_elem.id);
> +        return NULL;
> +    }
> +
> +    descs[used_elem.id].next = svq->free_head;
> +    svq->free_head = used_elem.id;
> +
> +    svq->ring_id_maps[used_elem.id]->len = used_elem.len;
> +    return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
> +}
> +
> +static void vhost_svq_flush(VhostShadowVirtqueue *svq,
> +                            bool check_for_avail_queue)
> +{
> +    VirtQueue *vq = svq->vq;
> +
> +    /* Make as many buffers as possible used. */
> +    do {
> +        unsigned i = 0;
> +
> +        vhost_svq_set_notification(svq, false);
> +        while (true) {
> +            g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq);
> +            if (!elem) {
> +                break;
> +            }
> +
> +            if (unlikely(i >= svq->vring.num)) {
> +                virtio_error(svq->vdev,
> +                         "More than %u used buffers obtained in a %u size SVQ",
> +                         i, svq->vring.num);
> +                virtqueue_fill(vq, elem, elem->len, i);
> +                virtqueue_flush(vq, i);


Let's simply use virtqueue_push() here?


> +                i = 0;


Do we need to bail out here?


> +            }
> +            virtqueue_fill(vq, elem, elem->len, i++);
> +        }
> +
> +        virtqueue_flush(vq, i);
> +        event_notifier_set(&svq->svq_call);
> +
> +        if (check_for_avail_queue && svq->next_guest_avail_elem) {
> +            /*
> +             * Avail ring was full when vhost_svq_flush was called, so it's a
> +             * good moment to make more descriptors available if possible
> +             */
> +            vhost_handle_guest_kick(svq);


Is there better to have a similar check as vhost_handle_guest_kick() did?

             if (elem->out_num + elem->in_num >
                 vhost_svq_available_slots(svq)) {


> +        }
> +
> +        vhost_svq_set_notification(svq, true);


A mb() is needed here? Otherwise we may lost a call here (where 
vhost_svq_more_used() is run before vhost_svq_set_notification()).


> +    } while (vhost_svq_more_used(svq));
> +}
> +
> +/**
> + * Forward used buffers.
> + *
> + * @n hdev call event notifier, the one that device set to notify svq.
> + *
> + * Note that we are not making any buffers available in the loop, there is no
> + * way that it runs more than virtqueue size times.
> + */
>   static void vhost_svq_handle_call(EventNotifier *n)
>   {
>       VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
>                                                hdev_call);
>   
> -    if (unlikely(!event_notifier_test_and_clear(n))) {
> -        return;
> -    }
> +    /* Clear event notifier */
> +    event_notifier_test_and_clear(n);


Any reason that we remove the above check?


>   
> -    event_notifier_set(&svq->svq_call);
> +    vhost_svq_flush(svq, true);
>   }
>   
>   /**
> @@ -258,13 +551,38 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
>        * need to explicitely check for them.
>        */
>       event_notifier_init_fd(&svq->svq_kick, svq_kick_fd);
> -    event_notifier_set_handler(&svq->svq_kick, vhost_handle_guest_kick);
> +    event_notifier_set_handler(&svq->svq_kick,
> +                               vhost_handle_guest_kick_notifier);
>   
>       if (!check_old || event_notifier_test_and_clear(&tmp)) {
>           event_notifier_set(&svq->hdev_kick);
>       }
>   }
>   
> +/**
> + * Start shadow virtqueue operation.
> + *
> + * @svq Shadow Virtqueue
> + * @vdev        VirtIO device
> + * @vq          Virtqueue to shadow
> + */
> +void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
> +                     VirtQueue *vq)
> +{
> +    svq->next_guest_avail_elem = NULL;
> +    svq->avail_idx_shadow = 0;
> +    svq->shadow_used_idx = 0;
> +    svq->last_used_idx = 0;
> +    svq->vdev = vdev;
> +    svq->vq = vq;
> +
> +    memset(svq->vring.avail, 0, sizeof(*svq->vring.avail));
> +    memset(svq->vring.used, 0, sizeof(*svq->vring.avail));
> +    for (unsigned i = 0; i < svq->vring.num - 1; i++) {
> +        svq->vring.desc[i].next = cpu_to_le16(i + 1);
> +    }
> +}
> +
>   /**
>    * Stop shadow virtqueue operation.
>    * @svq Shadow Virtqueue
> @@ -272,6 +590,28 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
>   void vhost_svq_stop(VhostShadowVirtqueue *svq)
>   {
>       event_notifier_set_handler(&svq->svq_kick, NULL);
> +    g_autofree VirtQueueElement *next_avail_elem = NULL;
> +
> +    if (!svq->vq) {
> +        return;
> +    }
> +
> +    /* Send all pending used descriptors to guest */
> +    vhost_svq_flush(svq, false);
> +
> +    for (unsigned i = 0; i < svq->vring.num; ++i) {
> +        g_autofree VirtQueueElement *elem = NULL;
> +        elem = g_steal_pointer(&svq->ring_id_maps[i]);
> +        if (elem) {
> +            virtqueue_detach_element(svq->vq, elem, elem->len);
> +        }
> +    }
> +
> +    next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem);
> +    if (next_avail_elem) {
> +        virtqueue_detach_element(svq->vq, next_avail_elem,
> +                                 next_avail_elem->len);
> +    }
>   }
>   
>   /**
> @@ -316,7 +656,7 @@ VhostShadowVirtqueue *vhost_svq_new(uint16_t qsize)
>       memset(svq->vring.desc, 0, driver_size);
>       svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size);
>       memset(svq->vring.used, 0, device_size);
> -
> +    svq->ring_id_maps = g_new0(VirtQueueElement *, qsize);
>       event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
>       return g_steal_pointer(&svq);
>   
> @@ -335,6 +675,7 @@ void vhost_svq_free(VhostShadowVirtqueue *vq)
>       event_notifier_cleanup(&vq->hdev_kick);
>       event_notifier_set_handler(&vq->hdev_call, NULL);
>       event_notifier_cleanup(&vq->hdev_call);
> +    g_free(vq->ring_id_maps);
>       qemu_vfree(vq->vring.desc);
>       qemu_vfree(vq->vring.used);
>       g_free(vq);
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 53e14bafa0..0e5c00ed7e 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -752,9 +752,9 @@ static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
>    * Note that this function does not rewind kick file descriptor if cannot set
>    * call one.
>    */
> -static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
> -                                VhostShadowVirtqueue *svq,
> -                                unsigned idx)
> +static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
> +                                  VhostShadowVirtqueue *svq,
> +                                  unsigned idx)
>   {
>       struct vhost_vring_file file = {
>           .index = dev->vq_index + idx,
> @@ -767,7 +767,7 @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
>       r = vhost_vdpa_set_vring_dev_kick(dev, &file);
>       if (unlikely(r != 0)) {
>           error_report("Can't set device kick fd (%d)", -r);
> -        return false;
> +        return r;
>       }
>   
>       event_notifier = vhost_svq_get_svq_call_notifier(svq);
> @@ -777,6 +777,99 @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
>           error_report("Can't set device call fd (%d)", -r);
>       }
>   
> +    return r;
> +}
> +
> +/**
> + * Unmap SVQ area in the device
> + */
> +static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr iova,
> +                                      hwaddr size)
> +{
> +    int r;
> +
> +    size = ROUND_UP(size, qemu_real_host_page_size);
> +    r = vhost_vdpa_dma_unmap(v, iova, size);
> +    return r == 0;
> +}
> +
> +static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
> +                                       const VhostShadowVirtqueue *svq)
> +{
> +    struct vhost_vdpa *v = dev->opaque;
> +    struct vhost_vring_addr svq_addr;
> +    size_t device_size = vhost_svq_device_area_size(svq);
> +    size_t driver_size = vhost_svq_driver_area_size(svq);
> +    bool ok;
> +
> +    vhost_svq_get_vring_addr(svq, &svq_addr);
> +
> +    ok = vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr, driver_size);
> +    if (unlikely(!ok)) {
> +        return false;
> +    }
> +
> +    return vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr, device_size);
> +}
> +
> +/**
> + * Map shadow virtqueue rings in device
> + *
> + * @dev   The vhost device
> + * @svq   The shadow virtqueue
> + */
> +static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
> +                                     const VhostShadowVirtqueue *svq)
> +{
> +    struct vhost_vdpa *v = dev->opaque;
> +    struct vhost_vring_addr svq_addr;
> +    size_t device_size = vhost_svq_device_area_size(svq);
> +    size_t driver_size = vhost_svq_driver_area_size(svq);
> +    int r;
> +
> +    vhost_svq_get_vring_addr(svq, &svq_addr);
> +
> +    r = vhost_vdpa_dma_map(v, svq_addr.desc_user_addr, driver_size,
> +                           (void *)svq_addr.desc_user_addr, true);
> +    if (unlikely(r != 0)) {
> +        return false;
> +    }
> +
> +    r = vhost_vdpa_dma_map(v, svq_addr.used_user_addr, device_size,
> +                           (void *)svq_addr.used_user_addr, false);


Do we need unmap the driver area if we fail here?

Thanks


> +    return r == 0;
> +}
> +
> +static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
> +                                VhostShadowVirtqueue *svq,
> +                                unsigned idx)
> +{
> +    uint16_t vq_index = dev->vq_index + idx;
> +    struct vhost_vring_state s = {
> +        .index = vq_index,
> +    };
> +    int r;
> +    bool ok;
> +
> +    r = vhost_vdpa_set_dev_vring_base(dev, &s);
> +    if (unlikely(r)) {
> +        error_report("Can't set vring base (%d)", r);
> +        return false;
> +    }
> +
> +    s.num = vhost_svq_get_num(svq);
> +    r = vhost_vdpa_set_dev_vring_num(dev, &s);
> +    if (unlikely(r)) {
> +        error_report("Can't set vring num (%d)", r);
> +        return false;
> +    }
> +
> +    ok = vhost_vdpa_svq_map_rings(dev, svq);
> +    if (unlikely(!ok)) {
> +        return false;
> +    }
> +
> +    r = vhost_vdpa_svq_set_fds(dev, svq, idx);
>       return r == 0;
>   }
>   
> @@ -788,14 +881,24 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
>       if (started) {
>           vhost_vdpa_host_notifiers_init(dev);
>           for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
> +            VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
>               VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
>               bool ok = vhost_vdpa_svq_setup(dev, svq, i);
>               if (unlikely(!ok)) {
>                   return -1;
>               }
> +            vhost_svq_start(svq, dev->vdev, vq);
>           }
>           vhost_vdpa_set_vring_ready(dev);
>       } else {
> +        for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
> +            VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs,
> +                                                          i);
> +            bool ok = vhost_vdpa_svq_unmap_rings(dev, svq);
> +            if (unlikely(!ok)) {
> +                return -1;
> +            }
> +        }
>           vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
>       }
>   



  reply	other threads:[~2022-01-30  4:43 UTC|newest]

Thread overview: 182+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-21 20:27 [PATCH 00/31] vDPA shadow virtqueue Eugenio Pérez
2022-01-21 20:27 ` [PATCH 01/31] vdpa: Reorder virtio/vhost-vdpa.c functions Eugenio Pérez
2022-01-28  5:59   ` Jason Wang
2022-01-28  5:59     ` Jason Wang
2022-01-28  7:57     ` Eugenio Perez Martin
2022-02-21  7:31       ` Jason Wang
2022-02-21  7:31         ` Jason Wang
2022-02-21  7:42         ` Eugenio Perez Martin
2022-01-21 20:27 ` [PATCH 02/31] vhost: Add VhostShadowVirtqueue Eugenio Pérez
2022-01-26  8:53   ` Eugenio Perez Martin
2022-01-28  6:00   ` Jason Wang
2022-01-28  6:00     ` Jason Wang
2022-01-28  8:10     ` Eugenio Perez Martin
2022-01-21 20:27 ` [PATCH 03/31] vdpa: Add vhost_svq_get_dev_kick_notifier Eugenio Pérez
2022-01-28  6:03   ` Jason Wang
2022-01-28  6:03     ` Jason Wang
2022-01-31  9:33     ` Eugenio Perez Martin
2022-01-21 20:27 ` [PATCH 04/31] vdpa: Add vhost_svq_set_svq_kick_fd Eugenio Pérez
2022-01-28  6:29   ` Jason Wang
2022-01-28  6:29     ` Jason Wang
2022-01-31 10:18     ` Eugenio Perez Martin
2022-02-08  8:47       ` Jason Wang
2022-02-08  8:47         ` Jason Wang
2022-02-18 18:22         ` Eugenio Perez Martin
2022-01-21 20:27 ` [PATCH 05/31] vhost: Add Shadow VirtQueue kick forwarding capabilities Eugenio Pérez
2022-01-28  6:32   ` Jason Wang
2022-01-28  6:32     ` Jason Wang
2022-01-31 10:48     ` Eugenio Perez Martin
2022-01-21 20:27 ` [PATCH 06/31] vhost: Route guest->host notification through shadow virtqueue Eugenio Pérez
2022-01-28  6:56   ` Jason Wang
2022-01-28  6:56     ` Jason Wang
2022-01-31 11:33     ` Eugenio Perez Martin
2022-02-08  9:02       ` Jason Wang
2022-02-08  9:02         ` Jason Wang
2022-01-21 20:27 ` [PATCH 07/31] vhost: dd vhost_svq_get_svq_call_notifier Eugenio Pérez
2022-01-29  7:57   ` Jason Wang
2022-01-29  7:57     ` Jason Wang
2022-01-29 17:49     ` Eugenio Perez Martin
2022-01-21 20:27 ` [PATCH 08/31] vhost: Add vhost_svq_set_guest_call_notifier Eugenio Pérez
2022-01-21 20:27 ` [PATCH 09/31] vhost-vdpa: Take into account SVQ in vhost_vdpa_set_vring_call Eugenio Pérez
2022-01-29  8:05   ` Jason Wang
2022-01-29  8:05     ` Jason Wang
2022-01-31 15:34     ` Eugenio Perez Martin
2022-02-08  3:23       ` Jason Wang
2022-02-08  3:23         ` Jason Wang
2022-02-18 12:35         ` Eugenio Perez Martin
2022-02-21  7:39           ` Jason Wang
2022-02-21  7:39             ` Jason Wang
2022-02-21  8:01             ` Eugenio Perez Martin
2022-02-22  7:18               ` Jason Wang
2022-02-22  7:18                 ` Jason Wang
2022-01-21 20:27 ` [PATCH 10/31] vhost: Route host->guest notification through shadow virtqueue Eugenio Pérez
2022-01-21 20:27 ` [PATCH 11/31] vhost: Add vhost_svq_valid_device_features to shadow vq Eugenio Pérez
2022-01-29  8:11   ` Jason Wang
2022-01-29  8:11     ` Jason Wang
2022-01-31 15:49     ` Eugenio Perez Martin
2022-02-01 10:57       ` Eugenio Perez Martin
2022-02-08  3:37         ` Jason Wang
2022-02-08  3:37           ` Jason Wang
2022-02-26  9:11   ` Liuxiangdong via
2022-02-26 11:12     ` Eugenio Perez Martin
2022-01-21 20:27 ` [PATCH 12/31] vhost: Add vhost_svq_valid_guest_features " Eugenio Pérez
2022-01-21 20:27 ` [PATCH 13/31] vhost: Add vhost_svq_ack_guest_features " Eugenio Pérez
2022-01-21 20:27 ` [PATCH 14/31] virtio: Add vhost_shadow_vq_get_vring_addr Eugenio Pérez
2022-01-21 20:27 ` [PATCH 15/31] vdpa: Add vhost_svq_get_num Eugenio Pérez
2022-01-29  8:14   ` Jason Wang
2022-01-29  8:14     ` Jason Wang
2022-01-31 16:36     ` Eugenio Perez Martin
2022-01-21 20:27 ` [PATCH 16/31] vhost: pass queue index to vhost_vq_get_addr Eugenio Pérez
2022-01-29  8:20   ` Jason Wang
2022-01-29  8:20     ` Jason Wang
2022-01-31 17:44     ` Eugenio Perez Martin
2022-02-08  6:58       ` Jason Wang
2022-02-08  6:58         ` Jason Wang
2022-01-21 20:27 ` [PATCH 17/31] vdpa: adapt vhost_ops callbacks to svq Eugenio Pérez
2022-01-30  4:03   ` Jason Wang
2022-01-30  4:03     ` Jason Wang
2022-01-31 18:58     ` Eugenio Perez Martin
2022-02-08  3:57       ` Jason Wang
2022-02-08  3:57         ` Jason Wang
2022-02-17 17:13         ` Eugenio Perez Martin
2022-02-21  7:15           ` Jason Wang
2022-02-21  7:15             ` Jason Wang
2022-02-21 17:22             ` Eugenio Perez Martin
2022-02-22  3:16               ` Jason Wang
2022-02-22  3:16                 ` Jason Wang
2022-02-22  7:42                 ` Eugenio Perez Martin
2022-02-22  7:59                   ` Jason Wang
2022-02-22  7:59                     ` Jason Wang
2022-01-21 20:27 ` [PATCH 18/31] vhost: Shadow virtqueue buffers forwarding Eugenio Pérez
2022-01-30  4:42   ` Jason Wang [this message]
2022-01-30  4:42     ` Jason Wang
2022-02-01 17:08     ` Eugenio Perez Martin
2022-02-08  8:11       ` Jason Wang
2022-02-08  8:11         ` Jason Wang
2022-02-22 19:01         ` Eugenio Perez Martin
2022-02-23  2:03           ` Jason Wang
2022-02-23  2:03             ` Jason Wang
2022-01-30  6:46   ` Jason Wang
2022-01-30  6:46     ` Jason Wang
2022-02-01 11:25     ` Eugenio Perez Martin
2022-02-08  8:15       ` Jason Wang
2022-02-08  8:15         ` Jason Wang
2022-02-17 12:48         ` Eugenio Perez Martin
2022-02-21  7:43           ` Jason Wang
2022-02-21  7:43             ` Jason Wang
2022-02-21  8:15             ` Eugenio Perez Martin
2022-02-22  7:26               ` Jason Wang
2022-02-22  7:26                 ` Jason Wang
2022-02-22  8:55                 ` Eugenio Perez Martin
2022-02-23  2:26                   ` Jason Wang
2022-02-23  2:26                     ` Jason Wang
2022-01-21 20:27 ` [PATCH 19/31] utils: Add internal DMAMap to iova-tree Eugenio Pérez
2022-01-21 20:27 ` [PATCH 20/31] util: Store DMA entries in a list Eugenio Pérez
2022-01-21 20:27 ` [PATCH 21/31] util: Add iova_tree_alloc Eugenio Pérez
2022-01-24  4:32   ` Peter Xu
2022-01-24  4:32     ` Peter Xu
2022-01-24  9:20     ` Eugenio Perez Martin
2022-01-24 11:07       ` Peter Xu
2022-01-24 11:07         ` Peter Xu
2022-01-25  9:40         ` Eugenio Perez Martin
2022-01-27  8:06           ` Peter Xu
2022-01-27  8:06             ` Peter Xu
2022-01-27  9:24             ` Eugenio Perez Martin
2022-01-28  3:57               ` Peter Xu
2022-01-28  3:57                 ` Peter Xu
2022-01-28  5:55                 ` Jason Wang
2022-01-28  5:55                   ` Jason Wang
2022-01-28  7:48                   ` Eugenio Perez Martin
2022-02-15 19:34                   ` Eugenio Pérez
2022-02-15 19:34                   ` [PATCH] util: Add iova_tree_alloc Eugenio Pérez
2022-02-16  7:25                     ` Peter Xu
2022-01-30  5:06       ` [PATCH 21/31] " Jason Wang
2022-01-30  5:06         ` Jason Wang
2022-01-21 20:27 ` [PATCH 22/31] vhost: Add VhostIOVATree Eugenio Pérez
2022-01-30  5:21   ` Jason Wang
2022-01-30  5:21     ` Jason Wang
2022-02-01 17:27     ` Eugenio Perez Martin
2022-02-08  8:17       ` Jason Wang
2022-02-08  8:17         ` Jason Wang
2022-01-21 20:27 ` [PATCH 23/31] vdpa: Add custom IOTLB translations to SVQ Eugenio Pérez
2022-01-30  5:57   ` Jason Wang
2022-01-30  5:57     ` Jason Wang
2022-01-31 19:11     ` Eugenio Perez Martin
2022-02-08  8:19       ` Jason Wang
2022-02-08  8:19         ` Jason Wang
2022-01-21 20:27 ` [PATCH 24/31] vhost: Add vhost_svq_get_last_used_idx Eugenio Pérez
2022-01-21 20:27 ` [PATCH 25/31] vdpa: Adapt vhost_vdpa_get_vring_base to SVQ Eugenio Pérez
2022-01-21 20:27 ` [PATCH 26/31] vdpa: Clear VHOST_VRING_F_LOG at vhost_vdpa_set_vring_addr in SVQ Eugenio Pérez
2022-01-21 20:27 ` [PATCH 27/31] vdpa: Never set log_base addr if SVQ is enabled Eugenio Pérez
2022-01-21 20:27 ` [PATCH 28/31] vdpa: Expose VHOST_F_LOG_ALL on SVQ Eugenio Pérez
2022-01-30  6:50   ` Jason Wang
2022-01-30  6:50     ` Jason Wang
2022-02-01 11:45     ` Eugenio Perez Martin
2022-02-08  8:25       ` Jason Wang
2022-02-08  8:25         ` Jason Wang
2022-02-16 15:53         ` Eugenio Perez Martin
2022-02-17  6:02           ` Jason Wang
2022-02-17  6:02             ` Jason Wang
2022-02-17  8:22             ` Eugenio Perez Martin
2022-02-22  7:41               ` Jason Wang
2022-02-22  7:41                 ` Jason Wang
2022-02-22  8:05                 ` Eugenio Perez Martin
2022-02-23  3:46                   ` Jason Wang
2022-02-23  3:46                     ` Jason Wang
2022-02-23  8:06                     ` Eugenio Perez Martin
2022-02-24  3:45                       ` Jason Wang
2022-02-24  3:45                         ` Jason Wang
2022-01-21 20:27 ` [PATCH 29/31] vdpa: Make ncs autofree Eugenio Pérez
2022-01-30  6:51   ` Jason Wang
2022-01-30  6:51     ` Jason Wang
2022-02-01 17:10     ` Eugenio Perez Martin
2022-01-21 20:27 ` [PATCH 30/31] vdpa: Move vhost_vdpa_get_iova_range to net/vhost-vdpa.c Eugenio Pérez
2022-01-30  6:53   ` Jason Wang
2022-01-30  6:53     ` Jason Wang
2022-02-01 17:11     ` Eugenio Perez Martin
2022-01-21 20:27 ` [PATCH 31/31] vdpa: Add x-svq to NetdevVhostVDPAOptions Eugenio Pérez
2022-01-28  6:02 ` [PATCH 00/31] vDPA shadow virtqueue Jason Wang
2022-01-28  6:02   ` Jason Wang
2022-01-31  9:15   ` Eugenio Perez Martin
2022-02-08  8:27     ` Jason Wang
2022-02-08  8:27       ` Jason Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6c98f4e8-8695-ab83-ae37-2d6293a1fafa@redhat.com \
    --to=jasowang@redhat.com \
    --cc=armbru@redhat.com \
    --cc=eblake@redhat.com \
    --cc=ehabkost@redhat.com \
    --cc=eli@mellanox.com \
    --cc=eperezma@redhat.com \
    --cc=gdawar@xilinx.com \
    --cc=hanand@xilinx.com \
    --cc=lingshan.zhu@intel.com \
    --cc=lulu@redhat.com \
    --cc=lvivier@redhat.com \
    --cc=mst@redhat.com \
    --cc=parav@mellanox.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --cc=stefanha@redhat.com \
    --cc=virtualization@lists.linux-foundation.org \
    --cc=xiao.w.wang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.