From: Jason Wang <jasowang@redhat.com>
To: "Eugenio Pérez" <eperezma@redhat.com>, qemu-devel@nongnu.org
Cc: "Michael S. Tsirkin" <mst@redhat.com>,
	virtualization@lists.linux-foundation.org,
	Eli Cohen <eli@mellanox.com>, Eric Blake <eblake@redhat.com>,
	Parav Pandit <parav@mellanox.com>, Cindy Lu <lulu@redhat.com>,
	"Fangyi \(Eric\)" <eric.fangyi@huawei.com>,
	Markus Armbruster <armbru@redhat.com>,
	yebiaoxiang@huawei.com, Liuxiangdong <liuxiangdong5@huawei.com>,
	Laurent Vivier <lvivier@redhat.com>,
	Eduardo Habkost <ehabkost@redhat.com>,
	Richard Henderson <richard.henderson@linaro.org>,
	Gautam Dawar <gdawar@xilinx.com>,
	Xiao W Wang <xiao.w.wang@intel.com>,
	Stefan Hajnoczi <stefanha@redhat.com>,
	Harpreet Singh Anand <hanand@xilinx.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Lingshan <lingshan.zhu@intel.com>
Subject: Re: [PATCH v2 10/14] vdpa: Add custom IOTLB translations to SVQ
Date: Mon, 28 Feb 2022 15:36:58 +0800
Message-ID: <af6d14b3-6bdf-8717-90b5-bc685896cc8a@redhat.com>
In-Reply-To: <20220227134111.3254066-11-eperezma@redhat.com>


On 2/27/2022 9:41 PM, Eugenio Pérez wrote:
> Use translations added in VhostIOVATree in SVQ.
>
> Only introduce usage here, not allocation and deallocation. As with
> previous patches, we use the dead code paths of shadow_vqs_enabled to
> avoid committing too many changes at once. These are impossible to take
> at the moment.
>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> ---
>   hw/virtio/vhost-shadow-virtqueue.h |   6 +-
>   include/hw/virtio/vhost-vdpa.h     |   3 +
>   hw/virtio/vhost-shadow-virtqueue.c |  76 ++++++++++++++++-
>   hw/virtio/vhost-vdpa.c             | 128 ++++++++++++++++++++++++-----
>   4 files changed, 187 insertions(+), 26 deletions(-)
>
> diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
> index 04c67685fd..b2f722d101 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.h
> +++ b/hw/virtio/vhost-shadow-virtqueue.h
> @@ -13,6 +13,7 @@
>   #include "qemu/event_notifier.h"
>   #include "hw/virtio/virtio.h"
>   #include "standard-headers/linux/vhost_types.h"
> +#include "hw/virtio/vhost-iova-tree.h"
>   
>   /* Shadow virtqueue to relay notifications */
>   typedef struct VhostShadowVirtqueue {
> @@ -43,6 +44,9 @@ typedef struct VhostShadowVirtqueue {
>       /* Virtio device */
>       VirtIODevice *vdev;
>   
> +    /* IOVA mapping */
> +    VhostIOVATree *iova_tree;
> +
>       /* Map for use the guest's descriptors */
>       VirtQueueElement **ring_id_maps;
>   
> @@ -78,7 +82,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
>                        VirtQueue *vq);
>   void vhost_svq_stop(VhostShadowVirtqueue *svq);
>   
> -VhostShadowVirtqueue *vhost_svq_new(void);
> +VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree);
>   
>   void vhost_svq_free(gpointer vq);
>   G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
> diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> index 009a9f3b6b..ee8e939ad0 100644
> --- a/include/hw/virtio/vhost-vdpa.h
> +++ b/include/hw/virtio/vhost-vdpa.h
> @@ -14,6 +14,7 @@
>   
>   #include <gmodule.h>
>   
> +#include "hw/virtio/vhost-iova-tree.h"
>   #include "hw/virtio/virtio.h"
>   #include "standard-headers/linux/vhost_types.h"
>   
> @@ -30,6 +31,8 @@ typedef struct vhost_vdpa {
>       MemoryListener listener;
>       struct vhost_vdpa_iova_range iova_range;
>       bool shadow_vqs_enabled;
> +    /* IOVA mapping used by the Shadow Virtqueue */
> +    VhostIOVATree *iova_tree;
>       GPtrArray *shadow_vqs;
>       struct vhost_dev *dev;
>       VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
> diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> index a38d313755..7e073773d1 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.c
> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> @@ -11,6 +11,7 @@
>   #include "hw/virtio/vhost-shadow-virtqueue.h"
>   
>   #include "qemu/error-report.h"
> +#include "qemu/log.h"
>   #include "qemu/main-loop.h"
>   #include "qemu/log.h"
>   #include "linux-headers/linux/vhost.h"
> @@ -84,7 +85,58 @@ static void vhost_svq_set_notification(VhostShadowVirtqueue *svq, bool enable)
>       }
>   }
>   
> +/**
> + * Translate addresses between the qemu's virtual address and the SVQ IOVA
> + *
> + * @svq    Shadow VirtQueue
> + * @vaddr  Translated IOVA addresses
> + * @iovec  Source qemu's VA addresses
> + * @num    Length of iovec and minimum length of vaddr
> + */
> +static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
> +                                     void **addrs, const struct iovec *iovec,
> +                                     size_t num)
> +{
> +    if (num == 0) {
> +        return true;
> +    }
> +
> +    for (size_t i = 0; i < num; ++i) {
> +        DMAMap needle = {
> +            .translated_addr = (hwaddr)iovec[i].iov_base,
> +            .size = iovec[i].iov_len,
> +        };
> +        size_t off;
> +
> +        const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
> +        /*
> +         * Map cannot be NULL since iova map contains all guest space and
> +         * qemu already has a physical address mapped
> +         */
> +        if (unlikely(!map)) {
> +            qemu_log_mask(LOG_GUEST_ERROR,
> +                          "Invalid address 0x%"HWADDR_PRIx" given by guest",
> +                          needle.translated_addr);
> +            return false;
> +        }
> +
> +        off = needle.translated_addr - map->translated_addr;
> +        addrs[i] = (void *)(map->iova + off);
> +
> +        if (unlikely(int128_gt(int128_add(needle.translated_addr,
> +                                          iovec[i].iov_len),
> +                               map->translated_addr + map->size))) {
> +            qemu_log_mask(LOG_GUEST_ERROR,
> +                          "Guest buffer expands over iova range");
> +            return false;
> +        }
> +    }
> +
> +    return true;
> +}
> +
>   static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
> +                                    void * const *vaddr_sg,


Nit: it looks to me that we are passing IOVAs here rather than vaddrs, so
it might be better to name the parameter "sg"?
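
Something like the following (untested, just to illustrate the naming; the
body stays the same):

    static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
                                        void * const *sg, /* SVQ IOVAs */
                                        const struct iovec *iovec,
                                        size_t num, bool more_descs,
                                        bool write)

and correspondingly:

        /* sg[n] is the IOVA the device sees, not a host vaddr */
        descs[i].addr = cpu_to_le64((hwaddr)sg[n]);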


>                                       const struct iovec *iovec,
>                                       size_t num, bool more_descs, bool write)
>   {
> @@ -103,7 +155,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
>           } else {
>               descs[i].flags = flags;
>           }
> -        descs[i].addr = cpu_to_le64((hwaddr)iovec[n].iov_base);
> +        descs[i].addr = cpu_to_le64((hwaddr)vaddr_sg[n]);
>           descs[i].len = cpu_to_le32(iovec[n].iov_len);
>   
>           last = i;
> @@ -119,6 +171,8 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
>   {
>       unsigned avail_idx;
>       vring_avail_t *avail = svq->vring.avail;
> +    bool ok;
> +    g_autofree void **sgs = g_new(void *, MAX(elem->out_num, elem->in_num));
>   
>       *head = svq->free_head;
>   
> @@ -129,9 +183,20 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
>           return false;
>       }
>   
> -    vhost_vring_write_descs(svq, elem->out_sg, elem->out_num,
> +    ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num);
> +    if (unlikely(!ok)) {
> +        return false;
> +    }
> +    vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
>                               elem->in_num > 0, false);
> -    vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
> +
> +
> +    ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num);
> +    if (unlikely(!ok)) {
> +        return false;
> +    }
> +
> +    vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true);
>   
>       /*
>        * Put the entry in the available array (but don't update avail->idx until
> @@ -514,11 +579,13 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
>    * Creates vhost shadow virtqueue, and instructs the vhost device to use the
>    * shadow methods and file descriptors.
>    *
> + * @iova_tree Tree to perform descriptors translations
> + *
>    * Returns the new virtqueue or NULL.
>    *
>    * In case of error, reason is reported through error_report.
>    */
> -VhostShadowVirtqueue *vhost_svq_new(void)
> +VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree)
>   {
>       g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
>       int r;
> @@ -539,6 +606,7 @@ VhostShadowVirtqueue *vhost_svq_new(void)
>   
>       event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
>       event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
> +    svq->iova_tree = iova_tree;
>       return g_steal_pointer(&svq);
>   
>   err_init_hdev_call:
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 435b9c2e9e..56f9f125cd 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -209,6 +209,21 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
>                                            vaddr, section->readonly);
>   
>       llsize = int128_sub(llend, int128_make64(iova));
> +    if (v->shadow_vqs_enabled) {
> +        DMAMap mem_region = {
> +            .translated_addr = (hwaddr)vaddr,
> +            .size = int128_get64(llsize) - 1,
> +            .perm = IOMMU_ACCESS_FLAG(true, section->readonly),
> +        };
> +
> +        int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region);
> +        if (unlikely(r != IOVA_OK)) {
> +            error_report("Can't allocate a mapping (%d)", r);
> +            goto fail;
> +        }
> +
> +        iova = mem_region.iova;
> +    }
>   
>       vhost_vdpa_iotlb_batch_begin_once(v);
>       ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
> @@ -261,6 +276,20 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
>   
>       llsize = int128_sub(llend, int128_make64(iova));
>   
> +    if (v->shadow_vqs_enabled) {
> +        const DMAMap *result;
> +        const void *vaddr = memory_region_get_ram_ptr(section->mr) +
> +            section->offset_within_region +
> +            (iova - section->offset_within_address_space);
> +        DMAMap mem_region = {
> +            .translated_addr = (hwaddr)vaddr,
> +            .size = int128_get64(llsize) - 1,
> +        };
> +
> +        result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region);
> +        iova = result->iova;
> +        vhost_iova_tree_remove(v->iova_tree, &mem_region);
> +    }
>       vhost_vdpa_iotlb_batch_begin_once(v);
>       ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
>       if (ret) {
> @@ -383,7 +412,7 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
>   
>       shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
>       for (unsigned n = 0; n < hdev->nvqs; ++n) {
> -        g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
> +        g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree);
>   
>           if (unlikely(!svq)) {
>               error_setg(errp, "Cannot create svq %u", n);
> @@ -834,37 +863,78 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
>   /**
>    * Unmap a SVQ area in the device
>    */
> -static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr iova,
> -                                      hwaddr size)
> +static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
> +                                      const DMAMap *needle)
>   {
> +    const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle);
> +    hwaddr size;
>       int r;
>   
> -    size = ROUND_UP(size, qemu_real_host_page_size);
> -    r = vhost_vdpa_dma_unmap(v, iova, size);
> +    if (unlikely(!result)) {
> +        error_report("Unable to find SVQ address to unmap");
> +        return false;
> +    }
> +
> +    size = ROUND_UP(result->size, qemu_real_host_page_size);
> +    r = vhost_vdpa_dma_unmap(v, result->iova, size);
>       return r == 0;
>   }
>   
>   static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
>                                          const VhostShadowVirtqueue *svq)
>   {
> +    DMAMap needle;
>       struct vhost_vdpa *v = dev->opaque;
>       struct vhost_vring_addr svq_addr;
> -    size_t device_size = vhost_svq_device_area_size(svq);
> -    size_t driver_size = vhost_svq_driver_area_size(svq);
>       bool ok;
>   
>       vhost_svq_get_vring_addr(svq, &svq_addr);
>   
> -    ok = vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr, driver_size);
> +    needle = (DMAMap) {
> +        .translated_addr = svq_addr.desc_user_addr,
> +    };


Let's simply zero-initialize needle at the start of this function; then
we can just assign needle.translated_addr = XXX here.
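
I.e. something like this (untested sketch):

    static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
                                           const VhostShadowVirtqueue *svq)
    {
        DMAMap needle = {};
        struct vhost_vdpa *v = dev->opaque;
        struct vhost_vring_addr svq_addr;
        bool ok;

        vhost_svq_get_vring_addr(svq, &svq_addr);

        needle.translated_addr = svq_addr.desc_user_addr;
        ok = vhost_vdpa_svq_unmap_ring(v, &needle);
        if (unlikely(!ok)) {
            return false;
        }

        needle.translated_addr = svq_addr.used_user_addr;
        return vhost_vdpa_svq_unmap_ring(v, &needle);
    }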


> +    ok = vhost_vdpa_svq_unmap_ring(v, &needle);
>       if (unlikely(!ok)) {
>           return false;
>       }
>   
> -    return vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr, device_size);
> +    needle = (DMAMap) {
> +        .translated_addr = svq_addr.used_user_addr,
> +    };
> +    return vhost_vdpa_svq_unmap_ring(v, &needle);
> +}
> +
> +/**
> + * Map the SVQ area in the device
> + *
> + * @v          Vhost-vdpa device
> + * @needle     The area to search iova
> + * @errorp     Error pointer
> + */
> +static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
> +                                    Error **errp)
> +{
> +    int r;
> +
> +    r = vhost_iova_tree_map_alloc(v->iova_tree, needle);
> +    if (unlikely(r != IOVA_OK)) {
> +        error_setg(errp, "Cannot allocate iova (%d)", r);
> +        return false;
> +    }
> +
> +    r = vhost_vdpa_dma_map(v, needle->iova, needle->size,
> +                           (void *)needle->translated_addr,
> +                           !(needle->perm & IOMMU_ACCESS_FLAG(0, 1)));


Let's simply use needle->perm == IOMMU_RO here?
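
I.e. (untested):

    r = vhost_vdpa_dma_map(v, needle->iova, needle->size,
                           (void *)needle->translated_addr,
                           needle->perm == IOMMU_RO);

That reads much more directly as "map read-only".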


> +    if (unlikely(r != 0)) {
> +        error_setg_errno(errp, -r, "Cannot map region to device");
> +        vhost_iova_tree_remove(v->iova_tree, needle);
> +    }
> +
> +    return r == 0;
>   }
>   
>   /**
> - * Map shadow virtqueue rings in device
> + * Map the shadow virtqueue rings in the device
>    *
>    * @dev   The vhost device
>    * @svq   The shadow virtqueue
> @@ -876,28 +946,44 @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
>                                        struct vhost_vring_addr *addr,
>                                        Error **errp)
>   {
> +    DMAMap device_region, driver_region;
> +    struct vhost_vring_addr svq_addr;
>       struct vhost_vdpa *v = dev->opaque;
>       size_t device_size = vhost_svq_device_area_size(svq);
>       size_t driver_size = vhost_svq_driver_area_size(svq);
> -    int r;
> +    size_t avail_offset;
> +    bool ok;
>   
>       ERRP_GUARD();
> -    vhost_svq_get_vring_addr(svq, addr);
> +    vhost_svq_get_vring_addr(svq, &svq_addr);
>   
> -    r = vhost_vdpa_dma_map(v, addr->desc_user_addr, driver_size,
> -                           (void *)addr->desc_user_addr, true);
> -    if (unlikely(r != 0)) {
> -        error_setg_errno(errp, -r, "Cannot create vq driver region: ");
> +    driver_region = (DMAMap) {
> +        .translated_addr = svq_addr.desc_user_addr,
> +        .size = driver_size - 1,


Any reason for the "-1" here? I see several places doing things like
this; it's probably a hint of a wrong API somewhere.
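
My guess is that DMAMap.size is being treated as inclusive (the last valid
offset rather than the length). If so, that convention deserves at least a
comment, or better a small helper so callers don't have to remember the
"-1". Untested sketch of such a hypothetical helper:

    /* Hypothetical helper: build a DMAMap from a length in bytes */
    static inline DMAMap dma_map_from_len(hwaddr translated_addr, hwaddr len,
                                          IOMMUAccessFlags perm)
    {
        const DMAMap map = {
            .translated_addr = translated_addr,
            .size = len - 1, /* assuming DMAMap.size is inclusive */
            .perm = perm,
        };
        return map;
    }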

Thanks


> +        .perm = IOMMU_RO,
> +    };
> +    ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp);
> +    if (unlikely(!ok)) {
> +        error_prepend(errp, "Cannot create vq driver region: ");
>           return false;
>       }
> +    addr->desc_user_addr = driver_region.iova;
> +    avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr;
> +    addr->avail_user_addr = driver_region.iova + avail_offset;
>   
> -    r = vhost_vdpa_dma_map(v, addr->used_user_addr, device_size,
> -                           (void *)addr->used_user_addr, false);
> -    if (unlikely(r != 0)) {
> -        error_setg_errno(errp, -r, "Cannot create vq device region: ");
> +    device_region = (DMAMap) {
> +        .translated_addr = svq_addr.used_user_addr,
> +        .size = device_size - 1,
> +        .perm = IOMMU_RW,
> +    };
> +    ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
> +    if (unlikely(!ok)) {
> +        error_prepend(errp, "Cannot create vq device region: ");
> +        vhost_vdpa_svq_unmap_ring(v, &driver_region);
>       }
> +    addr->used_user_addr = device_region.iova;
>   
> -    return r == 0;
> +    return ok;
>   }
>   
>   static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
