From: Jason Wang <jasowang@redhat.com>
To: Xie Yongji <xieyongji@bytedance.com>,
mst@redhat.com, stefanha@redhat.com, sgarzare@redhat.com,
parav@nvidia.com, hch@infradead.org,
christian.brauner@canonical.com, rdunlap@infradead.org,
willy@infradead.org, viro@zeniv.linux.org.uk, axboe@kernel.dk,
bcrl@kvack.org, corbet@lwn.net, mika.penttila@nextfour.com,
dan.carpenter@oracle.com
Cc: virtualization@lists.linux-foundation.org,
netdev@vger.kernel.org, kvm@vger.kernel.org,
linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH v6 07/10] vdpa: Support transferring virtual addressing during DMA mapping
Date: Thu, 8 Apr 2021 10:36:30 +0800 [thread overview]
Message-ID: <e9bdee99-49b1-3e3b-8769-6e8a9783c418@redhat.com> (raw)
In-Reply-To: <20210331080519.172-8-xieyongji@bytedance.com>
在 2021/3/31 下午4:05, Xie Yongji 写道:
> This patch introduces an attribute for vDPA devices to indicate
> whether virtual addresses can be used. If the vDPA device driver sets
> it, the vhost-vdpa bus driver will not pin user pages and will transfer
> the userspace virtual address instead of the physical address during
> DMA mapping. The corresponding vma->vm_file and offset will also be
> passed as an opaque pointer.
>
> Suggested-by: Jason Wang <jasowang@redhat.com>
> Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
Acked-by: Jason Wang <jasowang@redhat.com>
> ---
> drivers/vdpa/ifcvf/ifcvf_main.c | 2 +-
> drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +-
> drivers/vdpa/vdpa.c | 9 +++-
> drivers/vdpa/vdpa_sim/vdpa_sim.c | 2 +-
> drivers/vdpa/virtio_pci/vp_vdpa.c | 2 +-
> drivers/vhost/vdpa.c | 99 ++++++++++++++++++++++++++++++++++-----
> include/linux/vdpa.h | 19 ++++++--
> 7 files changed, 116 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
> index d555a6a5d1ba..aee013f3eb5f 100644
> --- a/drivers/vdpa/ifcvf/ifcvf_main.c
> +++ b/drivers/vdpa/ifcvf/ifcvf_main.c
> @@ -431,7 +431,7 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> }
>
> adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
> - dev, &ifc_vdpa_ops, NULL);
> + dev, &ifc_vdpa_ops, NULL, false);
> if (adapter == NULL) {
> IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
> return -ENOMEM;
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 71397fdafa6a..fb62ebcf464a 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -1982,7 +1982,7 @@ static int mlx5v_probe(struct auxiliary_device *adev,
> max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
>
> ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
> - NULL);
> + NULL, false);
> if (IS_ERR(ndev))
> return PTR_ERR(ndev);
>
> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
> index 5cffce67cab0..97fbac276c72 100644
> --- a/drivers/vdpa/vdpa.c
> +++ b/drivers/vdpa/vdpa.c
> @@ -71,6 +71,7 @@ static void vdpa_release_dev(struct device *d)
> * @config: the bus operations that is supported by this device
> * @size: size of the parent structure that contains private data
> * @name: name of the vdpa device; optional.
> + * @use_va: indicate whether virtual address must be used by this device
> *
> * Driver should use vdpa_alloc_device() wrapper macro instead of
> * using this directly.
> @@ -80,7 +81,8 @@ static void vdpa_release_dev(struct device *d)
> */
> struct vdpa_device *__vdpa_alloc_device(struct device *parent,
> const struct vdpa_config_ops *config,
> - size_t size, const char *name)
> + size_t size, const char *name,
> + bool use_va)
> {
> struct vdpa_device *vdev;
> int err = -EINVAL;
> @@ -91,6 +93,10 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
> if (!!config->dma_map != !!config->dma_unmap)
> goto err;
>
> + /* It should only work for devices that use an on-chip IOMMU */
> + if (use_va && !(config->dma_map || config->set_map))
> + goto err;
> +
> err = -ENOMEM;
> vdev = kzalloc(size, GFP_KERNEL);
> if (!vdev)
> @@ -106,6 +112,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
> vdev->index = err;
> vdev->config = config;
> vdev->features_valid = false;
> + vdev->use_va = use_va;
>
> if (name)
> err = dev_set_name(&vdev->dev, "%s", name);
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> index ff331f088baf..d26334e9a412 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> @@ -235,7 +235,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
> ops = &vdpasim_config_ops;
>
> vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
> - dev_attr->name);
> + dev_attr->name, false);
> if (!vdpasim)
> goto err_alloc;
>
> diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c
> index 1321a2fcd088..03b36aed48d6 100644
> --- a/drivers/vdpa/virtio_pci/vp_vdpa.c
> +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c
> @@ -377,7 +377,7 @@ static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> return ret;
>
> vp_vdpa = vdpa_alloc_device(struct vp_vdpa, vdpa,
> - dev, &vp_vdpa_ops, NULL);
> + dev, &vp_vdpa_ops, NULL, false);
> if (vp_vdpa == NULL) {
> dev_err(dev, "vp_vdpa: Failed to allocate vDPA structure\n");
> return -ENOMEM;
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index f9aab9013745..613ea400e0e5 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -505,8 +505,28 @@ static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
> }
> }
>
> +static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, u64 start, u64 last)
> +{
> + struct vhost_dev *dev = &v->vdev;
> + struct vhost_iotlb *iotlb = dev->iotlb;
> + struct vhost_iotlb_map *map;
> + struct vdpa_map_file *map_file;
> +
> + while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
> + map_file = (struct vdpa_map_file *)map->opaque;
> + fput(map_file->file);
> + kfree(map_file);
> + vhost_iotlb_map_free(iotlb, map);
> + }
> +}
> +
> static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
> {
> + struct vdpa_device *vdpa = v->vdpa;
> +
> + if (vdpa->use_va)
> + return vhost_vdpa_va_unmap(v, start, last);
> +
> return vhost_vdpa_pa_unmap(v, start, last);
> }
>
> @@ -541,21 +561,21 @@ static int perm_to_iommu_flags(u32 perm)
> return flags | IOMMU_CACHE;
> }
>
> -static int vhost_vdpa_map(struct vhost_vdpa *v,
> - u64 iova, u64 size, u64 pa, u32 perm)
> +static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova,
> + u64 size, u64 pa, u32 perm, void *opaque)
> {
> struct vhost_dev *dev = &v->vdev;
> struct vdpa_device *vdpa = v->vdpa;
> const struct vdpa_config_ops *ops = vdpa->config;
> int r = 0;
>
> - r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1,
> - pa, perm);
> + r = vhost_iotlb_add_range_ctx(dev->iotlb, iova, iova + size - 1,
> + pa, perm, opaque);
> if (r)
> return r;
>
> if (ops->dma_map) {
> - r = ops->dma_map(vdpa, iova, size, pa, perm, NULL);
> + r = ops->dma_map(vdpa, iova, size, pa, perm, opaque);
> } else if (ops->set_map) {
> if (!v->in_batch)
> r = ops->set_map(vdpa, dev->iotlb);
> @@ -563,13 +583,15 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
> r = iommu_map(v->domain, iova, pa, size,
> perm_to_iommu_flags(perm));
> }
> -
> - if (r)
> + if (r) {
> vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
> - else
> + return r;
> + }
> +
> + if (!vdpa->use_va)
> atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
>
> - return r;
> + return 0;
> }
>
> static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
> @@ -590,6 +612,56 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
> }
> }
>
> +static int vhost_vdpa_va_map(struct vhost_vdpa *v,
> + u64 iova, u64 size, u64 uaddr, u32 perm)
> +{
> + struct vhost_dev *dev = &v->vdev;
> + u64 offset, map_size, map_iova = iova;
> + struct vdpa_map_file *map_file;
> + struct vm_area_struct *vma;
> + int ret;
> +
> + mmap_read_lock(dev->mm);
> +
> + while (size) {
> + vma = find_vma(dev->mm, uaddr);
> + if (!vma) {
> + ret = -EINVAL;
> + break;
> + }
> + map_size = min(size, vma->vm_end - uaddr);
> + if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
> + !(vma->vm_flags & (VM_IO | VM_PFNMAP))))
> + goto next;
> +
> + map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
> + if (!map_file) {
> + ret = -ENOMEM;
> + break;
> + }
> + offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
> + map_file->offset = offset;
> + map_file->file = get_file(vma->vm_file);
> + ret = vhost_vdpa_map(v, map_iova, map_size, uaddr,
> + perm, map_file);
> + if (ret) {
> + fput(map_file->file);
> + kfree(map_file);
> + break;
> + }
> +next:
> + size -= map_size;
> + uaddr += map_size;
> + map_iova += map_size;
> + }
> + if (ret)
> + vhost_vdpa_unmap(v, iova, map_iova - iova);
> +
> + mmap_read_unlock(dev->mm);
> +
> + return ret;
> +}
> +
> static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
> u64 iova, u64 size, u64 uaddr, u32 perm)
> {
> @@ -656,7 +728,7 @@ static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
> csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
> ret = vhost_vdpa_map(v, iova, csize,
> map_pfn << PAGE_SHIFT,
> - perm);
> + perm, NULL);
> if (ret) {
> /*
> * Unpin the pages that are left unmapped
> @@ -685,7 +757,7 @@ static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
>
> /* Pin the rest chunk */
> ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
> - map_pfn << PAGE_SHIFT, perm);
> + map_pfn << PAGE_SHIFT, perm, NULL);
> out:
> if (ret) {
> if (nchunks) {
> @@ -718,6 +790,7 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
> struct vhost_iotlb_msg *msg)
> {
> struct vhost_dev *dev = &v->vdev;
> + struct vdpa_device *vdpa = v->vdpa;
> struct vhost_iotlb *iotlb = dev->iotlb;
>
> if (msg->iova < v->range.first ||
> @@ -728,6 +801,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
> msg->iova + msg->size - 1))
> return -EEXIST;
>
> + if (vdpa->use_va)
> + return vhost_vdpa_va_map(v, msg->iova, msg->size,
> + msg->uaddr, msg->perm);
> +
> return vhost_vdpa_pa_map(v, msg->iova, msg->size, msg->uaddr,
> msg->perm);
> }
> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> index b01f7c9096bf..e67404e4b23e 100644
> --- a/include/linux/vdpa.h
> +++ b/include/linux/vdpa.h
> @@ -44,6 +44,7 @@ struct vdpa_mgmt_dev;
> * @config: the configuration ops for this device.
> * @index: device index
> * @features_valid: were features initialized? for legacy guests
> + * @use_va: indicate whether virtual address must be used by this device
> * @nvqs: maximum number of supported virtqueues
> * @mdev: management device pointer; caller must setup when registering device as part
> * of dev_add() mgmtdev ops callback before invoking _vdpa_register_device().
> @@ -54,6 +55,7 @@ struct vdpa_device {
> const struct vdpa_config_ops *config;
> unsigned int index;
> bool features_valid;
> + bool use_va;
> int nvqs;
> struct vdpa_mgmt_dev *mdev;
> };
> @@ -69,6 +71,16 @@ struct vdpa_iova_range {
> };
>
> /**
> + * struct vdpa_map_file - Corresponding file area for device memory mapping
> + * @file: vma->vm_file for the mapping
> + * @offset: mapping offset in the vm_file
> + */
> +struct vdpa_map_file {
> + struct file *file;
> + u64 offset;
> +};
> +
> +/**
> * vDPA_config_ops - operations for configuring a vDPA device.
> * Note: vDPA device drivers are required to implement all of the
> * operations unless it is mentioned to be optional in the following
> @@ -250,14 +262,15 @@ struct vdpa_config_ops {
>
> struct vdpa_device *__vdpa_alloc_device(struct device *parent,
> const struct vdpa_config_ops *config,
> - size_t size, const char *name);
> + size_t size, const char *name,
> + bool use_va);
>
> -#define vdpa_alloc_device(dev_struct, member, parent, config, name) \
> +#define vdpa_alloc_device(dev_struct, member, parent, config, name, use_va) \
> container_of(__vdpa_alloc_device( \
> parent, config, \
> sizeof(dev_struct) + \
> BUILD_BUG_ON_ZERO(offsetof( \
> - dev_struct, member)), name), \
> + dev_struct, member)), name, use_va), \
> dev_struct, member)
>
> int vdpa_register_device(struct vdpa_device *vdev, int nvqs);
next prev parent reply other threads:[~2021-04-08 2:36 UTC|newest]
Thread overview: 62+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-03-31 8:05 [PATCH v6 00/10] Introduce VDUSE - vDPA Device in Userspace Xie Yongji
2021-03-31 8:05 ` [PATCH v6 01/10] file: Export receive_fd() to modules Xie Yongji
2021-03-31 9:15 ` Christian Brauner
2021-03-31 9:26 ` Dan Carpenter
2021-03-31 9:28 ` Christian Brauner
2021-03-31 11:32 ` Yongji Xie
2021-03-31 12:23 ` Christian Brauner
2021-03-31 13:59 ` Yongji Xie
2021-03-31 14:07 ` Christian Brauner
2021-03-31 14:37 ` Yongji Xie
2021-03-31 8:05 ` [PATCH v6 02/10] eventfd: Increase the recursion depth of eventfd_signal() Xie Yongji
2021-03-31 8:05 ` [PATCH v6 03/10] vhost-vdpa: protect concurrent access to vhost device iotlb Xie Yongji
2021-04-09 16:15 ` Michael S. Tsirkin
2021-04-11 5:36 ` Yongji Xie
2021-04-11 20:48 ` Michael S. Tsirkin
2021-04-12 2:29 ` Yongji Xie
2021-04-12 9:00 ` Michael S. Tsirkin
2021-03-31 8:05 ` [PATCH v6 04/10] vhost-iotlb: Add an opaque pointer for vhost IOTLB Xie Yongji
2021-03-31 8:05 ` [PATCH v6 05/10] vdpa: Add an opaque pointer for vdpa_config_ops.dma_map() Xie Yongji
2021-03-31 8:05 ` [PATCH v6 06/10] vdpa: factor out vhost_vdpa_pa_map() and vhost_vdpa_pa_unmap() Xie Yongji
2021-03-31 8:05 ` [PATCH v6 07/10] vdpa: Support transferring virtual addressing during DMA mapping Xie Yongji
2021-04-08 2:36 ` Jason Wang [this message]
2021-03-31 8:05 ` [PATCH v6 08/10] vduse: Implement an MMU-based IOMMU driver Xie Yongji
2021-04-08 3:25 ` Jason Wang
2021-04-08 5:27 ` Yongji Xie
2021-03-31 8:05 ` [PATCH v6 09/10] vduse: Introduce VDUSE - vDPA Device in Userspace Xie Yongji
2021-04-08 6:57 ` Jason Wang
2021-04-08 9:36 ` Yongji Xie
2021-04-09 5:36 ` Jason Wang
2021-04-09 8:02 ` Yongji Xie
2021-04-12 7:16 ` Jason Wang
2021-04-12 8:02 ` Yongji Xie
2021-04-12 9:37 ` Jason Wang
2021-04-12 9:59 ` Yongji Xie
2021-04-13 3:35 ` Jason Wang
2021-04-13 4:28 ` Yongji Xie
2021-04-14 8:18 ` Jason Wang
2021-04-16 3:24 ` Jason Wang
2021-04-16 8:43 ` Yongji Xie
2021-03-31 8:05 ` [PATCH v6 10/10] Documentation: Add documentation for VDUSE Xie Yongji
2021-04-08 7:18 ` Jason Wang
2021-04-08 8:09 ` Yongji Xie
2021-04-14 14:14 ` Stefan Hajnoczi
2021-04-15 5:38 ` Yongji Xie
2021-04-15 7:19 ` Stefan Hajnoczi
2021-04-15 8:33 ` Yongji Xie
2021-04-15 14:17 ` Stefan Hajnoczi
2021-04-15 8:36 ` Jason Wang
2021-04-15 9:04 ` Jason Wang
2021-04-15 11:17 ` Yongji Xie
2021-04-16 2:20 ` Jason Wang
2021-04-16 2:58 ` Yongji Xie
2021-04-16 3:02 ` Jason Wang
2021-04-16 3:18 ` Yongji Xie
2021-04-15 14:38 ` Stefan Hajnoczi
2021-04-16 2:23 ` Jason Wang
2021-04-16 3:19 ` Yongji Xie
2021-04-16 5:39 ` Jason Wang
2021-04-16 3:13 ` Yongji Xie
2021-04-14 7:34 ` [PATCH v6 00/10] Introduce VDUSE - vDPA Device in Userspace Michael S. Tsirkin
2021-04-14 7:49 ` Jason Wang
2021-04-14 7:54 ` Yongji Xie
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=e9bdee99-49b1-3e3b-8769-6e8a9783c418@redhat.com \
--to=jasowang@redhat.com \
--cc=axboe@kernel.dk \
--cc=bcrl@kvack.org \
--cc=christian.brauner@canonical.com \
--cc=corbet@lwn.net \
--cc=dan.carpenter@oracle.com \
--cc=hch@infradead.org \
--cc=kvm@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=mika.penttila@nextfour.com \
--cc=mst@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=parav@nvidia.com \
--cc=rdunlap@infradead.org \
--cc=sgarzare@redhat.com \
--cc=stefanha@redhat.com \
--cc=viro@zeniv.linux.org.uk \
--cc=virtualization@lists.linux-foundation.org \
--cc=willy@infradead.org \
--cc=xieyongji@bytedance.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).