All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yongji Xie <xieyongji@bytedance.com>
To: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>,
	Liu Xiaodong <xiaodong.liu@intel.com>,
	Maxime Coquelin <maxime.coquelin@redhat.com>,
	Stefan Hajnoczi <stefanha@redhat.com>,
	virtualization <virtualization@lists.linux-foundation.org>,
	linux-kernel <linux-kernel@vger.kernel.org>,
	songmuchun@bytedance.com
Subject: Re: [PATCH 5/6] vduse: Support registering userspace memory for IOTLB
Date: Wed, 29 Jun 2022 18:19:31 +0800	[thread overview]
Message-ID: <CACycT3vaNLYRid5SsT11LuVCaGXbBfV=q7c7SUp1+r9BcRpwkw@mail.gmail.com> (raw)
In-Reply-To: <20220629055241-mutt-send-email-mst@kernel.org>

On Wed, Jun 29, 2022 at 5:54 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Wed, Jun 29, 2022 at 05:26:04PM +0800, Yongji Xie wrote:
> > On Wed, Jun 29, 2022 at 4:43 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Wed, Jun 29, 2022 at 04:25:40PM +0800, Xie Yongji wrote:
> > > > Introduce two ioctls: VDUSE_IOTLB_REG_UMEM and
> > > > VDUSE_IOTLB_DEREG_UMEM to support registering
> > > > and de-registering userspace memory for IOTLB
> > > > in virtio-vdpa case.
> > > >
> > > > Now it only supports registering userspace memory
> > > > for IOTLB as bounce buffer.
> > > >
> > > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> > > > ---
> > > >  drivers/vdpa/vdpa_user/vduse_dev.c | 138 +++++++++++++++++++++++++++++
> > > >  include/uapi/linux/vduse.h         |  28 ++++++
> > > >  2 files changed, 166 insertions(+)
> > > >
> > > > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> > > > index c47a5d9765cf..7b2ea7612da9 100644
> > > > --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> > > > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> > > > @@ -21,6 +21,7 @@
> > > >  #include <linux/uio.h>
> > > >  #include <linux/vdpa.h>
> > > >  #include <linux/nospec.h>
> > > > +#include <linux/sched/mm.h>
> > > >  #include <uapi/linux/vduse.h>
> > > >  #include <uapi/linux/vdpa.h>
> > > >  #include <uapi/linux/virtio_config.h>
> > > > @@ -64,6 +65,13 @@ struct vduse_vdpa {
> > > >       struct vduse_dev *dev;
> > > >  };
> > > >
> > > > +struct vduse_iotlb_mem {
> > > > +     unsigned long iova;
> > > > +     unsigned long npages;
> > > > +     struct page **pages;
> > > > +     struct mm_struct *mm;
> > > > +};
> > > > +
> > > >  struct vduse_dev {
> > > >       struct vduse_vdpa *vdev;
> > > >       struct device *dev;
> > > > @@ -95,6 +103,8 @@ struct vduse_dev {
> > > >       u8 status;
> > > >       u32 vq_num;
> > > >       u32 vq_align;
> > > > +     struct vduse_iotlb_mem *iotlb_mem;
> > > > +     struct mutex mem_lock;
> > > >  };
> > > >
> > > >  struct vduse_dev_msg {
> > > > @@ -917,6 +927,100 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> > > >       return ret;
> > > >  }
> > > >
> > > > +static int vduse_dev_dereg_iotlb_mem(struct vduse_dev *dev,
> > > > +                                  u64 iova, u64 size)
> > > > +{
> > > > +     int ret;
> > > > +
> > > > +     mutex_lock(&dev->mem_lock);
> > > > +     ret = -ENOENT;
> > > > +     if (!dev->iotlb_mem)
> > > > +             goto unlock;
> > > > +
> > > > +     ret = -EINVAL;
> > > > +     if (dev->iotlb_mem->iova != iova || size != dev->domain->bounce_size)
> > > > +             goto unlock;
> > > > +
> > > > +     vduse_domain_remove_user_bounce_pages(dev->domain);
> > > > +     unpin_user_pages(dev->iotlb_mem->pages, dev->iotlb_mem->npages);
> > >
> > > I notice you don't mark the pages dirty. This is going to be a problem.
> > >
> >
> > Thanks for pointing this out, I will use unpin_user_pages_dirty_lock() instead.
> >
> > > > +     atomic64_sub(dev->iotlb_mem->npages, &dev->iotlb_mem->mm->pinned_vm);
> > > > +     mmdrop(dev->iotlb_mem->mm);
> > > > +     vfree(dev->iotlb_mem->pages);
> > > > +     kfree(dev->iotlb_mem);
> > > > +     dev->iotlb_mem = NULL;
> > > > +     ret = 0;
> > > > +unlock:
> > > > +     mutex_unlock(&dev->mem_lock);
> > > > +     return ret;
> > > > +}
> > > > +
> > > > +static int vduse_dev_reg_iotlb_mem(struct vduse_dev *dev,
> > > > +                                u64 iova, u64 uaddr, u64 size)
> > > > +{
> > > > +     struct page **page_list = NULL;
> > > > +     struct vduse_iotlb_mem *mem = NULL;
> > > > +     long pinned = 0;
> > > > +     unsigned long npages, lock_limit;
> > > > +     int ret;
> > > > +
> > > > +     if (size != dev->domain->bounce_size ||
> > > > +         iova != 0 || uaddr & ~PAGE_MASK)
> > > > +             return -EINVAL;
> > > > +
> > > > +     mutex_lock(&dev->mem_lock);
> > > > +     ret = -EEXIST;
> > > > +     if (dev->iotlb_mem)
> > > > +             goto unlock;
> > > > +
> > > > +     ret = -ENOMEM;
> > > > +     npages = size >> PAGE_SHIFT;
> > > > +     page_list = vmalloc(array_size(npages,
> > > > +                         sizeof(struct page *)));
> > >
> > > Is this basically trying to do a vmalloc with userspace-controlled size?
> > > That's an easy DOS vector.
> > >
> >
> > We already checked the size before. The size must be equal to (64MB >>
> > PAGE_SHIFT) now.
>
> That's not a small amount. Can this be accounted e.g. through cgroups at least?
>

Makes sense, I will use __vmalloc(__GFP_ACCOUNT) instead.

> > > > +     mem = kzalloc(sizeof(*mem), GFP_KERNEL);
> > > > +     if (!page_list || !mem)
> > > > +             goto unlock;
> > > > +
> > > > +     mmap_read_lock(current->mm);
> > > > +
> > > > +     lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
> > > > +     if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
> > > > +             goto out;
> > > > +
> > > > +     pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
> > > > +                             page_list, NULL);
> > > > +     if (pinned != npages) {
> > > > +             ret = pinned < 0 ? pinned : -ENOMEM;
> > > > +             goto out;
> > > > +     }
> > >
> > >
> > > This is a popular approach but it's problematic if multiple
> > > devices try to pin the same page.
> >
> > Do you mean the data would be corrupted if multiple devices use the
> > same page as a bounce buffer? This is indeed a problem.
>
> No, I mean you decrement the lock twice. The question is, can two bounce
> buffers share a page?
>

I think we can't. I will find a way to prevent it.

Thanks,
Yongji

  reply	other threads:[~2022-06-29 10:19 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-29  8:25 [PATCH 0/6] VDUSE: Support registering userspace memory as bounce buffer Xie Yongji
2022-06-29  8:25 ` [PATCH 1/6] vduse: Remove unnecessary spin lock protection Xie Yongji
2022-06-29  8:25 ` [PATCH 2/6] vduse: Use memcpy_{to,from}_page() in do_bounce() Xie Yongji
2022-06-29  8:25 ` [PATCH 3/6] vduse: Support using userspace pages as bounce buffer Xie Yongji
2022-06-29  8:25 ` [PATCH 4/6] vduse: Support querying IOTLB information Xie Yongji
2022-06-29  8:25 ` [PATCH 5/6] vduse: Support registering userspace memory for IOTLB Xie Yongji
2022-06-29  8:42   ` Michael S. Tsirkin
2022-06-29  8:42     ` Michael S. Tsirkin
2022-06-29  9:26     ` Yongji Xie
2022-06-29  9:54       ` Michael S. Tsirkin
2022-06-29  9:54         ` Michael S. Tsirkin
2022-06-29 10:19         ` Yongji Xie [this message]
2022-06-29 11:28           ` Michael S. Tsirkin
2022-06-29 11:28             ` Michael S. Tsirkin
2022-06-29  8:25 ` [PATCH 6/6] vduse: Update api version to 1 Xie Yongji
2022-06-29  8:33   ` Michael S. Tsirkin
2022-06-29  8:33     ` Michael S. Tsirkin
2022-06-29  9:02     ` Yongji Xie
2022-06-29  9:22       ` Michael S. Tsirkin
2022-06-29  9:22         ` Michael S. Tsirkin
2022-06-29  9:28         ` Yongji Xie
2022-07-04  9:26 ` [PATCH 0/6] VDUSE: Support registering userspace memory as bounce buffer Liu Xiaodong
2022-07-04  9:26   ` Liu Xiaodong
2022-07-04 10:02   ` Yongji Xie

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CACycT3vaNLYRid5SsT11LuVCaGXbBfV=q7c7SUp1+r9BcRpwkw@mail.gmail.com' \
    --to=xieyongji@bytedance.com \
    --cc=jasowang@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maxime.coquelin@redhat.com \
    --cc=mst@redhat.com \
    --cc=songmuchun@bytedance.com \
    --cc=stefanha@redhat.com \
    --cc=virtualization@lists.linux-foundation.org \
    --cc=xiaodong.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.