From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path:
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S941117AbcKOF02 (ORCPT ); Tue, 15 Nov 2016 00:26:28 -0500
Received: from mail-pg0-f66.google.com ([74.125.83.66]:36476 "EHLO mail-pg0-f66.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753537AbcKOF00 (ORCPT ); Tue, 15 Nov 2016 00:26:26 -0500
Subject: Re: [PATCH v12 09/22] vfio iommu type1: Add task structure to vfio_dma
To: Kirti Wankhede , alex.williamson@redhat.com, pbonzini@redhat.com, kraxel@redhat.com, cjia@nvidia.com
References: <1479138156-28905-1-git-send-email-kwankhede@nvidia.com> <1479138156-28905-10-git-send-email-kwankhede@nvidia.com>
Cc: qemu-devel@nongnu.org, kvm@vger.kernel.org, kevin.tian@intel.com, jike.song@intel.com, bjsdjshi@linux.vnet.ibm.com, linux-kernel@vger.kernel.org
From: Alexey Kardashevskiy
Message-ID: <495eaa7c-9702-3989-dced-83528c40f80d@ozlabs.ru>
Date: Tue, 15 Nov 2016 16:26:18 +1100
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Thunderbird/45.4.0
MIME-Version: 1.0
In-Reply-To: <1479138156-28905-10-git-send-email-kwankhede@nvidia.com>
Content-Type: text/plain; charset=koi8-r
Content-Transfer-Encoding: 7bit
Sender: linux-kernel-owner@vger.kernel.org
List-ID:
X-Mailing-List: linux-kernel@vger.kernel.org

On 15/11/16 02:42, Kirti Wankhede wrote:
> Add task structure to vfio_dma structure.
> During DMA_UNMAP, same task who mapped it or other task who shares same
> address space is allowed to unmap, otherwise unmap fails.
> QEMU maps few iova ranges initially, then fork threads and from the child
> thread calls DMA_UNMAP on previously mapped iova. Since child shares same
> address space, DMA_UNMAP is successful.

Please add a few words on why you reference the task instead of the mm;
afaict you only use the mm. Thanks.

> 
> Signed-off-by: Kirti Wankhede
> Signed-off-by: Neo Jia
> Change-Id: I7600f1bea6b384fd589fa72421ccf031bcfd9ac5
> ---
>  drivers/vfio/vfio_iommu_type1.c | 137 +++++++++++++++++++++++++---------------
>  1 file changed, 86 insertions(+), 51 deletions(-)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index ffe2026f1341..50aca95cf61e 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -36,6 +36,7 @@
>  #include
>  #include
>  #include
> +#include
>  
>  #define DRIVER_VERSION  "0.2"
>  #define DRIVER_AUTHOR   "Alex Williamson "
> @@ -75,6 +76,7 @@ struct vfio_dma {
>          unsigned long           vaddr;          /* Process virtual addr */
>          size_t                  size;           /* Map size (bytes) */
>          int                     prot;           /* IOMMU_READ/WRITE */
> +        struct task_struct      *task;
>  };
>  
>  struct vfio_group {
> @@ -277,41 +279,47 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
>   * the iommu can only map chunks of consecutive pfns anyway, so get the
>   * first page and all consecutive pages with the same locking.
>   */
> -static long vfio_pin_pages_remote(unsigned long vaddr, long npage,
> -                                  int prot, unsigned long *pfn_base)
> +static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
> +                                  long npage, int prot, unsigned long *pfn_base)
>  {
> -        unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> -        bool lock_cap = capable(CAP_IPC_LOCK);
> +        unsigned long limit;
> +        bool lock_cap = ns_capable(task_active_pid_ns(dma->task)->user_ns,
> +                                   CAP_IPC_LOCK);
> +        struct mm_struct *mm;
>          long ret, i;
>          bool rsvd;
>  
> -        if (!current->mm)
> +        mm = get_task_mm(dma->task);
> +        if (!mm)
>                  return -ENODEV;
>  
> -        ret = vaddr_get_pfn(current->mm, vaddr, prot, pfn_base);
> +        ret = vaddr_get_pfn(mm, vaddr, prot, pfn_base);
>          if (ret)
> -                return ret;
> +                goto pin_pg_remote_exit;
>  
>          rsvd = is_invalid_reserved_pfn(*pfn_base);
> +        limit = task_rlimit(dma->task, RLIMIT_MEMLOCK) >> PAGE_SHIFT;
>  
> -        if (!rsvd && !lock_cap && current->mm->locked_vm + 1 > limit) {
> +        if (!rsvd && !lock_cap && mm->locked_vm + 1 > limit) {
>                  put_pfn(*pfn_base, prot);
>                  pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
>                          limit << PAGE_SHIFT);
> -                return -ENOMEM;
> +                ret = -ENOMEM;
> +                goto pin_pg_remote_exit;
>          }
>  
>          if (unlikely(disable_hugepages)) {
>                  if (!rsvd)
> -                        vfio_lock_acct(current, 1);
> -                return 1;
> +                        vfio_lock_acct(dma->task, 1);
> +                ret = 1;
> +                goto pin_pg_remote_exit;
>          }
>  
>          /* Lock all the consecutive pages from pfn_base */
>          for (i = 1, vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) {
>                  unsigned long pfn = 0;
>  
> -                ret = vaddr_get_pfn(current->mm, vaddr, prot, &pfn);
> +                ret = vaddr_get_pfn(mm, vaddr, prot, &pfn);
>                  if (ret)
>                          break;
>  
> @@ -321,8 +329,7 @@ static long vfio_pin_pages_remote(unsigned long vaddr, long npage,
>                          break;
>                  }
>  
> -                if (!rsvd && !lock_cap &&
> -                    current->mm->locked_vm + i + 1 > limit) {
> +                if (!rsvd && !lock_cap && mm->locked_vm + i + 1 > limit) {
>                          put_pfn(pfn, prot);
>                          pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
>                                  __func__, limit << PAGE_SHIFT);
> @@ -331,13 +338,16 @@ static long vfio_pin_pages_remote(unsigned long vaddr, long npage,
>          }
>  
>          if (!rsvd)
> -                vfio_lock_acct(current, i);
> +                vfio_lock_acct(dma->task, i);
> +        ret = i;
>  
> -        return i;
> +pin_pg_remote_exit:
> +        mmput(mm);
> +        return ret;
>  }
>  
> -static long vfio_unpin_pages_remote(unsigned long pfn, long npage,
> -                                    int prot, bool do_accounting)
> +static long vfio_unpin_pages_remote(struct vfio_dma *dma, unsigned long pfn,
> +                                    long npage, int prot, bool do_accounting)
>  {
>          unsigned long unlocked = 0;
>          long i;
>  
> @@ -346,7 +356,7 @@ static long vfio_unpin_pages_remote(unsigned long pfn, long npage,
>                  unlocked += put_pfn(pfn++, prot);
>  
>          if (do_accounting)
> -                vfio_lock_acct(current, -unlocked);
> +                vfio_lock_acct(dma->task, -unlocked);
>  
>          return unlocked;
>  }
>  
> @@ -400,7 +410,7 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
>                  if (WARN_ON(!unmapped))
>                          break;
>  
> -                unlocked += vfio_unpin_pages_remote(phys >> PAGE_SHIFT,
> +                unlocked += vfio_unpin_pages_remote(dma, phys >> PAGE_SHIFT,
>                                                      unmapped >> PAGE_SHIFT,
>                                                      dma->prot, false);
>                  iova += unmapped;
> @@ -408,13 +418,14 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
>                  cond_resched();
>          }
>  
> -        vfio_lock_acct(current, -unlocked);
> +        vfio_lock_acct(dma->task, -unlocked);
>  }
>  
>  static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
>  {
>          vfio_unmap_unpin(iommu, dma);
>          vfio_unlink_dma(iommu, dma);
> +        put_task_struct(dma->task);
>          kfree(dma);
>  }
>  
> @@ -510,6 +521,12 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
>          while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) {
>                  if (!iommu->v2 && unmap->iova > dma->iova)
>                          break;
> +                /*
> +                 * Task with same address space who mapped this iova range is
> +                 * allowed to unmap the iova range.
> +                 */
> +                if (dma->task->mm != current->mm)
> +                        break;
>                  unmapped += dma->size;
>                  vfio_remove_dma(iommu, dma);
>          }
>  
> @@ -576,17 +593,55 @@ unwind:
>          return ret;
>  }
>  
> +static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
> +                            size_t map_size)
> +{
> +        dma_addr_t iova = dma->iova;
> +        unsigned long vaddr = dma->vaddr;
> +        size_t size = map_size;
> +        long npage;
> +        unsigned long pfn;
> +        int ret = 0;
> +
> +        while (size) {
> +                /* Pin a contiguous chunk of memory */
> +                npage = vfio_pin_pages_remote(dma, vaddr + dma->size,
> +                                              size >> PAGE_SHIFT, dma->prot,
> +                                              &pfn);
> +                if (npage <= 0) {
> +                        WARN_ON(!npage);
> +                        ret = (int)npage;
> +                        break;
> +                }
> +
> +                /* Map it! */
> +                ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage,
> +                                     dma->prot);
> +                if (ret) {
> +                        vfio_unpin_pages_remote(dma, pfn, npage,
> +                                                dma->prot, true);
> +                        break;
> +                }
> +
> +                size -= npage << PAGE_SHIFT;
> +                dma->size += npage << PAGE_SHIFT;
> +        }
> +
> +        if (ret)
> +                vfio_remove_dma(iommu, dma);
> +
> +        return ret;
> +}
> +
>  static int vfio_dma_do_map(struct vfio_iommu *iommu,
>                             struct vfio_iommu_type1_dma_map *map)
>  {
>          dma_addr_t iova = map->iova;
>          unsigned long vaddr = map->vaddr;
>          size_t size = map->size;
> -        long npage;
>          int ret = 0, prot = 0;
>          uint64_t mask;
>          struct vfio_dma *dma;
> -        unsigned long pfn;
>  
>          /* Verify that none of our __u64 fields overflow */
>          if (map->size != size || map->vaddr != vaddr || map->iova != iova)
> @@ -612,47 +667,27 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
>          mutex_lock(&iommu->lock);
>  
>          if (vfio_find_dma(iommu, iova, size)) {
> -                mutex_unlock(&iommu->lock);
> -                return -EEXIST;
> +                ret = -EEXIST;
> +                goto do_map_err;
>          }
>  
>          dma = kzalloc(sizeof(*dma), GFP_KERNEL);
>          if (!dma) {
> -                mutex_unlock(&iommu->lock);
> -                return -ENOMEM;
> +                ret = -ENOMEM;
> +                goto do_map_err;
>          }
>  
>          dma->iova = iova;
>          dma->vaddr = vaddr;
>          dma->prot = prot;
> +        get_task_struct(current);
> +        dma->task = current;
>  
>          /* Insert zero-sized and grow as we map chunks of it */
>          vfio_link_dma(iommu, dma);
>  
> -        while (size) {
> -                /* Pin a contiguous chunk of memory */
> -                npage = vfio_pin_pages_remote(vaddr + dma->size,
> -                                              size >> PAGE_SHIFT, prot, &pfn);
> -                if (npage <= 0) {
> -                        WARN_ON(!npage);
> -                        ret = (int)npage;
> -                        break;
> -                }
> -
> -                /* Map it! */
> -                ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage, prot);
> -                if (ret) {
> -                        vfio_unpin_pages_remote(pfn, npage, prot, true);
> -                        break;
> -                }
> -
> -                size -= npage << PAGE_SHIFT;
> -                dma->size += npage << PAGE_SHIFT;
> -        }
> -
> -        if (ret)
> -                vfio_remove_dma(iommu, dma);
> -
> +        ret = vfio_pin_map_dma(iommu, dma, size);
> +do_map_err:
>          mutex_unlock(&iommu->lock);
>          return ret;
>  }
> 

-- 
Alexey