Hi, In patch series[1], I put forward some fixes and optimizations for vfio_iommu_type1. This extracts and improves the optimization part of it. [1] https://lore.kernel.org/linux-iommu/20201210073425.25960-1-zhukeqian1@huawei.com/T/#t Thanks, Keqian Keqian Zhu (6): vfio/iommu_type1: Make an explicit "promote" semantic vfio/iommu_type1: Ignore external domain when promote pinned_scope vfio/iommu_type1: Initially set the pinned_page_dirty_scope vfio/iommu_type1: Drop parameter "pgsize" of vfio_dma_bitmap_alloc_all vfio/iommu_type1: Drop parameter "pgsize" of vfio_iova_dirty_bitmap vfio/iommu_type1: Drop parameter "pgsize" of update_user_bitmap drivers/vfio/vfio_iommu_type1.c | 67 +++++++++++++-------------------- 1 file changed, 26 insertions(+), 41 deletions(-) -- 2.19.1
When we want to promote the pinned_page_dirty_scope of vfio_iommu, we call the "update" function to visit all vfio_group, but when we want to downgrade this, we can set the flag as false directly. So we'd better make an explicit "promote" semantic to the "update" function. BTW, if vfio_iommu already has been promoted, then return early. Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com> --- drivers/vfio/vfio_iommu_type1.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 0b4dedaa9128..334a8240e1da 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -148,7 +148,7 @@ static int put_pfn(unsigned long pfn, int prot); static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, struct iommu_group *iommu_group); -static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu); +static void promote_pinned_page_dirty_scope(struct vfio_iommu *iommu); /* * This code handles mapping and unmapping of user data buffers * into DMA'ble space using the IOMMU @@ -714,7 +714,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, group = vfio_iommu_find_iommu_group(iommu, iommu_group); if (!group->pinned_page_dirty_scope) { group->pinned_page_dirty_scope = true; - update_pinned_page_dirty_scope(iommu); + promote_pinned_page_dirty_scope(iommu); } goto pin_done; @@ -1622,27 +1622,26 @@ static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, return group; } -static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu) +static void promote_pinned_page_dirty_scope(struct vfio_iommu *iommu) { struct vfio_domain *domain; struct vfio_group *group; + if (iommu->pinned_page_dirty_scope) + return; + list_for_each_entry(domain, &iommu->domain_list, next) { list_for_each_entry(group, &domain->group_list, next) { - if (!group->pinned_page_dirty_scope) { - iommu->pinned_page_dirty_scope = false; + if (!group->pinned_page_dirty_scope) return; - } } } if (iommu->external_domain) { domain = iommu->external_domain; list_for_each_entry(group, &domain->group_list, next) { - if (!group->pinned_page_dirty_scope) { - iommu->pinned_page_dirty_scope = false; + if (!group->pinned_page_dirty_scope) return; - } } } @@ -2057,8 +2056,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, * addition of a dirty tracking group. */ group->pinned_page_dirty_scope = true; - if (!iommu->pinned_page_dirty_scope) - update_pinned_page_dirty_scope(iommu); + promote_pinned_page_dirty_scope(iommu); mutex_unlock(&iommu->lock); return 0; @@ -2341,7 +2339,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, struct vfio_iommu *iommu = iommu_data; struct vfio_domain *domain; struct vfio_group *group; - bool update_dirty_scope = false; + bool promote_dirty_scope = false; LIST_HEAD(iova_copy); mutex_lock(&iommu->lock); @@ -2349,7 +2347,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, if (iommu->external_domain) { group = find_iommu_group(iommu->external_domain, iommu_group); if (group) { - update_dirty_scope = !group->pinned_page_dirty_scope; + promote_dirty_scope = !group->pinned_page_dirty_scope; list_del(&group->next); kfree(group); @@ -2379,7 +2377,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, continue; vfio_iommu_detach_group(domain, group); - update_dirty_scope = !group->pinned_page_dirty_scope; + promote_dirty_scope = !group->pinned_page_dirty_scope; list_del(&group->next); kfree(group); /* @@ -2415,8 +2413,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, * Removal of a group without dirty tracking may allow the iommu scope * to be promoted. */ - if (update_dirty_scope) - update_pinned_page_dirty_scope(iommu); + if (promote_dirty_scope) + promote_pinned_page_dirty_scope(iommu); mutex_unlock(&iommu->lock); } -- 2.19.1
The pinned_scope of external domain's groups are always true, that's to say we can safely ignore external domain when promote pinned_scope status of vfio_iommu. Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com> --- drivers/vfio/vfio_iommu_type1.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 334a8240e1da..110ada24ee91 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1637,14 +1637,7 @@ static void promote_pinned_page_dirty_scope(struct vfio_iommu *iommu) } } - if (iommu->external_domain) { - domain = iommu->external_domain; - list_for_each_entry(group, &domain->group_list, next) { - if (!group->pinned_page_dirty_scope) - return; - } - } - + /* The external domain always passes check */ iommu->pinned_page_dirty_scope = true; } @@ -2347,7 +2340,6 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, if (iommu->external_domain) { group = find_iommu_group(iommu->external_domain, iommu_group); if (group) { - promote_dirty_scope = !group->pinned_page_dirty_scope; list_del(&group->next); kfree(group); @@ -2360,7 +2352,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, kfree(iommu->external_domain); iommu->external_domain = NULL; } - goto detach_group_done; + mutex_unlock(&iommu->lock); + return; } } @@ -2408,7 +2401,6 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, else vfio_iommu_iova_free(&iova_copy); -detach_group_done: /* * Removal of a group without dirty tracking may allow the iommu scope * to be promoted. -- 2.19.1
For now there are 3 ways to promote the pinned_page_dirty_scope status of vfio_iommu: 1. Through vfio pin interface. 2. Detach a group without pinned_dirty_scope. 3. Attach a group with pinned_dirty_scope. For point 3, the only chance to promote the pinned_page_dirty_scope status is when vfio_iommu is newly created. As we can safely set empty vfio_iommu to be at pinned status, then the point 3 can be removed to reduce operations. Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com> --- drivers/vfio/vfio_iommu_type1.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 110ada24ee91..b596c482487b 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -2045,11 +2045,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, * Non-iommu backed group cannot dirty memory directly, * it can only use interfaces that provide dirty * tracking. - * The iommu scope can only be promoted with the - * addition of a dirty tracking group. */ group->pinned_page_dirty_scope = true; - promote_pinned_page_dirty_scope(iommu); mutex_unlock(&iommu->lock); return 0; @@ -2436,6 +2433,7 @@ static void *vfio_iommu_type1_open(unsigned long arg) INIT_LIST_HEAD(&iommu->iova_list); iommu->dma_list = RB_ROOT; iommu->dma_avail = dma_entry_limit; + iommu->pinned_page_dirty_scope = true; mutex_init(&iommu->lock); BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier); -- 2.19.1
We always use the smallest supported page size of vfio_iommu as pgsize. Remove parameter "pgsize" of vfio_dma_bitmap_alloc_all. Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com> --- drivers/vfio/vfio_iommu_type1.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index b596c482487b..080c05b129ee 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -236,9 +236,10 @@ static void vfio_dma_populate_bitmap(struct vfio_dma *dma, size_t pgsize) } } -static int vfio_dma_bitmap_alloc_all(struct vfio_iommu *iommu, size_t pgsize) +static int vfio_dma_bitmap_alloc_all(struct vfio_iommu *iommu) { struct rb_node *n; + size_t pgsize = (size_t)1 << __ffs(iommu->pgsize_bitmap); for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) { struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node); @@ -2761,12 +2762,9 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu, return -EINVAL; if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_START) { - size_t pgsize; - mutex_lock(&iommu->lock); - pgsize = 1 << __ffs(iommu->pgsize_bitmap); if (!iommu->dirty_page_tracking) { - ret = vfio_dma_bitmap_alloc_all(iommu, pgsize); + ret = vfio_dma_bitmap_alloc_all(iommu); if (!ret) iommu->dirty_page_tracking = true; } -- 2.19.1
We always use the smallest supported page size of vfio_iommu as pgsize. Remove parameter "pgsize" of vfio_iova_dirty_bitmap. Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com> --- drivers/vfio/vfio_iommu_type1.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 080c05b129ee..82649a040148 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1015,11 +1015,12 @@ static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, } static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, - dma_addr_t iova, size_t size, size_t pgsize) + dma_addr_t iova, size_t size) { struct vfio_dma *dma; struct rb_node *n; - unsigned long pgshift = __ffs(pgsize); + unsigned long pgshift = __ffs(iommu->pgsize_bitmap); + size_t pgsize = (size_t)1 << pgshift; int ret; /* @@ -2824,8 +2825,7 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu, if (iommu->dirty_page_tracking) ret = vfio_iova_dirty_bitmap(range.bitmap.data, iommu, range.iova, - range.size, - range.bitmap.pgsize); + range.size); else ret = -EINVAL; out_unlock: -- 2.19.1
We always use the smallest supported page size of vfio_iommu as pgsize. Drop parameter "pgsize" of update_user_bitmap. Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com> --- drivers/vfio/vfio_iommu_type1.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 82649a040148..bceda5e8baaa 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -978,10 +978,9 @@ static void vfio_update_pgsize_bitmap(struct vfio_iommu *iommu) } static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, - struct vfio_dma *dma, dma_addr_t base_iova, - size_t pgsize) + struct vfio_dma *dma, dma_addr_t base_iova) { - unsigned long pgshift = __ffs(pgsize); + unsigned long pgshift = __ffs(iommu->pgsize_bitmap); unsigned long nbits = dma->size >> pgshift; unsigned long bit_offset = (dma->iova - base_iova) >> pgshift; unsigned long copy_offset = bit_offset / BITS_PER_LONG; @@ -1046,7 +1045,7 @@ static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, if (dma->iova > iova + size - 1) break; - ret = update_user_bitmap(bitmap, iommu, dma, iova, pgsize); + ret = update_user_bitmap(bitmap, iommu, dma, iova); if (ret) return ret; @@ -1192,7 +1191,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, if (unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) { ret = update_user_bitmap(bitmap->data, iommu, dma, - unmap->iova, pgsize); + unmap->iova); if (ret) break; } -- 2.19.1
On Thu, 7 Jan 2021 12:43:56 +0800 Keqian Zhu <zhukeqian1@huawei.com> wrote: > When we want to promote the pinned_page_dirty_scope of vfio_iommu, > we call the "update" function to visit all vfio_group, but when we > want to downgrade this, we can set the flag as false directly. I agree that the transition can only go in one direction, but it's still conditional on the scope of all groups involved. We are "updating" the iommu state based on the change of a group. Renaming this to "promote" seems like a matter of personal preference. > So we'd better make an explicit "promote" semantic to the "update" > function. BTW, if vfio_iommu already has been promoted, then return > early. Currently it's the caller that avoids using this function when the iommu scope is already correct. In fact the changes induces a redundant test in the pin_pages code path, we're changing a group from non-pinned-page-scope to pinned-page-scope, therefore the iommu scope cannot initially be scope limited. In the attach_group call path, we're moving that test from the caller, so at best we've introduced an additional function call. The function as it exists today is also more versatile whereas the "promote" version here forces it to a single task with no appreciable difference in complexity or code. This seems like a frivolous change. Thanks, Alex > Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com> > --- > drivers/vfio/vfio_iommu_type1.c | 30 ++++++++++++++---------------- > 1 file changed, 14 insertions(+), 16 deletions(-) > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index 0b4dedaa9128..334a8240e1da 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -148,7 +148,7 @@ static int put_pfn(unsigned long pfn, int prot); > static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, > struct iommu_group *iommu_group); > > -static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu); > +static void promote_pinned_page_dirty_scope(struct vfio_iommu *iommu); > /* > * This code handles mapping and unmapping of user data buffers > * into DMA'ble space using the IOMMU > @@ -714,7 +714,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, > group = vfio_iommu_find_iommu_group(iommu, iommu_group); > if (!group->pinned_page_dirty_scope) { > group->pinned_page_dirty_scope = true; > - update_pinned_page_dirty_scope(iommu); > + promote_pinned_page_dirty_scope(iommu); > } > > goto pin_done; > @@ -1622,27 +1622,26 @@ static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, > return group; > } > > -static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu) > +static void promote_pinned_page_dirty_scope(struct vfio_iommu *iommu) > { > struct vfio_domain *domain; > struct vfio_group *group; > > + if (iommu->pinned_page_dirty_scope) > + return; > + > list_for_each_entry(domain, &iommu->domain_list, next) { > list_for_each_entry(group, &domain->group_list, next) { > - if (!group->pinned_page_dirty_scope) { > - iommu->pinned_page_dirty_scope = false; > + if (!group->pinned_page_dirty_scope) > return; > - } > } > } > > if (iommu->external_domain) { > domain = iommu->external_domain; > list_for_each_entry(group, &domain->group_list, next) { > - if (!group->pinned_page_dirty_scope) { > - iommu->pinned_page_dirty_scope = false; > + if (!group->pinned_page_dirty_scope) > return; > - } > } > } > > @@ -2057,8 +2056,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, > * addition of a dirty tracking group. > */ > group->pinned_page_dirty_scope = true; > - if (!iommu->pinned_page_dirty_scope) > - update_pinned_page_dirty_scope(iommu); > + promote_pinned_page_dirty_scope(iommu); > mutex_unlock(&iommu->lock); > > return 0; > @@ -2341,7 +2339,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, > struct vfio_iommu *iommu = iommu_data; > struct vfio_domain *domain; > struct vfio_group *group; > - bool update_dirty_scope = false; > + bool promote_dirty_scope = false; > LIST_HEAD(iova_copy); > > mutex_lock(&iommu->lock); > @@ -2349,7 +2347,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, > if (iommu->external_domain) { > group = find_iommu_group(iommu->external_domain, iommu_group); > if (group) { > - update_dirty_scope = !group->pinned_page_dirty_scope; > + promote_dirty_scope = !group->pinned_page_dirty_scope; > list_del(&group->next); > kfree(group); > > @@ -2379,7 +2377,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, > continue; > > vfio_iommu_detach_group(domain, group); > - update_dirty_scope = !group->pinned_page_dirty_scope; > + promote_dirty_scope = !group->pinned_page_dirty_scope; > list_del(&group->next); > kfree(group); > /* > @@ -2415,8 +2413,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, > * Removal of a group without dirty tracking may allow the iommu scope > * to be promoted. > */ > - if (update_dirty_scope) > - update_pinned_page_dirty_scope(iommu); > + if (promote_dirty_scope) > + promote_pinned_page_dirty_scope(iommu); > mutex_unlock(&iommu->lock); > } >
On Thu, 7 Jan 2021 12:43:57 +0800 Keqian Zhu <zhukeqian1@huawei.com> wrote: > The pinned_scope of external domain's groups are always true, that's > to say we can safely ignore external domain when promote pinned_scope > status of vfio_iommu. > > Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com> > --- > drivers/vfio/vfio_iommu_type1.c | 14 +++----------- > 1 file changed, 3 insertions(+), 11 deletions(-) > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index 334a8240e1da..110ada24ee91 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -1637,14 +1637,7 @@ static void promote_pinned_page_dirty_scope(struct vfio_iommu *iommu) > } > } > > - if (iommu->external_domain) { > - domain = iommu->external_domain; > - list_for_each_entry(group, &domain->group_list, next) { > - if (!group->pinned_page_dirty_scope) > - return; > - } > - } > - > + /* The external domain always passes check */ > iommu->pinned_page_dirty_scope = true; > } > > @@ -2347,7 +2340,6 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, > if (iommu->external_domain) { > group = find_iommu_group(iommu->external_domain, iommu_group); > if (group) { > - promote_dirty_scope = !group->pinned_page_dirty_scope; With this, vfio_group.pinned_page_dirty_scope is effectively a dead field on the struct for groups on the external_domain group list and handled specially. That's not great. If you actually want to make more than a trivial improvement to scope tracking, what about making a counter on our struct vfio_iommu for all the non-pinned-page (ie. all-dma) scope groups attached to the container. Groups on the external domain would still set their group dirty scope to pinned pages, groups making use of an iommu domain would have an all-dma scope initially and increment that counter when attached. Groups that still have an all-dma scope on detach would decrement the counter. If a group changes from all-dma to pinned-page scope, the counter is also decremented. We'd never need to search across group lists. Thanks, Alex > list_del(&group->next); > kfree(group); > > @@ -2360,7 +2352,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, > kfree(iommu->external_domain); > iommu->external_domain = NULL; > } > - goto detach_group_done; > + mutex_unlock(&iommu->lock); > + return; > } > } > > @@ -2408,7 +2401,6 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, > else > vfio_iommu_iova_free(&iova_copy); > > -detach_group_done: > /* > * Removal of a group without dirty tracking may allow the iommu scope > * to be promoted.
On Thu, 7 Jan 2021 12:43:58 +0800
Keqian Zhu <zhukeqian1@huawei.com> wrote:
> For now there are 3 ways to promote the pinned_page_dirty_scope
> status of vfio_iommu:
>
> 1. Through vfio pin interface.
> 2. Detach a group without pinned_dirty_scope.
> 3. Attach a group with pinned_dirty_scope.
>
> For point 3, the only chance to promote the pinned_page_dirty_scope
> status is when vfio_iommu is newly created. As we can safely set
> empty vfio_iommu to be at pinned status, then the point 3 can be
> removed to reduce operations.
>
> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
> ---
> drivers/vfio/vfio_iommu_type1.c | 4 +---
> 1 file changed, 1 insertion(+), 3 deletions(-)
>
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 110ada24ee91..b596c482487b 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -2045,11 +2045,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
> * Non-iommu backed group cannot dirty memory directly,
> * it can only use interfaces that provide dirty
> * tracking.
> - * The iommu scope can only be promoted with the
> - * addition of a dirty tracking group.
> */
> group->pinned_page_dirty_scope = true;
> - promote_pinned_page_dirty_scope(iommu);
> mutex_unlock(&iommu->lock);
>
> return 0;
> @@ -2436,6 +2433,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
> INIT_LIST_HEAD(&iommu->iova_list);
> iommu->dma_list = RB_ROOT;
> iommu->dma_avail = dma_entry_limit;
> + iommu->pinned_page_dirty_scope = true;
> mutex_init(&iommu->lock);
> BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
>
This would be resolved automatically if we used the counter approach I
mentioned on the previous patch, adding a pinned-page scope group simply
wouldn't increment the iommu counter, which would initially be zero
indicating no "all-dma" groups. Thanks,
Alex
On Thu, 7 Jan 2021 12:43:59 +0800
Keqian Zhu <zhukeqian1@huawei.com> wrote:
> We always use the smallest supported page size of vfio_iommu as
> pgsize. Remove parameter "pgsize" of vfio_dma_bitmap_alloc_all.
>
> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
> ---
> drivers/vfio/vfio_iommu_type1.c | 8 +++-----
> 1 file changed, 3 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index b596c482487b..080c05b129ee 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -236,9 +236,10 @@ static void vfio_dma_populate_bitmap(struct vfio_dma *dma, size_t pgsize)
> }
> }
>
> -static int vfio_dma_bitmap_alloc_all(struct vfio_iommu *iommu, size_t pgsize)
> +static int vfio_dma_bitmap_alloc_all(struct vfio_iommu *iommu)
> {
> struct rb_node *n;
> + size_t pgsize = (size_t)1 << __ffs(iommu->pgsize_bitmap);
>
> for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
> struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
> @@ -2761,12 +2762,9 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
> return -EINVAL;
>
> if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_START) {
> - size_t pgsize;
> -
> mutex_lock(&iommu->lock);
> - pgsize = 1 << __ffs(iommu->pgsize_bitmap);
> if (!iommu->dirty_page_tracking) {
> - ret = vfio_dma_bitmap_alloc_all(iommu, pgsize);
> + ret = vfio_dma_bitmap_alloc_all(iommu);
> if (!ret)
> iommu->dirty_page_tracking = true;
> }
This just moves the same calculation from one place to another, what's
the point? Thanks,
Alex
On Thu, 7 Jan 2021 12:44:00 +0800
Keqian Zhu <zhukeqian1@huawei.com> wrote:
> We always use the smallest supported page size of vfio_iommu as
> pgsize. Remove parameter "pgsize" of vfio_iova_dirty_bitmap.
>
> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
> ---
> drivers/vfio/vfio_iommu_type1.c | 8 ++++----
> 1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 080c05b129ee..82649a040148 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -1015,11 +1015,12 @@ static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
> }
>
> static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
> - dma_addr_t iova, size_t size, size_t pgsize)
> + dma_addr_t iova, size_t size)
> {
> struct vfio_dma *dma;
> struct rb_node *n;
> - unsigned long pgshift = __ffs(pgsize);
> + unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
> + size_t pgsize = (size_t)1 << pgshift;
> int ret;
>
> /*
> @@ -2824,8 +2825,7 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
> if (iommu->dirty_page_tracking)
> ret = vfio_iova_dirty_bitmap(range.bitmap.data,
> iommu, range.iova,
> - range.size,
> - range.bitmap.pgsize);
> + range.size);
> else
> ret = -EINVAL;
> out_unlock:
In this case the caller has actually already calculated both pgsize and
pgshift, the better optimization would be to pass both rather than
recalculate. Thanks,
Alex
On Thu, 7 Jan 2021 12:44:01 +0800
Keqian Zhu <zhukeqian1@huawei.com> wrote:
> We always use the smallest supported page size of vfio_iommu as
> pgsize. Drop parameter "pgsize" of update_user_bitmap.
>
> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
> ---
> drivers/vfio/vfio_iommu_type1.c | 9 ++++-----
> 1 file changed, 4 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 82649a040148..bceda5e8baaa 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -978,10 +978,9 @@ static void vfio_update_pgsize_bitmap(struct vfio_iommu *iommu)
> }
>
> static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
> - struct vfio_dma *dma, dma_addr_t base_iova,
> - size_t pgsize)
> + struct vfio_dma *dma, dma_addr_t base_iova)
> {
> - unsigned long pgshift = __ffs(pgsize);
> + unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
> unsigned long nbits = dma->size >> pgshift;
> unsigned long bit_offset = (dma->iova - base_iova) >> pgshift;
> unsigned long copy_offset = bit_offset / BITS_PER_LONG;
> @@ -1046,7 +1045,7 @@ static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
> if (dma->iova > iova + size - 1)
> break;
>
> - ret = update_user_bitmap(bitmap, iommu, dma, iova, pgsize);
> + ret = update_user_bitmap(bitmap, iommu, dma, iova);
> if (ret)
> return ret;
>
> @@ -1192,7 +1191,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
>
> if (unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) {
> ret = update_user_bitmap(bitmap->data, iommu, dma,
> - unmap->iova, pgsize);
> + unmap->iova);
> if (ret)
> break;
> }
Same as the previous, both call sites already have both pgsize and
pgshift, pass both rather than recalculate. Thanks,
Alex
On 2021/1/16 6:42, Alex Williamson wrote: > On Thu, 7 Jan 2021 12:43:56 +0800 > Keqian Zhu <zhukeqian1@huawei.com> wrote: > >> When we want to promote the pinned_page_dirty_scope of vfio_iommu, >> we call the "update" function to visit all vfio_group, but when we >> want to downgrade this, we can set the flag as false directly. > > I agree that the transition can only go in one direction, but it's > still conditional on the scope of all groups involved. We are > "updating" the iommu state based on the change of a group. Renaming > this to "promote" seems like a matter of personal preference. > >> So we'd better make an explicit "promote" semantic to the "update" >> function. BTW, if vfio_iommu already has been promoted, then return >> early. > > Currently it's the caller that avoids using this function when the > iommu scope is already correct. In fact the changes induces a > redundant test in the pin_pages code path, we're changing a group from > non-pinned-page-scope to pinned-page-scope, therefore the iommu scope > cannot initially be scope limited. In the attach_group call path, > we're moving that test from the caller, so at best we've introduced an > additional function call. > > The function as it exists today is also more versatile whereas the > "promote" version here forces it to a single task with no appreciable > difference in complexity or code. This seems like a frivolous change. > Thanks, OK, I will adapt your idea that maintenance a counter of non-pinned groups. Then we keep the "update" semantic, and the target is the counter ;-). Thanks, Keqian > > Alex > >> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com> >> --- >> drivers/vfio/vfio_iommu_type1.c | 30 ++++++++++++++---------------- >> 1 file changed, 14 insertions(+), 16 deletions(-) >> >> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c >> index 0b4dedaa9128..334a8240e1da 100644 >> --- a/drivers/vfio/vfio_iommu_type1.c >> +++ b/drivers/vfio/vfio_iommu_type1.c >> @@ -148,7 +148,7 @@ static int put_pfn(unsigned long pfn, int prot); >> static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, >> struct iommu_group *iommu_group); >> >> -static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu); >> +static void promote_pinned_page_dirty_scope(struct vfio_iommu *iommu); >> /* >> * This code handles mapping and unmapping of user data buffers >> * into DMA'ble space using the IOMMU >> @@ -714,7 +714,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, >> group = vfio_iommu_find_iommu_group(iommu, iommu_group); >> if (!group->pinned_page_dirty_scope) { >> group->pinned_page_dirty_scope = true; >> - update_pinned_page_dirty_scope(iommu); >> + promote_pinned_page_dirty_scope(iommu); >> } >> >> goto pin_done; >> @@ -1622,27 +1622,26 @@ static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, >> return group; >> } >> >> -static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu) >> +static void promote_pinned_page_dirty_scope(struct vfio_iommu *iommu) >> { >> struct vfio_domain *domain; >> struct vfio_group *group; >> >> + if (iommu->pinned_page_dirty_scope) >> + return; >> + >> list_for_each_entry(domain, &iommu->domain_list, next) { >> list_for_each_entry(group, &domain->group_list, next) { >> - if (!group->pinned_page_dirty_scope) { >> - iommu->pinned_page_dirty_scope = false; >> + if (!group->pinned_page_dirty_scope) >> return; >> - } >> } >> } >> >> if (iommu->external_domain) { >> domain = iommu->external_domain; >> list_for_each_entry(group, &domain->group_list, next) { >> - if (!group->pinned_page_dirty_scope) { >> - iommu->pinned_page_dirty_scope = false; >> + if (!group->pinned_page_dirty_scope) >> return; >> - } >> } >> } >> >> @@ -2057,8 +2056,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, >> * addition of a dirty tracking group. >> */ >> group->pinned_page_dirty_scope = true; >> - if (!iommu->pinned_page_dirty_scope) >> - update_pinned_page_dirty_scope(iommu); >> + promote_pinned_page_dirty_scope(iommu); >> mutex_unlock(&iommu->lock); >> >> return 0; >> @@ -2341,7 +2339,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, >> struct vfio_iommu *iommu = iommu_data; >> struct vfio_domain *domain; >> struct vfio_group *group; >> - bool update_dirty_scope = false; >> + bool promote_dirty_scope = false; >> LIST_HEAD(iova_copy); >> >> mutex_lock(&iommu->lock); >> @@ -2349,7 +2347,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, >> if (iommu->external_domain) { >> group = find_iommu_group(iommu->external_domain, iommu_group); >> if (group) { >> - update_dirty_scope = !group->pinned_page_dirty_scope; >> + promote_dirty_scope = !group->pinned_page_dirty_scope; >> list_del(&group->next); >> kfree(group); >> >> @@ -2379,7 +2377,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, >> continue; >> >> vfio_iommu_detach_group(domain, group); >> - update_dirty_scope = !group->pinned_page_dirty_scope; >> + promote_dirty_scope = !group->pinned_page_dirty_scope; >> list_del(&group->next); >> kfree(group); >> /* >> @@ -2415,8 +2413,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, >> * Removal of a group without dirty tracking may allow the iommu scope >> * to be promoted. >> */ >> - if (update_dirty_scope) >> - update_pinned_page_dirty_scope(iommu); >> + if (promote_dirty_scope) >> + promote_pinned_page_dirty_scope(iommu); >> mutex_unlock(&iommu->lock); >> } >> > > . >
On 2021/1/16 7:30, Alex Williamson wrote: > On Thu, 7 Jan 2021 12:43:58 +0800 > Keqian Zhu <zhukeqian1@huawei.com> wrote: > >> For now there are 3 ways to promote the pinned_page_dirty_scope >> status of vfio_iommu: >> >> 1. Through vfio pin interface. >> 2. Detach a group without pinned_dirty_scope. >> 3. Attach a group with pinned_dirty_scope. >> >> For point 3, the only chance to promote the pinned_page_dirty_scope >> status is when vfio_iommu is newly created. As we can safely set >> empty vfio_iommu to be at pinned status, then the point 3 can be >> removed to reduce operations. >> >> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com> >> --- >> drivers/vfio/vfio_iommu_type1.c | 4 +--- >> 1 file changed, 1 insertion(+), 3 deletions(-) >> >> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c >> index 110ada24ee91..b596c482487b 100644 >> --- a/drivers/vfio/vfio_iommu_type1.c >> +++ b/drivers/vfio/vfio_iommu_type1.c >> @@ -2045,11 +2045,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, >> * Non-iommu backed group cannot dirty memory directly, >> * it can only use interfaces that provide dirty >> * tracking. >> - * The iommu scope can only be promoted with the >> - * addition of a dirty tracking group. >> */ >> group->pinned_page_dirty_scope = true; >> - promote_pinned_page_dirty_scope(iommu); >> mutex_unlock(&iommu->lock); >> >> return 0; >> @@ -2436,6 +2433,7 @@ static void *vfio_iommu_type1_open(unsigned long arg) >> INIT_LIST_HEAD(&iommu->iova_list); >> iommu->dma_list = RB_ROOT; >> iommu->dma_avail = dma_entry_limit; >> + iommu->pinned_page_dirty_scope = true; >> mutex_init(&iommu->lock); >> BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier); >> > > This would be resolved automatically if we used the counter approach I > mentioned on the previous patch, adding a pinned-page scope group simply > wouldn't increment the iommu counter, which would initially be zero > indicating no "all-dma" groups. Thanks, > Sure, I will do that, thanks. Keqian. > Alex > > . >
On 2021/1/16 7:44, Alex Williamson wrote: > On Thu, 7 Jan 2021 12:44:01 +0800 > Keqian Zhu <zhukeqian1@huawei.com> wrote: > >> We always use the smallest supported page size of vfio_iommu as >> pgsize. Drop parameter "pgsize" of update_user_bitmap. >> >> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com> >> --- >> drivers/vfio/vfio_iommu_type1.c | 9 ++++----- >> 1 file changed, 4 insertions(+), 5 deletions(-) >> >> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c >> index 82649a040148..bceda5e8baaa 100644 >> --- a/drivers/vfio/vfio_iommu_type1.c >> +++ b/drivers/vfio/vfio_iommu_type1.c >> @@ -978,10 +978,9 @@ static void vfio_update_pgsize_bitmap(struct vfio_iommu *iommu) >> } >> >> static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, >> - struct vfio_dma *dma, dma_addr_t base_iova, >> - size_t pgsize) >> + struct vfio_dma *dma, dma_addr_t base_iova) >> { >> - unsigned long pgshift = __ffs(pgsize); >> + unsigned long pgshift = __ffs(iommu->pgsize_bitmap); >> unsigned long nbits = dma->size >> pgshift; >> unsigned long bit_offset = (dma->iova - base_iova) >> pgshift; >> unsigned long copy_offset = bit_offset / BITS_PER_LONG; >> @@ -1046,7 +1045,7 @@ static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, >> if (dma->iova > iova + size - 1) >> break; >> >> - ret = update_user_bitmap(bitmap, iommu, dma, iova, pgsize); >> + ret = update_user_bitmap(bitmap, iommu, dma, iova); >> if (ret) >> return ret; >> >> @@ -1192,7 +1191,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, >> >> if (unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) { >> ret = update_user_bitmap(bitmap->data, iommu, dma, >> - unmap->iova, pgsize); >> + unmap->iova); >> if (ret) >> break; >> } > > Same as the previous, both call sites already have both pgsize and > pgshift, pass both rather than recalculate. Thanks, > My idea is that the "pgsize" parameter goes here and there, disturbs our sight when read code. To be frankly, the recalculate is negligible. Or we can add new fields in vfio_iommu, which are updated in vfio_update_pgsize_bitmap(). Thanks, Keqian > Alex > > . >