From: Robin Murphy <robin.murphy@arm.com>
To: John Garry <john.garry@huawei.com>,
	joro@8bytes.org, will@kernel.org, mst@redhat.com,
	jasowang@redhat.com
Cc: xieyongji@bytedance.com, iommu@lists.linux-foundation.org,
	linux-kernel@vger.kernel.org,
	virtualization@lists.linux-foundation.org, linuxarm@huawei.com
Subject: Re: [PATCH] iommu/iova: Separate out rcache init
Date: Wed, 26 Jan 2022 17:00:23 +0000	[thread overview]
Message-ID: <ee4593b8-cdf6-935a-0eaf-48a8bfeae912@arm.com> (raw)
In-Reply-To: <1643205319-51669-1-git-send-email-john.garry@huawei.com>

On 2022-01-26 13:55, John Garry wrote:
> Currently the rcache structures are allocated for all IOVA domains, even if
> they do not use the "fast" alloc+free interface. This is wasteful of memory.
> 
> In addition, failures in init_iova_rcaches() are not handled safely, which is
> less than ideal.
> 
> Make "fast" users call a separate rcache init explicitly, which includes
> error checking.
> 
> Signed-off-by: John Garry <john.garry@huawei.com>

Mangled patch? (no "---" separator here)
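
i.e. I'd expect git-format-patch to have emitted the usual

	Signed-off-by: John Garry <john.garry@huawei.com>
	---
	 drivers/iommu/dma-iommu.c | ...

with the "---" line separating the commit message and tags from the
diffstat.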

Overall this looks great, just a few comments further down...

> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index 3a46f2cc9e5d..dd066d990809 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -525,6 +525,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
>   	struct iommu_dma_cookie *cookie = domain->iova_cookie;
>   	unsigned long order, base_pfn;
>   	struct iova_domain *iovad;
> +	int ret;
>   
>   	if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
>   		return -EINVAL;
> @@ -559,6 +560,9 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
>   	}
>   
>   	init_iova_domain(iovad, 1UL << order, base_pfn);
> +	ret = iova_domain_init_rcaches(iovad);
> +	if (ret)
> +		return ret;
>   
>   	/* If the FQ fails we can simply fall back to strict mode */
>   	if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
> diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
> index b28c9435b898..d3adc6ea5710 100644
> --- a/drivers/iommu/iova.c
> +++ b/drivers/iommu/iova.c
> @@ -15,13 +15,14 @@
>   /* The anchor node sits above the top of the usable address space */
>   #define IOVA_ANCHOR	~0UL
>   
> +#define IOVA_RANGE_CACHE_MAX_SIZE 6	/* log of max cached IOVA range size (in pages) */
> +
>   static bool iova_rcache_insert(struct iova_domain *iovad,
>   			       unsigned long pfn,
>   			       unsigned long size);
>   static unsigned long iova_rcache_get(struct iova_domain *iovad,
>   				     unsigned long size,
>   				     unsigned long limit_pfn);
> -static void init_iova_rcaches(struct iova_domain *iovad);
>   static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad);
>   static void free_iova_rcaches(struct iova_domain *iovad);
>   
> @@ -64,8 +65,6 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
>   	iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
>   	rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
>   	rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
> -	cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD, &iovad->cpuhp_dead);
> -	init_iova_rcaches(iovad);
>   }
>   EXPORT_SYMBOL_GPL(init_iova_domain);
>   
> @@ -497,9 +496,9 @@ void put_iova_domain(struct iova_domain *iovad)
>   {
>   	struct iova *iova, *tmp;
>   
> -	cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
> -					    &iovad->cpuhp_dead);
> -	free_iova_rcaches(iovad);
> +	if (iovad->rcaches)
> +		iova_domain_free_rcaches(iovad);
> +
>   	rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
>   		free_iova_mem(iova);
>   }
> @@ -608,6 +607,7 @@ EXPORT_SYMBOL_GPL(reserve_iova);
>    */
>   
>   #define IOVA_MAG_SIZE 128
> +#define MAX_GLOBAL_MAGS 32	/* magazines per bin */
>   
>   struct iova_magazine {
>   	unsigned long size;
> @@ -620,6 +620,13 @@ struct iova_cpu_rcache {
>   	struct iova_magazine *prev;
>   };
>   
> +struct iova_rcache {
> +	spinlock_t lock;
> +	unsigned long depot_size;
> +	struct iova_magazine *depot[MAX_GLOBAL_MAGS];
> +	struct iova_cpu_rcache __percpu *cpu_rcaches;
> +};
> +
>   static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
>   {
>   	return kzalloc(sizeof(struct iova_magazine), flags);
> @@ -693,28 +700,62 @@ static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
>   	mag->pfns[mag->size++] = pfn;
>   }
>   
> -static void init_iova_rcaches(struct iova_domain *iovad)
> +int iova_domain_init_rcaches(struct iova_domain *iovad)
>   {
> -	struct iova_cpu_rcache *cpu_rcache;
> -	struct iova_rcache *rcache;
>   	unsigned int cpu;
> -	int i;
> +	int i, ret;
> +
> +	iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE,
> +				 sizeof(struct iova_rcache),
> +				 GFP_KERNEL);
> +	if (!iovad->rcaches)
> +		return -ENOMEM;
>   
>   	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
> +		struct iova_cpu_rcache *cpu_rcache;
> +		struct iova_rcache *rcache;
> +
>   		rcache = &iovad->rcaches[i];
>   		spin_lock_init(&rcache->lock);
>   		rcache->depot_size = 0;
> -		rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size());
> -		if (WARN_ON(!rcache->cpu_rcaches))
> -			continue;
> +		rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache),
> +						     cache_line_size());
> +		if (!rcache->cpu_rcaches) {
> +			ret = -ENOMEM;
> +			goto out_err;
> +		}
>   		for_each_possible_cpu(cpu) {
>   			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
> +
>   			spin_lock_init(&cpu_rcache->lock);
>   			cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
>   			cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
> +			if (!cpu_rcache->loaded || !cpu_rcache->prev) {
> +				ret = -ENOMEM;
> +				goto out_err;
> +			}
>   		}
>   	}
> +
> +	ret = cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
> +					       &iovad->cpuhp_dead);
> +	if (ret)
> +		goto out_err;
> +	return 0;
> +
> +out_err:
> +	free_iova_rcaches(iovad);
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(iova_domain_init_rcaches);
> +
> +void iova_domain_free_rcaches(struct iova_domain *iovad)
> +{
> +	cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
> +					    &iovad->cpuhp_dead);
> +	free_iova_rcaches(iovad);
>   }
> +EXPORT_SYMBOL_GPL(iova_domain_free_rcaches);

I think we should continue to expect external callers to clean up with 
put_iova_domain(). If they aren't doing that already they have a bug 
(albeit minor), and we don't want to give the impression that it's OK to 
free the caches at any point *other* than tearing down the whole 
iova_domain, since the implementation really wouldn't expect that.
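
FWIW the usage I'd expect from an external caller is simply (rough,
untested sketch, using the names from this patch):

	init_iova_domain(iovad, granule, start_pfn);
	ret = iova_domain_init_rcaches(iovad);
	if (ret)
		return ret;

	/* ... fast allocs/frees ... */

	/* teardown - the rcaches are freed internally here */
	put_iova_domain(iovad);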

>   /*
>    * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
> @@ -831,7 +872,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad,
>   {
>   	unsigned int log_size = order_base_2(size);
>   
> -	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
> +	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE || !iovad->rcaches)
>   		return 0;
>   
>   	return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
> @@ -849,6 +890,8 @@ static void free_iova_rcaches(struct iova_domain *iovad)
>   
>   	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
>   		rcache = &iovad->rcaches[i];
> +		if (!rcache->cpu_rcaches)
> +			break;
>   		for_each_possible_cpu(cpu) {
>   			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
>   			iova_magazine_free(cpu_rcache->loaded);
> @@ -858,6 +901,9 @@ static void free_iova_rcaches(struct iova_domain *iovad)
>   		for (j = 0; j < rcache->depot_size; ++j)
>   			iova_magazine_free(rcache->depot[j]);
>   	}
> +
> +	kfree(iovad->rcaches);
> +	iovad->rcaches = NULL;
>   }
>   
>   /*
> diff --git a/drivers/vdpa/vdpa_user/iova_domain.c b/drivers/vdpa/vdpa_user/iova_domain.c
> index 2b1143f11d8f..87ec9f8015f1 100644
> --- a/drivers/vdpa/vdpa_user/iova_domain.c
> +++ b/drivers/vdpa/vdpa_user/iova_domain.c
> @@ -480,6 +480,7 @@ vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
>   	struct file *file;
>   	struct vduse_bounce_map *map;
>   	unsigned long pfn, bounce_pfns;
> +	int ret;
>   
>   	bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT;
>   	if (iova_limit <= bounce_size)
> @@ -513,10 +514,20 @@ vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
>   	spin_lock_init(&domain->iotlb_lock);
>   	init_iova_domain(&domain->stream_iovad,
>   			PAGE_SIZE, IOVA_START_PFN);
> +	ret = iova_domain_init_rcaches(&domain->stream_iovad);
> +	if (ret)
> +		goto err_iovad_stream;
>   	init_iova_domain(&domain->consistent_iovad,
>   			PAGE_SIZE, bounce_pfns);
> +	ret = iova_domain_init_rcaches(&domain->consistent_iovad);
> +	if (ret)
> +		goto err_iovad_consistent;
>   
>   	return domain;
> +err_iovad_consistent:
> +	iova_domain_free_rcaches(&domain->stream_iovad);
> +err_iovad_stream:
> +	fput(file);
>   err_file:
>   	vfree(domain->bounce_maps);
>   err_map:
> diff --git a/include/linux/iova.h b/include/linux/iova.h
> index cea79cb9f26c..f91679680ee4 100644
> --- a/include/linux/iova.h
> +++ b/include/linux/iova.h
> @@ -21,18 +21,8 @@ struct iova {
>   	unsigned long	pfn_lo; /* Lowest allocated pfn */
>   };
>   
> -struct iova_magazine;
> -struct iova_cpu_rcache;
>   
> -#define IOVA_RANGE_CACHE_MAX_SIZE 6	/* log of max cached IOVA range size (in pages) */
> -#define MAX_GLOBAL_MAGS 32	/* magazines per bin */
> -
> -struct iova_rcache {
> -	spinlock_t lock;
> -	unsigned long depot_size;
> -	struct iova_magazine *depot[MAX_GLOBAL_MAGS];
> -	struct iova_cpu_rcache __percpu *cpu_rcaches;
> -};
> +struct iova_rcache;
>   
>   /* holds all the iova translations for a domain */
>   struct iova_domain {
> @@ -46,7 +36,7 @@ struct iova_domain {
>   	unsigned long	max32_alloc_size; /* Size of last failed allocation */
>   	struct iova	anchor;		/* rbtree lookup anchor */
>   
> -	struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE];	/* IOVA range caches */
> +	struct iova_rcache	*rcaches;
>   	struct hlist_node	cpuhp_dead;
>   };
>   
> @@ -102,6 +92,8 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
>   	unsigned long pfn_hi);
>   void init_iova_domain(struct iova_domain *iovad, unsigned long granule,
>   	unsigned long start_pfn);
> +int iova_domain_init_rcaches(struct iova_domain *iovad);
> +void iova_domain_free_rcaches(struct iova_domain *iovad);

As above, I vote for just forward-declaring the free routine in iova.c 
and keeping it entirely private.
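
i.e. roughly (untested, just to sketch what I mean):

	/* iova.c - not exported, not declared in iova.h */
	static void iova_domain_free_rcaches(struct iova_domain *iovad);

	void put_iova_domain(struct iova_domain *iovad)
	{
		...
		if (iovad->rcaches)
			iova_domain_free_rcaches(iovad);
		...
	}

with the VDUSE error path then unwinding via put_iova_domain() rather
than calling the free routine directly.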

>   struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
>   void put_iova_domain(struct iova_domain *iovad);
>   #else
> @@ -157,6 +149,15 @@ static inline void init_iova_domain(struct iova_domain *iovad,
>   {
>   }
>   
> +static inline int iova_domain_init_rcaches(struct iova_domain *iovad)
> +{
> +	return -ENOTSUPP;
> +}
> +
> +static inline void iova_domain_free_rcaches(struct iova_domain *iovad)
> +{
> +}
> +

I'd be inclined not to add stubs at all - I think it's a reasonable 
assumption that anyone involved enough to care about rcaches has a hard 
dependency on IOMMU_IOVA already. It's certainly the case today, and I'd 
hardly want to encourage more users anyway.
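
(Purely for illustration - I'd expect any such user's Kconfig to look
something like this already:)

	config SOME_FAST_IOVA_USER
		tristate "Example driver using the fast IOVA interface"
		select IOMMU_IOVA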

Cheers,
Robin.

>   static inline struct iova *find_iova(struct iova_domain *iovad,
>   				     unsigned long pfn)
>   {


Thread overview: 12+ messages
2022-01-26 13:55 [PATCH] iommu/iova: Separate out rcache init John Garry
2022-01-26 13:55 ` John Garry via iommu
2022-01-26 17:00 ` Robin Murphy [this message]
2022-01-26 17:00   ` Robin Murphy
2022-01-26 17:00   ` Robin Murphy
2022-01-26 17:58   ` John Garry
2022-01-26 17:58     ` John Garry via iommu
2022-01-28 11:32   ` John Garry
2022-01-28 11:32     ` John Garry via iommu
2022-01-28 16:54     ` Robin Murphy
2022-01-28 16:54       ` Robin Murphy
2022-01-28 16:54       ` Robin Murphy
