* [PATCH] iommu/iova: using separate rcache for SAC and DAC
@ 2022-09-16 15:46 brookxu.cn
  2022-09-16 17:03 ` Robin Murphy
  2022-09-17  3:44 ` kernel test robot
  0 siblings, 2 replies; 5+ messages in thread
From: brookxu.cn @ 2022-09-16 15:46 UTC (permalink / raw)
  To: robin.murphy, joro, will; +Cc: iommu, linux-kernel

From: Chunguang Xu <chunguang.xu@shopee.com>

When iommu_dma_forcedac is disabled, the kernel tries SAC
addresses first for PCI devices and falls back to DAC only if
that fails. Since the rcache currently does not distinguish
SAC from DAC, if all PFNs held in a CPU's loaded cache are
larger than the SAC maximum PFN while the SAC address space
itself is still sufficient, the lookup fails; and because the
loaded cache is not empty, it is never refreshed, so the
kernel calls alloc_iova() instead. As PCI devices mostly
allocate SAC addresses, the loaded cache may stay useless for
SAC allocations for a long time and the kernel enters the
alloc_iova() slow path frequently, degrading performance. To
avoid this, it may be better for SAC and DAC to use separate
caches.
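
For reference, the fallback flow in question looks roughly like
this in iommu_dma_alloc_iova() (a simplified sketch, not the
verbatim code; the forcedac and dev_is_pci() checks are omitted
and the wrapper name is made up for illustration):

    static unsigned long alloc_iova_sketch(struct iova_domain *iovad,
                                           unsigned long iova_len,
                                           u64 dma_limit, unsigned long shift)
    {
            unsigned long iova = 0;

            /* Try a 32-bit (SAC) address first for PCI devices... */
            if (dma_limit > DMA_BIT_MASK(32))
                    iova = alloc_iova_fast(iovad, iova_len,
                                           DMA_BIT_MASK(32) >> shift, false);
            /* ...and retry with the full (DAC) limit only on failure. */
            if (!iova)
                    iova = alloc_iova_fast(iovad, iova_len,
                                           dma_limit >> shift, true);
            return iova;
    }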

Signed-off-by: Chunguang Xu <chunguang.xu@shopee.com>
---
 drivers/iommu/iova.c | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 47d1983dfa2a..d5775719a143 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -16,6 +16,7 @@
 #define IOVA_ANCHOR	~0UL
 
 #define IOVA_RANGE_CACHE_MAX_SIZE 6	/* log of max cached IOVA range size (in pages) */
+#define IOVA_RANGE_CACHE_ARRAY_SIZE (2 * IOVA_RANGE_CACHE_MAX_SIZE)
 
 static bool iova_rcache_insert(struct iova_domain *iovad,
 			       unsigned long pfn,
@@ -723,13 +724,13 @@ int iova_domain_init_rcaches(struct iova_domain *iovad)
 	unsigned int cpu;
 	int i, ret;
 
-	iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE,
+	iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_ARRAY_SIZE,
 				 sizeof(struct iova_rcache),
 				 GFP_KERNEL);
 	if (!iovad->rcaches)
 		return -ENOMEM;
 
-	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
+	for (i = 0; i < IOVA_RANGE_CACHE_ARRAY_SIZE; ++i) {
 		struct iova_cpu_rcache *cpu_rcache;
 		struct iova_rcache *rcache;
 
@@ -825,11 +826,15 @@ static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
 			       unsigned long size)
 {
 	unsigned int log_size = order_base_2(size);
+	unsigned int index = log_size;
 
 	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
 		return false;
 
-	return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
+	if (pfn > DMA_BIT_MASK(32))
+		index = log_size + IOVA_RANGE_CACHE_MAX_SIZE;
+
+	return __iova_rcache_insert(iovad, &iovad->rcaches[index], pfn);
 }
 
 /*
@@ -881,11 +886,20 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad,
 				     unsigned long limit_pfn)
 {
 	unsigned int log_size = order_base_2(size);
+	unsigned long iova_pfn;
+	unsigned int index;
 
 	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE || !iovad->rcaches)
 		return 0;
 
-	return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
+	iova_pfn = __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
+
+	if (!iova_pfn && limit_pfn > DMA_BIT_MASK(32)) {
+		index = log_size + IOVA_RANGE_CACHE_MAX_SIZE;
+		iova_pfn = __iova_rcache_get(&iovad->rcaches[index], limit_pfn - size);
+	}
+
+	return iova_pfn
 }
 
 /*
@@ -898,7 +912,7 @@ static void free_iova_rcaches(struct iova_domain *iovad)
 	unsigned int cpu;
 	int i, j;
 
-	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
+	for (i = 0; i < IOVA_RANGE_CACHE_ARRAY_SIZE; ++i) {
 		rcache = &iovad->rcaches[i];
 		if (!rcache->cpu_rcaches)
 			break;
@@ -926,7 +940,7 @@ static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
 	unsigned long flags;
 	int i;
 
-	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
+	for (i = 0; i < IOVA_RANGE_CACHE_ARRAY_SIZE; ++i) {
 		rcache = &iovad->rcaches[i];
 		cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
 		spin_lock_irqsave(&cpu_rcache->lock, flags);
@@ -945,7 +959,7 @@ static void free_global_cached_iovas(struct iova_domain *iovad)
 	unsigned long flags;
 	int i, j;
 
-	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
+	for (i = 0; i < IOVA_RANGE_CACHE_ARRAY_SIZE; ++i) {
 		rcache = &iovad->rcaches[i];
 		spin_lock_irqsave(&rcache->lock, flags);
 		for (j = 0; j < rcache->depot_size; ++j) {
-- 
2.31.1



* Re: [PATCH] iommu/iova: using separate rcache for SAC and DAC
  2022-09-16 15:46 [PATCH] iommu/iova: using separate rcache for SAC and DAC brookxu.cn
@ 2022-09-16 17:03 ` Robin Murphy
  2022-09-19  3:02   ` brookxu.cn
  2022-09-17  3:44 ` kernel test robot
  1 sibling, 1 reply; 5+ messages in thread
From: Robin Murphy @ 2022-09-16 17:03 UTC (permalink / raw)
  To: brookxu.cn, joro, will; +Cc: iommu, linux-kernel

On 2022-09-16 16:46, brookxu.cn wrote:
> From: Chunguang Xu <chunguang.xu@shopee.com>
> 
> When iommu_dma_forcedac is disabled, the kernel tries SAC
> addresses first for PCI devices and falls back to DAC only if
> that fails. Since the rcache currently does not distinguish
> SAC from DAC, if all PFNs held in a CPU's loaded cache are
> larger than the SAC maximum PFN while the SAC address space
> itself is still sufficient, the lookup fails; and because the
> loaded cache is not empty, it is never refreshed, so the
> kernel calls alloc_iova() instead. As PCI devices mostly
> allocate SAC addresses, the loaded cache may stay useless for
> SAC allocations for a long time and the kernel enters the
> alloc_iova() slow path frequently, degrading performance. To
> avoid this, it may be better for SAC and DAC to use separate
> caches.

I really dislike the idea of doubling the already significantly large 
footprint of rcaches purely to optimise for the stupid case. If you've 
got to the point of hitting contention in the SAC path, you presumably 
don't have broken hardware/drivers so you're infinitely better off using 
forcedac and avoiding it entirely to begin with. And frankly even if you 
*do* have broken hardware, if you care about performance you're still 
better off fixing the relevant driver(s) to set correct DMA masks so you 
can use forcedac.
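
For reference, forcedac is the kernel command-line switch documented
in Documentation/admin-guide/kernel-parameters.txt:

        iommu.forcedac=1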

Since this mechanism now officially serves as a bandage for broken 
hardware/drivers and little else, I had an idea to make it a lot better, 
guess it's time to have a go at writing that patch up...

Thanks,
Robin.

> Signed-off-by: Chunguang Xu <chunguang.xu@shopee.com>
> ---
>   drivers/iommu/iova.c | 28 +++++++++++++++++++++-------
>   1 file changed, 21 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
> index 47d1983dfa2a..d5775719a143 100644
> --- a/drivers/iommu/iova.c
> +++ b/drivers/iommu/iova.c
> @@ -16,6 +16,7 @@
>   #define IOVA_ANCHOR	~0UL
>   
>   #define IOVA_RANGE_CACHE_MAX_SIZE 6	/* log of max cached IOVA range size (in pages) */
> +#define IOVA_RANGE_CACHE_ARRAY_SIZE (2 * IOVA_RANGE_CACHE_MAX_SIZE)
>   
>   static bool iova_rcache_insert(struct iova_domain *iovad,
>   			       unsigned long pfn,
> @@ -723,13 +724,13 @@ int iova_domain_init_rcaches(struct iova_domain *iovad)
>   	unsigned int cpu;
>   	int i, ret;
>   
> -	iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE,
> +	iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_ARRAY_SIZE,
>   				 sizeof(struct iova_rcache),
>   				 GFP_KERNEL);
>   	if (!iovad->rcaches)
>   		return -ENOMEM;
>   
> -	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
> +	for (i = 0; i < IOVA_RANGE_CACHE_ARRAY_SIZE; ++i) {
>   		struct iova_cpu_rcache *cpu_rcache;
>   		struct iova_rcache *rcache;
>   
> @@ -825,11 +826,15 @@ static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
>   			       unsigned long size)
>   {
>   	unsigned int log_size = order_base_2(size);
> +    unsigned int index = log_size;
>   
>   	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
>   		return false;
>   
> -	return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
> +	if (pfn > DMA_BIT_MASK(32))
> +		index = log_size + IOVA_RANGE_CACHE_MAX_SIZE;
> +
> +	return __iova_rcache_insert(iovad, &iovad->rcaches[index], pfn);
>   }
>   
>   /*
> @@ -881,11 +886,20 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad,
>   				     unsigned long limit_pfn)
>   {
>   	unsigned int log_size = order_base_2(size);
> +	unsigned long iova_pfn;
> +	unsigned int index;
>   
>   	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE || !iovad->rcaches)
>   		return 0;
>   
> -	return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
> +	iova_pfn = __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
> +
> +	if (!iova_pfn && limit_pfn > DMA_BIT_MASK(32)) {
> +		index = log_size + IOVA_RANGE_CACHE_MAX_SIZE;
> +		iova_pfn = __iova_rcache_get(&iovad->rcaches[index], limit_pfn - size);
> +	}
> +
> +	return iova_pfn
>   }
>   
>   /*
> @@ -898,7 +912,7 @@ static void free_iova_rcaches(struct iova_domain *iovad)
>   	unsigned int cpu;
>   	int i, j;
>   
> -	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
> +	for (i = 0; i < IOVA_RANGE_CACHE_ARRAY_SIZE; ++i) {
>   		rcache = &iovad->rcaches[i];
>   		if (!rcache->cpu_rcaches)
>   			break;
> @@ -926,7 +940,7 @@ static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
>   	unsigned long flags;
>   	int i;
>   
> -	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
> +	for (i = 0; i < IOVA_RANGE_CACHE_ARRAY_SIZE; ++i) {
>   		rcache = &iovad->rcaches[i];
>   		cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
>   		spin_lock_irqsave(&cpu_rcache->lock, flags);
> @@ -945,7 +959,7 @@ static void free_global_cached_iovas(struct iova_domain *iovad)
>   	unsigned long flags;
>   	int i, j;
>   
> -	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
> +	for (i = 0; i < IOVA_RANGE_CACHE_ARRAY_SIZE; ++i) {
>   		rcache = &iovad->rcaches[i];
>   		spin_lock_irqsave(&rcache->lock, flags);
>   		for (j = 0; j < rcache->depot_size; ++j) {


* Re: [PATCH] iommu/iova: using separate rcache for SAC and DAC
  2022-09-16 15:46 [PATCH] iommu/iova: using separate rcache for SAC and DAC brookxu.cn
  2022-09-16 17:03 ` Robin Murphy
@ 2022-09-17  3:44 ` kernel test robot
  1 sibling, 0 replies; 5+ messages in thread
From: kernel test robot @ 2022-09-17  3:44 UTC (permalink / raw)
  To: brookxu.cn, robin.murphy, joro, will; +Cc: kbuild-all, iommu, linux-kernel

Hi brookxu.cn,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v6.0-rc5]
[also build test ERROR on linus/master next-20220916]
[cannot apply to joro-iommu/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/brookxu-cn/iommu-iova-using-separate-rcache-for-SAC-and-DAC/20220916-234845
base:    80e78fcce86de0288793a0ef0f6acf37656ee4cf
config: x86_64-rhel-8.3 (https://download.01.org/0day-ci/archive/20220917/202209171115.TzR7T0b4-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-5) 11.3.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/fa50a31c6b74157cac20cd44b3717c19b934ff76
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review brookxu-cn/iommu-iova-using-separate-rcache-for-SAC-and-DAC/20220916-234845
        git checkout fa50a31c6b74157cac20cd44b3717c19b934ff76
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash drivers/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   drivers/iommu/iova.c: In function 'iova_rcache_get':
>> drivers/iommu/iova.c:902:24: error: expected ';' before '}' token
     902 |         return iova_pfn
         |                        ^
         |                        ;
     903 | }
         | ~                       


vim +902 drivers/iommu/iova.c

   878	
   879	/*
   880	 * Try to satisfy IOVA allocation range from rcache.  Fail if requested
   881	 * size is too big or the DMA limit we are given isn't satisfied by the
   882	 * top element in the magazine.
   883	 */
   884	static unsigned long iova_rcache_get(struct iova_domain *iovad,
   885					     unsigned long size,
   886					     unsigned long limit_pfn)
   887	{
   888		unsigned int log_size = order_base_2(size);
   889		unsigned long iova_pfn;
   890		unsigned int index;
   891	
   892		if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE || !iovad->rcaches)
   893			return 0;
   894	
   895		iova_pfn = __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
   896	
   897		if (!iova_pfn && limit_pfn > DMA_BIT_MASK(32)) {
   898			index = log_size + IOVA_RANGE_CACHE_MAX_SIZE;
   899			iova_pfn = __iova_rcache_get(&iovad->rcaches[index], limit_pfn - size);
   900		}
   901	
 > 902		return iova_pfn
   903	}
   904	
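
The change the diagnostic asks for is just the missing statement
terminator at the end of iova_rcache_get():

   -	return iova_pfn
   +	return iova_pfn;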

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp


* Re: [PATCH] iommu/iova: using separate rcache for SAC and DAC
  2022-09-16 17:03 ` Robin Murphy
@ 2022-09-19  3:02   ` brookxu.cn
  2022-09-23 12:27     ` Robin Murphy
  0 siblings, 1 reply; 5+ messages in thread
From: brookxu.cn @ 2022-09-19  3:02 UTC (permalink / raw)
  To: Robin Murphy, joro, will; +Cc: iommu, linux-kernel



On 2022/9/17 1:03, Robin Murphy wrote:
> On 2022-09-16 16:46, brookxu.cn wrote:
>> From: Chunguang Xu <chunguang.xu@shopee.com>
>>
>> When iommu_dma_forcedac is disabled, the kernel tries SAC
>> addresses first for PCI devices and falls back to DAC only if
>> that fails. Since the rcache currently does not distinguish
>> SAC from DAC, if all PFNs held in a CPU's loaded cache are
>> larger than the SAC maximum PFN while the SAC address space
>> itself is still sufficient, the lookup fails; and because the
>> loaded cache is not empty, it is never refreshed, so the
>> kernel calls alloc_iova() instead. As PCI devices mostly
>> allocate SAC addresses, the loaded cache may stay useless for
>> SAC allocations for a long time and the kernel enters the
>> alloc_iova() slow path frequently, degrading performance. To
>> avoid this, it may be better for SAC and DAC to use separate
>> caches.
> 
> I really dislike the idea of doubling the already significantly large 
> footprint of rcaches purely to optimise for the stupid case. If you've 
> got to the point of hitting contention in the SAC path, you presumably 
> don't have broken hardware/drivers so you're infinitely better off using 
> forcedac and avoiding it entirely to begin with. And frankly even if you 
> *do* have broken hardware, if you care about performance you're still 
> better off fixing the relevant driver(s) to set correct DMA masks so you 
> can use forcedac.

Some of our NICs show poor performance after a burst of heavy
traffic. We found that it may be caused by a large number of
order-1 and order-2 SAC IOVA allocations fragmenting the SAC
address space during the burst, which in turn causes some
order-3 SAC allocations to fail. The kernel then tries DAC
addresses and later releases them into the rcache. After the
traffic subsides the SAC address space recovers from
fragmentation, but a few DAC PFNs remain in the loaded cache
of some CPU, defeating rcache lookups for SAC allocations
there. As a result, the kernel enters the alloc_iova() slow
path frequently and performance stays poor. So I think this
is not so much a case of broken hardware or drivers, though
they could perhaps also be optimized.
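
The blocking effect follows from the rule stated in the comment
above iova_rcache_get() ("Fail if requested size is too big or the
DMA limit we are given isn't satisfied by the top element in the
magazine"). A minimal sketch of that check, with the structure and
names simplified rather than taken verbatim from iova.c:

    struct magazine_sketch {
            unsigned long size;
            unsigned long pfns[128];    /* recently freed IOVA PFNs */
    };

    static unsigned long magazine_pop_sketch(struct magazine_sketch *mag,
                                             unsigned long limit_pfn)
    {
            /*
             * One cached DAC PFN on top exceeds the SAC limit_pfn,
             * so a SAC request fails here even though the magazine
             * is not empty, and the caller takes the alloc_iova()
             * slow path.
             */
            if (!mag->size || mag->pfns[mag->size - 1] > limit_pfn)
                    return 0;
            return mag->pfns[--mag->size];
    }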

> 
> Since this mechanism now officially serves as a bandage for broken 
> hardware/drivers and little else, I had an idea to make it a lot better, 
> guess it's time to have a go at writing that patch up...
> 
> Thanks,
> Robin.
> 
>> Signed-off-by: Chunguang Xu <chunguang.xu@shopee.com>
>> ---
>>   drivers/iommu/iova.c | 28 +++++++++++++++++++++-------
>>   1 file changed, 21 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
>> index 47d1983dfa2a..d5775719a143 100644
>> --- a/drivers/iommu/iova.c
>> +++ b/drivers/iommu/iova.c
>> @@ -16,6 +16,7 @@
>>   #define IOVA_ANCHOR    ~0UL
>>   #define IOVA_RANGE_CACHE_MAX_SIZE 6    /* log of max cached IOVA 
>> range size (in pages) */
>> +#define IOVA_RANGE_CACHE_ARRAY_SIZE (2 * IOVA_RANGE_CACHE_MAX_SIZE)
>>   static bool iova_rcache_insert(struct iova_domain *iovad,
>>                      unsigned long pfn,
>> @@ -723,13 +724,13 @@ int iova_domain_init_rcaches(struct iova_domain 
>> *iovad)
>>       unsigned int cpu;
>>       int i, ret;
>> -    iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE,
>> +    iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_ARRAY_SIZE,
>>                    sizeof(struct iova_rcache),
>>                    GFP_KERNEL);
>>       if (!iovad->rcaches)
>>           return -ENOMEM;
>> -    for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
>> +    for (i = 0; i < IOVA_RANGE_CACHE_ARRAY_SIZE; ++i) {
>>           struct iova_cpu_rcache *cpu_rcache;
>>           struct iova_rcache *rcache;
>> @@ -825,11 +826,15 @@ static bool iova_rcache_insert(struct 
>> iova_domain *iovad, unsigned long pfn,
>>                      unsigned long size)
>>   {
>>       unsigned int log_size = order_base_2(size);
>> +    unsigned int index = log_size;
>>       if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
>>           return false;
>> -    return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
>> +    if (pfn > DMA_BIT_MASK(32))
>> +        index = log_size + IOVA_RANGE_CACHE_MAX_SIZE;
>> +
>> +    return __iova_rcache_insert(iovad, &iovad->rcaches[index], pfn);
>>   }
>>   /*
>> @@ -881,11 +886,20 @@ static unsigned long iova_rcache_get(struct 
>> iova_domain *iovad,
>>                        unsigned long limit_pfn)
>>   {
>>       unsigned int log_size = order_base_2(size);
>> +    unsigned long iova_pfn;
>> +    unsigned int index;
>>       if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE || !iovad->rcaches)
>>           return 0;
>> -    return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - 
>> size);
>> +    iova_pfn = __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn 
>> - size);
>> +
>> +    if (!iova_pfn && limit_pfn > DMA_BIT_MASK(32)) {
>> +        index = log_size + IOVA_RANGE_CACHE_MAX_SIZE;
>> +        iova_pfn = __iova_rcache_get(&iovad->rcaches[index], 
>> limit_pfn - size);
>> +    }
>> +
>> +    return iova_pfn
>>   }
>>   /*
>> @@ -898,7 +912,7 @@ static void free_iova_rcaches(struct iova_domain 
>> *iovad)
>>       unsigned int cpu;
>>       int i, j;
>> -    for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
>> +    for (i = 0; i < IOVA_RANGE_CACHE_ARRAY_SIZE; ++i) {
>>           rcache = &iovad->rcaches[i];
>>           if (!rcache->cpu_rcaches)
>>               break;
>> @@ -926,7 +940,7 @@ static void free_cpu_cached_iovas(unsigned int 
>> cpu, struct iova_domain *iovad)
>>       unsigned long flags;
>>       int i;
>> -    for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
>> +    for (i = 0; i < IOVA_RANGE_CACHE_ARRAY_SIZE; ++i) {
>>           rcache = &iovad->rcaches[i];
>>           cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
>>           spin_lock_irqsave(&cpu_rcache->lock, flags);
>> @@ -945,7 +959,7 @@ static void free_global_cached_iovas(struct 
>> iova_domain *iovad)
>>       unsigned long flags;
>>       int i, j;
>> -    for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
>> +    for (i = 0; i < IOVA_RANGE_CACHE_ARRAY_SIZE; ++i) {
>>           rcache = &iovad->rcaches[i];
>>           spin_lock_irqsave(&rcache->lock, flags);
>>           for (j = 0; j < rcache->depot_size; ++j) {


* Re: [PATCH] iommu/iova: using separate rcache for SAC and DAC
  2022-09-19  3:02   ` brookxu.cn
@ 2022-09-23 12:27     ` Robin Murphy
  0 siblings, 0 replies; 5+ messages in thread
From: Robin Murphy @ 2022-09-23 12:27 UTC (permalink / raw)
  To: brookxu.cn, joro, will; +Cc: iommu, linux-kernel

On 2022-09-19 04:02, brookxu.cn@gmail.com wrote:
> 
> 
> On 2022/9/17 1:03, Robin Murphy wrote:
>> On 2022-09-16 16:46, brookxu.cn wrote:
>>> From: Chunguang Xu <chunguang.xu@shopee.com>
>>>
>>> When iommu_dma_forcedac is disabled, the kernel tries SAC
>>> addresses first for PCI devices and falls back to DAC only if
>>> that fails. Since the rcache currently does not distinguish
>>> SAC from DAC, if all PFNs held in a CPU's loaded cache are
>>> larger than the SAC maximum PFN while the SAC address space
>>> itself is still sufficient, the lookup fails; and because the
>>> loaded cache is not empty, it is never refreshed, so the
>>> kernel calls alloc_iova() instead. As PCI devices mostly
>>> allocate SAC addresses, the loaded cache may stay useless for
>>> SAC allocations for a long time and the kernel enters the
>>> alloc_iova() slow path frequently, degrading performance. To
>>> avoid this, it may be better for SAC and DAC to use separate
>>> caches.
>>
>> I really dislike the idea of doubling the already significantly large 
>> footprint of rcaches purely to optimise for the stupid case. If you've 
>> got to the point of hitting contention in the SAC path, you presumably 
>> don't have broken hardware/drivers so you're infinitely better off 
>> using forcedac and avoiding it entirely to begin with. And frankly 
>> even if you *do* have broken hardware, if you care about performance 
>> you're still better off fixing the relevant driver(s) to set correct 
>> DMA masks so you can use forcedac.
> 
> Some of our NICs show poor performance after a burst of heavy
> traffic. We found that it may be caused by a large number of
> order-1 and order-2 SAC IOVA allocations fragmenting the SAC
> address space during the burst, which in turn causes some
> order-3 SAC allocations to fail. The kernel then tries DAC
> addresses and later releases them into the rcache. After the
> traffic subsides the SAC address space recovers from
> fragmentation, but a few DAC PFNs remain in the loaded cache
> of some CPU, defeating rcache lookups for SAC allocations
> there. As a result, the kernel enters the alloc_iova() slow
> path frequently and performance stays poor. So I think this
> is not so much a case of broken hardware or drivers, though
> they could perhaps also be optimized.

My point is that your system *isn't* broken, because the DAC addresses 
work fine, so there's really no need for it to spend any time messing 
about with SAC constraints at all. Please try my patch[1].

Thanks,
Robin.

[1] 
https://lore.kernel.org/linux-iommu/2b0ca6254dd0102bf559b2a73e9b51da089afbe3.1663764627.git.robin.murphy@arm.com/

> 
>>
>> Since this mechanism now officially serves as a bandage for broken 
>> hardware/drivers and little else, I had an idea to make it a lot 
>> better, guess it's time to have a go at writing that patch up...
>>
>> Thanks,
>> Robin.
>>
>>> Signed-off-by: Chunguang Xu <chunguang.xu@shopee.com>
>>> ---
>>>   drivers/iommu/iova.c | 28 +++++++++++++++++++++-------
>>>   1 file changed, 21 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
>>> index 47d1983dfa2a..d5775719a143 100644
>>> --- a/drivers/iommu/iova.c
>>> +++ b/drivers/iommu/iova.c
>>> @@ -16,6 +16,7 @@
>>>   #define IOVA_ANCHOR    ~0UL
>>>   #define IOVA_RANGE_CACHE_MAX_SIZE 6    /* log of max cached IOVA 
>>> range size (in pages) */
>>> +#define IOVA_RANGE_CACHE_ARRAY_SIZE (2 * IOVA_RANGE_CACHE_MAX_SIZE)
>>>   static bool iova_rcache_insert(struct iova_domain *iovad,
>>>                      unsigned long pfn,
>>> @@ -723,13 +724,13 @@ int iova_domain_init_rcaches(struct iova_domain 
>>> *iovad)
>>>       unsigned int cpu;
>>>       int i, ret;
>>> -    iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE,
>>> +    iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_ARRAY_SIZE,
>>>                    sizeof(struct iova_rcache),
>>>                    GFP_KERNEL);
>>>       if (!iovad->rcaches)
>>>           return -ENOMEM;
>>> -    for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
>>> +    for (i = 0; i < IOVA_RANGE_CACHE_ARRAY_SIZE; ++i) {
>>>           struct iova_cpu_rcache *cpu_rcache;
>>>           struct iova_rcache *rcache;
>>> @@ -825,11 +826,15 @@ static bool iova_rcache_insert(struct 
>>> iova_domain *iovad, unsigned long pfn,
>>>                      unsigned long size)
>>>   {
>>>       unsigned int log_size = order_base_2(size);
>>> +    unsigned int index = log_size;
>>>       if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
>>>           return false;
>>> -    return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
>>> +    if (pfn > DMA_BIT_MASK(32))
>>> +        index = log_size + IOVA_RANGE_CACHE_MAX_SIZE;
>>> +
>>> +    return __iova_rcache_insert(iovad, &iovad->rcaches[index], pfn);
>>>   }
>>>   /*
>>> @@ -881,11 +886,20 @@ static unsigned long iova_rcache_get(struct 
>>> iova_domain *iovad,
>>>                        unsigned long limit_pfn)
>>>   {
>>>       unsigned int log_size = order_base_2(size);
>>> +    unsigned long iova_pfn;
>>> +    unsigned int index;
>>>       if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE || !iovad->rcaches)
>>>           return 0;
>>> -    return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - 
>>> size);
>>> +    iova_pfn = __iova_rcache_get(&iovad->rcaches[log_size], 
>>> limit_pfn - size);
>>> +
>>> +    if (!iova_pfn && limit_pfn > DMA_BIT_MASK(32)) {
>>> +        index = log_size + IOVA_RANGE_CACHE_MAX_SIZE;
>>> +        iova_pfn = __iova_rcache_get(&iovad->rcaches[index], 
>>> limit_pfn - size);
>>> +    }
>>> +
>>> +    return iova_pfn
>>>   }
>>>   /*
>>> @@ -898,7 +912,7 @@ static void free_iova_rcaches(struct iova_domain 
>>> *iovad)
>>>       unsigned int cpu;
>>>       int i, j;
>>> -    for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
>>> +    for (i = 0; i < IOVA_RANGE_CACHE_ARRAY_SIZE; ++i) {
>>>           rcache = &iovad->rcaches[i];
>>>           if (!rcache->cpu_rcaches)
>>>               break;
>>> @@ -926,7 +940,7 @@ static void free_cpu_cached_iovas(unsigned int 
>>> cpu, struct iova_domain *iovad)
>>>       unsigned long flags;
>>>       int i;
>>> -    for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
>>> +    for (i = 0; i < IOVA_RANGE_CACHE_ARRAY_SIZE; ++i) {
>>>           rcache = &iovad->rcaches[i];
>>>           cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
>>>           spin_lock_irqsave(&cpu_rcache->lock, flags);
>>> @@ -945,7 +959,7 @@ static void free_global_cached_iovas(struct 
>>> iova_domain *iovad)
>>>       unsigned long flags;
>>>       int i, j;
>>> -    for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
>>> +    for (i = 0; i < IOVA_RANGE_CACHE_ARRAY_SIZE; ++i) {
>>>           rcache = &iovad->rcaches[i];
>>>           spin_lock_irqsave(&rcache->lock, flags);
>>>           for (j = 0; j < rcache->depot_size; ++j) {
