* [PATCH] drm/ttm: optimize the pool shrinker a bit
@ 2021-04-01 13:54 Christian König
  2021-04-02  4:18   ` kernel test robot
  2021-04-08 11:08 ` Daniel Vetter
  0 siblings, 2 replies; 8+ messages in thread
From: Christian König @ 2021-04-01 13:54 UTC (permalink / raw)
  To: dri-devel; +Cc: daniel.vetter

Switch back to using a spinlock again by moving the IOMMU unmap outside
of the locked region.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_pool.c | 40 +++++++++++++++-------------------
 include/linux/shrinker.h       |  1 +
 mm/vmscan.c                    | 10 +++++++++
 3 files changed, 29 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index cb38b1a17b09..a8b4abe687ce 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -70,7 +70,7 @@ static struct ttm_pool_type global_uncached[MAX_ORDER];
 static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
 static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
 
-static struct mutex shrinker_lock;
+static spinlock_t shrinker_lock;
 static struct list_head shrinker_list;
 static struct shrinker mm_shrinker;
 
@@ -263,9 +263,9 @@ static void ttm_pool_type_init(struct ttm_pool_type *pt, struct ttm_pool *pool,
 	spin_lock_init(&pt->lock);
 	INIT_LIST_HEAD(&pt->pages);
 
-	mutex_lock(&shrinker_lock);
+	spin_lock(&shrinker_lock);
 	list_add_tail(&pt->shrinker_list, &shrinker_list);
-	mutex_unlock(&shrinker_lock);
+	spin_unlock(&shrinker_lock);
 }
 
 /* Remove a pool_type from the global shrinker list and free all pages */
@@ -273,9 +273,9 @@ static void ttm_pool_type_fini(struct ttm_pool_type *pt)
 {
 	struct page *p;
 
-	mutex_lock(&shrinker_lock);
+	spin_lock(&shrinker_lock);
 	list_del(&pt->shrinker_list);
-	mutex_unlock(&shrinker_lock);
+	spin_unlock(&shrinker_lock);
 
 	while ((p = ttm_pool_type_take(pt)))
 		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
@@ -313,24 +313,19 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
 static unsigned int ttm_pool_shrink(void)
 {
 	struct ttm_pool_type *pt;
-	unsigned int num_freed;
 	struct page *p;
 
-	mutex_lock(&shrinker_lock);
+	spin_lock(&shrinker_lock);
 	pt = list_first_entry(&shrinker_list, typeof(*pt), shrinker_list);
+	list_move_tail(&pt->shrinker_list, &shrinker_list);
+	spin_unlock(&shrinker_lock);
 
 	p = ttm_pool_type_take(pt);
-	if (p) {
-		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
-		num_freed = 1 << pt->order;
-	} else {
-		num_freed = 0;
-	}
-
-	list_move_tail(&pt->shrinker_list, &shrinker_list);
-	mutex_unlock(&shrinker_lock);
+	if (!p)
+		return 0;
 
-	return num_freed;
+	ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
+	return 1 << pt->order;
 }
 
 /* Return the allocation order based for a page */
@@ -530,6 +525,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
 			for (j = 0; j < MAX_ORDER; ++j)
 				ttm_pool_type_fini(&pool->caching[i].orders[j]);
 	}
+	sync_shrinkers();
 }
 
 /* As long as pages are available make sure to release at least one */
@@ -604,7 +600,7 @@ static int ttm_pool_debugfs_globals_show(struct seq_file *m, void *data)
 {
 	ttm_pool_debugfs_header(m);
 
-	mutex_lock(&shrinker_lock);
+	spin_lock(&shrinker_lock);
 	seq_puts(m, "wc\t:");
 	ttm_pool_debugfs_orders(global_write_combined, m);
 	seq_puts(m, "uc\t:");
@@ -613,7 +609,7 @@ static int ttm_pool_debugfs_globals_show(struct seq_file *m, void *data)
 	ttm_pool_debugfs_orders(global_dma32_write_combined, m);
 	seq_puts(m, "uc 32\t:");
 	ttm_pool_debugfs_orders(global_dma32_uncached, m);
-	mutex_unlock(&shrinker_lock);
+	spin_unlock(&shrinker_lock);
 
 	ttm_pool_debugfs_footer(m);
 
@@ -640,7 +636,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
 
 	ttm_pool_debugfs_header(m);
 
-	mutex_lock(&shrinker_lock);
+	spin_lock(&shrinker_lock);
 	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
 		seq_puts(m, "DMA ");
 		switch (i) {
@@ -656,7 +652,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
 		}
 		ttm_pool_debugfs_orders(pool->caching[i].orders, m);
 	}
-	mutex_unlock(&shrinker_lock);
+	spin_unlock(&shrinker_lock);
 
 	ttm_pool_debugfs_footer(m);
 	return 0;
@@ -693,7 +689,7 @@ int ttm_pool_mgr_init(unsigned long num_pages)
 	if (!page_pool_size)
 		page_pool_size = num_pages;
 
-	mutex_init(&shrinker_lock);
+	spin_lock_init(&shrinker_lock);
 	INIT_LIST_HEAD(&shrinker_list);
 
 	for (i = 0; i < MAX_ORDER; ++i) {
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 0f80123650e2..6b75dc372fce 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -92,4 +92,5 @@ extern void register_shrinker_prepared(struct shrinker *shrinker);
 extern int register_shrinker(struct shrinker *shrinker);
 extern void unregister_shrinker(struct shrinker *shrinker);
 extern void free_prealloced_shrinker(struct shrinker *shrinker);
+extern void sync_shrinkers(void);
 #endif
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 562e87cbd7a1..46cd9c215d73 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -408,6 +408,16 @@ void unregister_shrinker(struct shrinker *shrinker)
 }
 EXPORT_SYMBOL(unregister_shrinker);
 
+/**
+ * sync_shrinker - Wait for all running shrinkers to complete.
+ */
+void sync_shrinkers(void)
+{
+	down_write(&shrinker_rwsem);
+	up_write(&shrinker_rwsem);
+}
+EXPORT_SYMBOL(sync_shrinkers);
+
 #define SHRINK_BATCH 128
 
 static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
-- 
2.25.1
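
For illustration, a minimal userspace sketch of the pattern the patch
applies (all names below, such as fake_page and pool_shrink_one, are made
up for the example and are not the TTM code): the list handling stays
under the spinlock, while the expensive free is done only after the lock
has been dropped.

/*
 * Rough analog of the reworked ttm_pool_shrink(): pick an element while
 * holding the spinlock, drop the lock, then do the expensive free
 * (standing in for ttm_pool_free_page()/IOMMU unmap) afterwards.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct fake_page {
	struct fake_page *next;
	unsigned int order;
};

static pthread_spinlock_t pool_lock;
static struct fake_page *pool_head;

/* Stand-in for ttm_pool_free_page(); must not run under pool_lock. */
static void expensive_free(struct fake_page *p)
{
	free(p);
}

static unsigned int pool_shrink_one(void)
{
	struct fake_page *p;
	unsigned int order;

	pthread_spin_lock(&pool_lock);
	p = pool_head;			/* list handling under the lock ... */
	if (p)
		pool_head = p->next;
	pthread_spin_unlock(&pool_lock);

	if (!p)
		return 0;

	order = p->order;		/* read before freeing */
	expensive_free(p);		/* ... slow work outside the lock */
	return 1u << order;
}

int main(void)
{
	unsigned int freed;

	pthread_spin_init(&pool_lock, PTHREAD_PROCESS_PRIVATE);

	for (unsigned int i = 0; i < 3; i++) {
		struct fake_page *p = malloc(sizeof(*p));

		p->order = i;
		p->next = pool_head;
		pool_head = p;
	}

	while ((freed = pool_shrink_one()))
		printf("freed %u page(s)\n", freed);

	pthread_spin_destroy(&pool_lock);
	return 0;
}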

* Re: [PATCH] drm/ttm: optimize the pool shrinker a bit
  2021-04-01 13:54 [PATCH] drm/ttm: optimize the pool shrinker a bit Christian König
@ 2021-04-02  4:18   ` kernel test robot
  2021-04-08 11:08 ` Daniel Vetter
  1 sibling, 0 replies; 8+ messages in thread
From: kernel test robot @ 2021-04-02  4:18 UTC (permalink / raw)
  To: Christian König, dri-devel
  Cc: clang-built-linux, kbuild-all, daniel.vetter

Hi Christian,

I love your patch! Perhaps something to improve:

[auto build test WARNING on drm-tip/drm-tip]
[also build test WARNING on drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next next-20210401]
[cannot apply to drm-intel/for-linux-next linus/master drm/drm-next v5.12-rc5]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-optimize-the-pool-shrinker-a-bit/20210401-215623
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-a004-20210401 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 1c268a8ff4e90a85d0e634350b1104080614cf2b)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install x86_64 cross compiling tool for clang build
        # apt-get install binutils-x86-64-linux-gnu
        # https://github.com/0day-ci/linux/commit/c0f3e98ef4f78d5e9d874be1f339186faf5c60bc
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Christian-K-nig/drm-ttm-optimize-the-pool-shrinker-a-bit/20210401-215623
        git checkout c0f3e98ef4f78d5e9d874be1f339186faf5c60bc
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> mm/vmscan.c:415: warning: expecting prototype for sync_shrinker(). Prototype was for sync_shrinkers() instead
   mm/vmscan.c:1627: warning: wrong kernel-doc identifier on line:
    * Isolating page from the lruvec to fill in @dst list by nr_to_scan times.


vim +415 mm/vmscan.c

   410	
   411	/**
   412	 * sync_shrinker - Wait for all running shrinkers to complete.
   413	 */
   414	void sync_shrinkers(void)
 > 415	{
   416		down_write(&shrinker_rwsem);
   417		up_write(&shrinker_rwsem);
   418	}
   419	EXPORT_SYMBOL(sync_shrinkers);
   420	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

* Re: [PATCH] drm/ttm: optimize the pool shrinker a bit
  2021-04-01 13:54 [PATCH] drm/ttm: optimize the pool shrinker a bit Christian König
  2021-04-02  4:18   ` kernel test robot
@ 2021-04-08 11:08 ` Daniel Vetter
  2021-04-08 11:17   ` Christian König
  1 sibling, 1 reply; 8+ messages in thread
From: Daniel Vetter @ 2021-04-08 11:08 UTC (permalink / raw)
  To: Christian König; +Cc: daniel.vetter, dri-devel

On Thu, Apr 01, 2021 at 03:54:13PM +0200, Christian König wrote:
> Switch back to using a spinlock again by moving the IOMMU unmap outside
> of the locked region.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/ttm/ttm_pool.c | 40 +++++++++++++++-------------------
>  include/linux/shrinker.h       |  1 +
>  mm/vmscan.c                    | 10 +++++++++
>  3 files changed, 29 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> index cb38b1a17b09..a8b4abe687ce 100644
> --- a/drivers/gpu/drm/ttm/ttm_pool.c
> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> @@ -70,7 +70,7 @@ static struct ttm_pool_type global_uncached[MAX_ORDER];
>  static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
>  static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
>  
> -static struct mutex shrinker_lock;
> +static spinlock_t shrinker_lock;
>  static struct list_head shrinker_list;
>  static struct shrinker mm_shrinker;
>  
> @@ -263,9 +263,9 @@ static void ttm_pool_type_init(struct ttm_pool_type *pt, struct ttm_pool *pool,
>  	spin_lock_init(&pt->lock);
>  	INIT_LIST_HEAD(&pt->pages);
>  
> -	mutex_lock(&shrinker_lock);
> +	spin_lock(&shrinker_lock);
>  	list_add_tail(&pt->shrinker_list, &shrinker_list);
> -	mutex_unlock(&shrinker_lock);
> +	spin_unlock(&shrinker_lock);
>  }
>  
>  /* Remove a pool_type from the global shrinker list and free all pages */
> @@ -273,9 +273,9 @@ static void ttm_pool_type_fini(struct ttm_pool_type *pt)
>  {
>  	struct page *p;
>  
> -	mutex_lock(&shrinker_lock);
> +	spin_lock(&shrinker_lock);
>  	list_del(&pt->shrinker_list);
> -	mutex_unlock(&shrinker_lock);
> +	spin_unlock(&shrinker_lock);
>  
>  	while ((p = ttm_pool_type_take(pt)))
>  		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
> @@ -313,24 +313,19 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
>  static unsigned int ttm_pool_shrink(void)
>  {
>  	struct ttm_pool_type *pt;
> -	unsigned int num_freed;
>  	struct page *p;
>  
> -	mutex_lock(&shrinker_lock);
> +	spin_lock(&shrinker_lock);
>  	pt = list_first_entry(&shrinker_list, typeof(*pt), shrinker_list);
> +	list_move_tail(&pt->shrinker_list, &shrinker_list);
> +	spin_unlock(&shrinker_lock);
>  
>  	p = ttm_pool_type_take(pt);
> -	if (p) {
> -		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
> -		num_freed = 1 << pt->order;
> -	} else {
> -		num_freed = 0;
> -	}
> -
> -	list_move_tail(&pt->shrinker_list, &shrinker_list);
> -	mutex_unlock(&shrinker_lock);
> +	if (!p)
> +		return 0;
>  
> -	return num_freed;
> +	ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
> +	return 1 << pt->order;
>  }
>  
>  /* Return the allocation order based for a page */
> @@ -530,6 +525,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
>  			for (j = 0; j < MAX_ORDER; ++j)
>  				ttm_pool_type_fini(&pool->caching[i].orders[j]);
>  	}
> +	sync_shrinkers();
>  }
>  
>  /* As long as pages are available make sure to release at least one */
> @@ -604,7 +600,7 @@ static int ttm_pool_debugfs_globals_show(struct seq_file *m, void *data)
>  {
>  	ttm_pool_debugfs_header(m);
>  
> -	mutex_lock(&shrinker_lock);
> +	spin_lock(&shrinker_lock);
>  	seq_puts(m, "wc\t:");
>  	ttm_pool_debugfs_orders(global_write_combined, m);
>  	seq_puts(m, "uc\t:");
> @@ -613,7 +609,7 @@ static int ttm_pool_debugfs_globals_show(struct seq_file *m, void *data)
>  	ttm_pool_debugfs_orders(global_dma32_write_combined, m);
>  	seq_puts(m, "uc 32\t:");
>  	ttm_pool_debugfs_orders(global_dma32_uncached, m);
> -	mutex_unlock(&shrinker_lock);
> +	spin_unlock(&shrinker_lock);
>  
>  	ttm_pool_debugfs_footer(m);
>  
> @@ -640,7 +636,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
>  
>  	ttm_pool_debugfs_header(m);
>  
> -	mutex_lock(&shrinker_lock);
> +	spin_lock(&shrinker_lock);
>  	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
>  		seq_puts(m, "DMA ");
>  		switch (i) {
> @@ -656,7 +652,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
>  		}
>  		ttm_pool_debugfs_orders(pool->caching[i].orders, m);
>  	}
> -	mutex_unlock(&shrinker_lock);
> +	spin_unlock(&shrinker_lock);
>  
>  	ttm_pool_debugfs_footer(m);
>  	return 0;
> @@ -693,7 +689,7 @@ int ttm_pool_mgr_init(unsigned long num_pages)
>  	if (!page_pool_size)
>  		page_pool_size = num_pages;
>  
> -	mutex_init(&shrinker_lock);
> +	spin_lock_init(&shrinker_lock);
>  	INIT_LIST_HEAD(&shrinker_list);
>  
>  	for (i = 0; i < MAX_ORDER; ++i) {
> diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
> index 0f80123650e2..6b75dc372fce 100644
> --- a/include/linux/shrinker.h
> +++ b/include/linux/shrinker.h
> @@ -92,4 +92,5 @@ extern void register_shrinker_prepared(struct shrinker *shrinker);
>  extern int register_shrinker(struct shrinker *shrinker);
>  extern void unregister_shrinker(struct shrinker *shrinker);
>  extern void free_prealloced_shrinker(struct shrinker *shrinker);
> +extern void sync_shrinkers(void);
>  #endif
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 562e87cbd7a1..46cd9c215d73 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -408,6 +408,16 @@ void unregister_shrinker(struct shrinker *shrinker)
>  }
>  EXPORT_SYMBOL(unregister_shrinker);
>  
> +/**
> + * sync_shrinker - Wait for all running shrinkers to complete.
> + */
> +void sync_shrinkers(void)

This one should probably be in its own patch, with a bit more commit
message about why we need it and all that. I'd assume that just
unregistering the shrinker should sync everything we needed to sync
already, and for other sync needs we can do locking within our own
shrinker?
-Daniel

> +{
> +	down_write(&shrinker_rwsem);
> +	up_write(&shrinker_rwsem);
> +}
> +EXPORT_SYMBOL(sync_shrinkers);
> +
>  #define SHRINK_BATCH 128
>  
>  static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

* Re: [PATCH] drm/ttm: optimize the pool shrinker a bit
  2021-04-08 11:08 ` Daniel Vetter
@ 2021-04-08 11:17   ` Christian König
  2021-04-08 11:31     ` Daniel Vetter
  0 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2021-04-08 11:17 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: daniel.vetter, dri-devel

On 08.04.21 at 13:08, Daniel Vetter wrote:
> On Thu, Apr 01, 2021 at 03:54:13PM +0200, Christian König wrote:
>> Switch back to using a spinlock again by moving the IOMMU unmap outside
>> of the locked region.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/ttm/ttm_pool.c | 40 +++++++++++++++-------------------
>>   include/linux/shrinker.h       |  1 +
>>   mm/vmscan.c                    | 10 +++++++++
>>   3 files changed, 29 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
>> index cb38b1a17b09..a8b4abe687ce 100644
>> --- a/drivers/gpu/drm/ttm/ttm_pool.c
>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
>> @@ -70,7 +70,7 @@ static struct ttm_pool_type global_uncached[MAX_ORDER];
>>   static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
>>   static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
>>   
>> -static struct mutex shrinker_lock;
>> +static spinlock_t shrinker_lock;
>>   static struct list_head shrinker_list;
>>   static struct shrinker mm_shrinker;
>>   
>> @@ -263,9 +263,9 @@ static void ttm_pool_type_init(struct ttm_pool_type *pt, struct ttm_pool *pool,
>>   	spin_lock_init(&pt->lock);
>>   	INIT_LIST_HEAD(&pt->pages);
>>   
>> -	mutex_lock(&shrinker_lock);
>> +	spin_lock(&shrinker_lock);
>>   	list_add_tail(&pt->shrinker_list, &shrinker_list);
>> -	mutex_unlock(&shrinker_lock);
>> +	spin_unlock(&shrinker_lock);
>>   }
>>   
>>   /* Remove a pool_type from the global shrinker list and free all pages */
>> @@ -273,9 +273,9 @@ static void ttm_pool_type_fini(struct ttm_pool_type *pt)
>>   {
>>   	struct page *p;
>>   
>> -	mutex_lock(&shrinker_lock);
>> +	spin_lock(&shrinker_lock);
>>   	list_del(&pt->shrinker_list);
>> -	mutex_unlock(&shrinker_lock);
>> +	spin_unlock(&shrinker_lock);
>>   
>>   	while ((p = ttm_pool_type_take(pt)))
>>   		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
>> @@ -313,24 +313,19 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
>>   static unsigned int ttm_pool_shrink(void)
>>   {
>>   	struct ttm_pool_type *pt;
>> -	unsigned int num_freed;
>>   	struct page *p;
>>   
>> -	mutex_lock(&shrinker_lock);
>> +	spin_lock(&shrinker_lock);
>>   	pt = list_first_entry(&shrinker_list, typeof(*pt), shrinker_list);
>> +	list_move_tail(&pt->shrinker_list, &shrinker_list);
>> +	spin_unlock(&shrinker_lock);
>>   
>>   	p = ttm_pool_type_take(pt);
>> -	if (p) {
>> -		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
>> -		num_freed = 1 << pt->order;
>> -	} else {
>> -		num_freed = 0;
>> -	}
>> -
>> -	list_move_tail(&pt->shrinker_list, &shrinker_list);
>> -	mutex_unlock(&shrinker_lock);
>> +	if (!p)
>> +		return 0;
>>   
>> -	return num_freed;
>> +	ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
>> +	return 1 << pt->order;
>>   }
>>   
>>   /* Return the allocation order based for a page */
>> @@ -530,6 +525,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
>>   			for (j = 0; j < MAX_ORDER; ++j)
>>   				ttm_pool_type_fini(&pool->caching[i].orders[j]);
>>   	}
>> +	sync_shrinkers();
>>   }
>>   
>>   /* As long as pages are available make sure to release at least one */
>> @@ -604,7 +600,7 @@ static int ttm_pool_debugfs_globals_show(struct seq_file *m, void *data)
>>   {
>>   	ttm_pool_debugfs_header(m);
>>   
>> -	mutex_lock(&shrinker_lock);
>> +	spin_lock(&shrinker_lock);
>>   	seq_puts(m, "wc\t:");
>>   	ttm_pool_debugfs_orders(global_write_combined, m);
>>   	seq_puts(m, "uc\t:");
>> @@ -613,7 +609,7 @@ static int ttm_pool_debugfs_globals_show(struct seq_file *m, void *data)
>>   	ttm_pool_debugfs_orders(global_dma32_write_combined, m);
>>   	seq_puts(m, "uc 32\t:");
>>   	ttm_pool_debugfs_orders(global_dma32_uncached, m);
>> -	mutex_unlock(&shrinker_lock);
>> +	spin_unlock(&shrinker_lock);
>>   
>>   	ttm_pool_debugfs_footer(m);
>>   
>> @@ -640,7 +636,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
>>   
>>   	ttm_pool_debugfs_header(m);
>>   
>> -	mutex_lock(&shrinker_lock);
>> +	spin_lock(&shrinker_lock);
>>   	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
>>   		seq_puts(m, "DMA ");
>>   		switch (i) {
>> @@ -656,7 +652,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
>>   		}
>>   		ttm_pool_debugfs_orders(pool->caching[i].orders, m);
>>   	}
>> -	mutex_unlock(&shrinker_lock);
>> +	spin_unlock(&shrinker_lock);
>>   
>>   	ttm_pool_debugfs_footer(m);
>>   	return 0;
>> @@ -693,7 +689,7 @@ int ttm_pool_mgr_init(unsigned long num_pages)
>>   	if (!page_pool_size)
>>   		page_pool_size = num_pages;
>>   
>> -	mutex_init(&shrinker_lock);
>> +	spin_lock_init(&shrinker_lock);
>>   	INIT_LIST_HEAD(&shrinker_list);
>>   
>>   	for (i = 0; i < MAX_ORDER; ++i) {
>> diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
>> index 0f80123650e2..6b75dc372fce 100644
>> --- a/include/linux/shrinker.h
>> +++ b/include/linux/shrinker.h
>> @@ -92,4 +92,5 @@ extern void register_shrinker_prepared(struct shrinker *shrinker);
>>   extern int register_shrinker(struct shrinker *shrinker);
>>   extern void unregister_shrinker(struct shrinker *shrinker);
>>   extern void free_prealloced_shrinker(struct shrinker *shrinker);
>> +extern void sync_shrinkers(void);
>>   #endif
>> diff --git a/mm/vmscan.c b/mm/vmscan.c
>> index 562e87cbd7a1..46cd9c215d73 100644
>> --- a/mm/vmscan.c
>> +++ b/mm/vmscan.c
>> @@ -408,6 +408,16 @@ void unregister_shrinker(struct shrinker *shrinker)
>>   }
>>   EXPORT_SYMBOL(unregister_shrinker);
>>   
>> +/**
>> + * sync_shrinker - Wait for all running shrinkers to complete.
>> + */
>> +void sync_shrinkers(void)
> This one should probably be in its own patch, with a bit more commit
> message about why we need it and all that. I'd assume that just
> unregistering the shrinker should sync everything we needed to sync
> already, and for other sync needs we can do locking within our own
> shrinker?

Correct. Reason why we need the barrier is that we need to destroy the 
device (during hotplug) before the shrinker is unregistered (during 
module unload).

Going to separate that, write something up in the commit message and 
send it to the appropriate audience.

Thanks,
Christian.

> -Daniel
>
>> +{
>> +	down_write(&shrinker_rwsem);
>> +	up_write(&shrinker_rwsem);
>> +}
>> +EXPORT_SYMBOL(sync_shrinkers);
>> +
>>   #define SHRINK_BATCH 128
>>   
>>   static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
>> -- 
>> 2.25.1
>>

* Re: [PATCH] drm/ttm: optimize the pool shrinker a bit
  2021-04-08 11:17   ` Christian König
@ 2021-04-08 11:31     ` Daniel Vetter
  2021-04-08 12:44       ` Christian König
  0 siblings, 1 reply; 8+ messages in thread
From: Daniel Vetter @ 2021-04-08 11:31 UTC (permalink / raw)
  To: Christian König; +Cc: daniel.vetter, dri-devel

On Thu, Apr 08, 2021 at 01:17:32PM +0200, Christian König wrote:
> On 08.04.21 at 13:08, Daniel Vetter wrote:
> > On Thu, Apr 01, 2021 at 03:54:13PM +0200, Christian König wrote:
> > > Switch back to using a spinlock again by moving the IOMMU unmap outside
> > > of the locked region.
> > > 
> > > Signed-off-by: Christian König <christian.koenig@amd.com>
> > > ---
> > >   drivers/gpu/drm/ttm/ttm_pool.c | 40 +++++++++++++++-------------------
> > >   include/linux/shrinker.h       |  1 +
> > >   mm/vmscan.c                    | 10 +++++++++
> > >   3 files changed, 29 insertions(+), 22 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> > > index cb38b1a17b09..a8b4abe687ce 100644
> > > --- a/drivers/gpu/drm/ttm/ttm_pool.c
> > > +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> > > @@ -70,7 +70,7 @@ static struct ttm_pool_type global_uncached[MAX_ORDER];
> > >   static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
> > >   static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
> > > -static struct mutex shrinker_lock;
> > > +static spinlock_t shrinker_lock;
> > >   static struct list_head shrinker_list;
> > >   static struct shrinker mm_shrinker;
> > > @@ -263,9 +263,9 @@ static void ttm_pool_type_init(struct ttm_pool_type *pt, struct ttm_pool *pool,
> > >   	spin_lock_init(&pt->lock);
> > >   	INIT_LIST_HEAD(&pt->pages);
> > > -	mutex_lock(&shrinker_lock);
> > > +	spin_lock(&shrinker_lock);
> > >   	list_add_tail(&pt->shrinker_list, &shrinker_list);
> > > -	mutex_unlock(&shrinker_lock);
> > > +	spin_unlock(&shrinker_lock);
> > >   }
> > >   /* Remove a pool_type from the global shrinker list and free all pages */
> > > @@ -273,9 +273,9 @@ static void ttm_pool_type_fini(struct ttm_pool_type *pt)
> > >   {
> > >   	struct page *p;
> > > -	mutex_lock(&shrinker_lock);
> > > +	spin_lock(&shrinker_lock);
> > >   	list_del(&pt->shrinker_list);
> > > -	mutex_unlock(&shrinker_lock);
> > > +	spin_unlock(&shrinker_lock);
> > >   	while ((p = ttm_pool_type_take(pt)))
> > >   		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
> > > @@ -313,24 +313,19 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
> > >   static unsigned int ttm_pool_shrink(void)
> > >   {
> > >   	struct ttm_pool_type *pt;
> > > -	unsigned int num_freed;
> > >   	struct page *p;
> > > -	mutex_lock(&shrinker_lock);
> > > +	spin_lock(&shrinker_lock);
> > >   	pt = list_first_entry(&shrinker_list, typeof(*pt), shrinker_list);
> > > +	list_move_tail(&pt->shrinker_list, &shrinker_list);
> > > +	spin_unlock(&shrinker_lock);
> > >   	p = ttm_pool_type_take(pt);
> > > -	if (p) {
> > > -		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
> > > -		num_freed = 1 << pt->order;
> > > -	} else {
> > > -		num_freed = 0;
> > > -	}
> > > -
> > > -	list_move_tail(&pt->shrinker_list, &shrinker_list);
> > > -	mutex_unlock(&shrinker_lock);
> > > +	if (!p)
> > > +		return 0;
> > > -	return num_freed;
> > > +	ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
> > > +	return 1 << pt->order;
> > >   }
> > >   /* Return the allocation order based for a page */
> > > @@ -530,6 +525,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
> > >   			for (j = 0; j < MAX_ORDER; ++j)
> > >   				ttm_pool_type_fini(&pool->caching[i].orders[j]);
> > >   	}
> > > +	sync_shrinkers();
> > >   }
> > >   /* As long as pages are available make sure to release at least one */
> > > @@ -604,7 +600,7 @@ static int ttm_pool_debugfs_globals_show(struct seq_file *m, void *data)
> > >   {
> > >   	ttm_pool_debugfs_header(m);
> > > -	mutex_lock(&shrinker_lock);
> > > +	spin_lock(&shrinker_lock);
> > >   	seq_puts(m, "wc\t:");
> > >   	ttm_pool_debugfs_orders(global_write_combined, m);
> > >   	seq_puts(m, "uc\t:");
> > > @@ -613,7 +609,7 @@ static int ttm_pool_debugfs_globals_show(struct seq_file *m, void *data)
> > >   	ttm_pool_debugfs_orders(global_dma32_write_combined, m);
> > >   	seq_puts(m, "uc 32\t:");
> > >   	ttm_pool_debugfs_orders(global_dma32_uncached, m);
> > > -	mutex_unlock(&shrinker_lock);
> > > +	spin_unlock(&shrinker_lock);
> > >   	ttm_pool_debugfs_footer(m);
> > > @@ -640,7 +636,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
> > >   	ttm_pool_debugfs_header(m);
> > > -	mutex_lock(&shrinker_lock);
> > > +	spin_lock(&shrinker_lock);
> > >   	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
> > >   		seq_puts(m, "DMA ");
> > >   		switch (i) {
> > > @@ -656,7 +652,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
> > >   		}
> > >   		ttm_pool_debugfs_orders(pool->caching[i].orders, m);
> > >   	}
> > > -	mutex_unlock(&shrinker_lock);
> > > +	spin_unlock(&shrinker_lock);
> > >   	ttm_pool_debugfs_footer(m);
> > >   	return 0;
> > > @@ -693,7 +689,7 @@ int ttm_pool_mgr_init(unsigned long num_pages)
> > >   	if (!page_pool_size)
> > >   		page_pool_size = num_pages;
> > > -	mutex_init(&shrinker_lock);
> > > +	spin_lock_init(&shrinker_lock);
> > >   	INIT_LIST_HEAD(&shrinker_list);
> > >   	for (i = 0; i < MAX_ORDER; ++i) {
> > > diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
> > > index 0f80123650e2..6b75dc372fce 100644
> > > --- a/include/linux/shrinker.h
> > > +++ b/include/linux/shrinker.h
> > > @@ -92,4 +92,5 @@ extern void register_shrinker_prepared(struct shrinker *shrinker);
> > >   extern int register_shrinker(struct shrinker *shrinker);
> > >   extern void unregister_shrinker(struct shrinker *shrinker);
> > >   extern void free_prealloced_shrinker(struct shrinker *shrinker);
> > > +extern void sync_shrinkers(void);
> > >   #endif
> > > diff --git a/mm/vmscan.c b/mm/vmscan.c
> > > index 562e87cbd7a1..46cd9c215d73 100644
> > > --- a/mm/vmscan.c
> > > +++ b/mm/vmscan.c
> > > @@ -408,6 +408,16 @@ void unregister_shrinker(struct shrinker *shrinker)
> > >   }
> > >   EXPORT_SYMBOL(unregister_shrinker);
> > > +/**
> > > + * sync_shrinker - Wait for all running shrinkers to complete.
> > > + */
> > > +void sync_shrinkers(void)
> > This one should probably be in its own patch, with a bit more commit
> > message about why we need it and all that. I'd assume that just
> > unregistering the shrinker should sync everything we needed to sync
> > already, and for other sync needs we can do locking within our own
> > shrinker?
> 
> Correct. Reason why we need the barrier is that we need to destroy the
> device (during hotplug) before the shrinker is unregistered (during module
> unload).
> 
> Going to separate that, write something up in the commit message and send it
> to the appropriate audience.

Hm why do we need that? Either way sounds like an orthogonal series for
the hotunplug work, not just shrinker optimization.
-Daniel

> 
> Thanks,
> Christian.
> 
> > -Daniel
> > 
> > > +{
> > > +	down_write(&shrinker_rwsem);
> > > +	up_write(&shrinker_rwsem);
> > > +}
> > > +EXPORT_SYMBOL(sync_shrinkers);
> > > +
> > >   #define SHRINK_BATCH 128
> > >   static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
> > > -- 
> > > 2.25.1
> > > 
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

* Re: [PATCH] drm/ttm: optimize the pool shrinker a bit
  2021-04-08 11:31     ` Daniel Vetter
@ 2021-04-08 12:44       ` Christian König
  2021-04-09  7:41         ` Daniel Vetter
  0 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2021-04-08 12:44 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: daniel.vetter, dri-devel

On 08.04.21 at 13:31, Daniel Vetter wrote:
> On Thu, Apr 08, 2021 at 01:17:32PM +0200, Christian König wrote:
>> On 08.04.21 at 13:08, Daniel Vetter wrote:
>>> On Thu, Apr 01, 2021 at 03:54:13PM +0200, Christian König wrote:
>>>> [SNIP]
>>>>    EXPORT_SYMBOL(unregister_shrinker);
>>>> +/**
>>>> + * sync_shrinker - Wait for all running shrinkers to complete.
>>>> + */
>>>> +void sync_shrinkers(void)
>>> This one should probably be in its own patch, with a bit more commit
>>> message about why we need it and all that. I'd assume that just
>>> unregistering the shrinker should sync everything we needed to sync
>>> already, and for other sync needs we can do locking within our own
>>> shrinker?
>> Correct. Reason why we need the barrier is that we need to destroy the
>> device (during hotplug) before the shrinker is unregistered (during module
>> unload).
>>
>> Going to separate that, write something up in the commit message and send it
>> to the appropriate audience.
> Hm why do we need that?

When the shrinker runs in parallel with (for example) a hotplug event
and unmaps pages from the device's IOMMU, I must make sure that you can't
destroy the device or pool structure at the same time.

Previously, holding the mutex while updating the IOMMU took care of
that, but now we need to prevent this by other means.

Could be that this is also handled somewhere else, but I'd rather be safe
than sorry here, and grabbing/releasing the write side of shrinker_rwsem
is rather lightweight.
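
To illustrate why that is sufficient, here is a rough userspace analog
(a pthread rwlock standing in for shrinker_rwsem; the thread, timing and
names are purely illustrative, not kernel code):

/*
 * The "shrinker" holds the read side of the rwlock while it works, the
 * way shrinker invocations run under shrinker_rwsem.  Taking and
 * immediately dropping the write side therefore waits for every
 * invocation that is already in flight.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_rwlock_t rwsem = PTHREAD_RWLOCK_INITIALIZER;

static void *shrinker_thread(void *arg)
{
	(void)arg;

	pthread_rwlock_rdlock(&rwsem);
	printf("shrinker: freeing pool pages\n");
	sleep(1);				/* simulate the slow free */
	printf("shrinker: done\n");
	pthread_rwlock_unlock(&rwsem);
	return NULL;
}

/* Analog of sync_shrinkers(): a write lock/unlock pair acts as a barrier. */
static void sync_shrinkers_analog(void)
{
	pthread_rwlock_wrlock(&rwsem);
	pthread_rwlock_unlock(&rwsem);
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, shrinker_thread, NULL);
	usleep(100 * 1000);		/* give the shrinker time to start */

	sync_shrinkers_analog();	/* returns only once the reader is done */
	printf("fini: no shrinker in flight, safe to tear down the pool\n");

	pthread_join(t, NULL);
	return 0;
}

A shrinker invocation that only starts after the barrier no longer finds
the pool types on the global list (ttm_pool_type_fini() already removed
them), so only the in-flight invocations need to be waited for.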

> Either way sounds like an orthogonal series for
> the hotunplug work, not just shrinker optimization.

It is unrelated to the hotplug work in general.

Regards,
Christian.

> -Daniel
>
>> Thanks,
>> Christian.
>>
>>> -Daniel
>>>
>>>> +{
>>>> +	down_write(&shrinker_rwsem);
>>>> +	up_write(&shrinker_rwsem);
>>>> +}
>>>> +EXPORT_SYMBOL(sync_shrinkers);
>>>> +
>>>>    #define SHRINK_BATCH 128
>>>>    static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
>>>> -- 
>>>> 2.25.1
>>>>

* Re: [PATCH] drm/ttm: optimize the pool shrinker a bit
  2021-04-08 12:44       ` Christian König
@ 2021-04-09  7:41         ` Daniel Vetter
  0 siblings, 0 replies; 8+ messages in thread
From: Daniel Vetter @ 2021-04-09  7:41 UTC (permalink / raw)
  To: Christian König; +Cc: daniel.vetter, dri-devel

On Thu, Apr 08, 2021 at 02:44:16PM +0200, Christian König wrote:
> On 08.04.21 at 13:31, Daniel Vetter wrote:
> > On Thu, Apr 08, 2021 at 01:17:32PM +0200, Christian König wrote:
> > > On 08.04.21 at 13:08, Daniel Vetter wrote:
> > > > On Thu, Apr 01, 2021 at 03:54:13PM +0200, Christian König wrote:
> > > > > [SNIP]
> > > > >    EXPORT_SYMBOL(unregister_shrinker);
> > > > > +/**
> > > > > + * sync_shrinker - Wait for all running shrinkers to complete.
> > > > > + */
> > > > > +void sync_shrinkers(void)
> > > > This one should probably be in its own patch, with a bit more commit
> > > > message about why we need it and all that. I'd assume that just
> > > > unregistering the shrinker should sync everything we needed to sync
> > > > already, and for other sync needs we can do locking within our own
> > > > shrinker?
> > > Correct. Reason why we need the barrier is that we need to destroy the
> > > device (during hotplug) before the shrinker is unregistered (during module
> > > unload).
> > > 
> > > Going to separate that, write something up in the commit message and send it
> > > to the appropriate audience.
> > Hm why do we need that?
> 
> When the shrinker runs in parallel with (for example) a hotplug event and
> unmaps pages from the device's IOMMU, I must make sure that you can't destroy
> the device or pool structure at the same time.
> 
> Previously, holding the mutex while updating the IOMMU took care of
> that, but now we need to prevent this by other means.
> 
> Could be that this is also handled somewhere else, but I'd rather be safe
> than sorry here, and grabbing/releasing the write side of shrinker_rwsem is
> rather lightweight.

I forgot that we don't have a per-pool (or at least per-device) shrinker,
but one global one for all ttm devices. So yeah, with that design a
sync_shrinker is needed.
-Daniel

> 
> > Either way sounds like an orthogonal series for
> > the hotunplug work, not just shrinker optimization.
> 
> It is unrelated to the hotplug work in general.
> 
> Regards,
> Christian.
> 
> > -Daniel
> > 
> > > Thanks,
> > > Christian.
> > > 
> > > > -Daniel
> > > > 
> > > > > +{
> > > > > +	down_write(&shrinker_rwsem);
> > > > > +	up_write(&shrinker_rwsem);
> > > > > +}
> > > > > +EXPORT_SYMBOL(sync_shrinkers);
> > > > > +
> > > > >    #define SHRINK_BATCH 128
> > > > >    static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
> > > > > -- 
> > > > > 2.25.1
> > > > > 
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch