dri-devel.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/4] drm/ttm: add debugfs directory v2
@ 2020-12-18 17:55 Christian König
  2020-12-18 17:55 ` [PATCH 2/4] drm/ttm: add a debugfs file for the global page pools Christian König
                   ` (3 more replies)
  0 siblings, 4 replies; 13+ messages in thread
From: Christian König @ 2020-12-18 17:55 UTC (permalink / raw)
  To: dri-devel; +Cc: ray.huang

As far as I can tell the buffer_count was never used by a
userspace application.

The number of BOs in the system is far better suited in
debugfs than sysfs and we now should be able to add other
information here as well.

v2: add that additionally to sysfs

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/ttm/ttm_bo.c     | 48 ++------------------------------
 drivers/gpu/drm/ttm/ttm_module.c |  4 +++
 drivers/gpu/drm/ttm/ttm_module.h |  6 ++--
 3 files changed, 11 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 31e8b3da5563..cd55e3104e50 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -44,8 +44,6 @@
 
 #include "ttm_module.h"
 
-static void ttm_bo_global_kobj_release(struct kobject *kobj);
-
 /*
  * ttm_global_mutex - protecting the global BO state
  */
@@ -54,11 +52,6 @@ unsigned ttm_bo_glob_use_count;
 struct ttm_bo_global ttm_bo_glob;
 EXPORT_SYMBOL(ttm_bo_glob);
 
-static struct attribute ttm_bo_count = {
-	.name = "bo_count",
-	.mode = S_IRUGO
-};
-
 /* default destructor */
 static void ttm_bo_default_destroy(struct ttm_buffer_object *bo)
 {
@@ -84,32 +77,6 @@ static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo,
 	}
 }
 
-static ssize_t ttm_bo_global_show(struct kobject *kobj,
-				  struct attribute *attr,
-				  char *buffer)
-{
-	struct ttm_bo_global *glob =
-		container_of(kobj, struct ttm_bo_global, kobj);
-
-	return snprintf(buffer, PAGE_SIZE, "%d\n",
-				atomic_read(&glob->bo_count));
-}
-
-static struct attribute *ttm_bo_global_attrs[] = {
-	&ttm_bo_count,
-	NULL
-};
-
-static const struct sysfs_ops ttm_bo_global_ops = {
-	.show = &ttm_bo_global_show
-};
-
-static struct kobj_type ttm_bo_glob_kobj_type  = {
-	.release = &ttm_bo_global_kobj_release,
-	.sysfs_ops = &ttm_bo_global_ops,
-	.default_attrs = ttm_bo_global_attrs
-};
-
 static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
@@ -1226,14 +1193,6 @@ size_t ttm_bo_dma_acc_size(struct ttm_bo_device *bdev,
 }
 EXPORT_SYMBOL(ttm_bo_dma_acc_size);
 
-static void ttm_bo_global_kobj_release(struct kobject *kobj)
-{
-	struct ttm_bo_global *glob =
-		container_of(kobj, struct ttm_bo_global, kobj);
-
-	__free_page(glob->dummy_read_page);
-}
-
 static void ttm_bo_global_release(void)
 {
 	struct ttm_bo_global *glob = &ttm_bo_glob;
@@ -1245,6 +1204,7 @@ static void ttm_bo_global_release(void)
 	kobject_del(&glob->kobj);
 	kobject_put(&glob->kobj);
 	ttm_mem_global_release(&ttm_mem_glob);
+	__free_page(glob->dummy_read_page);
 	memset(glob, 0, sizeof(*glob));
 out:
 	mutex_unlock(&ttm_global_mutex);
@@ -1277,10 +1237,8 @@ static int ttm_bo_global_init(void)
 	INIT_LIST_HEAD(&glob->device_list);
 	atomic_set(&glob->bo_count, 0);
 
-	ret = kobject_init_and_add(
-		&glob->kobj, &ttm_bo_glob_kobj_type, ttm_get_kobj(), "buffer_objects");
-	if (unlikely(ret != 0))
-		kobject_put(&glob->kobj);
+	debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_root,
+				&glob->bo_count);
 out:
 	mutex_unlock(&ttm_global_mutex);
 	return ret;
diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c
index c0906437cb1c..f6566603a60f 100644
--- a/drivers/gpu/drm/ttm/ttm_module.c
+++ b/drivers/gpu/drm/ttm/ttm_module.c
@@ -32,12 +32,14 @@
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/sched.h>
+#include <linux/debugfs.h>
 #include <drm/drm_sysfs.h>
 
 #include "ttm_module.h"
 
 static DECLARE_WAIT_QUEUE_HEAD(exit_q);
 static atomic_t device_released;
+struct dentry *ttm_debugfs_root;
 
 static struct device_type ttm_drm_class_type = {
 	.name = "ttm",
@@ -77,6 +79,7 @@ static int __init ttm_init(void)
 	if (unlikely(ret != 0))
 		goto out_no_dev_reg;
 
+	ttm_debugfs_root = debugfs_create_dir("ttm", NULL);
 	return 0;
 out_no_dev_reg:
 	atomic_set(&device_released, 1);
@@ -94,6 +97,7 @@ static void __exit ttm_exit(void)
 	 */
 
 	wait_event(exit_q, atomic_read(&device_released) == 1);
+	debugfs_remove(ttm_debugfs_root);
 }
 
 module_init(ttm_init);
diff --git a/drivers/gpu/drm/ttm/ttm_module.h b/drivers/gpu/drm/ttm/ttm_module.h
index 45fa318c1585..2f03c2fcf570 100644
--- a/drivers/gpu/drm/ttm/ttm_module.h
+++ b/drivers/gpu/drm/ttm/ttm_module.h
@@ -31,10 +31,12 @@
 #ifndef _TTM_MODULE_H_
 #define _TTM_MODULE_H_
 
-#include <linux/kernel.h>
+#define TTM_PFX "[TTM] "
+
 struct kobject;
+struct dentry;
 
-#define TTM_PFX "[TTM] "
 extern struct kobject *ttm_get_kobj(void);
+extern struct dentry *ttm_debugfs_root;
 
 #endif /* _TTM_MODULE_H_ */
-- 
2.25.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 2/4] drm/ttm: add a debugfs file for the global page pools
  2020-12-18 17:55 [PATCH 1/4] drm/ttm: add debugfs directory v2 Christian König
@ 2020-12-18 17:55 ` Christian König
  2020-12-22 13:36   ` Daniel Vetter
  2020-12-18 17:55 ` [PATCH 3/4] drm/ttm: add debugfs entry to test pool shrinker Christian König
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 13+ messages in thread
From: Christian König @ 2020-12-18 17:55 UTC (permalink / raw)
  To: dri-devel; +Cc: ray.huang

Instead of printing this on the per-device pool.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_pool.c | 70 ++++++++++++++++++++++++----------
 1 file changed, 50 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 7b2f60616750..1d61e8fc0e81 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -42,6 +42,8 @@
 #include <drm/ttm/ttm_bo_driver.h>
 #include <drm/ttm/ttm_tt.h>
 
+#include "ttm_module.h"
+
 /**
  * struct ttm_pool_dma - Helper object for coherent DMA mappings
  *
@@ -543,6 +545,17 @@ static unsigned int ttm_pool_type_count(struct ttm_pool_type *pt)
 	return count;
 }
 
+/* Print a nice header for the order */
+static void ttm_pool_debugfs_header(struct seq_file *m)
+{
+	unsigned int i;
+
+	seq_puts(m, "\t ");
+	for (i = 0; i < MAX_ORDER; ++i)
+		seq_printf(m, " ---%2u---", i);
+	seq_puts(m, "\n");
+}
+
 /* Dump information about the different pool types */
 static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
 				    struct seq_file *m)
@@ -554,6 +567,35 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
 	seq_puts(m, "\n");
 }
 
+/* Dump the total amount of allocated pages */
+static void ttm_pool_debugfs_footer(struct seq_file *m)
+{
+	seq_printf(m, "\ntotal\t: %8lu of %8lu\n",
+		   atomic_long_read(&allocated_pages), page_pool_size);
+}
+
+/* Dump the information for the global pools */
+static int ttm_pool_debugfs_globals_show(struct seq_file *m, void *data)
+{
+	ttm_pool_debugfs_header(m);
+
+	spin_lock(&shrinker_lock);
+	seq_puts(m, "wc\t:");
+	ttm_pool_debugfs_orders(global_write_combined, m);
+	seq_puts(m, "uc\t:");
+	ttm_pool_debugfs_orders(global_uncached, m);
+	seq_puts(m, "wc 32\t:");
+	ttm_pool_debugfs_orders(global_dma32_write_combined, m);
+	seq_puts(m, "uc 32\t:");
+	ttm_pool_debugfs_orders(global_dma32_uncached, m);
+	spin_unlock(&shrinker_lock);
+
+	ttm_pool_debugfs_footer(m);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(ttm_pool_debugfs_globals);
+
 /**
  * ttm_pool_debugfs - Debugfs dump function for a pool
  *
@@ -566,23 +608,9 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
 {
 	unsigned int i;
 
-	spin_lock(&shrinker_lock);
-
-	seq_puts(m, "\t ");
-	for (i = 0; i < MAX_ORDER; ++i)
-		seq_printf(m, " ---%2u---", i);
-	seq_puts(m, "\n");
-
-	seq_puts(m, "wc\t:");
-	ttm_pool_debugfs_orders(global_write_combined, m);
-	seq_puts(m, "uc\t:");
-	ttm_pool_debugfs_orders(global_uncached, m);
-
-	seq_puts(m, "wc 32\t:");
-	ttm_pool_debugfs_orders(global_dma32_write_combined, m);
-	seq_puts(m, "uc 32\t:");
-	ttm_pool_debugfs_orders(global_dma32_uncached, m);
+	ttm_pool_debugfs_header(m);
 
+	spin_lock(&shrinker_lock);
 	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
 		seq_puts(m, "DMA ");
 		switch (i) {
@@ -598,12 +626,9 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
 		}
 		ttm_pool_debugfs_orders(pool->caching[i].orders, m);
 	}
-
-	seq_printf(m, "\ntotal\t: %8lu of %8lu\n",
-		   atomic_long_read(&allocated_pages), page_pool_size);
-
 	spin_unlock(&shrinker_lock);
 
+	ttm_pool_debugfs_footer(m);
 	return 0;
 }
 EXPORT_SYMBOL(ttm_pool_debugfs);
@@ -660,6 +685,11 @@ int ttm_pool_mgr_init(unsigned long num_pages)
 				   ttm_uncached, i);
 	}
 
+#ifdef CONFIG_DEBUG_FS
+	debugfs_create_file("page_pool", 0444, ttm_debugfs_root, NULL,
+			    &ttm_pool_debugfs_globals_fops);
+#endif
+
 	mm_shrinker.count_objects = ttm_pool_shrinker_count;
 	mm_shrinker.scan_objects = ttm_pool_shrinker_scan;
 	mm_shrinker.seeks = 1;
-- 
2.25.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 3/4] drm/ttm: add debugfs entry to test pool shrinker
  2020-12-18 17:55 [PATCH 1/4] drm/ttm: add debugfs directory v2 Christian König
  2020-12-18 17:55 ` [PATCH 2/4] drm/ttm: add a debugfs file for the global page pools Christian König
@ 2020-12-18 17:55 ` Christian König
  2020-12-22 13:43   ` Daniel Vetter
  2020-12-18 17:55 ` [PATCH 4/4] drm/ttm: optimize ttm pool shrinker a bit Christian König
  2020-12-22 13:31 ` [PATCH 1/4] drm/ttm: add debugfs directory v2 Daniel Vetter
  3 siblings, 1 reply; 13+ messages in thread
From: Christian König @ 2020-12-18 17:55 UTC (permalink / raw)
  To: dri-devel; +Cc: ray.huang

Useful for testing.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_pool.c | 50 ++++++++++++++++++++++------------
 1 file changed, 32 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 1d61e8fc0e81..1cdacd58753a 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -529,6 +529,28 @@ void ttm_pool_fini(struct ttm_pool *pool)
 }
 EXPORT_SYMBOL(ttm_pool_fini);
 
+/* As long as pages are available make sure to release at least one */
+static unsigned long ttm_pool_shrinker_scan(struct shrinker *shrink,
+					    struct shrink_control *sc)
+{
+	unsigned long num_freed = 0;
+
+	do
+		num_freed += ttm_pool_shrink();
+	while (!num_freed && atomic_long_read(&allocated_pages));
+
+	return num_freed;
+}
+
+/* Return the number of pages available or SHRINK_EMPTY if we have none */
+static unsigned long ttm_pool_shrinker_count(struct shrinker *shrink,
+					     struct shrink_control *sc)
+{
+	unsigned long num_pages = atomic_long_read(&allocated_pages);
+
+	return num_pages ? num_pages : SHRINK_EMPTY;
+}
+
 #ifdef CONFIG_DEBUG_FS
 /* Count the number of pages available in a pool_type */
 static unsigned int ttm_pool_type_count(struct ttm_pool_type *pt)
@@ -633,29 +655,19 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
 }
 EXPORT_SYMBOL(ttm_pool_debugfs);
 
-#endif
-
-/* As long as pages are available make sure to release at least one */
-static unsigned long ttm_pool_shrinker_scan(struct shrinker *shrink,
-					    struct shrink_control *sc)
+/* Test the shrinker functions and dump the result */
+static int ttm_pool_debugfs_shrink_show(struct seq_file *m, void *data)
 {
-	unsigned long num_freed = 0;
+	struct shrink_control sc = { .gfp_mask = GFP_KERNEL };
 
-	do
-		num_freed += ttm_pool_shrink();
-	while (!num_freed && atomic_long_read(&allocated_pages));
+	seq_printf(m, "%lu/%lu\n", ttm_pool_shrinker_count(&mm_shrinker, &sc),
+		   ttm_pool_shrinker_scan(&mm_shrinker, &sc));
 
-	return num_freed;
+	return 0;
 }
+DEFINE_SHOW_ATTRIBUTE(ttm_pool_debugfs_shrink);
 
-/* Return the number of pages available or SHRINK_EMPTY if we have none */
-static unsigned long ttm_pool_shrinker_count(struct shrinker *shrink,
-					     struct shrink_control *sc)
-{
-	unsigned long num_pages = atomic_long_read(&allocated_pages);
-
-	return num_pages ? num_pages : SHRINK_EMPTY;
-}
+#endif
 
 /**
  * ttm_pool_mgr_init - Initialize globals
@@ -688,6 +700,8 @@ int ttm_pool_mgr_init(unsigned long num_pages)
 #ifdef CONFIG_DEBUG_FS
 	debugfs_create_file("page_pool", 0444, ttm_debugfs_root, NULL,
 			    &ttm_pool_debugfs_globals_fops);
+	debugfs_create_file("page_pool_shrink", 0400, ttm_debugfs_root, NULL,
+			    &ttm_pool_debugfs_shrink_fops);
 #endif
 
 	mm_shrinker.count_objects = ttm_pool_shrinker_count;
-- 
2.25.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 4/4] drm/ttm: optimize ttm pool shrinker a bit
  2020-12-18 17:55 [PATCH 1/4] drm/ttm: add debugfs directory v2 Christian König
  2020-12-18 17:55 ` [PATCH 2/4] drm/ttm: add a debugfs file for the global page pools Christian König
  2020-12-18 17:55 ` [PATCH 3/4] drm/ttm: add debugfs entry to test pool shrinker Christian König
@ 2020-12-18 17:55 ` Christian König
  2020-12-22 13:51   ` Daniel Vetter
  2020-12-22 13:31 ` [PATCH 1/4] drm/ttm: add debugfs directory v2 Daniel Vetter
  3 siblings, 1 reply; 13+ messages in thread
From: Christian König @ 2020-12-18 17:55 UTC (permalink / raw)
  To: dri-devel; +Cc: ray.huang

Only initialize the DMA coherent pools if they are used.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_pool.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 1cdacd58753a..f09e34614226 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -504,10 +504,12 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
 	pool->use_dma_alloc = use_dma_alloc;
 	pool->use_dma32 = use_dma32;
 
-	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
-		for (j = 0; j < MAX_ORDER; ++j)
-			ttm_pool_type_init(&pool->caching[i].orders[j],
-					   pool, i, j);
+	if (use_dma_alloc) {
+		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
+			for (j = 0; j < MAX_ORDER; ++j)
+				ttm_pool_type_init(&pool->caching[i].orders[j],
+						   pool, i, j);
+	}
 }
 EXPORT_SYMBOL(ttm_pool_init);
 
@@ -523,9 +525,11 @@ void ttm_pool_fini(struct ttm_pool *pool)
 {
 	unsigned int i, j;
 
-	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
-		for (j = 0; j < MAX_ORDER; ++j)
-			ttm_pool_type_fini(&pool->caching[i].orders[j]);
+	if (pool->use_dma_alloc) {
+		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
+			for (j = 0; j < MAX_ORDER; ++j)
+				ttm_pool_type_fini(&pool->caching[i].orders[j]);
+	}
 }
 EXPORT_SYMBOL(ttm_pool_fini);
 
@@ -630,6 +634,11 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
 {
 	unsigned int i;
 
+	if (!pool->use_dma_alloc) {
+		seq_puts(m, "unused\n");
+		return 0;
+	}
+
 	ttm_pool_debugfs_header(m);
 
 	spin_lock(&shrinker_lock);
-- 
2.25.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/4] drm/ttm: add debugfs directory v2
  2020-12-18 17:55 [PATCH 1/4] drm/ttm: add debugfs directory v2 Christian König
                   ` (2 preceding siblings ...)
  2020-12-18 17:55 ` [PATCH 4/4] drm/ttm: optimize ttm pool shrinker a bit Christian König
@ 2020-12-22 13:31 ` Daniel Vetter
  3 siblings, 0 replies; 13+ messages in thread
From: Daniel Vetter @ 2020-12-22 13:31 UTC (permalink / raw)
  To: Christian König; +Cc: ray.huang, dri-devel

On Fri, Dec 18, 2020 at 06:55:35PM +0100, Christian König wrote:
> As far as I can tell the buffer_count was never used by a
> userspace application.
> 
> The number of BOs in the system is far better suited in
> debugfs than sysfs and we now should be able to add other
> information here as well.
> 
> v2: add that additionally to sysfs
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Not sure where I acked this, but looks reasonable.
-Daniel

> ---
>  drivers/gpu/drm/ttm/ttm_bo.c     | 48 ++------------------------------
>  drivers/gpu/drm/ttm/ttm_module.c |  4 +++
>  drivers/gpu/drm/ttm/ttm_module.h |  6 ++--
>  3 files changed, 11 insertions(+), 47 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 31e8b3da5563..cd55e3104e50 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -44,8 +44,6 @@
>  
>  #include "ttm_module.h"
>  
> -static void ttm_bo_global_kobj_release(struct kobject *kobj);
> -
>  /*
>   * ttm_global_mutex - protecting the global BO state
>   */
> @@ -54,11 +52,6 @@ unsigned ttm_bo_glob_use_count;
>  struct ttm_bo_global ttm_bo_glob;
>  EXPORT_SYMBOL(ttm_bo_glob);
>  
> -static struct attribute ttm_bo_count = {
> -	.name = "bo_count",
> -	.mode = S_IRUGO
> -};
> -
>  /* default destructor */
>  static void ttm_bo_default_destroy(struct ttm_buffer_object *bo)
>  {
> @@ -84,32 +77,6 @@ static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo,
>  	}
>  }
>  
> -static ssize_t ttm_bo_global_show(struct kobject *kobj,
> -				  struct attribute *attr,
> -				  char *buffer)
> -{
> -	struct ttm_bo_global *glob =
> -		container_of(kobj, struct ttm_bo_global, kobj);
> -
> -	return snprintf(buffer, PAGE_SIZE, "%d\n",
> -				atomic_read(&glob->bo_count));
> -}
> -
> -static struct attribute *ttm_bo_global_attrs[] = {
> -	&ttm_bo_count,
> -	NULL
> -};
> -
> -static const struct sysfs_ops ttm_bo_global_ops = {
> -	.show = &ttm_bo_global_show
> -};
> -
> -static struct kobj_type ttm_bo_glob_kobj_type  = {
> -	.release = &ttm_bo_global_kobj_release,
> -	.sysfs_ops = &ttm_bo_global_ops,
> -	.default_attrs = ttm_bo_global_attrs
> -};
> -
>  static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
>  {
>  	struct ttm_bo_device *bdev = bo->bdev;
> @@ -1226,14 +1193,6 @@ size_t ttm_bo_dma_acc_size(struct ttm_bo_device *bdev,
>  }
>  EXPORT_SYMBOL(ttm_bo_dma_acc_size);
>  
> -static void ttm_bo_global_kobj_release(struct kobject *kobj)
> -{
> -	struct ttm_bo_global *glob =
> -		container_of(kobj, struct ttm_bo_global, kobj);
> -
> -	__free_page(glob->dummy_read_page);
> -}
> -
>  static void ttm_bo_global_release(void)
>  {
>  	struct ttm_bo_global *glob = &ttm_bo_glob;
> @@ -1245,6 +1204,7 @@ static void ttm_bo_global_release(void)
>  	kobject_del(&glob->kobj);
>  	kobject_put(&glob->kobj);
>  	ttm_mem_global_release(&ttm_mem_glob);
> +	__free_page(glob->dummy_read_page);
>  	memset(glob, 0, sizeof(*glob));
>  out:
>  	mutex_unlock(&ttm_global_mutex);
> @@ -1277,10 +1237,8 @@ static int ttm_bo_global_init(void)
>  	INIT_LIST_HEAD(&glob->device_list);
>  	atomic_set(&glob->bo_count, 0);
>  
> -	ret = kobject_init_and_add(
> -		&glob->kobj, &ttm_bo_glob_kobj_type, ttm_get_kobj(), "buffer_objects");
> -	if (unlikely(ret != 0))
> -		kobject_put(&glob->kobj);
> +	debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_root,
> +				&glob->bo_count);
>  out:
>  	mutex_unlock(&ttm_global_mutex);
>  	return ret;
> diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c
> index c0906437cb1c..f6566603a60f 100644
> --- a/drivers/gpu/drm/ttm/ttm_module.c
> +++ b/drivers/gpu/drm/ttm/ttm_module.c
> @@ -32,12 +32,14 @@
>  #include <linux/module.h>
>  #include <linux/device.h>
>  #include <linux/sched.h>
> +#include <linux/debugfs.h>
>  #include <drm/drm_sysfs.h>
>  
>  #include "ttm_module.h"
>  
>  static DECLARE_WAIT_QUEUE_HEAD(exit_q);
>  static atomic_t device_released;
> +struct dentry *ttm_debugfs_root;
>  
>  static struct device_type ttm_drm_class_type = {
>  	.name = "ttm",
> @@ -77,6 +79,7 @@ static int __init ttm_init(void)
>  	if (unlikely(ret != 0))
>  		goto out_no_dev_reg;
>  
> +	ttm_debugfs_root = debugfs_create_dir("ttm", NULL);
>  	return 0;
>  out_no_dev_reg:
>  	atomic_set(&device_released, 1);
> @@ -94,6 +97,7 @@ static void __exit ttm_exit(void)
>  	 */
>  
>  	wait_event(exit_q, atomic_read(&device_released) == 1);
> +	debugfs_remove(ttm_debugfs_root);
>  }
>  
>  module_init(ttm_init);
> diff --git a/drivers/gpu/drm/ttm/ttm_module.h b/drivers/gpu/drm/ttm/ttm_module.h
> index 45fa318c1585..2f03c2fcf570 100644
> --- a/drivers/gpu/drm/ttm/ttm_module.h
> +++ b/drivers/gpu/drm/ttm/ttm_module.h
> @@ -31,10 +31,12 @@
>  #ifndef _TTM_MODULE_H_
>  #define _TTM_MODULE_H_
>  
> -#include <linux/kernel.h>
> +#define TTM_PFX "[TTM] "
> +
>  struct kobject;
> +struct dentry;
>  
> -#define TTM_PFX "[TTM] "
>  extern struct kobject *ttm_get_kobj(void);
> +extern struct dentry *ttm_debugfs_root;
>  
>  #endif /* _TTM_MODULE_H_ */
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/4] drm/ttm: add a debugfs file for the global page pools
  2020-12-18 17:55 ` [PATCH 2/4] drm/ttm: add a debugfs file for the global page pools Christian König
@ 2020-12-22 13:36   ` Daniel Vetter
  0 siblings, 0 replies; 13+ messages in thread
From: Daniel Vetter @ 2020-12-22 13:36 UTC (permalink / raw)
  To: Christian König; +Cc: ray.huang, dri-devel

On Fri, Dec 18, 2020 at 06:55:36PM +0100, Christian König wrote:
> Instead of printing this on the per-device pool.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

Hm shouldn't we remove this from radeon/amdgpu too and drop the
EXPORT_SYMBOL for ttm_pool_debugfs? I'm kinda confused why we still have
both ...
-Daniel


> ---
>  drivers/gpu/drm/ttm/ttm_pool.c | 70 ++++++++++++++++++++++++----------
>  1 file changed, 50 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> index 7b2f60616750..1d61e8fc0e81 100644
> --- a/drivers/gpu/drm/ttm/ttm_pool.c
> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> @@ -42,6 +42,8 @@
>  #include <drm/ttm/ttm_bo_driver.h>
>  #include <drm/ttm/ttm_tt.h>
>  
> +#include "ttm_module.h"
> +
>  /**
>   * struct ttm_pool_dma - Helper object for coherent DMA mappings
>   *
> @@ -543,6 +545,17 @@ static unsigned int ttm_pool_type_count(struct ttm_pool_type *pt)
>  	return count;
>  }
>  
> +/* Print a nice header for the order */
> +static void ttm_pool_debugfs_header(struct seq_file *m)
> +{
> +	unsigned int i;
> +
> +	seq_puts(m, "\t ");
> +	for (i = 0; i < MAX_ORDER; ++i)
> +		seq_printf(m, " ---%2u---", i);
> +	seq_puts(m, "\n");
> +}
> +
>  /* Dump information about the different pool types */
>  static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
>  				    struct seq_file *m)
> @@ -554,6 +567,35 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
>  	seq_puts(m, "\n");
>  }
>  
> +/* Dump the total amount of allocated pages */
> +static void ttm_pool_debugfs_footer(struct seq_file *m)
> +{
> +	seq_printf(m, "\ntotal\t: %8lu of %8lu\n",
> +		   atomic_long_read(&allocated_pages), page_pool_size);
> +}
> +
> +/* Dump the information for the global pools */
> +static int ttm_pool_debugfs_globals_show(struct seq_file *m, void *data)
> +{
> +	ttm_pool_debugfs_header(m);
> +
> +	spin_lock(&shrinker_lock);
> +	seq_puts(m, "wc\t:");
> +	ttm_pool_debugfs_orders(global_write_combined, m);
> +	seq_puts(m, "uc\t:");
> +	ttm_pool_debugfs_orders(global_uncached, m);
> +	seq_puts(m, "wc 32\t:");
> +	ttm_pool_debugfs_orders(global_dma32_write_combined, m);
> +	seq_puts(m, "uc 32\t:");
> +	ttm_pool_debugfs_orders(global_dma32_uncached, m);
> +	spin_unlock(&shrinker_lock);
> +
> +	ttm_pool_debugfs_footer(m);
> +
> +	return 0;
> +}
> +DEFINE_SHOW_ATTRIBUTE(ttm_pool_debugfs_globals);
> +
>  /**
>   * ttm_pool_debugfs - Debugfs dump function for a pool
>   *
> @@ -566,23 +608,9 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
>  {
>  	unsigned int i;
>  
> -	spin_lock(&shrinker_lock);
> -
> -	seq_puts(m, "\t ");
> -	for (i = 0; i < MAX_ORDER; ++i)
> -		seq_printf(m, " ---%2u---", i);
> -	seq_puts(m, "\n");
> -
> -	seq_puts(m, "wc\t:");
> -	ttm_pool_debugfs_orders(global_write_combined, m);
> -	seq_puts(m, "uc\t:");
> -	ttm_pool_debugfs_orders(global_uncached, m);
> -
> -	seq_puts(m, "wc 32\t:");
> -	ttm_pool_debugfs_orders(global_dma32_write_combined, m);
> -	seq_puts(m, "uc 32\t:");
> -	ttm_pool_debugfs_orders(global_dma32_uncached, m);
> +	ttm_pool_debugfs_header(m);
>  
> +	spin_lock(&shrinker_lock);
>  	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
>  		seq_puts(m, "DMA ");
>  		switch (i) {
> @@ -598,12 +626,9 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
>  		}
>  		ttm_pool_debugfs_orders(pool->caching[i].orders, m);
>  	}
> -
> -	seq_printf(m, "\ntotal\t: %8lu of %8lu\n",
> -		   atomic_long_read(&allocated_pages), page_pool_size);
> -
>  	spin_unlock(&shrinker_lock);
>  
> +	ttm_pool_debugfs_footer(m);
>  	return 0;
>  }
>  EXPORT_SYMBOL(ttm_pool_debugfs);
> @@ -660,6 +685,11 @@ int ttm_pool_mgr_init(unsigned long num_pages)
>  				   ttm_uncached, i);
>  	}
>  
> +#ifdef CONFIG_DEBUG_FS
> +	debugfs_create_file("page_pool", 0444, ttm_debugfs_root, NULL,
> +			    &ttm_pool_debugfs_globals_fops);
> +#endif
> +
>  	mm_shrinker.count_objects = ttm_pool_shrinker_count;
>  	mm_shrinker.scan_objects = ttm_pool_shrinker_scan;
>  	mm_shrinker.seeks = 1;
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/4] drm/ttm: add debugfs entry to test pool shrinker
  2020-12-18 17:55 ` [PATCH 3/4] drm/ttm: add debugfs entry to test pool shrinker Christian König
@ 2020-12-22 13:43   ` Daniel Vetter
  0 siblings, 0 replies; 13+ messages in thread
From: Daniel Vetter @ 2020-12-22 13:43 UTC (permalink / raw)
  To: Christian König; +Cc: ray.huang, dri-devel

On Fri, Dec 18, 2020 at 06:55:37PM +0100, Christian König wrote:
> Useful for testing.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/ttm/ttm_pool.c | 50 ++++++++++++++++++++++------------
>  1 file changed, 32 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> index 1d61e8fc0e81..1cdacd58753a 100644
> --- a/drivers/gpu/drm/ttm/ttm_pool.c
> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> @@ -529,6 +529,28 @@ void ttm_pool_fini(struct ttm_pool *pool)
>  }
>  EXPORT_SYMBOL(ttm_pool_fini);
>  
> +/* As long as pages are available make sure to release at least one */
> +static unsigned long ttm_pool_shrinker_scan(struct shrinker *shrink,
> +					    struct shrink_control *sc)
> +{
> +	unsigned long num_freed = 0;
> +
> +	do
> +		num_freed += ttm_pool_shrink();
> +	while (!num_freed && atomic_long_read(&allocated_pages));
> +
> +	return num_freed;
> +}
> +
> +/* Return the number of pages available or SHRINK_EMPTY if we have none */
> +static unsigned long ttm_pool_shrinker_count(struct shrinker *shrink,
> +					     struct shrink_control *sc)
> +{
> +	unsigned long num_pages = atomic_long_read(&allocated_pages);
> +
> +	return num_pages ? num_pages : SHRINK_EMPTY;
> +}
> +
>  #ifdef CONFIG_DEBUG_FS
>  /* Count the number of pages available in a pool_type */
>  static unsigned int ttm_pool_type_count(struct ttm_pool_type *pt)
> @@ -633,29 +655,19 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
>  }
>  EXPORT_SYMBOL(ttm_pool_debugfs);
>  
> -#endif
> -
> -/* As long as pages are available make sure to release at least one */
> -static unsigned long ttm_pool_shrinker_scan(struct shrinker *shrink,
> -					    struct shrink_control *sc)
> +/* Test the shrinker functions and dump the result */
> +static int ttm_pool_debugfs_shrink_show(struct seq_file *m, void *data)
>  {
> -	unsigned long num_freed = 0;
> +	struct shrink_control sc = { .gfp_mask = GFP_KERNEL };
>  
> -	do
> -		num_freed += ttm_pool_shrink();
> -	while (!num_freed && atomic_long_read(&allocated_pages));

Need to wrap this in fs_reclaim_acquire/release(), otherwise lockdep won't
connect the dots and the dedicated testing interface here isn't very
useful.

Also this is a bit a confusing interface, I'd expect the show function to
do the counting, and then the write function would try to write shrink as
many objects as requests with the _scan function.

Both should be annotated with fs_reclaim_acquire/release.

> +	seq_printf(m, "%lu/%lu\n", ttm_pool_shrinker_count(&mm_shrinker, &sc),
> +		   ttm_pool_shrinker_scan(&mm_shrinker, &sc));
>  
> -	return num_freed;
> +	return 0;
>  }
> +DEFINE_SHOW_ATTRIBUTE(ttm_pool_debugfs_shrink);
>  
> -/* Return the number of pages available or SHRINK_EMPTY if we have none */
> -static unsigned long ttm_pool_shrinker_count(struct shrinker *shrink,
> -					     struct shrink_control *sc)
> -{
> -	unsigned long num_pages = atomic_long_read(&allocated_pages);
> -
> -	return num_pages ? num_pages : SHRINK_EMPTY;
> -}
> +#endif
>  
>  /**
>   * ttm_pool_mgr_init - Initialize globals
> @@ -688,6 +700,8 @@ int ttm_pool_mgr_init(unsigned long num_pages)
>  #ifdef CONFIG_DEBUG_FS
>  	debugfs_create_file("page_pool", 0444, ttm_debugfs_root, NULL,
>  			    &ttm_pool_debugfs_globals_fops);
> +	debugfs_create_file("page_pool_shrink", 0400, ttm_debugfs_root, NULL,
> +			    &ttm_pool_debugfs_shrink_fops);

I think an interface per pool would make more sense for dedicated testing.
I'm kinda wondering whether we shouldn't do this in mm/ for all shrinkers
by default, it's really quite neat for being able to test stuff.
-Daniel

>  #endif
>  
>  	mm_shrinker.count_objects = ttm_pool_shrinker_count;
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 4/4] drm/ttm: optimize ttm pool shrinker a bit
  2020-12-18 17:55 ` [PATCH 4/4] drm/ttm: optimize ttm pool shrinker a bit Christian König
@ 2020-12-22 13:51   ` Daniel Vetter
  2021-01-07 12:49     ` Christian König
  0 siblings, 1 reply; 13+ messages in thread
From: Daniel Vetter @ 2020-12-22 13:51 UTC (permalink / raw)
  To: Christian König; +Cc: ray.huang, dri-devel

On Fri, Dec 18, 2020 at 06:55:38PM +0100, Christian König wrote:
> Only initialize the DMA coherent pools if they are used.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

Ah, just realized the answer to my question on patch 2: The pools are
per-device, due to dma_alloc_coherent being per-device (but really mostly
it isn't, but that's what we have to deal with fighting the dma-api
abstraction).

I think this would make a lot more sense if the shrinkers are per-pool
(and also most of the debugfs files), since as-is in a multi-gpu system
the first gpu's pool gets preferentially thrashed, which isn't a nice
design. Splitting that into per gpu shrinkers means we get equal shrinking
without having to maintain a global lru. This is how xfs seems to set up
their shrinkers, and in general xfs people have a solid understanding of
this stuff.

Aside: I think it also would make tons of sense to split up your new ttm
bo shrinker up into a per-device lru, and throw the global system memory
lru out the window completely :-) Assuming we can indeed get rid of it,
and vmwgfx doesn't need it somewhere still.

Aside from this lgtm, but I guess will change a bit with that shuffling.
-Daniel

> ---
>  drivers/gpu/drm/ttm/ttm_pool.c | 23 ++++++++++++++++-------
>  1 file changed, 16 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> index 1cdacd58753a..f09e34614226 100644
> --- a/drivers/gpu/drm/ttm/ttm_pool.c
> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> @@ -504,10 +504,12 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
>  	pool->use_dma_alloc = use_dma_alloc;
>  	pool->use_dma32 = use_dma32;
>  
> -	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> -		for (j = 0; j < MAX_ORDER; ++j)
> -			ttm_pool_type_init(&pool->caching[i].orders[j],
> -					   pool, i, j);
> +	if (use_dma_alloc) {
> +		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> +			for (j = 0; j < MAX_ORDER; ++j)
> +				ttm_pool_type_init(&pool->caching[i].orders[j],
> +						   pool, i, j);
> +	}
>  }
>  EXPORT_SYMBOL(ttm_pool_init);
>  
> @@ -523,9 +525,11 @@ void ttm_pool_fini(struct ttm_pool *pool)
>  {
>  	unsigned int i, j;
>  
> -	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> -		for (j = 0; j < MAX_ORDER; ++j)
> -			ttm_pool_type_fini(&pool->caching[i].orders[j]);
> +	if (pool->use_dma_alloc) {
> +		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> +			for (j = 0; j < MAX_ORDER; ++j)
> +				ttm_pool_type_fini(&pool->caching[i].orders[j]);
> +	}
>  }
>  EXPORT_SYMBOL(ttm_pool_fini);
>  
> @@ -630,6 +634,11 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
>  {
>  	unsigned int i;
>  
> +	if (!pool->use_dma_alloc) {
> +		seq_puts(m, "unused\n");
> +		return 0;
> +	}
> +
>  	ttm_pool_debugfs_header(m);
>  
>  	spin_lock(&shrinker_lock);
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 4/4] drm/ttm: optimize ttm pool shrinker a bit
  2020-12-22 13:51   ` Daniel Vetter
@ 2021-01-07 12:49     ` Christian König
  2021-01-07 16:38       ` Daniel Vetter
  0 siblings, 1 reply; 13+ messages in thread
From: Christian König @ 2021-01-07 12:49 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: ray.huang, dri-devel

Am 22.12.20 um 14:51 schrieb Daniel Vetter:
> On Fri, Dec 18, 2020 at 06:55:38PM +0100, Christian König wrote:
>> Only initialize the DMA coherent pools if they are used.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
> Ah, just realized the answer to my question on patch 2: The pools are
> per-device, due to dma_alloc_coherent being per-device (but really mostly
> it isn't, but that's what we have to deal with fighting the dma-api
> abstraction).
>
> I think this would make a lot more sense if the shrinkers are per-pool
> (and also most of the debugfs files), since as-is in a multi-gpu system
> the first gpu's pool gets preferrentially thrashed. Which isn't a nice
> design. Splitting that into per gpu shrinkers means we get equal shrinking
> without having to maintain a global lru. This is how xfs seems to set up
> their shrinkers, and in general xfs people have a solid understanding of
> this stuff.

Well, fairness and not trashing the first GPU's pool is the reason why I 
implemented just one shrinker plus a global LRU.

In other words shrink_slab() just uses list_for_each_entry() on all 
shrinkers.

In the pool shrinker callback we shrink one pool and move it to the end of 
the shrinker list.

>
> Aside: I think it also would make tons of sense to split up your new ttm
> bo shrinker up into a per-device lru, and throw the global system memory
> lru out the window completely :-) Assuming we can indeed get rid of it,
> and vmwgfx doesn't need it somewhere still.

Yeah, I already have that as a patch set here, but I have this dependent 
on a larger rename of the device structures.

> Aside from this lgtm, but I guess will change a bit with that shuffling.

Thanks for the review, going to send out a new version with the 
fs_reclaim_acquire/release added in a minute.

Christian.

> -Daniel
>
>> ---
>>   drivers/gpu/drm/ttm/ttm_pool.c | 23 ++++++++++++++++-------
>>   1 file changed, 16 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
>> index 1cdacd58753a..f09e34614226 100644
>> --- a/drivers/gpu/drm/ttm/ttm_pool.c
>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
>> @@ -504,10 +504,12 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
>>   	pool->use_dma_alloc = use_dma_alloc;
>>   	pool->use_dma32 = use_dma32;
>>   
>> -	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>> -		for (j = 0; j < MAX_ORDER; ++j)
>> -			ttm_pool_type_init(&pool->caching[i].orders[j],
>> -					   pool, i, j);
>> +	if (use_dma_alloc) {
>> +		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>> +			for (j = 0; j < MAX_ORDER; ++j)
>> +				ttm_pool_type_init(&pool->caching[i].orders[j],
>> +						   pool, i, j);
>> +	}
>>   }
>>   EXPORT_SYMBOL(ttm_pool_init);
>>   
>> @@ -523,9 +525,11 @@ void ttm_pool_fini(struct ttm_pool *pool)
>>   {
>>   	unsigned int i, j;
>>   
>> -	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>> -		for (j = 0; j < MAX_ORDER; ++j)
>> -			ttm_pool_type_fini(&pool->caching[i].orders[j]);
>> +	if (pool->use_dma_alloc) {
>> +		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>> +			for (j = 0; j < MAX_ORDER; ++j)
>> +				ttm_pool_type_fini(&pool->caching[i].orders[j]);
>> +	}
>>   }
>>   EXPORT_SYMBOL(ttm_pool_fini);
>>   
>> @@ -630,6 +634,11 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
>>   {
>>   	unsigned int i;
>>   
>> +	if (!pool->use_dma_alloc) {
>> +		seq_puts(m, "unused\n");
>> +		return 0;
>> +	}
>> +
>>   	ttm_pool_debugfs_header(m);
>>   
>>   	spin_lock(&shrinker_lock);
>> -- 
>> 2.25.1
>>

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 4/4] drm/ttm: optimize ttm pool shrinker a bit
  2021-01-07 12:49     ` Christian König
@ 2021-01-07 16:38       ` Daniel Vetter
  2021-01-19 12:11         ` Christian König
  0 siblings, 1 reply; 13+ messages in thread
From: Daniel Vetter @ 2021-01-07 16:38 UTC (permalink / raw)
  To: christian.koenig; +Cc: ray.huang, dri-devel

On Thu, Jan 07, 2021 at 01:49:45PM +0100, Christian König wrote:
> Am 22.12.20 um 14:51 schrieb Daniel Vetter:
> > On Fri, Dec 18, 2020 at 06:55:38PM +0100, Christian König wrote:
> > > Only initialize the DMA coherent pools if they are used.
> > > 
> > > Signed-off-by: Christian König <christian.koenig@amd.com>
> > Ah, just realized the answer to my question on patch 2: The pools are
> > per-device, due to dma_alloc_coherent being per-device (but really mostly
> > it isn't, but that's what we have to deal with fighting the dma-api
> > abstraction).
> > 
> > I think this would make a lot more sense if the shrinkers are per-pool
> > (and also most of the debugfs files), since as-is in a multi-gpu system
> > the first gpu's pool gets preferrentially thrashed. Which isn't a nice
> > design. Splitting that into per gpu shrinkers means we get equal shrinking
> > without having to maintain a global lru. This is how xfs seems to set up
> > their shrinkers, and in general xfs people have a solid understanding of
> > this stuff.
> 
> Well fairness and not trashing the first GPUs pool is the reason why I
> implemented just one shrinker plus a global LRU.

That's kinda defeating the point of how the core mm works. At least of how
I'm understanding how it works. Imo we shouldn't try to re-implement this
kind of balancing across different pools in our callback, since core mm
tries pretty hard to equally shrink already (it only shrinks each shrinker
a little bit each round, but does a lot of rounds under memory pressure).

Also maintaining your own global lru means global locking for the usual
case of none-to-little memory contention, unnecessarily wasting the fast
path.

> In other words shrink_slab() just uses list_for_each_entry() on all
> shrinkers.
> 
> In the pool shrinker callback shrink one pool and move it to the end of the
> shrinker list.
> 
> > 
> > Aside: I think it also would make tons of sense to split up your new ttm
> > bo shrinker up into a per-device lru, and throw the global system memory
> > lru out the window completely :-) Assuming we can indeed get rid of it,
> > and vmwgfx doesn't need it somewhere still.
> 
> Yeah, I already have that as a patch set here, but I have this dependent on
> a larger rename of the device structures.

Hm maybe include that in the next round, just for the bigger picture?
Don't have to merge it all in one go, just want to make sure we agree on
where we're going.

> > Aside from this lgtm, but I guess will change a bit with that shuffling.
> 
> Thanks for the review, going to send out a new version with the
> fs_reclaim_acquire/release added in a minute.

Cool.

Cheers, Daniel

> 
> Christian.
> 
> > -Daniel
> > 
> > > ---
> > >   drivers/gpu/drm/ttm/ttm_pool.c | 23 ++++++++++++++++-------
> > >   1 file changed, 16 insertions(+), 7 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> > > index 1cdacd58753a..f09e34614226 100644
> > > --- a/drivers/gpu/drm/ttm/ttm_pool.c
> > > +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> > > @@ -504,10 +504,12 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
> > >   	pool->use_dma_alloc = use_dma_alloc;
> > >   	pool->use_dma32 = use_dma32;
> > > -	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> > > -		for (j = 0; j < MAX_ORDER; ++j)
> > > -			ttm_pool_type_init(&pool->caching[i].orders[j],
> > > -					   pool, i, j);
> > > +	if (use_dma_alloc) {
> > > +		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> > > +			for (j = 0; j < MAX_ORDER; ++j)
> > > +				ttm_pool_type_init(&pool->caching[i].orders[j],
> > > +						   pool, i, j);
> > > +	}
> > >   }
> > >   EXPORT_SYMBOL(ttm_pool_init);
> > > @@ -523,9 +525,11 @@ void ttm_pool_fini(struct ttm_pool *pool)
> > >   {
> > >   	unsigned int i, j;
> > > -	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> > > -		for (j = 0; j < MAX_ORDER; ++j)
> > > -			ttm_pool_type_fini(&pool->caching[i].orders[j]);
> > > +	if (pool->use_dma_alloc) {
> > > +		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> > > +			for (j = 0; j < MAX_ORDER; ++j)
> > > +				ttm_pool_type_fini(&pool->caching[i].orders[j]);
> > > +	}
> > >   }
> > >   EXPORT_SYMBOL(ttm_pool_fini);
> > > @@ -630,6 +634,11 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
> > >   {
> > >   	unsigned int i;
> > > +	if (!pool->use_dma_alloc) {
> > > +		seq_puts(m, "unused\n");
> > > +		return 0;
> > > +	}
> > > +
> > >   	ttm_pool_debugfs_header(m);
> > >   	spin_lock(&shrinker_lock);
> > > -- 
> > > 2.25.1
> > > 
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 4/4] drm/ttm: optimize ttm pool shrinker a bit
  2021-01-07 16:38       ` Daniel Vetter
@ 2021-01-19 12:11         ` Christian König
  2021-01-19 13:22           ` Daniel Vetter
  0 siblings, 1 reply; 13+ messages in thread
From: Christian König @ 2021-01-19 12:11 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: ray.huang, dri-devel

Am 07.01.21 um 17:38 schrieb Daniel Vetter:
> On Thu, Jan 07, 2021 at 01:49:45PM +0100, Christian König wrote:
>> Am 22.12.20 um 14:51 schrieb Daniel Vetter:
>>> On Fri, Dec 18, 2020 at 06:55:38PM +0100, Christian König wrote:
>>>> Only initialize the DMA coherent pools if they are used.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> Ah, just realized the answer to my question on patch 2: The pools are
>>> per-device, due to dma_alloc_coherent being per-device (but really mostly
>>> it isn't, but that's what we have to deal with fighting the dma-api
>>> abstraction).
>>>
>>> I think this would make a lot more sense if the shrinkers are per-pool
>>> (and also most of the debugfs files), since as-is in a multi-gpu system
>>> the first gpu's pool gets preferrentially thrashed. Which isn't a nice
>>> design. Splitting that into per gpu shrinkers means we get equal shrinking
>>> without having to maintain a global lru. This is how xfs seems to set up
>>> their shrinkers, and in general xfs people have a solid understanding of
>>> this stuff.
>> Well fairness and not trashing the first GPUs pool is the reason why I
>> implemented just one shrinker plus a global LRU.
> That's kinda defeating the point of how the core mm works. At least of how
> I'm understanding how it works. Imo we shouldn't try to re-implement this
> kind of balancing across different pools in our callback, since core mm
> tries pretty hard to equally shrink already (it only shrinks each shrinker
> a little bit each round, but does a lot of rounds under memory pressure).

Correct, see the problem is that we don't want to shrink from each pool 
on each round.

E.g. we have something like 48 global pools and 36 for each device which 
needs a DMA coherent pool.

On each round we want to shrink only one cached item from one pool and 
not 48.

> Also maintaining your own global lru means global locking for the usual
> case of none-to-little memory contention, unecessarily wasting the fast
> path.

No, the fast path doesn't need to take the global LRU lock.

I've optimized this quite a bit by looking into the pools only once for 
each power of two.

>> In other words shrink_slab() just uses list_for_each_entry() on all
>> shrinkers.
>>
>> In the pool shrinker callback shrink one pool and move it to the end of the
>> shrinker list.
>>
>>> Aside: I think it also would make tons of sense to split up your new ttm
>>> bo shrinker up into a per-device lru, and throw the global system memory
>>> lru out the window completely :-) Assuming we can indeed get rid of it,
>>> and vmwgfx doesn't need it somewhere still.
>> Yeah, I already have that as a patch set here, but I have this dependent on
>> a larger rename of the device structures.
> Hm maybe include that in the next round, just for the bigger picture?
> Don't have to merge it all in one go, just want to make sure we agree on
> where we're going.

I need to clean this set up quite a bit. Let's push this one here 
upstream first.

>>> Aside from this lgtm, but I guess will change a bit with that shuffling.
>> Thanks for the review, going to send out a new version with the
>> fs_reclaim_acquire/release added in a minute.
> Cool.
>
> Cheers, Daniel

Got distracted by bug fixes in the last two weeks, but really going to 
send that out now :)

Christian.

>
>> Christian.
>>
>>> -Daniel
>>>
>>>> ---
>>>>    drivers/gpu/drm/ttm/ttm_pool.c | 23 ++++++++++++++++-------
>>>>    1 file changed, 16 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
>>>> index 1cdacd58753a..f09e34614226 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_pool.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
>>>> @@ -504,10 +504,12 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
>>>>    	pool->use_dma_alloc = use_dma_alloc;
>>>>    	pool->use_dma32 = use_dma32;
>>>> -	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>>>> -		for (j = 0; j < MAX_ORDER; ++j)
>>>> -			ttm_pool_type_init(&pool->caching[i].orders[j],
>>>> -					   pool, i, j);
>>>> +	if (use_dma_alloc) {
>>>> +		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>>>> +			for (j = 0; j < MAX_ORDER; ++j)
>>>> +				ttm_pool_type_init(&pool->caching[i].orders[j],
>>>> +						   pool, i, j);
>>>> +	}
>>>>    }
>>>>    EXPORT_SYMBOL(ttm_pool_init);
>>>> @@ -523,9 +525,11 @@ void ttm_pool_fini(struct ttm_pool *pool)
>>>>    {
>>>>    	unsigned int i, j;
>>>> -	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>>>> -		for (j = 0; j < MAX_ORDER; ++j)
>>>> -			ttm_pool_type_fini(&pool->caching[i].orders[j]);
>>>> +	if (pool->use_dma_alloc) {
>>>> +		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>>>> +			for (j = 0; j < MAX_ORDER; ++j)
>>>> +				ttm_pool_type_fini(&pool->caching[i].orders[j]);
>>>> +	}
>>>>    }
>>>>    EXPORT_SYMBOL(ttm_pool_fini);
>>>> @@ -630,6 +634,11 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
>>>>    {
>>>>    	unsigned int i;
>>>> +	if (!pool->use_dma_alloc) {
>>>> +		seq_puts(m, "unused\n");
>>>> +		return 0;
>>>> +	}
>>>> +
>>>>    	ttm_pool_debugfs_header(m);
>>>>    	spin_lock(&shrinker_lock);
>>>> -- 
>>>> 2.25.1
>>>>

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 4/4] drm/ttm: optimize ttm pool shrinker a bit
  2021-01-19 12:11         ` Christian König
@ 2021-01-19 13:22           ` Daniel Vetter
  0 siblings, 0 replies; 13+ messages in thread
From: Daniel Vetter @ 2021-01-19 13:22 UTC (permalink / raw)
  To: Christian König; +Cc: ray.huang, dri-devel

On Tue, Jan 19, 2021 at 01:11:36PM +0100, Christian König wrote:
> Am 07.01.21 um 17:38 schrieb Daniel Vetter:
> > On Thu, Jan 07, 2021 at 01:49:45PM +0100, Christian König wrote:
> > > Am 22.12.20 um 14:51 schrieb Daniel Vetter:
> > > > On Fri, Dec 18, 2020 at 06:55:38PM +0100, Christian König wrote:
> > > > > Only initialize the DMA coherent pools if they are used.
> > > > > 
> > > > > Signed-off-by: Christian König <christian.koenig@amd.com>
> > > > Ah, just realized the answer to my question on patch 2: The pools are
> > > > per-device, due to dma_alloc_coherent being per-device (but really mostly
> > > > it isn't, but that's what we have to deal with fighting the dma-api
> > > > abstraction).
> > > > 
> > > > I think this would make a lot more sense if the shrinkers are per-pool
> > > > (and also most of the debugfs files), since as-is in a multi-gpu system
> > > > the first gpu's pool gets preferrentially thrashed. Which isn't a nice
> > > > design. Splitting that into per gpu shrinkers means we get equal shrinking
> > > > without having to maintain a global lru. This is how xfs seems to set up
> > > > their shrinkers, and in general xfs people have a solid understanding of
> > > > this stuff.
> > > Well fairness and not trashing the first GPUs pool is the reason why I
> > > implemented just one shrinker plus a global LRU.
> > That's kinda defeating the point of how the core mm works. At least of how
> > I'm understanding how it works. Imo we shouldn't try to re-implement this
> > kind of balancing across different pools in our callback, since core mm
> > tries pretty hard to equally shrink already (it only shrinks each shrinker
> > a little bit each round, but does a lot of rounds under memory pressure).
> 
> Correct, see the problem is that we don't want to shrink from each pool on
> each round.
> 
> E.g. we have something like 48 global pools and 36 for each device which
> needs a DMA coherent pool.
> 
> On each round we want to shrink only one cached item from one pool and not
> 48.

Hm if the pool is that small, then this feels like we're caching at the
wrong level, and probably we should cache at the dma-api level. Or well,
below that even.

Either way that kind of design stuff should be captured in an overview
DOC: kerneldoc imo.

Also the point of shrinkers is that they really should be all sized
equally, so if there's very little stuff in them, they shouldn't get
shrunk on first round.

Otoh if they're huge, they will be shrunk big time. So aside from fringe
effects of rounding slightly differently since it's all integers, did you
actually measure a benefit here? Or is this more conjecture about how you
think shrinkers work or don't work?

> > Also maintaining your own global lru means global locking for the usual
> > case of none-to-little memory contention, unecessarily wasting the fast
> > path.
> 
> No, the fast path doesn't need to take the global LRU lock.
> 
> I've optimized this quite a bit by looking into the pools only once for each
> power of two.
> 
> > > In other words shrink_slab() just uses list_for_each_entry() on all
> > > shrinkers.
> > > 
> > > In the pool shrinker callback shrink one pool and move it to the end of the
> > > shrinker list.
> > > 
> > > > Aside: I think it also would make tons of sense to split up your new ttm
> > > > bo shrinker up into a per-device lru, and throw the global system memory
> > > > lru out the window completely :-) Assuming we can indeed get rid of it,
> > > > and vmwgfx doesn't need it somewhere still.
> > > Yeah, I already have that as a patch set here, but I have this dependent on
> > > a larger rename of the device structures.
> > Hm maybe include that in the next round, just for the bigger picture?
> > Don't have to merge it all in one go, just want to make sure we agree on
> > where we're going.
> 
> I need to clean this set up quite a bit. Let's push this one here upstream
> first.

Hm yeah I guess we need to get somewhere first, but this feels a bit
murky. I'll try and review more details for the next round at least.
-Daniel

> 
> > > > Aside from this lgtm, but I guess will change a bit with that shuffling.
> > > Thanks for the review, going to send out a new version with the
> > > fs_reclaim_acquire/release added in a minute.
> > Cool.
> > 
> > Cheers, Daniel
> 
> Got distracted by bug fixes in the last two weeks, but really going to send
> that out now :)
> 
> Christian.
> 
> > 
> > > Christian.
> > > 
> > > > -Daniel
> > > > 
> > > > > ---
> > > > >    drivers/gpu/drm/ttm/ttm_pool.c | 23 ++++++++++++++++-------
> > > > >    1 file changed, 16 insertions(+), 7 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> > > > > index 1cdacd58753a..f09e34614226 100644
> > > > > --- a/drivers/gpu/drm/ttm/ttm_pool.c
> > > > > +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> > > > > @@ -504,10 +504,12 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
> > > > >    	pool->use_dma_alloc = use_dma_alloc;
> > > > >    	pool->use_dma32 = use_dma32;
> > > > > -	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> > > > > -		for (j = 0; j < MAX_ORDER; ++j)
> > > > > -			ttm_pool_type_init(&pool->caching[i].orders[j],
> > > > > -					   pool, i, j);
> > > > > +	if (use_dma_alloc) {
> > > > > +		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> > > > > +			for (j = 0; j < MAX_ORDER; ++j)
> > > > > +				ttm_pool_type_init(&pool->caching[i].orders[j],
> > > > > +						   pool, i, j);
> > > > > +	}
> > > > >    }
> > > > >    EXPORT_SYMBOL(ttm_pool_init);
> > > > > @@ -523,9 +525,11 @@ void ttm_pool_fini(struct ttm_pool *pool)
> > > > >    {
> > > > >    	unsigned int i, j;
> > > > > -	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> > > > > -		for (j = 0; j < MAX_ORDER; ++j)
> > > > > -			ttm_pool_type_fini(&pool->caching[i].orders[j]);
> > > > > +	if (pool->use_dma_alloc) {
> > > > > +		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> > > > > +			for (j = 0; j < MAX_ORDER; ++j)
> > > > > +				ttm_pool_type_fini(&pool->caching[i].orders[j]);
> > > > > +	}
> > > > >    }
> > > > >    EXPORT_SYMBOL(ttm_pool_fini);
> > > > > @@ -630,6 +634,11 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
> > > > >    {
> > > > >    	unsigned int i;
> > > > > +	if (!pool->use_dma_alloc) {
> > > > > +		seq_puts(m, "unused\n");
> > > > > +		return 0;
> > > > > +	}
> > > > > +
> > > > >    	ttm_pool_debugfs_header(m);
> > > > >    	spin_lock(&shrinker_lock);
> > > > > -- 
> > > > > 2.25.1
> > > > > 
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 1/4] drm/ttm: add debugfs directory v2
@ 2021-01-19 12:18 Christian König
  0 siblings, 0 replies; 13+ messages in thread
From: Christian König @ 2021-01-19 12:18 UTC (permalink / raw)
  To: ray.huang, daniel, dri-devel

As far as I can tell the buffer_count was never used by a
userspace application.

The number of BOs in the system is far better suited in
debugfs than sysfs and we now should be able to add other
information here as well.

v2: add that additionally to sysfs

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/ttm/ttm_bo.c     | 48 ++------------------------------
 drivers/gpu/drm/ttm/ttm_module.c |  4 +++
 drivers/gpu/drm/ttm/ttm_module.h |  6 ++--
 3 files changed, 11 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index b65f4b12f986..c289a6a37ff9 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -44,8 +44,6 @@
 
 #include "ttm_module.h"
 
-static void ttm_bo_global_kobj_release(struct kobject *kobj);
-
 /*
  * ttm_global_mutex - protecting the global BO state
  */
@@ -54,11 +52,6 @@ unsigned ttm_bo_glob_use_count;
 struct ttm_bo_global ttm_bo_glob;
 EXPORT_SYMBOL(ttm_bo_glob);
 
-static struct attribute ttm_bo_count = {
-	.name = "bo_count",
-	.mode = S_IRUGO
-};
-
 /* default destructor */
 static void ttm_bo_default_destroy(struct ttm_buffer_object *bo)
 {
@@ -84,32 +77,6 @@ static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo,
 	}
 }
 
-static ssize_t ttm_bo_global_show(struct kobject *kobj,
-				  struct attribute *attr,
-				  char *buffer)
-{
-	struct ttm_bo_global *glob =
-		container_of(kobj, struct ttm_bo_global, kobj);
-
-	return snprintf(buffer, PAGE_SIZE, "%d\n",
-				atomic_read(&glob->bo_count));
-}
-
-static struct attribute *ttm_bo_global_attrs[] = {
-	&ttm_bo_count,
-	NULL
-};
-
-static const struct sysfs_ops ttm_bo_global_ops = {
-	.show = &ttm_bo_global_show
-};
-
-static struct kobj_type ttm_bo_glob_kobj_type  = {
-	.release = &ttm_bo_global_kobj_release,
-	.sysfs_ops = &ttm_bo_global_ops,
-	.default_attrs = ttm_bo_global_attrs
-};
-
 static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
@@ -1228,14 +1195,6 @@ size_t ttm_bo_dma_acc_size(struct ttm_bo_device *bdev,
 }
 EXPORT_SYMBOL(ttm_bo_dma_acc_size);
 
-static void ttm_bo_global_kobj_release(struct kobject *kobj)
-{
-	struct ttm_bo_global *glob =
-		container_of(kobj, struct ttm_bo_global, kobj);
-
-	__free_page(glob->dummy_read_page);
-}
-
 static void ttm_bo_global_release(void)
 {
 	struct ttm_bo_global *glob = &ttm_bo_glob;
@@ -1247,6 +1206,7 @@ static void ttm_bo_global_release(void)
 	kobject_del(&glob->kobj);
 	kobject_put(&glob->kobj);
 	ttm_mem_global_release(&ttm_mem_glob);
+	__free_page(glob->dummy_read_page);
 	memset(glob, 0, sizeof(*glob));
 out:
 	mutex_unlock(&ttm_global_mutex);
@@ -1279,10 +1239,8 @@ static int ttm_bo_global_init(void)
 	INIT_LIST_HEAD(&glob->device_list);
 	atomic_set(&glob->bo_count, 0);
 
-	ret = kobject_init_and_add(
-		&glob->kobj, &ttm_bo_glob_kobj_type, ttm_get_kobj(), "buffer_objects");
-	if (unlikely(ret != 0))
-		kobject_put(&glob->kobj);
+	debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_root,
+				&glob->bo_count);
 out:
 	mutex_unlock(&ttm_global_mutex);
 	return ret;
diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c
index c0906437cb1c..f6566603a60f 100644
--- a/drivers/gpu/drm/ttm/ttm_module.c
+++ b/drivers/gpu/drm/ttm/ttm_module.c
@@ -32,12 +32,14 @@
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/sched.h>
+#include <linux/debugfs.h>
 #include <drm/drm_sysfs.h>
 
 #include "ttm_module.h"
 
 static DECLARE_WAIT_QUEUE_HEAD(exit_q);
 static atomic_t device_released;
+struct dentry *ttm_debugfs_root;
 
 static struct device_type ttm_drm_class_type = {
 	.name = "ttm",
@@ -77,6 +79,7 @@ static int __init ttm_init(void)
 	if (unlikely(ret != 0))
 		goto out_no_dev_reg;
 
+	ttm_debugfs_root = debugfs_create_dir("ttm", NULL);
 	return 0;
 out_no_dev_reg:
 	atomic_set(&device_released, 1);
@@ -94,6 +97,7 @@ static void __exit ttm_exit(void)
 	 */
 
 	wait_event(exit_q, atomic_read(&device_released) == 1);
+	debugfs_remove(ttm_debugfs_root);
 }
 
 module_init(ttm_init);
diff --git a/drivers/gpu/drm/ttm/ttm_module.h b/drivers/gpu/drm/ttm/ttm_module.h
index 45fa318c1585..2f03c2fcf570 100644
--- a/drivers/gpu/drm/ttm/ttm_module.h
+++ b/drivers/gpu/drm/ttm/ttm_module.h
@@ -31,10 +31,12 @@
 #ifndef _TTM_MODULE_H_
 #define _TTM_MODULE_H_
 
-#include <linux/kernel.h>
+#define TTM_PFX "[TTM] "
+
 struct kobject;
+struct dentry;
 
-#define TTM_PFX "[TTM] "
 extern struct kobject *ttm_get_kobj(void);
+extern struct dentry *ttm_debugfs_root;
 
 #endif /* _TTM_MODULE_H_ */
-- 
2.25.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2021-01-19 13:22 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-18 17:55 [PATCH 1/4] drm/ttm: add debugfs directory v2 Christian König
2020-12-18 17:55 ` [PATCH 2/4] drm/ttm: add a debugfs file for the global page pools Christian König
2020-12-22 13:36   ` Daniel Vetter
2020-12-18 17:55 ` [PATCH 3/4] drm/ttm: add debugfs entry to test pool shrinker Christian König
2020-12-22 13:43   ` Daniel Vetter
2020-12-18 17:55 ` [PATCH 4/4] drm/ttm: optimize ttm pool shrinker a bit Christian König
2020-12-22 13:51   ` Daniel Vetter
2021-01-07 12:49     ` Christian König
2021-01-07 16:38       ` Daniel Vetter
2021-01-19 12:11         ` Christian König
2021-01-19 13:22           ` Daniel Vetter
2020-12-22 13:31 ` [PATCH 1/4] drm/ttm: add debugfs directory v2 Daniel Vetter
2021-01-19 12:18 Christian König

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).