All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/ttm: move swapout logic around
@ 2021-03-15 16:04 Christian König
  2021-03-15 16:04 ` [PATCH 2/3] drm/ttm: remove swap LRU v2 Christian König
                   ` (3 more replies)
  0 siblings, 4 replies; 19+ messages in thread
From: Christian König @ 2021-03-15 16:04 UTC (permalink / raw)
  To: dri-devel; +Cc: ray.huang

Move the iteration over the global LRU into the new function
ttm_global_swapout() and use that in the drivers instead.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c        | 57 ++++++++---------------------
 drivers/gpu/drm/ttm/ttm_device.c    | 29 +++++++++++++++
 drivers/gpu/drm/ttm/ttm_tt.c        |  2 +-
 drivers/gpu/drm/vmwgfx/ttm_memory.c |  3 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  2 +-
 include/drm/ttm/ttm_bo_api.h        |  3 +-
 include/drm/ttm/ttm_device.h        |  2 +
 7 files changed, 53 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index a08dec7281fc..56d2e38af273 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1186,56 +1186,35 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
 }
 EXPORT_SYMBOL(ttm_bo_wait);
 
-/*
- * A buffer object shrink method that tries to swap out the first
- * buffer object on the bo_global::swap_lru list.
- */
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+		   gfp_t gfp_flags)
 {
 	struct ttm_global *glob = &ttm_glob;
-	struct ttm_buffer_object *bo;
-	int ret = -EBUSY;
 	bool locked;
-	unsigned i;
-
-	spin_lock(&glob->lru_lock);
-	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
-		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
-			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
-							    NULL))
-				continue;
-
-			if (!ttm_bo_get_unless_zero(bo)) {
-				if (locked)
-					dma_resv_unlock(bo->base.resv);
-				continue;
-			}
+	int ret;
 
-			ret = 0;
-			break;
-		}
-		if (!ret)
-			break;
-	}
+	if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
+		return -EBUSY;
 
-	if (ret) {
-		spin_unlock(&glob->lru_lock);
-		return ret;
+	if (!ttm_bo_get_unless_zero(bo)) {
+		if (locked)
+			dma_resv_unlock(bo->base.resv);
+		return -EBUSY;
 	}
 
 	if (bo->deleted) {
-		ret = ttm_bo_cleanup_refs(bo, false, false, locked);
+		ttm_bo_cleanup_refs(bo, false, false, locked);
 		ttm_bo_put(bo);
-		return ret;
+		return 0;
 	}
 
 	ttm_bo_del_from_lru(bo);
+	/* TODO: Cleanup the locking */
 	spin_unlock(&glob->lru_lock);
 
-	/**
+	/*
 	 * Move to system cached
 	 */
-
 	if (bo->mem.mem_type != TTM_PL_SYSTEM) {
 		struct ttm_operation_ctx ctx = { false, false };
 		struct ttm_resource evict_mem;
@@ -1255,29 +1234,26 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
 		}
 	}
 
-	/**
+	/*
 	 * Make sure BO is idle.
 	 */
-
 	ret = ttm_bo_wait(bo, false, false);
 	if (unlikely(ret != 0))
 		goto out;
 
 	ttm_bo_unmap_virtual(bo);
 
-	/**
+	/*
 	 * Swap out. Buffer will be swapped in again as soon as
 	 * anyone tries to access a ttm page.
 	 */
-
 	if (bo->bdev->funcs->swap_notify)
 		bo->bdev->funcs->swap_notify(bo);
 
 	ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
 out:
 
-	/**
-	 *
+	/*
 	 * Unreserve without putting on LRU to avoid swapping out an
 	 * already swapped buffer.
 	 */
@@ -1286,7 +1262,6 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
 	ttm_bo_put(bo);
 	return ret;
 }
-EXPORT_SYMBOL(ttm_bo_swapout);
 
 void ttm_bo_tt_destroy(struct ttm_buffer_object *bo)
 {
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 95e1b7b1f2e6..dfc2a7e4e490 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -102,6 +102,35 @@ static int ttm_global_init(void)
 	return ret;
 }
 
+/**
+ * A buffer object shrink method that tries to swap out the first
+ * buffer object on the global::swap_lru list.
+ */
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+{
+	struct ttm_global *glob = &ttm_glob;
+	struct ttm_buffer_object *bo;
+	unsigned i;
+	int ret;
+
+	spin_lock(&glob->lru_lock);
+	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
+		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
+			uint32_t num_pages = bo->ttm->num_pages;
+
+			ret = ttm_bo_swapout(bo, ctx, gfp_flags);
+			/* ttm_bo_swapout has dropped the lru_lock */
+			if (!ret)
+				return num_pages;
+			if (ret != -EBUSY)
+				return ret;
+		}
+	}
+	spin_unlock(&glob->lru_lock);
+	return 0;
+}
+EXPORT_SYMBOL(ttm_global_swapout);
+
 static void ttm_init_sysman(struct ttm_device *bdev)
 {
 	struct ttm_resource_manager *man = &bdev->sysman;
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 2f0833c98d2c..95b5cff25f4c 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -369,7 +369,7 @@ static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink,
 	};
 	int ret;
 
-	ret = ttm_bo_swapout(&ctx, GFP_NOFS);
+	ret = ttm_global_swapout(&ctx, GFP_NOFS);
 	return ret < 0 ? SHRINK_EMPTY : ret;
 }
 
diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c
index e972af07d029..104b95a8c7a2 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_memory.c
+++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c
@@ -38,6 +38,7 @@
 
 #include <drm/drm_device.h>
 #include <drm/drm_file.h>
+#include <drm/ttm/ttm_device.h>
 
 #include "ttm_memory.h"
 
@@ -277,7 +278,7 @@ static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq,
 
 	while (ttm_zones_above_swap_target(glob, from_wq, extra)) {
 		spin_unlock(&glob->lock);
-		ret = ttm_bo_swapout(ctx, GFP_KERNEL);
+		ret = ttm_global_swapout(ctx, GFP_KERNEL);
 		spin_lock(&glob->lock);
 		if (unlikely(ret < 0))
 			break;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 6910111099c8..b991422e156c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev)
 	vmw_execbuf_release_pinned_bo(dev_priv);
 	vmw_resource_evict_all(dev_priv);
 	vmw_release_device_early(dev_priv);
-	while (ttm_bo_swapout(&ctx, GFP_KERNEL) > 0);
+	while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
 	if (dev_priv->enable_fb)
 		vmw_fifo_resource_dec(dev_priv);
 	if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 4fb523dfab32..5044ac330858 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -560,7 +560,8 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file *filp,
 		  const char __user *wbuf, char __user *rbuf,
 		  size_t count, loff_t *f_pos, bool write);
 
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+		   gfp_t gfp_flags);
 
 /**
  * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 035bbc044a3b..6a0b267d4fe6 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -297,6 +297,8 @@ struct ttm_device {
 	struct delayed_work wq;
 };
 
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+
 static inline struct ttm_resource_manager *
 ttm_manager_type(struct ttm_device *bdev, int mem_type)
 {
-- 
2.25.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 2/3] drm/ttm: remove swap LRU v2
  2021-03-15 16:04 [PATCH 1/3] drm/ttm: move swapout logic around Christian König
@ 2021-03-15 16:04 ` Christian König
  2021-03-15 18:54     ` kernel test robot
  2021-03-15 18:54   ` Matthew Auld
  2021-03-15 16:04 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 19+ messages in thread
From: Christian König @ 2021-03-15 16:04 UTC (permalink / raw)
  To: dri-devel; +Cc: ray.huang

Instead, evict round-robin from each device's SYSTEM and TT domains.

v2: reorder num_pages access reported by Dan's script

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c        | 33 ++--------------
 drivers/gpu/drm/ttm/ttm_bo_util.c   |  1 -
 drivers/gpu/drm/ttm/ttm_device.c    | 60 +++++++++++++++++++++--------
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  2 +-
 include/drm/ttm/ttm_bo_api.h        |  1 -
 include/drm/ttm/ttm_bo_driver.h     |  1 -
 include/drm/ttm/ttm_device.h        |  7 +---
 7 files changed, 52 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 56d2e38af273..a1be88be357b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -73,7 +73,6 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
 {
 	struct ttm_device *bdev = bo->bdev;
 
-	list_del_init(&bo->swap);
 	list_del_init(&bo->lru);
 
 	if (bdev->funcs->del_from_lru_notify)
@@ -104,16 +103,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
 
 	man = ttm_manager_type(bdev, mem->mem_type);
 	list_move_tail(&bo->lru, &man->lru[bo->priority]);
-	if (man->use_tt && bo->ttm &&
-	    !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
-				     TTM_PAGE_FLAG_SWAPPED))) {
-		struct list_head *swap;
-
-		swap = &ttm_glob.swap_lru[bo->priority];
-		list_move_tail(&bo->swap, swap);
-	} else {
-		list_del_init(&bo->swap);
-	}
 
 	if (bdev->funcs->del_from_lru_notify)
 		bdev->funcs->del_from_lru_notify(bo);
@@ -128,9 +117,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
 			ttm_bo_bulk_move_set_pos(&bulk->vram[bo->priority], bo);
 			break;
 		}
-		if (bo->ttm && !(bo->ttm->page_flags &
-				 (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED)))
-			ttm_bo_bulk_move_set_pos(&bulk->swap[bo->priority], bo);
 	}
 }
 EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
@@ -168,20 +154,6 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
 		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
 				    &pos->last->lru);
 	}
-
-	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
-		struct ttm_lru_bulk_move_pos *pos = &bulk->swap[i];
-		struct list_head *lru;
-
-		if (!pos->first)
-			continue;
-
-		dma_resv_assert_held(pos->first->base.resv);
-		dma_resv_assert_held(pos->last->base.resv);
-
-		lru = &ttm_glob.swap_lru[i];
-		list_bulk_move_tail(lru, &pos->first->swap, &pos->last->swap);
-	}
 }
 EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
 
@@ -1058,7 +1030,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
 	kref_init(&bo->kref);
 	INIT_LIST_HEAD(&bo->lru);
 	INIT_LIST_HEAD(&bo->ddestroy);
-	INIT_LIST_HEAD(&bo->swap);
 	bo->bdev = bdev;
 	bo->type = type;
 	bo->mem.mem_type = TTM_PL_SYSTEM;
@@ -1193,6 +1164,10 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 	bool locked;
 	int ret;
 
+	if (!bo->ttm || bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
+					       TTM_PAGE_FLAG_SWAPPED))
+		return false;
+
 	if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
 		return -EBUSY;
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 031e5819fec4..a2a17c84ceb3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -303,7 +303,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	atomic_inc(&ttm_glob.bo_count);
 	INIT_LIST_HEAD(&fbo->base.ddestroy);
 	INIT_LIST_HEAD(&fbo->base.lru);
-	INIT_LIST_HEAD(&fbo->base.swap);
 	fbo->base.moving = NULL;
 	drm_vma_node_reset(&fbo->base.base.vma_node);
 
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index dfc2a7e4e490..2c280fb1e992 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -67,7 +67,6 @@ static int ttm_global_init(void)
 	unsigned long num_pages;
 	struct sysinfo si;
 	int ret = 0;
-	unsigned i;
 
 	mutex_lock(&ttm_global_mutex);
 	if (++ttm_glob_use_count > 1)
@@ -90,8 +89,6 @@ static int ttm_global_init(void)
 		goto out;
 	}
 
-	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
-		INIT_LIST_HEAD(&glob->swap_lru[i]);
 	INIT_LIST_HEAD(&glob->device_list);
 	atomic_set(&glob->bo_count, 0);
 
@@ -109,27 +106,60 @@ static int ttm_global_init(void)
 long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
 {
 	struct ttm_global *glob = &ttm_glob;
+	struct ttm_device *bdev;
+	int ret = -EBUSY;
+
+	mutex_lock(&ttm_global_mutex);
+	list_for_each_entry(bdev, &glob->device_list, device_list) {
+		ret = ttm_device_swapout(bdev, ctx, gfp_flags);
+		if (ret > 0) {
+			list_move_tail(&bdev->device_list, &glob->device_list);
+			break;
+		}
+	}
+	mutex_unlock(&ttm_global_mutex);
+	return ret;
+}
+EXPORT_SYMBOL(ttm_global_swapout);
+
+long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
+			gfp_t gfp_flags)
+{
+	struct ttm_global *glob = &ttm_glob;
+	struct ttm_resource_manager *man;
 	struct ttm_buffer_object *bo;
-	unsigned i;
+	unsigned i, j;
 	int ret;
 
 	spin_lock(&glob->lru_lock);
-	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
-		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
-			uint32_t num_pages = bo->ttm->num_pages;
-
-			ret = ttm_bo_swapout(bo, ctx, gfp_flags);
-			/* ttm_bo_swapout has dropped the lru_lock */
-			if (!ret)
-				return num_pages;
-			if (ret != -EBUSY)
-				return ret;
+	for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
+		man = ttm_manager_type(bdev, i);
+		if (!man || !man->use_tt)
+			continue;
+
+		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
+			list_for_each_entry(bo, &man->lru[j], lru) {
+				long num_pages;
+
+				if (!bo->ttm ||
+				    bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
+				    bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
+					continue;
+
+				num_pages = bo->ttm->num_pages;
+				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
+				/* ttm_bo_swapout has dropped the lru_lock */
+				if (!ret)
+					return num_pages;
+				if (ret != -EBUSY)
+					return ret;
+			}
 		}
 	}
 	spin_unlock(&glob->lru_lock);
 	return 0;
 }
-EXPORT_SYMBOL(ttm_global_swapout);
+EXPORT_SYMBOL(ttm_device_swapout);
 
 static void ttm_init_sysman(struct ttm_device *bdev)
 {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index b991422e156c..0e82b0662d9e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev)
 	vmw_execbuf_release_pinned_bo(dev_priv);
 	vmw_resource_evict_all(dev_priv);
 	vmw_release_device_early(dev_priv);
-	while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
+	while (ttm_device_swapout(&dev_priv->bdev, &ctx, GFP_KERNEL) == 0);
 	if (dev_priv->enable_fb)
 		vmw_fifo_resource_dec(dev_priv);
 	if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 5044ac330858..3587f660e8f4 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -144,7 +144,6 @@ struct ttm_buffer_object {
 
 	struct list_head lru;
 	struct list_head ddestroy;
-	struct list_head swap;
 
 	/**
 	 * Members protected by a bo reservation.
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 8959c0075cfd..d007feef7676 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -69,7 +69,6 @@ struct ttm_lru_bulk_move_pos {
 struct ttm_lru_bulk_move {
 	struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
 	struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
-	struct ttm_lru_bulk_move_pos swap[TTM_MAX_BO_PRIORITY];
 };
 
 /*
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 6a0b267d4fe6..cda6efb4c34b 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -63,11 +63,6 @@ extern struct ttm_global {
 	 */
 	struct list_head device_list;
 
-	/**
-	 * Protected by the lru_lock.
-	 */
-	struct list_head swap_lru[TTM_MAX_BO_PRIORITY];
-
 	/**
 	 * Internal protection.
 	 */
@@ -298,6 +293,8 @@ struct ttm_device {
 };
 
 long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
+		       gfp_t gfp_flags);
 
 static inline struct ttm_resource_manager *
 ttm_manager_type(struct ttm_device *bdev, int mem_type)
-- 
2.25.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 3/3] drm/ttm: switch to per device LRU lock
  2021-03-15 16:04 [PATCH 1/3] drm/ttm: move swapout logic around Christian König
  2021-03-15 16:04 ` [PATCH 2/3] drm/ttm: remove swap LRU v2 Christian König
@ 2021-03-15 16:04 ` Christian König
  2021-03-15 20:17     ` kernel test robot
  2021-03-16  9:35   ` Daniel Vetter
  2021-03-15 18:47   ` kernel test robot
  2021-03-19  9:41   ` kernel test robot
  3 siblings, 2 replies; 19+ messages in thread
From: Christian König @ 2021-03-15 16:04 UTC (permalink / raw)
  To: dri-devel; +Cc: ray.huang

Use a per-device LRU lock instead of having a single global lock.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  8 ++---
 drivers/gpu/drm/qxl/qxl_release.c      |  5 +--
 drivers/gpu/drm/ttm/ttm_bo.c           | 49 ++++++++++++--------------
 drivers/gpu/drm/ttm/ttm_device.c       | 12 +++----
 drivers/gpu/drm/ttm/ttm_execbuf_util.c |  8 ++---
 drivers/gpu/drm/ttm/ttm_resource.c     |  9 +++--
 include/drm/ttm/ttm_bo_driver.h        |  4 +--
 include/drm/ttm/ttm_device.h           |  4 +--
 8 files changed, 43 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9d19078246c8..ae18c0e32347 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
 	struct amdgpu_vm_bo_base *bo_base;
 
 	if (vm->bulk_moveable) {
-		spin_lock(&ttm_glob.lru_lock);
+		spin_lock(&adev->mman.bdev.lru_lock);
 		ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
-		spin_unlock(&ttm_glob.lru_lock);
+		spin_unlock(&adev->mman.bdev.lru_lock);
 		return;
 	}
 
 	memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
 
-	spin_lock(&ttm_glob.lru_lock);
+	spin_lock(&adev->mman.bdev.lru_lock);
 	list_for_each_entry(bo_base, &vm->idle, vm_status) {
 		struct amdgpu_bo *bo = bo_base->bo;
 
@@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
 						&bo->shadow->tbo.mem,
 						&vm->lru_bulk_move);
 	}
-	spin_unlock(&ttm_glob.lru_lock);
+	spin_unlock(&adev->mman.bdev.lru_lock);
 
 	vm->bulk_moveable = true;
 }
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index f5845c96d414..b19f2f00b215 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
 		       release->id | 0xf0000000, release->base.seqno);
 	trace_dma_fence_emit(&release->base);
 
-	spin_lock(&ttm_glob.lru_lock);
-
 	list_for_each_entry(entry, &release->bos, head) {
 		bo = entry->bo;
 
 		dma_resv_add_shared_fence(bo->base.resv, &release->base);
-		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+		ttm_bo_move_to_lru_tail_unlocked(bo);
 		dma_resv_unlock(bo->base.resv);
 	}
-	spin_unlock(&ttm_glob.lru_lock);
 	ww_acquire_fini(&release->ticket);
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index a1be88be357b..a8103c8718a3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
 		 * reference it any more. The only tricky case is the trylock on
 		 * the resv object while holding the lru_lock.
 		 */
-		spin_lock(&ttm_glob.lru_lock);
+		spin_lock(&bo->bdev->lru_lock);
 		bo->base.resv = &bo->base._resv;
-		spin_unlock(&ttm_glob.lru_lock);
+		spin_unlock(&bo->bdev->lru_lock);
 	}
 
 	return r;
@@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 
 		if (unlock_resv)
 			dma_resv_unlock(bo->base.resv);
-		spin_unlock(&ttm_glob.lru_lock);
+		spin_unlock(&bo->bdev->lru_lock);
 
 		lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
 						 30 * HZ);
@@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 		else if (lret == 0)
 			return -EBUSY;
 
-		spin_lock(&ttm_glob.lru_lock);
+		spin_lock(&bo->bdev->lru_lock);
 		if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
 			/*
 			 * We raced, and lost, someone else holds the reservation now,
@@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 			 * delayed destruction would succeed, so just return success
 			 * here.
 			 */
-			spin_unlock(&ttm_glob.lru_lock);
+			spin_unlock(&bo->bdev->lru_lock);
 			return 0;
 		}
 		ret = 0;
@@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 	if (ret || unlikely(list_empty(&bo->ddestroy))) {
 		if (unlock_resv)
 			dma_resv_unlock(bo->base.resv);
-		spin_unlock(&ttm_glob.lru_lock);
+		spin_unlock(&bo->bdev->lru_lock);
 		return ret;
 	}
 
 	ttm_bo_del_from_lru(bo);
 	list_del_init(&bo->ddestroy);
-	spin_unlock(&ttm_glob.lru_lock);
+	spin_unlock(&bo->bdev->lru_lock);
 	ttm_bo_cleanup_memtype_use(bo);
 
 	if (unlock_resv)
@@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
  */
 bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
 {
-	struct ttm_global *glob = &ttm_glob;
 	struct list_head removed;
 	bool empty;
 
 	INIT_LIST_HEAD(&removed);
 
-	spin_lock(&glob->lru_lock);
+	spin_lock(&bdev->lru_lock);
 	while (!list_empty(&bdev->ddestroy)) {
 		struct ttm_buffer_object *bo;
 
@@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
 			continue;
 
 		if (remove_all || bo->base.resv != &bo->base._resv) {
-			spin_unlock(&glob->lru_lock);
+			spin_unlock(&bdev->lru_lock);
 			dma_resv_lock(bo->base.resv, NULL);
 
-			spin_lock(&glob->lru_lock);
+			spin_lock(&bdev->lru_lock);
 			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
 
 		} else if (dma_resv_trylock(bo->base.resv)) {
 			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
 		} else {
-			spin_unlock(&glob->lru_lock);
+			spin_unlock(&bdev->lru_lock);
 		}
 
 		ttm_bo_put(bo);
-		spin_lock(&glob->lru_lock);
+		spin_lock(&bdev->lru_lock);
 	}
 	list_splice_tail(&removed, &bdev->ddestroy);
 	empty = list_empty(&bdev->ddestroy);
-	spin_unlock(&glob->lru_lock);
+	spin_unlock(&bdev->lru_lock);
 
 	return empty;
 }
@@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref)
 		ttm_bo_flush_all_fences(bo);
 		bo->deleted = true;
 
-		spin_lock(&ttm_glob.lru_lock);
+		spin_lock(&bo->bdev->lru_lock);
 
 		/*
 		 * Make pinned bos immediately available to
@@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref)
 
 		kref_init(&bo->kref);
 		list_add_tail(&bo->ddestroy, &bdev->ddestroy);
-		spin_unlock(&ttm_glob.lru_lock);
+		spin_unlock(&bo->bdev->lru_lock);
 
 		schedule_delayed_work(&bdev->wq,
 				      ((HZ / 100) < 1) ? 1 : HZ / 100);
 		return;
 	}
 
-	spin_lock(&ttm_glob.lru_lock);
+	spin_lock(&bo->bdev->lru_lock);
 	ttm_bo_del_from_lru(bo);
 	list_del(&bo->ddestroy);
-	spin_unlock(&ttm_glob.lru_lock);
+	spin_unlock(&bo->bdev->lru_lock);
 
 	ttm_bo_cleanup_memtype_use(bo);
 	dma_resv_unlock(bo->base.resv);
@@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
 	unsigned i;
 	int ret;
 
-	spin_lock(&ttm_glob.lru_lock);
+	spin_lock(&bo->bdev->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		list_for_each_entry(bo, &man->lru[i], lru) {
 			bool busy;
@@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
 	if (!bo) {
 		if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
 			busy_bo = NULL;
-		spin_unlock(&ttm_glob.lru_lock);
+		spin_unlock(&bo->bdev->lru_lock);
 		ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
 		if (busy_bo)
 			ttm_bo_put(busy_bo);
@@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
 		return ret;
 	}
 
-	spin_unlock(&ttm_glob.lru_lock);
+	spin_unlock(&bo->bdev->lru_lock);
 
 	ret = ttm_bo_evict(bo, ctx);
 	if (locked)
@@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
 	mem->mem_type = place->mem_type;
 	mem->placement = place->flags;
 
-	spin_lock(&ttm_glob.lru_lock);
+	spin_lock(&bo->bdev->lru_lock);
 	ttm_bo_move_to_lru_tail(bo, mem, NULL);
-	spin_unlock(&ttm_glob.lru_lock);
-
+	spin_unlock(&bo->bdev->lru_lock);
 	return 0;
 }
 
@@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
 int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 		   gfp_t gfp_flags)
 {
-	struct ttm_global *glob = &ttm_glob;
 	bool locked;
 	int ret;
 
@@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 
 	ttm_bo_del_from_lru(bo);
 	/* TODO: Cleanup the locking */
-	spin_unlock(&glob->lru_lock);
+	spin_unlock(&bo->bdev->lru_lock);
 
 	/*
 	 * Move to system cached
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 2c280fb1e992..924d892109e8 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -81,7 +81,6 @@ static int ttm_global_init(void)
 	ttm_pool_mgr_init(num_pages * 50 / 100);
 	ttm_tt_mgr_init();
 
-	spin_lock_init(&glob->lru_lock);
 	glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
 
 	if (unlikely(glob->dummy_read_page == NULL)) {
@@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
 long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
 			gfp_t gfp_flags)
 {
-	struct ttm_global *glob = &ttm_glob;
 	struct ttm_resource_manager *man;
 	struct ttm_buffer_object *bo;
 	unsigned i, j;
 	int ret;
 
-	spin_lock(&glob->lru_lock);
+	spin_lock(&bdev->lru_lock);
 	for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
 		man = ttm_manager_type(bdev, i);
 		if (!man || !man->use_tt)
@@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
 			}
 		}
 	}
-	spin_unlock(&glob->lru_lock);
+	spin_unlock(&bdev->lru_lock);
 	return 0;
 }
 EXPORT_SYMBOL(ttm_device_swapout);
@@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
 
 	bdev->vma_manager = vma_manager;
 	INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
+	spin_lock_init(&bdev->lru_lock);
 	INIT_LIST_HEAD(&bdev->ddestroy);
 	bdev->dev_mapping = mapping;
 	mutex_lock(&ttm_global_mutex);
@@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
 
 void ttm_device_fini(struct ttm_device *bdev)
 {
-	struct ttm_global *glob = &ttm_glob;
 	struct ttm_resource_manager *man;
 	unsigned i;
 
@@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
 	if (ttm_bo_delayed_delete(bdev, true))
 		pr_debug("Delayed destroy list was clean\n");
 
-	spin_lock(&glob->lru_lock);
+	spin_lock(&bdev->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
 		if (list_empty(&man->lru[0]))
 			pr_debug("Swap list %d was clean\n", i);
-	spin_unlock(&glob->lru_lock);
+	spin_unlock(&bdev->lru_lock);
 
 	ttm_pool_fini(&bdev->pool);
 	ttm_global_release();
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 690ab97d52b7..071c48d672c6 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
 	if (list_empty(list))
 		return;
 
-	spin_lock(&ttm_glob.lru_lock);
 	list_for_each_entry(entry, list, head) {
 		struct ttm_buffer_object *bo = entry->bo;
 
-		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+		ttm_bo_move_to_lru_tail_unlocked(bo);
 		dma_resv_unlock(bo->base.resv);
 	}
-	spin_unlock(&ttm_glob.lru_lock);
 
 	if (ticket)
 		ww_acquire_fini(ticket);
@@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
 	if (list_empty(list))
 		return;
 
-	spin_lock(&ttm_glob.lru_lock);
 	list_for_each_entry(entry, list, head) {
 		struct ttm_buffer_object *bo = entry->bo;
 
@@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
 			dma_resv_add_shared_fence(bo->base.resv, fence);
 		else
 			dma_resv_add_excl_fence(bo->base.resv, fence);
-		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+		ttm_bo_move_to_lru_tail_unlocked(bo);
 		dma_resv_unlock(bo->base.resv);
 	}
-	spin_unlock(&ttm_glob.lru_lock);
 	if (ticket)
 		ww_acquire_fini(ticket);
 }
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index ed1672a9f332..04f2eef653ab 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
 		.no_wait_gpu = false,
 		.force_alloc = true
 	};
-	struct ttm_global *glob = &ttm_glob;
 	struct dma_fence *fence;
 	int ret;
 	unsigned i;
@@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
 	 * Can't use standard list traversal since we're unlocking.
 	 */
 
-	spin_lock(&glob->lru_lock);
+	spin_lock(&bdev->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		while (!list_empty(&man->lru[i])) {
-			spin_unlock(&glob->lru_lock);
+			spin_unlock(&bdev->lru_lock);
 			ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
 						  NULL);
 			if (ret)
 				return ret;
-			spin_lock(&glob->lru_lock);
+			spin_lock(&bdev->lru_lock);
 		}
 	}
-	spin_unlock(&glob->lru_lock);
+	spin_unlock(&bdev->lru_lock);
 
 	spin_lock(&man->move_lock);
 	fence = dma_fence_get(man->move);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index d007feef7676..dbccac957f8f 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
 static inline void
 ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
 {
-	spin_lock(&ttm_glob.lru_lock);
+	spin_lock(&bo->bdev->lru_lock);
 	ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
-	spin_unlock(&ttm_glob.lru_lock);
+	spin_unlock(&bo->bdev->lru_lock);
 }
 
 static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index cda6efb4c34b..bae56d29e8ff 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -56,7 +56,6 @@ extern struct ttm_global {
 	 */
 
 	struct page *dummy_read_page;
-	spinlock_t lru_lock;
 
 	/**
 	 * Protected by ttm_global_mutex.
@@ -277,8 +276,9 @@ struct ttm_device {
 	struct ttm_pool pool;
 
 	/*
-	 * Protected by the global:lru lock.
+	 * Protection for the per manager LRU and ddestroy lists.
 	 */
+	spinlock_t lru_lock;
 	struct list_head ddestroy;
 
 	/*
-- 
2.25.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/3] drm/ttm: move swapout logic around
  2021-03-15 16:04 [PATCH 1/3] drm/ttm: move swapout logic around Christian König
@ 2021-03-15 18:47   ` kernel test robot
  2021-03-15 16:04 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 19+ messages in thread
From: kernel test robot @ 2021-03-15 18:47 UTC (permalink / raw)
  To: Christian König, dri-devel; +Cc: ray.huang, kbuild-all

[-- Attachment #1: Type: text/plain, Size: 3093 bytes --]

Hi Christian,

I love your patch! Perhaps something to improve:

[auto build test WARNING on drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: i386-allyesconfig (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/824dca26fe395899b41d9790944ddea345f7a6fd
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
        git checkout 824dca26fe395899b41d9790944ddea345f7a6fd
        # save the attached .config to linux build tree
        make W=1 ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   drivers/gpu/drm/ttm/ttm_device.c:42: warning: Function parameter or member 'ttm_global_mutex' not described in 'DEFINE_MUTEX'
   drivers/gpu/drm/ttm/ttm_device.c:42: warning: expecting prototype for ttm_global_mutex(). Prototype was for DEFINE_MUTEX() instead
   drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'ctx' not described in 'ttm_global_swapout'
   drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'gfp_flags' not described in 'ttm_global_swapout'
>> drivers/gpu/drm/ttm/ttm_device.c:110: warning: expecting prototype for A buffer object shrink method that tries to swap out the first(). Prototype was for ttm_global_swapout() instead


vim +110 drivers/gpu/drm/ttm/ttm_device.c

   104	
   105	/**
   106	 * A buffer object shrink method that tries to swap out the first
   107	 * buffer object on the global::swap_lru list.
   108	 */
   109	long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
 > 110	{
   111		struct ttm_global *glob = &ttm_glob;
   112		struct ttm_buffer_object *bo;
   113		unsigned i;
   114		int ret;
   115	
   116		spin_lock(&glob->lru_lock);
   117		for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
   118			list_for_each_entry(bo, &glob->swap_lru[i], swap) {
   119				uint32_t num_pages = bo->ttm->num_pages;
   120	
   121				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
   122				/* ttm_bo_swapout has dropped the lru_lock */
   123				if (!ret)
   124					return num_pages;
   125				if (ret != -EBUSY)
   126					return ret;
   127			}
   128		}
   129		spin_unlock(&glob->lru_lock);
   130		return 0;
   131	}
   132	EXPORT_SYMBOL(ttm_global_swapout);
   133	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 64665 bytes --]

[-- Attachment #3: Type: text/plain, Size: 160 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/3] drm/ttm: move swapout logic around
@ 2021-03-15 18:47   ` kernel test robot
  0 siblings, 0 replies; 19+ messages in thread
From: kernel test robot @ 2021-03-15 18:47 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 3165 bytes --]

Hi Christian,

I love your patch! Perhaps something to improve:

[auto build test WARNING on drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: i386-allyesconfig (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/824dca26fe395899b41d9790944ddea345f7a6fd
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
        git checkout 824dca26fe395899b41d9790944ddea345f7a6fd
        # save the attached .config to linux build tree
        make W=1 ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   drivers/gpu/drm/ttm/ttm_device.c:42: warning: Function parameter or member 'ttm_global_mutex' not described in 'DEFINE_MUTEX'
   drivers/gpu/drm/ttm/ttm_device.c:42: warning: expecting prototype for ttm_global_mutex(). Prototype was for DEFINE_MUTEX() instead
   drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'ctx' not described in 'ttm_global_swapout'
   drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'gfp_flags' not described in 'ttm_global_swapout'
>> drivers/gpu/drm/ttm/ttm_device.c:110: warning: expecting prototype for A buffer object shrink method that tries to swap out the first(). Prototype was for ttm_global_swapout() instead


vim +110 drivers/gpu/drm/ttm/ttm_device.c

   104	
   105	/**
   106	 * A buffer object shrink method that tries to swap out the first
   107	 * buffer object on the global::swap_lru list.
   108	 */
   109	long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
 > 110	{
   111		struct ttm_global *glob = &ttm_glob;
   112		struct ttm_buffer_object *bo;
   113		unsigned i;
   114		int ret;
   115	
   116		spin_lock(&glob->lru_lock);
   117		for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
   118			list_for_each_entry(bo, &glob->swap_lru[i], swap) {
   119				uint32_t num_pages = bo->ttm->num_pages;
   120	
   121				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
   122				/* ttm_bo_swapout has dropped the lru_lock */
   123				if (!ret)
   124					return num_pages;
   125				if (ret != -EBUSY)
   126					return ret;
   127			}
   128		}
   129		spin_unlock(&glob->lru_lock);
   130		return 0;
   131	}
   132	EXPORT_SYMBOL(ttm_global_swapout);
   133	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 64665 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/3] drm/ttm: remove swap LRU v2
  2021-03-15 16:04 ` [PATCH 2/3] drm/ttm: remove swap LRU v2 Christian König
@ 2021-03-15 18:54     ` kernel test robot
  2021-03-15 18:54   ` Matthew Auld
  1 sibling, 0 replies; 19+ messages in thread
From: kernel test robot @ 2021-03-15 18:54 UTC (permalink / raw)
  To: Christian König, dri-devel; +Cc: ray.huang, kbuild-all

[-- Attachment #1: Type: text/plain, Size: 3216 bytes --]

Hi Christian,

I love your patch! Perhaps something to improve:

[auto build test WARNING on drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: i386-randconfig-s002-20210315 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.3-277-gc089cd2d-dirty
        # https://github.com/0day-ci/linux/commit/70ae63f3a85b9791dfcf38034c304aedda122e7b
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
        git checkout 70ae63f3a85b9791dfcf38034c304aedda122e7b
        # save the attached .config to linux build tree
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


"sparse warnings: (new ones prefixed by >>)"
   drivers/gpu/drm/ttm/ttm_device.c:42:1: sparse: sparse: symbol 'ttm_global_mutex' was not declared. Should it be static?
   drivers/gpu/drm/ttm/ttm_device.c:43:10: sparse: sparse: symbol 'ttm_glob_use_count' was not declared. Should it be static?
>> drivers/gpu/drm/ttm/ttm_device.c:125:6: sparse: sparse: context imbalance in 'ttm_device_swapout' - wrong count at exit

vim +/ttm_device_swapout +125 drivers/gpu/drm/ttm/ttm_device.c

   124	
 > 125	long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
   126				gfp_t gfp_flags)
   127	{
   128		struct ttm_global *glob = &ttm_glob;
   129		struct ttm_resource_manager *man;
   130		struct ttm_buffer_object *bo;
   131		unsigned i, j;
   132		int ret;
   133	
   134		spin_lock(&glob->lru_lock);
   135		for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
   136			man = ttm_manager_type(bdev, i);
   137			if (!man || !man->use_tt)
   138				continue;
   139	
   140			for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
   141				list_for_each_entry(bo, &man->lru[j], lru) {
   142					long num_pages;
   143	
   144					if (!bo->ttm ||
   145					    bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
   146					    bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
   147						continue;
   148	
   149					num_pages = bo->ttm->num_pages;
   150					ret = ttm_bo_swapout(bo, ctx, gfp_flags);
   151					/* ttm_bo_swapout has dropped the lru_lock */
   152					if (!ret)
   153						return num_pages;
   154					if (ret != -EBUSY)
   155						return ret;
   156				}
   157			}
   158		}
   159		spin_unlock(&glob->lru_lock);
   160		return 0;
   161	}
   162	EXPORT_SYMBOL(ttm_device_swapout);
   163	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 39354 bytes --]

[-- Attachment #3: Type: text/plain, Size: 160 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/3] drm/ttm: remove swap LRU v2
@ 2021-03-15 18:54     ` kernel test robot
  0 siblings, 0 replies; 19+ messages in thread
From: kernel test robot @ 2021-03-15 18:54 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 3297 bytes --]

Hi Christian,

I love your patch! Perhaps something to improve:

[auto build test WARNING on drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: i386-randconfig-s002-20210315 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.3-277-gc089cd2d-dirty
        # https://github.com/0day-ci/linux/commit/70ae63f3a85b9791dfcf38034c304aedda122e7b
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
        git checkout 70ae63f3a85b9791dfcf38034c304aedda122e7b
        # save the attached .config to linux build tree
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


"sparse warnings: (new ones prefixed by >>)"
   drivers/gpu/drm/ttm/ttm_device.c:42:1: sparse: sparse: symbol 'ttm_global_mutex' was not declared. Should it be static?
   drivers/gpu/drm/ttm/ttm_device.c:43:10: sparse: sparse: symbol 'ttm_glob_use_count' was not declared. Should it be static?
>> drivers/gpu/drm/ttm/ttm_device.c:125:6: sparse: sparse: context imbalance in 'ttm_device_swapout' - wrong count at exit

vim +/ttm_device_swapout +125 drivers/gpu/drm/ttm/ttm_device.c

   124	
 > 125	long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
   126				gfp_t gfp_flags)
   127	{
   128		struct ttm_global *glob = &ttm_glob;
   129		struct ttm_resource_manager *man;
   130		struct ttm_buffer_object *bo;
   131		unsigned i, j;
   132		int ret;
   133	
   134		spin_lock(&glob->lru_lock);
   135		for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
   136			man = ttm_manager_type(bdev, i);
   137			if (!man || !man->use_tt)
   138				continue;
   139	
   140			for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
   141				list_for_each_entry(bo, &man->lru[j], lru) {
   142					long num_pages;
   143	
   144					if (!bo->ttm ||
   145					    bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
   146					    bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
   147						continue;
   148	
   149					num_pages = bo->ttm->num_pages;
   150					ret = ttm_bo_swapout(bo, ctx, gfp_flags);
   151					/* ttm_bo_swapout has dropped the lru_lock */
   152					if (!ret)
   153						return num_pages;
   154					if (ret != -EBUSY)
   155						return ret;
   156				}
   157			}
   158		}
   159		spin_unlock(&glob->lru_lock);
   160		return 0;
   161	}
   162	EXPORT_SYMBOL(ttm_device_swapout);
   163	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 39354 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/3] drm/ttm: remove swap LRU v2
  2021-03-15 16:04 ` [PATCH 2/3] drm/ttm: remove swap LRU v2 Christian König
  2021-03-15 18:54     ` kernel test robot
@ 2021-03-15 18:54   ` Matthew Auld
  2021-03-15 19:27     ` Christian König
  1 sibling, 1 reply; 19+ messages in thread
From: Matthew Auld @ 2021-03-15 18:54 UTC (permalink / raw)
  To: Christian König; +Cc: ray.huang, ML dri-devel

On Mon, 15 Mar 2021 at 16:04, Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Instead evict round robin from each devices SYSTEM and TT domain.
>
> v2: reorder num_pages access reported by Dan's script
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/ttm/ttm_bo.c        | 33 ++--------------
>  drivers/gpu/drm/ttm/ttm_bo_util.c   |  1 -
>  drivers/gpu/drm/ttm/ttm_device.c    | 60 +++++++++++++++++++++--------
>  drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  2 +-
>  include/drm/ttm/ttm_bo_api.h        |  1 -
>  include/drm/ttm/ttm_bo_driver.h     |  1 -
>  include/drm/ttm/ttm_device.h        |  7 +---
>  7 files changed, 52 insertions(+), 53 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 56d2e38af273..a1be88be357b 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -73,7 +73,6 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
>  {
>         struct ttm_device *bdev = bo->bdev;
>
> -       list_del_init(&bo->swap);
>         list_del_init(&bo->lru);
>
>         if (bdev->funcs->del_from_lru_notify)
> @@ -104,16 +103,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
>
>         man = ttm_manager_type(bdev, mem->mem_type);
>         list_move_tail(&bo->lru, &man->lru[bo->priority]);
> -       if (man->use_tt && bo->ttm &&
> -           !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
> -                                    TTM_PAGE_FLAG_SWAPPED))) {
> -               struct list_head *swap;
> -
> -               swap = &ttm_glob.swap_lru[bo->priority];
> -               list_move_tail(&bo->swap, swap);
> -       } else {
> -               list_del_init(&bo->swap);
> -       }
>
>         if (bdev->funcs->del_from_lru_notify)
>                 bdev->funcs->del_from_lru_notify(bo);
> @@ -128,9 +117,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
>                         ttm_bo_bulk_move_set_pos(&bulk->vram[bo->priority], bo);
>                         break;
>                 }
> -               if (bo->ttm && !(bo->ttm->page_flags &
> -                                (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED)))
> -                       ttm_bo_bulk_move_set_pos(&bulk->swap[bo->priority], bo);
>         }
>  }
>  EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
> @@ -168,20 +154,6 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>                 list_bulk_move_tail(&man->lru[i], &pos->first->lru,
>                                     &pos->last->lru);
>         }
> -
> -       for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> -               struct ttm_lru_bulk_move_pos *pos = &bulk->swap[i];
> -               struct list_head *lru;
> -
> -               if (!pos->first)
> -                       continue;
> -
> -               dma_resv_assert_held(pos->first->base.resv);
> -               dma_resv_assert_held(pos->last->base.resv);
> -
> -               lru = &ttm_glob.swap_lru[i];
> -               list_bulk_move_tail(lru, &pos->first->swap, &pos->last->swap);
> -       }
>  }
>  EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
>
> @@ -1058,7 +1030,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
>         kref_init(&bo->kref);
>         INIT_LIST_HEAD(&bo->lru);
>         INIT_LIST_HEAD(&bo->ddestroy);
> -       INIT_LIST_HEAD(&bo->swap);
>         bo->bdev = bdev;
>         bo->type = type;
>         bo->mem.mem_type = TTM_PL_SYSTEM;
> @@ -1193,6 +1164,10 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>         bool locked;
>         int ret;
>
> +       if (!bo->ttm || bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
> +                                              TTM_PAGE_FLAG_SWAPPED))
> +               return false;
> +

return 0; ?

Seems inconsistent to return zero here and not drop the lru lock? Or
maybe turn this into a programmer error, since the current caller
already checks for the above?

>         if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
>                 return -EBUSY;
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 031e5819fec4..a2a17c84ceb3 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -303,7 +303,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
>         atomic_inc(&ttm_glob.bo_count);
>         INIT_LIST_HEAD(&fbo->base.ddestroy);
>         INIT_LIST_HEAD(&fbo->base.lru);
> -       INIT_LIST_HEAD(&fbo->base.swap);
>         fbo->base.moving = NULL;
>         drm_vma_node_reset(&fbo->base.base.vma_node);
>
> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> index dfc2a7e4e490..2c280fb1e992 100644
> --- a/drivers/gpu/drm/ttm/ttm_device.c
> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> @@ -67,7 +67,6 @@ static int ttm_global_init(void)
>         unsigned long num_pages;
>         struct sysinfo si;
>         int ret = 0;
> -       unsigned i;
>
>         mutex_lock(&ttm_global_mutex);
>         if (++ttm_glob_use_count > 1)
> @@ -90,8 +89,6 @@ static int ttm_global_init(void)
>                 goto out;
>         }
>
> -       for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
> -               INIT_LIST_HEAD(&glob->swap_lru[i]);
>         INIT_LIST_HEAD(&glob->device_list);
>         atomic_set(&glob->bo_count, 0);
>
> @@ -109,27 +106,60 @@ static int ttm_global_init(void)
>  long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
>  {
>         struct ttm_global *glob = &ttm_glob;
> +       struct ttm_device *bdev;
> +       int ret = -EBUSY;
> +
> +       mutex_lock(&ttm_global_mutex);
> +       list_for_each_entry(bdev, &glob->device_list, device_list) {
> +               ret = ttm_device_swapout(bdev, ctx, gfp_flags);

Mixing int and long for num_pages.

Does ttm enforce a maximum page count somewhere for object sizes?
Something like INT_MAX, since it doesn't look like ttm is consistently
using the same type(unsigned long?) when representing the number of
pages for an object?

> +               if (ret > 0) {
> +                       list_move_tail(&bdev->device_list, &glob->device_list);
> +                       break;
> +               }
> +       }
> +       mutex_unlock(&ttm_global_mutex);
> +       return ret;
> +}
> +EXPORT_SYMBOL(ttm_global_swapout);
> +
> +long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> +                       gfp_t gfp_flags)
> +{
> +       struct ttm_global *glob = &ttm_glob;
> +       struct ttm_resource_manager *man;
>         struct ttm_buffer_object *bo;
> -       unsigned i;
> +       unsigned i, j;
>         int ret;
>
>         spin_lock(&glob->lru_lock);
> -       for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> -               list_for_each_entry(bo, &glob->swap_lru[i], swap) {
> -                       uint32_t num_pages = bo->ttm->num_pages;
> -
> -                       ret = ttm_bo_swapout(bo, ctx, gfp_flags);
> -                       /* ttm_bo_swapout has dropped the lru_lock */
> -                       if (!ret)
> -                               return num_pages;
> -                       if (ret != -EBUSY)
> -                               return ret;
> +       for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
> +               man = ttm_manager_type(bdev, i);
> +               if (!man || !man->use_tt)
> +                       continue;
> +
> +               for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
> +                       list_for_each_entry(bo, &man->lru[j], lru) {
> +                               long num_pages;
> +
> +                               if (!bo->ttm ||
> +                                   bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
> +                                   bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
> +                                       continue;
> +
> +                               num_pages = bo->ttm->num_pages;
> +                               ret = ttm_bo_swapout(bo, ctx, gfp_flags);
> +                               /* ttm_bo_swapout has dropped the lru_lock */
> +                               if (!ret)
> +                                       return num_pages;
> +                               if (ret != -EBUSY)
> +                                       return ret;
> +                       }
>                 }
>         }
>         spin_unlock(&glob->lru_lock);
>         return 0;
>  }
> -EXPORT_SYMBOL(ttm_global_swapout);
> +EXPORT_SYMBOL(ttm_device_swapout);
>
>  static void ttm_init_sysman(struct ttm_device *bdev)
>  {
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> index b991422e156c..0e82b0662d9e 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> @@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev)
>         vmw_execbuf_release_pinned_bo(dev_priv);
>         vmw_resource_evict_all(dev_priv);
>         vmw_release_device_early(dev_priv);
> -       while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
> +       while (ttm_device_swapout(&dev_priv->bdev, &ctx, GFP_KERNEL) == 0);

Is this the intended behaviour? ttm_device_swapout() still just
returns num_pages if it swapped something out. I assume this wants to
keep swapping stuff out, until it can't anymore. Or am I missing
something?

>         if (dev_priv->enable_fb)
>                 vmw_fifo_resource_dec(dev_priv);
>         if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 5044ac330858..3587f660e8f4 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -144,7 +144,6 @@ struct ttm_buffer_object {
>
>         struct list_head lru;
>         struct list_head ddestroy;
> -       struct list_head swap;
>
>         /**
>          * Members protected by a bo reservation.
> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> index 8959c0075cfd..d007feef7676 100644
> --- a/include/drm/ttm/ttm_bo_driver.h
> +++ b/include/drm/ttm/ttm_bo_driver.h
> @@ -69,7 +69,6 @@ struct ttm_lru_bulk_move_pos {
>  struct ttm_lru_bulk_move {
>         struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
>         struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
> -       struct ttm_lru_bulk_move_pos swap[TTM_MAX_BO_PRIORITY];
>  };
>
>  /*
> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> index 6a0b267d4fe6..cda6efb4c34b 100644
> --- a/include/drm/ttm/ttm_device.h
> +++ b/include/drm/ttm/ttm_device.h
> @@ -63,11 +63,6 @@ extern struct ttm_global {
>          */
>         struct list_head device_list;
>
> -       /**
> -        * Protected by the lru_lock.
> -        */
> -       struct list_head swap_lru[TTM_MAX_BO_PRIORITY];
> -
>         /**
>          * Internal protection.
>          */
> @@ -298,6 +293,8 @@ struct ttm_device {
>  };
>
>  long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
> +long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> +                      gfp_t gfp_flags);
>
>  static inline struct ttm_resource_manager *
>  ttm_manager_type(struct ttm_device *bdev, int mem_type)
> --
> 2.25.1
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/3] drm/ttm: remove swap LRU v2
  2021-03-15 18:54   ` Matthew Auld
@ 2021-03-15 19:27     ` Christian König
  0 siblings, 0 replies; 19+ messages in thread
From: Christian König @ 2021-03-15 19:27 UTC (permalink / raw)
  To: Matthew Auld; +Cc: ray.huang, ML dri-devel

Am 15.03.21 um 19:54 schrieb Matthew Auld:
> On Mon, 15 Mar 2021 at 16:04, Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> [SNIP]
>> @@ -1193,6 +1164,10 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>>          bool locked;
>>          int ret;
>>
>> +       if (!bo->ttm || bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
>> +                                              TTM_PAGE_FLAG_SWAPPED))
>> +               return false;
>> +
> return 0; ?
>
> Seems inconsistent to return zero here and not drop the lru lock? Or
> maybe turn this into a programmer error, since the current caller
> already checks for the above?

Thanks, that is just an artifact from rebasing and should be removed.

>> [SNIP]
>>
>> @@ -109,27 +106,60 @@ static int ttm_global_init(void)
>>   long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
>>   {
>>          struct ttm_global *glob = &ttm_glob;
>> +       struct ttm_device *bdev;
>> +       int ret = -EBUSY;
>> +
>> +       mutex_lock(&ttm_global_mutex);
>> +       list_for_each_entry(bdev, &glob->device_list, device_list) {
>> +               ret = ttm_device_swapout(bdev, ctx, gfp_flags);
> Mixing int and long for num_pages.
>
> Does ttm enforce a maximum page count somewhere for object sizes?

We should use 32 bit values for the number of pages in TTM, even signed 
values allow for 8TB large BOs.

And I really hope that we can get rid of the BO approach in general 
before we ever come close to that limit.

> Something like INT_MAX, since it doesn't look like ttm is consistently
> using the same type(unsigned long?) when representing the number of
> pages for an object?

I should probably add a check for that in the tt code, yes.

> [SNIP]
>   static void ttm_init_sysman(struct ttm_device *bdev)
>   {
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> index b991422e156c..0e82b0662d9e 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> @@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev)
>          vmw_execbuf_release_pinned_bo(dev_priv);
>          vmw_resource_evict_all(dev_priv);
>          vmw_release_device_early(dev_priv);
> -       while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
> +       while (ttm_device_swapout(&dev_priv->bdev, &ctx, GFP_KERNEL) == 0);
> Is this the intended behaviour? ttm_device_swapout() still just
> returns num_pages if it swapped something out. I assume this wants to
> keep swapping stuff out, until it can't anymore. Or am I missing
> something?

Indeed that's a mix up. Thanks for pointing that out.

Christian.
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
  2021-03-15 16:04 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
@ 2021-03-15 20:17     ` kernel test robot
  2021-03-16  9:35   ` Daniel Vetter
  1 sibling, 0 replies; 19+ messages in thread
From: kernel test robot @ 2021-03-15 20:17 UTC (permalink / raw)
  To: Christian König, dri-devel; +Cc: ray.huang, kbuild-all

[-- Attachment #1: Type: text/plain, Size: 4014 bytes --]

Hi Christian,

I love your patch! Perhaps something to improve:

[auto build test WARNING on drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-m001-20210315 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

smatch warnings:
drivers/gpu/drm/ttm/ttm_device.c:158 ttm_device_swapout() warn: inconsistent returns '&bdev->lru_lock'.
drivers/gpu/drm/ttm/ttm_bo.c:665 ttm_mem_evict_first() error: we previously assumed 'bo' could be null (see line 662)

vim +158 drivers/gpu/drm/ttm/ttm_device.c

70ae63f3a85b97 Christian König 2021-03-15  123  
70ae63f3a85b97 Christian König 2021-03-15  124  long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
70ae63f3a85b97 Christian König 2021-03-15  125  			gfp_t gfp_flags)
70ae63f3a85b97 Christian König 2021-03-15  126  {
70ae63f3a85b97 Christian König 2021-03-15  127  	struct ttm_resource_manager *man;
824dca26fe3958 Christian König 2021-03-15  128  	struct ttm_buffer_object *bo;
70ae63f3a85b97 Christian König 2021-03-15  129  	unsigned i, j;
824dca26fe3958 Christian König 2021-03-15  130  	int ret;
824dca26fe3958 Christian König 2021-03-15  131  
1ed8d8fc515b90 Christian König 2021-03-15  132  	spin_lock(&bdev->lru_lock);
70ae63f3a85b97 Christian König 2021-03-15  133  	for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
70ae63f3a85b97 Christian König 2021-03-15  134  		man = ttm_manager_type(bdev, i);
70ae63f3a85b97 Christian König 2021-03-15  135  		if (!man || !man->use_tt)
70ae63f3a85b97 Christian König 2021-03-15  136  			continue;
70ae63f3a85b97 Christian König 2021-03-15  137  
70ae63f3a85b97 Christian König 2021-03-15  138  		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
70ae63f3a85b97 Christian König 2021-03-15  139  			list_for_each_entry(bo, &man->lru[j], lru) {
70ae63f3a85b97 Christian König 2021-03-15  140  				long num_pages;
824dca26fe3958 Christian König 2021-03-15  141  
70ae63f3a85b97 Christian König 2021-03-15  142  				if (!bo->ttm ||
70ae63f3a85b97 Christian König 2021-03-15  143  				    bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
70ae63f3a85b97 Christian König 2021-03-15  144  				    bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
70ae63f3a85b97 Christian König 2021-03-15  145  					continue;
70ae63f3a85b97 Christian König 2021-03-15  146  
70ae63f3a85b97 Christian König 2021-03-15  147  				num_pages = bo->ttm->num_pages;
824dca26fe3958 Christian König 2021-03-15  148  				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
824dca26fe3958 Christian König 2021-03-15  149  				/* ttm_bo_swapout has dropped the lru_lock */
824dca26fe3958 Christian König 2021-03-15  150  				if (!ret)
824dca26fe3958 Christian König 2021-03-15  151  					return num_pages;
824dca26fe3958 Christian König 2021-03-15  152  				if (ret != -EBUSY)
824dca26fe3958 Christian König 2021-03-15  153  					return ret;
824dca26fe3958 Christian König 2021-03-15  154  			}
824dca26fe3958 Christian König 2021-03-15  155  		}
70ae63f3a85b97 Christian König 2021-03-15  156  	}
1ed8d8fc515b90 Christian König 2021-03-15  157  	spin_unlock(&bdev->lru_lock);
824dca26fe3958 Christian König 2021-03-15 @158  	return 0;
824dca26fe3958 Christian König 2021-03-15  159  }
70ae63f3a85b97 Christian König 2021-03-15  160  EXPORT_SYMBOL(ttm_device_swapout);
824dca26fe3958 Christian König 2021-03-15  161  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 36557 bytes --]

[-- Attachment #3: Type: text/plain, Size: 160 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
@ 2021-03-15 20:17     ` kernel test robot
  0 siblings, 0 replies; 19+ messages in thread
From: kernel test robot @ 2021-03-15 20:17 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 4122 bytes --]

Hi Christian,

I love your patch! Perhaps something to improve:

[auto build test WARNING on drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-m001-20210315 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

smatch warnings:
drivers/gpu/drm/ttm/ttm_device.c:158 ttm_device_swapout() warn: inconsistent returns '&bdev->lru_lock'.
drivers/gpu/drm/ttm/ttm_bo.c:665 ttm_mem_evict_first() error: we previously assumed 'bo' could be null (see line 662)

vim +158 drivers/gpu/drm/ttm/ttm_device.c

70ae63f3a85b97 Christian König 2021-03-15  123  
70ae63f3a85b97 Christian König 2021-03-15  124  long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
70ae63f3a85b97 Christian König 2021-03-15  125  			gfp_t gfp_flags)
70ae63f3a85b97 Christian König 2021-03-15  126  {
70ae63f3a85b97 Christian König 2021-03-15  127  	struct ttm_resource_manager *man;
824dca26fe3958 Christian König 2021-03-15  128  	struct ttm_buffer_object *bo;
70ae63f3a85b97 Christian König 2021-03-15  129  	unsigned i, j;
824dca26fe3958 Christian König 2021-03-15  130  	int ret;
824dca26fe3958 Christian König 2021-03-15  131  
1ed8d8fc515b90 Christian König 2021-03-15  132  	spin_lock(&bdev->lru_lock);
70ae63f3a85b97 Christian König 2021-03-15  133  	for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
70ae63f3a85b97 Christian König 2021-03-15  134  		man = ttm_manager_type(bdev, i);
70ae63f3a85b97 Christian König 2021-03-15  135  		if (!man || !man->use_tt)
70ae63f3a85b97 Christian König 2021-03-15  136  			continue;
70ae63f3a85b97 Christian König 2021-03-15  137  
70ae63f3a85b97 Christian König 2021-03-15  138  		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
70ae63f3a85b97 Christian König 2021-03-15  139  			list_for_each_entry(bo, &man->lru[j], lru) {
70ae63f3a85b97 Christian König 2021-03-15  140  				long num_pages;
824dca26fe3958 Christian König 2021-03-15  141  
70ae63f3a85b97 Christian König 2021-03-15  142  				if (!bo->ttm ||
70ae63f3a85b97 Christian König 2021-03-15  143  				    bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
70ae63f3a85b97 Christian König 2021-03-15  144  				    bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
70ae63f3a85b97 Christian König 2021-03-15  145  					continue;
70ae63f3a85b97 Christian König 2021-03-15  146  
70ae63f3a85b97 Christian König 2021-03-15  147  				num_pages = bo->ttm->num_pages;
824dca26fe3958 Christian König 2021-03-15  148  				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
824dca26fe3958 Christian König 2021-03-15  149  				/* ttm_bo_swapout has dropped the lru_lock */
824dca26fe3958 Christian König 2021-03-15  150  				if (!ret)
824dca26fe3958 Christian König 2021-03-15  151  					return num_pages;
824dca26fe3958 Christian König 2021-03-15  152  				if (ret != -EBUSY)
824dca26fe3958 Christian König 2021-03-15  153  					return ret;
824dca26fe3958 Christian König 2021-03-15  154  			}
824dca26fe3958 Christian König 2021-03-15  155  		}
70ae63f3a85b97 Christian König 2021-03-15  156  	}
1ed8d8fc515b90 Christian König 2021-03-15  157  	spin_unlock(&bdev->lru_lock);
824dca26fe3958 Christian König 2021-03-15 @158  	return 0;
824dca26fe3958 Christian König 2021-03-15  159  }
70ae63f3a85b97 Christian König 2021-03-15  160  EXPORT_SYMBOL(ttm_device_swapout);
824dca26fe3958 Christian König 2021-03-15  161  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 36557 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
  2021-03-15 16:04 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
  2021-03-15 20:17     ` kernel test robot
@ 2021-03-16  9:35   ` Daniel Vetter
  2021-03-16 12:03     ` Christian König
  1 sibling, 1 reply; 19+ messages in thread
From: Daniel Vetter @ 2021-03-16  9:35 UTC (permalink / raw)
  To: Christian König; +Cc: ray.huang, dri-devel

On Mon, Mar 15, 2021 at 05:04:22PM +0100, Christian König wrote:
> Instead of having a global lock.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

I guess per zone lru lock is a lot more work since then we need to handle
ordering and ABBA deadlocks? s/zone/mem region/ I think is the proper ttm
lingo.
-Daniel

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  8 ++---
>  drivers/gpu/drm/qxl/qxl_release.c      |  5 +--
>  drivers/gpu/drm/ttm/ttm_bo.c           | 49 ++++++++++++--------------
>  drivers/gpu/drm/ttm/ttm_device.c       | 12 +++----
>  drivers/gpu/drm/ttm/ttm_execbuf_util.c |  8 ++---
>  drivers/gpu/drm/ttm/ttm_resource.c     |  9 +++--
>  include/drm/ttm/ttm_bo_driver.h        |  4 +--
>  include/drm/ttm/ttm_device.h           |  4 +--
>  8 files changed, 43 insertions(+), 56 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 9d19078246c8..ae18c0e32347 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>  	struct amdgpu_vm_bo_base *bo_base;
>  
>  	if (vm->bulk_moveable) {
> -		spin_lock(&ttm_glob.lru_lock);
> +		spin_lock(&adev->mman.bdev.lru_lock);
>  		ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
> -		spin_unlock(&ttm_glob.lru_lock);
> +		spin_unlock(&adev->mman.bdev.lru_lock);
>  		return;
>  	}
>  
>  	memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
>  
> -	spin_lock(&ttm_glob.lru_lock);
> +	spin_lock(&adev->mman.bdev.lru_lock);
>  	list_for_each_entry(bo_base, &vm->idle, vm_status) {
>  		struct amdgpu_bo *bo = bo_base->bo;
>  
> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>  						&bo->shadow->tbo.mem,
>  						&vm->lru_bulk_move);
>  	}
> -	spin_unlock(&ttm_glob.lru_lock);
> +	spin_unlock(&adev->mman.bdev.lru_lock);
>  
>  	vm->bulk_moveable = true;
>  }
> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
> index f5845c96d414..b19f2f00b215 100644
> --- a/drivers/gpu/drm/qxl/qxl_release.c
> +++ b/drivers/gpu/drm/qxl/qxl_release.c
> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
>  		       release->id | 0xf0000000, release->base.seqno);
>  	trace_dma_fence_emit(&release->base);
>  
> -	spin_lock(&ttm_glob.lru_lock);
> -
>  	list_for_each_entry(entry, &release->bos, head) {
>  		bo = entry->bo;
>  
>  		dma_resv_add_shared_fence(bo->base.resv, &release->base);
> -		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> +		ttm_bo_move_to_lru_tail_unlocked(bo);
>  		dma_resv_unlock(bo->base.resv);
>  	}
> -	spin_unlock(&ttm_glob.lru_lock);
>  	ww_acquire_fini(&release->ticket);
>  }
>  
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index a1be88be357b..a8103c8718a3 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
>  		 * reference it any more. The only tricky case is the trylock on
>  		 * the resv object while holding the lru_lock.
>  		 */
> -		spin_lock(&ttm_glob.lru_lock);
> +		spin_lock(&bo->bdev->lru_lock);
>  		bo->base.resv = &bo->base._resv;
> -		spin_unlock(&ttm_glob.lru_lock);
> +		spin_unlock(&bo->bdev->lru_lock);
>  	}
>  
>  	return r;
> @@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>  
>  		if (unlock_resv)
>  			dma_resv_unlock(bo->base.resv);
> -		spin_unlock(&ttm_glob.lru_lock);
> +		spin_unlock(&bo->bdev->lru_lock);
>  
>  		lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
>  						 30 * HZ);
> @@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>  		else if (lret == 0)
>  			return -EBUSY;
>  
> -		spin_lock(&ttm_glob.lru_lock);
> +		spin_lock(&bo->bdev->lru_lock);
>  		if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
>  			/*
>  			 * We raced, and lost, someone else holds the reservation now,
> @@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>  			 * delayed destruction would succeed, so just return success
>  			 * here.
>  			 */
> -			spin_unlock(&ttm_glob.lru_lock);
> +			spin_unlock(&bo->bdev->lru_lock);
>  			return 0;
>  		}
>  		ret = 0;
> @@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>  	if (ret || unlikely(list_empty(&bo->ddestroy))) {
>  		if (unlock_resv)
>  			dma_resv_unlock(bo->base.resv);
> -		spin_unlock(&ttm_glob.lru_lock);
> +		spin_unlock(&bo->bdev->lru_lock);
>  		return ret;
>  	}
>  
>  	ttm_bo_del_from_lru(bo);
>  	list_del_init(&bo->ddestroy);
> -	spin_unlock(&ttm_glob.lru_lock);
> +	spin_unlock(&bo->bdev->lru_lock);
>  	ttm_bo_cleanup_memtype_use(bo);
>  
>  	if (unlock_resv)
> @@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>   */
>  bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>  {
> -	struct ttm_global *glob = &ttm_glob;
>  	struct list_head removed;
>  	bool empty;
>  
>  	INIT_LIST_HEAD(&removed);
>  
> -	spin_lock(&glob->lru_lock);
> +	spin_lock(&bdev->lru_lock);
>  	while (!list_empty(&bdev->ddestroy)) {
>  		struct ttm_buffer_object *bo;
>  
> @@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>  			continue;
>  
>  		if (remove_all || bo->base.resv != &bo->base._resv) {
> -			spin_unlock(&glob->lru_lock);
> +			spin_unlock(&bdev->lru_lock);
>  			dma_resv_lock(bo->base.resv, NULL);
>  
> -			spin_lock(&glob->lru_lock);
> +			spin_lock(&bdev->lru_lock);
>  			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>  
>  		} else if (dma_resv_trylock(bo->base.resv)) {
>  			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>  		} else {
> -			spin_unlock(&glob->lru_lock);
> +			spin_unlock(&bdev->lru_lock);
>  		}
>  
>  		ttm_bo_put(bo);
> -		spin_lock(&glob->lru_lock);
> +		spin_lock(&bdev->lru_lock);
>  	}
>  	list_splice_tail(&removed, &bdev->ddestroy);
>  	empty = list_empty(&bdev->ddestroy);
> -	spin_unlock(&glob->lru_lock);
> +	spin_unlock(&bdev->lru_lock);
>  
>  	return empty;
>  }
> @@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref)
>  		ttm_bo_flush_all_fences(bo);
>  		bo->deleted = true;
>  
> -		spin_lock(&ttm_glob.lru_lock);
> +		spin_lock(&bo->bdev->lru_lock);
>  
>  		/*
>  		 * Make pinned bos immediately available to
> @@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref)
>  
>  		kref_init(&bo->kref);
>  		list_add_tail(&bo->ddestroy, &bdev->ddestroy);
> -		spin_unlock(&ttm_glob.lru_lock);
> +		spin_unlock(&bo->bdev->lru_lock);
>  
>  		schedule_delayed_work(&bdev->wq,
>  				      ((HZ / 100) < 1) ? 1 : HZ / 100);
>  		return;
>  	}
>  
> -	spin_lock(&ttm_glob.lru_lock);
> +	spin_lock(&bo->bdev->lru_lock);
>  	ttm_bo_del_from_lru(bo);
>  	list_del(&bo->ddestroy);
> -	spin_unlock(&ttm_glob.lru_lock);
> +	spin_unlock(&bo->bdev->lru_lock);
>  
>  	ttm_bo_cleanup_memtype_use(bo);
>  	dma_resv_unlock(bo->base.resv);
> @@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>  	unsigned i;
>  	int ret;
>  
> -	spin_lock(&ttm_glob.lru_lock);
> +	spin_lock(&bo->bdev->lru_lock);
>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>  		list_for_each_entry(bo, &man->lru[i], lru) {
>  			bool busy;
> @@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>  	if (!bo) {
>  		if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
>  			busy_bo = NULL;
> -		spin_unlock(&ttm_glob.lru_lock);
> +		spin_unlock(&bo->bdev->lru_lock);
>  		ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
>  		if (busy_bo)
>  			ttm_bo_put(busy_bo);
> @@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>  		return ret;
>  	}
>  
> -	spin_unlock(&ttm_glob.lru_lock);
> +	spin_unlock(&bo->bdev->lru_lock);
>  
>  	ret = ttm_bo_evict(bo, ctx);
>  	if (locked)
> @@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
>  	mem->mem_type = place->mem_type;
>  	mem->placement = place->flags;
>  
> -	spin_lock(&ttm_glob.lru_lock);
> +	spin_lock(&bo->bdev->lru_lock);
>  	ttm_bo_move_to_lru_tail(bo, mem, NULL);
> -	spin_unlock(&ttm_glob.lru_lock);
> -
> +	spin_unlock(&bo->bdev->lru_lock);
>  	return 0;
>  }
>  
> @@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
>  int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>  		   gfp_t gfp_flags)
>  {
> -	struct ttm_global *glob = &ttm_glob;
>  	bool locked;
>  	int ret;
>  
> @@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>  
>  	ttm_bo_del_from_lru(bo);
>  	/* TODO: Cleanup the locking */
> -	spin_unlock(&glob->lru_lock);
> +	spin_unlock(&bo->bdev->lru_lock);
>  
>  	/*
>  	 * Move to system cached
> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> index 2c280fb1e992..924d892109e8 100644
> --- a/drivers/gpu/drm/ttm/ttm_device.c
> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> @@ -81,7 +81,6 @@ static int ttm_global_init(void)
>  	ttm_pool_mgr_init(num_pages * 50 / 100);
>  	ttm_tt_mgr_init();
>  
> -	spin_lock_init(&glob->lru_lock);
>  	glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
>  
>  	if (unlikely(glob->dummy_read_page == NULL)) {
> @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
>  long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>  			gfp_t gfp_flags)
>  {
> -	struct ttm_global *glob = &ttm_glob;
>  	struct ttm_resource_manager *man;
>  	struct ttm_buffer_object *bo;
>  	unsigned i, j;
>  	int ret;
>  
> -	spin_lock(&glob->lru_lock);
> +	spin_lock(&bdev->lru_lock);
>  	for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
>  		man = ttm_manager_type(bdev, i);
>  		if (!man || !man->use_tt)
> @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>  			}
>  		}
>  	}
> -	spin_unlock(&glob->lru_lock);
> +	spin_unlock(&bdev->lru_lock);
>  	return 0;
>  }
>  EXPORT_SYMBOL(ttm_device_swapout);
> @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
>  
>  	bdev->vma_manager = vma_manager;
>  	INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
> +	spin_lock_init(&bdev->lru_lock);
>  	INIT_LIST_HEAD(&bdev->ddestroy);
>  	bdev->dev_mapping = mapping;
>  	mutex_lock(&ttm_global_mutex);
> @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
>  
>  void ttm_device_fini(struct ttm_device *bdev)
>  {
> -	struct ttm_global *glob = &ttm_glob;
>  	struct ttm_resource_manager *man;
>  	unsigned i;
>  
> @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
>  	if (ttm_bo_delayed_delete(bdev, true))
>  		pr_debug("Delayed destroy list was clean\n");
>  
> -	spin_lock(&glob->lru_lock);
> +	spin_lock(&bdev->lru_lock);
>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
>  		if (list_empty(&man->lru[0]))
>  			pr_debug("Swap list %d was clean\n", i);
> -	spin_unlock(&glob->lru_lock);
> +	spin_unlock(&bdev->lru_lock);
>  
>  	ttm_pool_fini(&bdev->pool);
>  	ttm_global_release();
> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> index 690ab97d52b7..071c48d672c6 100644
> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
>  	if (list_empty(list))
>  		return;
>  
> -	spin_lock(&ttm_glob.lru_lock);
>  	list_for_each_entry(entry, list, head) {
>  		struct ttm_buffer_object *bo = entry->bo;
>  
> -		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> +		ttm_bo_move_to_lru_tail_unlocked(bo);
>  		dma_resv_unlock(bo->base.resv);
>  	}
> -	spin_unlock(&ttm_glob.lru_lock);
>  
>  	if (ticket)
>  		ww_acquire_fini(ticket);
> @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>  	if (list_empty(list))
>  		return;
>  
> -	spin_lock(&ttm_glob.lru_lock);
>  	list_for_each_entry(entry, list, head) {
>  		struct ttm_buffer_object *bo = entry->bo;
>  
> @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>  			dma_resv_add_shared_fence(bo->base.resv, fence);
>  		else
>  			dma_resv_add_excl_fence(bo->base.resv, fence);
> -		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> +		ttm_bo_move_to_lru_tail_unlocked(bo);
>  		dma_resv_unlock(bo->base.resv);
>  	}
> -	spin_unlock(&ttm_glob.lru_lock);
>  	if (ticket)
>  		ww_acquire_fini(ticket);
>  }
> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
> index ed1672a9f332..04f2eef653ab 100644
> --- a/drivers/gpu/drm/ttm/ttm_resource.c
> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>  		.no_wait_gpu = false,
>  		.force_alloc = true
>  	};
> -	struct ttm_global *glob = &ttm_glob;
>  	struct dma_fence *fence;
>  	int ret;
>  	unsigned i;
> @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>  	 * Can't use standard list traversal since we're unlocking.
>  	 */
>  
> -	spin_lock(&glob->lru_lock);
> +	spin_lock(&bdev->lru_lock);
>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>  		while (!list_empty(&man->lru[i])) {
> -			spin_unlock(&glob->lru_lock);
> +			spin_unlock(&bdev->lru_lock);
>  			ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
>  						  NULL);
>  			if (ret)
>  				return ret;
> -			spin_lock(&glob->lru_lock);
> +			spin_lock(&bdev->lru_lock);
>  		}
>  	}
> -	spin_unlock(&glob->lru_lock);
> +	spin_unlock(&bdev->lru_lock);
>  
>  	spin_lock(&man->move_lock);
>  	fence = dma_fence_get(man->move);
> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> index d007feef7676..dbccac957f8f 100644
> --- a/include/drm/ttm/ttm_bo_driver.h
> +++ b/include/drm/ttm/ttm_bo_driver.h
> @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
>  static inline void
>  ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
>  {
> -	spin_lock(&ttm_glob.lru_lock);
> +	spin_lock(&bo->bdev->lru_lock);
>  	ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> -	spin_unlock(&ttm_glob.lru_lock);
> +	spin_unlock(&bo->bdev->lru_lock);
>  }
>  
>  static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> index cda6efb4c34b..bae56d29e8ff 100644
> --- a/include/drm/ttm/ttm_device.h
> +++ b/include/drm/ttm/ttm_device.h
> @@ -56,7 +56,6 @@ extern struct ttm_global {
>  	 */
>  
>  	struct page *dummy_read_page;
> -	spinlock_t lru_lock;
>  
>  	/**
>  	 * Protected by ttm_global_mutex.
> @@ -277,8 +276,9 @@ struct ttm_device {
>  	struct ttm_pool pool;
>  
>  	/*
> -	 * Protected by the global:lru lock.
> +	 * Protection for the per manager LRU and ddestroy lists.
>  	 */
> +	spinlock_t lru_lock;
>  	struct list_head ddestroy;
>  
>  	/*
> -- 
> 2.25.1
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
  2021-03-16  9:35   ` Daniel Vetter
@ 2021-03-16 12:03     ` Christian König
  2021-03-16 12:05       ` Daniel Vetter
  0 siblings, 1 reply; 19+ messages in thread
From: Christian König @ 2021-03-16 12:03 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: ray.huang, dri-devel

Am 16.03.21 um 10:35 schrieb Daniel Vetter:
> On Mon, Mar 15, 2021 at 05:04:22PM +0100, Christian König wrote:
>> Instead of having a global lock.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
> I guess per zone lru lock is a lot more work since then we need to handle
> ordering and ABBA deadlocks? s/zone/mem region/ I think is the proper ttm
> lingo.

Making the LRU per resource manager is the long term goal, yes.

My key idea so far is that we make bo->mem a pointer and then move the 
LRU handling into the resource object instead of the BO.

The resource object then just references the BO, so that we can 
figure out which BO to evict or which fence to wait for to free up a 
resource.

Regards,
Christian.

> -Daniel
>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  8 ++---
>>   drivers/gpu/drm/qxl/qxl_release.c      |  5 +--
>>   drivers/gpu/drm/ttm/ttm_bo.c           | 49 ++++++++++++--------------
>>   drivers/gpu/drm/ttm/ttm_device.c       | 12 +++----
>>   drivers/gpu/drm/ttm/ttm_execbuf_util.c |  8 ++---
>>   drivers/gpu/drm/ttm/ttm_resource.c     |  9 +++--
>>   include/drm/ttm/ttm_bo_driver.h        |  4 +--
>>   include/drm/ttm/ttm_device.h           |  4 +--
>>   8 files changed, 43 insertions(+), 56 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 9d19078246c8..ae18c0e32347 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>>   	struct amdgpu_vm_bo_base *bo_base;
>>   
>>   	if (vm->bulk_moveable) {
>> -		spin_lock(&ttm_glob.lru_lock);
>> +		spin_lock(&adev->mman.bdev.lru_lock);
>>   		ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
>> -		spin_unlock(&ttm_glob.lru_lock);
>> +		spin_unlock(&adev->mman.bdev.lru_lock);
>>   		return;
>>   	}
>>   
>>   	memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
>>   
>> -	spin_lock(&ttm_glob.lru_lock);
>> +	spin_lock(&adev->mman.bdev.lru_lock);
>>   	list_for_each_entry(bo_base, &vm->idle, vm_status) {
>>   		struct amdgpu_bo *bo = bo_base->bo;
>>   
>> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>>   						&bo->shadow->tbo.mem,
>>   						&vm->lru_bulk_move);
>>   	}
>> -	spin_unlock(&ttm_glob.lru_lock);
>> +	spin_unlock(&adev->mman.bdev.lru_lock);
>>   
>>   	vm->bulk_moveable = true;
>>   }
>> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
>> index f5845c96d414..b19f2f00b215 100644
>> --- a/drivers/gpu/drm/qxl/qxl_release.c
>> +++ b/drivers/gpu/drm/qxl/qxl_release.c
>> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
>>   		       release->id | 0xf0000000, release->base.seqno);
>>   	trace_dma_fence_emit(&release->base);
>>   
>> -	spin_lock(&ttm_glob.lru_lock);
>> -
>>   	list_for_each_entry(entry, &release->bos, head) {
>>   		bo = entry->bo;
>>   
>>   		dma_resv_add_shared_fence(bo->base.resv, &release->base);
>> -		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>> +		ttm_bo_move_to_lru_tail_unlocked(bo);
>>   		dma_resv_unlock(bo->base.resv);
>>   	}
>> -	spin_unlock(&ttm_glob.lru_lock);
>>   	ww_acquire_fini(&release->ticket);
>>   }
>>   
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index a1be88be357b..a8103c8718a3 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
>>   		 * reference it any more. The only tricky case is the trylock on
>>   		 * the resv object while holding the lru_lock.
>>   		 */
>> -		spin_lock(&ttm_glob.lru_lock);
>> +		spin_lock(&bo->bdev->lru_lock);
>>   		bo->base.resv = &bo->base._resv;
>> -		spin_unlock(&ttm_glob.lru_lock);
>> +		spin_unlock(&bo->bdev->lru_lock);
>>   	}
>>   
>>   	return r;
>> @@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>   
>>   		if (unlock_resv)
>>   			dma_resv_unlock(bo->base.resv);
>> -		spin_unlock(&ttm_glob.lru_lock);
>> +		spin_unlock(&bo->bdev->lru_lock);
>>   
>>   		lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
>>   						 30 * HZ);
>> @@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>   		else if (lret == 0)
>>   			return -EBUSY;
>>   
>> -		spin_lock(&ttm_glob.lru_lock);
>> +		spin_lock(&bo->bdev->lru_lock);
>>   		if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
>>   			/*
>>   			 * We raced, and lost, someone else holds the reservation now,
>> @@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>   			 * delayed destruction would succeed, so just return success
>>   			 * here.
>>   			 */
>> -			spin_unlock(&ttm_glob.lru_lock);
>> +			spin_unlock(&bo->bdev->lru_lock);
>>   			return 0;
>>   		}
>>   		ret = 0;
>> @@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>   	if (ret || unlikely(list_empty(&bo->ddestroy))) {
>>   		if (unlock_resv)
>>   			dma_resv_unlock(bo->base.resv);
>> -		spin_unlock(&ttm_glob.lru_lock);
>> +		spin_unlock(&bo->bdev->lru_lock);
>>   		return ret;
>>   	}
>>   
>>   	ttm_bo_del_from_lru(bo);
>>   	list_del_init(&bo->ddestroy);
>> -	spin_unlock(&ttm_glob.lru_lock);
>> +	spin_unlock(&bo->bdev->lru_lock);
>>   	ttm_bo_cleanup_memtype_use(bo);
>>   
>>   	if (unlock_resv)
>> @@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>    */
>>   bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>>   {
>> -	struct ttm_global *glob = &ttm_glob;
>>   	struct list_head removed;
>>   	bool empty;
>>   
>>   	INIT_LIST_HEAD(&removed);
>>   
>> -	spin_lock(&glob->lru_lock);
>> +	spin_lock(&bdev->lru_lock);
>>   	while (!list_empty(&bdev->ddestroy)) {
>>   		struct ttm_buffer_object *bo;
>>   
>> @@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>>   			continue;
>>   
>>   		if (remove_all || bo->base.resv != &bo->base._resv) {
>> -			spin_unlock(&glob->lru_lock);
>> +			spin_unlock(&bdev->lru_lock);
>>   			dma_resv_lock(bo->base.resv, NULL);
>>   
>> -			spin_lock(&glob->lru_lock);
>> +			spin_lock(&bdev->lru_lock);
>>   			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>>   
>>   		} else if (dma_resv_trylock(bo->base.resv)) {
>>   			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>>   		} else {
>> -			spin_unlock(&glob->lru_lock);
>> +			spin_unlock(&bdev->lru_lock);
>>   		}
>>   
>>   		ttm_bo_put(bo);
>> -		spin_lock(&glob->lru_lock);
>> +		spin_lock(&bdev->lru_lock);
>>   	}
>>   	list_splice_tail(&removed, &bdev->ddestroy);
>>   	empty = list_empty(&bdev->ddestroy);
>> -	spin_unlock(&glob->lru_lock);
>> +	spin_unlock(&bdev->lru_lock);
>>   
>>   	return empty;
>>   }
>> @@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref)
>>   		ttm_bo_flush_all_fences(bo);
>>   		bo->deleted = true;
>>   
>> -		spin_lock(&ttm_glob.lru_lock);
>> +		spin_lock(&bo->bdev->lru_lock);
>>   
>>   		/*
>>   		 * Make pinned bos immediately available to
>> @@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref)
>>   
>>   		kref_init(&bo->kref);
>>   		list_add_tail(&bo->ddestroy, &bdev->ddestroy);
>> -		spin_unlock(&ttm_glob.lru_lock);
>> +		spin_unlock(&bo->bdev->lru_lock);
>>   
>>   		schedule_delayed_work(&bdev->wq,
>>   				      ((HZ / 100) < 1) ? 1 : HZ / 100);
>>   		return;
>>   	}
>>   
>> -	spin_lock(&ttm_glob.lru_lock);
>> +	spin_lock(&bo->bdev->lru_lock);
>>   	ttm_bo_del_from_lru(bo);
>>   	list_del(&bo->ddestroy);
>> -	spin_unlock(&ttm_glob.lru_lock);
>> +	spin_unlock(&bo->bdev->lru_lock);
>>   
>>   	ttm_bo_cleanup_memtype_use(bo);
>>   	dma_resv_unlock(bo->base.resv);
>> @@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>>   	unsigned i;
>>   	int ret;
>>   
>> -	spin_lock(&ttm_glob.lru_lock);
>> +	spin_lock(&bo->bdev->lru_lock);
>>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>   		list_for_each_entry(bo, &man->lru[i], lru) {
>>   			bool busy;
>> @@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>>   	if (!bo) {
>>   		if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
>>   			busy_bo = NULL;
>> -		spin_unlock(&ttm_glob.lru_lock);
>> +		spin_unlock(&bo->bdev->lru_lock);
>>   		ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
>>   		if (busy_bo)
>>   			ttm_bo_put(busy_bo);
>> @@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>>   		return ret;
>>   	}
>>   
>> -	spin_unlock(&ttm_glob.lru_lock);
>> +	spin_unlock(&bo->bdev->lru_lock);
>>   
>>   	ret = ttm_bo_evict(bo, ctx);
>>   	if (locked)
>> @@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
>>   	mem->mem_type = place->mem_type;
>>   	mem->placement = place->flags;
>>   
>> -	spin_lock(&ttm_glob.lru_lock);
>> +	spin_lock(&bo->bdev->lru_lock);
>>   	ttm_bo_move_to_lru_tail(bo, mem, NULL);
>> -	spin_unlock(&ttm_glob.lru_lock);
>> -
>> +	spin_unlock(&bo->bdev->lru_lock);
>>   	return 0;
>>   }
>>   
>> @@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
>>   int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>>   		   gfp_t gfp_flags)
>>   {
>> -	struct ttm_global *glob = &ttm_glob;
>>   	bool locked;
>>   	int ret;
>>   
>> @@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>>   
>>   	ttm_bo_del_from_lru(bo);
>>   	/* TODO: Cleanup the locking */
>> -	spin_unlock(&glob->lru_lock);
>> +	spin_unlock(&bo->bdev->lru_lock);
>>   
>>   	/*
>>   	 * Move to system cached
>> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
>> index 2c280fb1e992..924d892109e8 100644
>> --- a/drivers/gpu/drm/ttm/ttm_device.c
>> +++ b/drivers/gpu/drm/ttm/ttm_device.c
>> @@ -81,7 +81,6 @@ static int ttm_global_init(void)
>>   	ttm_pool_mgr_init(num_pages * 50 / 100);
>>   	ttm_tt_mgr_init();
>>   
>> -	spin_lock_init(&glob->lru_lock);
>>   	glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
>>   
>>   	if (unlikely(glob->dummy_read_page == NULL)) {
>> @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
>>   long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>>   			gfp_t gfp_flags)
>>   {
>> -	struct ttm_global *glob = &ttm_glob;
>>   	struct ttm_resource_manager *man;
>>   	struct ttm_buffer_object *bo;
>>   	unsigned i, j;
>>   	int ret;
>>   
>> -	spin_lock(&glob->lru_lock);
>> +	spin_lock(&bdev->lru_lock);
>>   	for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
>>   		man = ttm_manager_type(bdev, i);
>>   		if (!man || !man->use_tt)
>> @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>>   			}
>>   		}
>>   	}
>> -	spin_unlock(&glob->lru_lock);
>> +	spin_unlock(&bdev->lru_lock);
>>   	return 0;
>>   }
>>   EXPORT_SYMBOL(ttm_device_swapout);
>> @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
>>   
>>   	bdev->vma_manager = vma_manager;
>>   	INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
>> +	spin_lock_init(&bdev->lru_lock);
>>   	INIT_LIST_HEAD(&bdev->ddestroy);
>>   	bdev->dev_mapping = mapping;
>>   	mutex_lock(&ttm_global_mutex);
>> @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
>>   
>>   void ttm_device_fini(struct ttm_device *bdev)
>>   {
>> -	struct ttm_global *glob = &ttm_glob;
>>   	struct ttm_resource_manager *man;
>>   	unsigned i;
>>   
>> @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
>>   	if (ttm_bo_delayed_delete(bdev, true))
>>   		pr_debug("Delayed destroy list was clean\n");
>>   
>> -	spin_lock(&glob->lru_lock);
>> +	spin_lock(&bdev->lru_lock);
>>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
>>   		if (list_empty(&man->lru[0]))
>>   			pr_debug("Swap list %d was clean\n", i);
>> -	spin_unlock(&glob->lru_lock);
>> +	spin_unlock(&bdev->lru_lock);
>>   
>>   	ttm_pool_fini(&bdev->pool);
>>   	ttm_global_release();
>> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> index 690ab97d52b7..071c48d672c6 100644
>> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
>>   	if (list_empty(list))
>>   		return;
>>   
>> -	spin_lock(&ttm_glob.lru_lock);
>>   	list_for_each_entry(entry, list, head) {
>>   		struct ttm_buffer_object *bo = entry->bo;
>>   
>> -		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>> +		ttm_bo_move_to_lru_tail_unlocked(bo);
>>   		dma_resv_unlock(bo->base.resv);
>>   	}
>> -	spin_unlock(&ttm_glob.lru_lock);
>>   
>>   	if (ticket)
>>   		ww_acquire_fini(ticket);
>> @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>>   	if (list_empty(list))
>>   		return;
>>   
>> -	spin_lock(&ttm_glob.lru_lock);
>>   	list_for_each_entry(entry, list, head) {
>>   		struct ttm_buffer_object *bo = entry->bo;
>>   
>> @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>>   			dma_resv_add_shared_fence(bo->base.resv, fence);
>>   		else
>>   			dma_resv_add_excl_fence(bo->base.resv, fence);
>> -		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>> +		ttm_bo_move_to_lru_tail_unlocked(bo);
>>   		dma_resv_unlock(bo->base.resv);
>>   	}
>> -	spin_unlock(&ttm_glob.lru_lock);
>>   	if (ticket)
>>   		ww_acquire_fini(ticket);
>>   }
>> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
>> index ed1672a9f332..04f2eef653ab 100644
>> --- a/drivers/gpu/drm/ttm/ttm_resource.c
>> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
>> @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>>   		.no_wait_gpu = false,
>>   		.force_alloc = true
>>   	};
>> -	struct ttm_global *glob = &ttm_glob;
>>   	struct dma_fence *fence;
>>   	int ret;
>>   	unsigned i;
>> @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>>   	 * Can't use standard list traversal since we're unlocking.
>>   	 */
>>   
>> -	spin_lock(&glob->lru_lock);
>> +	spin_lock(&bdev->lru_lock);
>>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>   		while (!list_empty(&man->lru[i])) {
>> -			spin_unlock(&glob->lru_lock);
>> +			spin_unlock(&bdev->lru_lock);
>>   			ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
>>   						  NULL);
>>   			if (ret)
>>   				return ret;
>> -			spin_lock(&glob->lru_lock);
>> +			spin_lock(&bdev->lru_lock);
>>   		}
>>   	}
>> -	spin_unlock(&glob->lru_lock);
>> +	spin_unlock(&bdev->lru_lock);
>>   
>>   	spin_lock(&man->move_lock);
>>   	fence = dma_fence_get(man->move);
>> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
>> index d007feef7676..dbccac957f8f 100644
>> --- a/include/drm/ttm/ttm_bo_driver.h
>> +++ b/include/drm/ttm/ttm_bo_driver.h
>> @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
>>   static inline void
>>   ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
>>   {
>> -	spin_lock(&ttm_glob.lru_lock);
>> +	spin_lock(&bo->bdev->lru_lock);
>>   	ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>> -	spin_unlock(&ttm_glob.lru_lock);
>> +	spin_unlock(&bo->bdev->lru_lock);
>>   }
>>   
>>   static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
>> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
>> index cda6efb4c34b..bae56d29e8ff 100644
>> --- a/include/drm/ttm/ttm_device.h
>> +++ b/include/drm/ttm/ttm_device.h
>> @@ -56,7 +56,6 @@ extern struct ttm_global {
>>   	 */
>>   
>>   	struct page *dummy_read_page;
>> -	spinlock_t lru_lock;
>>   
>>   	/**
>>   	 * Protected by ttm_global_mutex.
>> @@ -277,8 +276,9 @@ struct ttm_device {
>>   	struct ttm_pool pool;
>>   
>>   	/*
>> -	 * Protected by the global:lru lock.
>> +	 * Protection for the per manager LRU and ddestroy lists.
>>   	 */
>> +	spinlock_t lru_lock;
>>   	struct list_head ddestroy;
>>   
>>   	/*
>> -- 
>> 2.25.1
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
  2021-03-16 12:03     ` Christian König
@ 2021-03-16 12:05       ` Daniel Vetter
  2021-03-16 15:13         ` Christian König
  0 siblings, 1 reply; 19+ messages in thread
From: Daniel Vetter @ 2021-03-16 12:05 UTC (permalink / raw)
  To: Christian König; +Cc: Huang Rui, dri-devel

On Tue, Mar 16, 2021 at 1:03 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 16.03.21 um 10:35 schrieb Daniel Vetter:
> > On Mon, Mar 15, 2021 at 05:04:22PM +0100, Christian König wrote:
> >> Instead of having a global lock.
> >>
> >> Signed-off-by: Christian König <christian.koenig@amd.com>
> > I guess per zone lru lock is a lot more work since then we need to handle
> > ordering and ABBA deadlocks? s/zone/mem region/ I think is the proper ttm
> > lingo.
>
> Making the LRU per resource manager is the long term goal, yes.
>
> My key idea so far is that we make bo->mem a pointer and then move the
> LRU handling into the resource object instead of the BO.
>
> The resource object then just references the BO and so that we can
> figure out which BO to evict or which fence to wait for to free up a
> resource.

Hm yeah that could work out fairly nicely. Both from locking but also
refcounting pov. And maybe we could then use entirely free-standing
mem objects instead of ghost objects? Since that's a part of ttm I
don't grok and it always looks a bit like a hack to me. So for these
ghost mem objects you'd only need the lru + dma_fence_wait (can grab a
fence ref under the lru and then drop lru lock for that) for eviction,
no dma_resv_lock.
-Daniel
>
> Regards,
> Christian.
>
> > -Daniel
> >
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  8 ++---
> >>   drivers/gpu/drm/qxl/qxl_release.c      |  5 +--
> >>   drivers/gpu/drm/ttm/ttm_bo.c           | 49 ++++++++++++--------------
> >>   drivers/gpu/drm/ttm/ttm_device.c       | 12 +++----
> >>   drivers/gpu/drm/ttm/ttm_execbuf_util.c |  8 ++---
> >>   drivers/gpu/drm/ttm/ttm_resource.c     |  9 +++--
> >>   include/drm/ttm/ttm_bo_driver.h        |  4 +--
> >>   include/drm/ttm/ttm_device.h           |  4 +--
> >>   8 files changed, 43 insertions(+), 56 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> index 9d19078246c8..ae18c0e32347 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
> >>      struct amdgpu_vm_bo_base *bo_base;
> >>
> >>      if (vm->bulk_moveable) {
> >> -            spin_lock(&ttm_glob.lru_lock);
> >> +            spin_lock(&adev->mman.bdev.lru_lock);
> >>              ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
> >> -            spin_unlock(&ttm_glob.lru_lock);
> >> +            spin_unlock(&adev->mman.bdev.lru_lock);
> >>              return;
> >>      }
> >>
> >>      memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
> >>
> >> -    spin_lock(&ttm_glob.lru_lock);
> >> +    spin_lock(&adev->mman.bdev.lru_lock);
> >>      list_for_each_entry(bo_base, &vm->idle, vm_status) {
> >>              struct amdgpu_bo *bo = bo_base->bo;
> >>
> >> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
> >>                                              &bo->shadow->tbo.mem,
> >>                                              &vm->lru_bulk_move);
> >>      }
> >> -    spin_unlock(&ttm_glob.lru_lock);
> >> +    spin_unlock(&adev->mman.bdev.lru_lock);
> >>
> >>      vm->bulk_moveable = true;
> >>   }
> >> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
> >> index f5845c96d414..b19f2f00b215 100644
> >> --- a/drivers/gpu/drm/qxl/qxl_release.c
> >> +++ b/drivers/gpu/drm/qxl/qxl_release.c
> >> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
> >>                     release->id | 0xf0000000, release->base.seqno);
> >>      trace_dma_fence_emit(&release->base);
> >>
> >> -    spin_lock(&ttm_glob.lru_lock);
> >> -
> >>      list_for_each_entry(entry, &release->bos, head) {
> >>              bo = entry->bo;
> >>
> >>              dma_resv_add_shared_fence(bo->base.resv, &release->base);
> >> -            ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> >> +            ttm_bo_move_to_lru_tail_unlocked(bo);
> >>              dma_resv_unlock(bo->base.resv);
> >>      }
> >> -    spin_unlock(&ttm_glob.lru_lock);
> >>      ww_acquire_fini(&release->ticket);
> >>   }
> >>
> >> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> >> index a1be88be357b..a8103c8718a3 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> >> @@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
> >>               * reference it any more. The only tricky case is the trylock on
> >>               * the resv object while holding the lru_lock.
> >>               */
> >> -            spin_lock(&ttm_glob.lru_lock);
> >> +            spin_lock(&bo->bdev->lru_lock);
> >>              bo->base.resv = &bo->base._resv;
> >> -            spin_unlock(&ttm_glob.lru_lock);
> >> +            spin_unlock(&bo->bdev->lru_lock);
> >>      }
> >>
> >>      return r;
> >> @@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >>
> >>              if (unlock_resv)
> >>                      dma_resv_unlock(bo->base.resv);
> >> -            spin_unlock(&ttm_glob.lru_lock);
> >> +            spin_unlock(&bo->bdev->lru_lock);
> >>
> >>              lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
> >>                                               30 * HZ);
> >> @@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >>              else if (lret == 0)
> >>                      return -EBUSY;
> >>
> >> -            spin_lock(&ttm_glob.lru_lock);
> >> +            spin_lock(&bo->bdev->lru_lock);
> >>              if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
> >>                      /*
> >>                       * We raced, and lost, someone else holds the reservation now,
> >> @@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >>                       * delayed destruction would succeed, so just return success
> >>                       * here.
> >>                       */
> >> -                    spin_unlock(&ttm_glob.lru_lock);
> >> +                    spin_unlock(&bo->bdev->lru_lock);
> >>                      return 0;
> >>              }
> >>              ret = 0;
> >> @@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >>      if (ret || unlikely(list_empty(&bo->ddestroy))) {
> >>              if (unlock_resv)
> >>                      dma_resv_unlock(bo->base.resv);
> >> -            spin_unlock(&ttm_glob.lru_lock);
> >> +            spin_unlock(&bo->bdev->lru_lock);
> >>              return ret;
> >>      }
> >>
> >>      ttm_bo_del_from_lru(bo);
> >>      list_del_init(&bo->ddestroy);
> >> -    spin_unlock(&ttm_glob.lru_lock);
> >> +    spin_unlock(&bo->bdev->lru_lock);
> >>      ttm_bo_cleanup_memtype_use(bo);
> >>
> >>      if (unlock_resv)
> >> @@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >>    */
> >>   bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
> >>   {
> >> -    struct ttm_global *glob = &ttm_glob;
> >>      struct list_head removed;
> >>      bool empty;
> >>
> >>      INIT_LIST_HEAD(&removed);
> >>
> >> -    spin_lock(&glob->lru_lock);
> >> +    spin_lock(&bdev->lru_lock);
> >>      while (!list_empty(&bdev->ddestroy)) {
> >>              struct ttm_buffer_object *bo;
> >>
> >> @@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
> >>                      continue;
> >>
> >>              if (remove_all || bo->base.resv != &bo->base._resv) {
> >> -                    spin_unlock(&glob->lru_lock);
> >> +                    spin_unlock(&bdev->lru_lock);
> >>                      dma_resv_lock(bo->base.resv, NULL);
> >>
> >> -                    spin_lock(&glob->lru_lock);
> >> +                    spin_lock(&bdev->lru_lock);
> >>                      ttm_bo_cleanup_refs(bo, false, !remove_all, true);
> >>
> >>              } else if (dma_resv_trylock(bo->base.resv)) {
> >>                      ttm_bo_cleanup_refs(bo, false, !remove_all, true);
> >>              } else {
> >> -                    spin_unlock(&glob->lru_lock);
> >> +                    spin_unlock(&bdev->lru_lock);
> >>              }
> >>
> >>              ttm_bo_put(bo);
> >> -            spin_lock(&glob->lru_lock);
> >> +            spin_lock(&bdev->lru_lock);
> >>      }
> >>      list_splice_tail(&removed, &bdev->ddestroy);
> >>      empty = list_empty(&bdev->ddestroy);
> >> -    spin_unlock(&glob->lru_lock);
> >> +    spin_unlock(&bdev->lru_lock);
> >>
> >>      return empty;
> >>   }
> >> @@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref)
> >>              ttm_bo_flush_all_fences(bo);
> >>              bo->deleted = true;
> >>
> >> -            spin_lock(&ttm_glob.lru_lock);
> >> +            spin_lock(&bo->bdev->lru_lock);
> >>
> >>              /*
> >>               * Make pinned bos immediately available to
> >> @@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref)
> >>
> >>              kref_init(&bo->kref);
> >>              list_add_tail(&bo->ddestroy, &bdev->ddestroy);
> >> -            spin_unlock(&ttm_glob.lru_lock);
> >> +            spin_unlock(&bo->bdev->lru_lock);
> >>
> >>              schedule_delayed_work(&bdev->wq,
> >>                                    ((HZ / 100) < 1) ? 1 : HZ / 100);
> >>              return;
> >>      }
> >>
> >> -    spin_lock(&ttm_glob.lru_lock);
> >> +    spin_lock(&bo->bdev->lru_lock);
> >>      ttm_bo_del_from_lru(bo);
> >>      list_del(&bo->ddestroy);
> >> -    spin_unlock(&ttm_glob.lru_lock);
> >> +    spin_unlock(&bo->bdev->lru_lock);
> >>
> >>      ttm_bo_cleanup_memtype_use(bo);
> >>      dma_resv_unlock(bo->base.resv);
> >> @@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> >>      unsigned i;
> >>      int ret;
> >>
> >> -    spin_lock(&ttm_glob.lru_lock);
> >> +    spin_lock(&bo->bdev->lru_lock);
> >>      for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> >>              list_for_each_entry(bo, &man->lru[i], lru) {
> >>                      bool busy;
> >> @@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> >>      if (!bo) {
> >>              if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
> >>                      busy_bo = NULL;
> >> -            spin_unlock(&ttm_glob.lru_lock);
> >> +            spin_unlock(&bo->bdev->lru_lock);
> >>              ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
> >>              if (busy_bo)
> >>                      ttm_bo_put(busy_bo);
> >> @@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> >>              return ret;
> >>      }
> >>
> >> -    spin_unlock(&ttm_glob.lru_lock);
> >> +    spin_unlock(&bo->bdev->lru_lock);
> >>
> >>      ret = ttm_bo_evict(bo, ctx);
> >>      if (locked)
> >> @@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
> >>      mem->mem_type = place->mem_type;
> >>      mem->placement = place->flags;
> >>
> >> -    spin_lock(&ttm_glob.lru_lock);
> >> +    spin_lock(&bo->bdev->lru_lock);
> >>      ttm_bo_move_to_lru_tail(bo, mem, NULL);
> >> -    spin_unlock(&ttm_glob.lru_lock);
> >> -
> >> +    spin_unlock(&bo->bdev->lru_lock);
> >>      return 0;
> >>   }
> >>
> >> @@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
> >>   int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
> >>                 gfp_t gfp_flags)
> >>   {
> >> -    struct ttm_global *glob = &ttm_glob;
> >>      bool locked;
> >>      int ret;
> >>
> >> @@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
> >>
> >>      ttm_bo_del_from_lru(bo);
> >>      /* TODO: Cleanup the locking */
> >> -    spin_unlock(&glob->lru_lock);
> >> +    spin_unlock(&bo->bdev->lru_lock);
> >>
> >>      /*
> >>       * Move to system cached
> >> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> >> index 2c280fb1e992..924d892109e8 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_device.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> >> @@ -81,7 +81,6 @@ static int ttm_global_init(void)
> >>      ttm_pool_mgr_init(num_pages * 50 / 100);
> >>      ttm_tt_mgr_init();
> >>
> >> -    spin_lock_init(&glob->lru_lock);
> >>      glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
> >>
> >>      if (unlikely(glob->dummy_read_page == NULL)) {
> >> @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
> >>   long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> >>                      gfp_t gfp_flags)
> >>   {
> >> -    struct ttm_global *glob = &ttm_glob;
> >>      struct ttm_resource_manager *man;
> >>      struct ttm_buffer_object *bo;
> >>      unsigned i, j;
> >>      int ret;
> >>
> >> -    spin_lock(&glob->lru_lock);
> >> +    spin_lock(&bdev->lru_lock);
> >>      for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
> >>              man = ttm_manager_type(bdev, i);
> >>              if (!man || !man->use_tt)
> >> @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> >>                      }
> >>              }
> >>      }
> >> -    spin_unlock(&glob->lru_lock);
> >> +    spin_unlock(&bdev->lru_lock);
> >>      return 0;
> >>   }
> >>   EXPORT_SYMBOL(ttm_device_swapout);
> >> @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
> >>
> >>      bdev->vma_manager = vma_manager;
> >>      INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
> >> +    spin_lock_init(&bdev->lru_lock);
> >>      INIT_LIST_HEAD(&bdev->ddestroy);
> >>      bdev->dev_mapping = mapping;
> >>      mutex_lock(&ttm_global_mutex);
> >> @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
> >>
> >>   void ttm_device_fini(struct ttm_device *bdev)
> >>   {
> >> -    struct ttm_global *glob = &ttm_glob;
> >>      struct ttm_resource_manager *man;
> >>      unsigned i;
> >>
> >> @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
> >>      if (ttm_bo_delayed_delete(bdev, true))
> >>              pr_debug("Delayed destroy list was clean\n");
> >>
> >> -    spin_lock(&glob->lru_lock);
> >> +    spin_lock(&bdev->lru_lock);
> >>      for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
> >>              if (list_empty(&man->lru[0]))
> >>                      pr_debug("Swap list %d was clean\n", i);
> >> -    spin_unlock(&glob->lru_lock);
> >> +    spin_unlock(&bdev->lru_lock);
> >>
> >>      ttm_pool_fini(&bdev->pool);
> >>      ttm_global_release();
> >> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> >> index 690ab97d52b7..071c48d672c6 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> >> @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
> >>      if (list_empty(list))
> >>              return;
> >>
> >> -    spin_lock(&ttm_glob.lru_lock);
> >>      list_for_each_entry(entry, list, head) {
> >>              struct ttm_buffer_object *bo = entry->bo;
> >>
> >> -            ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> >> +            ttm_bo_move_to_lru_tail_unlocked(bo);
> >>              dma_resv_unlock(bo->base.resv);
> >>      }
> >> -    spin_unlock(&ttm_glob.lru_lock);
> >>
> >>      if (ticket)
> >>              ww_acquire_fini(ticket);
> >> @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
> >>      if (list_empty(list))
> >>              return;
> >>
> >> -    spin_lock(&ttm_glob.lru_lock);
> >>      list_for_each_entry(entry, list, head) {
> >>              struct ttm_buffer_object *bo = entry->bo;
> >>
> >> @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
> >>                      dma_resv_add_shared_fence(bo->base.resv, fence);
> >>              else
> >>                      dma_resv_add_excl_fence(bo->base.resv, fence);
> >> -            ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> >> +            ttm_bo_move_to_lru_tail_unlocked(bo);
> >>              dma_resv_unlock(bo->base.resv);
> >>      }
> >> -    spin_unlock(&ttm_glob.lru_lock);
> >>      if (ticket)
> >>              ww_acquire_fini(ticket);
> >>   }
> >> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
> >> index ed1672a9f332..04f2eef653ab 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_resource.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> >> @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
> >>              .no_wait_gpu = false,
> >>              .force_alloc = true
> >>      };
> >> -    struct ttm_global *glob = &ttm_glob;
> >>      struct dma_fence *fence;
> >>      int ret;
> >>      unsigned i;
> >> @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
> >>       * Can't use standard list traversal since we're unlocking.
> >>       */
> >>
> >> -    spin_lock(&glob->lru_lock);
> >> +    spin_lock(&bdev->lru_lock);
> >>      for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> >>              while (!list_empty(&man->lru[i])) {
> >> -                    spin_unlock(&glob->lru_lock);
> >> +                    spin_unlock(&bdev->lru_lock);
> >>                      ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
> >>                                                NULL);
> >>                      if (ret)
> >>                              return ret;
> >> -                    spin_lock(&glob->lru_lock);
> >> +                    spin_lock(&bdev->lru_lock);
> >>              }
> >>      }
> >> -    spin_unlock(&glob->lru_lock);
> >> +    spin_unlock(&bdev->lru_lock);
> >>
> >>      spin_lock(&man->move_lock);
> >>      fence = dma_fence_get(man->move);
> >> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> >> index d007feef7676..dbccac957f8f 100644
> >> --- a/include/drm/ttm/ttm_bo_driver.h
> >> +++ b/include/drm/ttm/ttm_bo_driver.h
> >> @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
> >>   static inline void
> >>   ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
> >>   {
> >> -    spin_lock(&ttm_glob.lru_lock);
> >> +    spin_lock(&bo->bdev->lru_lock);
> >>      ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> >> -    spin_unlock(&ttm_glob.lru_lock);
> >> +    spin_unlock(&bo->bdev->lru_lock);
> >>   }
> >>
> >>   static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
> >> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> >> index cda6efb4c34b..bae56d29e8ff 100644
> >> --- a/include/drm/ttm/ttm_device.h
> >> +++ b/include/drm/ttm/ttm_device.h
> >> @@ -56,7 +56,6 @@ extern struct ttm_global {
> >>       */
> >>
> >>      struct page *dummy_read_page;
> >> -    spinlock_t lru_lock;
> >>
> >>      /**
> >>       * Protected by ttm_global_mutex.
> >> @@ -277,8 +276,9 @@ struct ttm_device {
> >>      struct ttm_pool pool;
> >>
> >>      /*
> >> -     * Protected by the global:lru lock.
> >> +     * Protection for the per manager LRU and ddestroy lists.
> >>       */
> >> +    spinlock_t lru_lock;
> >>      struct list_head ddestroy;
> >>
> >>      /*
> >> --
> >> 2.25.1
> >>
> >> _______________________________________________
> >> dri-devel mailing list
> >> dri-devel@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>


-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
  2021-03-16 12:05       ` Daniel Vetter
@ 2021-03-16 15:13         ` Christian König
  0 siblings, 0 replies; 19+ messages in thread
From: Christian König @ 2021-03-16 15:13 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Huang Rui, dri-devel



Am 16.03.21 um 13:05 schrieb Daniel Vetter:
> On Tue, Mar 16, 2021 at 1:03 PM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> Am 16.03.21 um 10:35 schrieb Daniel Vetter:
>>> On Mon, Mar 15, 2021 at 05:04:22PM +0100, Christian König wrote:
>>>> Instead of having a global lock.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> I guess per zone lru lock is a lot more work since then we need to handle
>>> ordering and ABBA deadlocks? s/zone/mem region/ I think is the proper ttm
>>> lingo.
>> Making the LRU per resource manager is the long term goal, yes.
>>
>> My key idea so far is that we make bo->mem a pointer and then move the
>> LRU handling into the resource object instead of the BO.
>>
>> The resource object then just references the BO so that we can
>> figure out which BO to evict or which fence to wait for to free up a
>> resource.
> Hm yeah that could work out fairly nicely. Both from locking but also
> refcounting pov. And maybe we could then use entirely free-standing
> mem objects instead of ghost objects? Since that's a part of ttm I
> don't grok and it always looks a bit like a hack to me. So for these
> ghost mem objects you'd only need the lru + dma_fence_wait (can grab a
> fence ref under the lru and then drop lru lock for that) for eviction,
> no dma_resv_lock.

Exactly that's the background here, yes.

Those ghost objects are more than just a bit of a hack and result in
tons of checks in the driver for whether a BO is really a BO or a ghost.

Moving all that handling into the resource objects not only allows us to 
remove that, but also makes things like delayed delete work out pretty 
nicely.

Christian.

> -Daniel
>> Regards,
>> Christian.
>>
>>> -Daniel
>>>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  8 ++---
>>>>    drivers/gpu/drm/qxl/qxl_release.c      |  5 +--
>>>>    drivers/gpu/drm/ttm/ttm_bo.c           | 49 ++++++++++++--------------
>>>>    drivers/gpu/drm/ttm/ttm_device.c       | 12 +++----
>>>>    drivers/gpu/drm/ttm/ttm_execbuf_util.c |  8 ++---
>>>>    drivers/gpu/drm/ttm/ttm_resource.c     |  9 +++--
>>>>    include/drm/ttm/ttm_bo_driver.h        |  4 +--
>>>>    include/drm/ttm/ttm_device.h           |  4 +--
>>>>    8 files changed, 43 insertions(+), 56 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index 9d19078246c8..ae18c0e32347 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>>>>       struct amdgpu_vm_bo_base *bo_base;
>>>>
>>>>       if (vm->bulk_moveable) {
>>>> -            spin_lock(&ttm_glob.lru_lock);
>>>> +            spin_lock(&adev->mman.bdev.lru_lock);
>>>>               ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
>>>> -            spin_unlock(&ttm_glob.lru_lock);
>>>> +            spin_unlock(&adev->mman.bdev.lru_lock);
>>>>               return;
>>>>       }
>>>>
>>>>       memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
>>>>
>>>> -    spin_lock(&ttm_glob.lru_lock);
>>>> +    spin_lock(&adev->mman.bdev.lru_lock);
>>>>       list_for_each_entry(bo_base, &vm->idle, vm_status) {
>>>>               struct amdgpu_bo *bo = bo_base->bo;
>>>>
>>>> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>>>>                                               &bo->shadow->tbo.mem,
>>>>                                               &vm->lru_bulk_move);
>>>>       }
>>>> -    spin_unlock(&ttm_glob.lru_lock);
>>>> +    spin_unlock(&adev->mman.bdev.lru_lock);
>>>>
>>>>       vm->bulk_moveable = true;
>>>>    }
>>>> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
>>>> index f5845c96d414..b19f2f00b215 100644
>>>> --- a/drivers/gpu/drm/qxl/qxl_release.c
>>>> +++ b/drivers/gpu/drm/qxl/qxl_release.c
>>>> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
>>>>                      release->id | 0xf0000000, release->base.seqno);
>>>>       trace_dma_fence_emit(&release->base);
>>>>
>>>> -    spin_lock(&ttm_glob.lru_lock);
>>>> -
>>>>       list_for_each_entry(entry, &release->bos, head) {
>>>>               bo = entry->bo;
>>>>
>>>>               dma_resv_add_shared_fence(bo->base.resv, &release->base);
>>>> -            ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>>>> +            ttm_bo_move_to_lru_tail_unlocked(bo);
>>>>               dma_resv_unlock(bo->base.resv);
>>>>       }
>>>> -    spin_unlock(&ttm_glob.lru_lock);
>>>>       ww_acquire_fini(&release->ticket);
>>>>    }
>>>>
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>>> index a1be88be357b..a8103c8718a3 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>> @@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
>>>>                * reference it any more. The only tricky case is the trylock on
>>>>                * the resv object while holding the lru_lock.
>>>>                */
>>>> -            spin_lock(&ttm_glob.lru_lock);
>>>> +            spin_lock(&bo->bdev->lru_lock);
>>>>               bo->base.resv = &bo->base._resv;
>>>> -            spin_unlock(&ttm_glob.lru_lock);
>>>> +            spin_unlock(&bo->bdev->lru_lock);
>>>>       }
>>>>
>>>>       return r;
>>>> @@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>>>
>>>>               if (unlock_resv)
>>>>                       dma_resv_unlock(bo->base.resv);
>>>> -            spin_unlock(&ttm_glob.lru_lock);
>>>> +            spin_unlock(&bo->bdev->lru_lock);
>>>>
>>>>               lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
>>>>                                                30 * HZ);
>>>> @@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>>>               else if (lret == 0)
>>>>                       return -EBUSY;
>>>>
>>>> -            spin_lock(&ttm_glob.lru_lock);
>>>> +            spin_lock(&bo->bdev->lru_lock);
>>>>               if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
>>>>                       /*
>>>>                        * We raced, and lost, someone else holds the reservation now,
>>>> @@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>>>                        * delayed destruction would succeed, so just return success
>>>>                        * here.
>>>>                        */
>>>> -                    spin_unlock(&ttm_glob.lru_lock);
>>>> +                    spin_unlock(&bo->bdev->lru_lock);
>>>>                       return 0;
>>>>               }
>>>>               ret = 0;
>>>> @@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>>>       if (ret || unlikely(list_empty(&bo->ddestroy))) {
>>>>               if (unlock_resv)
>>>>                       dma_resv_unlock(bo->base.resv);
>>>> -            spin_unlock(&ttm_glob.lru_lock);
>>>> +            spin_unlock(&bo->bdev->lru_lock);
>>>>               return ret;
>>>>       }
>>>>
>>>>       ttm_bo_del_from_lru(bo);
>>>>       list_del_init(&bo->ddestroy);
>>>> -    spin_unlock(&ttm_glob.lru_lock);
>>>> +    spin_unlock(&bo->bdev->lru_lock);
>>>>       ttm_bo_cleanup_memtype_use(bo);
>>>>
>>>>       if (unlock_resv)
>>>> @@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>>>     */
>>>>    bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>>>>    {
>>>> -    struct ttm_global *glob = &ttm_glob;
>>>>       struct list_head removed;
>>>>       bool empty;
>>>>
>>>>       INIT_LIST_HEAD(&removed);
>>>>
>>>> -    spin_lock(&glob->lru_lock);
>>>> +    spin_lock(&bdev->lru_lock);
>>>>       while (!list_empty(&bdev->ddestroy)) {
>>>>               struct ttm_buffer_object *bo;
>>>>
>>>> @@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>>>>                       continue;
>>>>
>>>>               if (remove_all || bo->base.resv != &bo->base._resv) {
>>>> -                    spin_unlock(&glob->lru_lock);
>>>> +                    spin_unlock(&bdev->lru_lock);
>>>>                       dma_resv_lock(bo->base.resv, NULL);
>>>>
>>>> -                    spin_lock(&glob->lru_lock);
>>>> +                    spin_lock(&bdev->lru_lock);
>>>>                       ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>>>>
>>>>               } else if (dma_resv_trylock(bo->base.resv)) {
>>>>                       ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>>>>               } else {
>>>> -                    spin_unlock(&glob->lru_lock);
>>>> +                    spin_unlock(&bdev->lru_lock);
>>>>               }
>>>>
>>>>               ttm_bo_put(bo);
>>>> -            spin_lock(&glob->lru_lock);
>>>> +            spin_lock(&bdev->lru_lock);
>>>>       }
>>>>       list_splice_tail(&removed, &bdev->ddestroy);
>>>>       empty = list_empty(&bdev->ddestroy);
>>>> -    spin_unlock(&glob->lru_lock);
>>>> +    spin_unlock(&bdev->lru_lock);
>>>>
>>>>       return empty;
>>>>    }
>>>> @@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref)
>>>>               ttm_bo_flush_all_fences(bo);
>>>>               bo->deleted = true;
>>>>
>>>> -            spin_lock(&ttm_glob.lru_lock);
>>>> +            spin_lock(&bo->bdev->lru_lock);
>>>>
>>>>               /*
>>>>                * Make pinned bos immediately available to
>>>> @@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref)
>>>>
>>>>               kref_init(&bo->kref);
>>>>               list_add_tail(&bo->ddestroy, &bdev->ddestroy);
>>>> -            spin_unlock(&ttm_glob.lru_lock);
>>>> +            spin_unlock(&bo->bdev->lru_lock);
>>>>
>>>>               schedule_delayed_work(&bdev->wq,
>>>>                                     ((HZ / 100) < 1) ? 1 : HZ / 100);
>>>>               return;
>>>>       }
>>>>
>>>> -    spin_lock(&ttm_glob.lru_lock);
>>>> +    spin_lock(&bo->bdev->lru_lock);
>>>>       ttm_bo_del_from_lru(bo);
>>>>       list_del(&bo->ddestroy);
>>>> -    spin_unlock(&ttm_glob.lru_lock);
>>>> +    spin_unlock(&bo->bdev->lru_lock);
>>>>
>>>>       ttm_bo_cleanup_memtype_use(bo);
>>>>       dma_resv_unlock(bo->base.resv);
>>>> @@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>>>>       unsigned i;
>>>>       int ret;
>>>>
>>>> -    spin_lock(&ttm_glob.lru_lock);
>>>> +    spin_lock(&bo->bdev->lru_lock);
>>>>       for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>>>               list_for_each_entry(bo, &man->lru[i], lru) {
>>>>                       bool busy;
>>>> @@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>>>>       if (!bo) {
>>>>               if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
>>>>                       busy_bo = NULL;
>>>> -            spin_unlock(&ttm_glob.lru_lock);
>>>> +            spin_unlock(&bo->bdev->lru_lock);
>>>>               ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
>>>>               if (busy_bo)
>>>>                       ttm_bo_put(busy_bo);
>>>> @@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>>>>               return ret;
>>>>       }
>>>>
>>>> -    spin_unlock(&ttm_glob.lru_lock);
>>>> +    spin_unlock(&bo->bdev->lru_lock);
>>>>
>>>>       ret = ttm_bo_evict(bo, ctx);
>>>>       if (locked)
>>>> @@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
>>>>       mem->mem_type = place->mem_type;
>>>>       mem->placement = place->flags;
>>>>
>>>> -    spin_lock(&ttm_glob.lru_lock);
>>>> +    spin_lock(&bo->bdev->lru_lock);
>>>>       ttm_bo_move_to_lru_tail(bo, mem, NULL);
>>>> -    spin_unlock(&ttm_glob.lru_lock);
>>>> -
>>>> +    spin_unlock(&bo->bdev->lru_lock);
>>>>       return 0;
>>>>    }
>>>>
>>>> @@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
>>>>    int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>>>>                  gfp_t gfp_flags)
>>>>    {
>>>> -    struct ttm_global *glob = &ttm_glob;
>>>>       bool locked;
>>>>       int ret;
>>>>
>>>> @@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>>>>
>>>>       ttm_bo_del_from_lru(bo);
>>>>       /* TODO: Cleanup the locking */
>>>> -    spin_unlock(&glob->lru_lock);
>>>> +    spin_unlock(&bo->bdev->lru_lock);
>>>>
>>>>       /*
>>>>        * Move to system cached
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
>>>> index 2c280fb1e992..924d892109e8 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_device.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_device.c
>>>> @@ -81,7 +81,6 @@ static int ttm_global_init(void)
>>>>       ttm_pool_mgr_init(num_pages * 50 / 100);
>>>>       ttm_tt_mgr_init();
>>>>
>>>> -    spin_lock_init(&glob->lru_lock);
>>>>       glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
>>>>
>>>>       if (unlikely(glob->dummy_read_page == NULL)) {
>>>> @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
>>>>    long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>>>>                       gfp_t gfp_flags)
>>>>    {
>>>> -    struct ttm_global *glob = &ttm_glob;
>>>>       struct ttm_resource_manager *man;
>>>>       struct ttm_buffer_object *bo;
>>>>       unsigned i, j;
>>>>       int ret;
>>>>
>>>> -    spin_lock(&glob->lru_lock);
>>>> +    spin_lock(&bdev->lru_lock);
>>>>       for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
>>>>               man = ttm_manager_type(bdev, i);
>>>>               if (!man || !man->use_tt)
>>>> @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>>>>                       }
>>>>               }
>>>>       }
>>>> -    spin_unlock(&glob->lru_lock);
>>>> +    spin_unlock(&bdev->lru_lock);
>>>>       return 0;
>>>>    }
>>>>    EXPORT_SYMBOL(ttm_device_swapout);
>>>> @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
>>>>
>>>>       bdev->vma_manager = vma_manager;
>>>>       INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
>>>> +    spin_lock_init(&bdev->lru_lock);
>>>>       INIT_LIST_HEAD(&bdev->ddestroy);
>>>>       bdev->dev_mapping = mapping;
>>>>       mutex_lock(&ttm_global_mutex);
>>>> @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
>>>>
>>>>    void ttm_device_fini(struct ttm_device *bdev)
>>>>    {
>>>> -    struct ttm_global *glob = &ttm_glob;
>>>>       struct ttm_resource_manager *man;
>>>>       unsigned i;
>>>>
>>>> @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
>>>>       if (ttm_bo_delayed_delete(bdev, true))
>>>>               pr_debug("Delayed destroy list was clean\n");
>>>>
>>>> -    spin_lock(&glob->lru_lock);
>>>> +    spin_lock(&bdev->lru_lock);
>>>>       for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
>>>>               if (list_empty(&man->lru[0]))
>>>>                       pr_debug("Swap list %d was clean\n", i);
>>>> -    spin_unlock(&glob->lru_lock);
>>>> +    spin_unlock(&bdev->lru_lock);
>>>>
>>>>       ttm_pool_fini(&bdev->pool);
>>>>       ttm_global_release();
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>>>> index 690ab97d52b7..071c48d672c6 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>>>> @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
>>>>       if (list_empty(list))
>>>>               return;
>>>>
>>>> -    spin_lock(&ttm_glob.lru_lock);
>>>>       list_for_each_entry(entry, list, head) {
>>>>               struct ttm_buffer_object *bo = entry->bo;
>>>>
>>>> -            ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>>>> +            ttm_bo_move_to_lru_tail_unlocked(bo);
>>>>               dma_resv_unlock(bo->base.resv);
>>>>       }
>>>> -    spin_unlock(&ttm_glob.lru_lock);
>>>>
>>>>       if (ticket)
>>>>               ww_acquire_fini(ticket);
>>>> @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>>>>       if (list_empty(list))
>>>>               return;
>>>>
>>>> -    spin_lock(&ttm_glob.lru_lock);
>>>>       list_for_each_entry(entry, list, head) {
>>>>               struct ttm_buffer_object *bo = entry->bo;
>>>>
>>>> @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>>>>                       dma_resv_add_shared_fence(bo->base.resv, fence);
>>>>               else
>>>>                       dma_resv_add_excl_fence(bo->base.resv, fence);
>>>> -            ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>>>> +            ttm_bo_move_to_lru_tail_unlocked(bo);
>>>>               dma_resv_unlock(bo->base.resv);
>>>>       }
>>>> -    spin_unlock(&ttm_glob.lru_lock);
>>>>       if (ticket)
>>>>               ww_acquire_fini(ticket);
>>>>    }
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
>>>> index ed1672a9f332..04f2eef653ab 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_resource.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
>>>> @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>>>>               .no_wait_gpu = false,
>>>>               .force_alloc = true
>>>>       };
>>>> -    struct ttm_global *glob = &ttm_glob;
>>>>       struct dma_fence *fence;
>>>>       int ret;
>>>>       unsigned i;
>>>> @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>>>>        * Can't use standard list traversal since we're unlocking.
>>>>        */
>>>>
>>>> -    spin_lock(&glob->lru_lock);
>>>> +    spin_lock(&bdev->lru_lock);
>>>>       for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>>>               while (!list_empty(&man->lru[i])) {
>>>> -                    spin_unlock(&glob->lru_lock);
>>>> +                    spin_unlock(&bdev->lru_lock);
>>>>                       ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
>>>>                                                 NULL);
>>>>                       if (ret)
>>>>                               return ret;
>>>> -                    spin_lock(&glob->lru_lock);
>>>> +                    spin_lock(&bdev->lru_lock);
>>>>               }
>>>>       }
>>>> -    spin_unlock(&glob->lru_lock);
>>>> +    spin_unlock(&bdev->lru_lock);
>>>>
>>>>       spin_lock(&man->move_lock);
>>>>       fence = dma_fence_get(man->move);
>>>> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
>>>> index d007feef7676..dbccac957f8f 100644
>>>> --- a/include/drm/ttm/ttm_bo_driver.h
>>>> +++ b/include/drm/ttm/ttm_bo_driver.h
>>>> @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
>>>>    static inline void
>>>>    ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
>>>>    {
>>>> -    spin_lock(&ttm_glob.lru_lock);
>>>> +    spin_lock(&bo->bdev->lru_lock);
>>>>       ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>>>> -    spin_unlock(&ttm_glob.lru_lock);
>>>> +    spin_unlock(&bo->bdev->lru_lock);
>>>>    }
>>>>
>>>>    static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
>>>> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
>>>> index cda6efb4c34b..bae56d29e8ff 100644
>>>> --- a/include/drm/ttm/ttm_device.h
>>>> +++ b/include/drm/ttm/ttm_device.h
>>>> @@ -56,7 +56,6 @@ extern struct ttm_global {
>>>>        */
>>>>
>>>>       struct page *dummy_read_page;
>>>> -    spinlock_t lru_lock;
>>>>
>>>>       /**
>>>>        * Protected by ttm_global_mutex.
>>>> @@ -277,8 +276,9 @@ struct ttm_device {
>>>>       struct ttm_pool pool;
>>>>
>>>>       /*
>>>> -     * Protected by the global:lru lock.
>>>> +     * Protection for the per manager LRU and ddestroy lists.
>>>>        */
>>>> +    spinlock_t lru_lock;
>>>>       struct list_head ddestroy;
>>>>
>>>>       /*
>>>> --
>>>> 2.25.1
>>>>
>>>> _______________________________________________
>>>> dri-devel mailing list
>>>> dri-devel@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/3] drm/ttm: move swapout logic around
  2021-03-15 16:04 [PATCH 1/3] drm/ttm: move swapout logic around Christian König
@ 2021-03-19  9:41   ` kernel test robot
  2021-03-15 16:04 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 19+ messages in thread
From: kernel test robot @ 2021-03-19  9:41 UTC (permalink / raw)
  To: Christian König, dri-devel; +Cc: clang-built-linux, ray.huang, kbuild-all

[-- Attachment #1: Type: text/plain, Size: 3513 bytes --]

Hi Christian,

I love your patch! Perhaps something to improve:

[auto build test WARNING on drm-tip/drm-tip]
[also build test WARNING on next-20210319]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-a005-20210318 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project fcc1ce00931751ac02498986feb37744e9ace8de)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install x86_64 cross compiling tool for clang build
        # apt-get install binutils-x86-64-linux-gnu
        # https://github.com/0day-ci/linux/commit/824dca26fe395899b41d9790944ddea345f7a6fd
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
        git checkout 824dca26fe395899b41d9790944ddea345f7a6fd
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   drivers/gpu/drm/ttm/ttm_device.c:42: warning: Function parameter or member 'ttm_global_mutex' not described in 'DEFINE_MUTEX'
   drivers/gpu/drm/ttm/ttm_device.c:42: warning: expecting prototype for ttm_global_mutex(). Prototype was for DEFINE_MUTEX() instead
   drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'ctx' not described in 'ttm_global_swapout'
   drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'gfp_flags' not described in 'ttm_global_swapout'
>> drivers/gpu/drm/ttm/ttm_device.c:110: warning: expecting prototype for A buffer object shrink method that tries to swap out the first(). Prototype was for ttm_global_swapout() instead


vim +110 drivers/gpu/drm/ttm/ttm_device.c

   104	
   105	/**
   106	 * A buffer object shrink method that tries to swap out the first
   107	 * buffer object on the global::swap_lru list.
   108	 */
   109	long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
 > 110	{
   111		struct ttm_global *glob = &ttm_glob;
   112		struct ttm_buffer_object *bo;
   113		unsigned i;
   114		int ret;
   115	
   116		spin_lock(&glob->lru_lock);
   117		for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
   118			list_for_each_entry(bo, &glob->swap_lru[i], swap) {
   119				uint32_t num_pages = bo->ttm->num_pages;
   120	
   121				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
   122				/* ttm_bo_swapout has dropped the lru_lock */
   123				if (!ret)
   124					return num_pages;
   125				if (ret != -EBUSY)
   126					return ret;
   127			}
   128		}
   129		spin_unlock(&glob->lru_lock);
   130		return 0;
   131	}
   132	EXPORT_SYMBOL(ttm_global_swapout);
   133	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 36289 bytes --]

[-- Attachment #3: Type: text/plain, Size: 160 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/3] drm/ttm: move swapout logic around
@ 2021-03-19  9:41   ` kernel test robot
  0 siblings, 0 replies; 19+ messages in thread
From: kernel test robot @ 2021-03-19  9:41 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 3590 bytes --]

Hi Christian,

I love your patch! Perhaps something to improve:

[auto build test WARNING on drm-tip/drm-tip]
[also build test WARNING on next-20210319]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-a005-20210318 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project fcc1ce00931751ac02498986feb37744e9ace8de)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install x86_64 cross compiling tool for clang build
        # apt-get install binutils-x86-64-linux-gnu
        # https://github.com/0day-ci/linux/commit/824dca26fe395899b41d9790944ddea345f7a6fd
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
        git checkout 824dca26fe395899b41d9790944ddea345f7a6fd
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   drivers/gpu/drm/ttm/ttm_device.c:42: warning: Function parameter or member 'ttm_global_mutex' not described in 'DEFINE_MUTEX'
   drivers/gpu/drm/ttm/ttm_device.c:42: warning: expecting prototype for ttm_global_mutex(). Prototype was for DEFINE_MUTEX() instead
   drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'ctx' not described in 'ttm_global_swapout'
   drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'gfp_flags' not described in 'ttm_global_swapout'
>> drivers/gpu/drm/ttm/ttm_device.c:110: warning: expecting prototype for A buffer object shrink method that tries to swap out the first(). Prototype was for ttm_global_swapout() instead


vim +110 drivers/gpu/drm/ttm/ttm_device.c

   104	
   105	/**
   106	 * A buffer object shrink method that tries to swap out the first
   107	 * buffer object on the global::swap_lru list.
   108	 */
   109	long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
 > 110	{
   111		struct ttm_global *glob = &ttm_glob;
   112		struct ttm_buffer_object *bo;
   113		unsigned i;
   114		int ret;
   115	
   116		spin_lock(&glob->lru_lock);
   117		for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
   118			list_for_each_entry(bo, &glob->swap_lru[i], swap) {
   119				uint32_t num_pages = bo->ttm->num_pages;
   120	
   121				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
   122				/* ttm_bo_swapout has dropped the lru_lock */
   123				if (!ret)
   124					return num_pages;
   125				if (ret != -EBUSY)
   126					return ret;
   127			}
   128		}
   129		spin_unlock(&glob->lru_lock);
   130		return 0;
   131	}
   132	EXPORT_SYMBOL(ttm_global_swapout);
   133	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 36289 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 1/3] drm/ttm: move swapout logic around
@ 2021-02-11 13:29 Christian König
  0 siblings, 0 replies; 19+ messages in thread
From: Christian König @ 2021-02-11 13:29 UTC (permalink / raw)
  To: dri-devel; +Cc: ray.huang

Move the iteration of the global lru into the new function
ttm_global_swapout() and use that instead in drivers.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c        | 57 ++++++++---------------------
 drivers/gpu/drm/ttm/ttm_device.c    | 29 +++++++++++++++
 drivers/gpu/drm/ttm/ttm_tt.c        |  2 +-
 drivers/gpu/drm/vmwgfx/ttm_memory.c |  3 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  2 +-
 include/drm/ttm/ttm_bo_api.h        |  3 +-
 include/drm/ttm/ttm_device.h        |  2 +
 7 files changed, 53 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index e38102282fd5..d33578a112b4 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1184,56 +1184,35 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
 }
 EXPORT_SYMBOL(ttm_bo_wait);
 
-/*
- * A buffer object shrink method that tries to swap out the first
- * buffer object on the bo_global::swap_lru list.
- */
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+		   gfp_t gfp_flags)
 {
 	struct ttm_global *glob = &ttm_glob;
-	struct ttm_buffer_object *bo;
-	int ret = -EBUSY;
 	bool locked;
-	unsigned i;
-
-	spin_lock(&glob->lru_lock);
-	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
-		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
-			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
-							    NULL))
-				continue;
-
-			if (!ttm_bo_get_unless_zero(bo)) {
-				if (locked)
-					dma_resv_unlock(bo->base.resv);
-				continue;
-			}
+	int ret;
 
-			ret = 0;
-			break;
-		}
-		if (!ret)
-			break;
-	}
+	if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
+		return -EBUSY;
 
-	if (ret) {
-		spin_unlock(&glob->lru_lock);
-		return ret;
+	if (!ttm_bo_get_unless_zero(bo)) {
+		if (locked)
+			dma_resv_unlock(bo->base.resv);
+		return -EBUSY;
 	}
 
 	if (bo->deleted) {
-		ret = ttm_bo_cleanup_refs(bo, false, false, locked);
+		ttm_bo_cleanup_refs(bo, false, false, locked);
 		ttm_bo_put(bo);
-		return ret;
+		return 0;
 	}
 
 	ttm_bo_del_from_lru(bo);
+	/* TODO: Cleanup the locking */
 	spin_unlock(&glob->lru_lock);
 
-	/**
+	/*
 	 * Move to system cached
 	 */
-
 	if (bo->mem.mem_type != TTM_PL_SYSTEM) {
 		struct ttm_operation_ctx ctx = { false, false };
 		struct ttm_resource evict_mem;
@@ -1253,29 +1232,26 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
 		}
 	}
 
-	/**
+	/*
 	 * Make sure BO is idle.
 	 */
-
 	ret = ttm_bo_wait(bo, false, false);
 	if (unlikely(ret != 0))
 		goto out;
 
 	ttm_bo_unmap_virtual(bo);
 
-	/**
+	/*
 	 * Swap out. Buffer will be swapped in again as soon as
 	 * anyone tries to access a ttm page.
 	 */
-
 	if (bo->bdev->funcs->swap_notify)
 		bo->bdev->funcs->swap_notify(bo);
 
 	ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
 out:
 
-	/**
-	 *
+	/*
 	 * Unreserve without putting on LRU to avoid swapping out an
 	 * already swapped buffer.
 	 */
@@ -1284,7 +1260,6 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
 	ttm_bo_put(bo);
 	return ret;
 }
-EXPORT_SYMBOL(ttm_bo_swapout);
 
 void ttm_bo_tt_destroy(struct ttm_buffer_object *bo)
 {
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 95e1b7b1f2e6..dfc2a7e4e490 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -102,6 +102,35 @@ static int ttm_global_init(void)
 	return ret;
 }
 
+/*
+ * A buffer object shrink method that tries to swap out the first
+ * swappable buffer object on the global::swap_lru lists.
+ */
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+{
+	struct ttm_global *glob = &ttm_glob;
+	struct ttm_buffer_object *bo;
+	unsigned i;
+	int ret;
+
+	spin_lock(&glob->lru_lock);
+	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
+		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
+			uint32_t num_pages = bo->ttm->num_pages;
+
+			ret = ttm_bo_swapout(bo, ctx, gfp_flags);
+			/* ttm_bo_swapout has dropped the lru_lock */
+			if (!ret)
+				return num_pages;
+			if (ret != -EBUSY)
+				return ret;
+		}
+	}
+	spin_unlock(&glob->lru_lock);
+	return 0;
+}
+EXPORT_SYMBOL(ttm_global_swapout);
+
 static void ttm_init_sysman(struct ttm_device *bdev)
 {
 	struct ttm_resource_manager *man = &bdev->sysman;
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 2f0833c98d2c..95b5cff25f4c 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -369,7 +369,7 @@ static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink,
 	};
 	int ret;
 
-	ret = ttm_bo_swapout(&ctx, GFP_NOFS);
+	ret = ttm_global_swapout(&ctx, GFP_NOFS);
 	return ret < 0 ? SHRINK_EMPTY : ret;
 }
 
diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c
index e972af07d029..104b95a8c7a2 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_memory.c
+++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c
@@ -38,6 +38,7 @@
 
 #include <drm/drm_device.h>
 #include <drm/drm_file.h>
+#include <drm/ttm/ttm_device.h>
 
 #include "ttm_memory.h"
 
@@ -277,7 +278,7 @@ static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq,
 
 	while (ttm_zones_above_swap_target(glob, from_wq, extra)) {
 		spin_unlock(&glob->lock);
-		ret = ttm_bo_swapout(ctx, GFP_KERNEL);
+		ret = ttm_global_swapout(ctx, GFP_KERNEL);
 		spin_lock(&glob->lock);
 		if (unlikely(ret < 0))
 			break;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 4efed3bf0ef9..01da355b86f3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1384,7 +1384,7 @@ static int vmw_pm_freeze(struct device *kdev)
 	vmw_execbuf_release_pinned_bo(dev_priv);
 	vmw_resource_evict_all(dev_priv);
 	vmw_release_device_early(dev_priv);
-	while (ttm_bo_swapout(&ctx, GFP_KERNEL) > 0);
+	while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
 	if (dev_priv->enable_fb)
 		vmw_fifo_resource_dec(dev_priv);
 	if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 4fb523dfab32..5044ac330858 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -560,7 +560,8 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file *filp,
 		  const char __user *wbuf, char __user *rbuf,
 		  size_t count, loff_t *f_pos, bool write);
 
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+		   gfp_t gfp_flags);
 
 /**
  * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 035bbc044a3b..6a0b267d4fe6 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -297,6 +297,8 @@ struct ttm_device {
 	struct delayed_work wq;
 };
 
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+
 static inline struct ttm_resource_manager *
 ttm_manager_type(struct ttm_device *bdev, int mem_type)
 {
-- 
2.25.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 1/3] drm/ttm: move swapout logic around
@ 2021-02-10 15:21 Christian König
  0 siblings, 0 replies; 19+ messages in thread
From: Christian König @ 2021-02-10 15:21 UTC (permalink / raw)
  To: dri-devel, ray.huang

Move the iteration of the global lru into the new function
ttm_global_swapout() and use that instead in drivers.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c        | 57 ++++++++---------------------
 drivers/gpu/drm/ttm/ttm_device.c    | 29 +++++++++++++++
 drivers/gpu/drm/ttm/ttm_tt.c        |  2 +-
 drivers/gpu/drm/vmwgfx/ttm_memory.c |  3 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  2 +-
 include/drm/ttm/ttm_bo_api.h        |  3 +-
 include/drm/ttm/ttm_device.h        |  2 +
 7 files changed, 53 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index e38102282fd5..d33578a112b4 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1184,56 +1184,35 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
 }
 EXPORT_SYMBOL(ttm_bo_wait);
 
-/*
- * A buffer object shrink method that tries to swap out the first
- * buffer object on the bo_global::swap_lru list.
- */
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+		   gfp_t gfp_flags)
 {
 	struct ttm_global *glob = &ttm_glob;
-	struct ttm_buffer_object *bo;
-	int ret = -EBUSY;
 	bool locked;
-	unsigned i;
-
-	spin_lock(&glob->lru_lock);
-	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
-		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
-			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
-							    NULL))
-				continue;
-
-			if (!ttm_bo_get_unless_zero(bo)) {
-				if (locked)
-					dma_resv_unlock(bo->base.resv);
-				continue;
-			}
+	int ret;
 
-			ret = 0;
-			break;
-		}
-		if (!ret)
-			break;
-	}
+	if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
+		return -EBUSY;
 
-	if (ret) {
-		spin_unlock(&glob->lru_lock);
-		return ret;
+	if (!ttm_bo_get_unless_zero(bo)) {
+		if (locked)
+			dma_resv_unlock(bo->base.resv);
+		return -EBUSY;
 	}
 
 	if (bo->deleted) {
-		ret = ttm_bo_cleanup_refs(bo, false, false, locked);
+		ttm_bo_cleanup_refs(bo, false, false, locked);
 		ttm_bo_put(bo);
-		return ret;
+		return 0;
 	}
 
 	ttm_bo_del_from_lru(bo);
+	/* TODO: Cleanup the locking */
 	spin_unlock(&glob->lru_lock);
 
-	/**
+	/*
 	 * Move to system cached
 	 */
-
 	if (bo->mem.mem_type != TTM_PL_SYSTEM) {
 		struct ttm_operation_ctx ctx = { false, false };
 		struct ttm_resource evict_mem;
@@ -1253,29 +1232,26 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
 		}
 	}
 
-	/**
+	/*
 	 * Make sure BO is idle.
 	 */
-
 	ret = ttm_bo_wait(bo, false, false);
 	if (unlikely(ret != 0))
 		goto out;
 
 	ttm_bo_unmap_virtual(bo);
 
-	/**
+	/*
 	 * Swap out. Buffer will be swapped in again as soon as
 	 * anyone tries to access a ttm page.
 	 */
-
 	if (bo->bdev->funcs->swap_notify)
 		bo->bdev->funcs->swap_notify(bo);
 
 	ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
 out:
 
-	/**
-	 *
+	/*
 	 * Unreserve without putting on LRU to avoid swapping out an
 	 * already swapped buffer.
 	 */
@@ -1284,7 +1260,6 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
 	ttm_bo_put(bo);
 	return ret;
 }
-EXPORT_SYMBOL(ttm_bo_swapout);
 
 void ttm_bo_tt_destroy(struct ttm_buffer_object *bo)
 {
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 95e1b7b1f2e6..dfc2a7e4e490 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -102,6 +102,35 @@ static int ttm_global_init(void)
 	return ret;
 }
 
+/*
+ * A buffer object shrink method that tries to swap out the first
+ * swappable buffer object on the global::swap_lru lists.
+ */
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+{
+	struct ttm_global *glob = &ttm_glob;
+	struct ttm_buffer_object *bo;
+	unsigned i;
+	int ret;
+
+	spin_lock(&glob->lru_lock);
+	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
+		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
+			uint32_t num_pages = bo->ttm->num_pages;
+
+			ret = ttm_bo_swapout(bo, ctx, gfp_flags);
+			/* ttm_bo_swapout has dropped the lru_lock */
+			if (!ret)
+				return num_pages;
+			if (ret != -EBUSY)
+				return ret;
+		}
+	}
+	spin_unlock(&glob->lru_lock);
+	return 0;
+}
+EXPORT_SYMBOL(ttm_global_swapout);
+
 static void ttm_init_sysman(struct ttm_device *bdev)
 {
 	struct ttm_resource_manager *man = &bdev->sysman;
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 2f0833c98d2c..95b5cff25f4c 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -369,7 +369,7 @@ static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink,
 	};
 	int ret;
 
-	ret = ttm_bo_swapout(&ctx, GFP_NOFS);
+	ret = ttm_global_swapout(&ctx, GFP_NOFS);
 	return ret < 0 ? SHRINK_EMPTY : ret;
 }
 
diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c
index e972af07d029..104b95a8c7a2 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_memory.c
+++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c
@@ -38,6 +38,7 @@
 
 #include <drm/drm_device.h>
 #include <drm/drm_file.h>
+#include <drm/ttm/ttm_device.h>
 
 #include "ttm_memory.h"
 
@@ -277,7 +278,7 @@ static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq,
 
 	while (ttm_zones_above_swap_target(glob, from_wq, extra)) {
 		spin_unlock(&glob->lock);
-		ret = ttm_bo_swapout(ctx, GFP_KERNEL);
+		ret = ttm_global_swapout(ctx, GFP_KERNEL);
 		spin_lock(&glob->lock);
 		if (unlikely(ret < 0))
 			break;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 4efed3bf0ef9..01da355b86f3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1384,7 +1384,7 @@ static int vmw_pm_freeze(struct device *kdev)
 	vmw_execbuf_release_pinned_bo(dev_priv);
 	vmw_resource_evict_all(dev_priv);
 	vmw_release_device_early(dev_priv);
-	while (ttm_bo_swapout(&ctx, GFP_KERNEL) > 0);
+	while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
 	if (dev_priv->enable_fb)
 		vmw_fifo_resource_dec(dev_priv);
 	if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 4fb523dfab32..5044ac330858 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -560,7 +560,8 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file *filp,
 		  const char __user *wbuf, char __user *rbuf,
 		  size_t count, loff_t *f_pos, bool write);
 
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+		   gfp_t gfp_flags);
 
 /**
  * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 035bbc044a3b..6a0b267d4fe6 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -297,6 +297,8 @@ struct ttm_device {
 	struct delayed_work wq;
 };
 
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+
 static inline struct ttm_resource_manager *
 ttm_manager_type(struct ttm_device *bdev, int mem_type)
 {
-- 
2.25.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2021-03-19  9:41 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-15 16:04 [PATCH 1/3] drm/ttm: move swapout logic around Christian König
2021-03-15 16:04 ` [PATCH 2/3] drm/ttm: remove swap LRU v2 Christian König
2021-03-15 18:54   ` kernel test robot
2021-03-15 18:54     ` kernel test robot
2021-03-15 18:54   ` Matthew Auld
2021-03-15 19:27     ` Christian König
2021-03-15 16:04 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
2021-03-15 20:17   ` kernel test robot
2021-03-15 20:17     ` kernel test robot
2021-03-16  9:35   ` Daniel Vetter
2021-03-16 12:03     ` Christian König
2021-03-16 12:05       ` Daniel Vetter
2021-03-16 15:13         ` Christian König
2021-03-15 18:47 ` [PATCH 1/3] drm/ttm: move swapout logic around kernel test robot
2021-03-15 18:47   ` kernel test robot
2021-03-19  9:41 ` kernel test robot
2021-03-19  9:41   ` kernel test robot
  -- strict thread matches above, loose matches on Subject: below --
2021-02-11 13:29 Christian König
2021-02-10 15:21 Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.