All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/ttm: move swapout logic around v2
@ 2021-03-18 12:47 Christian König
  2021-03-18 12:47 ` [PATCH 2/3] drm/ttm: remove swap LRU v3 Christian König
                   ` (6 more replies)
  0 siblings, 7 replies; 18+ messages in thread
From: Christian König @ 2021-03-18 12:47 UTC (permalink / raw)
  To: dri-devel; +Cc: ray.huang, matthew.william.auld

Move the iteration of the global lru into the new function
ttm_global_swapout() and use that instead in drivers.

v2: consistently return int

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c        | 57 ++++++++---------------------
 drivers/gpu/drm/ttm/ttm_device.c    | 29 +++++++++++++++
 drivers/gpu/drm/ttm/ttm_tt.c        |  2 +-
 drivers/gpu/drm/vmwgfx/ttm_memory.c |  3 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  2 +-
 include/drm/ttm/ttm_bo_api.h        |  3 +-
 include/drm/ttm/ttm_device.h        |  2 +
 7 files changed, 53 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 3c23e863a3f0..66e00b404ec3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1193,56 +1193,35 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
 }
 EXPORT_SYMBOL(ttm_bo_wait);
 
-/*
- * A buffer object shrink method that tries to swap out the first
- * buffer object on the bo_global::swap_lru list.
- */
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+		   gfp_t gfp_flags)
 {
 	struct ttm_global *glob = &ttm_glob;
-	struct ttm_buffer_object *bo;
-	int ret = -EBUSY;
 	bool locked;
-	unsigned i;
-
-	spin_lock(&glob->lru_lock);
-	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
-		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
-			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
-							    NULL))
-				continue;
-
-			if (!ttm_bo_get_unless_zero(bo)) {
-				if (locked)
-					dma_resv_unlock(bo->base.resv);
-				continue;
-			}
+	int ret;
 
-			ret = 0;
-			break;
-		}
-		if (!ret)
-			break;
-	}
+	if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
+		return -EBUSY;
 
-	if (ret) {
-		spin_unlock(&glob->lru_lock);
-		return ret;
+	if (!ttm_bo_get_unless_zero(bo)) {
+		if (locked)
+			dma_resv_unlock(bo->base.resv);
+		return -EBUSY;
 	}
 
 	if (bo->deleted) {
-		ret = ttm_bo_cleanup_refs(bo, false, false, locked);
+		ttm_bo_cleanup_refs(bo, false, false, locked);
 		ttm_bo_put(bo);
-		return ret;
+		return 0;
 	}
 
 	ttm_bo_del_from_lru(bo);
+	/* TODO: Cleanup the locking */
 	spin_unlock(&glob->lru_lock);
 
-	/**
+	/*
 	 * Move to system cached
 	 */
-
 	if (bo->mem.mem_type != TTM_PL_SYSTEM) {
 		struct ttm_operation_ctx ctx = { false, false };
 		struct ttm_resource evict_mem;
@@ -1262,29 +1241,26 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
 		}
 	}
 
-	/**
+	/*
 	 * Make sure BO is idle.
 	 */
-
 	ret = ttm_bo_wait(bo, false, false);
 	if (unlikely(ret != 0))
 		goto out;
 
 	ttm_bo_unmap_virtual(bo);
 
-	/**
+	/*
 	 * Swap out. Buffer will be swapped in again as soon as
 	 * anyone tries to access a ttm page.
 	 */
-
 	if (bo->bdev->funcs->swap_notify)
 		bo->bdev->funcs->swap_notify(bo);
 
 	ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
 out:
 
-	/**
-	 *
+	/*
 	 * Unreserve without putting on LRU to avoid swapping out an
 	 * already swapped buffer.
 	 */
@@ -1293,7 +1269,6 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
 	ttm_bo_put(bo);
 	return ret;
 }
-EXPORT_SYMBOL(ttm_bo_swapout);
 
 void ttm_bo_tt_destroy(struct ttm_buffer_object *bo)
 {
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 95e1b7b1f2e6..b1424189fdfb 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -102,6 +102,35 @@ static int ttm_global_init(void)
 	return ret;
 }
 
+/**
+ * A buffer object shrink method that tries to swap out the first
+ * buffer object on the global::swap_lru list.
+ */
+int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+{
+	struct ttm_global *glob = &ttm_glob;
+	struct ttm_buffer_object *bo;
+	unsigned i;
+	int ret;
+
+	spin_lock(&glob->lru_lock);
+	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
+		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
+			uint32_t num_pages = bo->ttm->num_pages;
+
+			ret = ttm_bo_swapout(bo, ctx, gfp_flags);
+			/* ttm_bo_swapout has dropped the lru_lock */
+			if (!ret)
+				return num_pages;
+			if (ret != -EBUSY)
+				return ret;
+		}
+	}
+	spin_unlock(&glob->lru_lock);
+	return 0;
+}
+EXPORT_SYMBOL(ttm_global_swapout);
+
 static void ttm_init_sysman(struct ttm_device *bdev)
 {
 	struct ttm_resource_manager *man = &bdev->sysman;
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 2f0833c98d2c..95b5cff25f4c 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -369,7 +369,7 @@ static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink,
 	};
 	int ret;
 
-	ret = ttm_bo_swapout(&ctx, GFP_NOFS);
+	ret = ttm_global_swapout(&ctx, GFP_NOFS);
 	return ret < 0 ? SHRINK_EMPTY : ret;
 }
 
diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c
index e972af07d029..104b95a8c7a2 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_memory.c
+++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c
@@ -38,6 +38,7 @@
 
 #include <drm/drm_device.h>
 #include <drm/drm_file.h>
+#include <drm/ttm/ttm_device.h>
 
 #include "ttm_memory.h"
 
@@ -277,7 +278,7 @@ static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq,
 
 	while (ttm_zones_above_swap_target(glob, from_wq, extra)) {
 		spin_unlock(&glob->lock);
-		ret = ttm_bo_swapout(ctx, GFP_KERNEL);
+		ret = ttm_global_swapout(ctx, GFP_KERNEL);
 		spin_lock(&glob->lock);
 		if (unlikely(ret < 0))
 			break;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 6910111099c8..b991422e156c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev)
 	vmw_execbuf_release_pinned_bo(dev_priv);
 	vmw_resource_evict_all(dev_priv);
 	vmw_release_device_early(dev_priv);
-	while (ttm_bo_swapout(&ctx, GFP_KERNEL) > 0);
+	while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
 	if (dev_priv->enable_fb)
 		vmw_fifo_resource_dec(dev_priv);
 	if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 4fb523dfab32..5044ac330858 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -560,7 +560,8 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file *filp,
 		  const char __user *wbuf, char __user *rbuf,
 		  size_t count, loff_t *f_pos, bool write);
 
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+		   gfp_t gfp_flags);
 
 /**
  * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 035bbc044a3b..6a0b267d4fe6 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -297,6 +297,8 @@ struct ttm_device {
 	struct delayed_work wq;
 };
 
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+
 static inline struct ttm_resource_manager *
 ttm_manager_type(struct ttm_device *bdev, int mem_type)
 {
-- 
2.25.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 2/3] drm/ttm: remove swap LRU v3
  2021-03-18 12:47 [PATCH 1/3] drm/ttm: move swapout logic around v2 Christian König
@ 2021-03-18 12:47 ` Christian König
  2021-03-19  4:30   ` Huang Rui
  2021-03-18 12:47 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 18+ messages in thread
From: Christian König @ 2021-03-18 12:47 UTC (permalink / raw)
  To: dri-devel; +Cc: ray.huang, matthew.william.auld

Instead evict round robin from each devices SYSTEM and TT domain.

v2: reorder num_pages access reported by Dan's script
v3: fix rebase fallout, num_pages should be 32bit

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c        | 29 --------------
 drivers/gpu/drm/ttm/ttm_bo_util.c   |  1 -
 drivers/gpu/drm/ttm/ttm_device.c    | 60 +++++++++++++++++++++--------
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  2 +-
 include/drm/ttm/ttm_bo_api.h        |  1 -
 include/drm/ttm/ttm_bo_driver.h     |  1 -
 include/drm/ttm/ttm_device.h        |  7 +---
 7 files changed, 48 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 66e00b404ec3..3673157527ff 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -73,7 +73,6 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
 {
 	struct ttm_device *bdev = bo->bdev;
 
-	list_del_init(&bo->swap);
 	list_del_init(&bo->lru);
 
 	if (bdev->funcs->del_from_lru_notify)
@@ -105,16 +104,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
 
 	man = ttm_manager_type(bdev, mem->mem_type);
 	list_move_tail(&bo->lru, &man->lru[bo->priority]);
-	if (man->use_tt && bo->ttm &&
-	    !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
-				     TTM_PAGE_FLAG_SWAPPED))) {
-		struct list_head *swap;
-
-		swap = &ttm_glob.swap_lru[bo->priority];
-		list_move_tail(&bo->swap, swap);
-	} else {
-		list_del_init(&bo->swap);
-	}
 
 	if (bdev->funcs->del_from_lru_notify)
 		bdev->funcs->del_from_lru_notify(bo);
@@ -129,9 +118,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
 			ttm_bo_bulk_move_set_pos(&bulk->vram[bo->priority], bo);
 			break;
 		}
-		if (bo->ttm && !(bo->ttm->page_flags &
-				 (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED)))
-			ttm_bo_bulk_move_set_pos(&bulk->swap[bo->priority], bo);
 	}
 }
 EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
@@ -169,20 +155,6 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
 		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
 				    &pos->last->lru);
 	}
-
-	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
-		struct ttm_lru_bulk_move_pos *pos = &bulk->swap[i];
-		struct list_head *lru;
-
-		if (!pos->first)
-			continue;
-
-		dma_resv_assert_held(pos->first->base.resv);
-		dma_resv_assert_held(pos->last->base.resv);
-
-		lru = &ttm_glob.swap_lru[i];
-		list_bulk_move_tail(lru, &pos->first->swap, &pos->last->swap);
-	}
 }
 EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
 
@@ -1065,7 +1037,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
 	kref_init(&bo->kref);
 	INIT_LIST_HEAD(&bo->lru);
 	INIT_LIST_HEAD(&bo->ddestroy);
-	INIT_LIST_HEAD(&bo->swap);
 	bo->bdev = bdev;
 	bo->type = type;
 	bo->mem.mem_type = TTM_PL_SYSTEM;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 031e5819fec4..a2a17c84ceb3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -303,7 +303,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	atomic_inc(&ttm_glob.bo_count);
 	INIT_LIST_HEAD(&fbo->base.ddestroy);
 	INIT_LIST_HEAD(&fbo->base.lru);
-	INIT_LIST_HEAD(&fbo->base.swap);
 	fbo->base.moving = NULL;
 	drm_vma_node_reset(&fbo->base.base.vma_node);
 
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index b1424189fdfb..2096a0fd9c35 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -67,7 +67,6 @@ static int ttm_global_init(void)
 	unsigned long num_pages;
 	struct sysinfo si;
 	int ret = 0;
-	unsigned i;
 
 	mutex_lock(&ttm_global_mutex);
 	if (++ttm_glob_use_count > 1)
@@ -90,8 +89,6 @@ static int ttm_global_init(void)
 		goto out;
 	}
 
-	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
-		INIT_LIST_HEAD(&glob->swap_lru[i]);
 	INIT_LIST_HEAD(&glob->device_list);
 	atomic_set(&glob->bo_count, 0);
 
@@ -109,27 +106,60 @@ static int ttm_global_init(void)
 int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
 {
 	struct ttm_global *glob = &ttm_glob;
+	struct ttm_device *bdev;
+	int ret = -EBUSY;
+
+	mutex_lock(&ttm_global_mutex);
+	list_for_each_entry(bdev, &glob->device_list, device_list) {
+		ret = ttm_device_swapout(bdev, ctx, gfp_flags);
+		if (ret > 0) {
+			list_move_tail(&bdev->device_list, &glob->device_list);
+			break;
+		}
+	}
+	mutex_unlock(&ttm_global_mutex);
+	return ret;
+}
+EXPORT_SYMBOL(ttm_global_swapout);
+
+long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
+			gfp_t gfp_flags)
+{
+	struct ttm_global *glob = &ttm_glob;
+	struct ttm_resource_manager *man;
 	struct ttm_buffer_object *bo;
-	unsigned i;
+	unsigned i, j;
 	int ret;
 
 	spin_lock(&glob->lru_lock);
-	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
-		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
-			uint32_t num_pages = bo->ttm->num_pages;
-
-			ret = ttm_bo_swapout(bo, ctx, gfp_flags);
-			/* ttm_bo_swapout has dropped the lru_lock */
-			if (!ret)
-				return num_pages;
-			if (ret != -EBUSY)
-				return ret;
+	for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
+		man = ttm_manager_type(bdev, i);
+		if (!man || !man->use_tt)
+			continue;
+
+		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
+			list_for_each_entry(bo, &man->lru[j], lru) {
+				uint32_t num_pages;
+
+				if (!bo->ttm ||
+				    bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
+				    bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
+					continue;
+
+				num_pages = bo->ttm->num_pages;
+				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
+				/* ttm_bo_swapout has dropped the lru_lock */
+				if (!ret)
+					return num_pages;
+				if (ret != -EBUSY)
+					return ret;
+			}
 		}
 	}
 	spin_unlock(&glob->lru_lock);
 	return 0;
 }
-EXPORT_SYMBOL(ttm_global_swapout);
+EXPORT_SYMBOL(ttm_device_swapout);
 
 static void ttm_init_sysman(struct ttm_device *bdev)
 {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index b991422e156c..4e41d8221f06 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev)
 	vmw_execbuf_release_pinned_bo(dev_priv);
 	vmw_resource_evict_all(dev_priv);
 	vmw_release_device_early(dev_priv);
-	while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
+	while (ttm_device_swapout(&dev_priv->bdev, &ctx, GFP_KERNEL) > 0);
 	if (dev_priv->enable_fb)
 		vmw_fifo_resource_dec(dev_priv);
 	if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 5044ac330858..3587f660e8f4 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -144,7 +144,6 @@ struct ttm_buffer_object {
 
 	struct list_head lru;
 	struct list_head ddestroy;
-	struct list_head swap;
 
 	/**
 	 * Members protected by a bo reservation.
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 8959c0075cfd..d007feef7676 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -69,7 +69,6 @@ struct ttm_lru_bulk_move_pos {
 struct ttm_lru_bulk_move {
 	struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
 	struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
-	struct ttm_lru_bulk_move_pos swap[TTM_MAX_BO_PRIORITY];
 };
 
 /*
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 6a0b267d4fe6..cda6efb4c34b 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -63,11 +63,6 @@ extern struct ttm_global {
 	 */
 	struct list_head device_list;
 
-	/**
-	 * Protected by the lru_lock.
-	 */
-	struct list_head swap_lru[TTM_MAX_BO_PRIORITY];
-
 	/**
 	 * Internal protection.
 	 */
@@ -298,6 +293,8 @@ struct ttm_device {
 };
 
 long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
+		       gfp_t gfp_flags);
 
 static inline struct ttm_resource_manager *
 ttm_manager_type(struct ttm_device *bdev, int mem_type)
-- 
2.25.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 3/3] drm/ttm: switch to per device LRU lock
  2021-03-18 12:47 [PATCH 1/3] drm/ttm: move swapout logic around v2 Christian König
  2021-03-18 12:47 ` [PATCH 2/3] drm/ttm: remove swap LRU v3 Christian König
@ 2021-03-18 12:47 ` Christian König
  2021-03-19  4:32   ` Huang Rui
  2021-03-18 14:43 ` [PATCH 1/3] drm/ttm: move swapout logic around v2 Nirmoy
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 18+ messages in thread
From: Christian König @ 2021-03-18 12:47 UTC (permalink / raw)
  To: dri-devel; +Cc: ray.huang, matthew.william.auld

Instead of having a global lock.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  8 ++---
 drivers/gpu/drm/qxl/qxl_release.c      |  5 +--
 drivers/gpu/drm/ttm/ttm_bo.c           | 49 ++++++++++++--------------
 drivers/gpu/drm/ttm/ttm_device.c       | 12 +++----
 drivers/gpu/drm/ttm/ttm_execbuf_util.c |  8 ++---
 drivers/gpu/drm/ttm/ttm_resource.c     |  9 +++--
 include/drm/ttm/ttm_bo_driver.h        |  4 +--
 include/drm/ttm/ttm_device.h           |  4 +--
 8 files changed, 43 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9d19078246c8..ae18c0e32347 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
 	struct amdgpu_vm_bo_base *bo_base;
 
 	if (vm->bulk_moveable) {
-		spin_lock(&ttm_glob.lru_lock);
+		spin_lock(&adev->mman.bdev.lru_lock);
 		ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
-		spin_unlock(&ttm_glob.lru_lock);
+		spin_unlock(&adev->mman.bdev.lru_lock);
 		return;
 	}
 
 	memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
 
-	spin_lock(&ttm_glob.lru_lock);
+	spin_lock(&adev->mman.bdev.lru_lock);
 	list_for_each_entry(bo_base, &vm->idle, vm_status) {
 		struct amdgpu_bo *bo = bo_base->bo;
 
@@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
 						&bo->shadow->tbo.mem,
 						&vm->lru_bulk_move);
 	}
-	spin_unlock(&ttm_glob.lru_lock);
+	spin_unlock(&adev->mman.bdev.lru_lock);
 
 	vm->bulk_moveable = true;
 }
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index f5845c96d414..b19f2f00b215 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
 		       release->id | 0xf0000000, release->base.seqno);
 	trace_dma_fence_emit(&release->base);
 
-	spin_lock(&ttm_glob.lru_lock);
-
 	list_for_each_entry(entry, &release->bos, head) {
 		bo = entry->bo;
 
 		dma_resv_add_shared_fence(bo->base.resv, &release->base);
-		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+		ttm_bo_move_to_lru_tail_unlocked(bo);
 		dma_resv_unlock(bo->base.resv);
 	}
-	spin_unlock(&ttm_glob.lru_lock);
 	ww_acquire_fini(&release->ticket);
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 3673157527ff..2d2ac532987e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -243,9 +243,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
 		 * reference it any more. The only tricky case is the trylock on
 		 * the resv object while holding the lru_lock.
 		 */
-		spin_lock(&ttm_glob.lru_lock);
+		spin_lock(&bo->bdev->lru_lock);
 		bo->base.resv = &bo->base._resv;
-		spin_unlock(&ttm_glob.lru_lock);
+		spin_unlock(&bo->bdev->lru_lock);
 	}
 
 	return r;
@@ -304,7 +304,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 
 		if (unlock_resv)
 			dma_resv_unlock(bo->base.resv);
-		spin_unlock(&ttm_glob.lru_lock);
+		spin_unlock(&bo->bdev->lru_lock);
 
 		lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
 						 30 * HZ);
@@ -314,7 +314,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 		else if (lret == 0)
 			return -EBUSY;
 
-		spin_lock(&ttm_glob.lru_lock);
+		spin_lock(&bo->bdev->lru_lock);
 		if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
 			/*
 			 * We raced, and lost, someone else holds the reservation now,
@@ -324,7 +324,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 			 * delayed destruction would succeed, so just return success
 			 * here.
 			 */
-			spin_unlock(&ttm_glob.lru_lock);
+			spin_unlock(&bo->bdev->lru_lock);
 			return 0;
 		}
 		ret = 0;
@@ -333,13 +333,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 	if (ret || unlikely(list_empty(&bo->ddestroy))) {
 		if (unlock_resv)
 			dma_resv_unlock(bo->base.resv);
-		spin_unlock(&ttm_glob.lru_lock);
+		spin_unlock(&bo->bdev->lru_lock);
 		return ret;
 	}
 
 	ttm_bo_del_from_lru(bo);
 	list_del_init(&bo->ddestroy);
-	spin_unlock(&ttm_glob.lru_lock);
+	spin_unlock(&bo->bdev->lru_lock);
 	ttm_bo_cleanup_memtype_use(bo);
 
 	if (unlock_resv)
@@ -356,13 +356,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
  */
 bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
 {
-	struct ttm_global *glob = &ttm_glob;
 	struct list_head removed;
 	bool empty;
 
 	INIT_LIST_HEAD(&removed);
 
-	spin_lock(&glob->lru_lock);
+	spin_lock(&bdev->lru_lock);
 	while (!list_empty(&bdev->ddestroy)) {
 		struct ttm_buffer_object *bo;
 
@@ -373,24 +372,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
 			continue;
 
 		if (remove_all || bo->base.resv != &bo->base._resv) {
-			spin_unlock(&glob->lru_lock);
+			spin_unlock(&bdev->lru_lock);
 			dma_resv_lock(bo->base.resv, NULL);
 
-			spin_lock(&glob->lru_lock);
+			spin_lock(&bdev->lru_lock);
 			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
 
 		} else if (dma_resv_trylock(bo->base.resv)) {
 			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
 		} else {
-			spin_unlock(&glob->lru_lock);
+			spin_unlock(&bdev->lru_lock);
 		}
 
 		ttm_bo_put(bo);
-		spin_lock(&glob->lru_lock);
+		spin_lock(&bdev->lru_lock);
 	}
 	list_splice_tail(&removed, &bdev->ddestroy);
 	empty = list_empty(&bdev->ddestroy);
-	spin_unlock(&glob->lru_lock);
+	spin_unlock(&bdev->lru_lock);
 
 	return empty;
 }
@@ -425,7 +424,7 @@ static void ttm_bo_release(struct kref *kref)
 		ttm_bo_flush_all_fences(bo);
 		bo->deleted = true;
 
-		spin_lock(&ttm_glob.lru_lock);
+		spin_lock(&bo->bdev->lru_lock);
 
 		/*
 		 * Make pinned bos immediately available to
@@ -442,17 +441,17 @@ static void ttm_bo_release(struct kref *kref)
 
 		kref_init(&bo->kref);
 		list_add_tail(&bo->ddestroy, &bdev->ddestroy);
-		spin_unlock(&ttm_glob.lru_lock);
+		spin_unlock(&bo->bdev->lru_lock);
 
 		schedule_delayed_work(&bdev->wq,
 				      ((HZ / 100) < 1) ? 1 : HZ / 100);
 		return;
 	}
 
-	spin_lock(&ttm_glob.lru_lock);
+	spin_lock(&bo->bdev->lru_lock);
 	ttm_bo_del_from_lru(bo);
 	list_del(&bo->ddestroy);
-	spin_unlock(&ttm_glob.lru_lock);
+	spin_unlock(&bo->bdev->lru_lock);
 
 	ttm_bo_cleanup_memtype_use(bo);
 	dma_resv_unlock(bo->base.resv);
@@ -626,7 +625,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
 	unsigned i;
 	int ret;
 
-	spin_lock(&ttm_glob.lru_lock);
+	spin_lock(&bo->bdev->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		list_for_each_entry(bo, &man->lru[i], lru) {
 			bool busy;
@@ -663,7 +662,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
 	if (!bo) {
 		if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
 			busy_bo = NULL;
-		spin_unlock(&ttm_glob.lru_lock);
+		spin_unlock(&bo->bdev->lru_lock);
 		ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
 		if (busy_bo)
 			ttm_bo_put(busy_bo);
@@ -677,7 +676,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
 		return ret;
 	}
 
-	spin_unlock(&ttm_glob.lru_lock);
+	spin_unlock(&bo->bdev->lru_lock);
 
 	ret = ttm_bo_evict(bo, ctx);
 	if (locked)
@@ -777,10 +776,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
 	mem->mem_type = place->mem_type;
 	mem->placement = place->flags;
 
-	spin_lock(&ttm_glob.lru_lock);
+	spin_lock(&bo->bdev->lru_lock);
 	ttm_bo_move_to_lru_tail(bo, mem, NULL);
-	spin_unlock(&ttm_glob.lru_lock);
-
+	spin_unlock(&bo->bdev->lru_lock);
 	return 0;
 }
 
@@ -1167,7 +1165,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
 int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 		   gfp_t gfp_flags)
 {
-	struct ttm_global *glob = &ttm_glob;
 	bool locked;
 	int ret;
 
@@ -1188,7 +1185,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 
 	ttm_bo_del_from_lru(bo);
 	/* TODO: Cleanup the locking */
-	spin_unlock(&glob->lru_lock);
+	spin_unlock(&bo->bdev->lru_lock);
 
 	/*
 	 * Move to system cached
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 2096a0fd9c35..85f6975d9872 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -81,7 +81,6 @@ static int ttm_global_init(void)
 	ttm_pool_mgr_init(num_pages * 50 / 100);
 	ttm_tt_mgr_init();
 
-	spin_lock_init(&glob->lru_lock);
 	glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
 
 	if (unlikely(glob->dummy_read_page == NULL)) {
@@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
 long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
 			gfp_t gfp_flags)
 {
-	struct ttm_global *glob = &ttm_glob;
 	struct ttm_resource_manager *man;
 	struct ttm_buffer_object *bo;
 	unsigned i, j;
 	int ret;
 
-	spin_lock(&glob->lru_lock);
+	spin_lock(&bdev->lru_lock);
 	for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
 		man = ttm_manager_type(bdev, i);
 		if (!man || !man->use_tt)
@@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
 			}
 		}
 	}
-	spin_unlock(&glob->lru_lock);
+	spin_unlock(&bdev->lru_lock);
 	return 0;
 }
 EXPORT_SYMBOL(ttm_device_swapout);
@@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
 
 	bdev->vma_manager = vma_manager;
 	INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
+	spin_lock_init(&bdev->lru_lock);
 	INIT_LIST_HEAD(&bdev->ddestroy);
 	bdev->dev_mapping = mapping;
 	mutex_lock(&ttm_global_mutex);
@@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
 
 void ttm_device_fini(struct ttm_device *bdev)
 {
-	struct ttm_global *glob = &ttm_glob;
 	struct ttm_resource_manager *man;
 	unsigned i;
 
@@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
 	if (ttm_bo_delayed_delete(bdev, true))
 		pr_debug("Delayed destroy list was clean\n");
 
-	spin_lock(&glob->lru_lock);
+	spin_lock(&bdev->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
 		if (list_empty(&man->lru[0]))
 			pr_debug("Swap list %d was clean\n", i);
-	spin_unlock(&glob->lru_lock);
+	spin_unlock(&bdev->lru_lock);
 
 	ttm_pool_fini(&bdev->pool);
 	ttm_global_release();
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 690ab97d52b7..071c48d672c6 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
 	if (list_empty(list))
 		return;
 
-	spin_lock(&ttm_glob.lru_lock);
 	list_for_each_entry(entry, list, head) {
 		struct ttm_buffer_object *bo = entry->bo;
 
-		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+		ttm_bo_move_to_lru_tail_unlocked(bo);
 		dma_resv_unlock(bo->base.resv);
 	}
-	spin_unlock(&ttm_glob.lru_lock);
 
 	if (ticket)
 		ww_acquire_fini(ticket);
@@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
 	if (list_empty(list))
 		return;
 
-	spin_lock(&ttm_glob.lru_lock);
 	list_for_each_entry(entry, list, head) {
 		struct ttm_buffer_object *bo = entry->bo;
 
@@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
 			dma_resv_add_shared_fence(bo->base.resv, fence);
 		else
 			dma_resv_add_excl_fence(bo->base.resv, fence);
-		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+		ttm_bo_move_to_lru_tail_unlocked(bo);
 		dma_resv_unlock(bo->base.resv);
 	}
-	spin_unlock(&ttm_glob.lru_lock);
 	if (ticket)
 		ww_acquire_fini(ticket);
 }
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index ed1672a9f332..04f2eef653ab 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
 		.no_wait_gpu = false,
 		.force_alloc = true
 	};
-	struct ttm_global *glob = &ttm_glob;
 	struct dma_fence *fence;
 	int ret;
 	unsigned i;
@@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
 	 * Can't use standard list traversal since we're unlocking.
 	 */
 
-	spin_lock(&glob->lru_lock);
+	spin_lock(&bdev->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		while (!list_empty(&man->lru[i])) {
-			spin_unlock(&glob->lru_lock);
+			spin_unlock(&bdev->lru_lock);
 			ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
 						  NULL);
 			if (ret)
 				return ret;
-			spin_lock(&glob->lru_lock);
+			spin_lock(&bdev->lru_lock);
 		}
 	}
-	spin_unlock(&glob->lru_lock);
+	spin_unlock(&bdev->lru_lock);
 
 	spin_lock(&man->move_lock);
 	fence = dma_fence_get(man->move);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index d007feef7676..dbccac957f8f 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
 static inline void
 ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
 {
-	spin_lock(&ttm_glob.lru_lock);
+	spin_lock(&bo->bdev->lru_lock);
 	ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
-	spin_unlock(&ttm_glob.lru_lock);
+	spin_unlock(&bo->bdev->lru_lock);
 }
 
 static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index cda6efb4c34b..bae56d29e8ff 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -56,7 +56,6 @@ extern struct ttm_global {
 	 */
 
 	struct page *dummy_read_page;
-	spinlock_t lru_lock;
 
 	/**
 	 * Protected by ttm_global_mutex.
@@ -277,8 +276,9 @@ struct ttm_device {
 	struct ttm_pool pool;
 
 	/*
-	 * Protected by the global:lru lock.
+	 * Protection for the per manager LRU and ddestroy lists.
 	 */
+	spinlock_t lru_lock;
 	struct list_head ddestroy;
 
 	/*
-- 
2.25.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/3] drm/ttm: move swapout logic around v2
  2021-03-18 12:47 [PATCH 1/3] drm/ttm: move swapout logic around v2 Christian König
  2021-03-18 12:47 ` [PATCH 2/3] drm/ttm: remove swap LRU v3 Christian König
  2021-03-18 12:47 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
@ 2021-03-18 14:43 ` Nirmoy
  2021-03-18 15:26   ` Christian König
  2021-03-18 15:08   ` kernel test robot
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 18+ messages in thread
From: Nirmoy @ 2021-03-18 14:43 UTC (permalink / raw)
  To: Christian König, dri-devel; +Cc: ray.huang, matthew.william.auld

Hi Christian,

On 3/18/21 1:47 PM, Christian König wrote:
<snip>
>   /**
>    * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> index 035bbc044a3b..6a0b267d4fe6 100644
> --- a/include/drm/ttm/ttm_device.h
> +++ b/include/drm/ttm/ttm_device.h
> @@ -297,6 +297,8 @@ struct ttm_device {
>   	struct delayed_work wq;
>   };
>   
> +long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);


There is a typo here, long -> int.


Thanks,

Nirmoy


> +
>   static inline struct ttm_resource_manager *
>   ttm_manager_type(struct ttm_device *bdev, int mem_type)
>   {
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/3] drm/ttm: move swapout logic around v2
  2021-03-18 12:47 [PATCH 1/3] drm/ttm: move swapout logic around v2 Christian König
@ 2021-03-18 15:08   ` kernel test robot
  2021-03-18 12:47 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
                     ` (5 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: kernel test robot @ 2021-03-18 15:08 UTC (permalink / raw)
  To: Christian König, dri-devel
  Cc: matthew.william.auld, ray.huang, kbuild-all

[-- Attachment #1: Type: text/plain, Size: 4552 bytes --]

Hi "Christian,

I love your patch! Yet something to improve:

[auto build test ERROR on drm-tip/drm-tip]
[also build test ERROR on next-20210318]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: i386-randconfig-m021-20210318 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/a454d56ea061b53d24a62a700743e4508dd6c9b1
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
        git checkout a454d56ea061b53d24a62a700743e4508dd6c9b1
        # save the attached .config to linux build tree
        make W=1 ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> drivers/gpu/drm/ttm/ttm_device.c:109:5: error: conflicting types for 'ttm_global_swapout'
     109 | int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
         |     ^~~~~~~~~~~~~~~~~~
   In file included from drivers/gpu/drm/ttm/ttm_device.c:32:
   include/drm/ttm/ttm_device.h:300:6: note: previous declaration of 'ttm_global_swapout' was here
     300 | long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
         |      ^~~~~~~~~~~~~~~~~~
   In file included from include/linux/linkage.h:7,
                    from include/linux/kernel.h:7,
                    from include/asm-generic/bug.h:20,
                    from arch/x86/include/asm/bug.h:93,
                    from include/linux/bug.h:5,
                    from include/linux/mmdebug.h:5,
                    from include/linux/mm.h:9,
                    from drivers/gpu/drm/ttm/ttm_device.c:30:
   drivers/gpu/drm/ttm/ttm_device.c:132:15: error: conflicting types for 'ttm_global_swapout'
     132 | EXPORT_SYMBOL(ttm_global_swapout);
         |               ^~~~~~~~~~~~~~~~~~
   include/linux/export.h:98:21: note: in definition of macro '___EXPORT_SYMBOL'
      98 |  extern typeof(sym) sym;       \
         |                     ^~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
     158 | #define EXPORT_SYMBOL(sym)  _EXPORT_SYMBOL(sym, "")
         |                             ^~~~~~~~~~~~~~
   drivers/gpu/drm/ttm/ttm_device.c:132:1: note: in expansion of macro 'EXPORT_SYMBOL'
     132 | EXPORT_SYMBOL(ttm_global_swapout);
         | ^~~~~~~~~~~~~
   In file included from drivers/gpu/drm/ttm/ttm_device.c:32:
   include/drm/ttm/ttm_device.h:300:6: note: previous declaration of 'ttm_global_swapout' was here
     300 | long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
         |      ^~~~~~~~~~~~~~~~~~


vim +/ttm_global_swapout +109 drivers/gpu/drm/ttm/ttm_device.c

   104	
   105	/**
   106	 * A buffer object shrink method that tries to swap out the first
   107	 * buffer object on the global::swap_lru list.
   108	 */
 > 109	int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
   110	{
   111		struct ttm_global *glob = &ttm_glob;
   112		struct ttm_buffer_object *bo;
   113		unsigned i;
   114		int ret;
   115	
   116		spin_lock(&glob->lru_lock);
   117		for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
   118			list_for_each_entry(bo, &glob->swap_lru[i], swap) {
   119				uint32_t num_pages = bo->ttm->num_pages;
   120	
   121				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
   122				/* ttm_bo_swapout has dropped the lru_lock */
   123				if (!ret)
   124					return num_pages;
   125				if (ret != -EBUSY)
   126					return ret;
   127			}
   128		}
   129		spin_unlock(&glob->lru_lock);
   130		return 0;
   131	}
   132	EXPORT_SYMBOL(ttm_global_swapout);
   133	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 37450 bytes --]

[-- Attachment #3: Type: text/plain, Size: 160 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/3] drm/ttm: move swapout logic around v2
@ 2021-03-18 15:08   ` kernel test robot
  0 siblings, 0 replies; 18+ messages in thread
From: kernel test robot @ 2021-03-18 15:08 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 4654 bytes --]

Hi "Christian,

I love your patch! Yet something to improve:

[auto build test ERROR on drm-tip/drm-tip]
[also build test ERROR on next-20210318]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: i386-randconfig-m021-20210318 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/a454d56ea061b53d24a62a700743e4508dd6c9b1
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
        git checkout a454d56ea061b53d24a62a700743e4508dd6c9b1
        # save the attached .config to linux build tree
        make W=1 ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> drivers/gpu/drm/ttm/ttm_device.c:109:5: error: conflicting types for 'ttm_global_swapout'
     109 | int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
         |     ^~~~~~~~~~~~~~~~~~
   In file included from drivers/gpu/drm/ttm/ttm_device.c:32:
   include/drm/ttm/ttm_device.h:300:6: note: previous declaration of 'ttm_global_swapout' was here
     300 | long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
         |      ^~~~~~~~~~~~~~~~~~
   In file included from include/linux/linkage.h:7,
                    from include/linux/kernel.h:7,
                    from include/asm-generic/bug.h:20,
                    from arch/x86/include/asm/bug.h:93,
                    from include/linux/bug.h:5,
                    from include/linux/mmdebug.h:5,
                    from include/linux/mm.h:9,
                    from drivers/gpu/drm/ttm/ttm_device.c:30:
   drivers/gpu/drm/ttm/ttm_device.c:132:15: error: conflicting types for 'ttm_global_swapout'
     132 | EXPORT_SYMBOL(ttm_global_swapout);
         |               ^~~~~~~~~~~~~~~~~~
   include/linux/export.h:98:21: note: in definition of macro '___EXPORT_SYMBOL'
      98 |  extern typeof(sym) sym;       \
         |                     ^~~
   include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
     155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
         |                                  ^~~~~~~~~~~~~~~
   include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
     158 | #define EXPORT_SYMBOL(sym)  _EXPORT_SYMBOL(sym, "")
         |                             ^~~~~~~~~~~~~~
   drivers/gpu/drm/ttm/ttm_device.c:132:1: note: in expansion of macro 'EXPORT_SYMBOL'
     132 | EXPORT_SYMBOL(ttm_global_swapout);
         | ^~~~~~~~~~~~~
   In file included from drivers/gpu/drm/ttm/ttm_device.c:32:
   include/drm/ttm/ttm_device.h:300:6: note: previous declaration of 'ttm_global_swapout' was here
     300 | long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
         |      ^~~~~~~~~~~~~~~~~~


vim +/ttm_global_swapout +109 drivers/gpu/drm/ttm/ttm_device.c

   104	
   105	/**
   106	 * A buffer object shrink method that tries to swap out the first
   107	 * buffer object on the global::swap_lru list.
   108	 */
 > 109	int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
   110	{
   111		struct ttm_global *glob = &ttm_glob;
   112		struct ttm_buffer_object *bo;
   113		unsigned i;
   114		int ret;
   115	
   116		spin_lock(&glob->lru_lock);
   117		for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
   118			list_for_each_entry(bo, &glob->swap_lru[i], swap) {
   119				uint32_t num_pages = bo->ttm->num_pages;
   120	
   121				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
   122				/* ttm_bo_swapout has dropped the lru_lock */
   123				if (!ret)
   124					return num_pages;
   125				if (ret != -EBUSY)
   126					return ret;
   127			}
   128		}
   129		spin_unlock(&glob->lru_lock);
   130		return 0;
   131	}
   132	EXPORT_SYMBOL(ttm_global_swapout);
   133	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 37450 bytes --]

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/3] drm/ttm: move swapout logic around v2
  2021-03-18 14:43 ` [PATCH 1/3] drm/ttm: move swapout logic around v2 Nirmoy
@ 2021-03-18 15:26   ` Christian König
  2021-03-18 19:46     ` Nirmoy
  0 siblings, 1 reply; 18+ messages in thread
From: Christian König @ 2021-03-18 15:26 UTC (permalink / raw)
  To: Nirmoy, dri-devel; +Cc: ray.huang, matthew.william.auld

Am 18.03.21 um 15:43 schrieb Nirmoy:
> Hi Christian,
>
> On 3/18/21 1:47 PM, Christian König wrote:
> <snip>
>>   /**
>>    * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
>> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
>> index 035bbc044a3b..6a0b267d4fe6 100644
>> --- a/include/drm/ttm/ttm_device.h
>> +++ b/include/drm/ttm/ttm_device.h
>> @@ -297,6 +297,8 @@ struct ttm_device {
>>       struct delayed_work wq;
>>   };
>>   +long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t 
>> gfp_flags);
>
>
> There is a typo here, long -> int.

Ah, yes. I missed to add the change to the header file.

Thanks for pointing that out,
Christian.

>
>
> Thanks,
>
> Nirmoy
>
>
>> +
>>   static inline struct ttm_resource_manager *
>>   ttm_manager_type(struct ttm_device *bdev, int mem_type)
>>   {

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/3] drm/ttm: move swapout logic around v2
  2021-03-18 12:47 [PATCH 1/3] drm/ttm: move swapout logic around v2 Christian König
@ 2021-03-18 18:13   ` kernel test robot
  2021-03-18 12:47 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
                     ` (5 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: kernel test robot @ 2021-03-18 18:13 UTC (permalink / raw)
  To: Christian König, dri-devel
  Cc: clang-built-linux, ray.huang, kbuild-all, matthew.william.auld

[-- Attachment #1: Type: text/plain, Size: 3143 bytes --]

Hi "Christian,

I love your patch! Yet something to improve:

[auto build test ERROR on drm-tip/drm-tip]
[also build test ERROR on next-20210318]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next linus/master v5.12-rc3]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-a005-20210318 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 6db3ab2903f42712f44000afb5aa467efbd25f35)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install x86_64 cross compiling tool for clang build
        # apt-get install binutils-x86-64-linux-gnu
        # https://github.com/0day-ci/linux/commit/a454d56ea061b53d24a62a700743e4508dd6c9b1
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
        git checkout a454d56ea061b53d24a62a700743e4508dd6c9b1
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> drivers/gpu/drm/ttm/ttm_device.c:109:5: error: conflicting types for 'ttm_global_swapout'
   int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
       ^
   include/drm/ttm/ttm_device.h:300:6: note: previous declaration is here
   long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
        ^
   1 error generated.


vim +/ttm_global_swapout +109 drivers/gpu/drm/ttm/ttm_device.c

   104	
   105	/**
   106	 * A buffer object shrink method that tries to swap out the first
   107	 * buffer object on the global::swap_lru list.
   108	 */
 > 109	int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
   110	{
   111		struct ttm_global *glob = &ttm_glob;
   112		struct ttm_buffer_object *bo;
   113		unsigned i;
   114		int ret;
   115	
   116		spin_lock(&glob->lru_lock);
   117		for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
   118			list_for_each_entry(bo, &glob->swap_lru[i], swap) {
   119				uint32_t num_pages = bo->ttm->num_pages;
   120	
   121				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
   122				/* ttm_bo_swapout has dropped the lru_lock */
   123				if (!ret)
   124					return num_pages;
   125				if (ret != -EBUSY)
   126					return ret;
   127			}
   128		}
   129		spin_unlock(&glob->lru_lock);
   130		return 0;
   131	}
   132	EXPORT_SYMBOL(ttm_global_swapout);
   133	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 36290 bytes --]

[-- Attachment #3: Type: text/plain, Size: 160 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/3] drm/ttm: move swapout logic around v2
@ 2021-03-18 18:13   ` kernel test robot
  0 siblings, 0 replies; 18+ messages in thread
From: kernel test robot @ 2021-03-18 18:13 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 3222 bytes --]

Hi "Christian,

I love your patch! Yet something to improve:

[auto build test ERROR on drm-tip/drm-tip]
[also build test ERROR on next-20210318]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next linus/master v5.12-rc3]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-a005-20210318 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 6db3ab2903f42712f44000afb5aa467efbd25f35)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install x86_64 cross compiling tool for clang build
        # apt-get install binutils-x86-64-linux-gnu
        # https://github.com/0day-ci/linux/commit/a454d56ea061b53d24a62a700743e4508dd6c9b1
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
        git checkout a454d56ea061b53d24a62a700743e4508dd6c9b1
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> drivers/gpu/drm/ttm/ttm_device.c:109:5: error: conflicting types for 'ttm_global_swapout'
   int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
       ^
   include/drm/ttm/ttm_device.h:300:6: note: previous declaration is here
   long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
        ^
   1 error generated.


vim +/ttm_global_swapout +109 drivers/gpu/drm/ttm/ttm_device.c

   104	
   105	/**
   106	 * A buffer object shrink method that tries to swap out the first
   107	 * buffer object on the global::swap_lru list.
   108	 */
 > 109	int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
   110	{
   111		struct ttm_global *glob = &ttm_glob;
   112		struct ttm_buffer_object *bo;
   113		unsigned i;
   114		int ret;
   115	
   116		spin_lock(&glob->lru_lock);
   117		for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
   118			list_for_each_entry(bo, &glob->swap_lru[i], swap) {
   119				uint32_t num_pages = bo->ttm->num_pages;
   120	
   121				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
   122				/* ttm_bo_swapout has dropped the lru_lock */
   123				if (!ret)
   124					return num_pages;
   125				if (ret != -EBUSY)
   126					return ret;
   127			}
   128		}
   129		spin_unlock(&glob->lru_lock);
   130		return 0;
   131	}
   132	EXPORT_SYMBOL(ttm_global_swapout);
   133	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 36290 bytes --]

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/3] drm/ttm: move swapout logic around v2
  2021-03-18 15:26   ` Christian König
@ 2021-03-18 19:46     ` Nirmoy
  0 siblings, 0 replies; 18+ messages in thread
From: Nirmoy @ 2021-03-18 19:46 UTC (permalink / raw)
  To: Christian König, dri-devel; +Cc: ray.huang, matthew.william.auld


On 3/18/21 4:26 PM, Christian König wrote:
> Am 18.03.21 um 15:43 schrieb Nirmoy:
>> Hi Christian,
>>
>> On 3/18/21 1:47 PM, Christian König wrote:
>> <snip>
>>>   /**
>>>    * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
>>> diff --git a/include/drm/ttm/ttm_device.h 
>>> b/include/drm/ttm/ttm_device.h
>>> index 035bbc044a3b..6a0b267d4fe6 100644
>>> --- a/include/drm/ttm/ttm_device.h
>>> +++ b/include/drm/ttm/ttm_device.h
>>> @@ -297,6 +297,8 @@ struct ttm_device {
>>>       struct delayed_work wq;
>>>   };
>>>   +long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t 
>>> gfp_flags);
>>
>>
>> There is a typo here, long -> int.
>
> Ah, yes. I missed to add the change to the header file.


I have tested the patch series on qxl and vmwgfx.

With that typo fixed, the series is Tested-by: Nirmoy Das 
<nirmoy.das@amd.com>


>
> Thanks for pointing that out,
> Christian.
>
>>
>>
>> Thanks,
>>
>> Nirmoy
>>
>>
>>> +
>>>   static inline struct ttm_resource_manager *
>>>   ttm_manager_type(struct ttm_device *bdev, int mem_type)
>>>   {
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/3] drm/ttm: move swapout logic around v2
  2021-03-18 12:47 [PATCH 1/3] drm/ttm: move swapout logic around v2 Christian König
                   ` (4 preceding siblings ...)
  2021-03-18 18:13   ` kernel test robot
@ 2021-03-19  4:28 ` Huang Rui
  2021-03-19  9:41 ` Matthew Auld
  6 siblings, 0 replies; 18+ messages in thread
From: Huang Rui @ 2021-03-19  4:28 UTC (permalink / raw)
  To: Christian K�nig; +Cc: matthew.william.auld, dri-devel

On Thu, Mar 18, 2021 at 08:47:17PM +0800, Christian K�nig wrote:
> Move the iteration of the global lru into the new function
> ttm_global_swapout() and use that instead in drivers.
> 
> v2: consistently return int
> 
> Signed-off-by: Christian K?nig <christian.koenig@amd.com>

Reviewed-by: Huang Rui <ray.huang@amd.com>

> ---
>  drivers/gpu/drm/ttm/ttm_bo.c        | 57 ++++++++---------------------
>  drivers/gpu/drm/ttm/ttm_device.c    | 29 +++++++++++++++
>  drivers/gpu/drm/ttm/ttm_tt.c        |  2 +-
>  drivers/gpu/drm/vmwgfx/ttm_memory.c |  3 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  2 +-
>  include/drm/ttm/ttm_bo_api.h        |  3 +-
>  include/drm/ttm/ttm_device.h        |  2 +
>  7 files changed, 53 insertions(+), 45 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 3c23e863a3f0..66e00b404ec3 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -1193,56 +1193,35 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
>  }
>  EXPORT_SYMBOL(ttm_bo_wait);
>  
> -/*
> - * A buffer object shrink method that tries to swap out the first
> - * buffer object on the bo_global::swap_lru list.
> - */
> -int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
> +int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
> +		   gfp_t gfp_flags)
>  {
>  	struct ttm_global *glob = &ttm_glob;
> -	struct ttm_buffer_object *bo;
> -	int ret = -EBUSY;
>  	bool locked;
> -	unsigned i;
> -
> -	spin_lock(&glob->lru_lock);
> -	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> -		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
> -			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
> -							    NULL))
> -				continue;
> -
> -			if (!ttm_bo_get_unless_zero(bo)) {
> -				if (locked)
> -					dma_resv_unlock(bo->base.resv);
> -				continue;
> -			}
> +	int ret;
>  
> -			ret = 0;
> -			break;
> -		}
> -		if (!ret)
> -			break;
> -	}
> +	if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
> +		return -EBUSY;
>  
> -	if (ret) {
> -		spin_unlock(&glob->lru_lock);
> -		return ret;
> +	if (!ttm_bo_get_unless_zero(bo)) {
> +		if (locked)
> +			dma_resv_unlock(bo->base.resv);
> +		return -EBUSY;
>  	}
>  
>  	if (bo->deleted) {
> -		ret = ttm_bo_cleanup_refs(bo, false, false, locked);
> +		ttm_bo_cleanup_refs(bo, false, false, locked);
>  		ttm_bo_put(bo);
> -		return ret;
> +		return 0;
>  	}
>  
>  	ttm_bo_del_from_lru(bo);
> +	/* TODO: Cleanup the locking */
>  	spin_unlock(&glob->lru_lock);
>  
> -	/**
> +	/*
>  	 * Move to system cached
>  	 */
> -
>  	if (bo->mem.mem_type != TTM_PL_SYSTEM) {
>  		struct ttm_operation_ctx ctx = { false, false };
>  		struct ttm_resource evict_mem;
> @@ -1262,29 +1241,26 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
>  		}
>  	}
>  
> -	/**
> +	/*
>  	 * Make sure BO is idle.
>  	 */
> -
>  	ret = ttm_bo_wait(bo, false, false);
>  	if (unlikely(ret != 0))
>  		goto out;
>  
>  	ttm_bo_unmap_virtual(bo);
>  
> -	/**
> +	/*
>  	 * Swap out. Buffer will be swapped in again as soon as
>  	 * anyone tries to access a ttm page.
>  	 */
> -
>  	if (bo->bdev->funcs->swap_notify)
>  		bo->bdev->funcs->swap_notify(bo);
>  
>  	ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
>  out:
>  
> -	/**
> -	 *
> +	/*
>  	 * Unreserve without putting on LRU to avoid swapping out an
>  	 * already swapped buffer.
>  	 */
> @@ -1293,7 +1269,6 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
>  	ttm_bo_put(bo);
>  	return ret;
>  }
> -EXPORT_SYMBOL(ttm_bo_swapout);
>  
>  void ttm_bo_tt_destroy(struct ttm_buffer_object *bo)
>  {
> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> index 95e1b7b1f2e6..b1424189fdfb 100644
> --- a/drivers/gpu/drm/ttm/ttm_device.c
> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> @@ -102,6 +102,35 @@ static int ttm_global_init(void)
>  	return ret;
>  }
>  
> +/**
> + * A buffer object shrink method that tries to swap out the first
> + * buffer object on the global::swap_lru list.
> + */
> +int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
> +{
> +	struct ttm_global *glob = &ttm_glob;
> +	struct ttm_buffer_object *bo;
> +	unsigned i;
> +	int ret;
> +
> +	spin_lock(&glob->lru_lock);
> +	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> +		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
> +			uint32_t num_pages = bo->ttm->num_pages;
> +
> +			ret = ttm_bo_swapout(bo, ctx, gfp_flags);
> +			/* ttm_bo_swapout has dropped the lru_lock */
> +			if (!ret)
> +				return num_pages;
> +			if (ret != -EBUSY)
> +				return ret;
> +		}
> +	}
> +	spin_unlock(&glob->lru_lock);
> +	return 0;
> +}
> +EXPORT_SYMBOL(ttm_global_swapout);
> +
>  static void ttm_init_sysman(struct ttm_device *bdev)
>  {
>  	struct ttm_resource_manager *man = &bdev->sysman;
> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
> index 2f0833c98d2c..95b5cff25f4c 100644
> --- a/drivers/gpu/drm/ttm/ttm_tt.c
> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> @@ -369,7 +369,7 @@ static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink,
>  	};
>  	int ret;
>  
> -	ret = ttm_bo_swapout(&ctx, GFP_NOFS);
> +	ret = ttm_global_swapout(&ctx, GFP_NOFS);
>  	return ret < 0 ? SHRINK_EMPTY : ret;
>  }
>  
> diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c
> index e972af07d029..104b95a8c7a2 100644
> --- a/drivers/gpu/drm/vmwgfx/ttm_memory.c
> +++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c
> @@ -38,6 +38,7 @@
>  
>  #include <drm/drm_device.h>
>  #include <drm/drm_file.h>
> +#include <drm/ttm/ttm_device.h>
>  
>  #include "ttm_memory.h"
>  
> @@ -277,7 +278,7 @@ static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq,
>  
>  	while (ttm_zones_above_swap_target(glob, from_wq, extra)) {
>  		spin_unlock(&glob->lock);
> -		ret = ttm_bo_swapout(ctx, GFP_KERNEL);
> +		ret = ttm_global_swapout(ctx, GFP_KERNEL);
>  		spin_lock(&glob->lock);
>  		if (unlikely(ret < 0))
>  			break;
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> index 6910111099c8..b991422e156c 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> @@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev)
>  	vmw_execbuf_release_pinned_bo(dev_priv);
>  	vmw_resource_evict_all(dev_priv);
>  	vmw_release_device_early(dev_priv);
> -	while (ttm_bo_swapout(&ctx, GFP_KERNEL) > 0);
> +	while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
>  	if (dev_priv->enable_fb)
>  		vmw_fifo_resource_dec(dev_priv);
>  	if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 4fb523dfab32..5044ac330858 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -560,7 +560,8 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file *filp,
>  		  const char __user *wbuf, char __user *rbuf,
>  		  size_t count, loff_t *f_pos, bool write);
>  
> -int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
> +int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
> +		   gfp_t gfp_flags);
>  
>  /**
>   * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> index 035bbc044a3b..6a0b267d4fe6 100644
> --- a/include/drm/ttm/ttm_device.h
> +++ b/include/drm/ttm/ttm_device.h
> @@ -297,6 +297,8 @@ struct ttm_device {
>  	struct delayed_work wq;
>  };
>  
> +long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
> +
>  static inline struct ttm_resource_manager *
>  ttm_manager_type(struct ttm_device *bdev, int mem_type)
>  {
> -- 
> 2.25.1
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel&amp;data=04%7C01%7Cray.huang%40amd.com%7C5e884dc7218341f5405a08d8ea0bfab3%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637516684478741238%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=brmyK0mL95w5aG52hDFdMDw1CymMDnuIRTQK64roKKk%3D&amp;reserved=0
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/3] drm/ttm: remove swap LRU v3
  2021-03-18 12:47 ` [PATCH 2/3] drm/ttm: remove swap LRU v3 Christian König
@ 2021-03-19  4:30   ` Huang Rui
  0 siblings, 0 replies; 18+ messages in thread
From: Huang Rui @ 2021-03-19  4:30 UTC (permalink / raw)
  To: Christian König; +Cc: matthew.william.auld, dri-devel

On Thu, Mar 18, 2021 at 08:47:18PM +0800, Christian König wrote:
> Instead evict round robin from each devices SYSTEM and TT domain.
> 
> v2: reorder num_pages access reported by Dan's script
> v3: fix rebase fallout, num_pages should be 32bit
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

Reviewed-by: Huang Rui <ray.huang@amd.com>

> ---
>  drivers/gpu/drm/ttm/ttm_bo.c        | 29 --------------
>  drivers/gpu/drm/ttm/ttm_bo_util.c   |  1 -
>  drivers/gpu/drm/ttm/ttm_device.c    | 60 +++++++++++++++++++++--------
>  drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  2 +-
>  include/drm/ttm/ttm_bo_api.h        |  1 -
>  include/drm/ttm/ttm_bo_driver.h     |  1 -
>  include/drm/ttm/ttm_device.h        |  7 +---
>  7 files changed, 48 insertions(+), 53 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 66e00b404ec3..3673157527ff 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -73,7 +73,6 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
>  {
>  	struct ttm_device *bdev = bo->bdev;
>  
> -	list_del_init(&bo->swap);
>  	list_del_init(&bo->lru);
>  
>  	if (bdev->funcs->del_from_lru_notify)
> @@ -105,16 +104,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
>  
>  	man = ttm_manager_type(bdev, mem->mem_type);
>  	list_move_tail(&bo->lru, &man->lru[bo->priority]);
> -	if (man->use_tt && bo->ttm &&
> -	    !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
> -				     TTM_PAGE_FLAG_SWAPPED))) {
> -		struct list_head *swap;
> -
> -		swap = &ttm_glob.swap_lru[bo->priority];
> -		list_move_tail(&bo->swap, swap);
> -	} else {
> -		list_del_init(&bo->swap);
> -	}
>  
>  	if (bdev->funcs->del_from_lru_notify)
>  		bdev->funcs->del_from_lru_notify(bo);
> @@ -129,9 +118,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
>  			ttm_bo_bulk_move_set_pos(&bulk->vram[bo->priority], bo);
>  			break;
>  		}
> -		if (bo->ttm && !(bo->ttm->page_flags &
> -				 (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED)))
> -			ttm_bo_bulk_move_set_pos(&bulk->swap[bo->priority], bo);
>  	}
>  }
>  EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
> @@ -169,20 +155,6 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>  		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
>  				    &pos->last->lru);
>  	}
> -
> -	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> -		struct ttm_lru_bulk_move_pos *pos = &bulk->swap[i];
> -		struct list_head *lru;
> -
> -		if (!pos->first)
> -			continue;
> -
> -		dma_resv_assert_held(pos->first->base.resv);
> -		dma_resv_assert_held(pos->last->base.resv);
> -
> -		lru = &ttm_glob.swap_lru[i];
> -		list_bulk_move_tail(lru, &pos->first->swap, &pos->last->swap);
> -	}
>  }
>  EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
>  
> @@ -1065,7 +1037,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
>  	kref_init(&bo->kref);
>  	INIT_LIST_HEAD(&bo->lru);
>  	INIT_LIST_HEAD(&bo->ddestroy);
> -	INIT_LIST_HEAD(&bo->swap);
>  	bo->bdev = bdev;
>  	bo->type = type;
>  	bo->mem.mem_type = TTM_PL_SYSTEM;
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 031e5819fec4..a2a17c84ceb3 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -303,7 +303,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
>  	atomic_inc(&ttm_glob.bo_count);
>  	INIT_LIST_HEAD(&fbo->base.ddestroy);
>  	INIT_LIST_HEAD(&fbo->base.lru);
> -	INIT_LIST_HEAD(&fbo->base.swap);
>  	fbo->base.moving = NULL;
>  	drm_vma_node_reset(&fbo->base.base.vma_node);
>  
> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> index b1424189fdfb..2096a0fd9c35 100644
> --- a/drivers/gpu/drm/ttm/ttm_device.c
> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> @@ -67,7 +67,6 @@ static int ttm_global_init(void)
>  	unsigned long num_pages;
>  	struct sysinfo si;
>  	int ret = 0;
> -	unsigned i;
>  
>  	mutex_lock(&ttm_global_mutex);
>  	if (++ttm_glob_use_count > 1)
> @@ -90,8 +89,6 @@ static int ttm_global_init(void)
>  		goto out;
>  	}
>  
> -	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
> -		INIT_LIST_HEAD(&glob->swap_lru[i]);
>  	INIT_LIST_HEAD(&glob->device_list);
>  	atomic_set(&glob->bo_count, 0);
>  
> @@ -109,27 +106,60 @@ static int ttm_global_init(void)
>  int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
>  {
>  	struct ttm_global *glob = &ttm_glob;
> +	struct ttm_device *bdev;
> +	int ret = -EBUSY;
> +
> +	mutex_lock(&ttm_global_mutex);
> +	list_for_each_entry(bdev, &glob->device_list, device_list) {
> +		ret = ttm_device_swapout(bdev, ctx, gfp_flags);
> +		if (ret > 0) {
> +			list_move_tail(&bdev->device_list, &glob->device_list);
> +			break;
> +		}
> +	}
> +	mutex_unlock(&ttm_global_mutex);
> +	return ret;
> +}
> +EXPORT_SYMBOL(ttm_global_swapout);
> +
> +long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> +			gfp_t gfp_flags)
> +{
> +	struct ttm_global *glob = &ttm_glob;
> +	struct ttm_resource_manager *man;
>  	struct ttm_buffer_object *bo;
> -	unsigned i;
> +	unsigned i, j;
>  	int ret;
>  
>  	spin_lock(&glob->lru_lock);
> -	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> -		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
> -			uint32_t num_pages = bo->ttm->num_pages;
> -
> -			ret = ttm_bo_swapout(bo, ctx, gfp_flags);
> -			/* ttm_bo_swapout has dropped the lru_lock */
> -			if (!ret)
> -				return num_pages;
> -			if (ret != -EBUSY)
> -				return ret;
> +	for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
> +		man = ttm_manager_type(bdev, i);
> +		if (!man || !man->use_tt)
> +			continue;
> +
> +		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
> +			list_for_each_entry(bo, &man->lru[j], lru) {
> +				uint32_t num_pages;
> +
> +				if (!bo->ttm ||
> +				    bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
> +				    bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
> +					continue;
> +
> +				num_pages = bo->ttm->num_pages;
> +				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
> +				/* ttm_bo_swapout has dropped the lru_lock */
> +				if (!ret)
> +					return num_pages;
> +				if (ret != -EBUSY)
> +					return ret;
> +			}
>  		}
>  	}
>  	spin_unlock(&glob->lru_lock);
>  	return 0;
>  }
> -EXPORT_SYMBOL(ttm_global_swapout);
> +EXPORT_SYMBOL(ttm_device_swapout);
>  
>  static void ttm_init_sysman(struct ttm_device *bdev)
>  {
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> index b991422e156c..4e41d8221f06 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> @@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev)
>  	vmw_execbuf_release_pinned_bo(dev_priv);
>  	vmw_resource_evict_all(dev_priv);
>  	vmw_release_device_early(dev_priv);
> -	while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
> +	while (ttm_device_swapout(&dev_priv->bdev, &ctx, GFP_KERNEL) > 0);
>  	if (dev_priv->enable_fb)
>  		vmw_fifo_resource_dec(dev_priv);
>  	if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 5044ac330858..3587f660e8f4 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -144,7 +144,6 @@ struct ttm_buffer_object {
>  
>  	struct list_head lru;
>  	struct list_head ddestroy;
> -	struct list_head swap;
>  
>  	/**
>  	 * Members protected by a bo reservation.
> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> index 8959c0075cfd..d007feef7676 100644
> --- a/include/drm/ttm/ttm_bo_driver.h
> +++ b/include/drm/ttm/ttm_bo_driver.h
> @@ -69,7 +69,6 @@ struct ttm_lru_bulk_move_pos {
>  struct ttm_lru_bulk_move {
>  	struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
>  	struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
> -	struct ttm_lru_bulk_move_pos swap[TTM_MAX_BO_PRIORITY];
>  };
>  
>  /*
> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> index 6a0b267d4fe6..cda6efb4c34b 100644
> --- a/include/drm/ttm/ttm_device.h
> +++ b/include/drm/ttm/ttm_device.h
> @@ -63,11 +63,6 @@ extern struct ttm_global {
>  	 */
>  	struct list_head device_list;
>  
> -	/**
> -	 * Protected by the lru_lock.
> -	 */
> -	struct list_head swap_lru[TTM_MAX_BO_PRIORITY];
> -
>  	/**
>  	 * Internal protection.
>  	 */
> @@ -298,6 +293,8 @@ struct ttm_device {
>  };
>  
>  long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
> +long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> +		       gfp_t gfp_flags);
>  
>  static inline struct ttm_resource_manager *
>  ttm_manager_type(struct ttm_device *bdev, int mem_type)
> -- 
> 2.25.1
> 
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
  2021-03-18 12:47 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
@ 2021-03-19  4:32   ` Huang Rui
  2021-03-19 12:10     ` Christian König
  0 siblings, 1 reply; 18+ messages in thread
From: Huang Rui @ 2021-03-19  4:32 UTC (permalink / raw)
  To: Christian König; +Cc: matthew.william.auld, dri-devel

On Thu, Mar 18, 2021 at 08:47:19PM +0800, Christian König wrote:
> Instead of having a global lock.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  8 ++---
>  drivers/gpu/drm/qxl/qxl_release.c      |  5 +--
>  drivers/gpu/drm/ttm/ttm_bo.c           | 49 ++++++++++++--------------
>  drivers/gpu/drm/ttm/ttm_device.c       | 12 +++----
>  drivers/gpu/drm/ttm/ttm_execbuf_util.c |  8 ++---
>  drivers/gpu/drm/ttm/ttm_resource.c     |  9 +++--
>  include/drm/ttm/ttm_bo_driver.h        |  4 +--
>  include/drm/ttm/ttm_device.h           |  4 +--
>  8 files changed, 43 insertions(+), 56 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 9d19078246c8..ae18c0e32347 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>  	struct amdgpu_vm_bo_base *bo_base;
>  
>  	if (vm->bulk_moveable) {
> -		spin_lock(&ttm_glob.lru_lock);
> +		spin_lock(&adev->mman.bdev.lru_lock);

Could you please explain why do you want to instead of the global lock?

Thanks,
Ray

>  		ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
> -		spin_unlock(&ttm_glob.lru_lock);
> +		spin_unlock(&adev->mman.bdev.lru_lock);
>  		return;
>  	}
>  
>  	memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
>  
> -	spin_lock(&ttm_glob.lru_lock);
> +	spin_lock(&adev->mman.bdev.lru_lock);
>  	list_for_each_entry(bo_base, &vm->idle, vm_status) {
>  		struct amdgpu_bo *bo = bo_base->bo;
>  
> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>  						&bo->shadow->tbo.mem,
>  						&vm->lru_bulk_move);
>  	}
> -	spin_unlock(&ttm_glob.lru_lock);
> +	spin_unlock(&adev->mman.bdev.lru_lock);
>  
>  	vm->bulk_moveable = true;
>  }
> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
> index f5845c96d414..b19f2f00b215 100644
> --- a/drivers/gpu/drm/qxl/qxl_release.c
> +++ b/drivers/gpu/drm/qxl/qxl_release.c
> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
>  		       release->id | 0xf0000000, release->base.seqno);
>  	trace_dma_fence_emit(&release->base);
>  
> -	spin_lock(&ttm_glob.lru_lock);
> -
>  	list_for_each_entry(entry, &release->bos, head) {
>  		bo = entry->bo;
>  
>  		dma_resv_add_shared_fence(bo->base.resv, &release->base);
> -		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> +		ttm_bo_move_to_lru_tail_unlocked(bo);
>  		dma_resv_unlock(bo->base.resv);
>  	}
> -	spin_unlock(&ttm_glob.lru_lock);
>  	ww_acquire_fini(&release->ticket);
>  }
>  
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 3673157527ff..2d2ac532987e 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -243,9 +243,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
>  		 * reference it any more. The only tricky case is the trylock on
>  		 * the resv object while holding the lru_lock.
>  		 */
> -		spin_lock(&ttm_glob.lru_lock);
> +		spin_lock(&bo->bdev->lru_lock);
>  		bo->base.resv = &bo->base._resv;
> -		spin_unlock(&ttm_glob.lru_lock);
> +		spin_unlock(&bo->bdev->lru_lock);
>  	}
>  
>  	return r;
> @@ -304,7 +304,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>  
>  		if (unlock_resv)
>  			dma_resv_unlock(bo->base.resv);
> -		spin_unlock(&ttm_glob.lru_lock);
> +		spin_unlock(&bo->bdev->lru_lock);
>  
>  		lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
>  						 30 * HZ);
> @@ -314,7 +314,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>  		else if (lret == 0)
>  			return -EBUSY;
>  
> -		spin_lock(&ttm_glob.lru_lock);
> +		spin_lock(&bo->bdev->lru_lock);
>  		if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
>  			/*
>  			 * We raced, and lost, someone else holds the reservation now,
> @@ -324,7 +324,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>  			 * delayed destruction would succeed, so just return success
>  			 * here.
>  			 */
> -			spin_unlock(&ttm_glob.lru_lock);
> +			spin_unlock(&bo->bdev->lru_lock);
>  			return 0;
>  		}
>  		ret = 0;
> @@ -333,13 +333,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>  	if (ret || unlikely(list_empty(&bo->ddestroy))) {
>  		if (unlock_resv)
>  			dma_resv_unlock(bo->base.resv);
> -		spin_unlock(&ttm_glob.lru_lock);
> +		spin_unlock(&bo->bdev->lru_lock);
>  		return ret;
>  	}
>  
>  	ttm_bo_del_from_lru(bo);
>  	list_del_init(&bo->ddestroy);
> -	spin_unlock(&ttm_glob.lru_lock);
> +	spin_unlock(&bo->bdev->lru_lock);
>  	ttm_bo_cleanup_memtype_use(bo);
>  
>  	if (unlock_resv)
> @@ -356,13 +356,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>   */
>  bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>  {
> -	struct ttm_global *glob = &ttm_glob;
>  	struct list_head removed;
>  	bool empty;
>  
>  	INIT_LIST_HEAD(&removed);
>  
> -	spin_lock(&glob->lru_lock);
> +	spin_lock(&bdev->lru_lock);
>  	while (!list_empty(&bdev->ddestroy)) {
>  		struct ttm_buffer_object *bo;
>  
> @@ -373,24 +372,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>  			continue;
>  
>  		if (remove_all || bo->base.resv != &bo->base._resv) {
> -			spin_unlock(&glob->lru_lock);
> +			spin_unlock(&bdev->lru_lock);
>  			dma_resv_lock(bo->base.resv, NULL);
>  
> -			spin_lock(&glob->lru_lock);
> +			spin_lock(&bdev->lru_lock);
>  			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>  
>  		} else if (dma_resv_trylock(bo->base.resv)) {
>  			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>  		} else {
> -			spin_unlock(&glob->lru_lock);
> +			spin_unlock(&bdev->lru_lock);
>  		}
>  
>  		ttm_bo_put(bo);
> -		spin_lock(&glob->lru_lock);
> +		spin_lock(&bdev->lru_lock);
>  	}
>  	list_splice_tail(&removed, &bdev->ddestroy);
>  	empty = list_empty(&bdev->ddestroy);
> -	spin_unlock(&glob->lru_lock);
> +	spin_unlock(&bdev->lru_lock);
>  
>  	return empty;
>  }
> @@ -425,7 +424,7 @@ static void ttm_bo_release(struct kref *kref)
>  		ttm_bo_flush_all_fences(bo);
>  		bo->deleted = true;
>  
> -		spin_lock(&ttm_glob.lru_lock);
> +		spin_lock(&bo->bdev->lru_lock);
>  
>  		/*
>  		 * Make pinned bos immediately available to
> @@ -442,17 +441,17 @@ static void ttm_bo_release(struct kref *kref)
>  
>  		kref_init(&bo->kref);
>  		list_add_tail(&bo->ddestroy, &bdev->ddestroy);
> -		spin_unlock(&ttm_glob.lru_lock);
> +		spin_unlock(&bo->bdev->lru_lock);
>  
>  		schedule_delayed_work(&bdev->wq,
>  				      ((HZ / 100) < 1) ? 1 : HZ / 100);
>  		return;
>  	}
>  
> -	spin_lock(&ttm_glob.lru_lock);
> +	spin_lock(&bo->bdev->lru_lock);
>  	ttm_bo_del_from_lru(bo);
>  	list_del(&bo->ddestroy);
> -	spin_unlock(&ttm_glob.lru_lock);
> +	spin_unlock(&bo->bdev->lru_lock);
>  
>  	ttm_bo_cleanup_memtype_use(bo);
>  	dma_resv_unlock(bo->base.resv);
> @@ -626,7 +625,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>  	unsigned i;
>  	int ret;
>  
> -	spin_lock(&ttm_glob.lru_lock);
> +	spin_lock(&bo->bdev->lru_lock);
>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>  		list_for_each_entry(bo, &man->lru[i], lru) {
>  			bool busy;
> @@ -663,7 +662,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>  	if (!bo) {
>  		if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
>  			busy_bo = NULL;
> -		spin_unlock(&ttm_glob.lru_lock);
> +		spin_unlock(&bo->bdev->lru_lock);
>  		ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
>  		if (busy_bo)
>  			ttm_bo_put(busy_bo);
> @@ -677,7 +676,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>  		return ret;
>  	}
>  
> -	spin_unlock(&ttm_glob.lru_lock);
> +	spin_unlock(&bo->bdev->lru_lock);
>  
>  	ret = ttm_bo_evict(bo, ctx);
>  	if (locked)
> @@ -777,10 +776,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
>  	mem->mem_type = place->mem_type;
>  	mem->placement = place->flags;
>  
> -	spin_lock(&ttm_glob.lru_lock);
> +	spin_lock(&bo->bdev->lru_lock);
>  	ttm_bo_move_to_lru_tail(bo, mem, NULL);
> -	spin_unlock(&ttm_glob.lru_lock);
> -
> +	spin_unlock(&bo->bdev->lru_lock);
>  	return 0;
>  }
>  
> @@ -1167,7 +1165,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
>  int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>  		   gfp_t gfp_flags)
>  {
> -	struct ttm_global *glob = &ttm_glob;
>  	bool locked;
>  	int ret;
>  
> @@ -1188,7 +1185,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>  
>  	ttm_bo_del_from_lru(bo);
>  	/* TODO: Cleanup the locking */
> -	spin_unlock(&glob->lru_lock);
> +	spin_unlock(&bo->bdev->lru_lock);
>  
>  	/*
>  	 * Move to system cached
> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> index 2096a0fd9c35..85f6975d9872 100644
> --- a/drivers/gpu/drm/ttm/ttm_device.c
> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> @@ -81,7 +81,6 @@ static int ttm_global_init(void)
>  	ttm_pool_mgr_init(num_pages * 50 / 100);
>  	ttm_tt_mgr_init();
>  
> -	spin_lock_init(&glob->lru_lock);
>  	glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
>  
>  	if (unlikely(glob->dummy_read_page == NULL)) {
> @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
>  long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>  			gfp_t gfp_flags)
>  {
> -	struct ttm_global *glob = &ttm_glob;
>  	struct ttm_resource_manager *man;
>  	struct ttm_buffer_object *bo;
>  	unsigned i, j;
>  	int ret;
>  
> -	spin_lock(&glob->lru_lock);
> +	spin_lock(&bdev->lru_lock);
>  	for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
>  		man = ttm_manager_type(bdev, i);
>  		if (!man || !man->use_tt)
> @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>  			}
>  		}
>  	}
> -	spin_unlock(&glob->lru_lock);
> +	spin_unlock(&bdev->lru_lock);
>  	return 0;
>  }
>  EXPORT_SYMBOL(ttm_device_swapout);
> @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
>  
>  	bdev->vma_manager = vma_manager;
>  	INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
> +	spin_lock_init(&bdev->lru_lock);
>  	INIT_LIST_HEAD(&bdev->ddestroy);
>  	bdev->dev_mapping = mapping;
>  	mutex_lock(&ttm_global_mutex);
> @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
>  
>  void ttm_device_fini(struct ttm_device *bdev)
>  {
> -	struct ttm_global *glob = &ttm_glob;
>  	struct ttm_resource_manager *man;
>  	unsigned i;
>  
> @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
>  	if (ttm_bo_delayed_delete(bdev, true))
>  		pr_debug("Delayed destroy list was clean\n");
>  
> -	spin_lock(&glob->lru_lock);
> +	spin_lock(&bdev->lru_lock);
>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
>  		if (list_empty(&man->lru[0]))
>  			pr_debug("Swap list %d was clean\n", i);
> -	spin_unlock(&glob->lru_lock);
> +	spin_unlock(&bdev->lru_lock);
>  
>  	ttm_pool_fini(&bdev->pool);
>  	ttm_global_release();
> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> index 690ab97d52b7..071c48d672c6 100644
> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
>  	if (list_empty(list))
>  		return;
>  
> -	spin_lock(&ttm_glob.lru_lock);
>  	list_for_each_entry(entry, list, head) {
>  		struct ttm_buffer_object *bo = entry->bo;
>  
> -		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> +		ttm_bo_move_to_lru_tail_unlocked(bo);
>  		dma_resv_unlock(bo->base.resv);
>  	}
> -	spin_unlock(&ttm_glob.lru_lock);
>  
>  	if (ticket)
>  		ww_acquire_fini(ticket);
> @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>  	if (list_empty(list))
>  		return;
>  
> -	spin_lock(&ttm_glob.lru_lock);
>  	list_for_each_entry(entry, list, head) {
>  		struct ttm_buffer_object *bo = entry->bo;
>  
> @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>  			dma_resv_add_shared_fence(bo->base.resv, fence);
>  		else
>  			dma_resv_add_excl_fence(bo->base.resv, fence);
> -		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> +		ttm_bo_move_to_lru_tail_unlocked(bo);
>  		dma_resv_unlock(bo->base.resv);
>  	}
> -	spin_unlock(&ttm_glob.lru_lock);
>  	if (ticket)
>  		ww_acquire_fini(ticket);
>  }
> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
> index ed1672a9f332..04f2eef653ab 100644
> --- a/drivers/gpu/drm/ttm/ttm_resource.c
> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>  		.no_wait_gpu = false,
>  		.force_alloc = true
>  	};
> -	struct ttm_global *glob = &ttm_glob;
>  	struct dma_fence *fence;
>  	int ret;
>  	unsigned i;
> @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>  	 * Can't use standard list traversal since we're unlocking.
>  	 */
>  
> -	spin_lock(&glob->lru_lock);
> +	spin_lock(&bdev->lru_lock);
>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>  		while (!list_empty(&man->lru[i])) {
> -			spin_unlock(&glob->lru_lock);
> +			spin_unlock(&bdev->lru_lock);
>  			ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
>  						  NULL);
>  			if (ret)
>  				return ret;
> -			spin_lock(&glob->lru_lock);
> +			spin_lock(&bdev->lru_lock);
>  		}
>  	}
> -	spin_unlock(&glob->lru_lock);
> +	spin_unlock(&bdev->lru_lock);
>  
>  	spin_lock(&man->move_lock);
>  	fence = dma_fence_get(man->move);
> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> index d007feef7676..dbccac957f8f 100644
> --- a/include/drm/ttm/ttm_bo_driver.h
> +++ b/include/drm/ttm/ttm_bo_driver.h
> @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
>  static inline void
>  ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
>  {
> -	spin_lock(&ttm_glob.lru_lock);
> +	spin_lock(&bo->bdev->lru_lock);
>  	ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> -	spin_unlock(&ttm_glob.lru_lock);
> +	spin_unlock(&bo->bdev->lru_lock);
>  }
>  
>  static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> index cda6efb4c34b..bae56d29e8ff 100644
> --- a/include/drm/ttm/ttm_device.h
> +++ b/include/drm/ttm/ttm_device.h
> @@ -56,7 +56,6 @@ extern struct ttm_global {
>  	 */
>  
>  	struct page *dummy_read_page;
> -	spinlock_t lru_lock;
>  
>  	/**
>  	 * Protected by ttm_global_mutex.
> @@ -277,8 +276,9 @@ struct ttm_device {
>  	struct ttm_pool pool;
>  
>  	/*
> -	 * Protected by the global:lru lock.
> +	 * Protection for the per manager LRU and ddestroy lists.
>  	 */
> +	spinlock_t lru_lock;
>  	struct list_head ddestroy;
>  
>  	/*
> -- 
> 2.25.1
> 
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/3] drm/ttm: move swapout logic around v2
  2021-03-18 12:47 [PATCH 1/3] drm/ttm: move swapout logic around v2 Christian König
                   ` (5 preceding siblings ...)
  2021-03-19  4:28 ` Huang Rui
@ 2021-03-19  9:41 ` Matthew Auld
  6 siblings, 0 replies; 18+ messages in thread
From: Matthew Auld @ 2021-03-19  9:41 UTC (permalink / raw)
  To: Christian König; +Cc: ray.huang, ML dri-devel

On Thu, 18 Mar 2021 at 12:47, Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Move the iteration of the global lru into the new function
> ttm_global_swapout() and use that instead in drivers.
>
> v2: consistently return int
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

For the series,
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/3] drm/ttm: move swapout logic around v2
  2021-03-18 18:13   ` kernel test robot
@ 2021-03-19 12:07     ` Christian König
  -1 siblings, 0 replies; 18+ messages in thread
From: Christian König @ 2021-03-19 12:07 UTC (permalink / raw)
  To: dri-devel, lkml

General question for the audience: Is there any way to silence the build 
bot here?

This is a well known false positive.

Regards,
Christian.

Am 18.03.21 um 19:13 schrieb kernel test robot:
> Hi "Christian,
>
> I love your patch! Yet something to improve:
>
> [auto build test ERROR on drm-tip/drm-tip]
> [also build test ERROR on next-20210318]
> [cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next linus/master v5.12-rc3]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch]
>
> url:    https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
> base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
> config: x86_64-randconfig-a005-20210318 (attached as .config)
> compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 6db3ab2903f42712f44000afb5aa467efbd25f35)
> reproduce (this is a W=1 build):
>          wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>          chmod +x ~/bin/make.cross
>          # install x86_64 cross compiling tool for clang build
>          # apt-get install binutils-x86-64-linux-gnu
>          # https://github.com/0day-ci/linux/commit/a454d56ea061b53d24a62a700743e4508dd6c9b1
>          git remote add linux-review https://github.com/0day-ci/linux
>          git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
>          git checkout a454d56ea061b53d24a62a700743e4508dd6c9b1
>          # save the attached .config to linux build tree
>          COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64
>
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot <lkp@intel.com>
>
> All errors (new ones prefixed by >>):
>
>>> drivers/gpu/drm/ttm/ttm_device.c:109:5: error: conflicting types for 'ttm_global_swapout'
>     int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
>         ^
>     include/drm/ttm/ttm_device.h:300:6: note: previous declaration is here
>     long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
>          ^
>     1 error generated.
>
>
> vim +/ttm_global_swapout +109 drivers/gpu/drm/ttm/ttm_device.c
>
>     104	
>     105	/**
>     106	 * A buffer object shrink method that tries to swap out the first
>     107	 * buffer object on the global::swap_lru list.
>     108	 */
>   > 109	int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
>     110	{
>     111		struct ttm_global *glob = &ttm_glob;
>     112		struct ttm_buffer_object *bo;
>     113		unsigned i;
>     114		int ret;
>     115	
>     116		spin_lock(&glob->lru_lock);
>     117		for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>     118			list_for_each_entry(bo, &glob->swap_lru[i], swap) {
>     119				uint32_t num_pages = bo->ttm->num_pages;
>     120	
>     121				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
>     122				/* ttm_bo_swapout has dropped the lru_lock */
>     123				if (!ret)
>     124					return num_pages;
>     125				if (ret != -EBUSY)
>     126					return ret;
>     127			}
>     128		}
>     129		spin_unlock(&glob->lru_lock);
>     130		return 0;
>     131	}
>     132	EXPORT_SYMBOL(ttm_global_swapout);
>     133	
>
> ---
> 0-DAY CI Kernel Test Service, Intel Corporation
> https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/3] drm/ttm: move swapout logic around v2
@ 2021-03-19 12:07     ` Christian König
  0 siblings, 0 replies; 18+ messages in thread
From: Christian König @ 2021-03-19 12:07 UTC (permalink / raw)
  To: dri-devel, lkml

General question for the audience: Is there any way to silence the build 
bot here?

This is a well known false positive.

Regards,
Christian.

Am 18.03.21 um 19:13 schrieb kernel test robot:
> Hi "Christian,
>
> I love your patch! Yet something to improve:
>
> [auto build test ERROR on drm-tip/drm-tip]
> [also build test ERROR on next-20210318]
> [cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next linus/master v5.12-rc3]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch]
>
> url:    https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
> base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
> config: x86_64-randconfig-a005-20210318 (attached as .config)
> compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 6db3ab2903f42712f44000afb5aa467efbd25f35)
> reproduce (this is a W=1 build):
>          wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>          chmod +x ~/bin/make.cross
>          # install x86_64 cross compiling tool for clang build
>          # apt-get install binutils-x86-64-linux-gnu
>          # https://github.com/0day-ci/linux/commit/a454d56ea061b53d24a62a700743e4508dd6c9b1
>          git remote add linux-review https://github.com/0day-ci/linux
>          git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
>          git checkout a454d56ea061b53d24a62a700743e4508dd6c9b1
>          # save the attached .config to linux build tree
>          COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64
>
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot <lkp@intel.com>
>
> All errors (new ones prefixed by >>):
>
>>> drivers/gpu/drm/ttm/ttm_device.c:109:5: error: conflicting types for 'ttm_global_swapout'
>     int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
>         ^
>     include/drm/ttm/ttm_device.h:300:6: note: previous declaration is here
>     long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
>          ^
>     1 error generated.
>
>
> vim +/ttm_global_swapout +109 drivers/gpu/drm/ttm/ttm_device.c
>
>     104	
>     105	/**
>     106	 * A buffer object shrink method that tries to swap out the first
>     107	 * buffer object on the global::swap_lru list.
>     108	 */
>   > 109	int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
>     110	{
>     111		struct ttm_global *glob = &ttm_glob;
>     112		struct ttm_buffer_object *bo;
>     113		unsigned i;
>     114		int ret;
>     115	
>     116		spin_lock(&glob->lru_lock);
>     117		for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>     118			list_for_each_entry(bo, &glob->swap_lru[i], swap) {
>     119				uint32_t num_pages = bo->ttm->num_pages;
>     120	
>     121				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
>     122				/* ttm_bo_swapout has dropped the lru_lock */
>     123				if (!ret)
>     124					return num_pages;
>     125				if (ret != -EBUSY)
>     126					return ret;
>     127			}
>     128		}
>     129		spin_unlock(&glob->lru_lock);
>     130		return 0;
>     131	}
>     132	EXPORT_SYMBOL(ttm_global_swapout);
>     133	
>
> ---
> 0-DAY CI Kernel Test Service, Intel Corporation
> https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
  2021-03-19  4:32   ` Huang Rui
@ 2021-03-19 12:10     ` Christian König
  2021-03-22  7:13       ` Huang Rui
  0 siblings, 1 reply; 18+ messages in thread
From: Christian König @ 2021-03-19 12:10 UTC (permalink / raw)
  To: Huang Rui; +Cc: matthew.william.auld, dri-devel

Am 19.03.21 um 05:32 schrieb Huang Rui:
> On Thu, Mar 18, 2021 at 08:47:19PM +0800, Christian König wrote:
>> Instead of having a global lock.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  8 ++---
>>   drivers/gpu/drm/qxl/qxl_release.c      |  5 +--
>>   drivers/gpu/drm/ttm/ttm_bo.c           | 49 ++++++++++++--------------
>>   drivers/gpu/drm/ttm/ttm_device.c       | 12 +++----
>>   drivers/gpu/drm/ttm/ttm_execbuf_util.c |  8 ++---
>>   drivers/gpu/drm/ttm/ttm_resource.c     |  9 +++--
>>   include/drm/ttm/ttm_bo_driver.h        |  4 +--
>>   include/drm/ttm/ttm_device.h           |  4 +--
>>   8 files changed, 43 insertions(+), 56 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 9d19078246c8..ae18c0e32347 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>>   	struct amdgpu_vm_bo_base *bo_base;
>>   
>>   	if (vm->bulk_moveable) {
>> -		spin_lock(&ttm_glob.lru_lock);
>> +		spin_lock(&adev->mman.bdev.lru_lock);
> Could you please explain why do you want to instead of the global lock?

Potentially less contention.

But you are right mentioning this in the commit message is a good idea.

Regards,
Christian.

>
> Thanks,
> Ray
>
>>   		ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
>> -		spin_unlock(&ttm_glob.lru_lock);
>> +		spin_unlock(&adev->mman.bdev.lru_lock);
>>   		return;
>>   	}
>>   
>>   	memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
>>   
>> -	spin_lock(&ttm_glob.lru_lock);
>> +	spin_lock(&adev->mman.bdev.lru_lock);
>>   	list_for_each_entry(bo_base, &vm->idle, vm_status) {
>>   		struct amdgpu_bo *bo = bo_base->bo;
>>   
>> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>>   						&bo->shadow->tbo.mem,
>>   						&vm->lru_bulk_move);
>>   	}
>> -	spin_unlock(&ttm_glob.lru_lock);
>> +	spin_unlock(&adev->mman.bdev.lru_lock);
>>   
>>   	vm->bulk_moveable = true;
>>   }
>> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
>> index f5845c96d414..b19f2f00b215 100644
>> --- a/drivers/gpu/drm/qxl/qxl_release.c
>> +++ b/drivers/gpu/drm/qxl/qxl_release.c
>> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
>>   		       release->id | 0xf0000000, release->base.seqno);
>>   	trace_dma_fence_emit(&release->base);
>>   
>> -	spin_lock(&ttm_glob.lru_lock);
>> -
>>   	list_for_each_entry(entry, &release->bos, head) {
>>   		bo = entry->bo;
>>   
>>   		dma_resv_add_shared_fence(bo->base.resv, &release->base);
>> -		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>> +		ttm_bo_move_to_lru_tail_unlocked(bo);
>>   		dma_resv_unlock(bo->base.resv);
>>   	}
>> -	spin_unlock(&ttm_glob.lru_lock);
>>   	ww_acquire_fini(&release->ticket);
>>   }
>>   
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index 3673157527ff..2d2ac532987e 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -243,9 +243,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
>>   		 * reference it any more. The only tricky case is the trylock on
>>   		 * the resv object while holding the lru_lock.
>>   		 */
>> -		spin_lock(&ttm_glob.lru_lock);
>> +		spin_lock(&bo->bdev->lru_lock);
>>   		bo->base.resv = &bo->base._resv;
>> -		spin_unlock(&ttm_glob.lru_lock);
>> +		spin_unlock(&bo->bdev->lru_lock);
>>   	}
>>   
>>   	return r;
>> @@ -304,7 +304,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>   
>>   		if (unlock_resv)
>>   			dma_resv_unlock(bo->base.resv);
>> -		spin_unlock(&ttm_glob.lru_lock);
>> +		spin_unlock(&bo->bdev->lru_lock);
>>   
>>   		lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
>>   						 30 * HZ);
>> @@ -314,7 +314,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>   		else if (lret == 0)
>>   			return -EBUSY;
>>   
>> -		spin_lock(&ttm_glob.lru_lock);
>> +		spin_lock(&bo->bdev->lru_lock);
>>   		if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
>>   			/*
>>   			 * We raced, and lost, someone else holds the reservation now,
>> @@ -324,7 +324,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>   			 * delayed destruction would succeed, so just return success
>>   			 * here.
>>   			 */
>> -			spin_unlock(&ttm_glob.lru_lock);
>> +			spin_unlock(&bo->bdev->lru_lock);
>>   			return 0;
>>   		}
>>   		ret = 0;
>> @@ -333,13 +333,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>   	if (ret || unlikely(list_empty(&bo->ddestroy))) {
>>   		if (unlock_resv)
>>   			dma_resv_unlock(bo->base.resv);
>> -		spin_unlock(&ttm_glob.lru_lock);
>> +		spin_unlock(&bo->bdev->lru_lock);
>>   		return ret;
>>   	}
>>   
>>   	ttm_bo_del_from_lru(bo);
>>   	list_del_init(&bo->ddestroy);
>> -	spin_unlock(&ttm_glob.lru_lock);
>> +	spin_unlock(&bo->bdev->lru_lock);
>>   	ttm_bo_cleanup_memtype_use(bo);
>>   
>>   	if (unlock_resv)
>> @@ -356,13 +356,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>    */
>>   bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>>   {
>> -	struct ttm_global *glob = &ttm_glob;
>>   	struct list_head removed;
>>   	bool empty;
>>   
>>   	INIT_LIST_HEAD(&removed);
>>   
>> -	spin_lock(&glob->lru_lock);
>> +	spin_lock(&bdev->lru_lock);
>>   	while (!list_empty(&bdev->ddestroy)) {
>>   		struct ttm_buffer_object *bo;
>>   
>> @@ -373,24 +372,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>>   			continue;
>>   
>>   		if (remove_all || bo->base.resv != &bo->base._resv) {
>> -			spin_unlock(&glob->lru_lock);
>> +			spin_unlock(&bdev->lru_lock);
>>   			dma_resv_lock(bo->base.resv, NULL);
>>   
>> -			spin_lock(&glob->lru_lock);
>> +			spin_lock(&bdev->lru_lock);
>>   			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>>   
>>   		} else if (dma_resv_trylock(bo->base.resv)) {
>>   			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>>   		} else {
>> -			spin_unlock(&glob->lru_lock);
>> +			spin_unlock(&bdev->lru_lock);
>>   		}
>>   
>>   		ttm_bo_put(bo);
>> -		spin_lock(&glob->lru_lock);
>> +		spin_lock(&bdev->lru_lock);
>>   	}
>>   	list_splice_tail(&removed, &bdev->ddestroy);
>>   	empty = list_empty(&bdev->ddestroy);
>> -	spin_unlock(&glob->lru_lock);
>> +	spin_unlock(&bdev->lru_lock);
>>   
>>   	return empty;
>>   }
>> @@ -425,7 +424,7 @@ static void ttm_bo_release(struct kref *kref)
>>   		ttm_bo_flush_all_fences(bo);
>>   		bo->deleted = true;
>>   
>> -		spin_lock(&ttm_glob.lru_lock);
>> +		spin_lock(&bo->bdev->lru_lock);
>>   
>>   		/*
>>   		 * Make pinned bos immediately available to
>> @@ -442,17 +441,17 @@ static void ttm_bo_release(struct kref *kref)
>>   
>>   		kref_init(&bo->kref);
>>   		list_add_tail(&bo->ddestroy, &bdev->ddestroy);
>> -		spin_unlock(&ttm_glob.lru_lock);
>> +		spin_unlock(&bo->bdev->lru_lock);
>>   
>>   		schedule_delayed_work(&bdev->wq,
>>   				      ((HZ / 100) < 1) ? 1 : HZ / 100);
>>   		return;
>>   	}
>>   
>> -	spin_lock(&ttm_glob.lru_lock);
>> +	spin_lock(&bo->bdev->lru_lock);
>>   	ttm_bo_del_from_lru(bo);
>>   	list_del(&bo->ddestroy);
>> -	spin_unlock(&ttm_glob.lru_lock);
>> +	spin_unlock(&bo->bdev->lru_lock);
>>   
>>   	ttm_bo_cleanup_memtype_use(bo);
>>   	dma_resv_unlock(bo->base.resv);
>> @@ -626,7 +625,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>>   	unsigned i;
>>   	int ret;
>>   
>> -	spin_lock(&ttm_glob.lru_lock);
>> +	spin_lock(&bo->bdev->lru_lock);
>>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>   		list_for_each_entry(bo, &man->lru[i], lru) {
>>   			bool busy;
>> @@ -663,7 +662,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>>   	if (!bo) {
>>   		if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
>>   			busy_bo = NULL;
>> -		spin_unlock(&ttm_glob.lru_lock);
>> +		spin_unlock(&bo->bdev->lru_lock);
>>   		ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
>>   		if (busy_bo)
>>   			ttm_bo_put(busy_bo);
>> @@ -677,7 +676,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>>   		return ret;
>>   	}
>>   
>> -	spin_unlock(&ttm_glob.lru_lock);
>> +	spin_unlock(&bo->bdev->lru_lock);
>>   
>>   	ret = ttm_bo_evict(bo, ctx);
>>   	if (locked)
>> @@ -777,10 +776,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
>>   	mem->mem_type = place->mem_type;
>>   	mem->placement = place->flags;
>>   
>> -	spin_lock(&ttm_glob.lru_lock);
>> +	spin_lock(&bo->bdev->lru_lock);
>>   	ttm_bo_move_to_lru_tail(bo, mem, NULL);
>> -	spin_unlock(&ttm_glob.lru_lock);
>> -
>> +	spin_unlock(&bo->bdev->lru_lock);
>>   	return 0;
>>   }
>>   
>> @@ -1167,7 +1165,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
>>   int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>>   		   gfp_t gfp_flags)
>>   {
>> -	struct ttm_global *glob = &ttm_glob;
>>   	bool locked;
>>   	int ret;
>>   
>> @@ -1188,7 +1185,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>>   
>>   	ttm_bo_del_from_lru(bo);
>>   	/* TODO: Cleanup the locking */
>> -	spin_unlock(&glob->lru_lock);
>> +	spin_unlock(&bo->bdev->lru_lock);
>>   
>>   	/*
>>   	 * Move to system cached
>> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
>> index 2096a0fd9c35..85f6975d9872 100644
>> --- a/drivers/gpu/drm/ttm/ttm_device.c
>> +++ b/drivers/gpu/drm/ttm/ttm_device.c
>> @@ -81,7 +81,6 @@ static int ttm_global_init(void)
>>   	ttm_pool_mgr_init(num_pages * 50 / 100);
>>   	ttm_tt_mgr_init();
>>   
>> -	spin_lock_init(&glob->lru_lock);
>>   	glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
>>   
>>   	if (unlikely(glob->dummy_read_page == NULL)) {
>> @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
>>   long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>>   			gfp_t gfp_flags)
>>   {
>> -	struct ttm_global *glob = &ttm_glob;
>>   	struct ttm_resource_manager *man;
>>   	struct ttm_buffer_object *bo;
>>   	unsigned i, j;
>>   	int ret;
>>   
>> -	spin_lock(&glob->lru_lock);
>> +	spin_lock(&bdev->lru_lock);
>>   	for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
>>   		man = ttm_manager_type(bdev, i);
>>   		if (!man || !man->use_tt)
>> @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>>   			}
>>   		}
>>   	}
>> -	spin_unlock(&glob->lru_lock);
>> +	spin_unlock(&bdev->lru_lock);
>>   	return 0;
>>   }
>>   EXPORT_SYMBOL(ttm_device_swapout);
>> @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
>>   
>>   	bdev->vma_manager = vma_manager;
>>   	INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
>> +	spin_lock_init(&bdev->lru_lock);
>>   	INIT_LIST_HEAD(&bdev->ddestroy);
>>   	bdev->dev_mapping = mapping;
>>   	mutex_lock(&ttm_global_mutex);
>> @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
>>   
>>   void ttm_device_fini(struct ttm_device *bdev)
>>   {
>> -	struct ttm_global *glob = &ttm_glob;
>>   	struct ttm_resource_manager *man;
>>   	unsigned i;
>>   
>> @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
>>   	if (ttm_bo_delayed_delete(bdev, true))
>>   		pr_debug("Delayed destroy list was clean\n");
>>   
>> -	spin_lock(&glob->lru_lock);
>> +	spin_lock(&bdev->lru_lock);
>>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
>>   		if (list_empty(&man->lru[0]))
>>   			pr_debug("Swap list %d was clean\n", i);
>> -	spin_unlock(&glob->lru_lock);
>> +	spin_unlock(&bdev->lru_lock);
>>   
>>   	ttm_pool_fini(&bdev->pool);
>>   	ttm_global_release();
>> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> index 690ab97d52b7..071c48d672c6 100644
>> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
>>   	if (list_empty(list))
>>   		return;
>>   
>> -	spin_lock(&ttm_glob.lru_lock);
>>   	list_for_each_entry(entry, list, head) {
>>   		struct ttm_buffer_object *bo = entry->bo;
>>   
>> -		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>> +		ttm_bo_move_to_lru_tail_unlocked(bo);
>>   		dma_resv_unlock(bo->base.resv);
>>   	}
>> -	spin_unlock(&ttm_glob.lru_lock);
>>   
>>   	if (ticket)
>>   		ww_acquire_fini(ticket);
>> @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>>   	if (list_empty(list))
>>   		return;
>>   
>> -	spin_lock(&ttm_glob.lru_lock);
>>   	list_for_each_entry(entry, list, head) {
>>   		struct ttm_buffer_object *bo = entry->bo;
>>   
>> @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>>   			dma_resv_add_shared_fence(bo->base.resv, fence);
>>   		else
>>   			dma_resv_add_excl_fence(bo->base.resv, fence);
>> -		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>> +		ttm_bo_move_to_lru_tail_unlocked(bo);
>>   		dma_resv_unlock(bo->base.resv);
>>   	}
>> -	spin_unlock(&ttm_glob.lru_lock);
>>   	if (ticket)
>>   		ww_acquire_fini(ticket);
>>   }
>> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
>> index ed1672a9f332..04f2eef653ab 100644
>> --- a/drivers/gpu/drm/ttm/ttm_resource.c
>> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
>> @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>>   		.no_wait_gpu = false,
>>   		.force_alloc = true
>>   	};
>> -	struct ttm_global *glob = &ttm_glob;
>>   	struct dma_fence *fence;
>>   	int ret;
>>   	unsigned i;
>> @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>>   	 * Can't use standard list traversal since we're unlocking.
>>   	 */
>>   
>> -	spin_lock(&glob->lru_lock);
>> +	spin_lock(&bdev->lru_lock);
>>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>   		while (!list_empty(&man->lru[i])) {
>> -			spin_unlock(&glob->lru_lock);
>> +			spin_unlock(&bdev->lru_lock);
>>   			ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
>>   						  NULL);
>>   			if (ret)
>>   				return ret;
>> -			spin_lock(&glob->lru_lock);
>> +			spin_lock(&bdev->lru_lock);
>>   		}
>>   	}
>> -	spin_unlock(&glob->lru_lock);
>> +	spin_unlock(&bdev->lru_lock);
>>   
>>   	spin_lock(&man->move_lock);
>>   	fence = dma_fence_get(man->move);
>> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
>> index d007feef7676..dbccac957f8f 100644
>> --- a/include/drm/ttm/ttm_bo_driver.h
>> +++ b/include/drm/ttm/ttm_bo_driver.h
>> @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
>>   static inline void
>>   ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
>>   {
>> -	spin_lock(&ttm_glob.lru_lock);
>> +	spin_lock(&bo->bdev->lru_lock);
>>   	ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>> -	spin_unlock(&ttm_glob.lru_lock);
>> +	spin_unlock(&bo->bdev->lru_lock);
>>   }
>>   
>>   static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
>> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
>> index cda6efb4c34b..bae56d29e8ff 100644
>> --- a/include/drm/ttm/ttm_device.h
>> +++ b/include/drm/ttm/ttm_device.h
>> @@ -56,7 +56,6 @@ extern struct ttm_global {
>>   	 */
>>   
>>   	struct page *dummy_read_page;
>> -	spinlock_t lru_lock;
>>   
>>   	/**
>>   	 * Protected by ttm_global_mutex.
>> @@ -277,8 +276,9 @@ struct ttm_device {
>>   	struct ttm_pool pool;
>>   
>>   	/*
>> -	 * Protected by the global:lru lock.
>> +	 * Protection for the per manager LRU and ddestroy lists.
>>   	 */
>> +	spinlock_t lru_lock;
>>   	struct list_head ddestroy;
>>   
>>   	/*
>> -- 
>> 2.25.1
>>

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
  2021-03-19 12:10     ` Christian König
@ 2021-03-22  7:13       ` Huang Rui
  0 siblings, 0 replies; 18+ messages in thread
From: Huang Rui @ 2021-03-22  7:13 UTC (permalink / raw)
  To: Christian König; +Cc: matthew.william.auld, dri-devel

On Fri, Mar 19, 2021 at 08:10:21PM +0800, Christian König wrote:
> Am 19.03.21 um 05:32 schrieb Huang Rui:
> > On Thu, Mar 18, 2021 at 08:47:19PM +0800, Christian König wrote:
> >> Instead of having a global lock.
> >>
> >> Signed-off-by: Christian König <christian.koenig@amd.com>
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  8 ++---
> >>   drivers/gpu/drm/qxl/qxl_release.c      |  5 +--
> >>   drivers/gpu/drm/ttm/ttm_bo.c           | 49 ++++++++++++--------------
> >>   drivers/gpu/drm/ttm/ttm_device.c       | 12 +++----
> >>   drivers/gpu/drm/ttm/ttm_execbuf_util.c |  8 ++---
> >>   drivers/gpu/drm/ttm/ttm_resource.c     |  9 +++--
> >>   include/drm/ttm/ttm_bo_driver.h        |  4 +--
> >>   include/drm/ttm/ttm_device.h           |  4 +--
> >>   8 files changed, 43 insertions(+), 56 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> index 9d19078246c8..ae18c0e32347 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
> >>   	struct amdgpu_vm_bo_base *bo_base;
> >>   
> >>   	if (vm->bulk_moveable) {
> >> -		spin_lock(&ttm_glob.lru_lock);
> >> +		spin_lock(&adev->mman.bdev.lru_lock);
> > Could you please explain why do you want to instead of the global lock?
> 
> Potentially less contention.
> 
> But you are right mentioning this in the commit message is a good idea.
> 

Thanks.

Patch is Acked-by: Huang Rui <ray.huang@amd.com>

> Regards,
> Christian.
> 
> >
> > Thanks,
> > Ray
> >
> >>   		ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
> >> -		spin_unlock(&ttm_glob.lru_lock);
> >> +		spin_unlock(&adev->mman.bdev.lru_lock);
> >>   		return;
> >>   	}
> >>   
> >>   	memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
> >>   
> >> -	spin_lock(&ttm_glob.lru_lock);
> >> +	spin_lock(&adev->mman.bdev.lru_lock);
> >>   	list_for_each_entry(bo_base, &vm->idle, vm_status) {
> >>   		struct amdgpu_bo *bo = bo_base->bo;
> >>   
> >> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
> >>   						&bo->shadow->tbo.mem,
> >>   						&vm->lru_bulk_move);
> >>   	}
> >> -	spin_unlock(&ttm_glob.lru_lock);
> >> +	spin_unlock(&adev->mman.bdev.lru_lock);
> >>   
> >>   	vm->bulk_moveable = true;
> >>   }
> >> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
> >> index f5845c96d414..b19f2f00b215 100644
> >> --- a/drivers/gpu/drm/qxl/qxl_release.c
> >> +++ b/drivers/gpu/drm/qxl/qxl_release.c
> >> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
> >>   		       release->id | 0xf0000000, release->base.seqno);
> >>   	trace_dma_fence_emit(&release->base);
> >>   
> >> -	spin_lock(&ttm_glob.lru_lock);
> >> -
> >>   	list_for_each_entry(entry, &release->bos, head) {
> >>   		bo = entry->bo;
> >>   
> >>   		dma_resv_add_shared_fence(bo->base.resv, &release->base);
> >> -		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> >> +		ttm_bo_move_to_lru_tail_unlocked(bo);
> >>   		dma_resv_unlock(bo->base.resv);
> >>   	}
> >> -	spin_unlock(&ttm_glob.lru_lock);
> >>   	ww_acquire_fini(&release->ticket);
> >>   }
> >>   
> >> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> >> index 3673157527ff..2d2ac532987e 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> >> @@ -243,9 +243,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
> >>   		 * reference it any more. The only tricky case is the trylock on
> >>   		 * the resv object while holding the lru_lock.
> >>   		 */
> >> -		spin_lock(&ttm_glob.lru_lock);
> >> +		spin_lock(&bo->bdev->lru_lock);
> >>   		bo->base.resv = &bo->base._resv;
> >> -		spin_unlock(&ttm_glob.lru_lock);
> >> +		spin_unlock(&bo->bdev->lru_lock);
> >>   	}
> >>   
> >>   	return r;
> >> @@ -304,7 +304,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >>   
> >>   		if (unlock_resv)
> >>   			dma_resv_unlock(bo->base.resv);
> >> -		spin_unlock(&ttm_glob.lru_lock);
> >> +		spin_unlock(&bo->bdev->lru_lock);
> >>   
> >>   		lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
> >>   						 30 * HZ);
> >> @@ -314,7 +314,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >>   		else if (lret == 0)
> >>   			return -EBUSY;
> >>   
> >> -		spin_lock(&ttm_glob.lru_lock);
> >> +		spin_lock(&bo->bdev->lru_lock);
> >>   		if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
> >>   			/*
> >>   			 * We raced, and lost, someone else holds the reservation now,
> >> @@ -324,7 +324,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >>   			 * delayed destruction would succeed, so just return success
> >>   			 * here.
> >>   			 */
> >> -			spin_unlock(&ttm_glob.lru_lock);
> >> +			spin_unlock(&bo->bdev->lru_lock);
> >>   			return 0;
> >>   		}
> >>   		ret = 0;
> >> @@ -333,13 +333,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >>   	if (ret || unlikely(list_empty(&bo->ddestroy))) {
> >>   		if (unlock_resv)
> >>   			dma_resv_unlock(bo->base.resv);
> >> -		spin_unlock(&ttm_glob.lru_lock);
> >> +		spin_unlock(&bo->bdev->lru_lock);
> >>   		return ret;
> >>   	}
> >>   
> >>   	ttm_bo_del_from_lru(bo);
> >>   	list_del_init(&bo->ddestroy);
> >> -	spin_unlock(&ttm_glob.lru_lock);
> >> +	spin_unlock(&bo->bdev->lru_lock);
> >>   	ttm_bo_cleanup_memtype_use(bo);
> >>   
> >>   	if (unlock_resv)
> >> @@ -356,13 +356,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >>    */
> >>   bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
> >>   {
> >> -	struct ttm_global *glob = &ttm_glob;
> >>   	struct list_head removed;
> >>   	bool empty;
> >>   
> >>   	INIT_LIST_HEAD(&removed);
> >>   
> >> -	spin_lock(&glob->lru_lock);
> >> +	spin_lock(&bdev->lru_lock);
> >>   	while (!list_empty(&bdev->ddestroy)) {
> >>   		struct ttm_buffer_object *bo;
> >>   
> >> @@ -373,24 +372,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
> >>   			continue;
> >>   
> >>   		if (remove_all || bo->base.resv != &bo->base._resv) {
> >> -			spin_unlock(&glob->lru_lock);
> >> +			spin_unlock(&bdev->lru_lock);
> >>   			dma_resv_lock(bo->base.resv, NULL);
> >>   
> >> -			spin_lock(&glob->lru_lock);
> >> +			spin_lock(&bdev->lru_lock);
> >>   			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
> >>   
> >>   		} else if (dma_resv_trylock(bo->base.resv)) {
> >>   			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
> >>   		} else {
> >> -			spin_unlock(&glob->lru_lock);
> >> +			spin_unlock(&bdev->lru_lock);
> >>   		}
> >>   
> >>   		ttm_bo_put(bo);
> >> -		spin_lock(&glob->lru_lock);
> >> +		spin_lock(&bdev->lru_lock);
> >>   	}
> >>   	list_splice_tail(&removed, &bdev->ddestroy);
> >>   	empty = list_empty(&bdev->ddestroy);
> >> -	spin_unlock(&glob->lru_lock);
> >> +	spin_unlock(&bdev->lru_lock);
> >>   
> >>   	return empty;
> >>   }
> >> @@ -425,7 +424,7 @@ static void ttm_bo_release(struct kref *kref)
> >>   		ttm_bo_flush_all_fences(bo);
> >>   		bo->deleted = true;
> >>   
> >> -		spin_lock(&ttm_glob.lru_lock);
> >> +		spin_lock(&bo->bdev->lru_lock);
> >>   
> >>   		/*
> >>   		 * Make pinned bos immediately available to
> >> @@ -442,17 +441,17 @@ static void ttm_bo_release(struct kref *kref)
> >>   
> >>   		kref_init(&bo->kref);
> >>   		list_add_tail(&bo->ddestroy, &bdev->ddestroy);
> >> -		spin_unlock(&ttm_glob.lru_lock);
> >> +		spin_unlock(&bo->bdev->lru_lock);
> >>   
> >>   		schedule_delayed_work(&bdev->wq,
> >>   				      ((HZ / 100) < 1) ? 1 : HZ / 100);
> >>   		return;
> >>   	}
> >>   
> >> -	spin_lock(&ttm_glob.lru_lock);
> >> +	spin_lock(&bo->bdev->lru_lock);
> >>   	ttm_bo_del_from_lru(bo);
> >>   	list_del(&bo->ddestroy);
> >> -	spin_unlock(&ttm_glob.lru_lock);
> >> +	spin_unlock(&bo->bdev->lru_lock);
> >>   
> >>   	ttm_bo_cleanup_memtype_use(bo);
> >>   	dma_resv_unlock(bo->base.resv);
> >> @@ -626,7 +625,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> >>   	unsigned i;
> >>   	int ret;
> >>   
> >> -	spin_lock(&ttm_glob.lru_lock);
> >> +	spin_lock(&bo->bdev->lru_lock);
> >>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> >>   		list_for_each_entry(bo, &man->lru[i], lru) {
> >>   			bool busy;
> >> @@ -663,7 +662,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> >>   	if (!bo) {
> >>   		if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
> >>   			busy_bo = NULL;
> >> -		spin_unlock(&ttm_glob.lru_lock);
> >> +		spin_unlock(&bo->bdev->lru_lock);
> >>   		ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
> >>   		if (busy_bo)
> >>   			ttm_bo_put(busy_bo);
> >> @@ -677,7 +676,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> >>   		return ret;
> >>   	}
> >>   
> >> -	spin_unlock(&ttm_glob.lru_lock);
> >> +	spin_unlock(&bo->bdev->lru_lock);
> >>   
> >>   	ret = ttm_bo_evict(bo, ctx);
> >>   	if (locked)
> >> @@ -777,10 +776,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
> >>   	mem->mem_type = place->mem_type;
> >>   	mem->placement = place->flags;
> >>   
> >> -	spin_lock(&ttm_glob.lru_lock);
> >> +	spin_lock(&bo->bdev->lru_lock);
> >>   	ttm_bo_move_to_lru_tail(bo, mem, NULL);
> >> -	spin_unlock(&ttm_glob.lru_lock);
> >> -
> >> +	spin_unlock(&bo->bdev->lru_lock);
> >>   	return 0;
> >>   }
> >>   
> >> @@ -1167,7 +1165,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
> >>   int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
> >>   		   gfp_t gfp_flags)
> >>   {
> >> -	struct ttm_global *glob = &ttm_glob;
> >>   	bool locked;
> >>   	int ret;
> >>   
> >> @@ -1188,7 +1185,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
> >>   
> >>   	ttm_bo_del_from_lru(bo);
> >>   	/* TODO: Cleanup the locking */
> >> -	spin_unlock(&glob->lru_lock);
> >> +	spin_unlock(&bo->bdev->lru_lock);
> >>   
> >>   	/*
> >>   	 * Move to system cached
> >> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> >> index 2096a0fd9c35..85f6975d9872 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_device.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> >> @@ -81,7 +81,6 @@ static int ttm_global_init(void)
> >>   	ttm_pool_mgr_init(num_pages * 50 / 100);
> >>   	ttm_tt_mgr_init();
> >>   
> >> -	spin_lock_init(&glob->lru_lock);
> >>   	glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
> >>   
> >>   	if (unlikely(glob->dummy_read_page == NULL)) {
> >> @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
> >>   long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> >>   			gfp_t gfp_flags)
> >>   {
> >> -	struct ttm_global *glob = &ttm_glob;
> >>   	struct ttm_resource_manager *man;
> >>   	struct ttm_buffer_object *bo;
> >>   	unsigned i, j;
> >>   	int ret;
> >>   
> >> -	spin_lock(&glob->lru_lock);
> >> +	spin_lock(&bdev->lru_lock);
> >>   	for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
> >>   		man = ttm_manager_type(bdev, i);
> >>   		if (!man || !man->use_tt)
> >> @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> >>   			}
> >>   		}
> >>   	}
> >> -	spin_unlock(&glob->lru_lock);
> >> +	spin_unlock(&bdev->lru_lock);
> >>   	return 0;
> >>   }
> >>   EXPORT_SYMBOL(ttm_device_swapout);
> >> @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
> >>   
> >>   	bdev->vma_manager = vma_manager;
> >>   	INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
> >> +	spin_lock_init(&bdev->lru_lock);
> >>   	INIT_LIST_HEAD(&bdev->ddestroy);
> >>   	bdev->dev_mapping = mapping;
> >>   	mutex_lock(&ttm_global_mutex);
> >> @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
> >>   
> >>   void ttm_device_fini(struct ttm_device *bdev)
> >>   {
> >> -	struct ttm_global *glob = &ttm_glob;
> >>   	struct ttm_resource_manager *man;
> >>   	unsigned i;
> >>   
> >> @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
> >>   	if (ttm_bo_delayed_delete(bdev, true))
> >>   		pr_debug("Delayed destroy list was clean\n");
> >>   
> >> -	spin_lock(&glob->lru_lock);
> >> +	spin_lock(&bdev->lru_lock);
> >>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
> >>   		if (list_empty(&man->lru[0]))
> >>   			pr_debug("Swap list %d was clean\n", i);
> >> -	spin_unlock(&glob->lru_lock);
> >> +	spin_unlock(&bdev->lru_lock);
> >>   
> >>   	ttm_pool_fini(&bdev->pool);
> >>   	ttm_global_release();
> >> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> >> index 690ab97d52b7..071c48d672c6 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> >> @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
> >>   	if (list_empty(list))
> >>   		return;
> >>   
> >> -	spin_lock(&ttm_glob.lru_lock);
> >>   	list_for_each_entry(entry, list, head) {
> >>   		struct ttm_buffer_object *bo = entry->bo;
> >>   
> >> -		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> >> +		ttm_bo_move_to_lru_tail_unlocked(bo);
> >>   		dma_resv_unlock(bo->base.resv);
> >>   	}
> >> -	spin_unlock(&ttm_glob.lru_lock);
> >>   
> >>   	if (ticket)
> >>   		ww_acquire_fini(ticket);
> >> @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
> >>   	if (list_empty(list))
> >>   		return;
> >>   
> >> -	spin_lock(&ttm_glob.lru_lock);
> >>   	list_for_each_entry(entry, list, head) {
> >>   		struct ttm_buffer_object *bo = entry->bo;
> >>   
> >> @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
> >>   			dma_resv_add_shared_fence(bo->base.resv, fence);
> >>   		else
> >>   			dma_resv_add_excl_fence(bo->base.resv, fence);
> >> -		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> >> +		ttm_bo_move_to_lru_tail_unlocked(bo);
> >>   		dma_resv_unlock(bo->base.resv);
> >>   	}
> >> -	spin_unlock(&ttm_glob.lru_lock);
> >>   	if (ticket)
> >>   		ww_acquire_fini(ticket);
> >>   }
> >> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
> >> index ed1672a9f332..04f2eef653ab 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_resource.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> >> @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
> >>   		.no_wait_gpu = false,
> >>   		.force_alloc = true
> >>   	};
> >> -	struct ttm_global *glob = &ttm_glob;
> >>   	struct dma_fence *fence;
> >>   	int ret;
> >>   	unsigned i;
> >> @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
> >>   	 * Can't use standard list traversal since we're unlocking.
> >>   	 */
> >>   
> >> -	spin_lock(&glob->lru_lock);
> >> +	spin_lock(&bdev->lru_lock);
> >>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> >>   		while (!list_empty(&man->lru[i])) {
> >> -			spin_unlock(&glob->lru_lock);
> >> +			spin_unlock(&bdev->lru_lock);
> >>   			ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
> >>   						  NULL);
> >>   			if (ret)
> >>   				return ret;
> >> -			spin_lock(&glob->lru_lock);
> >> +			spin_lock(&bdev->lru_lock);
> >>   		}
> >>   	}
> >> -	spin_unlock(&glob->lru_lock);
> >> +	spin_unlock(&bdev->lru_lock);
> >>   
> >>   	spin_lock(&man->move_lock);
> >>   	fence = dma_fence_get(man->move);
> >> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> >> index d007feef7676..dbccac957f8f 100644
> >> --- a/include/drm/ttm/ttm_bo_driver.h
> >> +++ b/include/drm/ttm/ttm_bo_driver.h
> >> @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
> >>   static inline void
> >>   ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
> >>   {
> >> -	spin_lock(&ttm_glob.lru_lock);
> >> +	spin_lock(&bo->bdev->lru_lock);
> >>   	ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> >> -	spin_unlock(&ttm_glob.lru_lock);
> >> +	spin_unlock(&bo->bdev->lru_lock);
> >>   }
> >>   
> >>   static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
> >> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> >> index cda6efb4c34b..bae56d29e8ff 100644
> >> --- a/include/drm/ttm/ttm_device.h
> >> +++ b/include/drm/ttm/ttm_device.h
> >> @@ -56,7 +56,6 @@ extern struct ttm_global {
> >>   	 */
> >>   
> >>   	struct page *dummy_read_page;
> >> -	spinlock_t lru_lock;
> >>   
> >>   	/**
> >>   	 * Protected by ttm_global_mutex.
> >> @@ -277,8 +276,9 @@ struct ttm_device {
> >>   	struct ttm_pool pool;
> >>   
> >>   	/*
> >> -	 * Protected by the global:lru lock.
> >> +	 * Protection for the per manager LRU and ddestroy lists.
> >>   	 */
> >> +	spinlock_t lru_lock;
> >>   	struct list_head ddestroy;
> >>   
> >>   	/*
> >> -- 
> >> 2.25.1
> >>
> 
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2021-03-22  7:13 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-18 12:47 [PATCH 1/3] drm/ttm: move swapout logic around v2 Christian König
2021-03-18 12:47 ` [PATCH 2/3] drm/ttm: remove swap LRU v3 Christian König
2021-03-19  4:30   ` Huang Rui
2021-03-18 12:47 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
2021-03-19  4:32   ` Huang Rui
2021-03-19 12:10     ` Christian König
2021-03-22  7:13       ` Huang Rui
2021-03-18 14:43 ` [PATCH 1/3] drm/ttm: move swapout logic around v2 Nirmoy
2021-03-18 15:26   ` Christian König
2021-03-18 19:46     ` Nirmoy
2021-03-18 15:08 ` kernel test robot
2021-03-18 15:08   ` kernel test robot
2021-03-18 18:13 ` kernel test robot
2021-03-18 18:13   ` kernel test robot
2021-03-19 12:07   ` Christian König
2021-03-19 12:07     ` Christian König
2021-03-19  4:28 ` Huang Rui
2021-03-19  9:41 ` Matthew Auld

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.