From: "Christian König" <ckoenig.leichtzumerken@gmail.com>
To: dri-devel@lists.freedesktop.org
Cc: ray.huang@amd.com
Subject: [PATCH 3/3] drm/ttm: switch to per device LRU lock
Date: Mon, 15 Mar 2021 17:04:22 +0100
Message-ID: <20210315160422.152528-3-christian.koenig@amd.com>
In-Reply-To: <20210315160422.152528-1-christian.koenig@amd.com>

Instead of having a single global lock, protect the per manager LRU lists and the ddestroy list with a spinlock embedded in each ttm_device.

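Every caller that previously took ttm_glob.lru_lock now takes the lock of the
buffer object's (or the function's) ttm_device instead. A minimal sketch of
the pattern, distilled from the hunks below rather than new code:

	/* before: all devices serialize on the one global lock */
	spin_lock(&ttm_glob.lru_lock);
	ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
	spin_unlock(&ttm_glob.lru_lock);

	/* after: lock the BO's own device, so LRU and ddestroy list
	 * updates on different devices no longer contend
	 */
	spin_lock(&bo->bdev->lru_lock);
	ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
	spin_unlock(&bo->bdev->lru_lock);
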
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  8 ++---
 drivers/gpu/drm/qxl/qxl_release.c      |  5 +--
 drivers/gpu/drm/ttm/ttm_bo.c           | 49 ++++++++++++--------------
 drivers/gpu/drm/ttm/ttm_device.c       | 12 +++----
 drivers/gpu/drm/ttm/ttm_execbuf_util.c |  8 ++---
 drivers/gpu/drm/ttm/ttm_resource.c     |  9 +++--
 include/drm/ttm/ttm_bo_driver.h        |  4 +--
 include/drm/ttm/ttm_device.h           |  4 +--
 8 files changed, 43 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9d19078246c8..ae18c0e32347 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
 	struct amdgpu_vm_bo_base *bo_base;
 
 	if (vm->bulk_moveable) {
-		spin_lock(&ttm_glob.lru_lock);
+		spin_lock(&adev->mman.bdev.lru_lock);
 		ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
-		spin_unlock(&ttm_glob.lru_lock);
+		spin_unlock(&adev->mman.bdev.lru_lock);
 		return;
 	}
 
 	memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
 
-	spin_lock(&ttm_glob.lru_lock);
+	spin_lock(&adev->mman.bdev.lru_lock);
 	list_for_each_entry(bo_base, &vm->idle, vm_status) {
 		struct amdgpu_bo *bo = bo_base->bo;
 
@@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
 						&bo->shadow->tbo.mem,
 						&vm->lru_bulk_move);
 	}
-	spin_unlock(&ttm_glob.lru_lock);
+	spin_unlock(&adev->mman.bdev.lru_lock);
 
 	vm->bulk_moveable = true;
 }
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index f5845c96d414..b19f2f00b215 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
 		       release->id | 0xf0000000, release->base.seqno);
 	trace_dma_fence_emit(&release->base);
 
-	spin_lock(&ttm_glob.lru_lock);
-
 	list_for_each_entry(entry, &release->bos, head) {
 		bo = entry->bo;
 
 		dma_resv_add_shared_fence(bo->base.resv, &release->base);
-		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+		ttm_bo_move_to_lru_tail_unlocked(bo);
 		dma_resv_unlock(bo->base.resv);
 	}
-	spin_unlock(&ttm_glob.lru_lock);
 	ww_acquire_fini(&release->ticket);
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index a1be88be357b..a8103c8718a3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
 		 * reference it any more. The only tricky case is the trylock on
 		 * the resv object while holding the lru_lock.
 		 */
-		spin_lock(&ttm_glob.lru_lock);
+		spin_lock(&bo->bdev->lru_lock);
 		bo->base.resv = &bo->base._resv;
-		spin_unlock(&ttm_glob.lru_lock);
+		spin_unlock(&bo->bdev->lru_lock);
 	}
 
 	return r;
@@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 
 		if (unlock_resv)
 			dma_resv_unlock(bo->base.resv);
-		spin_unlock(&ttm_glob.lru_lock);
+		spin_unlock(&bo->bdev->lru_lock);
 
 		lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
 						 30 * HZ);
@@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 		else if (lret == 0)
 			return -EBUSY;
 
-		spin_lock(&ttm_glob.lru_lock);
+		spin_lock(&bo->bdev->lru_lock);
 		if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
 			/*
 			 * We raced, and lost, someone else holds the reservation now,
@@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 			 * delayed destruction would succeed, so just return success
 			 * here.
 			 */
-			spin_unlock(&ttm_glob.lru_lock);
+			spin_unlock(&bo->bdev->lru_lock);
 			return 0;
 		}
 		ret = 0;
@@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 	if (ret || unlikely(list_empty(&bo->ddestroy))) {
 		if (unlock_resv)
 			dma_resv_unlock(bo->base.resv);
-		spin_unlock(&ttm_glob.lru_lock);
+		spin_unlock(&bo->bdev->lru_lock);
 		return ret;
 	}
 
 	ttm_bo_del_from_lru(bo);
 	list_del_init(&bo->ddestroy);
-	spin_unlock(&ttm_glob.lru_lock);
+	spin_unlock(&bo->bdev->lru_lock);
 	ttm_bo_cleanup_memtype_use(bo);
 
 	if (unlock_resv)
@@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
  */
 bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
 {
-	struct ttm_global *glob = &ttm_glob;
 	struct list_head removed;
 	bool empty;
 
 	INIT_LIST_HEAD(&removed);
 
-	spin_lock(&glob->lru_lock);
+	spin_lock(&bdev->lru_lock);
 	while (!list_empty(&bdev->ddestroy)) {
 		struct ttm_buffer_object *bo;
 
@@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
 			continue;
 
 		if (remove_all || bo->base.resv != &bo->base._resv) {
-			spin_unlock(&glob->lru_lock);
+			spin_unlock(&bdev->lru_lock);
 			dma_resv_lock(bo->base.resv, NULL);
 
-			spin_lock(&glob->lru_lock);
+			spin_lock(&bdev->lru_lock);
 			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
 
 		} else if (dma_resv_trylock(bo->base.resv)) {
 			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
 		} else {
-			spin_unlock(&glob->lru_lock);
+			spin_unlock(&bdev->lru_lock);
 		}
 
 		ttm_bo_put(bo);
-		spin_lock(&glob->lru_lock);
+		spin_lock(&bdev->lru_lock);
 	}
 	list_splice_tail(&removed, &bdev->ddestroy);
 	empty = list_empty(&bdev->ddestroy);
-	spin_unlock(&glob->lru_lock);
+	spin_unlock(&bdev->lru_lock);
 
 	return empty;
 }
@@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref)
 		ttm_bo_flush_all_fences(bo);
 		bo->deleted = true;
 
-		spin_lock(&ttm_glob.lru_lock);
+		spin_lock(&bo->bdev->lru_lock);
 
 		/*
 		 * Make pinned bos immediately available to
@@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref)
 
 		kref_init(&bo->kref);
 		list_add_tail(&bo->ddestroy, &bdev->ddestroy);
-		spin_unlock(&ttm_glob.lru_lock);
+		spin_unlock(&bo->bdev->lru_lock);
 
 		schedule_delayed_work(&bdev->wq,
 				      ((HZ / 100) < 1) ? 1 : HZ / 100);
 		return;
 	}
 
-	spin_lock(&ttm_glob.lru_lock);
+	spin_lock(&bo->bdev->lru_lock);
 	ttm_bo_del_from_lru(bo);
 	list_del(&bo->ddestroy);
-	spin_unlock(&ttm_glob.lru_lock);
+	spin_unlock(&bo->bdev->lru_lock);
 
 	ttm_bo_cleanup_memtype_use(bo);
 	dma_resv_unlock(bo->base.resv);
@@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
 	unsigned i;
 	int ret;
 
-	spin_lock(&ttm_glob.lru_lock);
+	spin_lock(&bdev->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		list_for_each_entry(bo, &man->lru[i], lru) {
 			bool busy;
@@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
 	if (!bo) {
 		if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
 			busy_bo = NULL;
-		spin_unlock(&ttm_glob.lru_lock);
+		spin_unlock(&bdev->lru_lock);
 		ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
 		if (busy_bo)
 			ttm_bo_put(busy_bo);
@@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
 		return ret;
 	}
 
-	spin_unlock(&ttm_glob.lru_lock);
+	spin_unlock(&bdev->lru_lock);
 
 	ret = ttm_bo_evict(bo, ctx);
 	if (locked)
@@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
 	mem->mem_type = place->mem_type;
 	mem->placement = place->flags;
 
-	spin_lock(&ttm_glob.lru_lock);
+	spin_lock(&bo->bdev->lru_lock);
 	ttm_bo_move_to_lru_tail(bo, mem, NULL);
-	spin_unlock(&ttm_glob.lru_lock);
-
+	spin_unlock(&bo->bdev->lru_lock);
 	return 0;
 }
 
@@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
 int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 		   gfp_t gfp_flags)
 {
-	struct ttm_global *glob = &ttm_glob;
 	bool locked;
 	int ret;
 
@@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 
 	ttm_bo_del_from_lru(bo);
 	/* TODO: Cleanup the locking */
-	spin_unlock(&glob->lru_lock);
+	spin_unlock(&bo->bdev->lru_lock);
 
 	/*
 	 * Move to system cached
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 2c280fb1e992..924d892109e8 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -81,7 +81,6 @@ static int ttm_global_init(void)
 	ttm_pool_mgr_init(num_pages * 50 / 100);
 	ttm_tt_mgr_init();
 
-	spin_lock_init(&glob->lru_lock);
 	glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
 
 	if (unlikely(glob->dummy_read_page == NULL)) {
@@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
 long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
 			gfp_t gfp_flags)
 {
-	struct ttm_global *glob = &ttm_glob;
 	struct ttm_resource_manager *man;
 	struct ttm_buffer_object *bo;
 	unsigned i, j;
 	int ret;
 
-	spin_lock(&glob->lru_lock);
+	spin_lock(&bdev->lru_lock);
 	for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
 		man = ttm_manager_type(bdev, i);
 		if (!man || !man->use_tt)
@@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
 			}
 		}
 	}
-	spin_unlock(&glob->lru_lock);
+	spin_unlock(&bdev->lru_lock);
 	return 0;
 }
 EXPORT_SYMBOL(ttm_device_swapout);
@@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
 
 	bdev->vma_manager = vma_manager;
 	INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
+	spin_lock_init(&bdev->lru_lock);
 	INIT_LIST_HEAD(&bdev->ddestroy);
 	bdev->dev_mapping = mapping;
 	mutex_lock(&ttm_global_mutex);
@@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
 
 void ttm_device_fini(struct ttm_device *bdev)
 {
-	struct ttm_global *glob = &ttm_glob;
 	struct ttm_resource_manager *man;
 	unsigned i;
 
@@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
 	if (ttm_bo_delayed_delete(bdev, true))
 		pr_debug("Delayed destroy list was clean\n");
 
-	spin_lock(&glob->lru_lock);
+	spin_lock(&bdev->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
 		if (list_empty(&man->lru[0]))
 			pr_debug("Swap list %d was clean\n", i);
-	spin_unlock(&glob->lru_lock);
+	spin_unlock(&bdev->lru_lock);
 
 	ttm_pool_fini(&bdev->pool);
 	ttm_global_release();
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 690ab97d52b7..071c48d672c6 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
 	if (list_empty(list))
 		return;
 
-	spin_lock(&ttm_glob.lru_lock);
 	list_for_each_entry(entry, list, head) {
 		struct ttm_buffer_object *bo = entry->bo;
 
-		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+		ttm_bo_move_to_lru_tail_unlocked(bo);
 		dma_resv_unlock(bo->base.resv);
 	}
-	spin_unlock(&ttm_glob.lru_lock);
 
 	if (ticket)
 		ww_acquire_fini(ticket);
@@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
 	if (list_empty(list))
 		return;
 
-	spin_lock(&ttm_glob.lru_lock);
 	list_for_each_entry(entry, list, head) {
 		struct ttm_buffer_object *bo = entry->bo;
 
@@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
 			dma_resv_add_shared_fence(bo->base.resv, fence);
 		else
 			dma_resv_add_excl_fence(bo->base.resv, fence);
-		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+		ttm_bo_move_to_lru_tail_unlocked(bo);
 		dma_resv_unlock(bo->base.resv);
 	}
-	spin_unlock(&ttm_glob.lru_lock);
 	if (ticket)
 		ww_acquire_fini(ticket);
 }
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index ed1672a9f332..04f2eef653ab 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
 		.no_wait_gpu = false,
 		.force_alloc = true
 	};
-	struct ttm_global *glob = &ttm_glob;
 	struct dma_fence *fence;
 	int ret;
 	unsigned i;
@@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
 	 * Can't use standard list traversal since we're unlocking.
 	 */
 
-	spin_lock(&glob->lru_lock);
+	spin_lock(&bdev->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		while (!list_empty(&man->lru[i])) {
-			spin_unlock(&glob->lru_lock);
+			spin_unlock(&bdev->lru_lock);
 			ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
 						  NULL);
 			if (ret)
 				return ret;
-			spin_lock(&glob->lru_lock);
+			spin_lock(&bdev->lru_lock);
 		}
 	}
-	spin_unlock(&glob->lru_lock);
+	spin_unlock(&bdev->lru_lock);
 
 	spin_lock(&man->move_lock);
 	fence = dma_fence_get(man->move);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index d007feef7676..dbccac957f8f 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
 static inline void
 ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
 {
-	spin_lock(&ttm_glob.lru_lock);
+	spin_lock(&bo->bdev->lru_lock);
 	ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
-	spin_unlock(&ttm_glob.lru_lock);
+	spin_unlock(&bo->bdev->lru_lock);
 }
 
 static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index cda6efb4c34b..bae56d29e8ff 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -56,7 +56,6 @@ extern struct ttm_global {
 	 */
 
 	struct page *dummy_read_page;
-	spinlock_t lru_lock;
 
 	/**
 	 * Protected by ttm_global_mutex.
@@ -277,8 +276,9 @@ struct ttm_device {
 	struct ttm_pool pool;
 
 	/*
-	 * Protected by the global:lru lock.
+	 * Protection for the per manager LRU and ddestroy lists.
 	 */
+	spinlock_t lru_lock;
 	struct list_head ddestroy;
 
 	/*
-- 
2.25.1
