From: "Christian König" <ckoenig.leichtzumerken@gmail.com>
To: Intel Graphics Development <intel-gfx@lists.freedesktop.org>,
	dri-devel@lists.freedesktop.org, amd-gfx@lists.freedesktop.org
Subject: [PATCH 3/9] drm/ttm: use per BO cleanup workers
Date: Fri, 25 Nov 2022 11:21:31 +0100
Message-ID: <20221125102137.1801-3-christian.koenig@amd.com>
In-Reply-To: <20221125102137.1801-1-christian.koenig@amd.com>

Instead of a single worker going over the list of deleted BOs at regular
intervals, use a per-BO worker which blocks on the resv object and the
locking of the BO.

This not only simplifies the handling massively, but also results in
much better response time when cleaning up buffers.
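
An aside for readers who want the gist before wading into the diff: the new
scheme condenses to the sketch below. The names mirror the hunks that follow,
but this is a trimmed outline only; the refcount resurrection, pinned-BO and
LRU handling in the real ttm_bo_release() are left out.

/* Per-BO worker: runs once a still-busy BO has been queued for destruction. */
static void ttm_bo_delayed_delete(struct work_struct *work)
{
	struct ttm_buffer_object *bo =
		container_of(work, typeof(*bo), delayed_delete);

	/* Block until every fence on the reservation object has signaled. */
	dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP,
			      false, MAX_SCHEDULE_TIMEOUT);

	/* The lock is now uncontended; release the memory and drop the ref. */
	dma_resv_lock(bo->base.resv, NULL);
	ttm_bo_cleanup_memtype_use(bo);
	dma_resv_unlock(bo->base.resv);
	ttm_bo_put(bo);
}

/*
 * In ttm_bo_release(), a BO that is not yet idle queues its own work item
 * on the device workqueue instead of joining a shared ddestroy list:
 */
INIT_WORK(&bo->delayed_delete, ttm_bo_delayed_delete);
queue_work(bdev->wq, &bo->delayed_delete);

Because each BO carries its own work item, one busy buffer no longer holds up
the cleanup of every other buffer behind it on a shared list, and the periodic
rescheduling of the old device-wide worker disappears entirely.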

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   2 +-
 drivers/gpu/drm/i915/i915_gem.c            |   2 +-
 drivers/gpu/drm/i915/intel_region_ttm.c    |   2 +-
 drivers/gpu/drm/ttm/ttm_bo.c               | 112 ++++++++-------------
 drivers/gpu/drm/ttm/ttm_bo_util.c          |   1 -
 drivers/gpu/drm/ttm/ttm_device.c           |  24 ++---
 include/drm/ttm/ttm_bo_api.h               |  18 +---
 include/drm/ttm/ttm_device.h               |   7 +-
 8 files changed, 57 insertions(+), 111 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2b1db37e25c1..74ccbd566777 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3984,7 +3984,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
 	amdgpu_fence_driver_hw_fini(adev);
 
 	if (adev->mman.initialized)
-		flush_delayed_work(&adev->mman.bdev.wq);
+		drain_workqueue(adev->mman.bdev.wq);
 
 	if (adev->pm_sysfs_en)
 		amdgpu_pm_sysfs_fini(adev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8468ca9885fd..c38306f156d6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1099,7 +1099,7 @@ void i915_gem_drain_freed_objects(struct drm_i915_private *i915)
 {
 	while (atomic_read(&i915->mm.free_count)) {
 		flush_work(&i915->mm.free_work);
-		flush_delayed_work(&i915->bdev.wq);
+		drain_workqueue(i915->bdev.wq);
 		rcu_barrier();
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c
index cf89d0c2a2d9..657bbc16a48a 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -132,7 +132,7 @@ int intel_region_ttm_fini(struct intel_memory_region *mem)
 			break;
 
 		msleep(20);
-		flush_delayed_work(&mem->i915->bdev.wq);
+		drain_workqueue(mem->i915->bdev.wq);
 	}
 
 	/* If we leaked objects, Don't free the region causing use after free */
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index b77262a623e0..4749b65bedc4 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -280,14 +280,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 		ret = 0;
 	}
 
-	if (ret || unlikely(list_empty(&bo->ddestroy))) {
+	if (ret) {
 		if (unlock_resv)
 			dma_resv_unlock(bo->base.resv);
 		spin_unlock(&bo->bdev->lru_lock);
 		return ret;
 	}
 
-	list_del_init(&bo->ddestroy);
 	spin_unlock(&bo->bdev->lru_lock);
 	ttm_bo_cleanup_memtype_use(bo);
 
@@ -300,47 +299,21 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 }
 
 /*
- * Traverse the delayed list, and call ttm_bo_cleanup_refs on all
- * encountered buffers.
+ * Block for the dma_resv object to become idle, lock the buffer and clean up
+ * the resource and tt object.
  */
-bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
+static void ttm_bo_delayed_delete(struct work_struct *work)
 {
-	struct list_head removed;
-	bool empty;
-
-	INIT_LIST_HEAD(&removed);
-
-	spin_lock(&bdev->lru_lock);
-	while (!list_empty(&bdev->ddestroy)) {
-		struct ttm_buffer_object *bo;
-
-		bo = list_first_entry(&bdev->ddestroy, struct ttm_buffer_object,
-				      ddestroy);
-		list_move_tail(&bo->ddestroy, &removed);
-		if (!ttm_bo_get_unless_zero(bo))
-			continue;
-
-		if (remove_all || bo->base.resv != &bo->base._resv) {
-			spin_unlock(&bdev->lru_lock);
-			dma_resv_lock(bo->base.resv, NULL);
-
-			spin_lock(&bdev->lru_lock);
-			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
-
-		} else if (dma_resv_trylock(bo->base.resv)) {
-			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
-		} else {
-			spin_unlock(&bdev->lru_lock);
-		}
+	struct ttm_buffer_object *bo;
 
-		ttm_bo_put(bo);
-		spin_lock(&bdev->lru_lock);
-	}
-	list_splice_tail(&removed, &bdev->ddestroy);
-	empty = list_empty(&bdev->ddestroy);
-	spin_unlock(&bdev->lru_lock);
+	bo = container_of(work, typeof(*bo), delayed_delete);
 
-	return empty;
+	dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP, false,
+			      MAX_SCHEDULE_TIMEOUT);
+	dma_resv_lock(bo->base.resv, NULL);
+	ttm_bo_cleanup_memtype_use(bo);
+	dma_resv_unlock(bo->base.resv);
+	ttm_bo_put(bo);
 }
 
 static void ttm_bo_release(struct kref *kref)
@@ -369,44 +342,40 @@ static void ttm_bo_release(struct kref *kref)
 
 		drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
 		ttm_mem_io_free(bdev, bo->resource);
-	}
-
-	if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP) ||
-	    !dma_resv_trylock(bo->base.resv)) {
-		/* The BO is not idle, resurrect it for delayed destroy */
-		ttm_bo_flush_all_fences(bo);
-		bo->deleted = true;
 
-		spin_lock(&bo->bdev->lru_lock);
+		if (!dma_resv_test_signaled(bo->base.resv,
+					    DMA_RESV_USAGE_BOOKKEEP) ||
+		    !dma_resv_trylock(bo->base.resv)) {
+			/* The BO is not idle, resurrect it for delayed destroy */
+			ttm_bo_flush_all_fences(bo);
+			bo->deleted = true;
 
-		/*
-		 * Make pinned bos immediately available to
-		 * shrinkers, now that they are queued for
-		 * destruction.
-		 *
-		 * FIXME: QXL is triggering this. Can be removed when the
-		 * driver is fixed.
-		 */
-		if (bo->pin_count) {
-			bo->pin_count = 0;
-			ttm_resource_move_to_lru_tail(bo->resource);
-		}
+			spin_lock(&bo->bdev->lru_lock);
 
-		kref_init(&bo->kref);
-		list_add_tail(&bo->ddestroy, &bdev->ddestroy);
-		spin_unlock(&bo->bdev->lru_lock);
+			/*
+			 * Make pinned bos immediately available to
+			 * shrinkers, now that they are queued for
+			 * destruction.
+			 *
+			 * FIXME: QXL is triggering this. Can be removed when the
+			 * driver is fixed.
+			 */
+			if (bo->pin_count) {
+				bo->pin_count = 0;
+				ttm_resource_move_to_lru_tail(bo->resource);
+			}
 
-		schedule_delayed_work(&bdev->wq,
-				      ((HZ / 100) < 1) ? 1 : HZ / 100);
-		return;
-	}
+			kref_init(&bo->kref);
+			spin_unlock(&bo->bdev->lru_lock);
 
-	spin_lock(&bo->bdev->lru_lock);
-	list_del(&bo->ddestroy);
-	spin_unlock(&bo->bdev->lru_lock);
+			INIT_WORK(&bo->delayed_delete, ttm_bo_delayed_delete);
+			queue_work(bdev->wq, &bo->delayed_delete);
+			return;
+		}
 
-	ttm_bo_cleanup_memtype_use(bo);
-	dma_resv_unlock(bo->base.resv);
+		ttm_bo_cleanup_memtype_use(bo);
+		dma_resv_unlock(bo->base.resv);
+	}
 
 	atomic_dec(&ttm_glob.bo_count);
 	bo->destroy(bo);
@@ -946,7 +915,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, struct ttm_buffer_object *bo,
 	int ret;
 
 	kref_init(&bo->kref);
-	INIT_LIST_HEAD(&bo->ddestroy);
 	bo->bdev = bdev;
 	bo->type = type;
 	bo->page_alignment = alignment;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index ba3aa0a0fc43..ae4b7922ee1a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -230,7 +230,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	 */
 
 	atomic_inc(&ttm_glob.bo_count);
-	INIT_LIST_HEAD(&fbo->base.ddestroy);
 	drm_vma_node_reset(&fbo->base.base.vma_node);
 
 	kref_init(&fbo->base.kref);
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index e7147e304637..e9bedca4dfdc 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -175,16 +175,6 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
 }
 EXPORT_SYMBOL(ttm_device_swapout);
 
-static void ttm_device_delayed_workqueue(struct work_struct *work)
-{
-	struct ttm_device *bdev =
-		container_of(work, struct ttm_device, wq.work);
-
-	if (!ttm_bo_delayed_delete(bdev, false))
-		schedule_delayed_work(&bdev->wq,
-				      ((HZ / 100) < 1) ? 1 : HZ / 100);
-}
-
 /**
  * ttm_device_init
  *
@@ -215,15 +205,19 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
 	if (ret)
 		return ret;
 
+	bdev->wq = alloc_workqueue("ttm", WQ_MEM_RECLAIM | WQ_HIGHPRI, 16);
+	if (!bdev->wq) {
+		ttm_global_release();
+		return -ENOMEM;
+	}
+
 	bdev->funcs = funcs;
 
 	ttm_sys_man_init(bdev);
 	ttm_pool_init(&bdev->pool, dev, use_dma_alloc, use_dma32);
 
 	bdev->vma_manager = vma_manager;
-	INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
 	spin_lock_init(&bdev->lru_lock);
-	INIT_LIST_HEAD(&bdev->ddestroy);
 	INIT_LIST_HEAD(&bdev->pinned);
 	bdev->dev_mapping = mapping;
 	mutex_lock(&ttm_global_mutex);
@@ -247,10 +241,8 @@ void ttm_device_fini(struct ttm_device *bdev)
 	list_del(&bdev->device_list);
 	mutex_unlock(&ttm_global_mutex);
 
-	cancel_delayed_work_sync(&bdev->wq);
-
-	if (ttm_bo_delayed_delete(bdev, true))
-		pr_debug("Delayed destroy list was clean\n");
+	drain_workqueue(bdev->wq);
+	destroy_workqueue(bdev->wq);
 
 	spin_lock(&bdev->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 7758347c461c..69e62bbb01e3 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -92,7 +92,6 @@ struct ttm_tt;
  * @ttm: TTM structure holding system pages.
  * @evicted: Whether the object was evicted without user-space knowing.
  * @deleted: True if the object is only a zombie and already deleted.
- * @ddestroy: List head for the delayed destroy list.
  * @swap: List head for swap LRU list.
  * @offset: The current GPU offset, which can have different meanings
  * depending on the memory type. For SYSTEM type memory, it should be 0.
@@ -135,19 +134,14 @@ struct ttm_buffer_object {
 	struct ttm_tt *ttm;
 	bool deleted;
 	struct ttm_lru_bulk_move *bulk_move;
+	unsigned priority;
+	unsigned pin_count;
 
 	/**
-	 * Members protected by the bdev::lru_lock.
-	 */
-
-	struct list_head ddestroy;
-
-	/**
-	 * Members protected by a bo reservation.
+	 * @delayed_delete: Work item used when we can't delete the BO
+	 * immediately
 	 */
-
-	unsigned priority;
-	unsigned pin_count;
+	struct work_struct delayed_delete;
 
 	/**
 	 * Special members that are protected by the reserve lock
@@ -448,8 +442,6 @@ void ttm_bo_vm_close(struct vm_area_struct *vma);
 
 int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
 		     void *buf, int len, int write);
-bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all);
-
 vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot);
 
 #endif
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 95b3c04b1ab9..4f3e81eac6f3 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -251,11 +251,6 @@ struct ttm_device {
 	 */
 	spinlock_t lru_lock;
 
-	/**
-	 * @ddestroy: Destroyed but not yet cleaned up buffer objects.
-	 */
-	struct list_head ddestroy;
-
 	/**
 	 * @pinned: Buffer objects which are pinned and so not on any LRU list.
 	 */
@@ -270,7 +265,7 @@ struct ttm_device {
 	/**
 	 * @wq: Work queue structure for the delayed delete workqueue.
 	 */
-	struct delayed_work wq;
+	struct workqueue_struct *wq;
 };
 
 int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
-- 
2.34.1


Thread overview: 75+ messages
2022-11-25 10:21 [PATCH 1/9] drm/amdgpu: generally allow over-commit during BO allocation Christian König
2022-11-25 10:21 ` [Intel-gfx] " Christian König
2022-11-25 10:21 ` [PATCH 2/9] drm/ttm: remove ttm_bo_(un)lock_delayed_workqueue Christian König
2022-11-25 10:21   ` [Intel-gfx] " Christian König
2022-11-25 10:21 ` Christian König [this message]
2022-11-25 10:21   ` [Intel-gfx] [PATCH 3/9] drm/ttm: use per BO cleanup workers Christian König
2022-11-29 21:14   ` Felix Kuehling
2022-11-29 21:14     ` [Intel-gfx] " Felix Kuehling
2022-12-05 13:39     ` Christian König
2022-12-05 13:39       ` [Intel-gfx] " Christian König
2023-06-13 13:05       ` Karol Herbst
2023-06-13 13:05         ` [Intel-gfx] " Karol Herbst
2023-06-13 13:59         ` Christian König
2023-06-13 13:59           ` [Intel-gfx] " Christian König
2023-06-13 14:18           ` Karol Herbst
2023-06-13 14:18             ` [Intel-gfx] " Karol Herbst
2023-06-15 11:19             ` Christian König
2023-06-15 11:19               ` [Intel-gfx] " Christian König
2023-06-15 12:04               ` Karol Herbst
2023-06-15 12:04                 ` [Intel-gfx] " Karol Herbst
2022-11-25 10:21 ` [PATCH 4/9] drm/ttm: merge ttm_bo_api.h and ttm_bo_driver.h Christian König
2022-11-25 10:21   ` [Intel-gfx] " Christian König
2022-11-25 12:43   ` kernel test robot
2022-11-25 12:43     ` [Intel-gfx] " kernel test robot
2022-11-25 21:19   ` kernel test robot
2022-11-25 21:19     ` [Intel-gfx] " kernel test robot
2022-11-25 10:21 ` [PATCH 5/9] drm/nouveau: stop using ttm_bo_wait Christian König
2022-11-25 10:21   ` [Intel-gfx] " Christian König
2023-01-11  9:52   ` [Nouveau] " Christian König
2023-01-18  9:04     ` Christian König
2023-01-18  9:42       ` Christian König
2023-01-18 13:01       ` Karol Herbst
2023-01-18 14:15         ` Christian König
2023-01-18 15:44           ` Danilo Krummrich
2022-11-25 10:21 ` [PATCH 6/9] drm/qxl: " Christian König
2022-11-25 10:21   ` [Intel-gfx] " Christian König
2022-12-15 14:19   ` Christian König
2022-12-15 14:19     ` [Intel-gfx] " Christian König
2022-12-15 20:09     ` Dave Airlie
2022-12-15 20:09       ` Dave Airlie
2022-12-15 20:09       ` [Intel-gfx] " Dave Airlie
2022-11-25 10:21 ` [PATCH 7/9] drm/i915: " Christian König
2022-11-25 10:21   ` [Intel-gfx] " Christian König
2022-11-25 11:14   ` Tvrtko Ursulin
2022-11-25 12:46     ` Christian König
2022-11-29 18:05     ` Matthew Auld
2022-11-30 13:02       ` Tvrtko Ursulin
2022-11-30 14:06         ` Daniel Vetter
2022-11-30 14:06           ` Daniel Vetter
2022-12-05 19:58           ` Christian König
2022-12-05 19:58             ` Christian König
2022-12-06 18:03             ` Matthew Auld
2022-12-06 18:03               ` Matthew Auld
2022-12-06 18:06               ` Christian König
2022-12-06 18:06                 ` Christian König
2022-11-25 10:21 ` [PATCH 8/9] drm/ttm: use ttm_bo_wait_ctx instead of ttm_bo_wait Christian König
2022-11-25 10:21   ` [Intel-gfx] " Christian König
2022-11-25 10:21 ` [PATCH 9/9] drm/ttm: move ttm_bo_wait into VMWGFX Christian König
2022-11-25 10:21   ` [Intel-gfx] " Christian König
2022-11-25 11:17 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/amdgpu: generally allow over-commit during BO allocation Patchwork
2022-11-25 11:18 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2022-11-25 11:18 ` [Intel-gfx] ✗ Fi.CI.DOCS: " Patchwork
2022-11-25 11:40 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2022-11-25 18:18 ` [PATCH 1/9] " Alex Deucher
2022-11-25 18:18   ` [Intel-gfx] " Alex Deucher
2022-12-05 13:41   ` Christian König
2022-12-05 13:41     ` [Intel-gfx] " Christian König
2022-11-28  6:00 ` Arunpravin Paneer Selvam
2022-11-28  6:00   ` [Intel-gfx] " Arunpravin Paneer Selvam
2022-12-10  6:15 ` Felix Kuehling
2022-12-10  6:15   ` [Intel-gfx] " Felix Kuehling
2022-12-10 14:12   ` Christian König
2022-12-10 14:12     ` [Intel-gfx] " Christian König
2022-12-11  1:13     ` Felix Kuehling
2022-12-11  1:13       ` [Intel-gfx] " Felix Kuehling
