All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/6] drm/ttm: move the LRU into resource handling v4
@ 2022-03-21 13:25 Christian König
  2022-03-21 13:25 ` [PATCH 2/6] drm/ttm: add resource iterator v4 Christian König
                   ` (6 more replies)
  0 siblings, 7 replies; 18+ messages in thread
From: Christian König @ 2022-03-21 13:25 UTC (permalink / raw)
  To: matthew.william.auld, felix.kuehling, daniel, dri-devel
  Cc: Christian König

This way we finally fix the problem that new resource are
not immediately evict-able after allocation.

That has caused numerous problems including OOM on GDS handling
and not being able to use TTM as general resource manager.

v2: stop assuming in ttm_resource_fini that res->bo is still valid.
v3: cleanup kerneldoc, add more lockdep annotation
v4: consistently use res->num_pages

Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |   8 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c |   2 +-
 drivers/gpu/drm/ttm/ttm_bo.c            | 115 ++--------------------
 drivers/gpu/drm/ttm/ttm_bo_util.c       |   1 -
 drivers/gpu/drm/ttm/ttm_device.c        |  64 ++++++------
 drivers/gpu/drm/ttm/ttm_resource.c      | 124 ++++++++++++++++++++++--
 include/drm/ttm/ttm_bo_api.h            |  16 ---
 include/drm/ttm/ttm_bo_driver.h         |  29 +-----
 include/drm/ttm/ttm_resource.h          |  35 +++++++
 9 files changed, 198 insertions(+), 196 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b37fc7d7d2c7..f2ce5a0defd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -683,12 +683,12 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
 
 	if (vm->bulk_moveable) {
 		spin_lock(&adev->mman.bdev.lru_lock);
-		ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
+		ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
 		spin_unlock(&adev->mman.bdev.lru_lock);
 		return;
 	}
 
-	memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
+	ttm_lru_bulk_move_init(&vm->lru_bulk_move);
 
 	spin_lock(&adev->mman.bdev.lru_lock);
 	list_for_each_entry(bo_base, &vm->idle, vm_status) {
@@ -698,11 +698,9 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
 		if (!bo->parent)
 			continue;
 
-		ttm_bo_move_to_lru_tail(&bo->tbo, bo->tbo.resource,
-					&vm->lru_bulk_move);
+		ttm_bo_move_to_lru_tail(&bo->tbo, &vm->lru_bulk_move);
 		if (shadow)
 			ttm_bo_move_to_lru_tail(&shadow->tbo,
-						shadow->tbo.resource,
 						&vm->lru_bulk_move);
 	}
 	spin_unlock(&adev->mman.bdev.lru_lock);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index de3fe79b665a..582e8dc9bc8c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -849,7 +849,7 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
 			bo->priority = I915_TTM_PRIO_NO_PAGES;
 	}
 
-	ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
+	ttm_bo_move_to_lru_tail(bo, NULL);
 	spin_unlock(&bo->bdev->lru_lock);
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index db3dc7ef5382..cb0fa932d495 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -69,108 +69,16 @@ static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo,
 	}
 }
 
-static inline void ttm_bo_move_to_pinned(struct ttm_buffer_object *bo)
-{
-	struct ttm_device *bdev = bo->bdev;
-
-	list_move_tail(&bo->lru, &bdev->pinned);
-
-	if (bdev->funcs->del_from_lru_notify)
-		bdev->funcs->del_from_lru_notify(bo);
-}
-
-static inline void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
-{
-	struct ttm_device *bdev = bo->bdev;
-
-	list_del_init(&bo->lru);
-
-	if (bdev->funcs->del_from_lru_notify)
-		bdev->funcs->del_from_lru_notify(bo);
-}
-
-static void ttm_bo_bulk_move_set_pos(struct ttm_lru_bulk_move_pos *pos,
-				     struct ttm_buffer_object *bo)
-{
-	if (!pos->first)
-		pos->first = bo;
-	pos->last = bo;
-}
-
 void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
-			     struct ttm_resource *mem,
 			     struct ttm_lru_bulk_move *bulk)
 {
-	struct ttm_device *bdev = bo->bdev;
-	struct ttm_resource_manager *man;
-
-	if (!bo->deleted)
-		dma_resv_assert_held(bo->base.resv);
-
-	if (bo->pin_count) {
-		ttm_bo_move_to_pinned(bo);
-		return;
-	}
-
-	if (!mem)
-		return;
-
-	man = ttm_manager_type(bdev, mem->mem_type);
-	list_move_tail(&bo->lru, &man->lru[bo->priority]);
-
-	if (bdev->funcs->del_from_lru_notify)
-		bdev->funcs->del_from_lru_notify(bo);
-
-	if (bulk && !bo->pin_count) {
-		switch (bo->resource->mem_type) {
-		case TTM_PL_TT:
-			ttm_bo_bulk_move_set_pos(&bulk->tt[bo->priority], bo);
-			break;
+	dma_resv_assert_held(bo->base.resv);
 
-		case TTM_PL_VRAM:
-			ttm_bo_bulk_move_set_pos(&bulk->vram[bo->priority], bo);
-			break;
-		}
-	}
+	if (bo->resource)
+		ttm_resource_move_to_lru_tail(bo->resource, bulk);
 }
 EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
 
-void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
-{
-	unsigned i;
-
-	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
-		struct ttm_lru_bulk_move_pos *pos = &bulk->tt[i];
-		struct ttm_resource_manager *man;
-
-		if (!pos->first)
-			continue;
-
-		dma_resv_assert_held(pos->first->base.resv);
-		dma_resv_assert_held(pos->last->base.resv);
-
-		man = ttm_manager_type(pos->first->bdev, TTM_PL_TT);
-		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
-				    &pos->last->lru);
-	}
-
-	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
-		struct ttm_lru_bulk_move_pos *pos = &bulk->vram[i];
-		struct ttm_resource_manager *man;
-
-		if (!pos->first)
-			continue;
-
-		dma_resv_assert_held(pos->first->base.resv);
-		dma_resv_assert_held(pos->last->base.resv);
-
-		man = ttm_manager_type(pos->first->bdev, TTM_PL_VRAM);
-		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
-				    &pos->last->lru);
-	}
-}
-EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
-
 static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 				  struct ttm_resource *mem, bool evict,
 				  struct ttm_operation_ctx *ctx,
@@ -344,7 +252,6 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 		return ret;
 	}
 
-	ttm_bo_move_to_pinned(bo);
 	list_del_init(&bo->ddestroy);
 	spin_unlock(&bo->bdev->lru_lock);
 	ttm_bo_cleanup_memtype_use(bo);
@@ -445,7 +352,7 @@ static void ttm_bo_release(struct kref *kref)
 		 */
 		if (bo->pin_count) {
 			bo->pin_count = 0;
-			ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
+			ttm_resource_move_to_lru_tail(bo->resource, NULL);
 		}
 
 		kref_init(&bo->kref);
@@ -458,7 +365,6 @@ static void ttm_bo_release(struct kref *kref)
 	}
 
 	spin_lock(&bo->bdev->lru_lock);
-	ttm_bo_del_from_lru(bo);
 	list_del(&bo->ddestroy);
 	spin_unlock(&bo->bdev->lru_lock);
 
@@ -673,15 +579,17 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
 			struct ww_acquire_ctx *ticket)
 {
 	struct ttm_buffer_object *bo = NULL, *busy_bo = NULL;
+	struct ttm_resource *res;
 	bool locked = false;
 	unsigned i;
 	int ret;
 
 	spin_lock(&bdev->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
-		list_for_each_entry(bo, &man->lru[i], lru) {
+		list_for_each_entry(res, &man->lru[i], lru) {
 			bool busy;
 
+			bo = res->bo;
 			if (!ttm_bo_evict_swapout_allowable(bo, ctx, place,
 							    &locked, &busy)) {
 				if (busy && !busy_bo && ticket !=
@@ -699,7 +607,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
 		}
 
 		/* If the inner loop terminated early, we have our candidate */
-		if (&bo->lru != &man->lru[i])
+		if (&res->lru != &man->lru[i])
 			break;
 
 		bo = NULL;
@@ -875,9 +783,6 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 	}
 
 error:
-	if (bo->resource->mem_type == TTM_PL_SYSTEM && !bo->pin_count)
-		ttm_bo_move_to_lru_tail_unlocked(bo);
-
 	return ret;
 }
 EXPORT_SYMBOL(ttm_bo_mem_space);
@@ -971,7 +876,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
 	bo->destroy = destroy ? destroy : ttm_bo_default_destroy;
 
 	kref_init(&bo->kref);
-	INIT_LIST_HEAD(&bo->lru);
 	INIT_LIST_HEAD(&bo->ddestroy);
 	bo->bdev = bdev;
 	bo->type = type;
@@ -1021,8 +925,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
 		return ret;
 	}
 
-	ttm_bo_move_to_lru_tail_unlocked(bo);
-
 	return ret;
 }
 EXPORT_SYMBOL(ttm_bo_init_reserved);
@@ -1123,7 +1025,6 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 		return ret == -EBUSY ? -ENOSPC : ret;
 	}
 
-	ttm_bo_move_to_pinned(bo);
 	/* TODO: Cleanup the locking */
 	spin_unlock(&bo->bdev->lru_lock);
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 544a84fa6589..0163e92b57af 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -231,7 +231,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 
 	atomic_inc(&ttm_glob.bo_count);
 	INIT_LIST_HEAD(&fbo->base.ddestroy);
-	INIT_LIST_HEAD(&fbo->base.lru);
 	fbo->base.moving = NULL;
 	drm_vma_node_reset(&fbo->base.base.vma_node);
 
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index be24bb6cefd0..ba35887147ba 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -144,6 +144,7 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
 {
 	struct ttm_resource_manager *man;
 	struct ttm_buffer_object *bo;
+	struct ttm_resource *res;
 	unsigned i, j;
 	int ret;
 
@@ -154,8 +155,11 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
 			continue;
 
 		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
-			list_for_each_entry(bo, &man->lru[j], lru) {
-				uint32_t num_pages = PFN_UP(bo->base.size);
+			list_for_each_entry(res, &man->lru[j], lru) {
+				uint32_t num_pages;
+
+				bo = res->bo;
+				num_pages = PFN_UP(bo->base.size);
 
 				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
 				/* ttm_bo_swapout has dropped the lru_lock */
@@ -259,49 +263,45 @@ void ttm_device_fini(struct ttm_device *bdev)
 }
 EXPORT_SYMBOL(ttm_device_fini);
 
-void ttm_device_clear_dma_mappings(struct ttm_device *bdev)
+static void ttm_device_clear_lru_dma_mappings(struct ttm_device *bdev,
+					      struct list_head *list)
 {
-	struct ttm_resource_manager *man;
-	struct ttm_buffer_object *bo;
-	unsigned int i, j;
+	struct ttm_resource *res;
 
 	spin_lock(&bdev->lru_lock);
-	while (!list_empty(&bdev->pinned)) {
-		bo = list_first_entry(&bdev->pinned, struct ttm_buffer_object, lru);
+	while ((res = list_first_entry_or_null(list, typeof(*res), lru))) {
+		struct ttm_buffer_object *bo = res->bo;
+
 		/* Take ref against racing releases once lru_lock is unlocked */
-		if (ttm_bo_get_unless_zero(bo)) {
-			list_del_init(&bo->lru);
-			spin_unlock(&bdev->lru_lock);
+		if (!ttm_bo_get_unless_zero(bo))
+			continue;
 
-			if (bo->ttm)
-				ttm_tt_unpopulate(bo->bdev, bo->ttm);
+		list_del_init(&res->lru);
+		spin_unlock(&bdev->lru_lock);
 
-			ttm_bo_put(bo);
-			spin_lock(&bdev->lru_lock);
-		}
+		if (bo->ttm)
+			ttm_tt_unpopulate(bo->bdev, bo->ttm);
+
+		ttm_bo_put(bo);
+		spin_lock(&bdev->lru_lock);
 	}
+	spin_unlock(&bdev->lru_lock);
+}
+
+void ttm_device_clear_dma_mappings(struct ttm_device *bdev)
+{
+	struct ttm_resource_manager *man;
+	unsigned int i, j;
+
+	ttm_device_clear_lru_dma_mappings(bdev, &bdev->pinned);
 
 	for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
 		man = ttm_manager_type(bdev, i);
 		if (!man || !man->use_tt)
 			continue;
 
-		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
-			while (!list_empty(&man->lru[j])) {
-				bo = list_first_entry(&man->lru[j], struct ttm_buffer_object, lru);
-				if (ttm_bo_get_unless_zero(bo)) {
-					list_del_init(&bo->lru);
-					spin_unlock(&bdev->lru_lock);
-
-					if (bo->ttm)
-						ttm_tt_unpopulate(bo->bdev, bo->ttm);
-
-					ttm_bo_put(bo);
-					spin_lock(&bdev->lru_lock);
-				}
-			}
-		}
+		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j)
+			ttm_device_clear_lru_dma_mappings(bdev, &man->lru[j]);
 	}
-	spin_unlock(&bdev->lru_lock);
 }
 EXPORT_SYMBOL(ttm_device_clear_dma_mappings);
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index cbd47a104962..8c253b6de6cc 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -29,6 +29,110 @@
 #include <drm/ttm/ttm_resource.h>
 #include <drm/ttm/ttm_bo_driver.h>
 
+/**
+ * ttm_lru_bulk_move_init - initialize a bulk move structure
+ * @bulk: the structure to init
+ *
+ * For now just memset the structure to zero.
+ */
+void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk)
+{
+	memset(bulk, 0, sizeof(*bulk));
+}
+EXPORT_SYMBOL(ttm_lru_bulk_move_init);
+
+/**
+ * ttm_lru_bulk_move_tail - bulk move range of resources to the LRU tail.
+ *
+ * @bulk: bulk move structure
+ *
+ * Bulk move BOs to the LRU tail, only valid to use when driver makes sure that
+ * resource order never changes. Should be called with &ttm_device.lru_lock held.
+ */
+void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk)
+{
+	unsigned i;
+
+	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
+		struct ttm_lru_bulk_move_pos *pos = &bulk->tt[i];
+		struct ttm_resource_manager *man;
+
+		if (!pos->first)
+			continue;
+
+		lockdep_assert_held(&pos->first->bo->bdev->lru_lock);
+		dma_resv_assert_held(pos->first->bo->base.resv);
+		dma_resv_assert_held(pos->last->bo->base.resv);
+
+		man = ttm_manager_type(pos->first->bo->bdev, TTM_PL_TT);
+		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
+				    &pos->last->lru);
+	}
+
+	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
+		struct ttm_lru_bulk_move_pos *pos = &bulk->vram[i];
+		struct ttm_resource_manager *man;
+
+		if (!pos->first)
+			continue;
+
+		lockdep_assert_held(&pos->first->bo->bdev->lru_lock);
+		dma_resv_assert_held(pos->first->bo->base.resv);
+		dma_resv_assert_held(pos->last->bo->base.resv);
+
+		man = ttm_manager_type(pos->first->bo->bdev, TTM_PL_VRAM);
+		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
+				    &pos->last->lru);
+	}
+}
+EXPORT_SYMBOL(ttm_lru_bulk_move_tail);
+
+/* Record a resource position in a bulk move structure */
+static void ttm_lru_bulk_move_set_pos(struct ttm_lru_bulk_move_pos *pos,
+				      struct ttm_resource *res)
+{
+	if (!pos->first)
+		pos->first = res;
+	pos->last = res;
+}
+
+/* Move a resource to the LRU tail and track the bulk position */
+void ttm_resource_move_to_lru_tail(struct ttm_resource *res,
+				   struct ttm_lru_bulk_move *bulk)
+{
+	struct ttm_buffer_object *bo = res->bo;
+	struct ttm_device *bdev = bo->bdev;
+	struct ttm_resource_manager *man;
+
+	lockdep_assert_held(&bo->bdev->lru_lock);
+
+	if (bo->pin_count) {
+		list_move_tail(&res->lru, &bdev->pinned);
+		if (bdev->funcs->del_from_lru_notify)
+			bdev->funcs->del_from_lru_notify(res->bo);
+		return;
+	}
+
+	man = ttm_manager_type(bdev, res->mem_type);
+	list_move_tail(&res->lru, &man->lru[bo->priority]);
+
+	if (bdev->funcs->del_from_lru_notify)
+		bdev->funcs->del_from_lru_notify(bo);
+
+	if (!bulk)
+		return;
+
+	switch (res->mem_type) {
+	case TTM_PL_TT:
+		ttm_lru_bulk_move_set_pos(&bulk->tt[bo->priority], res);
+		break;
+
+	case TTM_PL_VRAM:
+		ttm_lru_bulk_move_set_pos(&bulk->vram[bo->priority], res);
+		break;
+	}
+}
+
 /**
  * ttm_resource_init - resource object constructure
  * @bo: buffer object this resources is allocated for
@@ -52,10 +156,12 @@ void ttm_resource_init(struct ttm_buffer_object *bo,
 	res->bus.is_iomem = false;
 	res->bus.caching = ttm_cached;
 	res->bo = bo;
+	INIT_LIST_HEAD(&res->lru);
 
 	man = ttm_manager_type(bo->bdev, place->mem_type);
 	spin_lock(&bo->bdev->lru_lock);
-	man->usage += bo->base.size;
+	man->usage += res->num_pages << PAGE_SHIFT;
+	ttm_resource_move_to_lru_tail(res, NULL);
 	spin_unlock(&bo->bdev->lru_lock);
 }
 EXPORT_SYMBOL(ttm_resource_init);
@@ -66,15 +172,21 @@ EXPORT_SYMBOL(ttm_resource_init);
  * @res: the resource to clean up
  *
  * Should be used by resource manager backends to clean up the TTM resource
- * objects before freeing the underlying structure. Counterpart of
- * &ttm_resource_init
+ * objects before freeing the underlying structure. Makes sure the resource is
+ * removed from the LRU before destruction.
+ * Counterpart of &ttm_resource_init.
  */
 void ttm_resource_fini(struct ttm_resource_manager *man,
 		       struct ttm_resource *res)
 {
-	spin_lock(&man->bdev->lru_lock);
-	man->usage -= res->bo->base.size;
-	spin_unlock(&man->bdev->lru_lock);
+	struct ttm_device *bdev = man->bdev;
+
+	spin_lock(&bdev->lru_lock);
+	list_del_init(&res->lru);
+	if (res->bo && bdev->funcs->del_from_lru_notify)
+		bdev->funcs->del_from_lru_notify(res->bo);
+	man->usage -= res->num_pages << PAGE_SHIFT;
+	spin_unlock(&bdev->lru_lock);
 }
 EXPORT_SYMBOL(ttm_resource_fini);
 
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index c17b2df9178b..3da77fc54552 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -55,8 +55,6 @@ struct ttm_placement;
 
 struct ttm_place;
 
-struct ttm_lru_bulk_move;
-
 /**
  * enum ttm_bo_type
  *
@@ -94,7 +92,6 @@ struct ttm_tt;
  * @ttm: TTM structure holding system pages.
  * @evicted: Whether the object was evicted without user-space knowing.
  * @deleted: True if the object is only a zombie and already deleted.
- * @lru: List head for the lru list.
  * @ddestroy: List head for the delayed destroy list.
  * @swap: List head for swap LRU list.
  * @moving: Fence set when BO is moving
@@ -143,7 +140,6 @@ struct ttm_buffer_object {
 	 * Members protected by the bdev::lru_lock.
 	 */
 
-	struct list_head lru;
 	struct list_head ddestroy;
 
 	/**
@@ -295,7 +291,6 @@ void ttm_bo_put(struct ttm_buffer_object *bo);
  * ttm_bo_move_to_lru_tail
  *
  * @bo: The buffer object.
- * @mem: Resource object.
  * @bulk: optional bulk move structure to remember BO positions
  *
  * Move this BO to the tail of all lru lists used to lookup and reserve an
@@ -303,19 +298,8 @@ void ttm_bo_put(struct ttm_buffer_object *bo);
  * held, and is used to make a BO less likely to be considered for eviction.
  */
 void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
-			     struct ttm_resource *mem,
 			     struct ttm_lru_bulk_move *bulk);
 
-/**
- * ttm_bo_bulk_move_lru_tail
- *
- * @bulk: bulk move structure
- *
- * Bulk move BOs to the LRU tail, only valid to use when driver makes sure that
- * BO order never changes. Should be called with ttm_global::lru_lock held.
- */
-void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk);
-
 /**
  * ttm_bo_lock_delayed_workqueue
  *
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 5f087575194b..6c7352e13708 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -45,33 +45,6 @@
 #include "ttm_tt.h"
 #include "ttm_pool.h"
 
-/**
- * struct ttm_lru_bulk_move_pos
- *
- * @first: first BO in the bulk move range
- * @last: last BO in the bulk move range
- *
- * Positions for a lru bulk move.
- */
-struct ttm_lru_bulk_move_pos {
-	struct ttm_buffer_object *first;
-	struct ttm_buffer_object *last;
-};
-
-/**
- * struct ttm_lru_bulk_move
- *
- * @tt: first/last lru entry for BOs in the TT domain
- * @vram: first/last lru entry for BOs in the VRAM domain
- * @swap: first/last lru entry for BOs on the swap list
- *
- * Helper structure for bulk moves on the LRU list.
- */
-struct ttm_lru_bulk_move {
-	struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
-	struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
-};
-
 /*
  * ttm_bo.c
  */
@@ -182,7 +155,7 @@ static inline void
 ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
 {
 	spin_lock(&bo->bdev->lru_lock);
-	ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
+	ttm_bo_move_to_lru_tail(bo, NULL);
 	spin_unlock(&bo->bdev->lru_lock);
 }
 
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index 323c14a30c6b..181e82e3d806 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -26,10 +26,12 @@
 #define _TTM_RESOURCE_H_
 
 #include <linux/types.h>
+#include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/atomic.h>
 #include <linux/dma-buf-map.h>
 #include <linux/dma-fence.h>
+
 #include <drm/drm_print.h>
 #include <drm/ttm/ttm_caching.h>
 #include <drm/ttm/ttm_kmap_iter.h>
@@ -179,6 +181,33 @@ struct ttm_resource {
 	uint32_t placement;
 	struct ttm_bus_placement bus;
 	struct ttm_buffer_object *bo;
+	struct list_head lru;
+};
+
+/**
+ * struct ttm_lru_bulk_move_pos
+ *
+ * @first: first res in the bulk move range
+ * @last: last res in the bulk move range
+ *
+ * Positions for a lru bulk move.
+ */
+struct ttm_lru_bulk_move_pos {
+	struct ttm_resource *first;
+	struct ttm_resource *last;
+};
+
+/**
+ * struct ttm_lru_bulk_move
+ *
+ * @tt: first/last lru entry for resources in the TT domain
+ * @vram: first/last lru entry for resources in the VRAM domain
+ *
+ * Helper structure for bulk moves on the LRU list.
+ */
+struct ttm_lru_bulk_move {
+	struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
+	struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
 };
 
 /**
@@ -267,6 +296,12 @@ ttm_resource_manager_cleanup(struct ttm_resource_manager *man)
 	man->move = NULL;
 }
 
+void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk);
+void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk);
+
+void ttm_resource_move_to_lru_tail(struct ttm_resource *res,
+				   struct ttm_lru_bulk_move *bulk);
+
 void ttm_resource_init(struct ttm_buffer_object *bo,
                        const struct ttm_place *place,
                        struct ttm_resource *res);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 2/6] drm/ttm: add resource iterator v4
  2022-03-21 13:25 [PATCH 1/6] drm/ttm: move the LRU into resource handling v4 Christian König
@ 2022-03-21 13:25 ` Christian König
  2022-03-21 13:25 ` [PATCH 3/6] drm/ttm: allow bulk moves for all domains Christian König
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: Christian König @ 2022-03-21 13:25 UTC (permalink / raw)
  To: matthew.william.auld, felix.kuehling, daniel, dri-devel
  Cc: Daniel Vetter, Felix Kuehling, Christian König

Instead of duplicating that at different places add an iterator over all
the resources in a resource manager.

v2: add lockdep annotation and kerneldoc
v3: fix various bugs pointed out by Felix
v4: simplify the code a bit more

Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> (v2)
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c       | 37 ++++++++--------------
 drivers/gpu/drm/ttm/ttm_device.c   | 26 +++++++--------
 drivers/gpu/drm/ttm/ttm_resource.c | 51 ++++++++++++++++++++++++++++++
 include/drm/ttm/ttm_resource.h     | 23 ++++++++++++++
 4 files changed, 99 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index cb0fa932d495..b119af33e7d7 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -579,38 +579,29 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
 			struct ww_acquire_ctx *ticket)
 {
 	struct ttm_buffer_object *bo = NULL, *busy_bo = NULL;
+	struct ttm_resource_cursor cursor;
 	struct ttm_resource *res;
 	bool locked = false;
-	unsigned i;
 	int ret;
 
 	spin_lock(&bdev->lru_lock);
-	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
-		list_for_each_entry(res, &man->lru[i], lru) {
-			bool busy;
+	ttm_resource_manager_for_each_res(man, &cursor, res) {
+		bool busy;
+
+		if (!ttm_bo_evict_swapout_allowable(res->bo, ctx, place,
+						    &locked, &busy)) {
+			if (busy && !busy_bo && ticket !=
+			    dma_resv_locking_ctx(res->bo->base.resv))
+				busy_bo = res->bo;
+			continue;
+		}
 
+		if (ttm_bo_get_unless_zero(res->bo)) {
 			bo = res->bo;
-			if (!ttm_bo_evict_swapout_allowable(bo, ctx, place,
-							    &locked, &busy)) {
-				if (busy && !busy_bo && ticket !=
-				    dma_resv_locking_ctx(bo->base.resv))
-					busy_bo = bo;
-				continue;
-			}
-
-			if (!ttm_bo_get_unless_zero(bo)) {
-				if (locked)
-					dma_resv_unlock(bo->base.resv);
-				continue;
-			}
 			break;
 		}
-
-		/* If the inner loop terminated early, we have our candidate */
-		if (&res->lru != &man->lru[i])
-			break;
-
-		bo = NULL;
+		if (locked)
+			dma_resv_unlock(res->bo->base.resv);
 	}
 
 	if (!bo) {
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index ba35887147ba..a0562ab386f5 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -142,10 +142,10 @@ EXPORT_SYMBOL(ttm_global_swapout);
 int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
 		       gfp_t gfp_flags)
 {
+	struct ttm_resource_cursor cursor;
 	struct ttm_resource_manager *man;
-	struct ttm_buffer_object *bo;
 	struct ttm_resource *res;
-	unsigned i, j;
+	unsigned i;
 	int ret;
 
 	spin_lock(&bdev->lru_lock);
@@ -154,20 +154,16 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
 		if (!man || !man->use_tt)
 			continue;
 
-		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
-			list_for_each_entry(res, &man->lru[j], lru) {
-				uint32_t num_pages;
-
-				bo = res->bo;
-				num_pages = PFN_UP(bo->base.size);
+		ttm_resource_manager_for_each_res(man, &cursor, res) {
+			struct ttm_buffer_object *bo = res->bo;
+			uint32_t num_pages = PFN_UP(bo->base.size);
 
-				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
-				/* ttm_bo_swapout has dropped the lru_lock */
-				if (!ret)
-					return num_pages;
-				if (ret != -EBUSY)
-					return ret;
-			}
+			ret = ttm_bo_swapout(bo, ctx, gfp_flags);
+			/* ttm_bo_swapout has dropped the lru_lock */
+			if (!ret)
+				return num_pages;
+			if (ret != -EBUSY)
+				return ret;
 		}
 	}
 	spin_unlock(&bdev->lru_lock);
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index 8c253b6de6cc..81676c3dbeee 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -385,6 +385,57 @@ void ttm_resource_manager_debug(struct ttm_resource_manager *man,
 }
 EXPORT_SYMBOL(ttm_resource_manager_debug);
 
+/**
+ * ttm_resource_manager_first
+ *
+ * @man: resource manager to iterate over
+ * @cursor: cursor to record the position
+ *
+ * Returns the first resource from the resource manager.
+ */
+struct ttm_resource *
+ttm_resource_manager_first(struct ttm_resource_manager *man,
+			   struct ttm_resource_cursor *cursor)
+{
+	struct ttm_resource *res;
+
+	lockdep_assert_held(&man->bdev->lru_lock);
+
+	for (cursor->priority = 0; cursor->priority < TTM_MAX_BO_PRIORITY;
+	     ++cursor->priority)
+		list_for_each_entry(res, &man->lru[cursor->priority], lru)
+			return res;
+
+	return NULL;
+}
+
+/**
+ * ttm_resource_manager_next
+ *
+ * @man: resource manager to iterate over
+ * @cursor: cursor to record the position
+ * @res: the current resource pointer
+ *
+ * Returns the next resource from the resource manager.
+ */
+struct ttm_resource *
+ttm_resource_manager_next(struct ttm_resource_manager *man,
+			  struct ttm_resource_cursor *cursor,
+			  struct ttm_resource *res)
+{
+	lockdep_assert_held(&man->bdev->lru_lock);
+
+	list_for_each_entry_continue(res, &man->lru[cursor->priority], lru)
+		return res;
+
+	for (++cursor->priority; cursor->priority < TTM_MAX_BO_PRIORITY;
+	     ++cursor->priority)
+		list_for_each_entry(res, &man->lru[cursor->priority], lru)
+			return res;
+
+	return NULL;
+}
+
 static void ttm_kmap_iter_iomap_map_local(struct ttm_kmap_iter *iter,
 					  struct dma_buf_map *dmap,
 					  pgoff_t i)
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index 181e82e3d806..ef0ec700e896 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -184,6 +184,17 @@ struct ttm_resource {
 	struct list_head lru;
 };
 
+/**
+ * struct ttm_resource_cursor
+ *
+ * @priority: the current priority
+ *
+ * Cursor to iterate over the resources in a manager.
+ */
+struct ttm_resource_cursor {
+	unsigned int priority;
+};
+
 /**
  * struct ttm_lru_bulk_move_pos
  *
@@ -328,6 +339,18 @@ uint64_t ttm_resource_manager_usage(struct ttm_resource_manager *man);
 void ttm_resource_manager_debug(struct ttm_resource_manager *man,
 				struct drm_printer *p);
 
+struct ttm_resource *
+ttm_resource_manager_first(struct ttm_resource_manager *man,
+			   struct ttm_resource_cursor *cursor);
+struct ttm_resource *
+ttm_resource_manager_next(struct ttm_resource_manager *man,
+			  struct ttm_resource_cursor *cursor,
+			  struct ttm_resource *res);
+
+#define ttm_resource_manager_for_each_res(man, cursor, res)		\
+	for (res = ttm_resource_manager_first(man, cursor); res;	\
+	     res = ttm_resource_manager_next(man, cursor, res))
+
 struct ttm_kmap_iter *
 ttm_kmap_iter_iomap_init(struct ttm_kmap_iter_iomap *iter_io,
 			 struct io_mapping *iomap,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 3/6] drm/ttm: allow bulk moves for all domains
  2022-03-21 13:25 [PATCH 1/6] drm/ttm: move the LRU into resource handling v4 Christian König
  2022-03-21 13:25 ` [PATCH 2/6] drm/ttm: add resource iterator v4 Christian König
@ 2022-03-21 13:25 ` Christian König
  2022-03-21 13:25 ` [PATCH 4/6] drm/ttm: de-inline ttm_bo_pin/unpin Christian König
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: Christian König @ 2022-03-21 13:25 UTC (permalink / raw)
  To: matthew.william.auld, felix.kuehling, daniel, dri-devel
  Cc: Daniel Vetter, Christian König

Not just TT and VRAM.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Tested-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
---
 drivers/gpu/drm/ttm/ttm_resource.c | 52 +++++++++---------------------
 include/drm/ttm/ttm_device.h       |  2 --
 include/drm/ttm/ttm_resource.h     |  4 +--
 3 files changed, 17 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index 81676c3dbeee..25b0a23ba04b 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -51,38 +51,24 @@ EXPORT_SYMBOL(ttm_lru_bulk_move_init);
  */
 void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk)
 {
-	unsigned i;
-
-	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
-		struct ttm_lru_bulk_move_pos *pos = &bulk->tt[i];
-		struct ttm_resource_manager *man;
+	unsigned i, j;
 
-		if (!pos->first)
-			continue;
+	for (i = 0; i < TTM_NUM_MEM_TYPES; ++i) {
+		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
+			struct ttm_lru_bulk_move_pos *pos = &bulk->pos[i][j];
+			struct ttm_resource_manager *man;
 
-		lockdep_assert_held(&pos->first->bo->bdev->lru_lock);
-		dma_resv_assert_held(pos->first->bo->base.resv);
-		dma_resv_assert_held(pos->last->bo->base.resv);
+			if (!pos->first)
+				continue;
 
-		man = ttm_manager_type(pos->first->bo->bdev, TTM_PL_TT);
-		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
-				    &pos->last->lru);
-	}
-
-	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
-		struct ttm_lru_bulk_move_pos *pos = &bulk->vram[i];
-		struct ttm_resource_manager *man;
+			lockdep_assert_held(&pos->first->bo->bdev->lru_lock);
+			dma_resv_assert_held(pos->first->bo->base.resv);
+			dma_resv_assert_held(pos->last->bo->base.resv);
 
-		if (!pos->first)
-			continue;
-
-		lockdep_assert_held(&pos->first->bo->bdev->lru_lock);
-		dma_resv_assert_held(pos->first->bo->base.resv);
-		dma_resv_assert_held(pos->last->bo->base.resv);
-
-		man = ttm_manager_type(pos->first->bo->bdev, TTM_PL_VRAM);
-		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
-				    &pos->last->lru);
+			man = ttm_manager_type(pos->first->bo->bdev, i);
+			list_bulk_move_tail(&man->lru[j], &pos->first->lru,
+					    &pos->last->lru);
+		}
 	}
 }
 EXPORT_SYMBOL(ttm_lru_bulk_move_tail);
@@ -122,15 +108,7 @@ void ttm_resource_move_to_lru_tail(struct ttm_resource *res,
 	if (!bulk)
 		return;
 
-	switch (res->mem_type) {
-	case TTM_PL_TT:
-		ttm_lru_bulk_move_set_pos(&bulk->tt[bo->priority], res);
-		break;
-
-	case TTM_PL_VRAM:
-		ttm_lru_bulk_move_set_pos(&bulk->vram[bo->priority], res);
-		break;
-	}
+	ttm_lru_bulk_move_set_pos(&bulk->pos[res->mem_type][bo->priority], res);
 }
 
 /**
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 0a4ddec78d8f..425150f35fbe 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -30,8 +30,6 @@
 #include <drm/ttm/ttm_resource.h>
 #include <drm/ttm/ttm_pool.h>
 
-#define TTM_NUM_MEM_TYPES 8
-
 struct ttm_device;
 struct ttm_placement;
 struct ttm_buffer_object;
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index ef0ec700e896..e8a64ca3cf25 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -37,6 +37,7 @@
 #include <drm/ttm/ttm_kmap_iter.h>
 
 #define TTM_MAX_BO_PRIORITY	4U
+#define TTM_NUM_MEM_TYPES 8
 
 struct ttm_device;
 struct ttm_resource_manager;
@@ -217,8 +218,7 @@ struct ttm_lru_bulk_move_pos {
  * Helper structure for bulk moves on the LRU list.
  */
 struct ttm_lru_bulk_move {
-	struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
-	struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
+	struct ttm_lru_bulk_move_pos pos[TTM_NUM_MEM_TYPES][TTM_MAX_BO_PRIORITY];
 };
 
 /**
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 4/6] drm/ttm: de-inline ttm_bo_pin/unpin
  2022-03-21 13:25 [PATCH 1/6] drm/ttm: move the LRU into resource handling v4 Christian König
  2022-03-21 13:25 ` [PATCH 2/6] drm/ttm: add resource iterator v4 Christian König
  2022-03-21 13:25 ` [PATCH 3/6] drm/ttm: allow bulk moves for all domains Christian König
@ 2022-03-21 13:25 ` Christian König
  2022-03-23 10:37   ` Daniel Vetter
  2022-03-21 13:26 ` [PATCH 5/6] drm/ttm: rework bulk move handling v4 Christian König
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 18+ messages in thread
From: Christian König @ 2022-03-21 13:25 UTC (permalink / raw)
  To: matthew.william.auld, felix.kuehling, daniel, dri-devel
  Cc: Christian König

Those functions are going to become more complex, don't inline them any
more.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 31 +++++++++++++++++++++++++++++++
 include/drm/ttm/ttm_bo_api.h | 30 ++----------------------------
 2 files changed, 33 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index b119af33e7d7..502617ee9303 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -633,6 +633,37 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
 	return ret;
 }
 
+/**
+ * ttm_bo_pin - Pin the buffer object.
+ * @bo: The buffer object to pin
+ *
+ * Make sure the buffer is not evicted any more during memory pressure.
+ */
+void ttm_bo_pin(struct ttm_buffer_object *bo)
+{
+	dma_resv_assert_held(bo->base.resv);
+	WARN_ON_ONCE(!kref_read(&bo->kref));
+	++bo->pin_count;
+}
+EXPORT_SYMBOL(ttm_bo_pin);
+
+/**
+ * ttm_bo_unpin - Unpin the buffer object.
+ * @bo: The buffer object to unpin
+ *
+ * Allows the buffer object to be evicted again during memory pressure.
+ */
+void ttm_bo_unpin(struct ttm_buffer_object *bo)
+{
+	dma_resv_assert_held(bo->base.resv);
+	WARN_ON_ONCE(!kref_read(&bo->kref));
+	if (bo->pin_count)
+		--bo->pin_count;
+	else
+		WARN_ON_ONCE(true);
+}
+EXPORT_SYMBOL(ttm_bo_unpin);
+
 /*
  * Add the last move fence to the BO and reserve a new shared slot. We only use
  * a shared slot to avoid unecessary sync and rely on the subsequent bo move to
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 3da77fc54552..885b7698fd65 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -524,34 +524,8 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file *filp,
 int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 		   gfp_t gfp_flags);
 
-/**
- * ttm_bo_pin - Pin the buffer object.
- * @bo: The buffer object to pin
- *
- * Make sure the buffer is not evicted any more during memory pressure.
- */
-static inline void ttm_bo_pin(struct ttm_buffer_object *bo)
-{
-	dma_resv_assert_held(bo->base.resv);
-	WARN_ON_ONCE(!kref_read(&bo->kref));
-	++bo->pin_count;
-}
-
-/**
- * ttm_bo_unpin - Unpin the buffer object.
- * @bo: The buffer object to unpin
- *
- * Allows the buffer object to be evicted again during memory pressure.
- */
-static inline void ttm_bo_unpin(struct ttm_buffer_object *bo)
-{
-	dma_resv_assert_held(bo->base.resv);
-	WARN_ON_ONCE(!kref_read(&bo->kref));
-	if (bo->pin_count)
-		--bo->pin_count;
-	else
-		WARN_ON_ONCE(true);
-}
+void ttm_bo_pin(struct ttm_buffer_object *bo);
+void ttm_bo_unpin(struct ttm_buffer_object *bo);
 
 int ttm_mem_evict_first(struct ttm_device *bdev,
 			struct ttm_resource_manager *man,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 5/6] drm/ttm: rework bulk move handling v4
  2022-03-21 13:25 [PATCH 1/6] drm/ttm: move the LRU into resource handling v4 Christian König
                   ` (2 preceding siblings ...)
  2022-03-21 13:25 ` [PATCH 4/6] drm/ttm: de-inline ttm_bo_pin/unpin Christian König
@ 2022-03-21 13:26 ` Christian König
  2022-03-23 10:49   ` Daniel Vetter
  2022-03-21 13:26 ` [PATCH 6/6] drm/amdgpu: drop amdgpu_gtt_node Christian König
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 18+ messages in thread
From: Christian König @ 2022-03-21 13:26 UTC (permalink / raw)
  To: matthew.william.auld, felix.kuehling, daniel, dri-devel
  Cc: Christian König

Instead of providing the bulk move structure for each LRU update set
this as property of the BO. This should avoid costly bulk move rebuilds
with some games under RADV.

v2: some name polishing, add a few more kerneldoc words.
v3: add some lockdep
v4: fix bugs, handle pin/unpin as well

Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 72 +++-----------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 -
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c |  2 +-
 drivers/gpu/drm/ttm/ttm_bo.c            | 59 +++++++++++++---
 drivers/gpu/drm/ttm/ttm_resource.c      | 90 ++++++++++++++++++-------
 include/drm/ttm/ttm_bo_api.h            | 16 ++---
 include/drm/ttm/ttm_bo_driver.h         |  2 +-
 include/drm/ttm/ttm_device.h            |  9 ---
 include/drm/ttm/ttm_resource.h          |  9 ++-
 10 files changed, 138 insertions(+), 125 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 5859ed0552a4..57ac118fc266 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1498,7 +1498,6 @@ static struct ttm_device_funcs amdgpu_bo_driver = {
 	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
 	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
 	.access_memory = &amdgpu_ttm_access_memory,
-	.del_from_lru_notify = &amdgpu_vm_del_from_lru_notify
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f2ce5a0defd9..28f5e8b21a99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -375,7 +375,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
 	if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv)
 		return;
 
-	vm->bulk_moveable = false;
+	ttm_bo_set_bulk_move(&bo->tbo, &vm->lru_bulk_move);
 	if (bo->tbo.type == ttm_bo_type_kernel && bo->parent)
 		amdgpu_vm_bo_relocated(base);
 	else
@@ -637,36 +637,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 	list_add(&entry->tv.head, validated);
 }
 
-/**
- * amdgpu_vm_del_from_lru_notify - update bulk_moveable flag
- *
- * @bo: BO which was removed from the LRU
- *
- * Make sure the bulk_moveable flag is updated when a BO is removed from the
- * LRU.
- */
-void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
-{
-	struct amdgpu_bo *abo;
-	struct amdgpu_vm_bo_base *bo_base;
-
-	if (!amdgpu_bo_is_amdgpu_bo(bo))
-		return;
-
-	if (bo->pin_count)
-		return;
-
-	abo = ttm_to_amdgpu_bo(bo);
-	if (!abo->parent)
-		return;
-	for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) {
-		struct amdgpu_vm *vm = bo_base->vm;
-
-		if (abo->tbo.base.resv == vm->root.bo->tbo.base.resv)
-			vm->bulk_moveable = false;
-	}
-
-}
 /**
  * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
  *
@@ -679,33 +649,9 @@ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
 void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
 				struct amdgpu_vm *vm)
 {
-	struct amdgpu_vm_bo_base *bo_base;
-
-	if (vm->bulk_moveable) {
-		spin_lock(&adev->mman.bdev.lru_lock);
-		ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
-		spin_unlock(&adev->mman.bdev.lru_lock);
-		return;
-	}
-
-	ttm_lru_bulk_move_init(&vm->lru_bulk_move);
-
 	spin_lock(&adev->mman.bdev.lru_lock);
-	list_for_each_entry(bo_base, &vm->idle, vm_status) {
-		struct amdgpu_bo *bo = bo_base->bo;
-		struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);
-
-		if (!bo->parent)
-			continue;
-
-		ttm_bo_move_to_lru_tail(&bo->tbo, &vm->lru_bulk_move);
-		if (shadow)
-			ttm_bo_move_to_lru_tail(&shadow->tbo,
-						&vm->lru_bulk_move);
-	}
+	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
 	spin_unlock(&adev->mman.bdev.lru_lock);
-
-	vm->bulk_moveable = true;
 }
 
 /**
@@ -728,8 +674,6 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	struct amdgpu_vm_bo_base *bo_base, *tmp;
 	int r;
 
-	vm->bulk_moveable &= list_empty(&vm->evicted);
-
 	list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
 		struct amdgpu_bo *bo = bo_base->bo;
 		struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);
@@ -1047,10 +991,16 @@ static void amdgpu_vm_free_table(struct amdgpu_vm_bo_base *entry)
 
 	if (!entry->bo)
 		return;
+
 	shadow = amdgpu_bo_shadowed(entry->bo);
+	if (shadow) {
+		ttm_bo_set_bulk_move(&shadow->tbo, NULL);
+		amdgpu_bo_unref(&shadow);
+	}
+
+	ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
 	entry->bo->vm_bo = NULL;
 	list_del(&entry->vm_status);
-	amdgpu_bo_unref(&shadow);
 	amdgpu_bo_unref(&entry->bo);
 }
 
@@ -1070,8 +1020,6 @@ static void amdgpu_vm_free_pts(struct amdgpu_device *adev,
 	struct amdgpu_vm_pt_cursor cursor;
 	struct amdgpu_vm_bo_base *entry;
 
-	vm->bulk_moveable = false;
-
 	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
 		amdgpu_vm_free_table(entry);
 
@@ -2651,7 +2599,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 
 	if (bo) {
 		if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)
-			vm->bulk_moveable = false;
+			ttm_bo_set_bulk_move(&bo->tbo, NULL);
 
 		for (base = &bo_va->base.bo->vm_bo; *base;
 		     base = &(*base)->next) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 85fcfb8c5efd..4d236682a118 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -317,8 +317,6 @@ struct amdgpu_vm {
 
 	/* Store positions of group of BOs */
 	struct ttm_lru_bulk_move lru_bulk_move;
-	/* mark whether can do the bulk move */
-	bool			bulk_moveable;
 	/* Flag to indicate if VM is used for compute */
 	bool			is_compute_context;
 };
@@ -454,7 +452,6 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
 
 void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
 				struct amdgpu_vm *vm);
-void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo);
 void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
 				uint64_t *gtt_mem, uint64_t *cpu_mem);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 582e8dc9bc8c..6fc192082d8c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -849,7 +849,7 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
 			bo->priority = I915_TTM_PRIO_NO_PAGES;
 	}
 
-	ttm_bo_move_to_lru_tail(bo, NULL);
+	ttm_bo_move_to_lru_tail(bo);
 	spin_unlock(&bo->bdev->lru_lock);
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 502617ee9303..bbd068bbcd2e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -69,16 +69,53 @@ static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo,
 	}
 }
 
-void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
-			     struct ttm_lru_bulk_move *bulk)
+/**
+ * ttm_bo_move_to_lru_tail
+ *
+ * @bo: The buffer object.
+ *
+ * Move this BO to the tail of all lru lists used to lookup and reserve an
+ * object. This function must be called with struct ttm_global::lru_lock
+ * held, and is used to make a BO less likely to be considered for eviction.
+ */
+void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo)
 {
 	dma_resv_assert_held(bo->base.resv);
 
 	if (bo->resource)
-		ttm_resource_move_to_lru_tail(bo->resource, bulk);
+		ttm_resource_move_to_lru_tail(bo->resource);
 }
 EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
 
+/**
+ * ttm_bo_set_bulk_move - update BOs bulk move object
+ *
+ * @bo: The buffer object.
+ *
+ * Update the BOs bulk move object, making sure that resources are added/removed
+ * as well. A bulk move allows to move many resource on the LRU at once,
+ * resulting in much less overhead of maintaining the LRU.
+ * The only requirement is that the resources stay together on the LRU and are
+ * never separated. This is enforces by setting the bulk_move structure on a BO.
+ */
+void ttm_bo_set_bulk_move(struct ttm_buffer_object *bo,
+			  struct ttm_lru_bulk_move *bulk)
+{
+	dma_resv_assert_held(bo->base.resv);
+
+	if (bo->bulk_move == bulk)
+		return;
+
+	spin_lock(&bo->bdev->lru_lock);
+	if (bo->bulk_move && bo->resource)
+		ttm_lru_bulk_move_del(bo->bulk_move, bo->resource);
+	bo->bulk_move = bulk;
+	if (bo->bulk_move && bo->resource)
+		ttm_lru_bulk_move_add(bo->bulk_move, bo->resource);
+	spin_unlock(&bo->bdev->lru_lock);
+}
+EXPORT_SYMBOL(ttm_bo_set_bulk_move);
+
 static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 				  struct ttm_resource *mem, bool evict,
 				  struct ttm_operation_ctx *ctx,
@@ -316,6 +353,7 @@ static void ttm_bo_release(struct kref *kref)
 	int ret;
 
 	WARN_ON_ONCE(bo->pin_count);
+	WARN_ON_ONCE(bo->bulk_move);
 
 	if (!bo->deleted) {
 		ret = ttm_bo_individualize_resv(bo);
@@ -352,7 +390,7 @@ static void ttm_bo_release(struct kref *kref)
 		 */
 		if (bo->pin_count) {
 			bo->pin_count = 0;
-			ttm_resource_move_to_lru_tail(bo->resource, NULL);
+			ttm_resource_move_to_lru_tail(bo->resource);
 		}
 
 		kref_init(&bo->kref);
@@ -643,7 +681,8 @@ void ttm_bo_pin(struct ttm_buffer_object *bo)
 {
 	dma_resv_assert_held(bo->base.resv);
 	WARN_ON_ONCE(!kref_read(&bo->kref));
-	++bo->pin_count;
+	if (!(bo->pin_count++) && bo->bulk_move && bo->resource)
+		ttm_lru_bulk_move_del(bo->bulk_move, bo->resource);
 }
 EXPORT_SYMBOL(ttm_bo_pin);
 
@@ -657,10 +696,11 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo)
 {
 	dma_resv_assert_held(bo->base.resv);
 	WARN_ON_ONCE(!kref_read(&bo->kref));
-	if (bo->pin_count)
-		--bo->pin_count;
-	else
-		WARN_ON_ONCE(true);
+	if (WARN_ON_ONCE(!bo->pin_count))
+		return;
+
+	if (!(--bo->pin_count) && bo->bulk_move && bo->resource)
+		ttm_lru_bulk_move_add(bo->bulk_move, bo->resource);
 }
 EXPORT_SYMBOL(ttm_bo_unpin);
 
@@ -905,6 +945,7 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
 	bo->moving = NULL;
 	bo->pin_count = 0;
 	bo->sg = sg;
+	bo->bulk_move = NULL;
 	if (resv) {
 		bo->base.resv = resv;
 		dma_resv_assert_held(bo->base.resv);
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index 25b0a23ba04b..bde43495b8fc 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -73,42 +73,77 @@ void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk)
 }
 EXPORT_SYMBOL(ttm_lru_bulk_move_tail);
 
-/* Record a resource position in a bulk move structure */
-static void ttm_lru_bulk_move_set_pos(struct ttm_lru_bulk_move_pos *pos,
-				      struct ttm_resource *res)
+/* Return the bulk move pos object for this resource */
+static struct ttm_lru_bulk_move_pos *
+ttm_lru_bulk_move_pos(struct ttm_lru_bulk_move *bulk, struct ttm_resource *res)
 {
-	if (!pos->first)
+	return &bulk->pos[res->mem_type][res->bo->priority];
+}
+
+/* Move the resource to the tail of the bulk move range */
+static void ttm_lru_bulk_move_pos_tail(struct ttm_lru_bulk_move_pos *pos,
+				       struct ttm_resource *res)
+{
+	if (pos->last != res) {
+		list_move(&res->lru, &pos->last->lru);
+		pos->last = res;
+	}
+}
+
+/* Add the resource to a bulk_move cursor */
+void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
+			   struct ttm_resource *res)
+{
+	struct ttm_lru_bulk_move_pos *pos = ttm_lru_bulk_move_pos(bulk, res);
+
+	if (!pos->first) {
 		pos->first = res;
-	pos->last = res;
+		pos->last = res;
+	} else {
+		ttm_lru_bulk_move_pos_tail(pos, res);
+	}
+}
+
+/* Remove the resource from a bulk_move range */
+void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
+			   struct ttm_resource *res)
+{
+	struct ttm_lru_bulk_move_pos *pos = ttm_lru_bulk_move_pos(bulk, res);
+
+	if (unlikely(pos->first == res && pos->last == res)) {
+		pos->first = NULL;
+		pos->last = NULL;
+	} else if (pos->first == res) {
+		pos->first = list_next_entry(res, lru);
+	} else if (pos->last == res) {
+		pos->last = list_prev_entry(res, lru);
+	} else {
+		list_move(&res->lru, &pos->last->lru);
+	}
 }
 
-/* Move a resource to the LRU tail and track the bulk position */
-void ttm_resource_move_to_lru_tail(struct ttm_resource *res,
-				   struct ttm_lru_bulk_move *bulk)
+/* Move a resource to the LRU or bulk tail */
+void ttm_resource_move_to_lru_tail(struct ttm_resource *res)
 {
 	struct ttm_buffer_object *bo = res->bo;
 	struct ttm_device *bdev = bo->bdev;
-	struct ttm_resource_manager *man;
 
 	lockdep_assert_held(&bo->bdev->lru_lock);
 
 	if (bo->pin_count) {
 		list_move_tail(&res->lru, &bdev->pinned);
-		if (bdev->funcs->del_from_lru_notify)
-			bdev->funcs->del_from_lru_notify(res->bo);
-		return;
-	}
 
-	man = ttm_manager_type(bdev, res->mem_type);
-	list_move_tail(&res->lru, &man->lru[bo->priority]);
+	} else	if (bo->bulk_move) {
+		struct ttm_lru_bulk_move_pos *pos =
+			ttm_lru_bulk_move_pos(bo->bulk_move, res);
 
-	if (bdev->funcs->del_from_lru_notify)
-		bdev->funcs->del_from_lru_notify(bo);
-
-	if (!bulk)
-		return;
+		ttm_lru_bulk_move_pos_tail(pos, res);
+	} else {
+		struct ttm_resource_manager *man;
 
-	ttm_lru_bulk_move_set_pos(&bulk->pos[res->mem_type][bo->priority], res);
+		man = ttm_manager_type(bdev, res->mem_type);
+		list_move_tail(&res->lru, &man->lru[bo->priority]);
+	}
 }
 
 /**
@@ -139,7 +174,10 @@ void ttm_resource_init(struct ttm_buffer_object *bo,
 	man = ttm_manager_type(bo->bdev, place->mem_type);
 	spin_lock(&bo->bdev->lru_lock);
 	man->usage += res->num_pages << PAGE_SHIFT;
-	ttm_resource_move_to_lru_tail(res, NULL);
+	if (bo->bulk_move)
+		ttm_lru_bulk_move_add(bo->bulk_move, res);
+	else
+		ttm_resource_move_to_lru_tail(res);
 	spin_unlock(&bo->bdev->lru_lock);
 }
 EXPORT_SYMBOL(ttm_resource_init);
@@ -161,8 +199,6 @@ void ttm_resource_fini(struct ttm_resource_manager *man,
 
 	spin_lock(&bdev->lru_lock);
 	list_del_init(&res->lru);
-	if (res->bo && bdev->funcs->del_from_lru_notify)
-		bdev->funcs->del_from_lru_notify(res->bo);
 	man->usage -= res->num_pages << PAGE_SHIFT;
 	spin_unlock(&bdev->lru_lock);
 }
@@ -185,6 +221,12 @@ void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res)
 	if (!*res)
 		return;
 
+	if (bo->bulk_move) {
+		spin_lock(&bo->bdev->lru_lock);
+		ttm_lru_bulk_move_del(bo->bulk_move, *res);
+		spin_unlock(&bo->bdev->lru_lock);
+	}
+
 	man = ttm_manager_type(bo->bdev, (*res)->mem_type);
 	man->func->free(man, *res);
 	*res = NULL;
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 885b7698fd65..c61e1e5ceb83 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -135,6 +135,7 @@ struct ttm_buffer_object {
 	struct ttm_resource *resource;
 	struct ttm_tt *ttm;
 	bool deleted;
+	struct ttm_lru_bulk_move *bulk_move;
 
 	/**
 	 * Members protected by the bdev::lru_lock.
@@ -287,18 +288,9 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
  */
 void ttm_bo_put(struct ttm_buffer_object *bo);
 
-/**
- * ttm_bo_move_to_lru_tail
- *
- * @bo: The buffer object.
- * @bulk: optional bulk move structure to remember BO positions
- *
- * Move this BO to the tail of all lru lists used to lookup and reserve an
- * object. This function must be called with struct ttm_global::lru_lock
- * held, and is used to make a BO less likely to be considered for eviction.
- */
-void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
-			     struct ttm_lru_bulk_move *bulk);
+void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo);
+void ttm_bo_set_bulk_move(struct ttm_buffer_object *bo,
+			  struct ttm_lru_bulk_move *bulk);
 
 /**
  * ttm_bo_lock_delayed_workqueue
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 6c7352e13708..059a595e14e5 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -155,7 +155,7 @@ static inline void
 ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
 {
 	spin_lock(&bo->bdev->lru_lock);
-	ttm_bo_move_to_lru_tail(bo, NULL);
+	ttm_bo_move_to_lru_tail(bo);
 	spin_unlock(&bo->bdev->lru_lock);
 }
 
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 425150f35fbe..95b3c04b1ab9 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -198,15 +198,6 @@ struct ttm_device_funcs {
 	int (*access_memory)(struct ttm_buffer_object *bo, unsigned long offset,
 			     void *buf, int len, int write);
 
-	/**
-	 * struct ttm_bo_driver member del_from_lru_notify
-	 *
-	 * @bo: the buffer object deleted from lru
-	 *
-	 * notify driver that a BO was deleted from LRU.
-	 */
-	void (*del_from_lru_notify)(struct ttm_buffer_object *bo);
-
 	/**
 	 * Notify the driver that we're about to release a BO
 	 *
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index e8a64ca3cf25..07a17b22cf39 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -202,7 +202,7 @@ struct ttm_resource_cursor {
  * @first: first res in the bulk move range
  * @last: last res in the bulk move range
  *
- * Positions for a lru bulk move.
+ * Range of resources for a lru bulk move.
  */
 struct ttm_lru_bulk_move_pos {
 	struct ttm_resource *first;
@@ -308,10 +308,13 @@ ttm_resource_manager_cleanup(struct ttm_resource_manager *man)
 }
 
 void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk);
+void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
+			   struct ttm_resource *res);
+void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
+			   struct ttm_resource *res);
 void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk);
 
-void ttm_resource_move_to_lru_tail(struct ttm_resource *res,
-				   struct ttm_lru_bulk_move *bulk);
+void ttm_resource_move_to_lru_tail(struct ttm_resource *res);
 
 void ttm_resource_init(struct ttm_buffer_object *bo,
                        const struct ttm_place *place,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 6/6] drm/amdgpu: drop amdgpu_gtt_node
  2022-03-21 13:25 [PATCH 1/6] drm/ttm: move the LRU into resource handling v4 Christian König
                   ` (3 preceding siblings ...)
  2022-03-21 13:26 ` [PATCH 5/6] drm/ttm: rework bulk move handling v4 Christian König
@ 2022-03-21 13:26 ` Christian König
  2022-03-23 10:35 ` [PATCH 1/6] drm/ttm: move the LRU into resource handling v4 Daniel Vetter
  2022-03-23 11:59 ` Daniel Vetter
  6 siblings, 0 replies; 18+ messages in thread
From: Christian König @ 2022-03-21 13:26 UTC (permalink / raw)
  To: matthew.william.auld, felix.kuehling, daniel, dri-devel
  Cc: Daniel Vetter, Christian König

We have the BO pointer in the base structure now as well.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Tested-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 49 ++++++++-------------
 include/drm/ttm/ttm_resource.h              |  8 ++++
 2 files changed, 26 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 3bcd27ae379d..68494b959116 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -26,23 +26,12 @@
 
 #include "amdgpu.h"
 
-struct amdgpu_gtt_node {
-	struct ttm_buffer_object *tbo;
-	struct ttm_range_mgr_node base;
-};
-
 static inline struct amdgpu_gtt_mgr *
 to_gtt_mgr(struct ttm_resource_manager *man)
 {
 	return container_of(man, struct amdgpu_gtt_mgr, manager);
 }
 
-static inline struct amdgpu_gtt_node *
-to_amdgpu_gtt_node(struct ttm_resource *res)
-{
-	return container_of(res, struct amdgpu_gtt_node, base.base);
-}
-
 /**
  * DOC: mem_info_gtt_total
  *
@@ -106,9 +95,9 @@ const struct attribute_group amdgpu_gtt_mgr_attr_group = {
  */
 bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *res)
 {
-	struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(res);
+	struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
 
-	return drm_mm_node_allocated(&node->base.mm_nodes[0]);
+	return drm_mm_node_allocated(&node->mm_nodes[0]);
 }
 
 /**
@@ -128,15 +117,14 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
 {
 	struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
 	uint32_t num_pages = PFN_UP(tbo->base.size);
-	struct amdgpu_gtt_node *node;
+	struct ttm_range_mgr_node *node;
 	int r;
 
-	node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL);
+	node = kzalloc(struct_size(node, mm_nodes, 1), GFP_KERNEL);
 	if (!node)
 		return -ENOMEM;
 
-	node->tbo = tbo;
-	ttm_resource_init(tbo, place, &node->base.base);
+	ttm_resource_init(tbo, place, &node->base);
 	if (!(place->flags & TTM_PL_FLAG_TEMPORARY) &&
 	    ttm_resource_manager_usage(man) > man->size) {
 		r = -ENOSPC;
@@ -145,8 +133,7 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
 
 	if (place->lpfn) {
 		spin_lock(&mgr->lock);
-		r = drm_mm_insert_node_in_range(&mgr->mm,
-						&node->base.mm_nodes[0],
+		r = drm_mm_insert_node_in_range(&mgr->mm, &node->mm_nodes[0],
 						num_pages, tbo->page_alignment,
 						0, place->fpfn, place->lpfn,
 						DRM_MM_INSERT_BEST);
@@ -154,18 +141,18 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
 		if (unlikely(r))
 			goto err_free;
 
-		node->base.base.start = node->base.mm_nodes[0].start;
+		node->base.start = node->mm_nodes[0].start;
 	} else {
-		node->base.mm_nodes[0].start = 0;
-		node->base.mm_nodes[0].size = node->base.base.num_pages;
-		node->base.base.start = AMDGPU_BO_INVALID_OFFSET;
+		node->mm_nodes[0].start = 0;
+		node->mm_nodes[0].size = node->base.num_pages;
+		node->base.start = AMDGPU_BO_INVALID_OFFSET;
 	}
 
-	*res = &node->base.base;
+	*res = &node->base;
 	return 0;
 
 err_free:
-	ttm_resource_fini(man, &node->base.base);
+	ttm_resource_fini(man, &node->base);
 	kfree(node);
 	return r;
 }
@@ -181,12 +168,12 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
 static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man,
 			       struct ttm_resource *res)
 {
-	struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(res);
+	struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
 	struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
 
 	spin_lock(&mgr->lock);
-	if (drm_mm_node_allocated(&node->base.mm_nodes[0]))
-		drm_mm_remove_node(&node->base.mm_nodes[0]);
+	if (drm_mm_node_allocated(&node->mm_nodes[0]))
+		drm_mm_remove_node(&node->mm_nodes[0]);
 	spin_unlock(&mgr->lock);
 
 	ttm_resource_fini(man, res);
@@ -202,7 +189,7 @@ static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man,
  */
 int amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr)
 {
-	struct amdgpu_gtt_node *node;
+	struct ttm_range_mgr_node *node;
 	struct drm_mm_node *mm_node;
 	struct amdgpu_device *adev;
 	int r = 0;
@@ -210,8 +197,8 @@ int amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr)
 	adev = container_of(mgr, typeof(*adev), mman.gtt_mgr);
 	spin_lock(&mgr->lock);
 	drm_mm_for_each_node(mm_node, &mgr->mm) {
-		node = container_of(mm_node, typeof(*node), base.mm_nodes[0]);
-		r = amdgpu_ttm_recover_gart(node->tbo);
+		node = container_of(mm_node, typeof(*node), mm_nodes[0]);
+		r = amdgpu_ttm_recover_gart(node->base.bo);
 		if (r)
 			break;
 	}
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index 07a17b22cf39..a1f495b1dd55 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -350,6 +350,14 @@ ttm_resource_manager_next(struct ttm_resource_manager *man,
 			  struct ttm_resource_cursor *cursor,
 			  struct ttm_resource *res);
 
+/**
+ * ttm_resource_manager_for_each_res - iterate over all resources
+ * @man: the resource manager
+ * @cursor: struct ttm_resource_cursor for the current position
+ * @res: the current resource
+ *
+ * Iterate over all the evictable resources in a resource manager.
+ */
 #define ttm_resource_manager_for_each_res(man, cursor, res)		\
 	for (res = ttm_resource_manager_first(man, cursor); res;	\
 	     res = ttm_resource_manager_next(man, cursor, res))
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/6] drm/ttm: move the LRU into resource handling v4
  2022-03-21 13:25 [PATCH 1/6] drm/ttm: move the LRU into resource handling v4 Christian König
                   ` (4 preceding siblings ...)
  2022-03-21 13:26 ` [PATCH 6/6] drm/amdgpu: drop amdgpu_gtt_node Christian König
@ 2022-03-23 10:35 ` Daniel Vetter
  2022-03-23 10:39   ` Daniel Vetter
  2022-03-24 14:25   ` Christian König
  2022-03-23 11:59 ` Daniel Vetter
  6 siblings, 2 replies; 18+ messages in thread
From: Daniel Vetter @ 2022-03-23 10:35 UTC (permalink / raw)
  To: Christian König
  Cc: felix.kuehling, matthew.william.auld, dri-devel, Christian König

On Mon, Mar 21, 2022 at 02:25:56PM +0100, Christian König wrote:
> This way we finally fix the problem that new resource are
> not immediately evict-able after allocation.
> 
> That has caused numerous problems including OOM on GDS handling
> and not being able to use TTM as general resource manager.
> 
> v2: stop assuming in ttm_resource_fini that res->bo is still valid.
> v3: cleanup kerneldoc, add more lockdep annotation
> v4: consistently use res->num_pages
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Tested-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |   8 +-
>  drivers/gpu/drm/i915/gem/i915_gem_ttm.c |   2 +-
>  drivers/gpu/drm/ttm/ttm_bo.c            | 115 ++--------------------
>  drivers/gpu/drm/ttm/ttm_bo_util.c       |   1 -
>  drivers/gpu/drm/ttm/ttm_device.c        |  64 ++++++------
>  drivers/gpu/drm/ttm/ttm_resource.c      | 124 ++++++++++++++++++++++--
>  include/drm/ttm/ttm_bo_api.h            |  16 ---
>  include/drm/ttm/ttm_bo_driver.h         |  29 +-----
>  include/drm/ttm/ttm_resource.h          |  35 +++++++
>  9 files changed, 198 insertions(+), 196 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index b37fc7d7d2c7..f2ce5a0defd9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -683,12 +683,12 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>  
>  	if (vm->bulk_moveable) {
>  		spin_lock(&adev->mman.bdev.lru_lock);
> -		ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
> +		ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
>  		spin_unlock(&adev->mman.bdev.lru_lock);
>  		return;
>  	}
>  
> -	memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
> +	ttm_lru_bulk_move_init(&vm->lru_bulk_move);
>  
>  	spin_lock(&adev->mman.bdev.lru_lock);
>  	list_for_each_entry(bo_base, &vm->idle, vm_status) {
> @@ -698,11 +698,9 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>  		if (!bo->parent)
>  			continue;
>  
> -		ttm_bo_move_to_lru_tail(&bo->tbo, bo->tbo.resource,
> -					&vm->lru_bulk_move);
> +		ttm_bo_move_to_lru_tail(&bo->tbo, &vm->lru_bulk_move);
>  		if (shadow)
>  			ttm_bo_move_to_lru_tail(&shadow->tbo,
> -						shadow->tbo.resource,
>  						&vm->lru_bulk_move);
>  	}
>  	spin_unlock(&adev->mman.bdev.lru_lock);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> index de3fe79b665a..582e8dc9bc8c 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> @@ -849,7 +849,7 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
>  			bo->priority = I915_TTM_PRIO_NO_PAGES;
>  	}
>  
> -	ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
> +	ttm_bo_move_to_lru_tail(bo, NULL);
>  	spin_unlock(&bo->bdev->lru_lock);
>  }
>  
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index db3dc7ef5382..cb0fa932d495 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -69,108 +69,16 @@ static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo,
>  	}
>  }
>  
> -static inline void ttm_bo_move_to_pinned(struct ttm_buffer_object *bo)
> -{
> -	struct ttm_device *bdev = bo->bdev;
> -
> -	list_move_tail(&bo->lru, &bdev->pinned);
> -
> -	if (bdev->funcs->del_from_lru_notify)
> -		bdev->funcs->del_from_lru_notify(bo);
> -}
> -
> -static inline void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
> -{
> -	struct ttm_device *bdev = bo->bdev;
> -
> -	list_del_init(&bo->lru);
> -
> -	if (bdev->funcs->del_from_lru_notify)
> -		bdev->funcs->del_from_lru_notify(bo);
> -}
> -
> -static void ttm_bo_bulk_move_set_pos(struct ttm_lru_bulk_move_pos *pos,
> -				     struct ttm_buffer_object *bo)
> -{
> -	if (!pos->first)
> -		pos->first = bo;
> -	pos->last = bo;
> -}
> -
>  void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
> -			     struct ttm_resource *mem,
>  			     struct ttm_lru_bulk_move *bulk)
>  {
> -	struct ttm_device *bdev = bo->bdev;
> -	struct ttm_resource_manager *man;
> -
> -	if (!bo->deleted)
> -		dma_resv_assert_held(bo->base.resv);
> -
> -	if (bo->pin_count) {
> -		ttm_bo_move_to_pinned(bo);
> -		return;
> -	}
> -
> -	if (!mem)
> -		return;
> -
> -	man = ttm_manager_type(bdev, mem->mem_type);
> -	list_move_tail(&bo->lru, &man->lru[bo->priority]);
> -
> -	if (bdev->funcs->del_from_lru_notify)
> -		bdev->funcs->del_from_lru_notify(bo);
> -
> -	if (bulk && !bo->pin_count) {
> -		switch (bo->resource->mem_type) {
> -		case TTM_PL_TT:
> -			ttm_bo_bulk_move_set_pos(&bulk->tt[bo->priority], bo);
> -			break;
> +	dma_resv_assert_held(bo->base.resv);
>  
> -		case TTM_PL_VRAM:
> -			ttm_bo_bulk_move_set_pos(&bulk->vram[bo->priority], bo);
> -			break;
> -		}
> -	}
> +	if (bo->resource)
> +		ttm_resource_move_to_lru_tail(bo->resource, bulk);
>  }
>  EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>  
> -void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
> -{
> -	unsigned i;
> -
> -	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> -		struct ttm_lru_bulk_move_pos *pos = &bulk->tt[i];
> -		struct ttm_resource_manager *man;
> -
> -		if (!pos->first)
> -			continue;
> -
> -		dma_resv_assert_held(pos->first->base.resv);
> -		dma_resv_assert_held(pos->last->base.resv);
> -
> -		man = ttm_manager_type(pos->first->bdev, TTM_PL_TT);
> -		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
> -				    &pos->last->lru);
> -	}
> -
> -	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> -		struct ttm_lru_bulk_move_pos *pos = &bulk->vram[i];
> -		struct ttm_resource_manager *man;
> -
> -		if (!pos->first)
> -			continue;
> -
> -		dma_resv_assert_held(pos->first->base.resv);
> -		dma_resv_assert_held(pos->last->base.resv);
> -
> -		man = ttm_manager_type(pos->first->bdev, TTM_PL_VRAM);
> -		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
> -				    &pos->last->lru);
> -	}
> -}
> -EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
> -
>  static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
>  				  struct ttm_resource *mem, bool evict,
>  				  struct ttm_operation_ctx *ctx,
> @@ -344,7 +252,6 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>  		return ret;
>  	}
>  
> -	ttm_bo_move_to_pinned(bo);
>  	list_del_init(&bo->ddestroy);
>  	spin_unlock(&bo->bdev->lru_lock);
>  	ttm_bo_cleanup_memtype_use(bo);
> @@ -445,7 +352,7 @@ static void ttm_bo_release(struct kref *kref)
>  		 */
>  		if (bo->pin_count) {
>  			bo->pin_count = 0;
> -			ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
> +			ttm_resource_move_to_lru_tail(bo->resource, NULL);
>  		}
>  
>  		kref_init(&bo->kref);
> @@ -458,7 +365,6 @@ static void ttm_bo_release(struct kref *kref)
>  	}
>  
>  	spin_lock(&bo->bdev->lru_lock);
> -	ttm_bo_del_from_lru(bo);
>  	list_del(&bo->ddestroy);
>  	spin_unlock(&bo->bdev->lru_lock);
>  
> @@ -673,15 +579,17 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>  			struct ww_acquire_ctx *ticket)
>  {
>  	struct ttm_buffer_object *bo = NULL, *busy_bo = NULL;
> +	struct ttm_resource *res;
>  	bool locked = false;
>  	unsigned i;
>  	int ret;
>  
>  	spin_lock(&bdev->lru_lock);
>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> -		list_for_each_entry(bo, &man->lru[i], lru) {
> +		list_for_each_entry(res, &man->lru[i], lru) {
>  			bool busy;
>  
> +			bo = res->bo;
>  			if (!ttm_bo_evict_swapout_allowable(bo, ctx, place,
>  							    &locked, &busy)) {
>  				if (busy && !busy_bo && ticket !=
> @@ -699,7 +607,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>  		}
>  
>  		/* If the inner loop terminated early, we have our candidate */
> -		if (&bo->lru != &man->lru[i])
> +		if (&res->lru != &man->lru[i])
>  			break;
>  
>  		bo = NULL;
> @@ -875,9 +783,6 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
>  	}
>  
>  error:
> -	if (bo->resource->mem_type == TTM_PL_SYSTEM && !bo->pin_count)
> -		ttm_bo_move_to_lru_tail_unlocked(bo);
> -
>  	return ret;
>  }
>  EXPORT_SYMBOL(ttm_bo_mem_space);
> @@ -971,7 +876,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
>  	bo->destroy = destroy ? destroy : ttm_bo_default_destroy;
>  
>  	kref_init(&bo->kref);
> -	INIT_LIST_HEAD(&bo->lru);
>  	INIT_LIST_HEAD(&bo->ddestroy);
>  	bo->bdev = bdev;
>  	bo->type = type;
> @@ -1021,8 +925,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
>  		return ret;
>  	}
>  
> -	ttm_bo_move_to_lru_tail_unlocked(bo);
> -
>  	return ret;
>  }
>  EXPORT_SYMBOL(ttm_bo_init_reserved);
> @@ -1123,7 +1025,6 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>  		return ret == -EBUSY ? -ENOSPC : ret;
>  	}
>  
> -	ttm_bo_move_to_pinned(bo);
>  	/* TODO: Cleanup the locking */
>  	spin_unlock(&bo->bdev->lru_lock);
>  
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 544a84fa6589..0163e92b57af 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -231,7 +231,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
>  
>  	atomic_inc(&ttm_glob.bo_count);
>  	INIT_LIST_HEAD(&fbo->base.ddestroy);
> -	INIT_LIST_HEAD(&fbo->base.lru);
>  	fbo->base.moving = NULL;
>  	drm_vma_node_reset(&fbo->base.base.vma_node);
>  
> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> index be24bb6cefd0..ba35887147ba 100644
> --- a/drivers/gpu/drm/ttm/ttm_device.c
> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> @@ -144,6 +144,7 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>  {
>  	struct ttm_resource_manager *man;
>  	struct ttm_buffer_object *bo;
> +	struct ttm_resource *res;
>  	unsigned i, j;
>  	int ret;
>  
> @@ -154,8 +155,11 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>  			continue;
>  
>  		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
> -			list_for_each_entry(bo, &man->lru[j], lru) {
> -				uint32_t num_pages = PFN_UP(bo->base.size);
> +			list_for_each_entry(res, &man->lru[j], lru) {
> +				uint32_t num_pages;
> +
> +				bo = res->bo;
> +				num_pages = PFN_UP(bo->base.size);
>  
>  				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
>  				/* ttm_bo_swapout has dropped the lru_lock */
> @@ -259,49 +263,45 @@ void ttm_device_fini(struct ttm_device *bdev)
>  }
>  EXPORT_SYMBOL(ttm_device_fini);
>  
> -void ttm_device_clear_dma_mappings(struct ttm_device *bdev)
> +static void ttm_device_clear_lru_dma_mappings(struct ttm_device *bdev,
> +					      struct list_head *list)
>  {
> -	struct ttm_resource_manager *man;
> -	struct ttm_buffer_object *bo;
> -	unsigned int i, j;
> +	struct ttm_resource *res;
>  
>  	spin_lock(&bdev->lru_lock);
> -	while (!list_empty(&bdev->pinned)) {
> -		bo = list_first_entry(&bdev->pinned, struct ttm_buffer_object, lru);
> +	while ((res = list_first_entry_or_null(list, typeof(*res), lru))) {
> +		struct ttm_buffer_object *bo = res->bo;
> +
>  		/* Take ref against racing releases once lru_lock is unlocked */
> -		if (ttm_bo_get_unless_zero(bo)) {
> -			list_del_init(&bo->lru);
> -			spin_unlock(&bdev->lru_lock);
> +		if (!ttm_bo_get_unless_zero(bo))
> +			continue;
>  
> -			if (bo->ttm)
> -				ttm_tt_unpopulate(bo->bdev, bo->ttm);
> +		list_del_init(&res->lru);
> +		spin_unlock(&bdev->lru_lock);
>  
> -			ttm_bo_put(bo);
> -			spin_lock(&bdev->lru_lock);
> -		}
> +		if (bo->ttm)
> +			ttm_tt_unpopulate(bo->bdev, bo->ttm);
> +
> +		ttm_bo_put(bo);
> +		spin_lock(&bdev->lru_lock);
>  	}
> +	spin_unlock(&bdev->lru_lock);
> +}
> +
> +void ttm_device_clear_dma_mappings(struct ttm_device *bdev)
> +{
> +	struct ttm_resource_manager *man;
> +	unsigned int i, j;
> +
> +	ttm_device_clear_lru_dma_mappings(bdev, &bdev->pinned);
>  
>  	for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
>  		man = ttm_manager_type(bdev, i);
>  		if (!man || !man->use_tt)
>  			continue;
>  
> -		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
> -			while (!list_empty(&man->lru[j])) {
> -				bo = list_first_entry(&man->lru[j], struct ttm_buffer_object, lru);
> -				if (ttm_bo_get_unless_zero(bo)) {
> -					list_del_init(&bo->lru);
> -					spin_unlock(&bdev->lru_lock);
> -
> -					if (bo->ttm)
> -						ttm_tt_unpopulate(bo->bdev, bo->ttm);
> -
> -					ttm_bo_put(bo);
> -					spin_lock(&bdev->lru_lock);
> -				}
> -			}
> -		}
> +		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j)
> +			ttm_device_clear_lru_dma_mappings(bdev, &man->lru[j]);
>  	}
> -	spin_unlock(&bdev->lru_lock);
>  }
>  EXPORT_SYMBOL(ttm_device_clear_dma_mappings);
> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
> index cbd47a104962..8c253b6de6cc 100644
> --- a/drivers/gpu/drm/ttm/ttm_resource.c
> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> @@ -29,6 +29,110 @@
>  #include <drm/ttm/ttm_resource.h>
>  #include <drm/ttm/ttm_bo_driver.h>
>  
> +/**
> + * ttm_lru_bulk_move_init - initialize a bulk move structure
> + * @bulk: the structure to init
> + *
> + * For now just memset the structure to zero.
> + */
> +void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk)
> +{
> +	memset(bulk, 0, sizeof(*bulk));
> +}
> +EXPORT_SYMBOL(ttm_lru_bulk_move_init);
> +
> +/**
> + * ttm_lru_bulk_move_tail - bulk move range of resources to the LRU tail.
> + *
> + * @bulk: bulk move structure
> + *
> + * Bulk move BOs to the LRU tail, only valid to use when driver makes sure that
> + * resource order never changes. Should be called with &ttm_device.lru_lock held.
> + */
> +void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk)
> +{
> +	unsigned i;
> +
> +	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> +		struct ttm_lru_bulk_move_pos *pos = &bulk->tt[i];
> +		struct ttm_resource_manager *man;
> +
> +		if (!pos->first)
> +			continue;
> +
> +		lockdep_assert_held(&pos->first->bo->bdev->lru_lock);
> +		dma_resv_assert_held(pos->first->bo->base.resv);
> +		dma_resv_assert_held(pos->last->bo->base.resv);
> +
> +		man = ttm_manager_type(pos->first->bo->bdev, TTM_PL_TT);
> +		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
> +				    &pos->last->lru);
> +	}
> +
> +	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> +		struct ttm_lru_bulk_move_pos *pos = &bulk->vram[i];
> +		struct ttm_resource_manager *man;
> +
> +		if (!pos->first)
> +			continue;
> +
> +		lockdep_assert_held(&pos->first->bo->bdev->lru_lock);
> +		dma_resv_assert_held(pos->first->bo->base.resv);
> +		dma_resv_assert_held(pos->last->bo->base.resv);
> +
> +		man = ttm_manager_type(pos->first->bo->bdev, TTM_PL_VRAM);
> +		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
> +				    &pos->last->lru);
> +	}
> +}
> +EXPORT_SYMBOL(ttm_lru_bulk_move_tail);
> +
> +/* Record a resource position in a bulk move structure */
> +static void ttm_lru_bulk_move_set_pos(struct ttm_lru_bulk_move_pos *pos,
> +				      struct ttm_resource *res)
> +{
> +	if (!pos->first)
> +		pos->first = res;
> +	pos->last = res;
> +}
> +
> +/* Move a resource to the LRU tail and track the bulk position */
> +void ttm_resource_move_to_lru_tail(struct ttm_resource *res,
> +				   struct ttm_lru_bulk_move *bulk)
> +{
> +	struct ttm_buffer_object *bo = res->bo;
> +	struct ttm_device *bdev = bo->bdev;
> +	struct ttm_resource_manager *man;
> +
> +	lockdep_assert_held(&bo->bdev->lru_lock);
> +
> +	if (bo->pin_count) {
> +		list_move_tail(&res->lru, &bdev->pinned);
> +		if (bdev->funcs->del_from_lru_notify)
> +			bdev->funcs->del_from_lru_notify(res->bo);
> +		return;
> +	}
> +
> +	man = ttm_manager_type(bdev, res->mem_type);
> +	list_move_tail(&res->lru, &man->lru[bo->priority]);
> +
> +	if (bdev->funcs->del_from_lru_notify)
> +		bdev->funcs->del_from_lru_notify(bo);
> +
> +	if (!bulk)
> +		return;
> +
> +	switch (res->mem_type) {
> +	case TTM_PL_TT:
> +		ttm_lru_bulk_move_set_pos(&bulk->tt[bo->priority], res);
> +		break;
> +
> +	case TTM_PL_VRAM:
> +		ttm_lru_bulk_move_set_pos(&bulk->vram[bo->priority], res);
> +		break;
> +	}
> +}
> +
>  /**
>   * ttm_resource_init - resource object constructure
>   * @bo: buffer object this resources is allocated for
> @@ -52,10 +156,12 @@ void ttm_resource_init(struct ttm_buffer_object *bo,
>  	res->bus.is_iomem = false;
>  	res->bus.caching = ttm_cached;
>  	res->bo = bo;
> +	INIT_LIST_HEAD(&res->lru);
>  
>  	man = ttm_manager_type(bo->bdev, place->mem_type);
>  	spin_lock(&bo->bdev->lru_lock);
> -	man->usage += bo->base.size;
> +	man->usage += res->num_pages << PAGE_SHIFT;
> +	ttm_resource_move_to_lru_tail(res, NULL);
>  	spin_unlock(&bo->bdev->lru_lock);
>  }
>  EXPORT_SYMBOL(ttm_resource_init);
> @@ -66,15 +172,21 @@ EXPORT_SYMBOL(ttm_resource_init);
>   * @res: the resource to clean up
>   *
>   * Should be used by resource manager backends to clean up the TTM resource
> - * objects before freeing the underlying structure. Counterpart of
> - * &ttm_resource_init
> + * objects before freeing the underlying structure. Makes sure the resource is
> + * removed from the LRU before destruction.
> + * Counterpart of &ttm_resource_init.

ttm_resource_init() or the link wont work correctly. There's a few more
like this. From earlier patches, but please fix.

Also in my earlier review I had a bunch more kerneldoc comments that
arean't addressed here yet:

https://lore.kernel.org/dri-devel/YfAqtI95aewAb10L@phenom.ffwll.local/

With that addressed Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
still holds.
-Daniel


>   */
>  void ttm_resource_fini(struct ttm_resource_manager *man,
>  		       struct ttm_resource *res)
>  {
> -	spin_lock(&man->bdev->lru_lock);
> -	man->usage -= res->bo->base.size;
> -	spin_unlock(&man->bdev->lru_lock);
> +	struct ttm_device *bdev = man->bdev;
> +
> +	spin_lock(&bdev->lru_lock);
> +	list_del_init(&res->lru);
> +	if (res->bo && bdev->funcs->del_from_lru_notify)
> +		bdev->funcs->del_from_lru_notify(res->bo);
> +	man->usage -= res->num_pages << PAGE_SHIFT;
> +	spin_unlock(&bdev->lru_lock);
>  }
>  EXPORT_SYMBOL(ttm_resource_fini);
>  
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index c17b2df9178b..3da77fc54552 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -55,8 +55,6 @@ struct ttm_placement;
>  
>  struct ttm_place;
>  
> -struct ttm_lru_bulk_move;
> -
>  /**
>   * enum ttm_bo_type
>   *
> @@ -94,7 +92,6 @@ struct ttm_tt;
>   * @ttm: TTM structure holding system pages.
>   * @evicted: Whether the object was evicted without user-space knowing.
>   * @deleted: True if the object is only a zombie and already deleted.
> - * @lru: List head for the lru list.
>   * @ddestroy: List head for the delayed destroy list.
>   * @swap: List head for swap LRU list.
>   * @moving: Fence set when BO is moving
> @@ -143,7 +140,6 @@ struct ttm_buffer_object {
>  	 * Members protected by the bdev::lru_lock.
>  	 */
>  
> -	struct list_head lru;
>  	struct list_head ddestroy;
>  
>  	/**
> @@ -295,7 +291,6 @@ void ttm_bo_put(struct ttm_buffer_object *bo);
>   * ttm_bo_move_to_lru_tail
>   *
>   * @bo: The buffer object.
> - * @mem: Resource object.
>   * @bulk: optional bulk move structure to remember BO positions
>   *
>   * Move this BO to the tail of all lru lists used to lookup and reserve an
> @@ -303,19 +298,8 @@ void ttm_bo_put(struct ttm_buffer_object *bo);
>   * held, and is used to make a BO less likely to be considered for eviction.
>   */
>  void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
> -			     struct ttm_resource *mem,
>  			     struct ttm_lru_bulk_move *bulk);
>  
> -/**
> - * ttm_bo_bulk_move_lru_tail
> - *
> - * @bulk: bulk move structure
> - *
> - * Bulk move BOs to the LRU tail, only valid to use when driver makes sure that
> - * BO order never changes. Should be called with ttm_global::lru_lock held.
> - */
> -void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk);
> -
>  /**
>   * ttm_bo_lock_delayed_workqueue
>   *
> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> index 5f087575194b..6c7352e13708 100644
> --- a/include/drm/ttm/ttm_bo_driver.h
> +++ b/include/drm/ttm/ttm_bo_driver.h
> @@ -45,33 +45,6 @@
>  #include "ttm_tt.h"
>  #include "ttm_pool.h"
>  
> -/**
> - * struct ttm_lru_bulk_move_pos
> - *
> - * @first: first BO in the bulk move range
> - * @last: last BO in the bulk move range
> - *
> - * Positions for a lru bulk move.
> - */
> -struct ttm_lru_bulk_move_pos {
> -	struct ttm_buffer_object *first;
> -	struct ttm_buffer_object *last;
> -};
> -
> -/**
> - * struct ttm_lru_bulk_move
> - *
> - * @tt: first/last lru entry for BOs in the TT domain
> - * @vram: first/last lru entry for BOs in the VRAM domain
> - * @swap: first/last lru entry for BOs on the swap list
> - *
> - * Helper structure for bulk moves on the LRU list.
> - */
> -struct ttm_lru_bulk_move {
> -	struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
> -	struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
> -};
> -
>  /*
>   * ttm_bo.c
>   */
> @@ -182,7 +155,7 @@ static inline void
>  ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
>  {
>  	spin_lock(&bo->bdev->lru_lock);
> -	ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
> +	ttm_bo_move_to_lru_tail(bo, NULL);
>  	spin_unlock(&bo->bdev->lru_lock);
>  }
>  
> diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
> index 323c14a30c6b..181e82e3d806 100644
> --- a/include/drm/ttm/ttm_resource.h
> +++ b/include/drm/ttm/ttm_resource.h
> @@ -26,10 +26,12 @@
>  #define _TTM_RESOURCE_H_
>  
>  #include <linux/types.h>
> +#include <linux/list.h>
>  #include <linux/mutex.h>
>  #include <linux/atomic.h>
>  #include <linux/dma-buf-map.h>
>  #include <linux/dma-fence.h>
> +
>  #include <drm/drm_print.h>
>  #include <drm/ttm/ttm_caching.h>
>  #include <drm/ttm/ttm_kmap_iter.h>
> @@ -179,6 +181,33 @@ struct ttm_resource {
>  	uint32_t placement;
>  	struct ttm_bus_placement bus;
>  	struct ttm_buffer_object *bo;
> +	struct list_head lru;
> +};
> +
> +/**
> + * struct ttm_lru_bulk_move_pos
> + *
> + * @first: first res in the bulk move range
> + * @last: last res in the bulk move range
> + *
> + * Positions for a lru bulk move.
> + */
> +struct ttm_lru_bulk_move_pos {
> +	struct ttm_resource *first;
> +	struct ttm_resource *last;
> +};
> +
> +/**
> + * struct ttm_lru_bulk_move
> + *
> + * @tt: first/last lru entry for resources in the TT domain
> + * @vram: first/last lru entry for resources in the VRAM domain
> + *
> + * Helper structure for bulk moves on the LRU list.
> + */
> +struct ttm_lru_bulk_move {
> +	struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
> +	struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
>  };
>  
>  /**
> @@ -267,6 +296,12 @@ ttm_resource_manager_cleanup(struct ttm_resource_manager *man)
>  	man->move = NULL;
>  }
>  
> +void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk);
> +void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk);
> +
> +void ttm_resource_move_to_lru_tail(struct ttm_resource *res,
> +				   struct ttm_lru_bulk_move *bulk);
> +
>  void ttm_resource_init(struct ttm_buffer_object *bo,
>                         const struct ttm_place *place,
>                         struct ttm_resource *res);
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/6] drm/ttm: de-inline ttm_bo_pin/unpin
  2022-03-21 13:25 ` [PATCH 4/6] drm/ttm: de-inline ttm_bo_pin/unpin Christian König
@ 2022-03-23 10:37   ` Daniel Vetter
  0 siblings, 0 replies; 18+ messages in thread
From: Daniel Vetter @ 2022-03-23 10:37 UTC (permalink / raw)
  To: Christian König
  Cc: felix.kuehling, matthew.william.auld, dri-devel, Christian König

On Mon, Mar 21, 2022 at 02:25:59PM +0100, Christian König wrote:
> Those functions are going to become more complex, don't inline them any
> more.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/ttm/ttm_bo.c | 31 +++++++++++++++++++++++++++++++
>  include/drm/ttm/ttm_bo_api.h | 30 ++----------------------------
>  2 files changed, 33 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index b119af33e7d7..502617ee9303 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -633,6 +633,37 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>  	return ret;
>  }
>  
> +/**
> + * ttm_bo_pin - Pin the buffer object.
> + * @bo: The buffer object to pin
> + *
> + * Make sure the buffer is not evicted any more during memory pressure.

Maybe add kerneldoc cross links here while at it.

"@bo must be unpinned again by calling ttm_bo_unpin()."

or whatever you prefer.

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> + */
> +void ttm_bo_pin(struct ttm_buffer_object *bo)
> +{
> +	dma_resv_assert_held(bo->base.resv);
> +	WARN_ON_ONCE(!kref_read(&bo->kref));
> +	++bo->pin_count;
> +}
> +EXPORT_SYMBOL(ttm_bo_pin);
> +
> +/**
> + * ttm_bo_unpin - Unpin the buffer object.
> + * @bo: The buffer object to unpin
> + *
> + * Allows the buffer object to be evicted again during memory pressure.
> + */
> +void ttm_bo_unpin(struct ttm_buffer_object *bo)
> +{
> +	dma_resv_assert_held(bo->base.resv);
> +	WARN_ON_ONCE(!kref_read(&bo->kref));
> +	if (bo->pin_count)
> +		--bo->pin_count;
> +	else
> +		WARN_ON_ONCE(true);
> +}
> +EXPORT_SYMBOL(ttm_bo_unpin);
> +
>  /*
>   * Add the last move fence to the BO and reserve a new shared slot. We only use
>   * a shared slot to avoid unecessary sync and rely on the subsequent bo move to
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 3da77fc54552..885b7698fd65 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -524,34 +524,8 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file *filp,
>  int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>  		   gfp_t gfp_flags);
>  
> -/**
> - * ttm_bo_pin - Pin the buffer object.
> - * @bo: The buffer object to pin
> - *
> - * Make sure the buffer is not evicted any more during memory pressure.
> - */
> -static inline void ttm_bo_pin(struct ttm_buffer_object *bo)
> -{
> -	dma_resv_assert_held(bo->base.resv);
> -	WARN_ON_ONCE(!kref_read(&bo->kref));
> -	++bo->pin_count;
> -}
> -
> -/**
> - * ttm_bo_unpin - Unpin the buffer object.
> - * @bo: The buffer object to unpin
> - *
> - * Allows the buffer object to be evicted again during memory pressure.
> - */
> -static inline void ttm_bo_unpin(struct ttm_buffer_object *bo)
> -{
> -	dma_resv_assert_held(bo->base.resv);
> -	WARN_ON_ONCE(!kref_read(&bo->kref));
> -	if (bo->pin_count)
> -		--bo->pin_count;
> -	else
> -		WARN_ON_ONCE(true);
> -}
> +void ttm_bo_pin(struct ttm_buffer_object *bo);
> +void ttm_bo_unpin(struct ttm_buffer_object *bo);
>  
>  int ttm_mem_evict_first(struct ttm_device *bdev,
>  			struct ttm_resource_manager *man,
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/6] drm/ttm: move the LRU into resource handling v4
  2022-03-23 10:35 ` [PATCH 1/6] drm/ttm: move the LRU into resource handling v4 Daniel Vetter
@ 2022-03-23 10:39   ` Daniel Vetter
  2022-03-24 14:25   ` Christian König
  1 sibling, 0 replies; 18+ messages in thread
From: Daniel Vetter @ 2022-03-23 10:39 UTC (permalink / raw)
  To: Christian König
  Cc: felix.kuehling, matthew.william.auld, dri-devel, Christian König

On Wed, Mar 23, 2022 at 11:35:02AM +0100, Daniel Vetter wrote:
> On Mon, Mar 21, 2022 at 02:25:56PM +0100, Christian König wrote:
> > This way we finally fix the problem that new resource are
> > not immediately evict-able after allocation.
> > 
> > That has caused numerous problems including OOM on GDS handling
> > and not being able to use TTM as general resource manager.
> > 
> > v2: stop assuming in ttm_resource_fini that res->bo is still valid.
> > v3: cleanup kerneldoc, add more lockdep annotation
> > v4: consistently use res->num_pages
> > 
> > Signed-off-by: Christian König <christian.koenig@amd.com>
> > Tested-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>

One thing I've forgotten: Compared to v1 you moved away from atomic64_t
for res->usage, which is nice and cleaner and great, but not mentioned in
the commit message :-) Maybe add that too.
-Daniel

> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |   8 +-
> >  drivers/gpu/drm/i915/gem/i915_gem_ttm.c |   2 +-
> >  drivers/gpu/drm/ttm/ttm_bo.c            | 115 ++--------------------
> >  drivers/gpu/drm/ttm/ttm_bo_util.c       |   1 -
> >  drivers/gpu/drm/ttm/ttm_device.c        |  64 ++++++------
> >  drivers/gpu/drm/ttm/ttm_resource.c      | 124 ++++++++++++++++++++++--
> >  include/drm/ttm/ttm_bo_api.h            |  16 ---
> >  include/drm/ttm/ttm_bo_driver.h         |  29 +-----
> >  include/drm/ttm/ttm_resource.h          |  35 +++++++
> >  9 files changed, 198 insertions(+), 196 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > index b37fc7d7d2c7..f2ce5a0defd9 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > @@ -683,12 +683,12 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
> >  
> >  	if (vm->bulk_moveable) {
> >  		spin_lock(&adev->mman.bdev.lru_lock);
> > -		ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
> > +		ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
> >  		spin_unlock(&adev->mman.bdev.lru_lock);
> >  		return;
> >  	}
> >  
> > -	memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
> > +	ttm_lru_bulk_move_init(&vm->lru_bulk_move);
> >  
> >  	spin_lock(&adev->mman.bdev.lru_lock);
> >  	list_for_each_entry(bo_base, &vm->idle, vm_status) {
> > @@ -698,11 +698,9 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
> >  		if (!bo->parent)
> >  			continue;
> >  
> > -		ttm_bo_move_to_lru_tail(&bo->tbo, bo->tbo.resource,
> > -					&vm->lru_bulk_move);
> > +		ttm_bo_move_to_lru_tail(&bo->tbo, &vm->lru_bulk_move);
> >  		if (shadow)
> >  			ttm_bo_move_to_lru_tail(&shadow->tbo,
> > -						shadow->tbo.resource,
> >  						&vm->lru_bulk_move);
> >  	}
> >  	spin_unlock(&adev->mman.bdev.lru_lock);
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > index de3fe79b665a..582e8dc9bc8c 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > @@ -849,7 +849,7 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
> >  			bo->priority = I915_TTM_PRIO_NO_PAGES;
> >  	}
> >  
> > -	ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
> > +	ttm_bo_move_to_lru_tail(bo, NULL);
> >  	spin_unlock(&bo->bdev->lru_lock);
> >  }
> >  
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> > index db3dc7ef5382..cb0fa932d495 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > @@ -69,108 +69,16 @@ static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo,
> >  	}
> >  }
> >  
> > -static inline void ttm_bo_move_to_pinned(struct ttm_buffer_object *bo)
> > -{
> > -	struct ttm_device *bdev = bo->bdev;
> > -
> > -	list_move_tail(&bo->lru, &bdev->pinned);
> > -
> > -	if (bdev->funcs->del_from_lru_notify)
> > -		bdev->funcs->del_from_lru_notify(bo);
> > -}
> > -
> > -static inline void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
> > -{
> > -	struct ttm_device *bdev = bo->bdev;
> > -
> > -	list_del_init(&bo->lru);
> > -
> > -	if (bdev->funcs->del_from_lru_notify)
> > -		bdev->funcs->del_from_lru_notify(bo);
> > -}
> > -
> > -static void ttm_bo_bulk_move_set_pos(struct ttm_lru_bulk_move_pos *pos,
> > -				     struct ttm_buffer_object *bo)
> > -{
> > -	if (!pos->first)
> > -		pos->first = bo;
> > -	pos->last = bo;
> > -}
> > -
> >  void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
> > -			     struct ttm_resource *mem,
> >  			     struct ttm_lru_bulk_move *bulk)
> >  {
> > -	struct ttm_device *bdev = bo->bdev;
> > -	struct ttm_resource_manager *man;
> > -
> > -	if (!bo->deleted)
> > -		dma_resv_assert_held(bo->base.resv);
> > -
> > -	if (bo->pin_count) {
> > -		ttm_bo_move_to_pinned(bo);
> > -		return;
> > -	}
> > -
> > -	if (!mem)
> > -		return;
> > -
> > -	man = ttm_manager_type(bdev, mem->mem_type);
> > -	list_move_tail(&bo->lru, &man->lru[bo->priority]);
> > -
> > -	if (bdev->funcs->del_from_lru_notify)
> > -		bdev->funcs->del_from_lru_notify(bo);
> > -
> > -	if (bulk && !bo->pin_count) {
> > -		switch (bo->resource->mem_type) {
> > -		case TTM_PL_TT:
> > -			ttm_bo_bulk_move_set_pos(&bulk->tt[bo->priority], bo);
> > -			break;
> > +	dma_resv_assert_held(bo->base.resv);
> >  
> > -		case TTM_PL_VRAM:
> > -			ttm_bo_bulk_move_set_pos(&bulk->vram[bo->priority], bo);
> > -			break;
> > -		}
> > -	}
> > +	if (bo->resource)
> > +		ttm_resource_move_to_lru_tail(bo->resource, bulk);
> >  }
> >  EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
> >  
> > -void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
> > -{
> > -	unsigned i;
> > -
> > -	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> > -		struct ttm_lru_bulk_move_pos *pos = &bulk->tt[i];
> > -		struct ttm_resource_manager *man;
> > -
> > -		if (!pos->first)
> > -			continue;
> > -
> > -		dma_resv_assert_held(pos->first->base.resv);
> > -		dma_resv_assert_held(pos->last->base.resv);
> > -
> > -		man = ttm_manager_type(pos->first->bdev, TTM_PL_TT);
> > -		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
> > -				    &pos->last->lru);
> > -	}
> > -
> > -	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> > -		struct ttm_lru_bulk_move_pos *pos = &bulk->vram[i];
> > -		struct ttm_resource_manager *man;
> > -
> > -		if (!pos->first)
> > -			continue;
> > -
> > -		dma_resv_assert_held(pos->first->base.resv);
> > -		dma_resv_assert_held(pos->last->base.resv);
> > -
> > -		man = ttm_manager_type(pos->first->bdev, TTM_PL_VRAM);
> > -		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
> > -				    &pos->last->lru);
> > -	}
> > -}
> > -EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
> > -
> >  static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
> >  				  struct ttm_resource *mem, bool evict,
> >  				  struct ttm_operation_ctx *ctx,
> > @@ -344,7 +252,6 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >  		return ret;
> >  	}
> >  
> > -	ttm_bo_move_to_pinned(bo);
> >  	list_del_init(&bo->ddestroy);
> >  	spin_unlock(&bo->bdev->lru_lock);
> >  	ttm_bo_cleanup_memtype_use(bo);
> > @@ -445,7 +352,7 @@ static void ttm_bo_release(struct kref *kref)
> >  		 */
> >  		if (bo->pin_count) {
> >  			bo->pin_count = 0;
> > -			ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
> > +			ttm_resource_move_to_lru_tail(bo->resource, NULL);
> >  		}
> >  
> >  		kref_init(&bo->kref);
> > @@ -458,7 +365,6 @@ static void ttm_bo_release(struct kref *kref)
> >  	}
> >  
> >  	spin_lock(&bo->bdev->lru_lock);
> > -	ttm_bo_del_from_lru(bo);
> >  	list_del(&bo->ddestroy);
> >  	spin_unlock(&bo->bdev->lru_lock);
> >  
> > @@ -673,15 +579,17 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> >  			struct ww_acquire_ctx *ticket)
> >  {
> >  	struct ttm_buffer_object *bo = NULL, *busy_bo = NULL;
> > +	struct ttm_resource *res;
> >  	bool locked = false;
> >  	unsigned i;
> >  	int ret;
> >  
> >  	spin_lock(&bdev->lru_lock);
> >  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> > -		list_for_each_entry(bo, &man->lru[i], lru) {
> > +		list_for_each_entry(res, &man->lru[i], lru) {
> >  			bool busy;
> >  
> > +			bo = res->bo;
> >  			if (!ttm_bo_evict_swapout_allowable(bo, ctx, place,
> >  							    &locked, &busy)) {
> >  				if (busy && !busy_bo && ticket !=
> > @@ -699,7 +607,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> >  		}
> >  
> >  		/* If the inner loop terminated early, we have our candidate */
> > -		if (&bo->lru != &man->lru[i])
> > +		if (&res->lru != &man->lru[i])
> >  			break;
> >  
> >  		bo = NULL;
> > @@ -875,9 +783,6 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
> >  	}
> >  
> >  error:
> > -	if (bo->resource->mem_type == TTM_PL_SYSTEM && !bo->pin_count)
> > -		ttm_bo_move_to_lru_tail_unlocked(bo);
> > -
> >  	return ret;
> >  }
> >  EXPORT_SYMBOL(ttm_bo_mem_space);
> > @@ -971,7 +876,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
> >  	bo->destroy = destroy ? destroy : ttm_bo_default_destroy;
> >  
> >  	kref_init(&bo->kref);
> > -	INIT_LIST_HEAD(&bo->lru);
> >  	INIT_LIST_HEAD(&bo->ddestroy);
> >  	bo->bdev = bdev;
> >  	bo->type = type;
> > @@ -1021,8 +925,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
> >  		return ret;
> >  	}
> >  
> > -	ttm_bo_move_to_lru_tail_unlocked(bo);
> > -
> >  	return ret;
> >  }
> >  EXPORT_SYMBOL(ttm_bo_init_reserved);
> > @@ -1123,7 +1025,6 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
> >  		return ret == -EBUSY ? -ENOSPC : ret;
> >  	}
> >  
> > -	ttm_bo_move_to_pinned(bo);
> >  	/* TODO: Cleanup the locking */
> >  	spin_unlock(&bo->bdev->lru_lock);
> >  
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> > index 544a84fa6589..0163e92b57af 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> > @@ -231,7 +231,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
> >  
> >  	atomic_inc(&ttm_glob.bo_count);
> >  	INIT_LIST_HEAD(&fbo->base.ddestroy);
> > -	INIT_LIST_HEAD(&fbo->base.lru);
> >  	fbo->base.moving = NULL;
> >  	drm_vma_node_reset(&fbo->base.base.vma_node);
> >  
> > diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> > index be24bb6cefd0..ba35887147ba 100644
> > --- a/drivers/gpu/drm/ttm/ttm_device.c
> > +++ b/drivers/gpu/drm/ttm/ttm_device.c
> > @@ -144,6 +144,7 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> >  {
> >  	struct ttm_resource_manager *man;
> >  	struct ttm_buffer_object *bo;
> > +	struct ttm_resource *res;
> >  	unsigned i, j;
> >  	int ret;
> >  
> > @@ -154,8 +155,11 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> >  			continue;
> >  
> >  		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
> > -			list_for_each_entry(bo, &man->lru[j], lru) {
> > -				uint32_t num_pages = PFN_UP(bo->base.size);
> > +			list_for_each_entry(res, &man->lru[j], lru) {
> > +				uint32_t num_pages;
> > +
> > +				bo = res->bo;
> > +				num_pages = PFN_UP(bo->base.size);
> >  
> >  				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
> >  				/* ttm_bo_swapout has dropped the lru_lock */
> > @@ -259,49 +263,45 @@ void ttm_device_fini(struct ttm_device *bdev)
> >  }
> >  EXPORT_SYMBOL(ttm_device_fini);
> >  
> > -void ttm_device_clear_dma_mappings(struct ttm_device *bdev)
> > +static void ttm_device_clear_lru_dma_mappings(struct ttm_device *bdev,
> > +					      struct list_head *list)
> >  {
> > -	struct ttm_resource_manager *man;
> > -	struct ttm_buffer_object *bo;
> > -	unsigned int i, j;
> > +	struct ttm_resource *res;
> >  
> >  	spin_lock(&bdev->lru_lock);
> > -	while (!list_empty(&bdev->pinned)) {
> > -		bo = list_first_entry(&bdev->pinned, struct ttm_buffer_object, lru);
> > +	while ((res = list_first_entry_or_null(list, typeof(*res), lru))) {
> > +		struct ttm_buffer_object *bo = res->bo;
> > +
> >  		/* Take ref against racing releases once lru_lock is unlocked */
> > -		if (ttm_bo_get_unless_zero(bo)) {
> > -			list_del_init(&bo->lru);
> > -			spin_unlock(&bdev->lru_lock);
> > +		if (!ttm_bo_get_unless_zero(bo))
> > +			continue;
> >  
> > -			if (bo->ttm)
> > -				ttm_tt_unpopulate(bo->bdev, bo->ttm);
> > +		list_del_init(&res->lru);
> > +		spin_unlock(&bdev->lru_lock);
> >  
> > -			ttm_bo_put(bo);
> > -			spin_lock(&bdev->lru_lock);
> > -		}
> > +		if (bo->ttm)
> > +			ttm_tt_unpopulate(bo->bdev, bo->ttm);
> > +
> > +		ttm_bo_put(bo);
> > +		spin_lock(&bdev->lru_lock);
> >  	}
> > +	spin_unlock(&bdev->lru_lock);
> > +}
> > +
> > +void ttm_device_clear_dma_mappings(struct ttm_device *bdev)
> > +{
> > +	struct ttm_resource_manager *man;
> > +	unsigned int i, j;
> > +
> > +	ttm_device_clear_lru_dma_mappings(bdev, &bdev->pinned);
> >  
> >  	for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
> >  		man = ttm_manager_type(bdev, i);
> >  		if (!man || !man->use_tt)
> >  			continue;
> >  
> > -		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
> > -			while (!list_empty(&man->lru[j])) {
> > -				bo = list_first_entry(&man->lru[j], struct ttm_buffer_object, lru);
> > -				if (ttm_bo_get_unless_zero(bo)) {
> > -					list_del_init(&bo->lru);
> > -					spin_unlock(&bdev->lru_lock);
> > -
> > -					if (bo->ttm)
> > -						ttm_tt_unpopulate(bo->bdev, bo->ttm);
> > -
> > -					ttm_bo_put(bo);
> > -					spin_lock(&bdev->lru_lock);
> > -				}
> > -			}
> > -		}
> > +		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j)
> > +			ttm_device_clear_lru_dma_mappings(bdev, &man->lru[j]);
> >  	}
> > -	spin_unlock(&bdev->lru_lock);
> >  }
> >  EXPORT_SYMBOL(ttm_device_clear_dma_mappings);
> > diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
> > index cbd47a104962..8c253b6de6cc 100644
> > --- a/drivers/gpu/drm/ttm/ttm_resource.c
> > +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> > @@ -29,6 +29,110 @@
> >  #include <drm/ttm/ttm_resource.h>
> >  #include <drm/ttm/ttm_bo_driver.h>
> >  
> > +/**
> > + * ttm_lru_bulk_move_init - initialize a bulk move structure
> > + * @bulk: the structure to init
> > + *
> > + * For now just memset the structure to zero.
> > + */
> > +void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk)
> > +{
> > +	memset(bulk, 0, sizeof(*bulk));
> > +}
> > +EXPORT_SYMBOL(ttm_lru_bulk_move_init);
> > +
> > +/**
> > + * ttm_lru_bulk_move_tail - bulk move range of resources to the LRU tail.
> > + *
> > + * @bulk: bulk move structure
> > + *
> > + * Bulk move BOs to the LRU tail, only valid to use when driver makes sure that
> > + * resource order never changes. Should be called with &ttm_device.lru_lock held.
> > + */
> > +void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk)
> > +{
> > +	unsigned i;
> > +
> > +	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> > +		struct ttm_lru_bulk_move_pos *pos = &bulk->tt[i];
> > +		struct ttm_resource_manager *man;
> > +
> > +		if (!pos->first)
> > +			continue;
> > +
> > +		lockdep_assert_held(&pos->first->bo->bdev->lru_lock);
> > +		dma_resv_assert_held(pos->first->bo->base.resv);
> > +		dma_resv_assert_held(pos->last->bo->base.resv);
> > +
> > +		man = ttm_manager_type(pos->first->bo->bdev, TTM_PL_TT);
> > +		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
> > +				    &pos->last->lru);
> > +	}
> > +
> > +	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> > +		struct ttm_lru_bulk_move_pos *pos = &bulk->vram[i];
> > +		struct ttm_resource_manager *man;
> > +
> > +		if (!pos->first)
> > +			continue;
> > +
> > +		lockdep_assert_held(&pos->first->bo->bdev->lru_lock);
> > +		dma_resv_assert_held(pos->first->bo->base.resv);
> > +		dma_resv_assert_held(pos->last->bo->base.resv);
> > +
> > +		man = ttm_manager_type(pos->first->bo->bdev, TTM_PL_VRAM);
> > +		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
> > +				    &pos->last->lru);
> > +	}
> > +}
> > +EXPORT_SYMBOL(ttm_lru_bulk_move_tail);
> > +
> > +/* Record a resource position in a bulk move structure */
> > +static void ttm_lru_bulk_move_set_pos(struct ttm_lru_bulk_move_pos *pos,
> > +				      struct ttm_resource *res)
> > +{
> > +	if (!pos->first)
> > +		pos->first = res;
> > +	pos->last = res;
> > +}
> > +
> > +/* Move a resource to the LRU tail and track the bulk position */
> > +void ttm_resource_move_to_lru_tail(struct ttm_resource *res,
> > +				   struct ttm_lru_bulk_move *bulk)
> > +{
> > +	struct ttm_buffer_object *bo = res->bo;
> > +	struct ttm_device *bdev = bo->bdev;
> > +	struct ttm_resource_manager *man;
> > +
> > +	lockdep_assert_held(&bo->bdev->lru_lock);
> > +
> > +	if (bo->pin_count) {
> > +		list_move_tail(&res->lru, &bdev->pinned);
> > +		if (bdev->funcs->del_from_lru_notify)
> > +			bdev->funcs->del_from_lru_notify(res->bo);
> > +		return;
> > +	}
> > +
> > +	man = ttm_manager_type(bdev, res->mem_type);
> > +	list_move_tail(&res->lru, &man->lru[bo->priority]);
> > +
> > +	if (bdev->funcs->del_from_lru_notify)
> > +		bdev->funcs->del_from_lru_notify(bo);
> > +
> > +	if (!bulk)
> > +		return;
> > +
> > +	switch (res->mem_type) {
> > +	case TTM_PL_TT:
> > +		ttm_lru_bulk_move_set_pos(&bulk->tt[bo->priority], res);
> > +		break;
> > +
> > +	case TTM_PL_VRAM:
> > +		ttm_lru_bulk_move_set_pos(&bulk->vram[bo->priority], res);
> > +		break;
> > +	}
> > +}
> > +
> >  /**
> >   * ttm_resource_init - resource object constructure
> >   * @bo: buffer object this resources is allocated for
> > @@ -52,10 +156,12 @@ void ttm_resource_init(struct ttm_buffer_object *bo,
> >  	res->bus.is_iomem = false;
> >  	res->bus.caching = ttm_cached;
> >  	res->bo = bo;
> > +	INIT_LIST_HEAD(&res->lru);
> >  
> >  	man = ttm_manager_type(bo->bdev, place->mem_type);
> >  	spin_lock(&bo->bdev->lru_lock);
> > -	man->usage += bo->base.size;
> > +	man->usage += res->num_pages << PAGE_SHIFT;
> > +	ttm_resource_move_to_lru_tail(res, NULL);
> >  	spin_unlock(&bo->bdev->lru_lock);
> >  }
> >  EXPORT_SYMBOL(ttm_resource_init);
> > @@ -66,15 +172,21 @@ EXPORT_SYMBOL(ttm_resource_init);
> >   * @res: the resource to clean up
> >   *
> >   * Should be used by resource manager backends to clean up the TTM resource
> > - * objects before freeing the underlying structure. Counterpart of
> > - * &ttm_resource_init
> > + * objects before freeing the underlying structure. Makes sure the resource is
> > + * removed from the LRU before destruction.
> > + * Counterpart of &ttm_resource_init.
> 
> ttm_resource_init() or the link wont work correctly. There's a few more
> like this. From earlier patches, but please fix.
> 
> Also in my earlier review I had a bunch more kerneldoc comments that
> arean't addressed here yet:
> 
> https://lore.kernel.org/dri-devel/YfAqtI95aewAb10L@phenom.ffwll.local/
> 
> With that addressed Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> still holds.
> -Daniel
> 
> 
> >   */
> >  void ttm_resource_fini(struct ttm_resource_manager *man,
> >  		       struct ttm_resource *res)
> >  {
> > -	spin_lock(&man->bdev->lru_lock);
> > -	man->usage -= res->bo->base.size;
> > -	spin_unlock(&man->bdev->lru_lock);
> > +	struct ttm_device *bdev = man->bdev;
> > +
> > +	spin_lock(&bdev->lru_lock);
> > +	list_del_init(&res->lru);
> > +	if (res->bo && bdev->funcs->del_from_lru_notify)
> > +		bdev->funcs->del_from_lru_notify(res->bo);
> > +	man->usage -= res->num_pages << PAGE_SHIFT;
> > +	spin_unlock(&bdev->lru_lock);
> >  }
> >  EXPORT_SYMBOL(ttm_resource_fini);
> >  
> > diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> > index c17b2df9178b..3da77fc54552 100644
> > --- a/include/drm/ttm/ttm_bo_api.h
> > +++ b/include/drm/ttm/ttm_bo_api.h
> > @@ -55,8 +55,6 @@ struct ttm_placement;
> >  
> >  struct ttm_place;
> >  
> > -struct ttm_lru_bulk_move;
> > -
> >  /**
> >   * enum ttm_bo_type
> >   *
> > @@ -94,7 +92,6 @@ struct ttm_tt;
> >   * @ttm: TTM structure holding system pages.
> >   * @evicted: Whether the object was evicted without user-space knowing.
> >   * @deleted: True if the object is only a zombie and already deleted.
> > - * @lru: List head for the lru list.
> >   * @ddestroy: List head for the delayed destroy list.
> >   * @swap: List head for swap LRU list.
> >   * @moving: Fence set when BO is moving
> > @@ -143,7 +140,6 @@ struct ttm_buffer_object {
> >  	 * Members protected by the bdev::lru_lock.
> >  	 */
> >  
> > -	struct list_head lru;
> >  	struct list_head ddestroy;
> >  
> >  	/**
> > @@ -295,7 +291,6 @@ void ttm_bo_put(struct ttm_buffer_object *bo);
> >   * ttm_bo_move_to_lru_tail
> >   *
> >   * @bo: The buffer object.
> > - * @mem: Resource object.
> >   * @bulk: optional bulk move structure to remember BO positions
> >   *
> >   * Move this BO to the tail of all lru lists used to lookup and reserve an
> > @@ -303,19 +298,8 @@ void ttm_bo_put(struct ttm_buffer_object *bo);
> >   * held, and is used to make a BO less likely to be considered for eviction.
> >   */
> >  void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
> > -			     struct ttm_resource *mem,
> >  			     struct ttm_lru_bulk_move *bulk);
> >  
> > -/**
> > - * ttm_bo_bulk_move_lru_tail
> > - *
> > - * @bulk: bulk move structure
> > - *
> > - * Bulk move BOs to the LRU tail, only valid to use when driver makes sure that
> > - * BO order never changes. Should be called with ttm_global::lru_lock held.
> > - */
> > -void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk);
> > -
> >  /**
> >   * ttm_bo_lock_delayed_workqueue
> >   *
> > diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> > index 5f087575194b..6c7352e13708 100644
> > --- a/include/drm/ttm/ttm_bo_driver.h
> > +++ b/include/drm/ttm/ttm_bo_driver.h
> > @@ -45,33 +45,6 @@
> >  #include "ttm_tt.h"
> >  #include "ttm_pool.h"
> >  
> > -/**
> > - * struct ttm_lru_bulk_move_pos
> > - *
> > - * @first: first BO in the bulk move range
> > - * @last: last BO in the bulk move range
> > - *
> > - * Positions for a lru bulk move.
> > - */
> > -struct ttm_lru_bulk_move_pos {
> > -	struct ttm_buffer_object *first;
> > -	struct ttm_buffer_object *last;
> > -};
> > -
> > -/**
> > - * struct ttm_lru_bulk_move
> > - *
> > - * @tt: first/last lru entry for BOs in the TT domain
> > - * @vram: first/last lru entry for BOs in the VRAM domain
> > - * @swap: first/last lru entry for BOs on the swap list
> > - *
> > - * Helper structure for bulk moves on the LRU list.
> > - */
> > -struct ttm_lru_bulk_move {
> > -	struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
> > -	struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
> > -};
> > -
> >  /*
> >   * ttm_bo.c
> >   */
> > @@ -182,7 +155,7 @@ static inline void
> >  ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
> >  {
> >  	spin_lock(&bo->bdev->lru_lock);
> > -	ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
> > +	ttm_bo_move_to_lru_tail(bo, NULL);
> >  	spin_unlock(&bo->bdev->lru_lock);
> >  }
> >  
> > diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
> > index 323c14a30c6b..181e82e3d806 100644
> > --- a/include/drm/ttm/ttm_resource.h
> > +++ b/include/drm/ttm/ttm_resource.h
> > @@ -26,10 +26,12 @@
> >  #define _TTM_RESOURCE_H_
> >  
> >  #include <linux/types.h>
> > +#include <linux/list.h>
> >  #include <linux/mutex.h>
> >  #include <linux/atomic.h>
> >  #include <linux/dma-buf-map.h>
> >  #include <linux/dma-fence.h>
> > +
> >  #include <drm/drm_print.h>
> >  #include <drm/ttm/ttm_caching.h>
> >  #include <drm/ttm/ttm_kmap_iter.h>
> > @@ -179,6 +181,33 @@ struct ttm_resource {
> >  	uint32_t placement;
> >  	struct ttm_bus_placement bus;
> >  	struct ttm_buffer_object *bo;
> > +	struct list_head lru;
> > +};
> > +
> > +/**
> > + * struct ttm_lru_bulk_move_pos
> > + *
> > + * @first: first res in the bulk move range
> > + * @last: last res in the bulk move range
> > + *
> > + * Positions for a lru bulk move.
> > + */
> > +struct ttm_lru_bulk_move_pos {
> > +	struct ttm_resource *first;
> > +	struct ttm_resource *last;
> > +};
> > +
> > +/**
> > + * struct ttm_lru_bulk_move
> > + *
> > + * @tt: first/last lru entry for resources in the TT domain
> > + * @vram: first/last lru entry for resources in the VRAM domain
> > + *
> > + * Helper structure for bulk moves on the LRU list.
> > + */
> > +struct ttm_lru_bulk_move {
> > +	struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
> > +	struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
> >  };
> >  
> >  /**
> > @@ -267,6 +296,12 @@ ttm_resource_manager_cleanup(struct ttm_resource_manager *man)
> >  	man->move = NULL;
> >  }
> >  
> > +void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk);
> > +void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk);
> > +
> > +void ttm_resource_move_to_lru_tail(struct ttm_resource *res,
> > +				   struct ttm_lru_bulk_move *bulk);
> > +
> >  void ttm_resource_init(struct ttm_buffer_object *bo,
> >                         const struct ttm_place *place,
> >                         struct ttm_resource *res);
> > -- 
> > 2.25.1
> > 
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/6] drm/ttm: rework bulk move handling v4
  2022-03-21 13:26 ` [PATCH 5/6] drm/ttm: rework bulk move handling v4 Christian König
@ 2022-03-23 10:49   ` Daniel Vetter
  0 siblings, 0 replies; 18+ messages in thread
From: Daniel Vetter @ 2022-03-23 10:49 UTC (permalink / raw)
  To: Christian König
  Cc: felix.kuehling, matthew.william.auld, dri-devel, Christian König

On Mon, Mar 21, 2022 at 02:26:00PM +0100, Christian König wrote:
> Instead of providing the bulk move structure for each LRU update set
> this as property of the BO. This should avoid costly bulk move rebuilds
> with some games under RADV.
> 
> v2: some name polishing, add a few more kerneldoc words.
> v3: add some lockdep
> v4: fix bugs, handle pin/unpin as well
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Tested-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>

I'm trusting my earlier review and the code has been update per that. I
think what's left to do is better kerneldoc:

- struct ttm_lru_bulk_move should have pointers to all the functions
  drivers will need to use to use this correctly, specifically
  ttm_bo_set_bulk_move().

- I think ttm_bo_set_bulk_move() should explain that once this is set you
  don't need to call ttm_bo_move_to_lru_tail() anymore, but instead need
  to guarantee that you're calling ttm_lru_bulk_move_tail()

I think with those connections explained this looks good.

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 72 +++-----------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 -
>  drivers/gpu/drm/i915/gem/i915_gem_ttm.c |  2 +-
>  drivers/gpu/drm/ttm/ttm_bo.c            | 59 +++++++++++++---
>  drivers/gpu/drm/ttm/ttm_resource.c      | 90 ++++++++++++++++++-------
>  include/drm/ttm/ttm_bo_api.h            | 16 ++---
>  include/drm/ttm/ttm_bo_driver.h         |  2 +-
>  include/drm/ttm/ttm_device.h            |  9 ---
>  include/drm/ttm/ttm_resource.h          |  9 ++-
>  10 files changed, 138 insertions(+), 125 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 5859ed0552a4..57ac118fc266 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1498,7 +1498,6 @@ static struct ttm_device_funcs amdgpu_bo_driver = {
>  	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
>  	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
>  	.access_memory = &amdgpu_ttm_access_memory,
> -	.del_from_lru_notify = &amdgpu_vm_del_from_lru_notify
>  };
>  
>  /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index f2ce5a0defd9..28f5e8b21a99 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -375,7 +375,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>  	if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv)
>  		return;
>  
> -	vm->bulk_moveable = false;
> +	ttm_bo_set_bulk_move(&bo->tbo, &vm->lru_bulk_move);
>  	if (bo->tbo.type == ttm_bo_type_kernel && bo->parent)
>  		amdgpu_vm_bo_relocated(base);
>  	else
> @@ -637,36 +637,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
>  	list_add(&entry->tv.head, validated);
>  }
>  
> -/**
> - * amdgpu_vm_del_from_lru_notify - update bulk_moveable flag
> - *
> - * @bo: BO which was removed from the LRU
> - *
> - * Make sure the bulk_moveable flag is updated when a BO is removed from the
> - * LRU.
> - */
> -void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
> -{
> -	struct amdgpu_bo *abo;
> -	struct amdgpu_vm_bo_base *bo_base;
> -
> -	if (!amdgpu_bo_is_amdgpu_bo(bo))
> -		return;
> -
> -	if (bo->pin_count)
> -		return;
> -
> -	abo = ttm_to_amdgpu_bo(bo);
> -	if (!abo->parent)
> -		return;
> -	for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) {
> -		struct amdgpu_vm *vm = bo_base->vm;
> -
> -		if (abo->tbo.base.resv == vm->root.bo->tbo.base.resv)
> -			vm->bulk_moveable = false;
> -	}
> -
> -}
>  /**
>   * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
>   *
> @@ -679,33 +649,9 @@ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
>  void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>  				struct amdgpu_vm *vm)
>  {
> -	struct amdgpu_vm_bo_base *bo_base;
> -
> -	if (vm->bulk_moveable) {
> -		spin_lock(&adev->mman.bdev.lru_lock);
> -		ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
> -		spin_unlock(&adev->mman.bdev.lru_lock);
> -		return;
> -	}
> -
> -	ttm_lru_bulk_move_init(&vm->lru_bulk_move);
> -
>  	spin_lock(&adev->mman.bdev.lru_lock);
> -	list_for_each_entry(bo_base, &vm->idle, vm_status) {
> -		struct amdgpu_bo *bo = bo_base->bo;
> -		struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);
> -
> -		if (!bo->parent)
> -			continue;
> -
> -		ttm_bo_move_to_lru_tail(&bo->tbo, &vm->lru_bulk_move);
> -		if (shadow)
> -			ttm_bo_move_to_lru_tail(&shadow->tbo,
> -						&vm->lru_bulk_move);
> -	}
> +	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
>  	spin_unlock(&adev->mman.bdev.lru_lock);
> -
> -	vm->bulk_moveable = true;
>  }
>  
>  /**
> @@ -728,8 +674,6 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>  	struct amdgpu_vm_bo_base *bo_base, *tmp;
>  	int r;
>  
> -	vm->bulk_moveable &= list_empty(&vm->evicted);
> -
>  	list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
>  		struct amdgpu_bo *bo = bo_base->bo;
>  		struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);
> @@ -1047,10 +991,16 @@ static void amdgpu_vm_free_table(struct amdgpu_vm_bo_base *entry)
>  
>  	if (!entry->bo)
>  		return;
> +
>  	shadow = amdgpu_bo_shadowed(entry->bo);
> +	if (shadow) {
> +		ttm_bo_set_bulk_move(&shadow->tbo, NULL);
> +		amdgpu_bo_unref(&shadow);
> +	}
> +
> +	ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
>  	entry->bo->vm_bo = NULL;
>  	list_del(&entry->vm_status);
> -	amdgpu_bo_unref(&shadow);
>  	amdgpu_bo_unref(&entry->bo);
>  }
>  
> @@ -1070,8 +1020,6 @@ static void amdgpu_vm_free_pts(struct amdgpu_device *adev,
>  	struct amdgpu_vm_pt_cursor cursor;
>  	struct amdgpu_vm_bo_base *entry;
>  
> -	vm->bulk_moveable = false;
> -
>  	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
>  		amdgpu_vm_free_table(entry);
>  
> @@ -2651,7 +2599,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
>  
>  	if (bo) {
>  		if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)
> -			vm->bulk_moveable = false;
> +			ttm_bo_set_bulk_move(&bo->tbo, NULL);
>  
>  		for (base = &bo_va->base.bo->vm_bo; *base;
>  		     base = &(*base)->next) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 85fcfb8c5efd..4d236682a118 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -317,8 +317,6 @@ struct amdgpu_vm {
>  
>  	/* Store positions of group of BOs */
>  	struct ttm_lru_bulk_move lru_bulk_move;
> -	/* mark whether can do the bulk move */
> -	bool			bulk_moveable;
>  	/* Flag to indicate if VM is used for compute */
>  	bool			is_compute_context;
>  };
> @@ -454,7 +452,6 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
>  
>  void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>  				struct amdgpu_vm *vm);
> -void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo);
>  void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
>  				uint64_t *gtt_mem, uint64_t *cpu_mem);
>  
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> index 582e8dc9bc8c..6fc192082d8c 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> @@ -849,7 +849,7 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
>  			bo->priority = I915_TTM_PRIO_NO_PAGES;
>  	}
>  
> -	ttm_bo_move_to_lru_tail(bo, NULL);
> +	ttm_bo_move_to_lru_tail(bo);
>  	spin_unlock(&bo->bdev->lru_lock);
>  }
>  
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 502617ee9303..bbd068bbcd2e 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -69,16 +69,53 @@ static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo,
>  	}
>  }
>  
> -void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
> -			     struct ttm_lru_bulk_move *bulk)
> +/**
> + * ttm_bo_move_to_lru_tail
> + *
> + * @bo: The buffer object.
> + *
> + * Move this BO to the tail of all lru lists used to lookup and reserve an
> + * object. This function must be called with struct ttm_global::lru_lock
> + * held, and is used to make a BO less likely to be considered for eviction.
> + */
> +void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo)
>  {
>  	dma_resv_assert_held(bo->base.resv);
>  
>  	if (bo->resource)
> -		ttm_resource_move_to_lru_tail(bo->resource, bulk);
> +		ttm_resource_move_to_lru_tail(bo->resource);
>  }
>  EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>  
> +/**
> + * ttm_bo_set_bulk_move - update BOs bulk move object
> + *
> + * @bo: The buffer object.
> + *
> + * Update the BOs bulk move object, making sure that resources are added/removed
> + * as well. A bulk move allows to move many resource on the LRU at once,
> + * resulting in much less overhead of maintaining the LRU.
> + * The only requirement is that the resources stay together on the LRU and are
> + * never separated. This is enforces by setting the bulk_move structure on a BO.
> + */
> +void ttm_bo_set_bulk_move(struct ttm_buffer_object *bo,
> +			  struct ttm_lru_bulk_move *bulk)
> +{
> +	dma_resv_assert_held(bo->base.resv);
> +
> +	if (bo->bulk_move == bulk)
> +		return;
> +
> +	spin_lock(&bo->bdev->lru_lock);
> +	if (bo->bulk_move && bo->resource)
> +		ttm_lru_bulk_move_del(bo->bulk_move, bo->resource);
> +	bo->bulk_move = bulk;
> +	if (bo->bulk_move && bo->resource)
> +		ttm_lru_bulk_move_add(bo->bulk_move, bo->resource);
> +	spin_unlock(&bo->bdev->lru_lock);
> +}
> +EXPORT_SYMBOL(ttm_bo_set_bulk_move);
> +
>  static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
>  				  struct ttm_resource *mem, bool evict,
>  				  struct ttm_operation_ctx *ctx,
> @@ -316,6 +353,7 @@ static void ttm_bo_release(struct kref *kref)
>  	int ret;
>  
>  	WARN_ON_ONCE(bo->pin_count);
> +	WARN_ON_ONCE(bo->bulk_move);
>  
>  	if (!bo->deleted) {
>  		ret = ttm_bo_individualize_resv(bo);
> @@ -352,7 +390,7 @@ static void ttm_bo_release(struct kref *kref)
>  		 */
>  		if (bo->pin_count) {
>  			bo->pin_count = 0;
> -			ttm_resource_move_to_lru_tail(bo->resource, NULL);
> +			ttm_resource_move_to_lru_tail(bo->resource);
>  		}
>  
>  		kref_init(&bo->kref);
> @@ -643,7 +681,8 @@ void ttm_bo_pin(struct ttm_buffer_object *bo)
>  {
>  	dma_resv_assert_held(bo->base.resv);
>  	WARN_ON_ONCE(!kref_read(&bo->kref));
> -	++bo->pin_count;
> +	if (!(bo->pin_count++) && bo->bulk_move && bo->resource)
> +		ttm_lru_bulk_move_del(bo->bulk_move, bo->resource);
>  }
>  EXPORT_SYMBOL(ttm_bo_pin);
>  
> @@ -657,10 +696,11 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo)
>  {
>  	dma_resv_assert_held(bo->base.resv);
>  	WARN_ON_ONCE(!kref_read(&bo->kref));
> -	if (bo->pin_count)
> -		--bo->pin_count;
> -	else
> -		WARN_ON_ONCE(true);
> +	if (WARN_ON_ONCE(!bo->pin_count))
> +		return;
> +
> +	if (!(--bo->pin_count) && bo->bulk_move && bo->resource)
> +		ttm_lru_bulk_move_add(bo->bulk_move, bo->resource);
>  }
>  EXPORT_SYMBOL(ttm_bo_unpin);
>  
> @@ -905,6 +945,7 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
>  	bo->moving = NULL;
>  	bo->pin_count = 0;
>  	bo->sg = sg;
> +	bo->bulk_move = NULL;
>  	if (resv) {
>  		bo->base.resv = resv;
>  		dma_resv_assert_held(bo->base.resv);
> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
> index 25b0a23ba04b..bde43495b8fc 100644
> --- a/drivers/gpu/drm/ttm/ttm_resource.c
> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> @@ -73,42 +73,77 @@ void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk)
>  }
>  EXPORT_SYMBOL(ttm_lru_bulk_move_tail);
>  
> -/* Record a resource position in a bulk move structure */
> -static void ttm_lru_bulk_move_set_pos(struct ttm_lru_bulk_move_pos *pos,
> -				      struct ttm_resource *res)
> +/* Return the bulk move pos object for this resource */
> +static struct ttm_lru_bulk_move_pos *
> +ttm_lru_bulk_move_pos(struct ttm_lru_bulk_move *bulk, struct ttm_resource *res)
>  {
> -	if (!pos->first)
> +	return &bulk->pos[res->mem_type][res->bo->priority];
> +}
> +
> +/* Move the resource to the tail of the bulk move range */
> +static void ttm_lru_bulk_move_pos_tail(struct ttm_lru_bulk_move_pos *pos,
> +				       struct ttm_resource *res)
> +{
> +	if (pos->last != res) {
> +		list_move(&res->lru, &pos->last->lru);
> +		pos->last = res;
> +	}
> +}
> +
> +/* Add the resource to a bulk_move cursor */
> +void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
> +			   struct ttm_resource *res)
> +{
> +	struct ttm_lru_bulk_move_pos *pos = ttm_lru_bulk_move_pos(bulk, res);
> +
> +	if (!pos->first) {
>  		pos->first = res;
> -	pos->last = res;
> +		pos->last = res;
> +	} else {
> +		ttm_lru_bulk_move_pos_tail(pos, res);
> +	}
> +}
> +
> +/* Remove the resource from a bulk_move range */
> +void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
> +			   struct ttm_resource *res)
> +{
> +	struct ttm_lru_bulk_move_pos *pos = ttm_lru_bulk_move_pos(bulk, res);
> +
> +	if (unlikely(pos->first == res && pos->last == res)) {
> +		pos->first = NULL;
> +		pos->last = NULL;
> +	} else if (pos->first == res) {
> +		pos->first = list_next_entry(res, lru);
> +	} else if (pos->last == res) {
> +		pos->last = list_prev_entry(res, lru);
> +	} else {
> +		list_move(&res->lru, &pos->last->lru);
> +	}
>  }
>  
> -/* Move a resource to the LRU tail and track the bulk position */
> -void ttm_resource_move_to_lru_tail(struct ttm_resource *res,
> -				   struct ttm_lru_bulk_move *bulk)
> +/* Move a resource to the LRU or bulk tail */
> +void ttm_resource_move_to_lru_tail(struct ttm_resource *res)
>  {
>  	struct ttm_buffer_object *bo = res->bo;
>  	struct ttm_device *bdev = bo->bdev;
> -	struct ttm_resource_manager *man;
>  
>  	lockdep_assert_held(&bo->bdev->lru_lock);
>  
>  	if (bo->pin_count) {
>  		list_move_tail(&res->lru, &bdev->pinned);
> -		if (bdev->funcs->del_from_lru_notify)
> -			bdev->funcs->del_from_lru_notify(res->bo);
> -		return;
> -	}
>  
> -	man = ttm_manager_type(bdev, res->mem_type);
> -	list_move_tail(&res->lru, &man->lru[bo->priority]);
> +	} else	if (bo->bulk_move) {
> +		struct ttm_lru_bulk_move_pos *pos =
> +			ttm_lru_bulk_move_pos(bo->bulk_move, res);
>  
> -	if (bdev->funcs->del_from_lru_notify)
> -		bdev->funcs->del_from_lru_notify(bo);
> -
> -	if (!bulk)
> -		return;
> +		ttm_lru_bulk_move_pos_tail(pos, res);
> +	} else {
> +		struct ttm_resource_manager *man;
>  
> -	ttm_lru_bulk_move_set_pos(&bulk->pos[res->mem_type][bo->priority], res);
> +		man = ttm_manager_type(bdev, res->mem_type);
> +		list_move_tail(&res->lru, &man->lru[bo->priority]);
> +	}
>  }
>  
>  /**
> @@ -139,7 +174,10 @@ void ttm_resource_init(struct ttm_buffer_object *bo,
>  	man = ttm_manager_type(bo->bdev, place->mem_type);
>  	spin_lock(&bo->bdev->lru_lock);
>  	man->usage += res->num_pages << PAGE_SHIFT;
> -	ttm_resource_move_to_lru_tail(res, NULL);
> +	if (bo->bulk_move)
> +		ttm_lru_bulk_move_add(bo->bulk_move, res);
> +	else
> +		ttm_resource_move_to_lru_tail(res);
>  	spin_unlock(&bo->bdev->lru_lock);
>  }
>  EXPORT_SYMBOL(ttm_resource_init);
> @@ -161,8 +199,6 @@ void ttm_resource_fini(struct ttm_resource_manager *man,
>  
>  	spin_lock(&bdev->lru_lock);
>  	list_del_init(&res->lru);
> -	if (res->bo && bdev->funcs->del_from_lru_notify)
> -		bdev->funcs->del_from_lru_notify(res->bo);
>  	man->usage -= res->num_pages << PAGE_SHIFT;
>  	spin_unlock(&bdev->lru_lock);
>  }
> @@ -185,6 +221,12 @@ void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res)
>  	if (!*res)
>  		return;
>  
> +	if (bo->bulk_move) {
> +		spin_lock(&bo->bdev->lru_lock);
> +		ttm_lru_bulk_move_del(bo->bulk_move, *res);
> +		spin_unlock(&bo->bdev->lru_lock);
> +	}
> +
>  	man = ttm_manager_type(bo->bdev, (*res)->mem_type);
>  	man->func->free(man, *res);
>  	*res = NULL;
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 885b7698fd65..c61e1e5ceb83 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -135,6 +135,7 @@ struct ttm_buffer_object {
>  	struct ttm_resource *resource;
>  	struct ttm_tt *ttm;
>  	bool deleted;
> +	struct ttm_lru_bulk_move *bulk_move;
>  
>  	/**
>  	 * Members protected by the bdev::lru_lock.
> @@ -287,18 +288,9 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
>   */
>  void ttm_bo_put(struct ttm_buffer_object *bo);
>  
> -/**
> - * ttm_bo_move_to_lru_tail
> - *
> - * @bo: The buffer object.
> - * @bulk: optional bulk move structure to remember BO positions
> - *
> - * Move this BO to the tail of all lru lists used to lookup and reserve an
> - * object. This function must be called with struct ttm_global::lru_lock
> - * held, and is used to make a BO less likely to be considered for eviction.
> - */
> -void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
> -			     struct ttm_lru_bulk_move *bulk);
> +void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo);
> +void ttm_bo_set_bulk_move(struct ttm_buffer_object *bo,
> +			  struct ttm_lru_bulk_move *bulk);
>  
>  /**
>   * ttm_bo_lock_delayed_workqueue
> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> index 6c7352e13708..059a595e14e5 100644
> --- a/include/drm/ttm/ttm_bo_driver.h
> +++ b/include/drm/ttm/ttm_bo_driver.h
> @@ -155,7 +155,7 @@ static inline void
>  ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
>  {
>  	spin_lock(&bo->bdev->lru_lock);
> -	ttm_bo_move_to_lru_tail(bo, NULL);
> +	ttm_bo_move_to_lru_tail(bo);
>  	spin_unlock(&bo->bdev->lru_lock);
>  }
>  
> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> index 425150f35fbe..95b3c04b1ab9 100644
> --- a/include/drm/ttm/ttm_device.h
> +++ b/include/drm/ttm/ttm_device.h
> @@ -198,15 +198,6 @@ struct ttm_device_funcs {
>  	int (*access_memory)(struct ttm_buffer_object *bo, unsigned long offset,
>  			     void *buf, int len, int write);
>  
> -	/**
> -	 * struct ttm_bo_driver member del_from_lru_notify
> -	 *
> -	 * @bo: the buffer object deleted from lru
> -	 *
> -	 * notify driver that a BO was deleted from LRU.
> -	 */
> -	void (*del_from_lru_notify)(struct ttm_buffer_object *bo);
> -
>  	/**
>  	 * Notify the driver that we're about to release a BO
>  	 *
> diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
> index e8a64ca3cf25..07a17b22cf39 100644
> --- a/include/drm/ttm/ttm_resource.h
> +++ b/include/drm/ttm/ttm_resource.h
> @@ -202,7 +202,7 @@ struct ttm_resource_cursor {
>   * @first: first res in the bulk move range
>   * @last: last res in the bulk move range
>   *
> - * Positions for a lru bulk move.
> + * Range of resources for a lru bulk move.
>   */
>  struct ttm_lru_bulk_move_pos {
>  	struct ttm_resource *first;
> @@ -308,10 +308,13 @@ ttm_resource_manager_cleanup(struct ttm_resource_manager *man)
>  }
>  
>  void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk);
> +void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
> +			   struct ttm_resource *res);
> +void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
> +			   struct ttm_resource *res);
>  void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk);
>  
> -void ttm_resource_move_to_lru_tail(struct ttm_resource *res,
> -				   struct ttm_lru_bulk_move *bulk);
> +void ttm_resource_move_to_lru_tail(struct ttm_resource *res);
>  
>  void ttm_resource_init(struct ttm_buffer_object *bo,
>                         const struct ttm_place *place,
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/6] drm/ttm: move the LRU into resource handling v4
  2022-03-21 13:25 [PATCH 1/6] drm/ttm: move the LRU into resource handling v4 Christian König
                   ` (5 preceding siblings ...)
  2022-03-23 10:35 ` [PATCH 1/6] drm/ttm: move the LRU into resource handling v4 Daniel Vetter
@ 2022-03-23 11:59 ` Daniel Vetter
  2022-03-23 12:20   ` Christian König
  6 siblings, 1 reply; 18+ messages in thread
From: Daniel Vetter @ 2022-03-23 11:59 UTC (permalink / raw)
  To: Christian König
  Cc: felix.kuehling, matthew.william.auld, dri-devel, Christian König

On Mon, Mar 21, 2022 at 02:25:56PM +0100, Christian König wrote:
> This way we finally fix the problem that new resource are
> not immediately evict-able after allocation.
> 
> That has caused numerous problems including OOM on GDS handling
> and not being able to use TTM as general resource manager.
> 
> v2: stop assuming in ttm_resource_fini that res->bo is still valid.
> v3: cleanup kerneldoc, add more lockdep annotation
> v4: consistently use res->num_pages
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Tested-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>

> +/**
> + * struct ttm_lru_bulk_move
> + *
> + * @tt: first/last lru entry for resources in the TT domain
> + * @vram: first/last lru entry for resources in the VRAM domain
> + *
> + * Helper structure for bulk moves on the LRU list.
> + */
> +struct ttm_lru_bulk_move {
> +	struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
> +	struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];

Not really needed, just a thought: Should we track the associated dma_resv
object here to make sure the locking is all done correctly (and also check
that the bulk move bo have the same dma_resv)? It wouldn't really be any
overhead for the !CONFIG_LOCKDEP case and we could sprinkle a lot more
dma_resv_held all over the place.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/6] drm/ttm: move the LRU into resource handling v4
  2022-03-23 11:59 ` Daniel Vetter
@ 2022-03-23 12:20   ` Christian König
  2022-03-23 13:36     ` Daniel Vetter
  0 siblings, 1 reply; 18+ messages in thread
From: Christian König @ 2022-03-23 12:20 UTC (permalink / raw)
  To: Daniel Vetter
  Cc: felix.kuehling, matthew.william.auld, Christian König, dri-devel

Am 23.03.22 um 12:59 schrieb Daniel Vetter:
> On Mon, Mar 21, 2022 at 02:25:56PM +0100, Christian König wrote:
>> This way we finally fix the problem that new resource are
>> not immediately evict-able after allocation.
>>
>> That has caused numerous problems including OOM on GDS handling
>> and not being able to use TTM as general resource manager.
>>
>> v2: stop assuming in ttm_resource_fini that res->bo is still valid.
>> v3: cleanup kerneldoc, add more lockdep annotation
>> v4: consistently use res->num_pages
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> Tested-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
>> +/**
>> + * struct ttm_lru_bulk_move
>> + *
>> + * @tt: first/last lru entry for resources in the TT domain
>> + * @vram: first/last lru entry for resources in the VRAM domain
>> + *
>> + * Helper structure for bulk moves on the LRU list.
>> + */
>> +struct ttm_lru_bulk_move {
>> +	struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
>> +	struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
> Not really needed, just a thought: Should we track the associated dma_resv
> object here to make sure the locking is all done correctly (and also check
> that the bulk move bo have the same dma_resv)? It wouldn't really be any
> overhead for the !CONFIG_LOCKDEP case and we could sprinkle a lot more
> dma_resv_held all over the place.

You made a similar comment on the last revision and I already tried to 
play around with that idea a bit.

But I've completely abandoned that idea after realizing that the BOs in 
the bulk move actually don't need to have the same dma_resv object, nor 
do they all need to be locked.

It just happens that amdgpu is currently using it that way, but I can't 
see any technical necessarily to restrict the bulk move like that.

Regards,
Christian.


> -Daniel


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/6] drm/ttm: move the LRU into resource handling v4
  2022-03-23 12:20   ` Christian König
@ 2022-03-23 13:36     ` Daniel Vetter
  2022-03-23 13:44       ` Christian König
  0 siblings, 1 reply; 18+ messages in thread
From: Daniel Vetter @ 2022-03-23 13:36 UTC (permalink / raw)
  To: Christian König
  Cc: Felix.Kuehling, matthew.william.auld, Christian König, dri-devel

On Wed, 23 Mar 2022 at 13:20, Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 23.03.22 um 12:59 schrieb Daniel Vetter:
> > On Mon, Mar 21, 2022 at 02:25:56PM +0100, Christian König wrote:
> >> This way we finally fix the problem that new resource are
> >> not immediately evict-able after allocation.
> >>
> >> That has caused numerous problems including OOM on GDS handling
> >> and not being able to use TTM as general resource manager.
> >>
> >> v2: stop assuming in ttm_resource_fini that res->bo is still valid.
> >> v3: cleanup kerneldoc, add more lockdep annotation
> >> v4: consistently use res->num_pages
> >>
> >> Signed-off-by: Christian König <christian.koenig@amd.com>
> >> Tested-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
> >> +/**
> >> + * struct ttm_lru_bulk_move
> >> + *
> >> + * @tt: first/last lru entry for resources in the TT domain
> >> + * @vram: first/last lru entry for resources in the VRAM domain
> >> + *
> >> + * Helper structure for bulk moves on the LRU list.
> >> + */
> >> +struct ttm_lru_bulk_move {
> >> +    struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
> >> +    struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
> > Not really needed, just a thought: Should we track the associated dma_resv
> > object here to make sure the locking is all done correctly (and also check
> > that the bulk move bo have the same dma_resv)? It wouldn't really be any
> > overhead for the !CONFIG_LOCKDEP case and we could sprinkle a lot more
> > dma_resv_held all over the place.
>
> You made a similar comment on the last revision and I already tried to
> play around with that idea a bit.
>
> But I've completely abandoned that idea after realizing that the BOs in
> the bulk move actually don't need to have the same dma_resv object, nor
> do they all need to be locked.

Uh how does that work? If you evict that bo while someone else is
doing a bulk move, then at least the result might be rather funny, and
I have no idea how it could work.

Like even if you then make the rule that you have to lock all bos for
the bulk move, the bo could still be moved independently, and that
would again break the bulk lru properties.

And if you do none of the above, there's no reason for that bo to have
a distinct dma_resv.

Like maybe the data structure wont fall apart, but semantically it
just doesn't make any sense to me to allow this. What would you want
to use this for?

> It just happens that amdgpu is currently using it that way, but I can't
> see any technical necessarily to restrict the bulk move like that.

Yeah we can do that later on in a follow up patch, or I figure out why
it's not a good idea :-) Just figured this might be good to lock down
before other drivers start adopting this.
-Daniel

>
> Regards,
> Christian.
>
>
> > -Daniel
>


-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/6] drm/ttm: move the LRU into resource handling v4
  2022-03-23 13:36     ` Daniel Vetter
@ 2022-03-23 13:44       ` Christian König
  2022-03-23 15:22         ` Daniel Vetter
  0 siblings, 1 reply; 18+ messages in thread
From: Christian König @ 2022-03-23 13:44 UTC (permalink / raw)
  To: Daniel Vetter, Christian König
  Cc: Felix.Kuehling, matthew.william.auld, dri-devel

Am 23.03.22 um 14:36 schrieb Daniel Vetter:
> On Wed, 23 Mar 2022 at 13:20, Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> Am 23.03.22 um 12:59 schrieb Daniel Vetter:
>>> On Mon, Mar 21, 2022 at 02:25:56PM +0100, Christian König wrote:
>>>> This way we finally fix the problem that new resource are
>>>> not immediately evict-able after allocation.
>>>>
>>>> That has caused numerous problems including OOM on GDS handling
>>>> and not being able to use TTM as general resource manager.
>>>>
>>>> v2: stop assuming in ttm_resource_fini that res->bo is still valid.
>>>> v3: cleanup kerneldoc, add more lockdep annotation
>>>> v4: consistently use res->num_pages
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>> Tested-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
>>>> +/**
>>>> + * struct ttm_lru_bulk_move
>>>> + *
>>>> + * @tt: first/last lru entry for resources in the TT domain
>>>> + * @vram: first/last lru entry for resources in the VRAM domain
>>>> + *
>>>> + * Helper structure for bulk moves on the LRU list.
>>>> + */
>>>> +struct ttm_lru_bulk_move {
>>>> +    struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
>>>> +    struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
>>> Not really needed, just a thought: Should we track the associated dma_resv
>>> object here to make sure the locking is all done correctly (and also check
>>> that the bulk move bo have the same dma_resv)? It wouldn't really be any
>>> overhead for the !CONFIG_LOCKDEP case and we could sprinkle a lot more
>>> dma_resv_held all over the place.
>> You made a similar comment on the last revision and I already tried to
>> play around with that idea a bit.
>>
>> But I've completely abandoned that idea after realizing that the BOs in
>> the bulk move actually don't need to have the same dma_resv object, nor
>> do they all need to be locked.
> Uh how does that work? If you evict that bo while someone else is
> doing a bulk move, then at least the result might be rather funny, and
> I have no idea how it could work.

The LRU is still protected by the common spinlock.

So that will synchronize any modification to both the bulk move 
structure as well as the individual list_heads making up the LRU.

>
> Like even if you then make the rule that you have to lock all bos for
> the bulk move, the bo could still be moved independently, and that
> would again break the bulk lru properties.
>
> And if you do none of the above, there's no reason for that bo to have
> a distinct dma_resv.
>
> Like maybe the data structure wont fall apart, but semantically it
> just doesn't make any sense to me to allow this. What would you want
> to use this for?

Yeah, that's a good point.

It's not technically necessary as far as I can see, but I'm not sure if 
there is a valid use case either.

>> It just happens that amdgpu is currently using it that way, but I can't
>> see any technical necessarily to restrict the bulk move like that.
> Yeah we can do that later on in a follow up patch, or I figure out why
> it's not a good idea :-) Just figured this might be good to lock down
> before other drivers start adopting this.

I'm just wondering if it's really more defensive to restrict the 
handling like that.

On the other hand we can still lift the restriction when anybody really 
comes along with a valid use case.

Christian.

> -Daniel
>
>> Regards,
>> Christian.
>>
>>
>>> -Daniel
>


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/6] drm/ttm: move the LRU into resource handling v4
  2022-03-23 13:44       ` Christian König
@ 2022-03-23 15:22         ` Daniel Vetter
  0 siblings, 0 replies; 18+ messages in thread
From: Daniel Vetter @ 2022-03-23 15:22 UTC (permalink / raw)
  To: Christian König
  Cc: Christian König, Felix.Kuehling, matthew.william.auld, dri-devel

On Wed, Mar 23, 2022 at 02:44:17PM +0100, Christian König wrote:
> Am 23.03.22 um 14:36 schrieb Daniel Vetter:
> > On Wed, 23 Mar 2022 at 13:20, Christian König
> > <ckoenig.leichtzumerken@gmail.com> wrote:
> > > Am 23.03.22 um 12:59 schrieb Daniel Vetter:
> > > > On Mon, Mar 21, 2022 at 02:25:56PM +0100, Christian König wrote:
> > > > > This way we finally fix the problem that new resource are
> > > > > not immediately evict-able after allocation.
> > > > > 
> > > > > That has caused numerous problems including OOM on GDS handling
> > > > > and not being able to use TTM as general resource manager.
> > > > > 
> > > > > v2: stop assuming in ttm_resource_fini that res->bo is still valid.
> > > > > v3: cleanup kerneldoc, add more lockdep annotation
> > > > > v4: consistently use res->num_pages
> > > > > 
> > > > > Signed-off-by: Christian König <christian.koenig@amd.com>
> > > > > Tested-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
> > > > > +/**
> > > > > + * struct ttm_lru_bulk_move
> > > > > + *
> > > > > + * @tt: first/last lru entry for resources in the TT domain
> > > > > + * @vram: first/last lru entry for resources in the VRAM domain
> > > > > + *
> > > > > + * Helper structure for bulk moves on the LRU list.
> > > > > + */
> > > > > +struct ttm_lru_bulk_move {
> > > > > +    struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
> > > > > +    struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
> > > > Not really needed, just a thought: Should we track the associated dma_resv
> > > > object here to make sure the locking is all done correctly (and also check
> > > > that the bulk move bo have the same dma_resv)? It wouldn't really be any
> > > > overhead for the !CONFIG_LOCKDEP case and we could sprinkle a lot more
> > > > dma_resv_held all over the place.
> > > You made a similar comment on the last revision and I already tried to
> > > play around with that idea a bit.
> > > 
> > > But I've completely abandoned that idea after realizing that the BOs in
> > > the bulk move actually don't need to have the same dma_resv object, nor
> > > do they all need to be locked.
> > Uh how does that work? If you evict that bo while someone else is
> > doing a bulk move, then at least the result might be rather funny, and
> > I have no idea how it could work.
> 
> The LRU is still protected by the common spinlock.
> 
> So that will synchronize any modification to both the bulk move structure as
> well as the individual list_heads making up the LRU.
> 
> > 
> > Like even if you then make the rule that you have to lock all bos for
> > the bulk move, the bo could still be moved independently, and that
> > would again break the bulk lru properties.
> > 
> > And if you do none of the above, there's no reason for that bo to have
> > a distinct dma_resv.
> > 
> > Like maybe the data structure wont fall apart, but semantically it
> > just doesn't make any sense to me to allow this. What would you want
> > to use this for?
> 
> Yeah, that's a good point.
> 
> It's not technically necessary as far as I can see, but I'm not sure if
> there is a valid use case either.

Yeah I think nothing obviously bad will happen to the list, but it will
very much no longer be an LRU. Which kinda defeats the point - there's
easier ways to not have an LRU like never updating anything :-)

Aside, this is actually what i915-gem folks did without realizing, so
that's also a bit why I think being strict here would be good. It's tricky
and if you're too clever it's way too easy to end up with something which
isn't anything useful anymore.
> 
> > > It just happens that amdgpu is currently using it that way, but I can't
> > > see any technical necessarily to restrict the bulk move like that.
> > Yeah we can do that later on in a follow up patch, or I figure out why
> > it's not a good idea :-) Just figured this might be good to lock down
> > before other drivers start adopting this.
> 
> I'm just wondering if it's really more defensive to restrict the handling
> like that.
> 
> On the other hand we can still lift the restriction when anybody really
> comes along with a valid use case.

Yeah if we do have a use-case we can lift this easily, it's just a few
dma_resv_assert_held. I don't think we need this included for merging, but
it'd be great as a follow-up patch while only amdgpu is using this.
-Daniel

> 
> Christian.
> 
> > -Daniel
> > 
> > > Regards,
> > > Christian.
> > > 
> > > 
> > > > -Daniel
> > 
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/6] drm/ttm: move the LRU into resource handling v4
  2022-03-23 10:35 ` [PATCH 1/6] drm/ttm: move the LRU into resource handling v4 Daniel Vetter
  2022-03-23 10:39   ` Daniel Vetter
@ 2022-03-24 14:25   ` Christian König
  2022-03-24 17:27     ` Daniel Vetter
  1 sibling, 1 reply; 18+ messages in thread
From: Christian König @ 2022-03-24 14:25 UTC (permalink / raw)
  To: Daniel Vetter, Christian König
  Cc: felix.kuehling, matthew.william.auld, dri-devel

Am 23.03.22 um 11:35 schrieb Daniel Vetter:
> On Mon, Mar 21, 2022 at 02:25:56PM +0100, Christian König wrote:
>> [SNIP]
>>   EXPORT_SYMBOL(ttm_resource_init);
>> @@ -66,15 +172,21 @@ EXPORT_SYMBOL(ttm_resource_init);
>>    * @res: the resource to clean up
>>    *
>>    * Should be used by resource manager backends to clean up the TTM resource
>> - * objects before freeing the underlying structure. Counterpart of
>> - * &ttm_resource_init
>> + * objects before freeing the underlying structure. Makes sure the resource is
>> + * removed from the LRU before destruction.
>> + * Counterpart of &ttm_resource_init.
> ttm_resource_init() or the link wont work correctly. There's a few more
> like this. From earlier patches, but please fix.

Hui what? I've just tried it and the current documentation works fine, 
but when I add the () it doesn't work any more.

> Also in my earlier review I had a bunch more kerneldoc comments that
> arean't addressed here yet:
>
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore.kernel.org%2Fdri-devel%2FYfAqtI95aewAb10L%40phenom.ffwll.local%2F&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C43a4517eb6bc4ccda10708da0cb8cbd6%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637836285385616052%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=%2BqlN6044DRHaXsYub9pCEVKUu5a0Nfod06lOp%2BaXNP0%3D&amp;reserved=0

What comment exactly do you mean? I thought I've addressed everything 
except for the lockdep checks.

Thanks,
Christian.

>
> With that addressed Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> still holds.
> -Daniel
>
>
>>    */
>>   void ttm_resource_fini(struct ttm_resource_manager *man,
>>   		       struct ttm_resource *res)
>>   {
>> -	spin_lock(&man->bdev->lru_lock);
>> -	man->usage -= res->bo->base.size;
>> -	spin_unlock(&man->bdev->lru_lock);
>> +	struct ttm_device *bdev = man->bdev;
>> +
>> +	spin_lock(&bdev->lru_lock);
>> +	list_del_init(&res->lru);
>> +	if (res->bo && bdev->funcs->del_from_lru_notify)
>> +		bdev->funcs->del_from_lru_notify(res->bo);
>> +	man->usage -= res->num_pages << PAGE_SHIFT;
>> +	spin_unlock(&bdev->lru_lock);
>>   }
>>   EXPORT_SYMBOL(ttm_resource_fini);
>>   
>> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
>> index c17b2df9178b..3da77fc54552 100644
>> --- a/include/drm/ttm/ttm_bo_api.h
>> +++ b/include/drm/ttm/ttm_bo_api.h
>> @@ -55,8 +55,6 @@ struct ttm_placement;
>>   
>>   struct ttm_place;
>>   
>> -struct ttm_lru_bulk_move;
>> -
>>   /**
>>    * enum ttm_bo_type
>>    *
>> @@ -94,7 +92,6 @@ struct ttm_tt;
>>    * @ttm: TTM structure holding system pages.
>>    * @evicted: Whether the object was evicted without user-space knowing.
>>    * @deleted: True if the object is only a zombie and already deleted.
>> - * @lru: List head for the lru list.
>>    * @ddestroy: List head for the delayed destroy list.
>>    * @swap: List head for swap LRU list.
>>    * @moving: Fence set when BO is moving
>> @@ -143,7 +140,6 @@ struct ttm_buffer_object {
>>   	 * Members protected by the bdev::lru_lock.
>>   	 */
>>   
>> -	struct list_head lru;
>>   	struct list_head ddestroy;
>>   
>>   	/**
>> @@ -295,7 +291,6 @@ void ttm_bo_put(struct ttm_buffer_object *bo);
>>    * ttm_bo_move_to_lru_tail
>>    *
>>    * @bo: The buffer object.
>> - * @mem: Resource object.
>>    * @bulk: optional bulk move structure to remember BO positions
>>    *
>>    * Move this BO to the tail of all lru lists used to lookup and reserve an
>> @@ -303,19 +298,8 @@ void ttm_bo_put(struct ttm_buffer_object *bo);
>>    * held, and is used to make a BO less likely to be considered for eviction.
>>    */
>>   void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
>> -			     struct ttm_resource *mem,
>>   			     struct ttm_lru_bulk_move *bulk);
>>   
>> -/**
>> - * ttm_bo_bulk_move_lru_tail
>> - *
>> - * @bulk: bulk move structure
>> - *
>> - * Bulk move BOs to the LRU tail, only valid to use when driver makes sure that
>> - * BO order never changes. Should be called with ttm_global::lru_lock held.
>> - */
>> -void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk);
>> -
>>   /**
>>    * ttm_bo_lock_delayed_workqueue
>>    *
>> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
>> index 5f087575194b..6c7352e13708 100644
>> --- a/include/drm/ttm/ttm_bo_driver.h
>> +++ b/include/drm/ttm/ttm_bo_driver.h
>> @@ -45,33 +45,6 @@
>>   #include "ttm_tt.h"
>>   #include "ttm_pool.h"
>>   
>> -/**
>> - * struct ttm_lru_bulk_move_pos
>> - *
>> - * @first: first BO in the bulk move range
>> - * @last: last BO in the bulk move range
>> - *
>> - * Positions for a lru bulk move.
>> - */
>> -struct ttm_lru_bulk_move_pos {
>> -	struct ttm_buffer_object *first;
>> -	struct ttm_buffer_object *last;
>> -};
>> -
>> -/**
>> - * struct ttm_lru_bulk_move
>> - *
>> - * @tt: first/last lru entry for BOs in the TT domain
>> - * @vram: first/last lru entry for BOs in the VRAM domain
>> - * @swap: first/last lru entry for BOs on the swap list
>> - *
>> - * Helper structure for bulk moves on the LRU list.
>> - */
>> -struct ttm_lru_bulk_move {
>> -	struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
>> -	struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
>> -};
>> -
>>   /*
>>    * ttm_bo.c
>>    */
>> @@ -182,7 +155,7 @@ static inline void
>>   ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
>>   {
>>   	spin_lock(&bo->bdev->lru_lock);
>> -	ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
>> +	ttm_bo_move_to_lru_tail(bo, NULL);
>>   	spin_unlock(&bo->bdev->lru_lock);
>>   }
>>   
>> diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
>> index 323c14a30c6b..181e82e3d806 100644
>> --- a/include/drm/ttm/ttm_resource.h
>> +++ b/include/drm/ttm/ttm_resource.h
>> @@ -26,10 +26,12 @@
>>   #define _TTM_RESOURCE_H_
>>   
>>   #include <linux/types.h>
>> +#include <linux/list.h>
>>   #include <linux/mutex.h>
>>   #include <linux/atomic.h>
>>   #include <linux/dma-buf-map.h>
>>   #include <linux/dma-fence.h>
>> +
>>   #include <drm/drm_print.h>
>>   #include <drm/ttm/ttm_caching.h>
>>   #include <drm/ttm/ttm_kmap_iter.h>
>> @@ -179,6 +181,33 @@ struct ttm_resource {
>>   	uint32_t placement;
>>   	struct ttm_bus_placement bus;
>>   	struct ttm_buffer_object *bo;
>> +	struct list_head lru;
>> +};
>> +
>> +/**
>> + * struct ttm_lru_bulk_move_pos
>> + *
>> + * @first: first res in the bulk move range
>> + * @last: last res in the bulk move range
>> + *
>> + * Positions for a lru bulk move.
>> + */
>> +struct ttm_lru_bulk_move_pos {
>> +	struct ttm_resource *first;
>> +	struct ttm_resource *last;
>> +};
>> +
>> +/**
>> + * struct ttm_lru_bulk_move
>> + *
>> + * @tt: first/last lru entry for resources in the TT domain
>> + * @vram: first/last lru entry for resources in the VRAM domain
>> + *
>> + * Helper structure for bulk moves on the LRU list.
>> + */
>> +struct ttm_lru_bulk_move {
>> +	struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
>> +	struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
>>   };
>>   
>>   /**
>> @@ -267,6 +296,12 @@ ttm_resource_manager_cleanup(struct ttm_resource_manager *man)
>>   	man->move = NULL;
>>   }
>>   
>> +void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk);
>> +void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk);
>> +
>> +void ttm_resource_move_to_lru_tail(struct ttm_resource *res,
>> +				   struct ttm_lru_bulk_move *bulk);
>> +
>>   void ttm_resource_init(struct ttm_buffer_object *bo,
>>                          const struct ttm_place *place,
>>                          struct ttm_resource *res);
>> -- 
>> 2.25.1
>>


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/6] drm/ttm: move the LRU into resource handling v4
  2022-03-24 14:25   ` Christian König
@ 2022-03-24 17:27     ` Daniel Vetter
  0 siblings, 0 replies; 18+ messages in thread
From: Daniel Vetter @ 2022-03-24 17:27 UTC (permalink / raw)
  To: Christian König
  Cc: Christian König, felix.kuehling, matthew.william.auld, dri-devel

On Thu, Mar 24, 2022 at 03:25:08PM +0100, Christian König wrote:
> Am 23.03.22 um 11:35 schrieb Daniel Vetter:
> > On Mon, Mar 21, 2022 at 02:25:56PM +0100, Christian König wrote:
> > > [SNIP]
> > >   EXPORT_SYMBOL(ttm_resource_init);
> > > @@ -66,15 +172,21 @@ EXPORT_SYMBOL(ttm_resource_init);
> > >    * @res: the resource to clean up
> > >    *
> > >    * Should be used by resource manager backends to clean up the TTM resource
> > > - * objects before freeing the underlying structure. Counterpart of
> > > - * &ttm_resource_init
> > > + * objects before freeing the underlying structure. Makes sure the resource is
> > > + * removed from the LRU before destruction.
> > > + * Counterpart of &ttm_resource_init.
> > ttm_resource_init() or the link wont work correctly. There's a few more
> > like this. From earlier patches, but please fix.
> 
> Hui what? I've just tried it and the current documentation works fine, but
> when I add the () it doesn't work any more.

&foo is for linking to structs (but struct foo is recommended since it's
easier on the eyes), foo() is for linking to functions.

So if that doesn't work then something fishy is going on, maybe the
kerneldoc for ttm_resource_init is using the struct header and not the
function header?

https://dri.freedesktop.org/docs/drm/doc-guide/kernel-doc.html#highlights-and-cross-references
 
> > Also in my earlier review I had a bunch more kerneldoc comments that
> > arean't addressed here yet:
> > 
> > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore.kernel.org%2Fdri-devel%2FYfAqtI95aewAb10L%40phenom.ffwll.local%2F&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C43a4517eb6bc4ccda10708da0cb8cbd6%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637836285385616052%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=%2BqlN6044DRHaXsYub9pCEVKUu5a0Nfod06lOp%2BaXNP0%3D&amp;reserved=0
> 
> What comment exactly do you mean? I thought I've addressed everything except
> for the lockdep checks.

There was a bunch about adding more cross references. The main one is that
imo struct ttm_lru_bulk_move should link to all the functions you need to
use this with. I made similar comments in later patches (e.g. for
set_bulk_move). That struct just seems like the best place to tie this all
together, and maybe even add a bit of wording why this is a cool idea.

I guess you only read the initial review up to the locking comments, since
that's the stuff we ended up discussing a bit :-)

Cheers, Daniel

> 
> Thanks,
> Christian.
> 
> > 
> > With that addressed Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > still holds.
> > -Daniel
> > 
> > 
> > >    */
> > >   void ttm_resource_fini(struct ttm_resource_manager *man,
> > >   		       struct ttm_resource *res)
> > >   {
> > > -	spin_lock(&man->bdev->lru_lock);
> > > -	man->usage -= res->bo->base.size;
> > > -	spin_unlock(&man->bdev->lru_lock);
> > > +	struct ttm_device *bdev = man->bdev;
> > > +
> > > +	spin_lock(&bdev->lru_lock);
> > > +	list_del_init(&res->lru);
> > > +	if (res->bo && bdev->funcs->del_from_lru_notify)
> > > +		bdev->funcs->del_from_lru_notify(res->bo);
> > > +	man->usage -= res->num_pages << PAGE_SHIFT;
> > > +	spin_unlock(&bdev->lru_lock);
> > >   }
> > >   EXPORT_SYMBOL(ttm_resource_fini);
> > > diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> > > index c17b2df9178b..3da77fc54552 100644
> > > --- a/include/drm/ttm/ttm_bo_api.h
> > > +++ b/include/drm/ttm/ttm_bo_api.h
> > > @@ -55,8 +55,6 @@ struct ttm_placement;
> > >   struct ttm_place;
> > > -struct ttm_lru_bulk_move;
> > > -
> > >   /**
> > >    * enum ttm_bo_type
> > >    *
> > > @@ -94,7 +92,6 @@ struct ttm_tt;
> > >    * @ttm: TTM structure holding system pages.
> > >    * @evicted: Whether the object was evicted without user-space knowing.
> > >    * @deleted: True if the object is only a zombie and already deleted.
> > > - * @lru: List head for the lru list.
> > >    * @ddestroy: List head for the delayed destroy list.
> > >    * @swap: List head for swap LRU list.
> > >    * @moving: Fence set when BO is moving
> > > @@ -143,7 +140,6 @@ struct ttm_buffer_object {
> > >   	 * Members protected by the bdev::lru_lock.
> > >   	 */
> > > -	struct list_head lru;
> > >   	struct list_head ddestroy;
> > >   	/**
> > > @@ -295,7 +291,6 @@ void ttm_bo_put(struct ttm_buffer_object *bo);
> > >    * ttm_bo_move_to_lru_tail
> > >    *
> > >    * @bo: The buffer object.
> > > - * @mem: Resource object.
> > >    * @bulk: optional bulk move structure to remember BO positions
> > >    *
> > >    * Move this BO to the tail of all lru lists used to lookup and reserve an
> > > @@ -303,19 +298,8 @@ void ttm_bo_put(struct ttm_buffer_object *bo);
> > >    * held, and is used to make a BO less likely to be considered for eviction.
> > >    */
> > >   void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
> > > -			     struct ttm_resource *mem,
> > >   			     struct ttm_lru_bulk_move *bulk);
> > > -/**
> > > - * ttm_bo_bulk_move_lru_tail
> > > - *
> > > - * @bulk: bulk move structure
> > > - *
> > > - * Bulk move BOs to the LRU tail, only valid to use when driver makes sure that
> > > - * BO order never changes. Should be called with ttm_global::lru_lock held.
> > > - */
> > > -void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk);
> > > -
> > >   /**
> > >    * ttm_bo_lock_delayed_workqueue
> > >    *
> > > diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> > > index 5f087575194b..6c7352e13708 100644
> > > --- a/include/drm/ttm/ttm_bo_driver.h
> > > +++ b/include/drm/ttm/ttm_bo_driver.h
> > > @@ -45,33 +45,6 @@
> > >   #include "ttm_tt.h"
> > >   #include "ttm_pool.h"
> > > -/**
> > > - * struct ttm_lru_bulk_move_pos
> > > - *
> > > - * @first: first BO in the bulk move range
> > > - * @last: last BO in the bulk move range
> > > - *
> > > - * Positions for a lru bulk move.
> > > - */
> > > -struct ttm_lru_bulk_move_pos {
> > > -	struct ttm_buffer_object *first;
> > > -	struct ttm_buffer_object *last;
> > > -};
> > > -
> > > -/**
> > > - * struct ttm_lru_bulk_move
> > > - *
> > > - * @tt: first/last lru entry for BOs in the TT domain
> > > - * @vram: first/last lru entry for BOs in the VRAM domain
> > > - * @swap: first/last lru entry for BOs on the swap list
> > > - *
> > > - * Helper structure for bulk moves on the LRU list.
> > > - */
> > > -struct ttm_lru_bulk_move {
> > > -	struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
> > > -	struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
> > > -};
> > > -
> > >   /*
> > >    * ttm_bo.c
> > >    */
> > > @@ -182,7 +155,7 @@ static inline void
> > >   ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
> > >   {
> > >   	spin_lock(&bo->bdev->lru_lock);
> > > -	ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
> > > +	ttm_bo_move_to_lru_tail(bo, NULL);
> > >   	spin_unlock(&bo->bdev->lru_lock);
> > >   }
> > > diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
> > > index 323c14a30c6b..181e82e3d806 100644
> > > --- a/include/drm/ttm/ttm_resource.h
> > > +++ b/include/drm/ttm/ttm_resource.h
> > > @@ -26,10 +26,12 @@
> > >   #define _TTM_RESOURCE_H_
> > >   #include <linux/types.h>
> > > +#include <linux/list.h>
> > >   #include <linux/mutex.h>
> > >   #include <linux/atomic.h>
> > >   #include <linux/dma-buf-map.h>
> > >   #include <linux/dma-fence.h>
> > > +
> > >   #include <drm/drm_print.h>
> > >   #include <drm/ttm/ttm_caching.h>
> > >   #include <drm/ttm/ttm_kmap_iter.h>
> > > @@ -179,6 +181,33 @@ struct ttm_resource {
> > >   	uint32_t placement;
> > >   	struct ttm_bus_placement bus;
> > >   	struct ttm_buffer_object *bo;
> > > +	struct list_head lru;
> > > +};
> > > +
> > > +/**
> > > + * struct ttm_lru_bulk_move_pos
> > > + *
> > > + * @first: first res in the bulk move range
> > > + * @last: last res in the bulk move range
> > > + *
> > > + * Positions for a lru bulk move.
> > > + */
> > > +struct ttm_lru_bulk_move_pos {
> > > +	struct ttm_resource *first;
> > > +	struct ttm_resource *last;
> > > +};
> > > +
> > > +/**
> > > + * struct ttm_lru_bulk_move
> > > + *
> > > + * @tt: first/last lru entry for resources in the TT domain
> > > + * @vram: first/last lru entry for resources in the VRAM domain
> > > + *
> > > + * Helper structure for bulk moves on the LRU list.
> > > + */
> > > +struct ttm_lru_bulk_move {
> > > +	struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
> > > +	struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
> > >   };
> > >   /**
> > > @@ -267,6 +296,12 @@ ttm_resource_manager_cleanup(struct ttm_resource_manager *man)
> > >   	man->move = NULL;
> > >   }
> > > +void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk);
> > > +void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk);
> > > +
> > > +void ttm_resource_move_to_lru_tail(struct ttm_resource *res,
> > > +				   struct ttm_lru_bulk_move *bulk);
> > > +
> > >   void ttm_resource_init(struct ttm_buffer_object *bo,
> > >                          const struct ttm_place *place,
> > >                          struct ttm_resource *res);
> > > -- 
> > > 2.25.1
> > > 
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 5/6] drm/ttm: rework bulk move handling v4
  2022-02-15 17:22 Reworking TTMs LRU handling Christian König
@ 2022-02-15 17:22 ` Christian König
  0 siblings, 0 replies; 18+ messages in thread
From: Christian König @ 2022-02-15 17:22 UTC (permalink / raw)
  To: matthew.william.auld, felix.kuehling, daniel; +Cc: dri-devel

Instead of providing the bulk move structure for each LRU update set
this as property of the BO. This should avoid costly bulk move rebuilds
with some games under RADV.

v2: some name polishing, add a few more kerneldoc words.
v3: add some lockdep
v4: fix bugs, handle pin/unpin as well

Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 72 +++-----------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 -
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c |  2 +-
 drivers/gpu/drm/ttm/ttm_bo.c            | 59 +++++++++++++---
 drivers/gpu/drm/ttm/ttm_resource.c      | 90 ++++++++++++++++++-------
 include/drm/ttm/ttm_bo_api.h            | 16 ++---
 include/drm/ttm/ttm_bo_driver.h         |  2 +-
 include/drm/ttm/ttm_device.h            |  9 ---
 include/drm/ttm/ttm_resource.h          |  9 ++-
 10 files changed, 138 insertions(+), 125 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 5859ed0552a4..57ac118fc266 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1498,7 +1498,6 @@ static struct ttm_device_funcs amdgpu_bo_driver = {
 	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
 	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
 	.access_memory = &amdgpu_ttm_access_memory,
-	.del_from_lru_notify = &amdgpu_vm_del_from_lru_notify
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f2ce5a0defd9..28f5e8b21a99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -375,7 +375,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
 	if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv)
 		return;
 
-	vm->bulk_moveable = false;
+	ttm_bo_set_bulk_move(&bo->tbo, &vm->lru_bulk_move);
 	if (bo->tbo.type == ttm_bo_type_kernel && bo->parent)
 		amdgpu_vm_bo_relocated(base);
 	else
@@ -637,36 +637,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 	list_add(&entry->tv.head, validated);
 }
 
-/**
- * amdgpu_vm_del_from_lru_notify - update bulk_moveable flag
- *
- * @bo: BO which was removed from the LRU
- *
- * Make sure the bulk_moveable flag is updated when a BO is removed from the
- * LRU.
- */
-void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
-{
-	struct amdgpu_bo *abo;
-	struct amdgpu_vm_bo_base *bo_base;
-
-	if (!amdgpu_bo_is_amdgpu_bo(bo))
-		return;
-
-	if (bo->pin_count)
-		return;
-
-	abo = ttm_to_amdgpu_bo(bo);
-	if (!abo->parent)
-		return;
-	for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) {
-		struct amdgpu_vm *vm = bo_base->vm;
-
-		if (abo->tbo.base.resv == vm->root.bo->tbo.base.resv)
-			vm->bulk_moveable = false;
-	}
-
-}
 /**
  * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
  *
@@ -679,33 +649,9 @@ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
 void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
 				struct amdgpu_vm *vm)
 {
-	struct amdgpu_vm_bo_base *bo_base;
-
-	if (vm->bulk_moveable) {
-		spin_lock(&adev->mman.bdev.lru_lock);
-		ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
-		spin_unlock(&adev->mman.bdev.lru_lock);
-		return;
-	}
-
-	ttm_lru_bulk_move_init(&vm->lru_bulk_move);
-
 	spin_lock(&adev->mman.bdev.lru_lock);
-	list_for_each_entry(bo_base, &vm->idle, vm_status) {
-		struct amdgpu_bo *bo = bo_base->bo;
-		struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);
-
-		if (!bo->parent)
-			continue;
-
-		ttm_bo_move_to_lru_tail(&bo->tbo, &vm->lru_bulk_move);
-		if (shadow)
-			ttm_bo_move_to_lru_tail(&shadow->tbo,
-						&vm->lru_bulk_move);
-	}
+	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
 	spin_unlock(&adev->mman.bdev.lru_lock);
-
-	vm->bulk_moveable = true;
 }
 
 /**
@@ -728,8 +674,6 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	struct amdgpu_vm_bo_base *bo_base, *tmp;
 	int r;
 
-	vm->bulk_moveable &= list_empty(&vm->evicted);
-
 	list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
 		struct amdgpu_bo *bo = bo_base->bo;
 		struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);
@@ -1047,10 +991,16 @@ static void amdgpu_vm_free_table(struct amdgpu_vm_bo_base *entry)
 
 	if (!entry->bo)
 		return;
+
 	shadow = amdgpu_bo_shadowed(entry->bo);
+	if (shadow) {
+		ttm_bo_set_bulk_move(&shadow->tbo, NULL);
+		amdgpu_bo_unref(&shadow);
+	}
+
+	ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
 	entry->bo->vm_bo = NULL;
 	list_del(&entry->vm_status);
-	amdgpu_bo_unref(&shadow);
 	amdgpu_bo_unref(&entry->bo);
 }
 
@@ -1070,8 +1020,6 @@ static void amdgpu_vm_free_pts(struct amdgpu_device *adev,
 	struct amdgpu_vm_pt_cursor cursor;
 	struct amdgpu_vm_bo_base *entry;
 
-	vm->bulk_moveable = false;
-
 	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
 		amdgpu_vm_free_table(entry);
 
@@ -2651,7 +2599,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 
 	if (bo) {
 		if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)
-			vm->bulk_moveable = false;
+			ttm_bo_set_bulk_move(&bo->tbo, NULL);
 
 		for (base = &bo_va->base.bo->vm_bo; *base;
 		     base = &(*base)->next) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 85fcfb8c5efd..4d236682a118 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -317,8 +317,6 @@ struct amdgpu_vm {
 
 	/* Store positions of group of BOs */
 	struct ttm_lru_bulk_move lru_bulk_move;
-	/* mark whether can do the bulk move */
-	bool			bulk_moveable;
 	/* Flag to indicate if VM is used for compute */
 	bool			is_compute_context;
 };
@@ -454,7 +452,6 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
 
 void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
 				struct amdgpu_vm *vm);
-void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo);
 void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
 				uint64_t *gtt_mem, uint64_t *cpu_mem);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 582e8dc9bc8c..6fc192082d8c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -849,7 +849,7 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
 			bo->priority = I915_TTM_PRIO_NO_PAGES;
 	}
 
-	ttm_bo_move_to_lru_tail(bo, NULL);
+	ttm_bo_move_to_lru_tail(bo);
 	spin_unlock(&bo->bdev->lru_lock);
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index ed28a4229885..0ac0b8f95691 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -69,16 +69,53 @@ static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo,
 	}
 }
 
-void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
-			     struct ttm_lru_bulk_move *bulk)
+/**
+ * ttm_bo_move_to_lru_tail
+ *
+ * @bo: The buffer object.
+ *
+ * Move this BO to the tail of all lru lists used to lookup and reserve an
+ * object. This function must be called with struct ttm_global::lru_lock
+ * held, and is used to make a BO less likely to be considered for eviction.
+ */
+void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo)
 {
 	dma_resv_assert_held(bo->base.resv);
 
 	if (bo->resource)
-		ttm_resource_move_to_lru_tail(bo->resource, bulk);
+		ttm_resource_move_to_lru_tail(bo->resource);
 }
 EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
 
+/**
+ * ttm_bo_set_bulk_move - update BOs bulk move object
+ *
+ * @bo: The buffer object.
+ *
+ * Update the BOs bulk move object, making sure that resources are added/removed
+ * as well. A bulk move allows to move many resource on the LRU at once,
+ * resulting in much less overhead of maintaining the LRU.
+ * The only requirement is that the resources stay together on the LRU and are
+ * never separated. This is enforces by setting the bulk_move structure on a BO.
+ */
+void ttm_bo_set_bulk_move(struct ttm_buffer_object *bo,
+			  struct ttm_lru_bulk_move *bulk)
+{
+	dma_resv_assert_held(bo->base.resv);
+
+	if (bo->bulk_move == bulk)
+		return;
+
+	spin_lock(&bo->bdev->lru_lock);
+	if (bo->bulk_move && bo->resource)
+		ttm_lru_bulk_move_del(bo->bulk_move, bo->resource);
+	bo->bulk_move = bulk;
+	if (bo->bulk_move && bo->resource)
+		ttm_lru_bulk_move_add(bo->bulk_move, bo->resource);
+	spin_unlock(&bo->bdev->lru_lock);
+}
+EXPORT_SYMBOL(ttm_bo_set_bulk_move);
+
 static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 				  struct ttm_resource *mem, bool evict,
 				  struct ttm_operation_ctx *ctx,
@@ -316,6 +353,7 @@ static void ttm_bo_release(struct kref *kref)
 	int ret;
 
 	WARN_ON_ONCE(bo->pin_count);
+	WARN_ON_ONCE(bo->bulk_move);
 
 	if (!bo->deleted) {
 		ret = ttm_bo_individualize_resv(bo);
@@ -352,7 +390,7 @@ static void ttm_bo_release(struct kref *kref)
 		 */
 		if (bo->pin_count) {
 			bo->pin_count = 0;
-			ttm_resource_move_to_lru_tail(bo->resource, NULL);
+			ttm_resource_move_to_lru_tail(bo->resource);
 		}
 
 		kref_init(&bo->kref);
@@ -644,7 +682,8 @@ void ttm_bo_pin(struct ttm_buffer_object *bo)
 {
 	dma_resv_assert_held(bo->base.resv);
 	WARN_ON_ONCE(!kref_read(&bo->kref));
-	++bo->pin_count;
+	if (!(bo->pin_count++) && bo->bulk_move && bo->resource)
+		ttm_lru_bulk_move_del(bo->bulk_move, bo->resource);
 }
 EXPORT_SYMBOL(ttm_bo_pin);
 
@@ -658,10 +697,11 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo)
 {
 	dma_resv_assert_held(bo->base.resv);
 	WARN_ON_ONCE(!kref_read(&bo->kref));
-	if (bo->pin_count)
-		--bo->pin_count;
-	else
-		WARN_ON_ONCE(true);
+	if (WARN_ON_ONCE(!bo->pin_count))
+		return;
+
+	if (!(--bo->pin_count) && bo->bulk_move && bo->resource)
+		ttm_lru_bulk_move_add(bo->bulk_move, bo->resource);
 }
 EXPORT_SYMBOL(ttm_bo_unpin);
 
@@ -906,6 +946,7 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
 	bo->moving = NULL;
 	bo->pin_count = 0;
 	bo->sg = sg;
+	bo->bulk_move = NULL;
 	if (resv) {
 		bo->base.resv = resv;
 		dma_resv_assert_held(bo->base.resv);
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index 85a33473e4d2..64dba94cc85a 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -73,42 +73,77 @@ void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk)
 }
 EXPORT_SYMBOL(ttm_lru_bulk_move_tail);
 
-/* Record a resource position in a bulk move structure */
-static void ttm_lru_bulk_move_set_pos(struct ttm_lru_bulk_move_pos *pos,
-				      struct ttm_resource *res)
+/* Return the bulk move pos object for this resource */
+static struct ttm_lru_bulk_move_pos *
+ttm_lru_bulk_move_pos(struct ttm_lru_bulk_move *bulk, struct ttm_resource *res)
 {
-	if (!pos->first)
+	return &bulk->pos[res->mem_type][res->bo->priority];
+}
+
+/* Move the resource to the tail of the bulk move range */
+static void ttm_lru_bulk_move_pos_tail(struct ttm_lru_bulk_move_pos *pos,
+				       struct ttm_resource *res)
+{
+	if (pos->last != res) {
+		list_move(&res->lru, &pos->last->lru);
+		pos->last = res;
+	}
+}
+
+/* Add the resource to a bulk_move cursor */
+void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
+			   struct ttm_resource *res)
+{
+	struct ttm_lru_bulk_move_pos *pos = ttm_lru_bulk_move_pos(bulk, res);
+
+	if (!pos->first) {
 		pos->first = res;
-	pos->last = res;
+		pos->last = res;
+	} else {
+		ttm_lru_bulk_move_pos_tail(pos, res);
+	}
+}
+
+/* Remove the resource from a bulk_move range */
+void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
+			   struct ttm_resource *res)
+{
+	struct ttm_lru_bulk_move_pos *pos = ttm_lru_bulk_move_pos(bulk, res);
+
+	if (unlikely(pos->first == res && pos->last == res)) {
+		pos->first = NULL;
+		pos->last = NULL;
+	} else if (pos->first == res) {
+		pos->first = list_next_entry(res, lru);
+	} else if (pos->last == res) {
+		pos->last = list_prev_entry(res, lru);
+	} else {
+		list_move(&res->lru, &pos->last->lru);
+	}
 }
 
-/* Move a resource to the LRU tail and track the bulk position */
-void ttm_resource_move_to_lru_tail(struct ttm_resource *res,
-				   struct ttm_lru_bulk_move *bulk)
+/* Move a resource to the LRU or bulk tail */
+void ttm_resource_move_to_lru_tail(struct ttm_resource *res)
 {
 	struct ttm_buffer_object *bo = res->bo;
 	struct ttm_device *bdev = bo->bdev;
-	struct ttm_resource_manager *man;
 
 	lockdep_assert_held(&bo->bdev->lru_lock);
 
 	if (bo->pin_count) {
 		list_move_tail(&res->lru, &bdev->pinned);
-		if (bdev->funcs->del_from_lru_notify)
-			bdev->funcs->del_from_lru_notify(res->bo);
-		return;
-	}
 
-	man = ttm_manager_type(bdev, res->mem_type);
-	list_move_tail(&res->lru, &man->lru[bo->priority]);
+	} else	if (bo->bulk_move) {
+		struct ttm_lru_bulk_move_pos *pos =
+			ttm_lru_bulk_move_pos(bo->bulk_move, res);
 
-	if (bdev->funcs->del_from_lru_notify)
-		bdev->funcs->del_from_lru_notify(bo);
-
-	if (!bulk)
-		return;
+		ttm_lru_bulk_move_pos_tail(pos, res);
+	} else {
+		struct ttm_resource_manager *man;
 
-	ttm_lru_bulk_move_set_pos(&bulk->pos[res->mem_type][bo->priority], res);
+		man = ttm_manager_type(bdev, res->mem_type);
+		list_move_tail(&res->lru, &man->lru[bo->priority]);
+	}
 }
 
 /**
@@ -139,7 +174,10 @@ void ttm_resource_init(struct ttm_buffer_object *bo,
 	man = ttm_manager_type(bo->bdev, place->mem_type);
 	spin_lock(&bo->bdev->lru_lock);
 	man->usage += bo->base.size;
-	ttm_resource_move_to_lru_tail(res, NULL);
+	if (bo->bulk_move)
+		ttm_lru_bulk_move_add(bo->bulk_move, res);
+	else
+		ttm_resource_move_to_lru_tail(res);
 	spin_unlock(&bo->bdev->lru_lock);
 }
 EXPORT_SYMBOL(ttm_resource_init);
@@ -161,8 +199,6 @@ void ttm_resource_fini(struct ttm_resource_manager *man,
 
 	spin_lock(&bdev->lru_lock);
 	list_del_init(&res->lru);
-	if (res->bo && bdev->funcs->del_from_lru_notify)
-		bdev->funcs->del_from_lru_notify(res->bo);
 	man->usage -= res->num_pages << PAGE_SHIFT;
 	spin_unlock(&bdev->lru_lock);
 }
@@ -185,6 +221,12 @@ void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res)
 	if (!*res)
 		return;
 
+	if (bo->bulk_move) {
+		spin_lock(&bo->bdev->lru_lock);
+		ttm_lru_bulk_move_del(bo->bulk_move, *res);
+		spin_unlock(&bo->bdev->lru_lock);
+	}
+
 	man = ttm_manager_type(bo->bdev, (*res)->mem_type);
 	man->func->free(man, *res);
 	*res = NULL;
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 885b7698fd65..c61e1e5ceb83 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -135,6 +135,7 @@ struct ttm_buffer_object {
 	struct ttm_resource *resource;
 	struct ttm_tt *ttm;
 	bool deleted;
+	struct ttm_lru_bulk_move *bulk_move;
 
 	/**
 	 * Members protected by the bdev::lru_lock.
@@ -287,18 +288,9 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
  */
 void ttm_bo_put(struct ttm_buffer_object *bo);
 
-/**
- * ttm_bo_move_to_lru_tail
- *
- * @bo: The buffer object.
- * @bulk: optional bulk move structure to remember BO positions
- *
- * Move this BO to the tail of all lru lists used to lookup and reserve an
- * object. This function must be called with struct ttm_global::lru_lock
- * held, and is used to make a BO less likely to be considered for eviction.
- */
-void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
-			     struct ttm_lru_bulk_move *bulk);
+void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo);
+void ttm_bo_set_bulk_move(struct ttm_buffer_object *bo,
+			  struct ttm_lru_bulk_move *bulk);
 
 /**
  * ttm_bo_lock_delayed_workqueue
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 6c7352e13708..059a595e14e5 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -155,7 +155,7 @@ static inline void
 ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
 {
 	spin_lock(&bo->bdev->lru_lock);
-	ttm_bo_move_to_lru_tail(bo, NULL);
+	ttm_bo_move_to_lru_tail(bo);
 	spin_unlock(&bo->bdev->lru_lock);
 }
 
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 425150f35fbe..95b3c04b1ab9 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -198,15 +198,6 @@ struct ttm_device_funcs {
 	int (*access_memory)(struct ttm_buffer_object *bo, unsigned long offset,
 			     void *buf, int len, int write);
 
-	/**
-	 * struct ttm_bo_driver member del_from_lru_notify
-	 *
-	 * @bo: the buffer object deleted from lru
-	 *
-	 * notify driver that a BO was deleted from LRU.
-	 */
-	void (*del_from_lru_notify)(struct ttm_buffer_object *bo);
-
 	/**
 	 * Notify the driver that we're about to release a BO
 	 *
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index e8a64ca3cf25..07a17b22cf39 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -202,7 +202,7 @@ struct ttm_resource_cursor {
  * @first: first res in the bulk move range
  * @last: last res in the bulk move range
  *
- * Positions for a lru bulk move.
+ * Range of resources for a lru bulk move.
  */
 struct ttm_lru_bulk_move_pos {
 	struct ttm_resource *first;
@@ -308,10 +308,13 @@ ttm_resource_manager_cleanup(struct ttm_resource_manager *man)
 }
 
 void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk);
+void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
+			   struct ttm_resource *res);
+void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
+			   struct ttm_resource *res);
 void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk);
 
-void ttm_resource_move_to_lru_tail(struct ttm_resource *res,
-				   struct ttm_lru_bulk_move *bulk);
+void ttm_resource_move_to_lru_tail(struct ttm_resource *res);
 
 void ttm_resource_init(struct ttm_buffer_object *bo,
                        const struct ttm_place *place,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2022-03-24 17:27 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-21 13:25 [PATCH 1/6] drm/ttm: move the LRU into resource handling v4 Christian König
2022-03-21 13:25 ` [PATCH 2/6] drm/ttm: add resource iterator v4 Christian König
2022-03-21 13:25 ` [PATCH 3/6] drm/ttm: allow bulk moves for all domains Christian König
2022-03-21 13:25 ` [PATCH 4/6] drm/ttm: de-inline ttm_bo_pin/unpin Christian König
2022-03-23 10:37   ` Daniel Vetter
2022-03-21 13:26 ` [PATCH 5/6] drm/ttm: rework bulk move handling v4 Christian König
2022-03-23 10:49   ` Daniel Vetter
2022-03-21 13:26 ` [PATCH 6/6] drm/amdgpu: drop amdgpu_gtt_node Christian König
2022-03-23 10:35 ` [PATCH 1/6] drm/ttm: move the LRU into resource handling v4 Daniel Vetter
2022-03-23 10:39   ` Daniel Vetter
2022-03-24 14:25   ` Christian König
2022-03-24 17:27     ` Daniel Vetter
2022-03-23 11:59 ` Daniel Vetter
2022-03-23 12:20   ` Christian König
2022-03-23 13:36     ` Daniel Vetter
2022-03-23 13:44       ` Christian König
2022-03-23 15:22         ` Daniel Vetter
  -- strict thread matches above, loose matches on Subject: below --
2022-02-15 17:22 Reworking TTMs LRU handling Christian König
2022-02-15 17:22 ` [PATCH 5/6] drm/ttm: rework bulk move handling v4 Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.