All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 01/11] drm/ttm: Make LRU removal optional.
@ 2019-05-14 12:31 Christian König
  2019-05-14 12:31 ` [PATCH 02/11] drm/ttm: fix busy memory to fail other user v8 Christian König
                   ` (3 more replies)
  0 siblings, 4 replies; 30+ messages in thread
From: Christian König @ 2019-05-14 12:31 UTC (permalink / raw)
  To: Marek.Olsak-5C7GfCeVMHo, David1.Zhou-5C7GfCeVMHo,
	Prike.Liang-5C7GfCeVMHo,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

We have already been doing this for DMA-buf imports and also for
amdgpu VM BOs for quite a while now.

If this doesn't run into any problems we are probably going
to stop removing BOs from the LRU altogether.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  9 +++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  4 ++--
 drivers/gpu/drm/qxl/qxl_release.c             |  2 +-
 drivers/gpu/drm/radeon/radeon_gem.c           |  2 +-
 drivers/gpu/drm/radeon/radeon_object.c        |  2 +-
 drivers/gpu/drm/ttm/ttm_execbuf_util.c        | 20 +++++++++++--------
 drivers/gpu/drm/virtio/virtgpu_ioctl.c        |  2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c      |  3 ++-
 drivers/gpu/drm/vmwgfx/vmwgfx_validation.h    |  2 +-
 include/drm/ttm/ttm_bo_driver.h               |  5 ++++-
 include/drm/ttm/ttm_execbuf_util.h            |  3 ++-
 13 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index e1cae4a37113..647e18f9e136 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -574,7 +574,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
 	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
 
 	ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
-				     false, &ctx->duplicates);
+				     false, &ctx->duplicates, true);
 	if (!ret)
 		ctx->reserved = true;
 	else {
@@ -647,7 +647,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
 	}
 
 	ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
-				     false, &ctx->duplicates);
+				     false, &ctx->duplicates, true);
 	if (!ret)
 		ctx->reserved = true;
 	else
@@ -1800,7 +1800,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 	}
 
 	/* Reserve all BOs and page tables for validation */
-	ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
+	ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates,
+				     true);
 	WARN(!list_empty(&duplicates), "Duplicates should be empty");
 	if (ret)
 		goto out_free;
@@ -2006,7 +2007,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	}
 
 	ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
-				     false, &duplicate_save);
+				     false, &duplicate_save, true);
 	if (ret) {
 		pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
 		goto ttm_reserve_fail;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index d72cc583ebd1..fff558cf385b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -648,7 +648,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	}
 
 	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
-				   &duplicates);
+				   &duplicates, true);
 	if (unlikely(r != 0)) {
 		if (r != -ERESTARTSYS)
 			DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 54dd02a898b9..06f83cac0d3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -79,7 +79,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	list_add(&csa_tv.head, &list);
 	amdgpu_vm_get_pd_bo(vm, &list, &pd);
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
+	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL, true);
 	if (r) {
 		DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 7b840367004c..d513a5ad03dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -171,7 +171,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 
 	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
+	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates, true);
 	if (r) {
 		dev_err(adev->dev, "leaking bo va because "
 			"we fail to reserve bo (%d)\n", r);
@@ -608,7 +608,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
+	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates, true);
 	if (r)
 		goto error_unref;
 
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index 30f85f0130cb..49f9a9385393 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -256,7 +256,7 @@ int qxl_release_reserve_list(struct qxl_release *release, bool no_intr)
 		return 0;
 
 	ret = ttm_eu_reserve_buffers(&release->ticket, &release->bos,
-				     !no_intr, NULL);
+				     !no_intr, NULL, true);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 44617dec8183..7411e69e2712 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -559,7 +559,7 @@ static void radeon_gem_va_update_vm(struct radeon_device *rdev,
 	if (!vm_bos)
 		return;
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
+	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL, true);
 	if (r)
 		goto error_free;
 
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 833e909706a9..36683de0300b 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -539,7 +539,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
 	u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);
 
 	INIT_LIST_HEAD(&duplicates);
-	r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates);
+	r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates, true);
 	if (unlikely(r != 0)) {
 		return r;
 	}
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 0075eb9a0b52..957ec375a4ba 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -69,7 +69,8 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
 	list_for_each_entry(entry, list, head) {
 		struct ttm_buffer_object *bo = entry->bo;
 
-		ttm_bo_add_to_lru(bo);
+		if (list_empty(&bo->lru))
+			ttm_bo_add_to_lru(bo);
 		reservation_object_unlock(bo->resv);
 	}
 	spin_unlock(&glob->lru_lock);
@@ -93,7 +94,7 @@ EXPORT_SYMBOL(ttm_eu_backoff_reservation);
 
 int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
 			   struct list_head *list, bool intr,
-			   struct list_head *dups)
+			   struct list_head *dups, bool del_lru)
 {
 	struct ttm_bo_global *glob;
 	struct ttm_validate_buffer *entry;
@@ -172,11 +173,11 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
 		list_add(&entry->head, list);
 	}
 
-	if (ticket)
-		ww_acquire_done(ticket);
-	spin_lock(&glob->lru_lock);
-	ttm_eu_del_from_lru_locked(list);
-	spin_unlock(&glob->lru_lock);
+	if (del_lru) {
+		spin_lock(&glob->lru_lock);
+		ttm_eu_del_from_lru_locked(list);
+		spin_unlock(&glob->lru_lock);
+	}
 	return 0;
 }
 EXPORT_SYMBOL(ttm_eu_reserve_buffers);
@@ -203,7 +204,10 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
 			reservation_object_add_shared_fence(bo->resv, fence);
 		else
 			reservation_object_add_excl_fence(bo->resv, fence);
-		ttm_bo_add_to_lru(bo);
+		if (list_empty(&bo->lru))
+			ttm_bo_add_to_lru(bo);
+		else
+			ttm_bo_move_to_lru_tail(bo, NULL);
 		reservation_object_unlock(bo->resv);
 	}
 	spin_unlock(&glob->lru_lock);
diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index 161b80fee492..5cffaa24259f 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -63,7 +63,7 @@ static int virtio_gpu_object_list_validate(struct ww_acquire_ctx *ticket,
 	struct virtio_gpu_object *qobj;
 	int ret;
 
-	ret = ttm_eu_reserve_buffers(ticket, head, true, NULL);
+	ret = ttm_eu_reserve_buffers(ticket, head, true, NULL, true);
 	if (ret != 0)
 		return ret;
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index a7c30e567f09..d28cbedba0b5 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -465,7 +465,8 @@ vmw_resource_check_buffer(struct ww_acquire_ctx *ticket,
 	val_buf->bo = &res->backup->base;
 	val_buf->num_shared = 0;
 	list_add_tail(&val_buf->head, &val_list);
-	ret = ttm_eu_reserve_buffers(ticket, &val_list, interruptible, NULL);
+	ret = ttm_eu_reserve_buffers(ticket, &val_list, interruptible, NULL,
+				     true);
 	if (unlikely(ret != 0))
 		goto out_no_reserve;
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
index 3b396fea40d7..ac435b51f4eb 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
@@ -165,7 +165,7 @@ vmw_validation_bo_reserve(struct vmw_validation_context *ctx,
 			  bool intr)
 {
 	return ttm_eu_reserve_buffers(&ctx->ticket, &ctx->bo_list, intr,
-				      NULL);
+				      NULL, true);
 }
 
 /**
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index c008346c2401..fc0d995ac90d 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -769,7 +769,10 @@ static inline void ttm_bo_unreserve(struct ttm_buffer_object *bo)
 {
 	if (!(bo->mem.placement & TTM_PL_FLAG_NO_EVICT)) {
 		spin_lock(&bo->bdev->glob->lru_lock);
-		ttm_bo_add_to_lru(bo);
+		if (list_empty(&bo->lru))
+			ttm_bo_add_to_lru(bo);
+		else
+			ttm_bo_move_to_lru_tail(bo, NULL);
 		spin_unlock(&bo->bdev->glob->lru_lock);
 	}
 	reservation_object_unlock(bo->resv);
diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h
index 621615fa7728..7e46cc678e7e 100644
--- a/include/drm/ttm/ttm_execbuf_util.h
+++ b/include/drm/ttm/ttm_execbuf_util.h
@@ -70,6 +70,7 @@ extern void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
  * @list:    thread private list of ttm_validate_buffer structs.
  * @intr:    should the wait be interruptible
  * @dups:    [out] optional list of duplicates.
+ * @del_lru: true if BOs should be removed from the LRU.
  *
  * Tries to reserve bos pointed to by the list entries for validation.
  * If the function returns 0, all buffers are marked as "unfenced",
@@ -98,7 +99,7 @@ extern void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
 
 extern int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
 				  struct list_head *list, bool intr,
-				  struct list_head *dups);
+				  struct list_head *dups, bool del_lru);
 
 /**
  * function ttm_eu_fence_buffer_objects.
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 02/11] drm/ttm: fix busy memory to fail other user v8
  2019-05-14 12:31 [PATCH 01/11] drm/ttm: Make LRU removal optional Christian König
@ 2019-05-14 12:31 ` Christian König
       [not found]   ` <20190514123127.1650-2-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2019-05-14 12:31 ` [PATCH 07/11] drm/ttm: immediately move BOs to the new LRU Christian König
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 30+ messages in thread
From: Christian König @ 2019-05-14 12:31 UTC (permalink / raw)
  To: Marek.Olsak, David1.Zhou, Prike.Liang, dri-devel, amd-gfx

From: Chunming Zhou <david1.zhou@amd.com>

A heavy GPU job could occupy memory for a long time, which leads to other users failing to get memory.

Basically pick up Christian's idea:

1. Reserve the BO in DC using a ww_mutex ticket (trivial).
2. If we then run into this EBUSY condition in TTM check if the BO we need memory for (or rather the ww_mutex of its reservation object) has a ticket assigned.
3. If we have a ticket we grab a reference to the first BO on the LRU, drop the LRU lock and try to grab the reservation lock with the ticket.
4. If getting the reservation lock with the ticket succeeded we check if the BO is still the first one on the LRU in question (the BO could have moved).
5. If the BO is still the first one on the LRU in question we try to evict it as we would evict any other BO.
6. If any of the "If's" above fail we just back off and return -EBUSY.

v2: fix some minor check
v3: address Christian v2 comments.
v4: fix some missing
v5: handle first_bo unlock and bo_get/put
v6: abstract a unified iteration function, and handle all possible use cases, not only pinned BOs.
v7: pass request bo->resv to ttm_bo_evict_first
v8 (chk): minimal coding style fix

Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 113 +++++++++++++++++++++++++++++------
 1 file changed, 96 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 2845fceb2fbd..e634d3a36923 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -766,11 +766,13 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
  * b. Otherwise, trylock it.
  */
 static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
-			struct ttm_operation_ctx *ctx, bool *locked)
+			struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
 {
 	bool ret = false;
 
 	*locked = false;
+	if (busy)
+		*busy = false;
 	if (bo->resv == ctx->resv) {
 		reservation_object_assert_held(bo->resv);
 		if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
@@ -779,35 +781,46 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
 	} else {
 		*locked = reservation_object_trylock(bo->resv);
 		ret = *locked;
+		if (!ret && busy)
+			*busy = true;
 	}
 
 	return ret;
 }
 
-static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
-			       uint32_t mem_type,
-			       const struct ttm_place *place,
-			       struct ttm_operation_ctx *ctx)
+static struct ttm_buffer_object*
+ttm_mem_find_evitable_bo(struct ttm_bo_device *bdev,
+			 struct ttm_mem_type_manager *man,
+			 const struct ttm_place *place,
+			 struct ttm_operation_ctx *ctx,
+			 struct ttm_buffer_object **first_bo,
+			 bool *locked)
 {
-	struct ttm_bo_global *glob = bdev->glob;
-	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
 	struct ttm_buffer_object *bo = NULL;
-	bool locked = false;
-	unsigned i;
-	int ret;
+	int i;
 
-	spin_lock(&glob->lru_lock);
+	if (first_bo)
+		*first_bo = NULL;
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		list_for_each_entry(bo, &man->lru[i], lru) {
-			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
+			bool busy = false;
+
+			if (!ttm_bo_evict_swapout_allowable(bo, ctx, locked,
+							    &busy)) {
+				if (first_bo && !(*first_bo) && busy) {
+					ttm_bo_get(bo);
+					*first_bo = bo;
+				}
 				continue;
+			}
 
 			if (place && !bdev->driver->eviction_valuable(bo,
 								      place)) {
-				if (locked)
+				if (*locked)
 					reservation_object_unlock(bo->resv);
 				continue;
 			}
+
 			break;
 		}
 
@@ -818,9 +831,69 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 		bo = NULL;
 	}
 
+	return bo;
+}
+
+static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
+			       uint32_t mem_type,
+			       const struct ttm_place *place,
+			       struct ttm_operation_ctx *ctx,
+			       struct reservation_object *request_resv)
+{
+	struct ttm_bo_global *glob = bdev->glob;
+	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
+	struct ttm_buffer_object *bo = NULL, *first_bo = NULL;
+	bool locked = false;
+	int ret;
+
+	spin_lock(&glob->lru_lock);
+	bo = ttm_mem_find_evitable_bo(bdev, man, place, ctx, &first_bo,
+				      &locked);
 	if (!bo) {
+		struct ww_acquire_ctx *acquire_ctx = request_resv->lock.ctx;
+		struct ttm_operation_ctx busy_ctx;
+
 		spin_unlock(&glob->lru_lock);
-		return -EBUSY;
+		/* check if other user occupy memory too long time */
+		if (!first_bo || !request_resv || !request_resv->lock.ctx) {
+			if (first_bo)
+				ttm_bo_put(first_bo);
+			return -EBUSY;
+		}
+		if (first_bo->resv == request_resv) {
+			ttm_bo_put(first_bo);
+			return -EBUSY;
+		}
+		if (ctx->interruptible)
+			ret = ww_mutex_lock_interruptible(&first_bo->resv->lock,
+							  acquire_ctx);
+		else
+			ret = ww_mutex_lock(&first_bo->resv->lock,
+					    acquire_ctx);
+		if (ret) {
+			ttm_bo_put(first_bo);
+			return ret;
+		}
+		spin_lock(&glob->lru_lock);
+		/* previous busy resv lock is held by above, idle now,
+		 * so let them evictable.
+		 */
+		busy_ctx.interruptible = ctx->interruptible;
+		busy_ctx.no_wait_gpu   = ctx->no_wait_gpu;
+		busy_ctx.resv	       = first_bo->resv;
+		busy_ctx.flags	       = TTM_OPT_FLAG_ALLOW_RES_EVICT;
+
+		bo = ttm_mem_find_evitable_bo(bdev, man, place, &busy_ctx, NULL,
+					      &locked);
+		if (bo && (bo->resv == first_bo->resv))
+			locked = true;
+		else if (bo)
+			ww_mutex_unlock(&first_bo->resv->lock);
+		if (!bo) {
+			spin_unlock(&glob->lru_lock);
+			ttm_bo_put(first_bo);
+			return -EBUSY;
+		}
 	}
 
 	kref_get(&bo->list_kref);
@@ -829,11 +902,15 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 		ret = ttm_bo_cleanup_refs(bo, ctx->interruptible,
 					  ctx->no_wait_gpu, locked);
 		kref_put(&bo->list_kref, ttm_bo_release_list);
+		if (first_bo)
+			ttm_bo_put(first_bo);
 		return ret;
 	}
 
 	ttm_bo_del_from_lru(bo);
 	spin_unlock(&glob->lru_lock);
+	if (first_bo)
+		ttm_bo_put(first_bo);
 
 	ret = ttm_bo_evict(bo, ctx);
 	if (locked) {
@@ -907,7 +984,7 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
 			return ret;
 		if (mem->mm_node)
 			break;
-		ret = ttm_mem_evict_first(bdev, mem_type, place, ctx);
+		ret = ttm_mem_evict_first(bdev, mem_type, place, ctx, bo->resv);
 		if (unlikely(ret != 0))
 			return ret;
 	} while (1);
@@ -1401,7 +1478,8 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		while (!list_empty(&man->lru[i])) {
 			spin_unlock(&glob->lru_lock);
-			ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx);
+			ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx,
+						  NULL);
 			if (ret)
 				return ret;
 			spin_lock(&glob->lru_lock);
@@ -1772,7 +1850,8 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
 	spin_lock(&glob->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
-			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked)) {
+			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
+							   NULL)) {
 				ret = 0;
 				break;
 			}
-- 
2.17.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 03/11] drm/ttm: remove the backing store if no placement is given
       [not found] ` <20190514123127.1650-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2019-05-14 12:31   ` Christian König
  2019-05-14 12:31   ` [PATCH 04/11] drm/ttm: return immediately in case of a signal Christian König
                     ` (6 subsequent siblings)
  7 siblings, 0 replies; 30+ messages in thread
From: Christian König @ 2019-05-14 12:31 UTC (permalink / raw)
  To: Marek.Olsak-5C7GfCeVMHo, David1.Zhou-5C7GfCeVMHo,
	Prike.Liang-5C7GfCeVMHo,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Pipeline removal of the BO's backing store when no placement is given
during validation.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index e634d3a36923..6e9ff94a085d 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1237,6 +1237,18 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
 	uint32_t new_flags;
 
 	reservation_object_assert_held(bo->resv);
+
+	/*
+	 * Remove the backing store if no placement is given.
+	 */
+	if (!placement->num_placement && !placement->num_busy_placement) {
+		ret = ttm_bo_pipeline_gutting(bo);
+		if (ret)
+			return ret;
+
+		return ttm_tt_create(bo, false);
+	}
+
 	/*
 	 * Check whether we need to move buffer.
 	 */
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 04/11] drm/ttm: return immediately in case of a signal
       [not found] ` <20190514123127.1650-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2019-05-14 12:31   ` [PATCH 03/11] drm/ttm: remove the backing store if no placement is given Christian König
@ 2019-05-14 12:31   ` Christian König
  2019-05-14 12:31   ` [PATCH 05/11] drm/ttm: remove manual placement preference Christian König
                     ` (5 subsequent siblings)
  7 siblings, 0 replies; 30+ messages in thread
From: Christian König @ 2019-05-14 12:31 UTC (permalink / raw)
  To: Marek.Olsak-5C7GfCeVMHo, David1.Zhou-5C7GfCeVMHo,
	Prike.Liang-5C7GfCeVMHo,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

When a signal arrives we should return immediately for
handling it and not try other placements first.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 6e9ff94a085d..6f692f92d0f4 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1055,7 +1055,6 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 	uint32_t cur_flags = 0;
 	bool type_found = false;
 	bool type_ok = false;
-	bool has_erestartsys = false;
 	int i, ret;
 
 	ret = reservation_object_reserve_shared(bo->resv, 1);
@@ -1146,8 +1145,8 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 			mem->placement = cur_flags;
 			return 0;
 		}
-		if (ret == -ERESTARTSYS)
-			has_erestartsys = true;
+		if (ret && ret != -EBUSY)
+			return ret;
 	}
 
 	if (!type_found) {
@@ -1155,7 +1154,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 		return -EINVAL;
 	}
 
-	return (has_erestartsys) ? -ERESTARTSYS : -ENOMEM;
+	return -ENOMEM;
 }
 EXPORT_SYMBOL(ttm_bo_mem_space);
 
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 05/11] drm/ttm: remove manual placement preference
       [not found] ` <20190514123127.1650-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2019-05-14 12:31   ` [PATCH 03/11] drm/ttm: remove the backing store if no placement is given Christian König
  2019-05-14 12:31   ` [PATCH 04/11] drm/ttm: return immediately in case of a signal Christian König
@ 2019-05-14 12:31   ` Christian König
  2019-05-14 12:31   ` [PATCH 06/11] drm/ttm: cleanup ttm_bo_mem_space Christian König
                     ` (4 subsequent siblings)
  7 siblings, 0 replies; 30+ messages in thread
From: Christian König @ 2019-05-14 12:31 UTC (permalink / raw)
  To: Marek.Olsak-5C7GfCeVMHo, David1.Zhou-5C7GfCeVMHo,
	Prike.Liang-5C7GfCeVMHo,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

If drivers don't prefer a system memory placement
they should not put it into the placement list first.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 6f692f92d0f4..1fe302dee1a8 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1088,8 +1088,12 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 		ttm_flag_masked(&cur_flags, place->flags,
 				~TTM_PL_MASK_MEMTYPE);
 
-		if (mem_type == TTM_PL_SYSTEM)
-			break;
+		if (mem_type == TTM_PL_SYSTEM) {
+			mem->mem_type = mem_type;
+			mem->placement = cur_flags;
+			mem->mm_node = NULL;
+			return 0;
+		}
 
 		ret = (*man->func->get_node)(man, bo, place, mem);
 		if (unlikely(ret))
@@ -1101,16 +1105,12 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 				(*man->func->put_node)(man, mem);
 				return ret;
 			}
-			break;
+			mem->mem_type = mem_type;
+			mem->placement = cur_flags;
+			return 0;
 		}
 	}
 
-	if ((type_ok && (mem_type == TTM_PL_SYSTEM)) || mem->mm_node) {
-		mem->mem_type = mem_type;
-		mem->placement = cur_flags;
-		return 0;
-	}
-
 	for (i = 0; i < placement->num_busy_placement; ++i) {
 		const struct ttm_place *place = &placement->busy_placement[i];
 
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 06/11] drm/ttm: cleanup ttm_bo_mem_space
       [not found] ` <20190514123127.1650-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (2 preceding siblings ...)
  2019-05-14 12:31   ` [PATCH 05/11] drm/ttm: remove manual placement preference Christian König
@ 2019-05-14 12:31   ` Christian König
  2019-05-14 12:31   ` [PATCH 08/11] drm/ttm: put new BOs immediately on the LRU Christian König
                     ` (3 subsequent siblings)
  7 siblings, 0 replies; 30+ messages in thread
From: Christian König @ 2019-05-14 12:31 UTC (permalink / raw)
  To: Marek.Olsak-5C7GfCeVMHo, David1.Zhou-5C7GfCeVMHo,
	Prike.Liang-5C7GfCeVMHo,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

We tried this once before, but that turned out to be more
complicated than thought. With all the right prerequisites
it looks like we can do this now.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 128 ++++++++++++++++++-----------------
 1 file changed, 67 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 1fe302dee1a8..ec0bcc0241a8 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -969,13 +969,12 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
  * space, or we've evicted everything and there isn't enough space.
  */
 static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
-					uint32_t mem_type,
-					const struct ttm_place *place,
-					struct ttm_mem_reg *mem,
-					struct ttm_operation_ctx *ctx)
+				  const struct ttm_place *place,
+				  struct ttm_mem_reg *mem,
+				  struct ttm_operation_ctx *ctx)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
-	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
+	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
 	int ret;
 
 	do {
@@ -984,11 +983,12 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
 			return ret;
 		if (mem->mm_node)
 			break;
-		ret = ttm_mem_evict_first(bdev, mem_type, place, ctx, bo->resv);
+		ret = ttm_mem_evict_first(bdev, mem->mem_type, place,
+					  ctx, bo->resv);
 		if (unlikely(ret != 0))
 			return ret;
 	} while (1);
-	mem->mem_type = mem_type;
+
 	return ttm_bo_add_move_fence(bo, man, mem);
 }
 
@@ -1036,6 +1036,51 @@ static bool ttm_bo_mt_compatible(struct ttm_mem_type_manager *man,
 	return true;
 }
 
+/**
+ * ttm_bo_mem_placement - check if placement is compatible
+ * @bo: BO to find memory for
+ * @place: where to search
+ * @mem: the memory object to fill in
+ * @ctx: operation context
+ *
+ * Check if placement is compatible and fill in mem structure.
+ * Returns -EBUSY if placement won't work or negative error code.
+ * 0 when placement can be used.
+ */
+static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
+				const struct ttm_place *place,
+				struct ttm_mem_reg *mem,
+				struct ttm_operation_ctx *ctx)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	uint32_t mem_type = TTM_PL_SYSTEM;
+	struct ttm_mem_type_manager *man;
+	uint32_t cur_flags = 0;
+	int ret;
+
+	ret = ttm_mem_type_from_place(place, &mem_type);
+	if (ret)
+		return ret;
+
+	man = &bdev->man[mem_type];
+	if (!man->has_type || !man->use_type)
+		return -EBUSY;
+
+	if (!ttm_bo_mt_compatible(man, mem_type, place, &cur_flags))
+		return -EBUSY;
+
+	cur_flags = ttm_bo_select_caching(man, bo->mem.placement, cur_flags);
+	/*
+	 * Use the access and other non-mapping-related flag bits from
+	 * the memory placement flags to the current flags
+	 */
+	ttm_flag_masked(&cur_flags, place->flags, ~TTM_PL_MASK_MEMTYPE);
+
+	mem->mem_type = mem_type;
+	mem->placement = cur_flags;
+	return 0;
+}
+
 /**
  * Creates space for memory region @mem according to its type.
  *
@@ -1050,11 +1095,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 			struct ttm_operation_ctx *ctx)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
-	struct ttm_mem_type_manager *man;
-	uint32_t mem_type = TTM_PL_SYSTEM;
-	uint32_t cur_flags = 0;
 	bool type_found = false;
-	bool type_ok = false;
 	int i, ret;
 
 	ret = reservation_object_reserve_shared(bo->resv, 1);
@@ -1064,37 +1105,20 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 	mem->mm_node = NULL;
 	for (i = 0; i < placement->num_placement; ++i) {
 		const struct ttm_place *place = &placement->placement[i];
+		struct ttm_mem_type_manager *man;
 
-		ret = ttm_mem_type_from_place(place, &mem_type);
+		ret = ttm_bo_mem_placement(bo, place, mem, ctx);
+		if (ret == -EBUSY)
+			continue;
 		if (ret)
 			return ret;
-		man = &bdev->man[mem_type];
-		if (!man->has_type || !man->use_type)
-			continue;
-
-		type_ok = ttm_bo_mt_compatible(man, mem_type, place,
-						&cur_flags);
-
-		if (!type_ok)
-			continue;
 
 		type_found = true;
-		cur_flags = ttm_bo_select_caching(man, bo->mem.placement,
-						  cur_flags);
-		/*
-		 * Use the access and other non-mapping-related flag bits from
-		 * the memory placement flags to the current flags
-		 */
-		ttm_flag_masked(&cur_flags, place->flags,
-				~TTM_PL_MASK_MEMTYPE);
-
-		if (mem_type == TTM_PL_SYSTEM) {
-			mem->mem_type = mem_type;
-			mem->placement = cur_flags;
-			mem->mm_node = NULL;
+		mem->mm_node = NULL;
+		if (mem->mem_type == TTM_PL_SYSTEM)
 			return 0;
-		}
 
+		man = &bdev->man[mem->mem_type];
 		ret = (*man->func->get_node)(man, bo, place, mem);
 		if (unlikely(ret))
 			return ret;
@@ -1105,8 +1129,6 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 				(*man->func->put_node)(man, mem);
 				return ret;
 			}
-			mem->mem_type = mem_type;
-			mem->placement = cur_flags;
 			return 0;
 		}
 	}
@@ -1114,37 +1136,21 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 	for (i = 0; i < placement->num_busy_placement; ++i) {
 		const struct ttm_place *place = &placement->busy_placement[i];
 
-		ret = ttm_mem_type_from_place(place, &mem_type);
+		ret = ttm_bo_mem_placement(bo, place, mem, ctx);
+		if (ret == -EBUSY)
+			continue;
 		if (ret)
 			return ret;
-		man = &bdev->man[mem_type];
-		if (!man->has_type || !man->use_type)
-			continue;
-		if (!ttm_bo_mt_compatible(man, mem_type, place, &cur_flags))
-			continue;
 
 		type_found = true;
-		cur_flags = ttm_bo_select_caching(man, bo->mem.placement,
-						  cur_flags);
-		/*
-		 * Use the access and other non-mapping-related flag bits from
-		 * the memory placement flags to the current flags
-		 */
-		ttm_flag_masked(&cur_flags, place->flags,
-				~TTM_PL_MASK_MEMTYPE);
-
-		if (mem_type == TTM_PL_SYSTEM) {
-			mem->mem_type = mem_type;
-			mem->placement = cur_flags;
-			mem->mm_node = NULL;
+		mem->mm_node = NULL;
+		if (mem->mem_type == TTM_PL_SYSTEM)
 			return 0;
-		}
 
-		ret = ttm_bo_mem_force_space(bo, mem_type, place, mem, ctx);
-		if (ret == 0 && mem->mm_node) {
-			mem->placement = cur_flags;
+		ret = ttm_bo_mem_force_space(bo, place, mem, ctx);
+		if (ret == 0 && mem->mm_node)
 			return 0;
-		}
+
 		if (ret && ret != -EBUSY)
 			return ret;
 	}
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 07/11] drm/ttm: immediately move BOs to the new LRU
  2019-05-14 12:31 [PATCH 01/11] drm/ttm: Make LRU removal optional Christian König
  2019-05-14 12:31 ` [PATCH 02/11] drm/ttm: fix busy memory to fail other user v8 Christian König
@ 2019-05-14 12:31 ` Christian König
  2019-05-14 12:31 ` [PATCH 10/11] drm/amd/display: use ttm_eu_reserve_buffers instead of amdgpu_bo_reserve v2 Christian König
       [not found] ` <20190514123127.1650-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  3 siblings, 0 replies; 30+ messages in thread
From: Christian König @ 2019-05-14 12:31 UTC (permalink / raw)
  To: Marek.Olsak, David1.Zhou, Prike.Liang, dri-devel, amd-gfx

Move BOs which are currently in the system domain to
the new LRU before allocating backing space.

This makes sure that we always have enough entries on the
LRU to allow for other processes to wait for an operation
to complete.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 45 ++++++++++++++++++++++++++----------
 1 file changed, 33 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index ec0bcc0241a8..233bfb86068b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -166,17 +166,17 @@ static void ttm_bo_release_list(struct kref *list_kref)
 	ttm_mem_global_free(bdev->glob->mem_glob, acc_size);
 }
 
-void ttm_bo_add_to_lru(struct ttm_buffer_object *bo)
+static void ttm_bo_add_mem_to_lru(struct ttm_buffer_object *bo,
+				  struct ttm_mem_reg *mem)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
 	struct ttm_mem_type_manager *man;
 
 	reservation_object_assert_held(bo->resv);
+	BUG_ON(!list_empty(&bo->lru));
 
-	if (!(bo->mem.placement & TTM_PL_FLAG_NO_EVICT)) {
-		BUG_ON(!list_empty(&bo->lru));
-
-		man = &bdev->man[bo->mem.mem_type];
+	if (!(mem->placement & TTM_PL_FLAG_NO_EVICT)) {
+		man = &bdev->man[mem->mem_type];
 		list_add_tail(&bo->lru, &man->lru[bo->priority]);
 		kref_get(&bo->list_kref);
 
@@ -188,6 +188,11 @@ void ttm_bo_add_to_lru(struct ttm_buffer_object *bo)
 		}
 	}
 }
+
+void ttm_bo_add_to_lru(struct ttm_buffer_object *bo)
+{
+	ttm_bo_add_mem_to_lru(bo, &bo->mem);
+}
 EXPORT_SYMBOL(ttm_bo_add_to_lru);
 
 static void ttm_bo_ref_bug(struct kref *list_kref)
@@ -1078,6 +1083,14 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
 
 	mem->mem_type = mem_type;
 	mem->placement = cur_flags;
+
+	if (bo->mem.mem_type == TTM_PL_SYSTEM && !list_empty(&bo->lru)) {
+		spin_lock(&bo->bdev->glob->lru_lock);
+		ttm_bo_del_from_lru(bo);
+		ttm_bo_add_mem_to_lru(bo, mem);
+		spin_unlock(&bo->bdev->glob->lru_lock);
+	}
+
 	return 0;
 }
 
@@ -1111,7 +1124,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 		if (ret == -EBUSY)
 			continue;
 		if (ret)
-			return ret;
+			goto error;
 
 		type_found = true;
 		mem->mm_node = NULL;
@@ -1121,13 +1134,13 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 		man = &bdev->man[mem->mem_type];
 		ret = (*man->func->get_node)(man, bo, place, mem);
 		if (unlikely(ret))
-			return ret;
+			goto error;
 
 		if (mem->mm_node) {
 			ret = ttm_bo_add_move_fence(bo, man, mem);
 			if (unlikely(ret)) {
 				(*man->func->put_node)(man, mem);
-				return ret;
+				goto error;
 			}
 			return 0;
 		}
@@ -1140,7 +1153,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 		if (ret == -EBUSY)
 			continue;
 		if (ret)
-			return ret;
+			goto error;
 
 		type_found = true;
 		mem->mm_node = NULL;
@@ -1152,15 +1165,23 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 			return 0;
 
 		if (ret && ret != -EBUSY)
-			return ret;
+			goto error;
 	}
 
+	ret = -ENOMEM;
 	if (!type_found) {
 		pr_err(TTM_PFX "No compatible memory type found\n");
-		return -EINVAL;
+		ret = -EINVAL;
 	}
 
-	return -ENOMEM;
+error:
+	if (bo->mem.mem_type == TTM_PL_SYSTEM && !list_empty(&bo->lru)) {
+		spin_lock(&bo->bdev->glob->lru_lock);
+		ttm_bo_move_to_lru_tail(bo, NULL);
+		spin_unlock(&bo->bdev->glob->lru_lock);
+	}
+
+	return ret;
 }
 EXPORT_SYMBOL(ttm_bo_mem_space);
 
-- 
2.17.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 08/11] drm/ttm: put new BOs immediately on the LRU
       [not found] ` <20190514123127.1650-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (3 preceding siblings ...)
  2019-05-14 12:31   ` [PATCH 06/11] drm/ttm: cleanup ttm_bo_mem_space Christian König
@ 2019-05-14 12:31   ` Christian König
  2019-05-14 12:31   ` [PATCH 09/11] drm/ttm: convert EDEADLK into EAGAIN Christian König
                     ` (2 subsequent siblings)
  7 siblings, 0 replies; 30+ messages in thread
From: Christian König @ 2019-05-14 12:31 UTC (permalink / raw)
  To: Marek.Olsak-5C7GfCeVMHo, David1.Zhou-5C7GfCeVMHo,
	Prike.Liang-5C7GfCeVMHo,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

This way they are available for eviction immediately.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 233bfb86068b..a301c876ae31 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1390,21 +1390,18 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
 		WARN_ON(!locked);
 	}
 
-	if (likely(!ret))
+	if (likely(!ret)) {
+		spin_lock(&bdev->glob->lru_lock);
+		ttm_bo_add_to_lru(bo);
+		spin_unlock(&bdev->glob->lru_lock);
 		ret = ttm_bo_validate(bo, placement, ctx);
+	}
 
 	if (unlikely(ret)) {
 		if (!resv)
 			ttm_bo_unreserve(bo);
 
 		ttm_bo_put(bo);
-		return ret;
-	}
-
-	if (resv && !(bo->mem.placement & TTM_PL_FLAG_NO_EVICT)) {
-		spin_lock(&bdev->glob->lru_lock);
-		ttm_bo_add_to_lru(bo);
-		spin_unlock(&bdev->glob->lru_lock);
 	}
 
 	return ret;
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 09/11] drm/ttm: convert EDEADLK into EAGAIN
       [not found] ` <20190514123127.1650-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (4 preceding siblings ...)
  2019-05-14 12:31   ` [PATCH 08/11] drm/ttm: put new BOs immediately on the LRU Christian König
@ 2019-05-14 12:31   ` Christian König
       [not found]     ` <20190514123127.1650-9-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2019-05-14 12:31   ` [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS Christian König
  2019-05-17 14:05   ` [PATCH 01/11] drm/ttm: Make LRU removal optional Zhou, David(ChunMing)
  7 siblings, 1 reply; 30+ messages in thread
From: Christian König @ 2019-05-14 12:31 UTC (permalink / raw)
  To: Marek.Olsak-5C7GfCeVMHo, David1.Zhou-5C7GfCeVMHo,
	Prike.Liang-5C7GfCeVMHo,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Let userspace try again if we really run into a deadlock during eviction.

This has a low chance of livelocking, but guarantees forward progress.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index a301c876ae31..ce85cd8b4970 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -877,6 +877,8 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 					    acquire_ctx);
 		if (ret) {
 			ttm_bo_put(first_bo);
+			if (ret == -EDEADLK)
+				ret = -EAGAIN;
 			return ret;
 		}
 		spin_lock(&glob->lru_lock);
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 10/11] drm/amd/display: use ttm_eu_reserve_buffers instead of amdgpu_bo_reserve v2
  2019-05-14 12:31 [PATCH 01/11] drm/ttm: Make LRU removal optional Christian König
  2019-05-14 12:31 ` [PATCH 02/11] drm/ttm: fix busy memory to fail other user v8 Christian König
  2019-05-14 12:31 ` [PATCH 07/11] drm/ttm: immediately move BOs to the new LRU Christian König
@ 2019-05-14 12:31 ` Christian König
       [not found] ` <20190514123127.1650-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  3 siblings, 0 replies; 30+ messages in thread
From: Christian König @ 2019-05-14 12:31 UTC (permalink / raw)
  To: Marek.Olsak, David1.Zhou, Prike.Liang, dri-devel, amd-gfx

From: Chunming Zhou <david1.zhou@amd.com>

Add a ticket for the display BO so that it can preempt busy BOs.

v2: fix stupid rebase error

Change-Id: I9f031cdcc8267de00e819ae303baa0a52df8ebb9
Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 21 ++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index d7f9d3998641..1c85ed772161 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -4176,6 +4176,9 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
 	struct amdgpu_device *adev;
 	struct amdgpu_bo *rbo;
 	struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old;
+	struct list_head list;
+	struct ttm_validate_buffer tv;
+	struct ww_acquire_ctx ticket;
 	uint64_t tiling_flags;
 	uint32_t domain;
 	int r;
@@ -4192,9 +4195,17 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
 	obj = new_state->fb->obj[0];
 	rbo = gem_to_amdgpu_bo(obj);
 	adev = amdgpu_ttm_adev(rbo->tbo.bdev);
-	r = amdgpu_bo_reserve(rbo, false);
-	if (unlikely(r != 0))
+	INIT_LIST_HEAD(&list);
+
+	tv.bo = &rbo->tbo;
+	tv.num_shared = 1;
+	list_add(&tv.head, &list);
+
+	r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL, true);
+	if (r) {
+		dev_err(adev->dev, "fail to reserve bo (%d)\n", r);
 		return r;
+	}
 
 	if (plane->type != DRM_PLANE_TYPE_CURSOR)
 		domain = amdgpu_display_supported_domains(adev);
@@ -4205,21 +4216,21 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
 	if (unlikely(r != 0)) {
 		if (r != -ERESTARTSYS)
 			DRM_ERROR("Failed to pin framebuffer with error %d\n", r);
-		amdgpu_bo_unreserve(rbo);
+		ttm_eu_backoff_reservation(&ticket, &list);
 		return r;
 	}
 
 	r = amdgpu_ttm_alloc_gart(&rbo->tbo);
 	if (unlikely(r != 0)) {
 		amdgpu_bo_unpin(rbo);
-		amdgpu_bo_unreserve(rbo);
+		ttm_eu_backoff_reservation(&ticket, &list);
 		DRM_ERROR("%p bind failed\n", rbo);
 		return r;
 	}
 
 	amdgpu_bo_get_tiling_flags(rbo, &tiling_flags);
 
-	amdgpu_bo_unreserve(rbo);
+	ttm_eu_backoff_reservation(&ticket, &list);
 
 	afb->address = amdgpu_bo_gpu_offset(rbo);
 
-- 
2.17.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
       [not found] ` <20190514123127.1650-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (5 preceding siblings ...)
  2019-05-14 12:31   ` [PATCH 09/11] drm/ttm: convert EDEADLK into EAGAIN Christian König
@ 2019-05-14 12:31   ` Christian König
       [not found]     ` <20190514123127.1650-11-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2019-05-14 19:33     ` [PATCH " Marek Olšák
  2019-05-17 14:05   ` [PATCH 01/11] drm/ttm: Make LRU removal optional Zhou, David(ChunMing)
  7 siblings, 2 replies; 30+ messages in thread
From: Christian König @ 2019-05-14 12:31 UTC (permalink / raw)
  To: Marek.Olsak-5C7GfCeVMHo, David1.Zhou-5C7GfCeVMHo,
	Prike.Liang-5C7GfCeVMHo,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

This avoids OOM situations when we have lots of threads
submitting at the same time.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index fff558cf385b..f9240a94217b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -648,7 +648,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	}
 
 	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
-				   &duplicates, true);
+				   &duplicates, false);
 	if (unlikely(r != 0)) {
 		if (r != -ERESTARTSYS)
 			DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re:[PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
       [not found]     ` <20190514123127.1650-11-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2019-05-14 13:12       ` Zhou, David(ChunMing)
  2019-05-14 13:47         ` [PATCH " Christian König
  0 siblings, 1 reply; 30+ messages in thread
From: Zhou, David(ChunMing) @ 2019-05-14 13:12 UTC (permalink / raw)
  To: Christian König, Olsak, Marek, Zhou, David(ChunMing),
	Liang, Prike, dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 1332 bytes --]

My only concern is how to refresh the LRU when a BO comes from the BO list.

-David

-------- Original Message --------
Subject: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
From: Christian König
To: "Olsak, Marek" ,"Zhou, David(ChunMing)" ,"Liang, Prike" ,dri-devel@lists.freedesktop.org,amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
CC:

[CAUTION: External Email]

This avoids OOM situations when we have lots of threads
submitting at the same time.

Signed-off-by: Christian König <christian.koenig-5C7GfCeVMHo@public.gmane.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index fff558cf385b..f9240a94217b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -648,7 +648,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
        }

        r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
-                                  &duplicates, true);
+                                  &duplicates, false);
        if (unlikely(r != 0)) {
                if (r != -ERESTARTSYS)
                        DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
--
2.17.1


[-- Attachment #1.2: Type: text/html, Size: 2670 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
  2019-05-14 13:12       ` Zhou, David(ChunMing)
@ 2019-05-14 13:47         ` Christian König
       [not found]           ` <f9017911-b08a-1f98-3fc9-98121bbde78a-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 30+ messages in thread
From: Christian König @ 2019-05-14 13:47 UTC (permalink / raw)
  To: Zhou, David(ChunMing), Olsak, Marek, Liang, Prike, dri-devel, amd-gfx


[-- Attachment #1.1: Type: text/plain, Size: 1422 bytes --]

Huh? What do you mean by that?

Christian.

Am 14.05.19 um 15:12 schrieb Zhou, David(ChunMing):
> my only concern is how to fresh LRU when bo is from bo list.
>
> -David
>
> -------- Original Message --------
> Subject: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU 
> during CS
> From: Christian König
> To: "Olsak, Marek" ,"Zhou, David(ChunMing)" ,"Liang, Prike" 
> ,dri-devel@lists.freedesktop.org,amd-gfx@lists.freedesktop.org
> CC:
>
> [CAUTION: External Email]
>
> This avoids OOM situations when we have lots of threads
> submitting at the same time.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index fff558cf385b..f9240a94217b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -648,7 +648,7 @@ static int amdgpu_cs_parser_bos(struct 
> amdgpu_cs_parser *p,
>         }
>
>         r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
> -                                  &duplicates, true);
> +                                  &duplicates, false);
>         if (unlikely(r != 0)) {
>                 if (r != -ERESTARTSYS)
>                         DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
> --
> 2.17.1
>


[-- Attachment #1.2: Type: text/html, Size: 3132 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re:[PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
       [not found]           ` <f9017911-b08a-1f98-3fc9-98121bbde78a-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-05-14 14:31             ` Zhou, David(ChunMing)
       [not found]               ` <-wsx1tz-kxfbz1yns7x33sra134gl11xhlux4lx3izissqr2httt4mb1vleyxgj8i7k6-q6ze8ub3ff8c4o0fxmx7niu76yg4-ybakue-3v14jw-ed5ol8ybh6o9-1ze886-hbstfi448pvq3pwhkj.1557844282594-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
  0 siblings, 1 reply; 30+ messages in thread
From: Zhou, David(ChunMing) @ 2019-05-14 14:31 UTC (permalink / raw)
  To: Koenig, Christian, Zhou, David(ChunMing),
	Olsak, Marek, Liang, Prike,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 2136 bytes --]

How do we refresh the LRU to keep its order aligned with the BO list passed from user space?

You can verify this with some games; performance can differ significantly between runs.

-David

-------- Original Message --------
Subject: Re: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
From: Christian König
To: "Zhou, David(ChunMing)" ,"Olsak, Marek" ,"Liang, Prike" ,dri-devel@lists.freedesktop.org,amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
CC:

[CAUTION: External Email]
Hui? What do you mean with that?

Christian.

Am 14.05.19 um 15:12 schrieb Zhou, David(ChunMing):
my only concern is how to fresh LRU when bo is from bo list.

-David

-------- Original Message --------
Subject: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
From: Christian König
To: "Olsak, Marek" ,"Zhou, David(ChunMing)" ,"Liang, Prike" ,dri-devel@lists.freedesktop.org,amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
CC:

[CAUTION: External Email]

This avoids OOM situations when we have lots of threads
submitting at the same time.

Signed-off-by: Christian König <christian.koenig-5C7GfCeVMHo@public.gmane.org><mailto:christian.koenig-5C7GfCeVMHo@public.gmane.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index fff558cf385b..f9240a94217b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -648,7 +648,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
        }

        r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
-                                  &duplicates, true);
+                                  &duplicates, false);
        if (unlikely(r != 0)) {
                if (r != -ERESTARTSYS)
                        DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
--
2.17.1



[-- Attachment #1.2: Type: text/html, Size: 3839 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
  2019-05-14 12:31   ` [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS Christian König
       [not found]     ` <20190514123127.1650-11-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2019-05-14 19:33     ` Marek Olšák
  2019-05-15  2:00       ` Liang, Prike
  1 sibling, 1 reply; 30+ messages in thread
From: Marek Olšák @ 2019-05-14 19:33 UTC (permalink / raw)
  To: Christian König; +Cc: Prike.Liang, amd-gfx mailing list, dri-devel


[-- Attachment #1.1: Type: text/plain, Size: 1599 bytes --]

This series fixes the OOM errors. However, if I torture the kernel driver
more, I can get it to deadlock and end up with unkillable processes. I can
also get an OOM error. I just ran the test 5 times:

AMD_DEBUG=testgdsmm glxgears & AMD_DEBUG=testgdsmm glxgears &
AMD_DEBUG=testgdsmm glxgears & AMD_DEBUG=testgdsmm glxgears &
AMD_DEBUG=testgdsmm glxgears

Marek

On Tue, May 14, 2019 at 8:31 AM Christian König <
ckoenig.leichtzumerken@gmail.com> wrote:

> This avoids OOM situations when we have lots of threads
> submitting at the same time.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index fff558cf385b..f9240a94217b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -648,7 +648,7 @@ static int amdgpu_cs_parser_bos(struct
> amdgpu_cs_parser *p,
>         }
>
>         r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
> -                                  &duplicates, true);
> +                                  &duplicates, false);
>         if (unlikely(r != 0)) {
>                 if (r != -ERESTARTSYS)
>                         DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
> --
> 2.17.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[-- Attachment #1.2: Type: text/html, Size: 2363 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 30+ messages in thread

* RE: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
  2019-05-14 19:33     ` [PATCH " Marek Olšák
@ 2019-05-15  2:00       ` Liang, Prike
       [not found]         ` <BYAPR12MB35256D8A0583B5DD019C2925FB090-ZGDeBxoHBPmbr42z19MNgwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 30+ messages in thread
From: Liang, Prike @ 2019-05-15  2:00 UTC (permalink / raw)
  To: Marek Olšák, Christian König
  Cc: amd-gfx mailing list, dri-devel


[-- Attachment #1.1: Type: text/plain, Size: 3202 bytes --]

Hi Christian ,

I just wonder whether, when we encounter an ENOMEM error while pinning amdgpu BOs, we can retry the validation as below.
With the following simple patch the Abaqus pinning issue is no longer observed.

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 11cbf63..72a32f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -902,11 +902,15 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
                        bo->placements[i].lpfn = lpfn;
                bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
        }
-
+retry:
        r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
        if (unlikely(r)) {
-               dev_err(adev->dev, "%p pin failed\n", bo);
-               goto error;
+                if (r == -ENOMEM){
+                        goto retry;
+                } else {
+                       dev_err(adev->dev, "%p pin failed\n", bo);
+                       goto error;
+                }
        }

        bo->pin_count = 1;


Thanks,
Prike

From: Marek Olšák <maraeo@gmail.com>
Sent: Wednesday, May 15, 2019 3:33 AM
To: Christian König <ckoenig.leichtzumerken@gmail.com>
Cc: Zhou, David(ChunMing) <David1.Zhou@amd.com>; Liang, Prike <Prike.Liang@amd.com>; dri-devel <dri-devel@lists.freedesktop.org>; amd-gfx mailing list <amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS

[CAUTION: External Email]
This series fixes the OOM errors. However, if I torture the kernel driver more, I can get it to deadlock and end up with unkillable processes. I can also get an OOM error. I just ran the test 5 times:

AMD_DEBUG=testgdsmm glxgears & AMD_DEBUG=testgdsmm glxgears & AMD_DEBUG=testgdsmm glxgears & AMD_DEBUG=testgdsmm glxgears & AMD_DEBUG=testgdsmm glxgears

Marek

On Tue, May 14, 2019 at 8:31 AM Christian König <ckoenig.leichtzumerken@gmail.com<mailto:ckoenig.leichtzumerken@gmail.com>> wrote:
This avoids OOM situations when we have lots of threads
submitting at the same time.

Signed-off-by: Christian König <christian.koenig@amd.com<mailto:christian.koenig@amd.com>>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index fff558cf385b..f9240a94217b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -648,7 +648,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
        }

        r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
-                                  &duplicates, true);
+                                  &duplicates, false);
        if (unlikely(r != 0)) {
                if (r != -ERESTARTSYS)
                        DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
--
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[-- Attachment #1.2: Type: text/html, Size: 9132 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
       [not found]         ` <BYAPR12MB35256D8A0583B5DD019C2925FB090-ZGDeBxoHBPmbr42z19MNgwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2019-05-15  7:04           ` Christian König
  2019-05-17  8:16             ` Liang, Prike
  0 siblings, 1 reply; 30+ messages in thread
From: Christian König @ 2019-05-15  7:04 UTC (permalink / raw)
  To: Liang, Prike, Marek Olšák
  Cc: Zhou, David(ChunMing), amd-gfx mailing list, dri-devel


[-- Attachment #1.1: Type: text/plain, Size: 4115 bytes --]

Hi Prike,

no, that can lead to massive problems in a real OOM situation and is not 
something we can do here.

Christian.

Am 15.05.19 um 04:00 schrieb Liang, Prike:
>
> Hi Christian ,
>
> I just wonder when encounter ENOMEM error during pin amdgpu BOs can we 
> retry validate again as below.
>
> With the following simply patch the Abaqus pinned issue not observed.
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>
> index 11cbf63..72a32f5 100644
>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>
> @@ -902,11 +902,15 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo 
> *bo, u32 domain,
>
> bo->placements[i].lpfn = lpfn;
>
>                 bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
>
>         }
>
> -
>
> +retry:
>
>         r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>
>         if (unlikely(r)) {
>
> -               dev_err(adev->dev, "%p pin failed\n", bo);
>
> -               goto error;
>
> +                if (r == -ENOMEM){
>
> +                        goto retry;
>
> +                } else {
>
> + dev_err(adev->dev, "%p pin failed\n", bo);
>
> +                       goto error;
>
> +                }
>
>         }
>
>         bo->pin_count = 1;
>
> Thanks,
>
> Prike
>
> *From:* Marek Olšák <maraeo-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
> *Sent:* Wednesday, May 15, 2019 3:33 AM
> *To:* Christian König <ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
> *Cc:* Zhou, David(ChunMing) <David1.Zhou-5C7GfCeVMHo@public.gmane.org>; Liang, Prike 
> <Prike.Liang-5C7GfCeVMHo@public.gmane.org>; dri-devel <dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; 
> amd-gfx mailing list <amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
> *Subject:* Re: [PATCH 11/11] drm/amdgpu: stop removing BOs from the 
> LRU during CS
>
> [CAUTION: External Email]
>
> This series fixes the OOM errors. However, if I torture the kernel 
> driver more, I can get it to deadlock and end up with unkillable 
> processes. I can also get an OOM error. I just ran the test 5 times:
>
> AMD_DEBUG=testgdsmm glxgears & AMD_DEBUG=testgdsmm glxgears & 
> AMD_DEBUG=testgdsmm glxgears & AMD_DEBUG=testgdsmm glxgears & 
> AMD_DEBUG=testgdsmm glxgears
>
> Marek
>
> On Tue, May 14, 2019 at 8:31 AM Christian König 
> <ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org 
> <mailto:ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>> wrote:
>
>     This avoids OOM situations when we have lots of threads
>     submitting at the same time.
>
>     Signed-off-by: Christian König <christian.koenig-5C7GfCeVMHo@public.gmane.org
>     <mailto:christian.koenig-5C7GfCeVMHo@public.gmane.org>>
>     ---
>      drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
>      1 file changed, 1 insertion(+), 1 deletion(-)
>
>     diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>     index fff558cf385b..f9240a94217b 100644
>     --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>     +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>     @@ -648,7 +648,7 @@ static int amdgpu_cs_parser_bos(struct
>     amdgpu_cs_parser *p,
>             }
>
>             r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
>     -                                  &duplicates, true);
>     +                                  &duplicates, false);
>             if (unlikely(r != 0)) {
>                     if (r != -ERESTARTSYS)
>     DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
>     -- 
>     2.17.1
>
>     _______________________________________________
>     amd-gfx mailing list
>     amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
>     https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>


[-- Attachment #1.2: Type: text/html, Size: 10904 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 02/11] drm/ttm: fix busy memory to fail other user v8
       [not found]   ` <20190514123127.1650-2-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2019-05-15  8:38     ` Daniel Vetter
  2019-05-15  8:45       ` Daniel Vetter
  0 siblings, 1 reply; 30+ messages in thread
From: Daniel Vetter @ 2019-05-15  8:38 UTC (permalink / raw)
  To: Christian König
  Cc: David1.Zhou-5C7GfCeVMHo, Marek.Olsak-5C7GfCeVMHo,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Prike.Liang-5C7GfCeVMHo,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Tue, May 14, 2019 at 02:31:18PM +0200, Christian König wrote:
> From: Chunming Zhou <david1.zhou@amd.com>
> 
> heavy gpu job could occupy memory long time, which lead other user fail to get memory.
> 
> basically pick up Christian idea:
> 
> 1. Reserve the BO in DC using a ww_mutex ticket (trivial).
> 2. If we then run into this EBUSY condition in TTM check if the BO we need memory for (or rather the ww_mutex of its reservation object) has a ticket assigned.
> 3. If we have a ticket we grab a reference to the first BO on the LRU, drop the LRU lock and try to grab the reservation lock with the ticket.
> 4. If getting the reservation lock with the ticket succeeded we check if the BO is still the first one on the LRU in question (the BO could have moved).
> 5. If the BO is still the first one on the LRU in question we try to evict it as we would evict any other BO.
> 6. If any of the "If's" above fail we just back off and return -EBUSY.
> 
> v2: fix some minor check
> v3: address Christian v2 comments.
> v4: fix some missing
> v5: handle first_bo unlock and bo_get/put
> v6: abstract unified iterate function, and handle all possible usecase not only pinned bo.
> v7: pass request bo->resv to ttm_bo_evict_first
> v8 (chk): minimal coding style fix
> 
> Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
> Reviewed-by: Christian König <christian.koenig@amd.com>

I think this closes a big gap between ttm and the bkl/struct_mutex
drivers - it's much easier to guarantee you can evict everything if
there's only a single lock :-)

Would be absolutely awesome if we could extract this as some kind of
building block, like we've done with lots of other ttm concepts already
(reservation_obj, fences, ...).

Just an aside really.
-Daniel

> ---
>  drivers/gpu/drm/ttm/ttm_bo.c | 113 +++++++++++++++++++++++++++++------
>  1 file changed, 96 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 2845fceb2fbd..e634d3a36923 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -766,11 +766,13 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>   * b. Otherwise, trylock it.
>   */
>  static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
> -			struct ttm_operation_ctx *ctx, bool *locked)
> +			struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
>  {
>  	bool ret = false;
>  
>  	*locked = false;
> +	if (busy)
> +		*busy = false;
>  	if (bo->resv == ctx->resv) {
>  		reservation_object_assert_held(bo->resv);
>  		if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
> @@ -779,35 +781,46 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
>  	} else {
>  		*locked = reservation_object_trylock(bo->resv);
>  		ret = *locked;
> +		if (!ret && busy)
> +			*busy = true;
>  	}
>  
>  	return ret;
>  }
>  
> -static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> -			       uint32_t mem_type,
> -			       const struct ttm_place *place,
> -			       struct ttm_operation_ctx *ctx)
> +static struct ttm_buffer_object*
> +ttm_mem_find_evitable_bo(struct ttm_bo_device *bdev,
> +			 struct ttm_mem_type_manager *man,
> +			 const struct ttm_place *place,
> +			 struct ttm_operation_ctx *ctx,
> +			 struct ttm_buffer_object **first_bo,
> +			 bool *locked)
>  {
> -	struct ttm_bo_global *glob = bdev->glob;
> -	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
>  	struct ttm_buffer_object *bo = NULL;
> -	bool locked = false;
> -	unsigned i;
> -	int ret;
> +	int i;
>  
> -	spin_lock(&glob->lru_lock);
> +	if (first_bo)
> +		*first_bo = NULL;
>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>  		list_for_each_entry(bo, &man->lru[i], lru) {
> -			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
> +			bool busy = false;
> +
> +			if (!ttm_bo_evict_swapout_allowable(bo, ctx, locked,
> +							    &busy)) {
> +				if (first_bo && !(*first_bo) && busy) {
> +					ttm_bo_get(bo);
> +					*first_bo = bo;
> +				}
>  				continue;
> +			}
>  
>  			if (place && !bdev->driver->eviction_valuable(bo,
>  								      place)) {
> -				if (locked)
> +				if (*locked)
>  					reservation_object_unlock(bo->resv);
>  				continue;
>  			}
> +
>  			break;
>  		}
>  
> @@ -818,9 +831,69 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>  		bo = NULL;
>  	}
>  
> +	return bo;
> +}
> +
> +static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> +			       uint32_t mem_type,
> +			       const struct ttm_place *place,
> +			       struct ttm_operation_ctx *ctx,
> +			       struct reservation_object *request_resv)
> +{
> +	struct ttm_bo_global *glob = bdev->glob;
> +	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
> +	struct ttm_buffer_object *bo = NULL, *first_bo = NULL;
> +	bool locked = false;
> +	int ret;
> +
> +	spin_lock(&glob->lru_lock);
> +	bo = ttm_mem_find_evitable_bo(bdev, man, place, ctx, &first_bo,
> +				      &locked);
>  	if (!bo) {
> +		struct ww_acquire_ctx *acquire_ctx = request_resv->lock.ctx;
> +		struct ttm_operation_ctx busy_ctx;
> +
>  		spin_unlock(&glob->lru_lock);
> -		return -EBUSY;
> +		/* check if other user occupy memory too long time */
> +		if (!first_bo || !request_resv || !request_resv->lock.ctx) {
> +			if (first_bo)
> +				ttm_bo_put(first_bo);
> +			return -EBUSY;
> +		}
> +		if (first_bo->resv == request_resv) {
> +			ttm_bo_put(first_bo);
> +			return -EBUSY;
> +		}
> +		if (ctx->interruptible)
> +			ret = ww_mutex_lock_interruptible(&first_bo->resv->lock,
> +							  acquire_ctx);
> +		else
> +			ret = ww_mutex_lock(&first_bo->resv->lock,
> +					    acquire_ctx);
> +		if (ret) {
> +			ttm_bo_put(first_bo);
> +			return ret;
> +		}
> +		spin_lock(&glob->lru_lock);
> +		/* previous busy resv lock is held by above, idle now,
> +		 * so let them evictable.
> +		 */
> +		busy_ctx.interruptible = ctx->interruptible;
> +		busy_ctx.no_wait_gpu   = ctx->no_wait_gpu;
> +		busy_ctx.resv	       = first_bo->resv;
> +		busy_ctx.flags	       = TTM_OPT_FLAG_ALLOW_RES_EVICT;
> +
> +		bo = ttm_mem_find_evitable_bo(bdev, man, place, &busy_ctx, NULL,
> +					      &locked);
> +		if (bo && (bo->resv == first_bo->resv))
> +			locked = true;
> +		else if (bo)
> +			ww_mutex_unlock(&first_bo->resv->lock);
> +		if (!bo) {
> +			spin_unlock(&glob->lru_lock);
> +			ttm_bo_put(first_bo);
> +			return -EBUSY;
> +		}
>  	}
>  
>  	kref_get(&bo->list_kref);
> @@ -829,11 +902,15 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>  		ret = ttm_bo_cleanup_refs(bo, ctx->interruptible,
>  					  ctx->no_wait_gpu, locked);
>  		kref_put(&bo->list_kref, ttm_bo_release_list);
> +		if (first_bo)
> +			ttm_bo_put(first_bo);
>  		return ret;
>  	}
>  
>  	ttm_bo_del_from_lru(bo);
>  	spin_unlock(&glob->lru_lock);
> +	if (first_bo)
> +		ttm_bo_put(first_bo);
>  
>  	ret = ttm_bo_evict(bo, ctx);
>  	if (locked) {
> @@ -907,7 +984,7 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
>  			return ret;
>  		if (mem->mm_node)
>  			break;
> -		ret = ttm_mem_evict_first(bdev, mem_type, place, ctx);
> +		ret = ttm_mem_evict_first(bdev, mem_type, place, ctx, bo->resv);
>  		if (unlikely(ret != 0))
>  			return ret;
>  	} while (1);
> @@ -1401,7 +1478,8 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>  		while (!list_empty(&man->lru[i])) {
>  			spin_unlock(&glob->lru_lock);
> -			ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx);
> +			ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx,
> +						  NULL);
>  			if (ret)
>  				return ret;
>  			spin_lock(&glob->lru_lock);
> @@ -1772,7 +1850,8 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
>  	spin_lock(&glob->lru_lock);
>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>  		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
> -			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked)) {
> +			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
> +							   NULL)) {
>  				ret = 0;
>  				break;
>  			}
> -- 
> 2.17.1
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 09/11] drm/ttm: convert EDEADLK into EAGAIN
       [not found]     ` <20190514123127.1650-9-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2019-05-15  8:40       ` Daniel Vetter
  2019-05-15  9:28         ` Christian König
  0 siblings, 1 reply; 30+ messages in thread
From: Daniel Vetter @ 2019-05-15  8:40 UTC (permalink / raw)
  To: Christian König
  Cc: David1.Zhou-5C7GfCeVMHo, Marek.Olsak-5C7GfCeVMHo,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Prike.Liang-5C7GfCeVMHo,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Tue, May 14, 2019 at 02:31:25PM +0200, Christian König wrote:
> Let userspace try again if we really run into a deadlock during eviction.
> 
> This has a low chance of livelocking, but guarantees forward progress.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/ttm/ttm_bo.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index a301c876ae31..ce85cd8b4970 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -877,6 +877,8 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>  					    acquire_ctx);
>  		if (ret) {
>  			ttm_bo_put(first_bo);
> +			if (ret == -EDEADLK)

Is this the ww_mutex EDEADLK or something else? If the former, then letting
that escape unhandled into userspace sounds like a kernel bug ...
-Daniel

> +				ret = -EAGAIN;
>  			return ret;
>  		}
>  		spin_lock(&glob->lru_lock);
> -- 
> 2.17.1
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 02/11] drm/ttm: fix busy memory to fail other user v8
  2019-05-15  8:38     ` Daniel Vetter
@ 2019-05-15  8:45       ` Daniel Vetter
  2019-05-15  9:27         ` Christian König
       [not found]         ` <20190515084551.GD17751-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
  0 siblings, 2 replies; 30+ messages in thread
From: Daniel Vetter @ 2019-05-15  8:45 UTC (permalink / raw)
  To: Christian König; +Cc: Marek.Olsak, amd-gfx, Prike.Liang, dri-devel

On Wed, May 15, 2019 at 10:38:28AM +0200, Daniel Vetter wrote:
> On Tue, May 14, 2019 at 02:31:18PM +0200, Christian König wrote:
> > From: Chunming Zhou <david1.zhou@amd.com>
> > 
> > heavy gpu job could occupy memory long time, which lead other user fail to get memory.
> > 
> > basically pick up Christian idea:
> > 
> > 1. Reserve the BO in DC using a ww_mutex ticket (trivial).
> > 2. If we then run into this EBUSY condition in TTM check if the BO we need memory for (or rather the ww_mutex of its reservation object) has a ticket assigned.
> > 3. If we have a ticket we grab a reference to the first BO on the LRU, drop the LRU lock and try to grab the reservation lock with the ticket.
> > 4. If getting the reservation lock with the ticket succeeded we check if the BO is still the first one on the LRU in question (the BO could have moved).
> > 5. If the BO is still the first one on the LRU in question we try to evict it as we would evict any other BO.
> > 6. If any of the "If's" above fail we just back off and return -EBUSY.
> > 
> > v2: fix some minor check
> > v3: address Christian v2 comments.
> > v4: fix some missing
> > v5: handle first_bo unlock and bo_get/put
> > v6: abstract unified iterate function, and handle all possible usecase not only pinned bo.
> > v7: pass request bo->resv to ttm_bo_evict_first
> > v8 (chk): minimal coding style fix
> > 
> > Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
> > Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
> > Reviewed-by: Christian König <christian.koenig@amd.com>
> 
> I think this closes a big gap between ttm and the bkl/struct_mutex
> drivers - it's much easier to guarantee you can evict everything if
> there's only a single lock :-)
> 
> Would be absolutely awesome if we could extract this as some kind of
> building block, like we've done with lots of other ttm concepts already
> (reservation_obj, fences, ...).
> 
> Just an aside really.

Ofc this is meant as a comment on the entire patch series: without all the
other patches to make sure BOs always stay on a relevant LRU there are still
gaps in the guaranteed forward progress eviction algorithm.
-Daniel

> -Daniel
> 
> > ---
> >  drivers/gpu/drm/ttm/ttm_bo.c | 113 +++++++++++++++++++++++++++++------
> >  1 file changed, 96 insertions(+), 17 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> > index 2845fceb2fbd..e634d3a36923 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > @@ -766,11 +766,13 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
> >   * b. Otherwise, trylock it.
> >   */
> >  static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
> > -			struct ttm_operation_ctx *ctx, bool *locked)
> > +			struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
> >  {
> >  	bool ret = false;
> >  
> >  	*locked = false;
> > +	if (busy)
> > +		*busy = false;
> >  	if (bo->resv == ctx->resv) {
> >  		reservation_object_assert_held(bo->resv);
> >  		if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
> > @@ -779,35 +781,46 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
> >  	} else {
> >  		*locked = reservation_object_trylock(bo->resv);
> >  		ret = *locked;
> > +		if (!ret && busy)
> > +			*busy = true;
> >  	}
> >  
> >  	return ret;
> >  }
> >  
> > -static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> > -			       uint32_t mem_type,
> > -			       const struct ttm_place *place,
> > -			       struct ttm_operation_ctx *ctx)
> > +static struct ttm_buffer_object*
> > +ttm_mem_find_evitable_bo(struct ttm_bo_device *bdev,
> > +			 struct ttm_mem_type_manager *man,
> > +			 const struct ttm_place *place,
> > +			 struct ttm_operation_ctx *ctx,
> > +			 struct ttm_buffer_object **first_bo,
> > +			 bool *locked)
> >  {
> > -	struct ttm_bo_global *glob = bdev->glob;
> > -	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
> >  	struct ttm_buffer_object *bo = NULL;
> > -	bool locked = false;
> > -	unsigned i;
> > -	int ret;
> > +	int i;
> >  
> > -	spin_lock(&glob->lru_lock);
> > +	if (first_bo)
> > +		*first_bo = NULL;
> >  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> >  		list_for_each_entry(bo, &man->lru[i], lru) {
> > -			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
> > +			bool busy = false;
> > +
> > +			if (!ttm_bo_evict_swapout_allowable(bo, ctx, locked,
> > +							    &busy)) {
> > +				if (first_bo && !(*first_bo) && busy) {
> > +					ttm_bo_get(bo);
> > +					*first_bo = bo;
> > +				}
> >  				continue;
> > +			}
> >  
> >  			if (place && !bdev->driver->eviction_valuable(bo,
> >  								      place)) {
> > -				if (locked)
> > +				if (*locked)
> >  					reservation_object_unlock(bo->resv);
> >  				continue;
> >  			}
> > +
> >  			break;
> >  		}
> >  
> > @@ -818,9 +831,69 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> >  		bo = NULL;
> >  	}
> >  
> > +	return bo;
> > +}
> > +
> > +static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> > +			       uint32_t mem_type,
> > +			       const struct ttm_place *place,
> > +			       struct ttm_operation_ctx *ctx,
> > +			       struct reservation_object *request_resv)
> > +{
> > +	struct ttm_bo_global *glob = bdev->glob;
> > +	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
> > +	struct ttm_buffer_object *bo = NULL, *first_bo = NULL;
> > +	bool locked = false;
> > +	int ret;
> > +
> > +	spin_lock(&glob->lru_lock);
> > +	bo = ttm_mem_find_evitable_bo(bdev, man, place, ctx, &first_bo,
> > +				      &locked);
> >  	if (!bo) {
> > +		struct ww_acquire_ctx *acquire_ctx = request_resv->lock.ctx;
> > +		struct ttm_operation_ctx busy_ctx;
> > +
> >  		spin_unlock(&glob->lru_lock);
> > -		return -EBUSY;
> > +		/* check if other user occupy memory too long time */
> > +		if (!first_bo || !request_resv || !request_resv->lock.ctx) {
> > +			if (first_bo)
> > +				ttm_bo_put(first_bo);
> > +			return -EBUSY;
> > +		}
> > +		if (first_bo->resv == request_resv) {
> > +			ttm_bo_put(first_bo);
> > +			return -EBUSY;
> > +		}
> > +		if (ctx->interruptible)
> > +			ret = ww_mutex_lock_interruptible(&first_bo->resv->lock,
> > +							  acquire_ctx);
> > +		else
> > +			ret = ww_mutex_lock(&first_bo->resv->lock,
> > +					    acquire_ctx);
> > +		if (ret) {
> > +			ttm_bo_put(first_bo);
> > +			return ret;
> > +		}
> > +		spin_lock(&glob->lru_lock);
> > +		/* previous busy resv lock is held by above, idle now,
> > +		 * so let them evictable.
> > +		 */
> > +		busy_ctx.interruptible = ctx->interruptible;
> > +		busy_ctx.no_wait_gpu   = ctx->no_wait_gpu;
> > +		busy_ctx.resv	       = first_bo->resv;
> > +		busy_ctx.flags	       = TTM_OPT_FLAG_ALLOW_RES_EVICT;
> > +
> > +		bo = ttm_mem_find_evitable_bo(bdev, man, place, &busy_ctx, NULL,
> > +					      &locked);
> > +		if (bo && (bo->resv == first_bo->resv))
> > +			locked = true;
> > +		else if (bo)
> > +			ww_mutex_unlock(&first_bo->resv->lock);
> > +		if (!bo) {
> > +			spin_unlock(&glob->lru_lock);
> > +			ttm_bo_put(first_bo);
> > +			return -EBUSY;
> > +		}
> >  	}
> >  
> >  	kref_get(&bo->list_kref);
> > @@ -829,11 +902,15 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> >  		ret = ttm_bo_cleanup_refs(bo, ctx->interruptible,
> >  					  ctx->no_wait_gpu, locked);
> >  		kref_put(&bo->list_kref, ttm_bo_release_list);
> > +		if (first_bo)
> > +			ttm_bo_put(first_bo);
> >  		return ret;
> >  	}
> >  
> >  	ttm_bo_del_from_lru(bo);
> >  	spin_unlock(&glob->lru_lock);
> > +	if (first_bo)
> > +		ttm_bo_put(first_bo);
> >  
> >  	ret = ttm_bo_evict(bo, ctx);
> >  	if (locked) {
> > @@ -907,7 +984,7 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
> >  			return ret;
> >  		if (mem->mm_node)
> >  			break;
> > -		ret = ttm_mem_evict_first(bdev, mem_type, place, ctx);
> > +		ret = ttm_mem_evict_first(bdev, mem_type, place, ctx, bo->resv);
> >  		if (unlikely(ret != 0))
> >  			return ret;
> >  	} while (1);
> > @@ -1401,7 +1478,8 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
> >  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> >  		while (!list_empty(&man->lru[i])) {
> >  			spin_unlock(&glob->lru_lock);
> > -			ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx);
> > +			ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx,
> > +						  NULL);
> >  			if (ret)
> >  				return ret;
> >  			spin_lock(&glob->lru_lock);
> > @@ -1772,7 +1850,8 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
> >  	spin_lock(&glob->lru_lock);
> >  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> >  		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
> > -			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked)) {
> > +			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
> > +							   NULL)) {
> >  				ret = 0;
> >  				break;
> >  			}
> > -- 
> > 2.17.1
> > 
> > _______________________________________________
> > dri-devel mailing list
> > dri-devel@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 02/11] drm/ttm: fix busy memory to fail other user v8
  2019-05-15  8:45       ` Daniel Vetter
@ 2019-05-15  9:27         ` Christian König
       [not found]           ` <6f862969-3937-df25-949f-9740a90dd457-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
       [not found]         ` <20190515084551.GD17751-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
  1 sibling, 1 reply; 30+ messages in thread
From: Christian König @ 2019-05-15  9:27 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Marek.Olsak, amd-gfx, Prike.Liang, dri-devel

Am 15.05.19 um 10:45 schrieb Daniel Vetter:
> On Wed, May 15, 2019 at 10:38:28AM +0200, Daniel Vetter wrote:
>> On Tue, May 14, 2019 at 02:31:18PM +0200, Christian König wrote:
>>> From: Chunming Zhou <david1.zhou@amd.com>
>>>
>>> heavy gpu job could occupy memory long time, which lead other user fail to get memory.
>>>
>>> basically pick up Christian idea:
>>>
>>> 1. Reserve the BO in DC using a ww_mutex ticket (trivial).
>>> 2. If we then run into this EBUSY condition in TTM check if the BO we need memory for (or rather the ww_mutex of its reservation object) has a ticket assigned.
>>> 3. If we have a ticket we grab a reference to the first BO on the LRU, drop the LRU lock and try to grab the reservation lock with the ticket.
>>> 4. If getting the reservation lock with the ticket succeeded we check if the BO is still the first one on the LRU in question (the BO could have moved).
>>> 5. If the BO is still the first one on the LRU in question we try to evict it as we would evict any other BO.
>>> 6. If any of the "If's" above fail we just back off and return -EBUSY.
>>>
>>> v2: fix some minor check
>>> v3: address Christian v2 comments.
>>> v4: fix some missing
>>> v5: handle first_bo unlock and bo_get/put
>>> v6: abstract unified iterate function, and handle all possible usecase not only pinned bo.
>>> v7: pass request bo->resv to ttm_bo_evict_first
>>> v8 (chk): minimal coding style fix
>>>
>>> Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
>>> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>> I think this closes a big gap between ttm and the bkl/struct_mutex
>> drivers - it's much easier to guarantee you can evict everything if
>> there's only a single lock :-)
>>
>> Would be absolutely awesome if we could extract this as some kind of
>> building block, like we've done with lots of other ttm concepts already
>> (reservation_obj, fences, ...).
>>
>> Just an aside really.
> Ofc this is meant as a comment on the entire patch series, without all the
> other patches to make sure BO always stay on a relevant LRU there's still
> gaps in the guaranteed forward progress eviction algorithm.

Yeah, the problem surfaced because of patch #4. Previously TTM would
have just ignored all errors and continued to try different placements,
only returning -ENOMEM when we ran out of possible placements.

I probably need to either fix patch #4 or reorder the patches.

Thanks for the note,
Christian.

> -Daniel
>
>> -Daniel
>>
>>> ---
>>>   drivers/gpu/drm/ttm/ttm_bo.c | 113 +++++++++++++++++++++++++++++------
>>>   1 file changed, 96 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>> index 2845fceb2fbd..e634d3a36923 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>> @@ -766,11 +766,13 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>>>    * b. Otherwise, trylock it.
>>>    */
>>>   static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
>>> -			struct ttm_operation_ctx *ctx, bool *locked)
>>> +			struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
>>>   {
>>>   	bool ret = false;
>>>   
>>>   	*locked = false;
>>> +	if (busy)
>>> +		*busy = false;
>>>   	if (bo->resv == ctx->resv) {
>>>   		reservation_object_assert_held(bo->resv);
>>>   		if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
>>> @@ -779,35 +781,46 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
>>>   	} else {
>>>   		*locked = reservation_object_trylock(bo->resv);
>>>   		ret = *locked;
>>> +		if (!ret && busy)
>>> +			*busy = true;
>>>   	}
>>>   
>>>   	return ret;
>>>   }
>>>   
>>> -static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>> -			       uint32_t mem_type,
>>> -			       const struct ttm_place *place,
>>> -			       struct ttm_operation_ctx *ctx)
>>> +static struct ttm_buffer_object*
>>> +ttm_mem_find_evitable_bo(struct ttm_bo_device *bdev,
>>> +			 struct ttm_mem_type_manager *man,
>>> +			 const struct ttm_place *place,
>>> +			 struct ttm_operation_ctx *ctx,
>>> +			 struct ttm_buffer_object **first_bo,
>>> +			 bool *locked)
>>>   {
>>> -	struct ttm_bo_global *glob = bdev->glob;
>>> -	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
>>>   	struct ttm_buffer_object *bo = NULL;
>>> -	bool locked = false;
>>> -	unsigned i;
>>> -	int ret;
>>> +	int i;
>>>   
>>> -	spin_lock(&glob->lru_lock);
>>> +	if (first_bo)
>>> +		*first_bo = NULL;
>>>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>>   		list_for_each_entry(bo, &man->lru[i], lru) {
>>> -			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
>>> +			bool busy = false;
>>> +
>>> +			if (!ttm_bo_evict_swapout_allowable(bo, ctx, locked,
>>> +							    &busy)) {
>>> +				if (first_bo && !(*first_bo) && busy) {
>>> +					ttm_bo_get(bo);
>>> +					*first_bo = bo;
>>> +				}
>>>   				continue;
>>> +			}
>>>   
>>>   			if (place && !bdev->driver->eviction_valuable(bo,
>>>   								      place)) {
>>> -				if (locked)
>>> +				if (*locked)
>>>   					reservation_object_unlock(bo->resv);
>>>   				continue;
>>>   			}
>>> +
>>>   			break;
>>>   		}
>>>   
>>> @@ -818,9 +831,69 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>>   		bo = NULL;
>>>   	}
>>>   
>>> +	return bo;
>>> +}
>>> +
>>> +static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>> +			       uint32_t mem_type,
>>> +			       const struct ttm_place *place,
>>> +			       struct ttm_operation_ctx *ctx,
>>> +			       struct reservation_object *request_resv)
>>> +{
>>> +	struct ttm_bo_global *glob = bdev->glob;
>>> +	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
>>> +	struct ttm_buffer_object *bo = NULL, *first_bo = NULL;
>>> +	bool locked = false;
>>> +	int ret;
>>> +
>>> +	spin_lock(&glob->lru_lock);
>>> +	bo = ttm_mem_find_evitable_bo(bdev, man, place, ctx, &first_bo,
>>> +				      &locked);
>>>   	if (!bo) {
>>> +		struct ww_acquire_ctx *acquire_ctx = request_resv->lock.ctx;
>>> +		struct ttm_operation_ctx busy_ctx;
>>> +
>>>   		spin_unlock(&glob->lru_lock);
>>> -		return -EBUSY;
>>> +		/* check if other user occupy memory too long time */
>>> +		if (!first_bo || !request_resv || !request_resv->lock.ctx) {
>>> +			if (first_bo)
>>> +				ttm_bo_put(first_bo);
>>> +			return -EBUSY;
>>> +		}
>>> +		if (first_bo->resv == request_resv) {
>>> +			ttm_bo_put(first_bo);
>>> +			return -EBUSY;
>>> +		}
>>> +		if (ctx->interruptible)
>>> +			ret = ww_mutex_lock_interruptible(&first_bo->resv->lock,
>>> +							  acquire_ctx);
>>> +		else
>>> +			ret = ww_mutex_lock(&first_bo->resv->lock,
>>> +					    acquire_ctx);
>>> +		if (ret) {
>>> +			ttm_bo_put(first_bo);
>>> +			return ret;
>>> +		}
>>> +		spin_lock(&glob->lru_lock);
>>> +		/* previous busy resv lock is held by above, idle now,
>>> +		 * so let them evictable.
>>> +		 */
>>> +		busy_ctx.interruptible = ctx->interruptible;
>>> +		busy_ctx.no_wait_gpu   = ctx->no_wait_gpu;
>>> +		busy_ctx.resv	       = first_bo->resv;
>>> +		busy_ctx.flags	       = TTM_OPT_FLAG_ALLOW_RES_EVICT;
>>> +
>>> +		bo = ttm_mem_find_evitable_bo(bdev, man, place, &busy_ctx, NULL,
>>> +					      &locked);
>>> +		if (bo && (bo->resv == first_bo->resv))
>>> +			locked = true;
>>> +		else if (bo)
>>> +			ww_mutex_unlock(&first_bo->resv->lock);
>>> +		if (!bo) {
>>> +			spin_unlock(&glob->lru_lock);
>>> +			ttm_bo_put(first_bo);
>>> +			return -EBUSY;
>>> +		}
>>>   	}
>>>   
>>>   	kref_get(&bo->list_kref);
>>> @@ -829,11 +902,15 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>>   		ret = ttm_bo_cleanup_refs(bo, ctx->interruptible,
>>>   					  ctx->no_wait_gpu, locked);
>>>   		kref_put(&bo->list_kref, ttm_bo_release_list);
>>> +		if (first_bo)
>>> +			ttm_bo_put(first_bo);
>>>   		return ret;
>>>   	}
>>>   
>>>   	ttm_bo_del_from_lru(bo);
>>>   	spin_unlock(&glob->lru_lock);
>>> +	if (first_bo)
>>> +		ttm_bo_put(first_bo);
>>>   
>>>   	ret = ttm_bo_evict(bo, ctx);
>>>   	if (locked) {
>>> @@ -907,7 +984,7 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
>>>   			return ret;
>>>   		if (mem->mm_node)
>>>   			break;
>>> -		ret = ttm_mem_evict_first(bdev, mem_type, place, ctx);
>>> +		ret = ttm_mem_evict_first(bdev, mem_type, place, ctx, bo->resv);
>>>   		if (unlikely(ret != 0))
>>>   			return ret;
>>>   	} while (1);
>>> @@ -1401,7 +1478,8 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
>>>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>>   		while (!list_empty(&man->lru[i])) {
>>>   			spin_unlock(&glob->lru_lock);
>>> -			ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx);
>>> +			ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx,
>>> +						  NULL);
>>>   			if (ret)
>>>   				return ret;
>>>   			spin_lock(&glob->lru_lock);
>>> @@ -1772,7 +1850,8 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
>>>   	spin_lock(&glob->lru_lock);
>>>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>>   		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
>>> -			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked)) {
>>> +			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
>>> +							   NULL)) {
>>>   				ret = 0;
>>>   				break;
>>>   			}
>>> -- 
>>> 2.17.1
>>>
>>> _______________________________________________
>>> dri-devel mailing list
>>> dri-devel@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>> -- 
>> Daniel Vetter
>> Software Engineer, Intel Corporation
>> http://blog.ffwll.ch

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 02/11] drm/ttm: fix busy memory to fail other user v8
       [not found]           ` <6f862969-3937-df25-949f-9740a90dd457-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-05-15  9:28             ` Christian König
  0 siblings, 0 replies; 30+ messages in thread
From: Christian König @ 2019-05-15  9:28 UTC (permalink / raw)
  To: Daniel Vetter
  Cc: David1.Zhou-5C7GfCeVMHo, Marek.Olsak-5C7GfCeVMHo,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Prike.Liang-5C7GfCeVMHo,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 15.05.19 um 11:27 schrieb Christian König:
> Am 15.05.19 um 10:45 schrieb Daniel Vetter:
>> On Wed, May 15, 2019 at 10:38:28AM +0200, Daniel Vetter wrote:
>>> On Tue, May 14, 2019 at 02:31:18PM +0200, Christian König wrote:
>>>> From: Chunming Zhou <david1.zhou@amd.com>
>>>>
>>>> heavy gpu job could occupy memory long time, which lead other user 
>>>> fail to get memory.
>>>>
>>>> basically pick up Christian idea:
>>>>
>>>> 1. Reserve the BO in DC using a ww_mutex ticket (trivial).
>>>> 2. If we then run into this EBUSY condition in TTM check if the BO 
>>>> we need memory for (or rather the ww_mutex of its reservation 
>>>> object) has a ticket assigned.
>>>> 3. If we have a ticket we grab a reference to the first BO on the 
>>>> LRU, drop the LRU lock and try to grab the reservation lock with 
>>>> the ticket.
>>>> 4. If getting the reservation lock with the ticket succeeded we 
>>>> check if the BO is still the first one on the LRU in question (the 
>>>> BO could have moved).
>>>> 5. If the BO is still the first one on the LRU in question we try 
>>>> to evict it as we would evict any other BO.
>>>> 6. If any of the "If's" above fail we just back off and return -EBUSY.
>>>>
>>>> v2: fix some minor check
>>>> v3: address Christian v2 comments.
>>>> v4: fix some missing
>>>> v5: handle first_bo unlock and bo_get/put
>>>> v6: abstract unified iterate function, and handle all possible 
>>>> usecase not only pinned bo.
>>>> v7: pass request bo->resv to ttm_bo_evict_first
>>>> v8 (chk): minimal coding style fix
>>>>
>>>> Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
>>>> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
>>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>> I think this closes a big gap between ttm and the bkl/struct_mutex
>>> drivers - it's much easier to guarantee you can evict everything if
>>> there's only a single lock :-)
>>>
>>> Would be absolutely awesome if we could extract this as some kind of
>>> building block, like we've done with lots of other ttm concepts already
>>> (reservation_obj, fences, ...).
>>>
>>> Just an aside really.
>> Ofc this is meant as a comment on the entire patch series, without 
>> all the
>> other patches to make sure BO always stay on a relevant LRU there's 
>> still
>> gaps in the guaranteed forward progress eviction algorithm.
>
> Yeah, the problem surfaced because of patch #4. Previously TTM would 
> have just ignored all errors and continued to try different placements 
> and only return -ENOMEM when we ran out of a possible placements.
>
> I probably need to either fix patch #4 or reorder the patches.

Oops, please ignore. I accidentally replied to the wrong mail.

Christian.

>
> Thanks for the note,
> Christian.
>
>> -Daniel
>>
>>> -Daniel
>>>
>>>> ---
>>>>   drivers/gpu/drm/ttm/ttm_bo.c | 113 
>>>> +++++++++++++++++++++++++++++------
>>>>   1 file changed, 96 insertions(+), 17 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c 
>>>> b/drivers/gpu/drm/ttm/ttm_bo.c
>>>> index 2845fceb2fbd..e634d3a36923 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>> @@ -766,11 +766,13 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>>>>    * b. Otherwise, trylock it.
>>>>    */
>>>>   static bool ttm_bo_evict_swapout_allowable(struct 
>>>> ttm_buffer_object *bo,
>>>> -            struct ttm_operation_ctx *ctx, bool *locked)
>>>> +            struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
>>>>   {
>>>>       bool ret = false;
>>>>         *locked = false;
>>>> +    if (busy)
>>>> +        *busy = false;
>>>>       if (bo->resv == ctx->resv) {
>>>>           reservation_object_assert_held(bo->resv);
>>>>           if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
>>>> @@ -779,35 +781,46 @@ static bool 
>>>> ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
>>>>       } else {
>>>>           *locked = reservation_object_trylock(bo->resv);
>>>>           ret = *locked;
>>>> +        if (!ret && busy)
>>>> +            *busy = true;
>>>>       }
>>>>         return ret;
>>>>   }
>>>>   -static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>>> -                   uint32_t mem_type,
>>>> -                   const struct ttm_place *place,
>>>> -                   struct ttm_operation_ctx *ctx)
>>>> +static struct ttm_buffer_object*
>>>> +ttm_mem_find_evitable_bo(struct ttm_bo_device *bdev,
>>>> +             struct ttm_mem_type_manager *man,
>>>> +             const struct ttm_place *place,
>>>> +             struct ttm_operation_ctx *ctx,
>>>> +             struct ttm_buffer_object **first_bo,
>>>> +             bool *locked)
>>>>   {
>>>> -    struct ttm_bo_global *glob = bdev->glob;
>>>> -    struct ttm_mem_type_manager *man = &bdev->man[mem_type];
>>>>       struct ttm_buffer_object *bo = NULL;
>>>> -    bool locked = false;
>>>> -    unsigned i;
>>>> -    int ret;
>>>> +    int i;
>>>>   -    spin_lock(&glob->lru_lock);
>>>> +    if (first_bo)
>>>> +        *first_bo = NULL;
>>>>       for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>>>           list_for_each_entry(bo, &man->lru[i], lru) {
>>>> -            if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
>>>> +            bool busy = false;
>>>> +
>>>> +            if (!ttm_bo_evict_swapout_allowable(bo, ctx, locked,
>>>> +                                &busy)) {
>>>> +                if (first_bo && !(*first_bo) && busy) {
>>>> +                    ttm_bo_get(bo);
>>>> +                    *first_bo = bo;
>>>> +                }
>>>>                   continue;
>>>> +            }
>>>>                 if (place && !bdev->driver->eviction_valuable(bo,
>>>>                                         place)) {
>>>> -                if (locked)
>>>> +                if (*locked)
>>>> reservation_object_unlock(bo->resv);
>>>>                   continue;
>>>>               }
>>>> +
>>>>               break;
>>>>           }
>>>>   @@ -818,9 +831,69 @@ static int ttm_mem_evict_first(struct 
>>>> ttm_bo_device *bdev,
>>>>           bo = NULL;
>>>>       }
>>>>   +    return bo;
>>>> +}
>>>> +
>>>> +static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>>> +                   uint32_t mem_type,
>>>> +                   const struct ttm_place *place,
>>>> +                   struct ttm_operation_ctx *ctx,
>>>> +                   struct reservation_object *request_resv)
>>>> +{
>>>> +    struct ttm_bo_global *glob = bdev->glob;
>>>> +    struct ttm_mem_type_manager *man = &bdev->man[mem_type];
>>>> +    struct ttm_buffer_object *bo = NULL, *first_bo = NULL;
>>>> +    bool locked = false;
>>>> +    int ret;
>>>> +
>>>> +    spin_lock(&glob->lru_lock);
>>>> +    bo = ttm_mem_find_evitable_bo(bdev, man, place, ctx, &first_bo,
>>>> +                      &locked);
>>>>       if (!bo) {
>>>> +        struct ww_acquire_ctx *acquire_ctx = request_resv->lock.ctx;
>>>> +        struct ttm_operation_ctx busy_ctx;
>>>> +
>>>>           spin_unlock(&glob->lru_lock);
>>>> -        return -EBUSY;
>>>> +        /* check if other user occupy memory too long time */
>>>> +        if (!first_bo || !request_resv || !request_resv->lock.ctx) {
>>>> +            if (first_bo)
>>>> +                ttm_bo_put(first_bo);
>>>> +            return -EBUSY;
>>>> +        }
>>>> +        if (first_bo->resv == request_resv) {
>>>> +            ttm_bo_put(first_bo);
>>>> +            return -EBUSY;
>>>> +        }
>>>> +        if (ctx->interruptible)
>>>> +            ret = ww_mutex_lock_interruptible(&first_bo->resv->lock,
>>>> +                              acquire_ctx);
>>>> +        else
>>>> +            ret = ww_mutex_lock(&first_bo->resv->lock,
>>>> +                        acquire_ctx);
>>>> +        if (ret) {
>>>> +            ttm_bo_put(first_bo);
>>>> +            return ret;
>>>> +        }
>>>> +        spin_lock(&glob->lru_lock);
>>>> +        /* previous busy resv lock is held by above, idle now,
>>>> +         * so let them evictable.
>>>> +         */
>>>> +        busy_ctx.interruptible = ctx->interruptible;
>>>> +        busy_ctx.no_wait_gpu   = ctx->no_wait_gpu;
>>>> +        busy_ctx.resv           = first_bo->resv;
>>>> +        busy_ctx.flags           = TTM_OPT_FLAG_ALLOW_RES_EVICT;
>>>> +
>>>> +        bo = ttm_mem_find_evitable_bo(bdev, man, place, &busy_ctx, 
>>>> NULL,
>>>> +                          &locked);
>>>> +        if (bo && (bo->resv == first_bo->resv))
>>>> +            locked = true;
>>>> +        else if (bo)
>>>> + ww_mutex_unlock(&first_bo->resv->lock);
>>>> +        if (!bo) {
>>>> +            spin_unlock(&glob->lru_lock);
>>>> +            ttm_bo_put(first_bo);
>>>> +            return -EBUSY;
>>>> +        }
>>>>       }
>>>>         kref_get(&bo->list_kref);
>>>> @@ -829,11 +902,15 @@ static int ttm_mem_evict_first(struct 
>>>> ttm_bo_device *bdev,
>>>>           ret = ttm_bo_cleanup_refs(bo, ctx->interruptible,
>>>>                         ctx->no_wait_gpu, locked);
>>>>           kref_put(&bo->list_kref, ttm_bo_release_list);
>>>> +        if (first_bo)
>>>> +            ttm_bo_put(first_bo);
>>>>           return ret;
>>>>       }
>>>>         ttm_bo_del_from_lru(bo);
>>>>       spin_unlock(&glob->lru_lock);
>>>> +    if (first_bo)
>>>> +        ttm_bo_put(first_bo);
>>>>         ret = ttm_bo_evict(bo, ctx);
>>>>       if (locked) {
>>>> @@ -907,7 +984,7 @@ static int ttm_bo_mem_force_space(struct 
>>>> ttm_buffer_object *bo,
>>>>               return ret;
>>>>           if (mem->mm_node)
>>>>               break;
>>>> -        ret = ttm_mem_evict_first(bdev, mem_type, place, ctx);
>>>> +        ret = ttm_mem_evict_first(bdev, mem_type, place, ctx, 
>>>> bo->resv);
>>>>           if (unlikely(ret != 0))
>>>>               return ret;
>>>>       } while (1);
>>>> @@ -1401,7 +1478,8 @@ static int ttm_bo_force_list_clean(struct 
>>>> ttm_bo_device *bdev,
>>>>       for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>>>           while (!list_empty(&man->lru[i])) {
>>>>               spin_unlock(&glob->lru_lock);
>>>> -            ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx);
>>>> +            ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx,
>>>> +                          NULL);
>>>>               if (ret)
>>>>                   return ret;
>>>>               spin_lock(&glob->lru_lock);
>>>> @@ -1772,7 +1850,8 @@ int ttm_bo_swapout(struct ttm_bo_global 
>>>> *glob, struct ttm_operation_ctx *ctx)
>>>>       spin_lock(&glob->lru_lock);
>>>>       for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>>>           list_for_each_entry(bo, &glob->swap_lru[i], swap) {
>>>> -            if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked)) {
>>>> +            if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
>>>> +                               NULL)) {
>>>>                   ret = 0;
>>>>                   break;
>>>>               }
>>>> -- 
>>>> 2.17.1
>>>>
>>>> _______________________________________________
>>>> dri-devel mailing list
>>>> dri-devel@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>>> -- 
>>> Daniel Vetter
>>> Software Engineer, Intel Corporation
>>> http://blog.ffwll.ch
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 09/11] drm/ttm: convert EDEADLK into EAGAIN
  2019-05-15  8:40       ` Daniel Vetter
@ 2019-05-15  9:28         ` Christian König
  0 siblings, 0 replies; 30+ messages in thread
From: Christian König @ 2019-05-15  9:28 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Marek.Olsak, amd-gfx, Prike.Liang, dri-devel

Am 15.05.19 um 10:40 schrieb Daniel Vetter:
> On Tue, May 14, 2019 at 02:31:25PM +0200, Christian König wrote:
>> Let userspace try again if we really run into a deadlock during eviction.
>>
>> This has a low chance of live locking, but with guaranteed forward progress.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/ttm/ttm_bo.c | 2 ++
>>   1 file changed, 2 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index a301c876ae31..ce85cd8b4970 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -877,6 +877,8 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>   					    acquire_ctx);
>>   		if (ret) {
>>   			ttm_bo_put(first_bo);
>> +			if (ret == -EDEADLK)
> Is this the ww_mutex EDEADLK or something else? If the former then letting
> that escape unhandled into userspace sounds like a kernel bug ...

Yeah, the problem surfaced because of patch #4. Previously TTM would 
have just ignored all errors and continued to try different placements 
and only returned -ENOMEM when we ran out of possible placements.

I probably need to either fix patch #4 or reorder the patches.

Thanks for the note,
Christian.

> -Daniel
>
>> +				ret = -EAGAIN;
>>   			return ret;
>>   		}
>>   		spin_lock(&glob->lru_lock);
>> -- 
>> 2.17.1
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 02/11] drm/ttm: fix busy memory to fail other user v8
       [not found]         ` <20190515084551.GD17751-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
@ 2019-05-15  9:30           ` Christian König
  0 siblings, 0 replies; 30+ messages in thread
From: Christian König @ 2019-05-15  9:30 UTC (permalink / raw)
  To: Daniel Vetter
  Cc: David1.Zhou-5C7GfCeVMHo, Marek.Olsak-5C7GfCeVMHo,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Prike.Liang-5C7GfCeVMHo,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 15.05.19 um 10:45 schrieb Daniel Vetter:
> On Wed, May 15, 2019 at 10:38:28AM +0200, Daniel Vetter wrote:
>> On Tue, May 14, 2019 at 02:31:18PM +0200, Christian König wrote:
>>> From: Chunming Zhou <david1.zhou@amd.com>
>>>
>>> heavy gpu job could occupy memory long time, which lead other user fail to get memory.
>>>
>>> basically pick up Christian idea:
>>>
>>> 1. Reserve the BO in DC using a ww_mutex ticket (trivial).
>>> 2. If we then run into this EBUSY condition in TTM check if the BO we need memory for (or rather the ww_mutex of its reservation object) has a ticket assigned.
>>> 3. If we have a ticket we grab a reference to the first BO on the LRU, drop the LRU lock and try to grab the reservation lock with the ticket.
>>> 4. If getting the reservation lock with the ticket succeeded we check if the BO is still the first one on the LRU in question (the BO could have moved).
>>> 5. If the BO is still the first one on the LRU in question we try to evict it as we would evict any other BO.
>>> 6. If any of the "If's" above fail we just back off and return -EBUSY.
>>>
>>> v2: fix some minor check
>>> v3: address Christian v2 comments.
>>> v4: fix some missing
>>> v5: handle first_bo unlock and bo_get/put
>>> v6: abstract unified iterate function, and handle all possible usecase not only pinned bo.
>>> v7: pass request bo->resv to ttm_bo_evict_first
>>> v8 (chk): minimal coding style fix
>>>
>>> Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
>>> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>> I think this closes a big gap between ttm and the bkl/struct_mutex
>> drivers - it's much easier to guarantee you can evict everything if
>> there's only a single lock :-)
>>
>> Would be absolutely awesome if we could extract this as some kind of
>> building block, like we've done with lots of other ttm concepts already
>> (reservation_obj, fences, ...).
>>
>> Just an aside really.
> Ofc this is meant as a comment on the entire patch series, without all the
> other patches to make sure BO always stay on a relevant LRU there's still
> gaps in the guaranteed forward progress eviction algorithm.

Yeah, and especially the fact that Marek ran into a bad in-kernel 
deadlock is a serious no-go for the moment.

Need to figure out what exactly is going wrong here first, but in 
general I completely agree that we should move this logic out of TTM.

Christian.

> -Daniel
>
>> -Daniel
>>
>>> ---
>>>   drivers/gpu/drm/ttm/ttm_bo.c | 113 +++++++++++++++++++++++++++++------
>>>   1 file changed, 96 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>> index 2845fceb2fbd..e634d3a36923 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>> @@ -766,11 +766,13 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>>>    * b. Otherwise, trylock it.
>>>    */
>>>   static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
>>> -			struct ttm_operation_ctx *ctx, bool *locked)
>>> +			struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
>>>   {
>>>   	bool ret = false;
>>>   
>>>   	*locked = false;
>>> +	if (busy)
>>> +		*busy = false;
>>>   	if (bo->resv == ctx->resv) {
>>>   		reservation_object_assert_held(bo->resv);
>>>   		if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
>>> @@ -779,35 +781,46 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
>>>   	} else {
>>>   		*locked = reservation_object_trylock(bo->resv);
>>>   		ret = *locked;
>>> +		if (!ret && busy)
>>> +			*busy = true;
>>>   	}
>>>   
>>>   	return ret;
>>>   }
>>>   
>>> -static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>> -			       uint32_t mem_type,
>>> -			       const struct ttm_place *place,
>>> -			       struct ttm_operation_ctx *ctx)
>>> +static struct ttm_buffer_object*
>>> +ttm_mem_find_evitable_bo(struct ttm_bo_device *bdev,
>>> +			 struct ttm_mem_type_manager *man,
>>> +			 const struct ttm_place *place,
>>> +			 struct ttm_operation_ctx *ctx,
>>> +			 struct ttm_buffer_object **first_bo,
>>> +			 bool *locked)
>>>   {
>>> -	struct ttm_bo_global *glob = bdev->glob;
>>> -	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
>>>   	struct ttm_buffer_object *bo = NULL;
>>> -	bool locked = false;
>>> -	unsigned i;
>>> -	int ret;
>>> +	int i;
>>>   
>>> -	spin_lock(&glob->lru_lock);
>>> +	if (first_bo)
>>> +		*first_bo = NULL;
>>>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>>   		list_for_each_entry(bo, &man->lru[i], lru) {
>>> -			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
>>> +			bool busy = false;
>>> +
>>> +			if (!ttm_bo_evict_swapout_allowable(bo, ctx, locked,
>>> +							    &busy)) {
>>> +				if (first_bo && !(*first_bo) && busy) {
>>> +					ttm_bo_get(bo);
>>> +					*first_bo = bo;
>>> +				}
>>>   				continue;
>>> +			}
>>>   
>>>   			if (place && !bdev->driver->eviction_valuable(bo,
>>>   								      place)) {
>>> -				if (locked)
>>> +				if (*locked)
>>>   					reservation_object_unlock(bo->resv);
>>>   				continue;
>>>   			}
>>> +
>>>   			break;
>>>   		}
>>>   
>>> @@ -818,9 +831,69 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>>   		bo = NULL;
>>>   	}
>>>   
>>> +	return bo;
>>> +}
>>> +
>>> +static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>> +			       uint32_t mem_type,
>>> +			       const struct ttm_place *place,
>>> +			       struct ttm_operation_ctx *ctx,
>>> +			       struct reservation_object *request_resv)
>>> +{
>>> +	struct ttm_bo_global *glob = bdev->glob;
>>> +	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
>>> +	struct ttm_buffer_object *bo = NULL, *first_bo = NULL;
>>> +	bool locked = false;
>>> +	int ret;
>>> +
>>> +	spin_lock(&glob->lru_lock);
>>> +	bo = ttm_mem_find_evitable_bo(bdev, man, place, ctx, &first_bo,
>>> +				      &locked);
>>>   	if (!bo) {
>>> +		struct ww_acquire_ctx *acquire_ctx = request_resv->lock.ctx;
>>> +		struct ttm_operation_ctx busy_ctx;
>>> +
>>>   		spin_unlock(&glob->lru_lock);
>>> -		return -EBUSY;
>>> +		/* check if other user occupy memory too long time */
>>> +		if (!first_bo || !request_resv || !request_resv->lock.ctx) {
>>> +			if (first_bo)
>>> +				ttm_bo_put(first_bo);
>>> +			return -EBUSY;
>>> +		}
>>> +		if (first_bo->resv == request_resv) {
>>> +			ttm_bo_put(first_bo);
>>> +			return -EBUSY;
>>> +		}
>>> +		if (ctx->interruptible)
>>> +			ret = ww_mutex_lock_interruptible(&first_bo->resv->lock,
>>> +							  acquire_ctx);
>>> +		else
>>> +			ret = ww_mutex_lock(&first_bo->resv->lock,
>>> +					    acquire_ctx);
>>> +		if (ret) {
>>> +			ttm_bo_put(first_bo);
>>> +			return ret;
>>> +		}
>>> +		spin_lock(&glob->lru_lock);
>>> +		/* previous busy resv lock is held by above, idle now,
>>> +		 * so let them evictable.
>>> +		 */
>>> +		busy_ctx.interruptible = ctx->interruptible;
>>> +		busy_ctx.no_wait_gpu   = ctx->no_wait_gpu;
>>> +		busy_ctx.resv	       = first_bo->resv;
>>> +		busy_ctx.flags	       = TTM_OPT_FLAG_ALLOW_RES_EVICT;
>>> +
>>> +		bo = ttm_mem_find_evitable_bo(bdev, man, place, &busy_ctx, NULL,
>>> +					      &locked);
>>> +		if (bo && (bo->resv == first_bo->resv))
>>> +			locked = true;
>>> +		else if (bo)
>>> +			ww_mutex_unlock(&first_bo->resv->lock);
>>> +		if (!bo) {
>>> +			spin_unlock(&glob->lru_lock);
>>> +			ttm_bo_put(first_bo);
>>> +			return -EBUSY;
>>> +		}
>>>   	}
>>>   
>>>   	kref_get(&bo->list_kref);
>>> @@ -829,11 +902,15 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>>   		ret = ttm_bo_cleanup_refs(bo, ctx->interruptible,
>>>   					  ctx->no_wait_gpu, locked);
>>>   		kref_put(&bo->list_kref, ttm_bo_release_list);
>>> +		if (first_bo)
>>> +			ttm_bo_put(first_bo);
>>>   		return ret;
>>>   	}
>>>   
>>>   	ttm_bo_del_from_lru(bo);
>>>   	spin_unlock(&glob->lru_lock);
>>> +	if (first_bo)
>>> +		ttm_bo_put(first_bo);
>>>   
>>>   	ret = ttm_bo_evict(bo, ctx);
>>>   	if (locked) {
>>> @@ -907,7 +984,7 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
>>>   			return ret;
>>>   		if (mem->mm_node)
>>>   			break;
>>> -		ret = ttm_mem_evict_first(bdev, mem_type, place, ctx);
>>> +		ret = ttm_mem_evict_first(bdev, mem_type, place, ctx, bo->resv);
>>>   		if (unlikely(ret != 0))
>>>   			return ret;
>>>   	} while (1);
>>> @@ -1401,7 +1478,8 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
>>>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>>   		while (!list_empty(&man->lru[i])) {
>>>   			spin_unlock(&glob->lru_lock);
>>> -			ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx);
>>> +			ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx,
>>> +						  NULL);
>>>   			if (ret)
>>>   				return ret;
>>>   			spin_lock(&glob->lru_lock);
>>> @@ -1772,7 +1850,8 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
>>>   	spin_lock(&glob->lru_lock);
>>>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>>   		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
>>> -			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked)) {
>>> +			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
>>> +							   NULL)) {
>>>   				ret = 0;
>>>   				break;
>>>   			}
>>> -- 
>>> 2.17.1
>>>
>>> _______________________________________________
>>> dri-devel mailing list
>>> dri-devel@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>> -- 
>> Daniel Vetter
>> Software Engineer, Intel Corporation
>> http://blog.ffwll.ch

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
       [not found]               ` <-wsx1tz-kxfbz1yns7x33sra134gl11xhlux4lx3izissqr2httt4mb1vleyxgj8i7k6-q6ze8ub3ff8c4o0fxmx7niu76yg4-ybakue-3v14jw-ed5ol8ybh6o9-1ze886-hbstfi448pvq3pwhkj.1557844282594-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
@ 2019-05-15 14:16                 ` Christian König
       [not found]                   ` <451e8757-b509-c0f7-eced-6ccedc45117b-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 30+ messages in thread
From: Christian König @ 2019-05-15 14:16 UTC (permalink / raw)
  To: Zhou, David(ChunMing),
	Koenig, Christian, Olsak, Marek, Liang, Prike,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 2816 bytes --]

That is a good point, but actually not a problem in practice.

See the change to ttm_eu_fence_buffer_objects:
> -               ttm_bo_add_to_lru(bo);
> +               if (list_empty(&bo->lru))
> +                       ttm_bo_add_to_lru(bo);
> +               else
> +                       ttm_bo_move_to_lru_tail(bo, NULL);

We still move the BOs to the end of the LRU in the same order as 
before; we just don't remove them when they are reserved.

Regards,
Christian.

Am 14.05.19 um 16:31 schrieb Zhou, David(ChunMing):
> how to refresh LRU to keep the order align with bo list passed from 
> user space?
>
> you can verify it by some games, performance could be different much 
> between multiple runnings.
>
> -David
>
> -------- Original Message --------
> Subject: Re: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU 
> during CS
> From: Christian König
> To: "Zhou, David(ChunMing)" ,"Olsak, Marek" ,"Liang, Prike" 
> ,dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org,amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> CC:
>
> [CAUTION: External Email]
> Hui? What do you mean with that?
>
> Christian.
>
> Am 14.05.19 um 15:12 schrieb Zhou, David(ChunMing):
>> my only concern is how to fresh LRU when bo is from bo list.
>>
>> -David
>>
>> -------- Original Message --------
>> Subject: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU 
>> during CS
>> From: Christian König
>> To: "Olsak, Marek" ,"Zhou, David(ChunMing)" ,"Liang, Prike" 
>> ,dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org,amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>> CC:
>>
>> [CAUTION: External Email]
>>
>> This avoids OOM situations when we have lots of threads
>> submitting at the same time.
>>
>> Signed-off-by: Christian König <christian.koenig-5C7GfCeVMHo@public.gmane.org>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index fff558cf385b..f9240a94217b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -648,7 +648,7 @@ static int amdgpu_cs_parser_bos(struct 
>> amdgpu_cs_parser *p,
>>         }
>>
>>         r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
>> -                                  &duplicates, true);
>> +                                  &duplicates, false);
>>         if (unlikely(r != 0)) {
>>                 if (r != -ERESTARTSYS)
>> DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
>> --
>> 2.17.1
>>
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[-- Attachment #1.2: Type: text/html, Size: 6104 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re:[PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
       [not found]                   ` <451e8757-b509-c0f7-eced-6ccedc45117b-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-05-15 14:21                     ` Zhou, David(ChunMing)
  2019-05-15 14:22                       ` [PATCH " Koenig, Christian
  0 siblings, 1 reply; 30+ messages in thread
From: Zhou, David(ChunMing) @ 2019-05-15 14:21 UTC (permalink / raw)
  To: Koenig, Christian, Zhou, David(ChunMing),
	Olsak, Marek, Liang, Prike,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 3515 bytes --]

Isn't this patch trying to stop removing all BOs from the bo list?

-David

-------- Original Message --------
Subject: Re: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
From: Christian König
To: "Zhou, David(ChunMing)" ,"Koenig, Christian" ,"Olsak, Marek" ,"Liang, Prike" ,dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org,amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
CC:

[CAUTION: External Email]
That is a good point, but actually not a problem in practice.

See the change to ttm_eu_fence_buffer_objects:
-               ttm_bo_add_to_lru(bo);
+               if (list_empty(&bo->lru))
+                       ttm_bo_add_to_lru(bo);
+               else
+                       ttm_bo_move_to_lru_tail(bo, NULL);

We still move the BOs to the end of the LRU in the same order we have before, we just don't remove them when they are reserved.

Regards,
Christian.

Am 14.05.19 um 16:31 schrieb Zhou, David(ChunMing):
how to refresh LRU to keep the order align with bo list passed from user space?

you can verify it by some games, performance could be different much between multiple runnings.

-David

-------- Original Message --------
Subject: Re: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
From: Christian König
To: "Zhou, David(ChunMing)" ,"Olsak, Marek" ,"Liang, Prike" ,dri-devel@lists.freedesktop.org,amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:dri-devel-PD4FTy7X32mptlylMvRsHA@public.gmane.orgdesktop.org,amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
CC:

[CAUTION: External Email]
Hui? What do you mean with that?

Christian.

Am 14.05.19 um 15:12 schrieb Zhou, David(ChunMing):
my only concern is how to fresh LRU when bo is from bo list.

-David

-------- Original Message --------
Subject: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
From: Christian König
To: "Olsak, Marek" ,"Zhou, David(ChunMing)" ,"Liang, Prike" ,dri-devel@lists.freedesktop.org,amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:dri-devel-PD4FTy7X32mptlylMvRsHA@public.gmane.orgdesktop.org,amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
CC:

[CAUTION: External Email]

This avoids OOM situations when we have lots of threads
submitting at the same time.

Signed-off-by: Christian König <christian.koenig-5C7GfCeVMHo@public.gmane.org><mailto:christian.koenig-5C7GfCeVMHo@public.gmane.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index fff558cf385b..f9240a94217b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -648,7 +648,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
        }

        r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
-                                  &duplicates, true);
+                                  &duplicates, false);
        if (unlikely(r != 0)) {
                if (r != -ERESTARTSYS)
                        DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
--
2.17.1





_______________________________________________
amd-gfx mailing list
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[-- Attachment #1.2: Type: text/html, Size: 6282 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
  2019-05-15 14:21                     ` Zhou, David(ChunMing)
@ 2019-05-15 14:22                       ` Koenig, Christian
  2019-05-15 14:27                         ` Zhou, David(ChunMing)
  0 siblings, 1 reply; 30+ messages in thread
From: Koenig, Christian @ 2019-05-15 14:22 UTC (permalink / raw)
  To: Zhou, David(ChunMing), Olsak, Marek, Liang, Prike, dri-devel, amd-gfx


[-- Attachment #1.1: Type: text/plain, Size: 3453 bytes --]

BO list? No, we stop removing them from the LRU.

But we still move them to the end of the LRU before releasing them.

Christian.

Am 15.05.19 um 16:21 schrieb Zhou, David(ChunMing):
Isn't this patch trying to stop removing all BOs from the bo list?

-David

-------- Original Message --------
Subject: Re: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
From: Christian König
To: "Zhou, David(ChunMing)" ,"Koenig, Christian" ,"Olsak, Marek" ,"Liang, Prike" ,dri-devel@lists.freedesktop.org,amd-gfx@lists.freedesktop.org<mailto:dri-devel@lists.freedesktop.org,amd-gfx@lists.freedesktop.org>
CC:

[CAUTION: External Email]
That is a good point, but actually not a problem in practice.

See the change to ttm_eu_fence_buffer_objects:
-               ttm_bo_add_to_lru(bo);
+               if (list_empty(&bo->lru))
+                       ttm_bo_add_to_lru(bo);
+               else
+                       ttm_bo_move_to_lru_tail(bo, NULL);

We still move the BOs to the end of the LRU in the same order we have before, we just don't remove them when they are reserved.

Regards,
Christian.

Am 14.05.19 um 16:31 schrieb Zhou, David(ChunMing):
How do we refresh the LRU to keep its order aligned with the bo list passed from user space?

You can verify this with some games; performance can differ considerably between multiple runs.

-David

-------- Original Message --------
Subject: Re: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
From: Christian König
To: "Zhou, David(ChunMing)" ,"Olsak, Marek" ,"Liang, Prike" ,dri-devel@lists.freedesktop.org,amd-gfx@lists.freedesktop.org<mailto:dri-devel@lists.freedesktop.org,amd-gfx@lists.freedesktop.org>
CC:

[CAUTION: External Email]
Huh? What do you mean by that?

Christian.

Am 14.05.19 um 15:12 schrieb Zhou, David(ChunMing):
My only concern is how to refresh the LRU when the bo comes from the bo list.

-David

-------- Original Message --------
Subject: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
From: Christian König
To: "Olsak, Marek" ,"Zhou, David(ChunMing)" ,"Liang, Prike" ,dri-devel@lists.freedesktop.org,amd-gfx@lists.freedesktop.org<mailto:dri-devel@lists.freedesktop.org,amd-gfx@lists.freedesktop.org>
CC:

[CAUTION: External Email]

This avoids OOM situations when we have lots of threads
submitting at the same time.

Signed-off-by: Christian König <christian.koenig@amd.com><mailto:christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index fff558cf385b..f9240a94217b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -648,7 +648,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
        }

        r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
-                                  &duplicates, true);
+                                  &duplicates, false);
        if (unlikely(r != 0)) {
                if (r != -ERESTARTSYS)
                        DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
--
2.17.1





_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



[-- Attachment #1.2: Type: text/html, Size: 6687 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re:[PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
  2019-05-15 14:22                       ` [PATCH " Koenig, Christian
@ 2019-05-15 14:27                         ` Zhou, David(ChunMing)
  0 siblings, 0 replies; 30+ messages in thread
From: Zhou, David(ChunMing) @ 2019-05-15 14:27 UTC (permalink / raw)
  To: Koenig, Christian, Zhou, David(ChunMing),
	Olsak, Marek, Liang, Prike,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 3942 bytes --]

Ah, sorry, I missed  "+                      ttm_bo_move_to_lru_tail(bo, NULL);".

Right, moving them to the end before releasing them addresses my concern.

Sorry for the noise.
-David


-------- Original Message --------
Subject: Re: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
From: "Koenig, Christian"
To: "Zhou, David(ChunMing)" ,"Olsak, Marek" ,"Liang, Prike" ,dri-devel@lists.freedesktop.org,amd-gfx@lists.freedesktop.org
CC:

[CAUTION: External Email]
BO list? No, we stop removing them from the LRU.

But we still move them to the end of the LRU before releasing them.

Christian.

Am 15.05.19 um 16:21 schrieb Zhou, David(ChunMing):
Isn't this patch trying to stop removing all BOs from the bo list?

-David

-------- Original Message --------
Subject: Re: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
From: Christian König
To: "Zhou, David(ChunMing)" ,"Koenig, Christian" ,"Olsak, Marek" ,"Liang, Prike" ,dri-devel@lists.freedesktop.org,amd-gfx@lists.freedesktop.org<mailto:dri-devel@lists.freedesktop.org,amd-gfx@lists.freedesktop.org>
CC:

[CAUTION: External Email]
That is a good point, but actually not a problem in practice.

See the change to ttm_eu_fence_buffer_objects:
-               ttm_bo_add_to_lru(bo);
+               if (list_empty(&bo->lru))
+                       ttm_bo_add_to_lru(bo);
+               else
+                       ttm_bo_move_to_lru_tail(bo, NULL);

We still move the BOs to the end of the LRU in the same order we have before, we just don't remove them when they are reserved.

Regards,
Christian.

Am 14.05.19 um 16:31 schrieb Zhou, David(ChunMing):
How do we refresh the LRU to keep its order aligned with the bo list passed from user space?

You can verify this with some games; performance can differ considerably between multiple runs.

-David

-------- Original Message --------
Subject: Re: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
From: Christian König
To: "Zhou, David(ChunMing)" ,"Olsak, Marek" ,"Liang, Prike" ,dri-devel@lists.freedesktop.org,amd-gfx@lists.freedesktop.org<mailto:dri-devel@lists.freedesktop.org,amd-gfx@lists.freedesktop.org>
CC:

[CAUTION: External Email]
Huh? What do you mean by that?

Christian.

Am 14.05.19 um 15:12 schrieb Zhou, David(ChunMing):
My only concern is how to refresh the LRU when the bo comes from the bo list.

-David

-------- Original Message --------
Subject: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
From: Christian König
To: "Olsak, Marek" ,"Zhou, David(ChunMing)" ,"Liang, Prike" ,dri-devel@lists.freedesktop.org,amd-gfx@lists.freedesktop.org<mailto:dri-devel@lists.freedesktop.org,amd-gfx@lists.freedesktop.org>
CC:

[CAUTION: External Email]

This avoids OOM situations when we have lots of threads
submitting at the same time.

Signed-off-by: Christian König <christian.koenig@amd.com><mailto:christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index fff558cf385b..f9240a94217b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -648,7 +648,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
        }

        r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
-                                  &duplicates, true);
+                                  &duplicates, false);
        if (unlikely(r != 0)) {
                if (r != -ERESTARTSYS)
                        DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
--
2.17.1





_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



[-- Attachment #1.2: Type: text/html, Size: 6818 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* RE: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS
  2019-05-15  7:04           ` Christian König
@ 2019-05-17  8:16             ` Liang, Prike
  0 siblings, 0 replies; 30+ messages in thread
From: Liang, Prike @ 2019-05-17  8:16 UTC (permalink / raw)
  To: Koenig, Christian, Marek Olšák; +Cc: amd-gfx mailing list, dri-devel


[-- Attachment #1.1: Type: text/plain, Size: 5297 bytes --]

Hi Christian,

With this patch series, amdgpu_vm_validate_pt_bos occasionally fails to evict amdgpu BOs and can’t
find a valid first busy bo. Another problem is that acquiring the lock of the first busy BO can run into a deadlock.

/* check if other user occupy memory too long time */
                if (!first_bo || !request_resv || !request_resv->lock.ctx) {
                        if (first_bo)
                                ttm_bo_put(first_bo);
                        return -EBUSY;
                }
                if (first_bo->resv == request_resv) {
                        ttm_bo_put(first_bo);
                        return -EBUSY;
                }
                if (ctx->interruptible)
                        ret = ww_mutex_lock_interruptible(&first_bo->resv->lock,
                                                          request_resv->lock.ctx);
                else
                        ret = ww_mutex_lock(&first_bo->resv->lock, request_resv->lock.ctx);
                if (ret) {
                        ttm_bo_put(first_bo);
                        if (ret == -EDEADLK) {
                                ret = -EAGAIN;
                        }

                        return ret;
                }

Thanks
Prike

From: Christian König <ckoenig.leichtzumerken@gmail.com>
Sent: Wednesday, May 15, 2019 3:05 PM
To: Liang, Prike <Prike.Liang@amd.com>; Marek Olšák <maraeo@gmail.com>
Cc: Zhou, David(ChunMing) <David1.Zhou@amd.com>; dri-devel <dri-devel@lists.freedesktop.org>; amd-gfx mailing list <amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS

[CAUTION: External Email]
Hi Prike,

no, that can lead to massive problems in a real OOM situation and is not something we can do here.

Christian.

Am 15.05.19 um 04:00 schrieb Liang, Prike:
Hi Christian ,

I just wonder whether, when we encounter an ENOMEM error while pinning amdgpu BOs, we can retry the validation as below.
With the following simple patch the Abaqus pinning issue is no longer observed.

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 11cbf63..72a32f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -902,11 +902,15 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
                        bo->placements[i].lpfn = lpfn;
                bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
        }
-
+retry:
        r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
        if (unlikely(r)) {
-               dev_err(adev->dev, "%p pin failed\n", bo);
-               goto error;
+                if (r == -ENOMEM){
+                        goto retry;
+                } else {
+                       dev_err(adev->dev, "%p pin failed\n", bo);
+                       goto error;
+                }
        }

        bo->pin_count = 1;


Thanks,
Prike

From: Marek Olšák <maraeo@gmail.com><mailto:maraeo@gmail.com>
Sent: Wednesday, May 15, 2019 3:33 AM
To: Christian König <ckoenig.leichtzumerken@gmail.com><mailto:ckoenig.leichtzumerken@gmail.com>
Cc: Zhou, David(ChunMing) <David1.Zhou@amd.com><mailto:David1.Zhou@amd.com>; Liang, Prike <Prike.Liang@amd.com><mailto:Prike.Liang@amd.com>; dri-devel <dri-devel@lists.freedesktop.org><mailto:dri-devel@lists.freedesktop.org>; amd-gfx mailing list <amd-gfx@lists.freedesktop.org><mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS

[CAUTION: External Email]
This series fixes the OOM errors. However, if I torture the kernel driver more, I can get it to deadlock and end up with unkillable processes. I can also get an OOM error. I just ran the test 5 times:

AMD_DEBUG=testgdsmm glxgears & AMD_DEBUG=testgdsmm glxgears & AMD_DEBUG=testgdsmm glxgears & AMD_DEBUG=testgdsmm glxgears & AMD_DEBUG=testgdsmm glxgears

Marek

On Tue, May 14, 2019 at 8:31 AM Christian König <ckoenig.leichtzumerken@gmail.com<mailto:ckoenig.leichtzumerken@gmail.com>> wrote:
This avoids OOM situations when we have lots of threads
submitting at the same time.

Signed-off-by: Christian König <christian.koenig@amd.com<mailto:christian.koenig@amd.com>>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index fff558cf385b..f9240a94217b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -648,7 +648,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
        }

        r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
-                                  &duplicates, true);
+                                  &duplicates, false);
        if (unlikely(r != 0)) {
                if (r != -ERESTARTSYS)
                        DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
--
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[-- Attachment #1.2: Type: text/html, Size: 17132 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* RE: [PATCH 01/11] drm/ttm: Make LRU removal optional.
       [not found] ` <20190514123127.1650-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (6 preceding siblings ...)
  2019-05-14 12:31   ` [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS Christian König
@ 2019-05-17 14:05   ` Zhou, David(ChunMing)
  7 siblings, 0 replies; 30+ messages in thread
From: Zhou, David(ChunMing) @ 2019-05-17 14:05 UTC (permalink / raw)
  To: Christian König, Olsak, Marek, Liang, Prike,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Tuesday, May 14, 2019 8:31 PM
> To: Olsak, Marek <Marek.Olsak@amd.com>; Zhou, David(ChunMing)
> <David1.Zhou@amd.com>; Liang, Prike <Prike.Liang@amd.com>; dri-
> devel@lists.freedesktop.org; amd-gfx@lists.freedesktop.org
> Subject: [PATCH 01/11] drm/ttm: Make LRU removal optional.
> 
> [CAUTION: External Email]
> 
> We are already doing this for DMA-buf imports and also for amdgpu VM BOs
> for quite a while now.
> 
> If this doesn't run into any problems we are probably going to stop removing
> BOs from the LRU altogether.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
[snip]
> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> index 0075eb9a0b52..957ec375a4ba 100644
> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> @@ -69,7 +69,8 @@ void ttm_eu_backoff_reservation(struct
> ww_acquire_ctx *ticket,
>         list_for_each_entry(entry, list, head) {
>                 struct ttm_buffer_object *bo = entry->bo;
> 
> -               ttm_bo_add_to_lru(bo);
> +               if (list_empty(&bo->lru))
> +                       ttm_bo_add_to_lru(bo);
>                 reservation_object_unlock(bo->resv);
>         }
>         spin_unlock(&glob->lru_lock);
> @@ -93,7 +94,7 @@ EXPORT_SYMBOL(ttm_eu_backoff_reservation);
> 
>  int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
>                            struct list_head *list, bool intr,
> -                          struct list_head *dups)
> +                          struct list_head *dups, bool del_lru)
>  {
>         struct ttm_bo_global *glob;
>         struct ttm_validate_buffer *entry; @@ -172,11 +173,11 @@ int
> ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
>                 list_add(&entry->head, list);
>         }
> 
> -       if (ticket)
> -               ww_acquire_done(ticket);
> -       spin_lock(&glob->lru_lock);
> -       ttm_eu_del_from_lru_locked(list);
> -       spin_unlock(&glob->lru_lock);
> +       if (del_lru) {
> +               spin_lock(&glob->lru_lock);
> +               ttm_eu_del_from_lru_locked(list);
> +               spin_unlock(&glob->lru_lock);
> +       }

Can you move the bo to the LRU tail here when del_lru is false?

The busy iteration in evict_first would then try other processes' BOs first, which could save loop time.

>         return 0;
>  }
>  EXPORT_SYMBOL(ttm_eu_reserve_buffers);
> @@ -203,7 +204,10 @@ void ttm_eu_fence_buffer_objects(struct
> ww_acquire_ctx *ticket,
>                         reservation_object_add_shared_fence(bo->resv, fence);
>                 else
>                         reservation_object_add_excl_fence(bo->resv, fence);
> -               ttm_bo_add_to_lru(bo);
> +               if (list_empty(&bo->lru))
> +                       ttm_bo_add_to_lru(bo);
> +               else
> +                       ttm_bo_move_to_lru_tail(bo, NULL);

If this is done above, then we don't need it here.

-David
>                 reservation_object_unlock(bo->resv);
>         }
>         spin_unlock(&glob->lru_lock);
> diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
> b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
> index 161b80fee492..5cffaa24259f 100644
> --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
> +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
> @@ -63,7 +63,7 @@ static int virtio_gpu_object_list_validate(struct
> ww_acquire_ctx *ticket,
>         struct virtio_gpu_object *qobj;
>         int ret;
> 
> -       ret = ttm_eu_reserve_buffers(ticket, head, true, NULL);
> +       ret = ttm_eu_reserve_buffers(ticket, head, true, NULL, true);
>         if (ret != 0)
>                 return ret;
> 
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> index a7c30e567f09..d28cbedba0b5 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> @@ -465,7 +465,8 @@ vmw_resource_check_buffer(struct ww_acquire_ctx
> *ticket,
>         val_buf->bo = &res->backup->base;
>         val_buf->num_shared = 0;
>         list_add_tail(&val_buf->head, &val_list);
> -       ret = ttm_eu_reserve_buffers(ticket, &val_list, interruptible, NULL);
> +       ret = ttm_eu_reserve_buffers(ticket, &val_list, interruptible, NULL,
> +                                    true);
>         if (unlikely(ret != 0))
>                 goto out_no_reserve;
> 
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
> b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
> index 3b396fea40d7..ac435b51f4eb 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
> @@ -165,7 +165,7 @@ vmw_validation_bo_reserve(struct
> vmw_validation_context *ctx,
>                           bool intr)
>  {
>         return ttm_eu_reserve_buffers(&ctx->ticket, &ctx->bo_list, intr,
> -                                     NULL);
> +                                     NULL, true);
>  }
> 
>  /**
> diff --git a/include/drm/ttm/ttm_bo_driver.h
> b/include/drm/ttm/ttm_bo_driver.h index c008346c2401..fc0d995ac90d
> 100644
> --- a/include/drm/ttm/ttm_bo_driver.h
> +++ b/include/drm/ttm/ttm_bo_driver.h
> @@ -769,7 +769,10 @@ static inline void ttm_bo_unreserve(struct
> ttm_buffer_object *bo)  {
>         if (!(bo->mem.placement & TTM_PL_FLAG_NO_EVICT)) {
>                 spin_lock(&bo->bdev->glob->lru_lock);
> -               ttm_bo_add_to_lru(bo);
> +               if (list_empty(&bo->lru))
> +                       ttm_bo_add_to_lru(bo);
> +               else
> +                       ttm_bo_move_to_lru_tail(bo, NULL);
>                 spin_unlock(&bo->bdev->glob->lru_lock);
>         }
>         reservation_object_unlock(bo->resv);
> diff --git a/include/drm/ttm/ttm_execbuf_util.h
> b/include/drm/ttm/ttm_execbuf_util.h
> index 621615fa7728..7e46cc678e7e 100644
> --- a/include/drm/ttm/ttm_execbuf_util.h
> +++ b/include/drm/ttm/ttm_execbuf_util.h
> @@ -70,6 +70,7 @@ extern void ttm_eu_backoff_reservation(struct
> ww_acquire_ctx *ticket,
>   * @list:    thread private list of ttm_validate_buffer structs.
>   * @intr:    should the wait be interruptible
>   * @dups:    [out] optional list of duplicates.
> + * @del_lru: true if BOs should be removed from the LRU.
>   *
>   * Tries to reserve bos pointed to by the list entries for validation.
>   * If the function returns 0, all buffers are marked as "unfenced", @@ -98,7
> +99,7 @@ extern void ttm_eu_backoff_reservation(struct ww_acquire_ctx
> *ticket,
> 
>  extern int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
>                                   struct list_head *list, bool intr,
> -                                 struct list_head *dups);
> +                                 struct list_head *dups, bool del_lru);
> 
>  /**
>   * function ttm_eu_fence_buffer_objects.
> --
> 2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2019-05-17 14:05 UTC | newest]

Thread overview: 30+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-14 12:31 [PATCH 01/11] drm/ttm: Make LRU removal optional Christian König
2019-05-14 12:31 ` [PATCH 02/11] drm/ttm: fix busy memory to fail other user v8 Christian König
     [not found]   ` <20190514123127.1650-2-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2019-05-15  8:38     ` Daniel Vetter
2019-05-15  8:45       ` Daniel Vetter
2019-05-15  9:27         ` Christian König
     [not found]           ` <6f862969-3937-df25-949f-9740a90dd457-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-05-15  9:28             ` Christian König
     [not found]         ` <20190515084551.GD17751-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
2019-05-15  9:30           ` Christian König
2019-05-14 12:31 ` [PATCH 07/11] drm/ttm: immediately move BOs to the new LRU Christian König
2019-05-14 12:31 ` [PATCH 10/11] drm/amd/display: use ttm_eu_reserve_buffers instead of amdgpu_bo_reserve v2 Christian König
     [not found] ` <20190514123127.1650-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2019-05-14 12:31   ` [PATCH 03/11] drm/ttm: remove the backing store if no placement is given Christian König
2019-05-14 12:31   ` [PATCH 04/11] drm/ttm: return immediately in case of a signal Christian König
2019-05-14 12:31   ` [PATCH 05/11] drm/ttm: remove manual placement preference Christian König
2019-05-14 12:31   ` [PATCH 06/11] drm/ttm: cleanup ttm_bo_mem_space Christian König
2019-05-14 12:31   ` [PATCH 08/11] drm/ttm: put new BOs immediately on the LRU Christian König
2019-05-14 12:31   ` [PATCH 09/11] drm/ttm: convert EDEADLK into EAGAIN Christian König
     [not found]     ` <20190514123127.1650-9-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2019-05-15  8:40       ` Daniel Vetter
2019-05-15  9:28         ` Christian König
2019-05-14 12:31   ` [PATCH 11/11] drm/amdgpu: stop removing BOs from the LRU during CS Christian König
     [not found]     ` <20190514123127.1650-11-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2019-05-14 13:12       ` Zhou, David(ChunMing)
2019-05-14 13:47         ` [PATCH " Christian König
     [not found]           ` <f9017911-b08a-1f98-3fc9-98121bbde78a-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-05-14 14:31             ` Zhou, David(ChunMing)
     [not found]               ` <-wsx1tz-kxfbz1yns7x33sra134gl11xhlux4lx3izissqr2httt4mb1vleyxgj8i7k6-q6ze8ub3ff8c4o0fxmx7niu76yg4-ybakue-3v14jw-ed5ol8ybh6o9-1ze886-hbstfi448pvq3pwhkj.1557844282594-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
2019-05-15 14:16                 ` [PATCH " Christian König
     [not found]                   ` <451e8757-b509-c0f7-eced-6ccedc45117b-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-05-15 14:21                     ` Zhou, David(ChunMing)
2019-05-15 14:22                       ` [PATCH " Koenig, Christian
2019-05-15 14:27                         ` Zhou, David(ChunMing)
2019-05-14 19:33     ` [PATCH " Marek Olšák
2019-05-15  2:00       ` Liang, Prike
     [not found]         ` <BYAPR12MB35256D8A0583B5DD019C2925FB090-ZGDeBxoHBPmbr42z19MNgwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-05-15  7:04           ` Christian König
2019-05-17  8:16             ` Liang, Prike
2019-05-17 14:05   ` [PATCH 01/11] drm/ttm: Make LRU removal optional Zhou, David(ChunMing)

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.