dri-devel.lists.freedesktop.org archive mirror
* [PATCH 01/10] drm/radeon: allow move_notify to be called without reservation
@ 2012-11-12 14:00 Maarten Lankhorst
  2012-11-12 14:00 ` [PATCH 02/10] drm/ttm: remove ttm_bo_cleanup_memtype_use Maarten Lankhorst
                   ` (9 more replies)
  0 siblings, 10 replies; 33+ messages in thread
From: Maarten Lankhorst @ 2012-11-12 14:00 UTC (permalink / raw)
  To: dri-devel

The few places that care should have those checks themselves instead.
This allows destruction of bo-backed memory without holding a reservation.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
 drivers/gpu/drm/radeon/radeon_gart.c   | 1 -
 drivers/gpu/drm/radeon/radeon_object.c | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 8690be7..6e24f84 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -1237,7 +1237,6 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev,
 {
 	struct radeon_bo_va *bo_va;
 
-	BUG_ON(!radeon_bo_is_reserved(bo));
 	list_for_each_entry(bo_va, &bo->va, bo_list) {
 		bo_va->valid = false;
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 65c5555..50aa508 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -520,7 +520,7 @@ void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
 int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
 				bool force_drop)
 {
-	BUG_ON(!radeon_bo_is_reserved(bo));
+	BUG_ON(!radeon_bo_is_reserved(bo) && !force_drop);
 
 	if (!(bo->tiling_flags & RADEON_TILING_SURFACE))
 		return 0;
-- 
1.8.0

* [PATCH 02/10] drm/ttm: remove ttm_bo_cleanup_memtype_use
  2012-11-12 14:00 [PATCH 01/10] drm/radeon: allow move_notify to be called without reservation Maarten Lankhorst
@ 2012-11-12 14:00 ` Maarten Lankhorst
  2012-11-19 13:26   ` Thomas Hellstrom
  2012-11-12 14:00 ` [PATCH 03/10] drm/ttm: do not check if list is empty in ttm_bo_force_list_clean Maarten Lankhorst
                   ` (8 subsequent siblings)
  9 siblings, 1 reply; 33+ messages in thread
From: Maarten Lankhorst @ 2012-11-12 14:00 UTC (permalink / raw)
  To: dri-devel

Move the memtype cleanup into ttm_bo_release_list instead.
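
A condensed sketch of the resulting release path (reconstructed from the
hunk below, with the unchanged prologue and destroy/free epilogue elided);
the point is that the driver notification and memtype teardown now happen
on the final list_kref put instead of in a separate helper:

static void ttm_bo_release_list(struct kref *list_kref)
{
	/* ... sanity BUG_ONs on refcounts and list membership as before ... */

	/* driver hook: the bo is losing its backing storage */
	if (bo->bdev->driver->move_notify)
		bo->bdev->driver->move_notify(bo, NULL);

	/* tear down the TT object before releasing the memory node */
	if (bo->ttm) {
		ttm_tt_unbind(bo->ttm);
		ttm_tt_destroy(bo->ttm);
		bo->ttm = NULL;
	}
	ttm_bo_mem_put(bo, &bo->mem);
	BUG_ON(bo->mem.mm_node != NULL);

	/* ... existing bo_count accounting and destroy/free path ... */
}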

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 47 +++++++++++++-------------------------------
 1 file changed, 14 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 9c48e8f..74d6e7c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -143,12 +143,20 @@ static void ttm_bo_release_list(struct kref *list_kref)
 	BUG_ON(atomic_read(&bo->kref.refcount));
 	BUG_ON(atomic_read(&bo->cpu_writers));
 	BUG_ON(bo->sync_obj != NULL);
-	BUG_ON(bo->mem.mm_node != NULL);
 	BUG_ON(!list_empty(&bo->lru));
 	BUG_ON(!list_empty(&bo->ddestroy));
 
-	if (bo->ttm)
+	if (bo->bdev->driver->move_notify)
+		bo->bdev->driver->move_notify(bo, NULL);
+
+	if (bo->ttm) {
+		ttm_tt_unbind(bo->ttm);
 		ttm_tt_destroy(bo->ttm);
+		bo->ttm = NULL;
+	}
+	ttm_bo_mem_put(bo, &bo->mem);
+	BUG_ON(bo->mem.mm_node != NULL);
+
 	atomic_dec(&bo->glob->bo_count);
 	if (bo->destroy)
 		bo->destroy(bo);
@@ -466,35 +474,6 @@ out_err:
 	return ret;
 }
 
-/**
- * Call bo::reserved.
- * Will release GPU memory type usage on destruction.
- * This is the place to put in driver specific hooks to release
- * driver private resources.
- * Will release the bo::reserved lock.
- */
-
-static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
-{
-	if (bo->bdev->driver->move_notify)
-		bo->bdev->driver->move_notify(bo, NULL);
-
-	if (bo->ttm) {
-		ttm_tt_unbind(bo->ttm);
-		ttm_tt_destroy(bo->ttm);
-		bo->ttm = NULL;
-	}
-	ttm_bo_mem_put(bo, &bo->mem);
-
-	atomic_set(&bo->reserved, 0);
-
-	/*
-	 * Make processes trying to reserve really pick it up.
-	 */
-	smp_mb__after_atomic_dec();
-	wake_up_all(&bo->event_queue);
-}
-
 static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
@@ -523,8 +502,9 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 		spin_unlock(&bdev->fence_lock);
 		put_count = ttm_bo_del_from_lru(bo);
 
+		atomic_set(&bo->reserved, 0);
+		wake_up_all(&bo->event_queue);
 		spin_unlock(&glob->lru_lock);
-		ttm_bo_cleanup_memtype_use(bo);
 
 		ttm_bo_list_ref_sub(bo, put_count, true);
 
@@ -619,8 +599,9 @@ retry_reserve:
 	list_del_init(&bo->ddestroy);
 	++put_count;
 
+	atomic_set(&bo->reserved, 0);
+	wake_up_all(&bo->event_queue);
 	spin_unlock(&glob->lru_lock);
-	ttm_bo_cleanup_memtype_use(bo);
 
 	ttm_bo_list_ref_sub(bo, put_count, true);
 
-- 
1.8.0

* [PATCH 03/10] drm/ttm: do not check if list is empty in ttm_bo_force_list_clean
  2012-11-12 14:00 [PATCH 01/10] drm/radeon: allow move_notify to be called without reservation Maarten Lankhorst
  2012-11-12 14:00 ` [PATCH 02/10] drm/ttm: remove ttm_bo_cleanup_memtype_use Maarten Lankhorst
@ 2012-11-12 14:00 ` Maarten Lankhorst
  2012-11-19 13:33   ` Thomas Hellstrom
  2012-11-12 14:00 ` [PATCH 04/10] drm/ttm: change fence_lock to inner lock, v3 Maarten Lankhorst
                   ` (7 subsequent siblings)
  9 siblings, 1 reply; 33+ messages in thread
From: Maarten Lankhorst @ 2012-11-12 14:00 UTC (permalink / raw)
  To: dri-devel

Just use the error returned by ttm_mem_evict_first instead.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 27 ++++++++-------------------
 1 file changed, 8 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 74d6e7c..a3383a7 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1302,29 +1302,18 @@ EXPORT_SYMBOL(ttm_bo_create);
 static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
 					unsigned mem_type, bool allow_errors)
 {
-	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
-	struct ttm_bo_global *glob = bdev->glob;
-	int ret;
-
-	/*
-	 * Can't use standard list traversal since we're unlocking.
-	 */
+	int ret = 0;
 
-	spin_lock(&glob->lru_lock);
-	while (!list_empty(&man->lru)) {
-		spin_unlock(&glob->lru_lock);
+	while (!ret) {
 		ret = ttm_mem_evict_first(bdev, mem_type, false, false, false);
-		if (ret) {
-			if (allow_errors) {
-				return ret;
-			} else {
-				pr_err("Cleanup eviction failed\n");
-			}
+		if (ret == -EBUSY)
+			return 0;
+		else if (ret && !allow_errors) {
+			pr_err("Cleanup eviction failed\n");
+			ret = 0;
 		}
-		spin_lock(&glob->lru_lock);
 	}
-	spin_unlock(&glob->lru_lock);
-	return 0;
+	return ret;
 }
 
 int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type)
-- 
1.8.0

* [PATCH 04/10] drm/ttm: change fence_lock to inner lock, v3
  2012-11-12 14:00 [PATCH 01/10] drm/radeon: allow move_notify to be called without reservation Maarten Lankhorst
  2012-11-12 14:00 ` [PATCH 02/10] drm/ttm: remove ttm_bo_cleanup_memtype_use Maarten Lankhorst
  2012-11-12 14:00 ` [PATCH 03/10] drm/ttm: do not check if list is empty in ttm_bo_force_list_clean Maarten Lankhorst
@ 2012-11-12 14:00 ` Maarten Lankhorst
  2012-11-19 14:17   ` Thomas Hellstrom
  2012-11-12 14:00 ` [PATCH 05/10] drm/ttm: add sense to ttm_bo_cleanup_refs, v4 Maarten Lankhorst
                   ` (6 subsequent siblings)
  9 siblings, 1 reply; 33+ messages in thread
From: Maarten Lankhorst @ 2012-11-12 14:00 UTC (permalink / raw)
  To: dri-devel

I changed the hierarchy to make fence_lock the innermost lock
instead of the outermost one. This simplifies things slightly, and
hopefully makes it easier to make fence_lock global at some point,
should that be needed.

To make things clearer, I changed the order around in ttm_bo_cleanup_refs
and ttm_bo_cleanup_refs_or_queue.

A reservation is taken first, then the fence lock is taken and a wait is attempted.
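
As a rough illustration of the new nesting (a simplified sketch, not a
literal hunk from this patch): lru_lock is taken, the bo is try-reserved
under it, and only then is fence_lock taken as the innermost lock to
inspect or wait on the sync object:

	/* new nesting: lru_lock -> reserve -> fence_lock (innermost) */
	spin_lock(&glob->lru_lock);
	ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
	if (!ret) {
		spin_lock(&bdev->fence_lock);
		ret = ttm_bo_wait(bo, false, false, true);
		if (ret == -EBUSY)
			sync_obj = driver->sync_obj_ref(bo->sync_obj);
		spin_unlock(&bdev->fence_lock);
		/* ... unreserve, and queue on ddestroy if still busy ... */
	}
	spin_unlock(&glob->lru_lock);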

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>

v2:
 - fix conflict with upstream race fix, simplifies ttm_bo_cleanup_refs
v3:
 - change removal of fence_lock to making it an inner lock instead
---
 drivers/gpu/drm/ttm/ttm_bo.c           | 95 ++++++++++++++++------------------
 drivers/gpu/drm/ttm/ttm_execbuf_util.c |  4 +-
 2 files changed, 48 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index a3383a7..70285ff 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -478,28 +478,26 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
 	struct ttm_bo_global *glob = bo->glob;
-	struct ttm_bo_driver *driver;
+	struct ttm_bo_driver *driver = bdev->driver;
 	void *sync_obj = NULL;
 	int put_count;
 	int ret;
 
-	spin_lock(&bdev->fence_lock);
-	(void) ttm_bo_wait(bo, false, false, true);
-	if (!bo->sync_obj) {
-
-		spin_lock(&glob->lru_lock);
-
-		/**
-		 * Lock inversion between bo:reserve and bdev::fence_lock here,
-		 * but that's OK, since we're only trylocking.
-		 */
-
-		ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
+	spin_lock(&glob->lru_lock);
+	ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
+	if (!ret) {
+		spin_lock(&bdev->fence_lock);
+		ret = ttm_bo_wait(bo, false, false, true);
 
-		if (unlikely(ret == -EBUSY))
+		if (unlikely(ret == -EBUSY)) {
+			sync_obj = driver->sync_obj_ref(bo->sync_obj);
+			spin_unlock(&bdev->fence_lock);
+			atomic_set(&bo->reserved, 0);
+			wake_up_all(&bo->event_queue);
 			goto queue;
-
+		}
 		spin_unlock(&bdev->fence_lock);
+
 		put_count = ttm_bo_del_from_lru(bo);
 
 		atomic_set(&bo->reserved, 0);
@@ -509,18 +507,11 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 		ttm_bo_list_ref_sub(bo, put_count, true);
 
 		return;
-	} else {
-		spin_lock(&glob->lru_lock);
 	}
 queue:
-	driver = bdev->driver;
-	if (bo->sync_obj)
-		sync_obj = driver->sync_obj_ref(bo->sync_obj);
-
 	kref_get(&bo->list_kref);
 	list_add_tail(&bo->ddestroy, &bdev->ddestroy);
 	spin_unlock(&glob->lru_lock);
-	spin_unlock(&bdev->fence_lock);
 
 	if (sync_obj) {
 		driver->sync_obj_flush(sync_obj);
@@ -546,54 +537,60 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 			       bool no_wait_gpu)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_bo_driver *driver = bdev->driver;
 	struct ttm_bo_global *glob = bo->glob;
 	int put_count;
 	int ret = 0;
+	void *sync_obj;
 
 retry:
-	spin_lock(&bdev->fence_lock);
-	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-	spin_unlock(&bdev->fence_lock);
+	spin_lock(&glob->lru_lock);
 
-	if (unlikely(ret != 0))
-		return ret;
+	ret = ttm_bo_reserve_locked(bo, interruptible,
+				    no_wait_reserve, false, 0);
 
-retry_reserve:
-	spin_lock(&glob->lru_lock);
+	if (unlikely(ret)) {
+		spin_unlock(&glob->lru_lock);
+		return ret;
+	}
 
 	if (unlikely(list_empty(&bo->ddestroy))) {
+		atomic_set(&bo->reserved, 0);
+		wake_up_all(&bo->event_queue);
 		spin_unlock(&glob->lru_lock);
 		return 0;
 	}
 
-	ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
-
-	if (unlikely(ret == -EBUSY)) {
-		spin_unlock(&glob->lru_lock);
-		if (likely(!no_wait_reserve))
-			ret = ttm_bo_wait_unreserved(bo, interruptible);
-		if (unlikely(ret != 0))
+	spin_lock(&bdev->fence_lock);
+	ret = ttm_bo_wait(bo, false, false, true);
+	if (ret) {
+		if (no_wait_gpu) {
+			spin_unlock(&bdev->fence_lock);
+			atomic_set(&bo->reserved, 0);
+			wake_up_all(&bo->event_queue);
+			spin_unlock(&glob->lru_lock);
 			return ret;
+		}
 
-		goto retry_reserve;
-	}
-
-	BUG_ON(ret != 0);
-
-	/**
-	 * We can re-check for sync object without taking
-	 * the bo::lock since setting the sync object requires
-	 * also bo::reserved. A busy object at this point may
-	 * be caused by another thread recently starting an accelerated
-	 * eviction.
-	 */
+		/**
+		 * Take a reference to the fence and unreserve, if the wait
+		 * was succesful and no new sync_obj was attached,
+		 * ttm_bo_wait in retry will return ret = 0, and end the loop.
+		 */
 
-	if (unlikely(bo->sync_obj)) {
+		sync_obj = driver->sync_obj_ref(&bo->sync_obj);
+		spin_unlock(&bdev->fence_lock);
 		atomic_set(&bo->reserved, 0);
 		wake_up_all(&bo->event_queue);
 		spin_unlock(&glob->lru_lock);
+
+		ret = driver->sync_obj_wait(bo->sync_obj, false, interruptible);
+		driver->sync_obj_unref(&sync_obj);
+		if (ret)
+			return ret;
 		goto retry;
 	}
+	spin_unlock(&bdev->fence_lock);
 
 	put_count = ttm_bo_del_from_lru(bo);
 	list_del_init(&bo->ddestroy);
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 1986d00..cd9e452 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -213,8 +213,8 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
 	driver = bdev->driver;
 	glob = bo->glob;
 
-	spin_lock(&bdev->fence_lock);
 	spin_lock(&glob->lru_lock);
+	spin_lock(&bdev->fence_lock);
 
 	list_for_each_entry(entry, list, head) {
 		bo = entry->bo;
@@ -223,8 +223,8 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
 		ttm_bo_unreserve_locked(bo);
 		entry->reserved = false;
 	}
-	spin_unlock(&glob->lru_lock);
 	spin_unlock(&bdev->fence_lock);
+	spin_unlock(&glob->lru_lock);
 
 	list_for_each_entry(entry, list, head) {
 		if (entry->old_sync_obj)
-- 
1.8.0

* [PATCH 05/10] drm/ttm: add sense to ttm_bo_cleanup_refs, v4
  2012-11-12 14:00 [PATCH 01/10] drm/radeon: allow move_notify to be called without reservation Maarten Lankhorst
                   ` (2 preceding siblings ...)
  2012-11-12 14:00 ` [PATCH 04/10] drm/ttm: change fence_lock to inner lock, v3 Maarten Lankhorst
@ 2012-11-12 14:00 ` Maarten Lankhorst
  2012-11-12 14:00 ` [PATCH 06/10] drm/ttm: remove no_wait_reserve, v2 Maarten Lankhorst
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 33+ messages in thread
From: Maarten Lankhorst @ 2012-11-12 14:00 UTC (permalink / raw)
  To: dri-devel

Require the lru_lock and a reservation to be held, and kill off the retry
loop; no new sync objects should be attached at this point any more.
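
The calling convention this introduces, as a simplified sketch of the
delayed-delete path below: the caller reserves under lru_lock, and the
function drops both the reservation and the lru_lock before returning,
so there is no retry loop left inside it:

	spin_lock(&glob->lru_lock);
	ret = ttm_bo_reserve_locked(bo, false, !remove_all, false, 0);
	if (ret)
		/* could not reserve; just drop the lru lock and move on */
		spin_unlock(&glob->lru_lock);
	else
		/* drops both the reservation and lru_lock before returning */
		ret = ttm_bo_cleanup_refs_and_unlock(bo, false, !remove_all);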

v2:
 - moved upwards in patch list and fixed conflicts.
v3:
 - rebase for fence lock, and rename to ttm_bo_cleanup_refs_and_unlock
   for clarity that it unlocks lru.
v4:
 - add WARN_ON(!atomic_read(&bo->kref.refcount)) in reserve to ensure
   that nobody accidentally reserves a dead buffer.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c           | 123 ++++++++++++++++-----------------
 drivers/gpu/drm/ttm/ttm_execbuf_util.c |   1 +
 2 files changed, 62 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 70285ff..e6df086 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -296,6 +296,8 @@ int ttm_bo_reserve(struct ttm_buffer_object *bo,
 	int put_count = 0;
 	int ret;
 
+	WARN_ON(!atomic_read(&bo->kref.refcount));
+
 	spin_lock(&glob->lru_lock);
 	ret = ttm_bo_reserve_locked(bo, interruptible, no_wait, use_sequence,
 				    sequence);
@@ -522,82 +524,78 @@ queue:
 }
 
 /**
- * function ttm_bo_cleanup_refs
+ * function ttm_bo_cleanup_refs_and_unlock
  * If bo idle, remove from delayed- and lru lists, and unref.
  * If not idle, do nothing.
  *
+ * Must be called with lru_lock and reservation held, this function
+ * will drop both before returning.
+ *
  * @interruptible         Any sleeps should occur interruptibly.
- * @no_wait_reserve       Never wait for reserve. Return -EBUSY instead.
  * @no_wait_gpu           Never wait for gpu. Return -EBUSY instead.
  */
 
-static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
-			       bool interruptible,
-			       bool no_wait_reserve,
-			       bool no_wait_gpu)
+static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
+					  bool interruptible,
+					  bool no_wait_gpu)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
 	struct ttm_bo_driver *driver = bdev->driver;
 	struct ttm_bo_global *glob = bo->glob;
 	int put_count;
 	int ret = 0;
-	void *sync_obj;
-
-retry:
-	spin_lock(&glob->lru_lock);
-
-	ret = ttm_bo_reserve_locked(bo, interruptible,
-				    no_wait_reserve, false, 0);
 
-	if (unlikely(ret)) {
-		spin_unlock(&glob->lru_lock);
-		return ret;
-	}
+	spin_lock(&bdev->fence_lock);
+	ret = ttm_bo_wait(bo, false, false, true);
 
-	if (unlikely(list_empty(&bo->ddestroy))) {
+	if (ret && no_wait_gpu) {
+		spin_unlock(&bdev->fence_lock);
 		atomic_set(&bo->reserved, 0);
 		wake_up_all(&bo->event_queue);
 		spin_unlock(&glob->lru_lock);
-		return 0;
-	}
-
-	spin_lock(&bdev->fence_lock);
-	ret = ttm_bo_wait(bo, false, false, true);
-	if (ret) {
-		if (no_wait_gpu) {
-			spin_unlock(&bdev->fence_lock);
-			atomic_set(&bo->reserved, 0);
-			wake_up_all(&bo->event_queue);
-			spin_unlock(&glob->lru_lock);
-			return ret;
-		}
+		return ret;
+	} else if (ret) {
+		void *sync_obj;
 
 		/**
-		 * Take a reference to the fence and unreserve, if the wait
-		 * was succesful and no new sync_obj was attached,
-		 * ttm_bo_wait in retry will return ret = 0, and end the loop.
+		 * Take a reference to the fence and unreserve,
+		 * at this point the buffer should be dead, so
+		 * no new sync objects can be attached.
 		 */
-
 		sync_obj = driver->sync_obj_ref(&bo->sync_obj);
 		spin_unlock(&bdev->fence_lock);
 		atomic_set(&bo->reserved, 0);
 		wake_up_all(&bo->event_queue);
 		spin_unlock(&glob->lru_lock);
 
-		ret = driver->sync_obj_wait(bo->sync_obj, false, interruptible);
+		ret = driver->sync_obj_wait(sync_obj, false, interruptible);
 		driver->sync_obj_unref(&sync_obj);
 		if (ret)
 			return ret;
-		goto retry;
+		spin_lock(&glob->lru_lock);
+
+		/* remove sync_obj with ttm_bo_wait */
+		spin_lock(&bdev->fence_lock);
+		ret = ttm_bo_wait(bo, false, false, true);
+		spin_unlock(&bdev->fence_lock);
+
+		WARN_ON(ret);
+
+	} else {
+		spin_unlock(&bdev->fence_lock);
+		atomic_set(&bo->reserved, 0);
+		wake_up_all(&bo->event_queue);
+	}
+
+	if (unlikely(list_empty(&bo->ddestroy))) {
+		spin_unlock(&glob->lru_lock);
+		return 0;
 	}
-	spin_unlock(&bdev->fence_lock);
 
 	put_count = ttm_bo_del_from_lru(bo);
 	list_del_init(&bo->ddestroy);
 	++put_count;
 
-	atomic_set(&bo->reserved, 0);
-	wake_up_all(&bo->event_queue);
 	spin_unlock(&glob->lru_lock);
 
 	ttm_bo_list_ref_sub(bo, put_count, true);
@@ -606,8 +604,8 @@ retry:
 }
 
 /**
- * Traverse the delayed list, and call ttm_bo_cleanup_refs on all
- * encountered buffers.
+ * Traverse the delayed list, and call ttm_bo_cleanup_refs_and_unlock
+ * on all encountered buffers.
  */
 
 static int ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
@@ -633,9 +631,14 @@ static int ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
 			kref_get(&nentry->list_kref);
 		}
 
-		spin_unlock(&glob->lru_lock);
-		ret = ttm_bo_cleanup_refs(entry, false, !remove_all,
-					  !remove_all);
+		ret = ttm_bo_reserve_locked(entry, false, !remove_all,
+					    false, 0);
+		if (ret)
+			spin_unlock(&glob->lru_lock);
+		else
+			ret = ttm_bo_cleanup_refs_and_unlock(entry, false,
+							     !remove_all);
+
 		kref_put(&entry->list_kref, ttm_bo_release_list);
 		entry = nentry;
 
@@ -788,15 +791,6 @@ retry:
 	bo = list_first_entry(&man->lru, struct ttm_buffer_object, lru);
 	kref_get(&bo->list_kref);
 
-	if (!list_empty(&bo->ddestroy)) {
-		spin_unlock(&glob->lru_lock);
-		ret = ttm_bo_cleanup_refs(bo, interruptible,
-					  no_wait_reserve, no_wait_gpu);
-		kref_put(&bo->list_kref, ttm_bo_release_list);
-
-		return ret;
-	}
-
 	ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
 
 	if (unlikely(ret == -EBUSY)) {
@@ -815,6 +809,13 @@ retry:
 		goto retry;
 	}
 
+	if (!list_empty(&bo->ddestroy)) {
+		ret = ttm_bo_cleanup_refs_and_unlock(bo, interruptible,
+						     no_wait_gpu);
+		kref_put(&bo->list_kref, ttm_bo_release_list);
+		return ret;
+	}
+
 	put_count = ttm_bo_del_from_lru(bo);
 	spin_unlock(&glob->lru_lock);
 
@@ -1778,14 +1779,6 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 				      struct ttm_buffer_object, swap);
 		kref_get(&bo->list_kref);
 
-		if (!list_empty(&bo->ddestroy)) {
-			spin_unlock(&glob->lru_lock);
-			(void) ttm_bo_cleanup_refs(bo, false, false, false);
-			kref_put(&bo->list_kref, ttm_bo_release_list);
-			spin_lock(&glob->lru_lock);
-			continue;
-		}
-
 		/**
 		 * Reserve buffer. Since we unlock while sleeping, we need
 		 * to re-check that nobody removed us from the swap-list while
@@ -1801,6 +1794,12 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 		}
 	}
 
+	if (!list_empty(&bo->ddestroy)) {
+		ret = ttm_bo_cleanup_refs_and_unlock(bo, false, false);
+		kref_put(&bo->list_kref, ttm_bo_release_list);
+		return ret;
+	}
+
 	BUG_ON(ret != 0);
 	put_count = ttm_bo_del_from_lru(bo);
 	spin_unlock(&glob->lru_lock);
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index cd9e452..5490492 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -152,6 +152,7 @@ retry:
 	list_for_each_entry(entry, list, head) {
 		struct ttm_buffer_object *bo = entry->bo;
 
+		WARN_ON(!atomic_read(&bo->kref.refcount));
 retry_this_bo:
 		ret = ttm_bo_reserve_locked(bo, true, true, true, val_seq);
 		switch (ret) {
-- 
1.8.0

* [PATCH 06/10] drm/ttm: remove no_wait_reserve, v2
  2012-11-12 14:00 [PATCH 01/10] drm/radeon: allow move_notify to be called without reservation Maarten Lankhorst
                   ` (3 preceding siblings ...)
  2012-11-12 14:00 ` [PATCH 05/10] drm/ttm: add sense to ttm_bo_cleanup_refs, v4 Maarten Lankhorst
@ 2012-11-12 14:00 ` Maarten Lankhorst
  2012-11-12 14:00 ` [PATCH 07/10] drm/ttm: cope with reserved buffers on swap list in ttm_bo_swapout Maarten Lankhorst
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 33+ messages in thread
From: Maarten Lankhorst @ 2012-11-12 14:00 UTC (permalink / raw)
  To: dri-devel

All items on the lru list are always reservable, so this is a stupid
thing to keep. Not only that, it is used in a way which would
guarantee deadlocks if it were ever to be set to block on reserve.

This is a lot of churn, but mostly because of the removal of the
argument, which can be nested arbitrarily deeply in many places.

Only one place changed meaningfully; the rest is just churn through
an arbitrarily complex call chain that can end up in ttm_mem_evict_first.
The change is simply that ttm_mem_evict_first now always behaves as if
no_wait_reserve were true.

This should work since ttm currently guarantees that items on the lru are
always reservable, and blocking on reserve while holding some other bo
reserved is enough to run into a deadlock.
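
For drivers the visible effect is simply one argument less; as an
illustration (taken from the radeon hunk below), a typical validate call
changes like this:

	/* before: interruptible, no_wait_reserve, no_wait_gpu */
	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false, false);

	/* after: the no_wait_reserve argument is gone */
	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);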

v2:
 - Warn if -EBUSY is returned on reservation; all objects on the list
   should be reservable. Adjusted the patch slightly due to conflicts.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
 drivers/gpu/drm/ast/ast_ttm.c            | 10 +++---
 drivers/gpu/drm/cirrus/cirrus_ttm.c      | 10 +++---
 drivers/gpu/drm/mgag200/mgag200_ttm.c    | 10 +++---
 drivers/gpu/drm/nouveau/nouveau_bo.c     | 55 ++++++++++++++---------------
 drivers/gpu/drm/nouveau/nouveau_bo.h     |  2 +-
 drivers/gpu/drm/nouveau/nouveau_gem.c    |  2 +-
 drivers/gpu/drm/radeon/radeon_object.c   |  8 ++---
 drivers/gpu/drm/radeon/radeon_ttm.c      | 31 +++++++++--------
 drivers/gpu/drm/ttm/ttm_bo.c             | 60 ++++++++++++++------------------
 drivers/gpu/drm/ttm/ttm_bo_util.c        |  6 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c   | 13 ++++---
 drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c  |  4 +--
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c |  5 ++-
 include/drm/ttm/ttm_bo_api.h             |  3 +-
 include/drm/ttm/ttm_bo_driver.h          | 19 ++++------
 15 files changed, 110 insertions(+), 128 deletions(-)

diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c
index 1a026ac..adcac90 100644
--- a/drivers/gpu/drm/ast/ast_ttm.c
+++ b/drivers/gpu/drm/ast/ast_ttm.c
@@ -186,11 +186,11 @@ static void ast_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *
 
 static int ast_bo_move(struct ttm_buffer_object *bo,
 		       bool evict, bool interruptible,
-		       bool no_wait_reserve, bool no_wait_gpu,
+		       bool no_wait_gpu,
 		       struct ttm_mem_reg *new_mem)
 {
 	int r;
-	r = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
+	r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
 	return r;
 }
 
@@ -383,7 +383,7 @@ int ast_bo_pin(struct ast_bo *bo, u32 pl_flag, u64 *gpu_addr)
 	ast_ttm_placement(bo, pl_flag);
 	for (i = 0; i < bo->placement.num_placement; i++)
 		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false, false);
+	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret)
 		return ret;
 
@@ -406,7 +406,7 @@ int ast_bo_unpin(struct ast_bo *bo)
 
 	for (i = 0; i < bo->placement.num_placement ; i++)
 		bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT;
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false, false);
+	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret)
 		return ret;
 
@@ -431,7 +431,7 @@ int ast_bo_push_sysram(struct ast_bo *bo)
 	for (i = 0; i < bo->placement.num_placement ; i++)
 		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
 
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false, false);
+	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret) {
 		DRM_ERROR("pushing to VRAM failed\n");
 		return ret;
diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c
index bc83f83..b0e0365 100644
--- a/drivers/gpu/drm/cirrus/cirrus_ttm.c
+++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c
@@ -186,11 +186,11 @@ static void cirrus_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
 
 static int cirrus_bo_move(struct ttm_buffer_object *bo,
 		       bool evict, bool interruptible,
-		       bool no_wait_reserve, bool no_wait_gpu,
+		       bool no_wait_gpu,
 		       struct ttm_mem_reg *new_mem)
 {
 	int r;
-	r = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
+	r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
 	return r;
 }
 
@@ -388,7 +388,7 @@ int cirrus_bo_pin(struct cirrus_bo *bo, u32 pl_flag, u64 *gpu_addr)
 	cirrus_ttm_placement(bo, pl_flag);
 	for (i = 0; i < bo->placement.num_placement; i++)
 		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false, false);
+	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret)
 		return ret;
 
@@ -411,7 +411,7 @@ int cirrus_bo_unpin(struct cirrus_bo *bo)
 
 	for (i = 0; i < bo->placement.num_placement ; i++)
 		bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT;
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false, false);
+	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret)
 		return ret;
 
@@ -436,7 +436,7 @@ int cirrus_bo_push_sysram(struct cirrus_bo *bo)
 	for (i = 0; i < bo->placement.num_placement ; i++)
 		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
 
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false, false);
+	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret) {
 		DRM_ERROR("pushing to VRAM failed\n");
 		return ret;
diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c
index 1504699..44ee1d8 100644
--- a/drivers/gpu/drm/mgag200/mgag200_ttm.c
+++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c
@@ -186,11 +186,11 @@ static void mgag200_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_r
 
 static int mgag200_bo_move(struct ttm_buffer_object *bo,
 		       bool evict, bool interruptible,
-		       bool no_wait_reserve, bool no_wait_gpu,
+		       bool no_wait_gpu,
 		       struct ttm_mem_reg *new_mem)
 {
 	int r;
-	r = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
+	r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
 	return r;
 }
 
@@ -382,7 +382,7 @@ int mgag200_bo_pin(struct mgag200_bo *bo, u32 pl_flag, u64 *gpu_addr)
 	mgag200_ttm_placement(bo, pl_flag);
 	for (i = 0; i < bo->placement.num_placement; i++)
 		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false, false);
+	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret)
 		return ret;
 
@@ -405,7 +405,7 @@ int mgag200_bo_unpin(struct mgag200_bo *bo)
 
 	for (i = 0; i < bo->placement.num_placement ; i++)
 		bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT;
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false, false);
+	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret)
 		return ret;
 
@@ -430,7 +430,7 @@ int mgag200_bo_push_sysram(struct mgag200_bo *bo)
 	for (i = 0; i < bo->placement.num_placement ; i++)
 		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
 
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false, false);
+	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret) {
 		DRM_ERROR("pushing to VRAM failed\n");
 		return ret;
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 3ee2295..b6f1c1e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -315,7 +315,7 @@ nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t memtype)
 
 	nouveau_bo_placement_set(nvbo, memtype, 0);
 
-	ret = nouveau_bo_validate(nvbo, false, false, false);
+	ret = nouveau_bo_validate(nvbo, false, false);
 	if (ret == 0) {
 		switch (bo->mem.mem_type) {
 		case TTM_PL_VRAM:
@@ -351,7 +351,7 @@ nouveau_bo_unpin(struct nouveau_bo *nvbo)
 
 	nouveau_bo_placement_set(nvbo, bo->mem.placement, 0);
 
-	ret = nouveau_bo_validate(nvbo, false, false, false);
+	ret = nouveau_bo_validate(nvbo, false, false);
 	if (ret == 0) {
 		switch (bo->mem.mem_type) {
 		case TTM_PL_VRAM:
@@ -392,12 +392,12 @@ nouveau_bo_unmap(struct nouveau_bo *nvbo)
 
 int
 nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible,
-		    bool no_wait_reserve, bool no_wait_gpu)
+		    bool no_wait_gpu)
 {
 	int ret;
 
-	ret = ttm_bo_validate(&nvbo->bo, &nvbo->placement, interruptible,
-			      no_wait_reserve, no_wait_gpu);
+	ret = ttm_bo_validate(&nvbo->bo, &nvbo->placement,
+			      interruptible, no_wait_gpu);
 	if (ret)
 		return ret;
 
@@ -556,8 +556,7 @@ nouveau_bo_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *pl)
 static int
 nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
 			      struct nouveau_bo *nvbo, bool evict,
-			      bool no_wait_reserve, bool no_wait_gpu,
-			      struct ttm_mem_reg *new_mem)
+			      bool no_wait_gpu, struct ttm_mem_reg *new_mem)
 {
 	struct nouveau_fence *fence = NULL;
 	int ret;
@@ -567,7 +566,7 @@ nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
 		return ret;
 
 	ret = ttm_bo_move_accel_cleanup(&nvbo->bo, fence, evict,
-					no_wait_reserve, no_wait_gpu, new_mem);
+					no_wait_gpu, new_mem);
 	nouveau_fence_unref(&fence);
 	return ret;
 }
@@ -965,8 +964,7 @@ nouveau_vma_getmap(struct nouveau_channel *chan, struct nouveau_bo *nvbo,
 
 static int
 nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
-		     bool no_wait_reserve, bool no_wait_gpu,
-		     struct ttm_mem_reg *new_mem)
+		     bool no_wait_gpu, struct ttm_mem_reg *new_mem)
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct nouveau_channel *chan = chan = drm->channel;
@@ -995,7 +993,6 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
 	ret = drm->ttm.move(chan, bo, &bo->mem, new_mem);
 	if (ret == 0) {
 		ret = nouveau_bo_move_accel_cleanup(chan, nvbo, evict,
-						    no_wait_reserve,
 						    no_wait_gpu, new_mem);
 	}
 
@@ -1064,8 +1061,7 @@ nouveau_bo_move_init(struct nouveau_drm *drm)
 
 static int
 nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
-		      bool no_wait_reserve, bool no_wait_gpu,
-		      struct ttm_mem_reg *new_mem)
+		      bool no_wait_gpu, struct ttm_mem_reg *new_mem)
 {
 	u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING;
 	struct ttm_placement placement;
@@ -1078,7 +1074,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
 
 	tmp_mem = *new_mem;
 	tmp_mem.mm_node = NULL;
-	ret = ttm_bo_mem_space(bo, &placement, &tmp_mem, intr, no_wait_reserve, no_wait_gpu);
+	ret = ttm_bo_mem_space(bo, &placement, &tmp_mem, intr, no_wait_gpu);
 	if (ret)
 		return ret;
 
@@ -1086,11 +1082,11 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
 	if (ret)
 		goto out;
 
-	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_reserve, no_wait_gpu, &tmp_mem);
+	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_gpu, &tmp_mem);
 	if (ret)
 		goto out;
 
-	ret = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, new_mem);
+	ret = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem);
 out:
 	ttm_bo_mem_put(bo, &tmp_mem);
 	return ret;
@@ -1098,8 +1094,7 @@ out:
 
 static int
 nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
-		      bool no_wait_reserve, bool no_wait_gpu,
-		      struct ttm_mem_reg *new_mem)
+		      bool no_wait_gpu, struct ttm_mem_reg *new_mem)
 {
 	u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING;
 	struct ttm_placement placement;
@@ -1112,15 +1107,15 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
 
 	tmp_mem = *new_mem;
 	tmp_mem.mm_node = NULL;
-	ret = ttm_bo_mem_space(bo, &placement, &tmp_mem, intr, no_wait_reserve, no_wait_gpu);
+	ret = ttm_bo_mem_space(bo, &placement, &tmp_mem, intr, no_wait_gpu);
 	if (ret)
 		return ret;
 
-	ret = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, &tmp_mem);
+	ret = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem);
 	if (ret)
 		goto out;
 
-	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_reserve, no_wait_gpu, new_mem);
+	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_gpu, new_mem);
 	if (ret)
 		goto out;
 
@@ -1195,8 +1190,7 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
 
 static int
 nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
-		bool no_wait_reserve, bool no_wait_gpu,
-		struct ttm_mem_reg *new_mem)
+		bool no_wait_gpu, struct ttm_mem_reg *new_mem)
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
@@ -1220,23 +1214,26 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
 
 	/* CPU copy if we have no accelerated method available */
 	if (!drm->ttm.move) {
-		ret = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
+		ret = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
 		goto out;
 	}
 
 	/* Hardware assisted copy. */
 	if (new_mem->mem_type == TTM_PL_SYSTEM)
-		ret = nouveau_bo_move_flipd(bo, evict, intr, no_wait_reserve, no_wait_gpu, new_mem);
+		ret = nouveau_bo_move_flipd(bo, evict, intr,
+					    no_wait_gpu, new_mem);
 	else if (old_mem->mem_type == TTM_PL_SYSTEM)
-		ret = nouveau_bo_move_flips(bo, evict, intr, no_wait_reserve, no_wait_gpu, new_mem);
+		ret = nouveau_bo_move_flips(bo, evict, intr,
+					    no_wait_gpu, new_mem);
 	else
-		ret = nouveau_bo_move_m2mf(bo, evict, intr, no_wait_reserve, no_wait_gpu, new_mem);
+		ret = nouveau_bo_move_m2mf(bo, evict, intr,
+					   no_wait_gpu, new_mem);
 
 	if (!ret)
 		goto out;
 
 	/* Fallback to software copy. */
-	ret = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
+	ret = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
 
 out:
 	if (nv_device(drm->device)->card_type < NV_50) {
@@ -1343,7 +1340,7 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
 	nvbo->placement.fpfn = 0;
 	nvbo->placement.lpfn = mappable;
 	nouveau_bo_placement_set(nvbo, TTM_PL_FLAG_VRAM, 0);
-	return nouveau_bo_validate(nvbo, false, true, false);
+	return nouveau_bo_validate(nvbo, false, false);
 }
 
 static int
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
index 61b8980..689b59b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -76,7 +76,7 @@ u32  nouveau_bo_rd32(struct nouveau_bo *, unsigned index);
 void nouveau_bo_wr32(struct nouveau_bo *, unsigned index, u32 val);
 void nouveau_bo_fence(struct nouveau_bo *, struct nouveau_fence *);
 int  nouveau_bo_validate(struct nouveau_bo *, bool interruptible,
-			 bool no_wait_reserve, bool no_wait_gpu);
+			 bool no_wait_gpu);
 
 struct nouveau_vma *
 nouveau_bo_vma_find(struct nouveau_bo *, struct nouveau_vm *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 6d8391d..7b9364b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -434,7 +434,7 @@ validate_list(struct nouveau_channel *chan, struct list_head *list,
 			return ret;
 		}
 
-		ret = nouveau_bo_validate(nvbo, true, false, false);
+		ret = nouveau_bo_validate(nvbo, true, false);
 		if (unlikely(ret)) {
 			if (ret != -ERESTARTSYS)
 				NV_ERROR(drm, "fail ttm_validate\n");
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 50aa508..2986686 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -240,7 +240,7 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
 	}
 	for (i = 0; i < bo->placement.num_placement; i++)
 		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
-	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false, false);
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 	if (likely(r == 0)) {
 		bo->pin_count = 1;
 		if (gpu_addr != NULL)
@@ -269,7 +269,7 @@ int radeon_bo_unpin(struct radeon_bo *bo)
 		return 0;
 	for (i = 0; i < bo->placement.num_placement; i++)
 		bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT;
-	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false, false);
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 	if (unlikely(r != 0))
 		dev_err(bo->rdev->dev, "%p validate failed for unpin\n", bo);
 	return r;
@@ -355,7 +355,7 @@ int radeon_bo_list_validate(struct list_head *head)
 		retry:
 			radeon_ttm_placement_from_domain(bo, domain);
 			r = ttm_bo_validate(&bo->tbo, &bo->placement,
-						true, false, false);
+						true, false);
 			if (unlikely(r)) {
 				if (r != -ERESTARTSYS && domain == RADEON_GEM_DOMAIN_VRAM) {
 					domain |= RADEON_GEM_DOMAIN_GTT;
@@ -575,7 +575,7 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 			/* hurrah the memory is not visible ! */
 			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
 			rbo->placement.lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT;
-			r = ttm_bo_validate(bo, &rbo->placement, false, true, false);
+			r = ttm_bo_validate(bo, &rbo->placement, false, false);
 			if (unlikely(r != 0))
 				return r;
 			offset = bo->mem.start << PAGE_SHIFT;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 563c8ed..1d8ff2f 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -216,7 +216,7 @@ static void radeon_move_null(struct ttm_buffer_object *bo,
 }
 
 static int radeon_move_blit(struct ttm_buffer_object *bo,
-			bool evict, int no_wait_reserve, bool no_wait_gpu,
+			bool evict, bool no_wait_gpu,
 			struct ttm_mem_reg *new_mem,
 			struct ttm_mem_reg *old_mem)
 {
@@ -266,14 +266,14 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
 			&fence);
 	/* FIXME: handle copy error */
 	r = ttm_bo_move_accel_cleanup(bo, (void *)fence,
-				      evict, no_wait_reserve, no_wait_gpu, new_mem);
+				      evict, no_wait_gpu, new_mem);
 	radeon_fence_unref(&fence);
 	return r;
 }
 
 static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
 				bool evict, bool interruptible,
-				bool no_wait_reserve, bool no_wait_gpu,
+				bool no_wait_gpu,
 				struct ttm_mem_reg *new_mem)
 {
 	struct radeon_device *rdev;
@@ -294,7 +294,7 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
 	placement.busy_placement = &placements;
 	placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
-			     interruptible, no_wait_reserve, no_wait_gpu);
+			     interruptible, no_wait_gpu);
 	if (unlikely(r)) {
 		return r;
 	}
@@ -308,11 +308,11 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
-	r = radeon_move_blit(bo, true, no_wait_reserve, no_wait_gpu, &tmp_mem, old_mem);
+	r = radeon_move_blit(bo, true, no_wait_gpu, &tmp_mem, old_mem);
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
-	r = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, new_mem);
+	r = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem);
 out_cleanup:
 	ttm_bo_mem_put(bo, &tmp_mem);
 	return r;
@@ -320,7 +320,7 @@ out_cleanup:
 
 static int radeon_move_ram_vram(struct ttm_buffer_object *bo,
 				bool evict, bool interruptible,
-				bool no_wait_reserve, bool no_wait_gpu,
+				bool no_wait_gpu,
 				struct ttm_mem_reg *new_mem)
 {
 	struct radeon_device *rdev;
@@ -340,15 +340,16 @@ static int radeon_move_ram_vram(struct ttm_buffer_object *bo,
 	placement.num_busy_placement = 1;
 	placement.busy_placement = &placements;
 	placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
-	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, interruptible, no_wait_reserve, no_wait_gpu);
+	r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
+			     interruptible, no_wait_gpu);
 	if (unlikely(r)) {
 		return r;
 	}
-	r = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, &tmp_mem);
+	r = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem);
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
-	r = radeon_move_blit(bo, true, no_wait_reserve, no_wait_gpu, new_mem, old_mem);
+	r = radeon_move_blit(bo, true, no_wait_gpu, new_mem, old_mem);
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
@@ -359,7 +360,7 @@ out_cleanup:
 
 static int radeon_bo_move(struct ttm_buffer_object *bo,
 			bool evict, bool interruptible,
-			bool no_wait_reserve, bool no_wait_gpu,
+			bool no_wait_gpu,
 			struct ttm_mem_reg *new_mem)
 {
 	struct radeon_device *rdev;
@@ -388,18 +389,18 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
 	if (old_mem->mem_type == TTM_PL_VRAM &&
 	    new_mem->mem_type == TTM_PL_SYSTEM) {
 		r = radeon_move_vram_ram(bo, evict, interruptible,
-					no_wait_reserve, no_wait_gpu, new_mem);
+					no_wait_gpu, new_mem);
 	} else if (old_mem->mem_type == TTM_PL_SYSTEM &&
 		   new_mem->mem_type == TTM_PL_VRAM) {
 		r = radeon_move_ram_vram(bo, evict, interruptible,
-					    no_wait_reserve, no_wait_gpu, new_mem);
+					    no_wait_gpu, new_mem);
 	} else {
-		r = radeon_move_blit(bo, evict, no_wait_reserve, no_wait_gpu, new_mem, old_mem);
+		r = radeon_move_blit(bo, evict, no_wait_gpu, new_mem, old_mem);
 	}
 
 	if (r) {
 memcpy:
-		r = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
+		r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
 	}
 	return r;
 }
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index e6df086..1d77ad1 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -375,7 +375,7 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc)
 static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 				  struct ttm_mem_reg *mem,
 				  bool evict, bool interruptible,
-				  bool no_wait_reserve, bool no_wait_gpu)
+				  bool no_wait_gpu)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
 	bool old_is_pci = ttm_mem_reg_is_pci(bdev, &bo->mem);
@@ -429,12 +429,12 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 
 	if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
 	    !(new_man->flags & TTM_MEMTYPE_FLAG_FIXED))
-		ret = ttm_bo_move_ttm(bo, evict, no_wait_reserve, no_wait_gpu, mem);
+		ret = ttm_bo_move_ttm(bo, evict, no_wait_gpu, mem);
 	else if (bdev->driver->move)
 		ret = bdev->driver->move(bo, evict, interruptible,
-					 no_wait_reserve, no_wait_gpu, mem);
+					 no_wait_gpu, mem);
 	else
-		ret = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, mem);
+		ret = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, mem);
 
 	if (ret) {
 		if (bdev->driver->move_notify) {
@@ -717,7 +717,7 @@ void ttm_bo_unlock_delayed_workqueue(struct ttm_bo_device *bdev, int resched)
 EXPORT_SYMBOL(ttm_bo_unlock_delayed_workqueue);
 
 static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
-			bool no_wait_reserve, bool no_wait_gpu)
+			bool no_wait_gpu)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
 	struct ttm_mem_reg evict_mem;
@@ -748,7 +748,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
 	placement.num_busy_placement = 0;
 	bdev->driver->evict_flags(bo, &placement);
 	ret = ttm_bo_mem_space(bo, &placement, &evict_mem, interruptible,
-				no_wait_reserve, no_wait_gpu);
+				no_wait_gpu);
 	if (ret) {
 		if (ret != -ERESTARTSYS) {
 			pr_err("Failed to find memory space for buffer 0x%p eviction\n",
@@ -759,7 +759,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
 	}
 
 	ret = ttm_bo_handle_move_mem(bo, &evict_mem, true, interruptible,
-				     no_wait_reserve, no_wait_gpu);
+				     no_wait_gpu);
 	if (ret) {
 		if (ret != -ERESTARTSYS)
 			pr_err("Buffer eviction failed\n");
@@ -773,7 +773,7 @@ out:
 
 static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 				uint32_t mem_type,
-				bool interruptible, bool no_wait_reserve,
+				bool interruptible,
 				bool no_wait_gpu)
 {
 	struct ttm_bo_global *glob = bdev->glob;
@@ -781,7 +781,6 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 	struct ttm_buffer_object *bo;
 	int ret, put_count = 0;
 
-retry:
 	spin_lock(&glob->lru_lock);
 	if (list_empty(&man->lru)) {
 		spin_unlock(&glob->lru_lock);
@@ -793,20 +792,11 @@ retry:
 
 	ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
 
-	if (unlikely(ret == -EBUSY)) {
+	if (WARN_ON_ONCE(ret == -EBUSY)) {
 		spin_unlock(&glob->lru_lock);
-		if (likely(!no_wait_reserve))
-			ret = ttm_bo_wait_unreserved(bo, interruptible);
 
 		kref_put(&bo->list_kref, ttm_bo_release_list);
-
-		/**
-		 * We *need* to retry after releasing the lru lock.
-		 */
-
-		if (unlikely(ret != 0))
-			return ret;
-		goto retry;
+		return ret;
 	}
 
 	if (!list_empty(&bo->ddestroy)) {
@@ -823,7 +813,7 @@ retry:
 
 	ttm_bo_list_ref_sub(bo, put_count, true);
 
-	ret = ttm_bo_evict(bo, interruptible, no_wait_reserve, no_wait_gpu);
+	ret = ttm_bo_evict(bo, interruptible, no_wait_gpu);
 	ttm_bo_unreserve(bo);
 
 	kref_put(&bo->list_kref, ttm_bo_release_list);
@@ -848,7 +838,6 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
 					struct ttm_placement *placement,
 					struct ttm_mem_reg *mem,
 					bool interruptible,
-					bool no_wait_reserve,
 					bool no_wait_gpu)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
@@ -861,8 +850,8 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
 			return ret;
 		if (mem->mm_node)
 			break;
-		ret = ttm_mem_evict_first(bdev, mem_type, interruptible,
-						no_wait_reserve, no_wait_gpu);
+		ret = ttm_mem_evict_first(bdev, mem_type,
+					  interruptible, no_wait_gpu);
 		if (unlikely(ret != 0))
 			return ret;
 	} while (1);
@@ -927,7 +916,7 @@ static bool ttm_bo_mt_compatible(struct ttm_mem_type_manager *man,
 int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 			struct ttm_placement *placement,
 			struct ttm_mem_reg *mem,
-			bool interruptible, bool no_wait_reserve,
+			bool interruptible,
 			bool no_wait_gpu)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
@@ -1018,7 +1007,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 		}
 
 		ret = ttm_bo_mem_force_space(bo, mem_type, placement, mem,
-						interruptible, no_wait_reserve, no_wait_gpu);
+						interruptible, no_wait_gpu);
 		if (ret == 0 && mem->mm_node) {
 			mem->placement = cur_flags;
 			return 0;
@@ -1033,7 +1022,7 @@ EXPORT_SYMBOL(ttm_bo_mem_space);
 
 int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
 			struct ttm_placement *placement,
-			bool interruptible, bool no_wait_reserve,
+			bool interruptible,
 			bool no_wait_gpu)
 {
 	int ret = 0;
@@ -1060,10 +1049,12 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
 	/*
 	 * Determine where to move the buffer.
 	 */
-	ret = ttm_bo_mem_space(bo, placement, &mem, interruptible, no_wait_reserve, no_wait_gpu);
+	ret = ttm_bo_mem_space(bo, placement, &mem,
+			       interruptible, no_wait_gpu);
 	if (ret)
 		goto out_unlock;
-	ret = ttm_bo_handle_move_mem(bo, &mem, false, interruptible, no_wait_reserve, no_wait_gpu);
+	ret = ttm_bo_handle_move_mem(bo, &mem, false,
+				     interruptible, no_wait_gpu);
 out_unlock:
 	if (ret && mem.mm_node)
 		ttm_bo_mem_put(bo, &mem);
@@ -1092,7 +1083,7 @@ static int ttm_bo_mem_compat(struct ttm_placement *placement,
 
 int ttm_bo_validate(struct ttm_buffer_object *bo,
 			struct ttm_placement *placement,
-			bool interruptible, bool no_wait_reserve,
+			bool interruptible,
 			bool no_wait_gpu)
 {
 	int ret;
@@ -1108,7 +1099,8 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
 	 */
 	ret = ttm_bo_mem_compat(placement, &bo->mem);
 	if (ret < 0) {
-		ret = ttm_bo_move_buffer(bo, placement, interruptible, no_wait_reserve, no_wait_gpu);
+		ret = ttm_bo_move_buffer(bo, placement, interruptible,
+					 no_wait_gpu);
 		if (ret)
 			return ret;
 	} else {
@@ -1224,7 +1216,7 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
 			goto out_err;
 	}
 
-	ret = ttm_bo_validate(bo, placement, interruptible, false, false);
+	ret = ttm_bo_validate(bo, placement, interruptible, false);
 	if (ret)
 		goto out_err;
 
@@ -1303,7 +1295,7 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
 	int ret = 0;
 
 	while (!ret) {
-		ret = ttm_mem_evict_first(bdev, mem_type, false, false, false);
+		ret = ttm_mem_evict_first(bdev, mem_type, false, false);
 		if (ret == -EBUSY)
 			return 0;
 		else if (ret && !allow_errors) {
@@ -1826,7 +1818,7 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 		evict_mem.mem_type = TTM_PL_SYSTEM;
 
 		ret = ttm_bo_handle_move_mem(bo, &evict_mem, true,
-					     false, false, false);
+					     false, false);
 		if (unlikely(ret != 0))
 			goto out;
 	}
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index b9c4e51..9e9c5d2 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -43,7 +43,7 @@ void ttm_bo_free_old_node(struct ttm_buffer_object *bo)
 }
 
 int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
-		    bool evict, bool no_wait_reserve,
+		    bool evict,
 		    bool no_wait_gpu, struct ttm_mem_reg *new_mem)
 {
 	struct ttm_tt *ttm = bo->ttm;
@@ -314,7 +314,7 @@ static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst,
 }
 
 int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
-		       bool evict, bool no_wait_reserve, bool no_wait_gpu,
+		       bool evict, bool no_wait_gpu,
 		       struct ttm_mem_reg *new_mem)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
@@ -611,7 +611,7 @@ EXPORT_SYMBOL(ttm_bo_kunmap);
 
 int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 			      void *sync_obj,
-			      bool evict, bool no_wait_reserve,
+			      bool evict,
 			      bool no_wait_gpu,
 			      struct ttm_mem_reg *new_mem)
 {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
index bd78257..2ace48d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
@@ -66,7 +66,7 @@ int vmw_dmabuf_to_placement(struct vmw_private *dev_priv,
 	if (unlikely(ret != 0))
 		goto err;
 
-	ret = ttm_bo_validate(bo, placement, interruptible, false, false);
+	ret = ttm_bo_validate(bo, placement, interruptible, false);
 
 	ttm_bo_unreserve(bo);
 
@@ -123,7 +123,7 @@ int vmw_dmabuf_to_vram_or_gmr(struct vmw_private *dev_priv,
 	else
 		placement = &vmw_vram_gmr_placement;
 
-	ret = ttm_bo_validate(bo, placement, interruptible, false, false);
+	ret = ttm_bo_validate(bo, placement, interruptible, false);
 	if (likely(ret == 0) || ret == -ERESTARTSYS)
 		goto err_unreserve;
 
@@ -138,7 +138,7 @@ int vmw_dmabuf_to_vram_or_gmr(struct vmw_private *dev_priv,
 	else
 		placement = &vmw_vram_placement;
 
-	ret = ttm_bo_validate(bo, placement, interruptible, false, false);
+	ret = ttm_bo_validate(bo, placement, interruptible, false);
 
 err_unreserve:
 	ttm_bo_unreserve(bo);
@@ -224,10 +224,9 @@ int vmw_dmabuf_to_start_of_vram(struct vmw_private *dev_priv,
 	if (bo->mem.mem_type == TTM_PL_VRAM &&
 	    bo->mem.start < bo->num_pages &&
 	    bo->mem.start > 0)
-		(void) ttm_bo_validate(bo, &vmw_sys_placement, false,
-				       false, false);
+		(void) ttm_bo_validate(bo, &vmw_sys_placement, false, false);
 
-	ret = ttm_bo_validate(bo, &placement, interruptible, false, false);
+	ret = ttm_bo_validate(bo, &placement, interruptible, false);
 
 	/* For some reason we didn't up at the start of vram */
 	WARN_ON(ret == 0 && bo->offset != 0);
@@ -316,7 +315,7 @@ void vmw_bo_pin(struct ttm_buffer_object *bo, bool pin)
 	placement.num_placement = 1;
 	placement.placement = &pl_flags;
 
-	ret = ttm_bo_validate(bo, &placement, false, true, true);
+	ret = ttm_bo_validate(bo, &placement, false, true);
 
 	BUG_ON(ret != 0 || bo->mem.mem_type != old_mem_type);
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index e5775a0..546313f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -921,7 +921,7 @@ static int vmw_validate_single_buffer(struct vmw_private *dev_priv,
 	 * used as a GMR, this will return -ENOMEM.
 	 */
 
-	ret = ttm_bo_validate(bo, &vmw_vram_gmr_placement, true, false, false);
+	ret = ttm_bo_validate(bo, &vmw_vram_gmr_placement, true, false);
 	if (likely(ret == 0 || ret == -ERESTARTSYS))
 		return ret;
 
@@ -931,7 +931,7 @@ static int vmw_validate_single_buffer(struct vmw_private *dev_priv,
 	 */
 
 	DRM_INFO("Falling through to VRAM.\n");
-	ret = ttm_bo_validate(bo, &vmw_vram_placement, true, false, false);
+	ret = ttm_bo_validate(bo, &vmw_vram_placement, true, false);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 0ca3e1c..abe7c72 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -839,7 +839,7 @@ int vmw_surface_do_validate(struct vmw_private *dev_priv,
 			goto out_no_reserve;
 
 		ret = ttm_bo_validate(srf->backup, &vmw_srf_placement,
-				      true, false, false);
+				      true, false);
 		if (unlikely(ret != 0))
 			goto out_no_validate;
 	}
@@ -969,8 +969,7 @@ int vmw_surface_evict(struct vmw_private *dev_priv,
 	if (unlikely(ret != 0))
 		goto out_no_reserve;
 
-	ret = ttm_bo_validate(srf->backup, &vmw_srf_placement,
-			      true, false, false);
+	ret = ttm_bo_validate(srf->backup, &vmw_srf_placement, true, false);
 	if (unlikely(ret != 0))
 		goto out_no_validate;
 
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 13e33f0..c22a385 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -340,7 +340,6 @@ extern int ttm_bo_wait(struct ttm_buffer_object *bo, bool lazy,
  * @bo: The buffer object.
  * @placement: Proposed placement for the buffer object.
  * @interruptible: Sleep interruptible if sleeping.
- * @no_wait_reserve: Return immediately if other buffers are busy.
  * @no_wait_gpu: Return immediately if the GPU is busy.
  *
  * Changes placement and caching policy of the buffer object
@@ -353,7 +352,7 @@ extern int ttm_bo_wait(struct ttm_buffer_object *bo, bool lazy,
  */
 extern int ttm_bo_validate(struct ttm_buffer_object *bo,
 				struct ttm_placement *placement,
-				bool interruptible, bool no_wait_reserve,
+				bool interruptible,
 				bool no_wait_gpu);
 
 /**
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 0c8c3b5..887c3c0 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -394,7 +394,7 @@ struct ttm_bo_driver {
 	 */
 	int (*move) (struct ttm_buffer_object *bo,
 		     bool evict, bool interruptible,
-		     bool no_wait_reserve, bool no_wait_gpu,
+		     bool no_wait_gpu,
 		     struct ttm_mem_reg *new_mem);
 
 	/**
@@ -706,7 +706,6 @@ extern bool ttm_mem_reg_is_pci(struct ttm_bo_device *bdev,
  * @proposed_placement: Proposed new placement for the buffer object.
  * @mem: A struct ttm_mem_reg.
  * @interruptible: Sleep interruptible when sliping.
- * @no_wait_reserve: Return immediately if other buffers are busy.
  * @no_wait_gpu: Return immediately if the GPU is busy.
  *
  * Allocate memory space for the buffer object pointed to by @bo, using
@@ -722,7 +721,7 @@ extern int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 				struct ttm_placement *placement,
 				struct ttm_mem_reg *mem,
 				bool interruptible,
-				bool no_wait_reserve, bool no_wait_gpu);
+				bool no_wait_gpu);
 
 extern void ttm_bo_mem_put(struct ttm_buffer_object *bo,
 			   struct ttm_mem_reg *mem);
@@ -904,7 +903,6 @@ extern int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo,
  *
  * @bo: A pointer to a struct ttm_buffer_object.
  * @evict: 1: This is an eviction. Don't try to pipeline.
- * @no_wait_reserve: Return immediately if other buffers are busy.
  * @no_wait_gpu: Return immediately if the GPU is busy.
  * @new_mem: struct ttm_mem_reg indicating where to move.
  *
@@ -919,15 +917,14 @@ extern int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo,
  */
 
 extern int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
-			   bool evict, bool no_wait_reserve,
-			   bool no_wait_gpu, struct ttm_mem_reg *new_mem);
+			   bool evict, bool no_wait_gpu,
+			   struct ttm_mem_reg *new_mem);
 
 /**
  * ttm_bo_move_memcpy
  *
  * @bo: A pointer to a struct ttm_buffer_object.
  * @evict: 1: This is an eviction. Don't try to pipeline.
- * @no_wait_reserve: Return immediately if other buffers are busy.
  * @no_wait_gpu: Return immediately if the GPU is busy.
  * @new_mem: struct ttm_mem_reg indicating where to move.
  *
@@ -942,8 +939,8 @@ extern int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
  */
 
 extern int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
-			      bool evict, bool no_wait_reserve,
-			      bool no_wait_gpu, struct ttm_mem_reg *new_mem);
+			      bool evict, bool no_wait_gpu,
+			      struct ttm_mem_reg *new_mem);
 
 /**
  * ttm_bo_free_old_node
@@ -960,7 +957,6 @@ extern void ttm_bo_free_old_node(struct ttm_buffer_object *bo);
  * @bo: A pointer to a struct ttm_buffer_object.
  * @sync_obj: A sync object that signals when moving is complete.
  * @evict: This is an evict move. Don't return until the buffer is idle.
- * @no_wait_reserve: Return immediately if other buffers are busy.
  * @no_wait_gpu: Return immediately if the GPU is busy.
  * @new_mem: struct ttm_mem_reg indicating where to move.
  *
@@ -974,8 +970,7 @@ extern void ttm_bo_free_old_node(struct ttm_buffer_object *bo);
 
 extern int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 				     void *sync_obj,
-				     bool evict, bool no_wait_reserve,
-				     bool no_wait_gpu,
+				     bool evict, bool no_wait_gpu,
 				     struct ttm_mem_reg *new_mem);
 /**
  * ttm_io_prot
-- 
1.8.0

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 07/10] drm/ttm: cope with reserved buffers on swap list in ttm_bo_swapout
  2012-11-12 14:00 [PATCH 01/10] drm/radeon: allow move_notify to be called without reservation Maarten Lankhorst
                   ` (4 preceding siblings ...)
  2012-11-12 14:00 ` [PATCH 06/10] drm/ttm: remove no_wait_reserve, v2 Maarten Lankhorst
@ 2012-11-12 14:00 ` Maarten Lankhorst
  2012-11-12 14:00 ` [PATCH 08/10] drm/ttm: cope with reserved buffers on lru list in ttm_mem_evict_first Maarten Lankhorst
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 33+ messages in thread
From: Maarten Lankhorst @ 2012-11-12 14:00 UTC (permalink / raw)
  To: dri-devel

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 27 ++++++++++-----------------
 1 file changed, 10 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 1d77ad1..b9c26a5 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1761,16 +1761,7 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 	uint32_t swap_placement = (TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM);
 
 	spin_lock(&glob->lru_lock);
-	while (ret == -EBUSY) {
-		if (unlikely(list_empty(&glob->swap_lru))) {
-			spin_unlock(&glob->lru_lock);
-			return -EBUSY;
-		}
-
-		bo = list_first_entry(&glob->swap_lru,
-				      struct ttm_buffer_object, swap);
-		kref_get(&bo->list_kref);
-
+	list_for_each_entry(bo, &glob->swap_lru, swap) {
 		/**
 		 * Reserve buffer. Since we unlock while sleeping, we need
 		 * to re-check that nobody removed us from the swap-list while
@@ -1778,21 +1769,23 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 		 */
 
 		ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
-		if (unlikely(ret == -EBUSY)) {
-			spin_unlock(&glob->lru_lock);
-			ttm_bo_wait_unreserved(bo, false);
-			kref_put(&bo->list_kref, ttm_bo_release_list);
-			spin_lock(&glob->lru_lock);
-		}
+		if (!ret)
+			break;
 	}
 
+	if (ret) {
+		spin_unlock(&glob->lru_lock);
+		return ret;
+	}
+
+	kref_get(&bo->list_kref);
+
 	if (!list_empty(&bo->ddestroy)) {
 		ret = ttm_bo_cleanup_refs_and_unlock(bo, false, false);
 		kref_put(&bo->list_kref, ttm_bo_release_list);
 		return ret;
 	}
 
-	BUG_ON(ret != 0);
 	put_count = ttm_bo_del_from_lru(bo);
 	spin_unlock(&glob->lru_lock);
 
-- 
1.8.0

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 08/10] drm/ttm: cope with reserved buffers on lru list in ttm_mem_evict_first
  2012-11-12 14:00 [PATCH 01/10] drm/radeon: allow move_notify to be called without reservation Maarten Lankhorst
                   ` (5 preceding siblings ...)
  2012-11-12 14:00 ` [PATCH 07/10] drm/ttm: cope with reserved buffers on swap list in ttm_bo_swapout Maarten Lankhorst
@ 2012-11-12 14:00 ` Maarten Lankhorst
  2012-11-12 14:00 ` [PATCH 09/10] drm/ttm: remove lru_lock around ttm_bo_reserve Maarten Lankhorst
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 33+ messages in thread
From: Maarten Lankhorst @ 2012-11-12 14:00 UTC (permalink / raw)
  To: dri-devel

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index b9c26a5..a760178 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -779,26 +779,21 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 	struct ttm_bo_global *glob = bdev->glob;
 	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
 	struct ttm_buffer_object *bo;
-	int ret, put_count = 0;
+	int ret = -EBUSY, put_count = 0;
 
 	spin_lock(&glob->lru_lock);
-	if (list_empty(&man->lru)) {
-		spin_unlock(&glob->lru_lock);
-		return -EBUSY;
+	list_for_each_entry(bo, &man->lru, lru) {
+		ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
+		if (!ret)
+			break;
 	}
 
-	bo = list_first_entry(&man->lru, struct ttm_buffer_object, lru);
-	kref_get(&bo->list_kref);
-
-	ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
-
-	if (WARN_ON_ONCE(ret == -EBUSY)) {
+	if (ret) {
 		spin_unlock(&glob->lru_lock);
-
-		kref_put(&bo->list_kref, ttm_bo_release_list);
 		return ret;
 	}
 
+	kref_get(&bo->list_kref);
 	if (!list_empty(&bo->ddestroy)) {
 		ret = ttm_bo_cleanup_refs_and_unlock(bo, interruptible,
 						     no_wait_gpu);
-- 
1.8.0

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 09/10] drm/ttm: remove lru_lock around ttm_bo_reserve
  2012-11-12 14:00 [PATCH 01/10] drm/radeon: allow move_notify to be called without reservation Maarten Lankhorst
                   ` (6 preceding siblings ...)
  2012-11-12 14:00 ` [PATCH 08/10] drm/ttm: cope with reserved buffers on lru list in ttm_mem_evict_first Maarten Lankhorst
@ 2012-11-12 14:00 ` Maarten Lankhorst
  2012-11-12 14:00 ` [PATCH 10/10] drm/ttm: remove reliance on ttm_bo_wait_unreserved Maarten Lankhorst
  2012-11-12 14:03 ` [PATCH 01/10] drm/radeon: allow move_notify to be called without reservation Maarten Lankhorst
  9 siblings, 0 replies; 33+ messages in thread
From: Maarten Lankhorst @ 2012-11-12 14:00 UTC (permalink / raw)
  To: dri-devel

There should no longer be assumptions that reserve will always succeed
with the lru lock held, so we can safely break the whole atomic
reserve/lru thing. As a bonus this fixes most lockdep annotations for
reservations.
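
The resulting order in ttm_bo_reserve() is roughly (sketch, matching the
diff below): reserve first, possibly sleeping with no spinlock held, and
only then take the lru lock to drop the bo off the LRU:

	ret = ttm_bo_reserve_nolru(bo, interruptible, no_wait,
				   use_sequence, sequence);
	if (likely(ret == 0)) {
		spin_lock(&glob->lru_lock);
		put_count = ttm_bo_del_from_lru(bo);
		spin_unlock(&glob->lru_lock);
		ttm_bo_list_ref_sub(bo, put_count, true);
	}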

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c           | 50 ++++++++++++++++++++++------------
 drivers/gpu/drm/ttm/ttm_execbuf_util.c |  2 +-
 include/drm/ttm/ttm_bo_driver.h        | 17 ++----------
 3 files changed, 37 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index a760178..e57dae5 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -221,14 +221,13 @@ int ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
 	return put_count;
 }
 
-int ttm_bo_reserve_locked(struct ttm_buffer_object *bo,
+int ttm_bo_reserve_nolru(struct ttm_buffer_object *bo,
 			  bool interruptible,
 			  bool no_wait, bool use_sequence, uint32_t sequence)
 {
-	struct ttm_bo_global *glob = bo->glob;
 	int ret;
 
-	while (unlikely(atomic_cmpxchg(&bo->reserved, 0, 1) != 0)) {
+	while (unlikely(atomic_xchg(&bo->reserved, 1) != 0)) {
 		/**
 		 * Deadlock avoidance for multi-bo reserving.
 		 */
@@ -249,25 +248,36 @@ int ttm_bo_reserve_locked(struct ttm_buffer_object *bo,
 		if (no_wait)
 			return -EBUSY;
 
-		spin_unlock(&glob->lru_lock);
 		ret = ttm_bo_wait_unreserved(bo, interruptible);
-		spin_lock(&glob->lru_lock);
 
 		if (unlikely(ret))
 			return ret;
 	}
 
 	if (use_sequence) {
+		bool wake_up = false;
 		/**
 		 * Wake up waiters that may need to recheck for deadlock,
 		 * if we decreased the sequence number.
 		 */
 		if (unlikely((bo->val_seq - sequence < (1 << 31))
 			     || !bo->seq_valid))
-			wake_up_all(&bo->event_queue);
+			wake_up = true;
 
+		/*
+		 * In the worst case with memory ordering these values can be
+		 * seen in the wrong order. However since we call wake_up_all
+		 * in that case, this will hopefully not pose a problem,
+		 * and the worst case would only cause someone to accidentally
+		 * hit -EAGAIN in ttm_bo_reserve when they see old value of
+		 * val_seq. However this would only happen if seq_valid was
+		 * written before val_seq was, and just means some slightly
+		 * increased cpu usage
+		 */
 		bo->val_seq = sequence;
 		bo->seq_valid = true;
+		if (wake_up)
+			wake_up_all(&bo->event_queue);
 	} else {
 		bo->seq_valid = false;
 	}
@@ -298,14 +308,14 @@ int ttm_bo_reserve(struct ttm_buffer_object *bo,
 
 	WARN_ON(!atomic_read(&bo->kref.refcount));
 
-	spin_lock(&glob->lru_lock);
-	ret = ttm_bo_reserve_locked(bo, interruptible, no_wait, use_sequence,
+	ret = ttm_bo_reserve_nolru(bo, interruptible, no_wait, use_sequence,
 				    sequence);
-	if (likely(ret == 0))
+	if (likely(ret == 0)) {
+		spin_lock(&glob->lru_lock);
 		put_count = ttm_bo_del_from_lru(bo);
-	spin_unlock(&glob->lru_lock);
-
-	ttm_bo_list_ref_sub(bo, put_count, true);
+		spin_unlock(&glob->lru_lock);
+		ttm_bo_list_ref_sub(bo, put_count, true);
+	}
 
 	return ret;
 }
@@ -486,7 +496,7 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 	int ret;
 
 	spin_lock(&glob->lru_lock);
-	ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
+	ret = ttm_bo_reserve_nolru(bo, false, true, false, 0);
 	if (!ret) {
 		spin_lock(&bdev->fence_lock);
 		ret = ttm_bo_wait(bo, false, false, true);
@@ -631,8 +641,14 @@ static int ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
 			kref_get(&nentry->list_kref);
 		}
 
-		ret = ttm_bo_reserve_locked(entry, false, !remove_all,
-					    false, 0);
+		ret = ttm_bo_reserve_nolru(entry, false, true, false, 0);
+		if (remove_all && ret) {
+			spin_unlock(&glob->lru_lock);
+			ret = ttm_bo_reserve_nolru(entry, false, false,
+						   false, 0);
+			spin_lock(&glob->lru_lock);
+		}
+
 		if (ret)
 			spin_unlock(&glob->lru_lock);
 		else
@@ -783,7 +799,7 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 
 	spin_lock(&glob->lru_lock);
 	list_for_each_entry(bo, &man->lru, lru) {
-		ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
+		ret = ttm_bo_reserve_nolru(bo, false, true, false, 0);
 		if (!ret)
 			break;
 	}
@@ -1763,7 +1779,7 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 		 * we slept.
 		 */
 
-		ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
+		ret = ttm_bo_reserve_nolru(bo, false, true, false, 0);
 		if (!ret)
 			break;
 	}
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 5490492..b3fe824 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -154,7 +154,7 @@ retry:
 
 		WARN_ON(!atomic_read(&bo->kref.refcount));
 retry_this_bo:
-		ret = ttm_bo_reserve_locked(bo, true, true, true, val_seq);
+		ret = ttm_bo_reserve_nolru(bo, true, true, true, val_seq);
 		switch (ret) {
 		case 0:
 			break;
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 887c3c0..e9cdae1 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -793,15 +793,6 @@ extern void ttm_mem_io_unlock(struct ttm_mem_type_manager *man);
  * to make room for a buffer already reserved. (Buffers are reserved before
  * they are evicted). The following algorithm prevents such deadlocks from
  * occurring:
- * 1) Buffers are reserved with the lru spinlock held. Upon successful
- * reservation they are removed from the lru list. This stops a reserved buffer
- * from being evicted. However the lru spinlock is released between the time
- * a buffer is selected for eviction and the time it is reserved.
- * Therefore a check is made when a buffer is reserved for eviction, that it
- * is still the first buffer in the lru list, before it is removed from the
- * list. @check_lru == 1 forces this check. If it fails, the function returns
- * -EINVAL, and the caller should then choose a new buffer to evict and repeat
- * the procedure.
  * 2) Processes attempting to reserve multiple buffers other than for eviction,
  * (typically execbuf), should first obtain a unique 32-bit
  * validation sequence number,
@@ -835,7 +826,7 @@ extern int ttm_bo_reserve(struct ttm_buffer_object *bo,
 
 
 /**
- * ttm_bo_reserve_locked:
+ * ttm_bo_reserve_nolru:
  *
  * @bo: A pointer to a struct ttm_buffer_object.
  * @interruptible: Sleep interruptible if waiting.
@@ -843,9 +834,7 @@ extern int ttm_bo_reserve(struct ttm_buffer_object *bo,
  * @use_sequence: If @bo is already reserved, Only sleep waiting for
  * it to become unreserved if @sequence < (@bo)->sequence.
  *
- * Must be called with struct ttm_bo_global::lru_lock held,
- * and will not remove reserved buffers from the lru lists.
- * The function may release the LRU spinlock if it needs to sleep.
+ * Will not remove reserved buffers from the lru lists.
  * Otherwise identical to ttm_bo_reserve.
  *
  * Returns:
@@ -858,7 +847,7 @@ extern int ttm_bo_reserve(struct ttm_buffer_object *bo,
  * -EDEADLK: Bo already reserved using @sequence. This error code will only
  * be returned if @use_sequence is set to true.
  */
-extern int ttm_bo_reserve_locked(struct ttm_buffer_object *bo,
+extern int ttm_bo_reserve_nolru(struct ttm_buffer_object *bo,
 				 bool interruptible,
 				 bool no_wait, bool use_sequence,
 				 uint32_t sequence);
-- 
1.8.0

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 10/10] drm/ttm: remove reliance on ttm_bo_wait_unreserved
  2012-11-12 14:00 [PATCH 01/10] drm/radeon: allow move_notify to be called without reservation Maarten Lankhorst
                   ` (7 preceding siblings ...)
  2012-11-12 14:00 ` [PATCH 09/10] drm/ttm: remove lru_lock around ttm_bo_reserve Maarten Lankhorst
@ 2012-11-12 14:00 ` Maarten Lankhorst
  2012-11-12 14:03 ` [PATCH 01/10] drm/radeon: allow move_notify to be called without reservation Maarten Lankhorst
  9 siblings, 0 replies; 33+ messages in thread
From: Maarten Lankhorst @ 2012-11-12 14:00 UTC (permalink / raw)
  To: dri-devel

This makes things slightly more complicated, but instead of testing for
unreserved and starting over, try to block and acquire reservation
first, then start over.

This maps a lot better to a blocking acquire operation.
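
For a driver like nouveau below, the pattern becomes roughly (sketch
only, simplified from the actual diff):

	ret = ttm_bo_reserve(&nvbo->bo, true, false, true, sequence);
	if (ret) {
		/* back off everything we already hold */
		validate_fini(op, NULL);
		if (ret == -EAGAIN) {
			/* block until we own this one bo, then restart */
			ret = ttm_bo_reserve_slowpath(&nvbo->bo, true,
						      sequence);
			if (ret == 0)
				res_bo = nvbo;
		}
	}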

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
 drivers/gpu/drm/nouveau/nouveau_gem.c  | 19 +++++++++---
 drivers/gpu/drm/ttm/ttm_bo.c           | 33 +++++++++++++++++++--
 drivers/gpu/drm/ttm/ttm_execbuf_util.c | 53 ++++++++++++++++++++--------------
 include/drm/ttm/ttm_bo_driver.h        | 24 +++++++--------
 4 files changed, 89 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 7b9364b..6f58604 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -321,6 +321,7 @@ validate_init(struct nouveau_channel *chan, struct drm_file *file_priv,
 	uint32_t sequence;
 	int trycnt = 0;
 	int ret, i;
+	struct nouveau_bo *res_bo = NULL;
 
 	sequence = atomic_add_return(1, &drm->ttm.validate_sequence);
 retry:
@@ -341,6 +342,11 @@ retry:
 			return -ENOENT;
 		}
 		nvbo = gem->driver_private;
+		if (nvbo == res_bo) {
+			res_bo = NULL;
+			drm_gem_object_unreference_unlocked(gem);
+			continue;
+		}
 
 		if (nvbo->reserved_by && nvbo->reserved_by == file_priv) {
 			NV_ERROR(drm, "multiple instances of buffer %d on "
@@ -353,15 +359,18 @@ retry:
 		ret = ttm_bo_reserve(&nvbo->bo, true, false, true, sequence);
 		if (ret) {
 			validate_fini(op, NULL);
-			if (unlikely(ret == -EAGAIN))
-				ret = ttm_bo_wait_unreserved(&nvbo->bo, true);
-			drm_gem_object_unreference_unlocked(gem);
+			if (unlikely(ret == -EAGAIN)) {
+				ret = ttm_bo_reserve_slowpath(&nvbo->bo, true,
+							      sequence);
+				if (!ret)
+					res_bo = nvbo;
+			}
 			if (unlikely(ret)) {
+				drm_gem_object_unreference_unlocked(gem);
 				if (ret != -ERESTARTSYS)
 					NV_ERROR(drm, "fail reserve\n");
 				return ret;
 			}
-			goto retry;
 		}
 
 		b->user_priv = (uint64_t)(unsigned long)nvbo;
@@ -383,6 +392,8 @@ retry:
 			validate_fini(op, NULL);
 			return -EINVAL;
 		}
+		if (nvbo == res_bo)
+			goto retry;
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index e57dae5..bc90f6b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -166,7 +166,8 @@ static void ttm_bo_release_list(struct kref *list_kref)
 	ttm_mem_global_free(bdev->glob->mem_glob, acc_size);
 }
 
-int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo, bool interruptible)
+static int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo,
+				  bool interruptible)
 {
 	if (interruptible) {
 		return wait_event_interruptible(bo->event_queue,
@@ -176,7 +177,6 @@ int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo, bool interruptible)
 		return 0;
 	}
 }
-EXPORT_SYMBOL(ttm_bo_wait_unreserved);
 
 void ttm_bo_add_to_lru(struct ttm_buffer_object *bo)
 {
@@ -320,6 +320,35 @@ int ttm_bo_reserve(struct ttm_buffer_object *bo,
 	return ret;
 }
 
+int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
+			    bool interruptible, uint32_t sequence)
+{
+	struct ttm_bo_global *glob = bo->glob;
+	int put_count = 0;
+	int ret;
+
+	WARN_ON(!list_empty_careful(&bo->ddestroy));
+
+	ret = ttm_bo_reserve_nolru(bo, interruptible, false, false, 0);
+	if (likely(ret == 0)) {
+		/**
+		 * Wake up waiters that may need to recheck for deadlock,
+		 * since we unset seq_valid in ttm_bo_reserve_nolru
+		 */
+		bo->val_seq = sequence;
+		bo->seq_valid = true;
+		wake_up_all(&bo->event_queue);
+
+		spin_lock(&glob->lru_lock);
+		put_count = ttm_bo_del_from_lru(bo);
+		spin_unlock(&glob->lru_lock);
+		ttm_bo_list_ref_sub(bo, put_count, true);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(ttm_bo_reserve_slowpath);
+
 void ttm_bo_unreserve_locked(struct ttm_buffer_object *bo)
 {
 	ttm_bo_add_to_lru(bo);
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index b3fe824..de1504f 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -82,22 +82,6 @@ static void ttm_eu_list_ref_sub(struct list_head *list)
 	}
 }
 
-static int ttm_eu_wait_unreserved_locked(struct list_head *list,
-					 struct ttm_buffer_object *bo)
-{
-	struct ttm_bo_global *glob = bo->glob;
-	int ret;
-
-	ttm_eu_del_from_lru_locked(list);
-	spin_unlock(&glob->lru_lock);
-	ret = ttm_bo_wait_unreserved(bo, true);
-	spin_lock(&glob->lru_lock);
-	if (unlikely(ret != 0))
-		ttm_eu_backoff_reservation_locked(list);
-	return ret;
-}
-
-
 void ttm_eu_backoff_reservation(struct list_head *list)
 {
 	struct ttm_validate_buffer *entry;
@@ -145,34 +129,59 @@ int ttm_eu_reserve_buffers(struct list_head *list)
 	entry = list_first_entry(list, struct ttm_validate_buffer, head);
 	glob = entry->bo->glob;
 
-retry:
 	spin_lock(&glob->lru_lock);
 	val_seq = entry->bo->bdev->val_seq++;
 
+retry:
 	list_for_each_entry(entry, list, head) {
 		struct ttm_buffer_object *bo = entry->bo;
 
+		/* already slowpath reserved? */
+		if (entry->reserved)
+			continue;
+
 		WARN_ON(!atomic_read(&bo->kref.refcount));
-retry_this_bo:
+
 		ret = ttm_bo_reserve_nolru(bo, true, true, true, val_seq);
 		switch (ret) {
 		case 0:
 			break;
 		case -EBUSY:
-			ret = ttm_eu_wait_unreserved_locked(list, bo);
-			if (unlikely(ret != 0)) {
+			ttm_eu_del_from_lru_locked(list);
+			spin_unlock(&glob->lru_lock);
+			ret = ttm_bo_reserve_nolru(bo, true, false,
+						   true, val_seq);
+			spin_lock(&glob->lru_lock);
+			if (!ret)
+				break;
+
+			if (ret != -EAGAIN) {
+				ttm_eu_backoff_reservation_locked(list);
 				spin_unlock(&glob->lru_lock);
 				ttm_eu_list_ref_sub(list);
 				return ret;
 			}
-			goto retry_this_bo;
+
+			/* fallthrough */
 		case -EAGAIN:
+			/* uh oh, we lost out, drop every reservation and try
+			 * to only reserve this buffer, then start over if
+			 * this succeeds.
+			 */
 			ttm_eu_backoff_reservation_locked(list);
 			spin_unlock(&glob->lru_lock);
 			ttm_eu_list_ref_sub(list);
-			ret = ttm_bo_wait_unreserved(bo, true);
+			ret = ttm_bo_reserve_slowpath(bo, true, val_seq);
 			if (unlikely(ret != 0))
 				return ret;
+			entry->removed = entry->reserved = true;
+			spin_lock(&glob->lru_lock);
+			if (unlikely(atomic_read(&bo->cpu_writers) > 0)) {
+				ttm_eu_backoff_reservation_locked(list);
+				spin_unlock(&glob->lru_lock);
+				ttm_eu_list_ref_sub(list);
+				return -EBUSY;
+			}
 			goto retry;
 		default:
 			ttm_eu_backoff_reservation_locked(list);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index e9cdae1..26af40c 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -824,6 +824,18 @@ extern int ttm_bo_reserve(struct ttm_buffer_object *bo,
 			  bool interruptible,
 			  bool no_wait, bool use_sequence, uint32_t sequence);
 
+/**
+ * ttm_bo_reserve_slowpath:
+ * @bo: A pointer to a struct ttm_buffer_object.
+ * @interruptible: Sleep interruptible if waiting.
+ * @sequence: Set (@bo)->sequence to this value after lock
+ *
+ * This is called after ttm_bo_reserve returns -EAGAIN and we backed off
+ * from all our other reservations. Because there are no other reservations
+ * held by us, this function cannot deadlock any more.
+ */
+extern int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
+				   bool interruptible, uint32_t sequence);
 
 /**
  * ttm_bo_reserve_nolru:
@@ -871,18 +883,6 @@ extern void ttm_bo_unreserve(struct ttm_buffer_object *bo);
  */
 extern void ttm_bo_unreserve_locked(struct ttm_buffer_object *bo);
 
-/**
- * ttm_bo_wait_unreserved
- *
- * @bo: A pointer to a struct ttm_buffer_object.
- *
- * Wait for a struct ttm_buffer_object to become unreserved.
- * This is typically used in the execbuf code to relax cpu-usage when
- * a potential deadlock condition backoff.
- */
-extern int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo,
-				  bool interruptible);
-
 /*
  * ttm_bo_util.c
  */
-- 
1.8.0

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH 01/10] drm/radeon: allow move_notify to be called without reservation
  2012-11-12 14:00 [PATCH 01/10] drm/radeon: allow move_notify to be called without reservation Maarten Lankhorst
                   ` (8 preceding siblings ...)
  2012-11-12 14:00 ` [PATCH 10/10] drm/ttm: remove reliance on ttm_bo_wait_unreserved Maarten Lankhorst
@ 2012-11-12 14:03 ` Maarten Lankhorst
  9 siblings, 0 replies; 33+ messages in thread
From: Maarten Lankhorst @ 2012-11-12 14:03 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: dri-devel

On 12-11-12 15:00, Maarten Lankhorst wrote:
> The few places that care should have those checks instead.
> This allow destruction of bo backed memory without a reservation.
Forgot to add: this patch series depends on the previous sync_obj_arg removal patches and the cpu_writers -EBUSY change.

~Maarten

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 02/10] drm/ttm: remove ttm_bo_cleanup_memtype_use
  2012-11-12 14:00 ` [PATCH 02/10] drm/ttm: remove ttm_bo_cleanup_memtype_use Maarten Lankhorst
@ 2012-11-19 13:26   ` Thomas Hellstrom
  2012-11-19 14:03     ` Maarten Lankhorst
  0 siblings, 1 reply; 33+ messages in thread
From: Thomas Hellstrom @ 2012-11-19 13:26 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: dri-devel

Hi,

On 11/12/2012 03:00 PM, Maarten Lankhorst wrote:
> move to release_list instead

Can you describe why this change is made? cleanup? reorder locks in a 
later patch?
Also please describe why you need move_notify and ttm unbind / destroy 
to be outside of
reservation, because that's the main change in this patch and it's not 
even mentioned in the
commit message.

Thanks,
Thomas


>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c | 47 +++++++++++++-------------------------------
>   1 file changed, 14 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 9c48e8f..74d6e7c 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -143,12 +143,20 @@ static void ttm_bo_release_list(struct kref *list_kref)
>   	BUG_ON(atomic_read(&bo->kref.refcount));
>   	BUG_ON(atomic_read(&bo->cpu_writers));
>   	BUG_ON(bo->sync_obj != NULL);
> -	BUG_ON(bo->mem.mm_node != NULL);
>   	BUG_ON(!list_empty(&bo->lru));
>   	BUG_ON(!list_empty(&bo->ddestroy));
>   
> -	if (bo->ttm)
> +	if (bo->bdev->driver->move_notify)
> +		bo->bdev->driver->move_notify(bo, NULL);
> +
> +	if (bo->ttm) {
> +		ttm_tt_unbind(bo->ttm);
>   		ttm_tt_destroy(bo->ttm);
> +		bo->ttm = NULL;
> +	}
> +	ttm_bo_mem_put(bo, &bo->mem);
> +	BUG_ON(bo->mem.mm_node != NULL);
> +
>   	atomic_dec(&bo->glob->bo_count);
>   	if (bo->destroy)
>   		bo->destroy(bo);
> @@ -466,35 +474,6 @@ out_err:
>   	return ret;
>   }
>   
> -/**
> - * Call bo::reserved.
> - * Will release GPU memory type usage on destruction.
> - * This is the place to put in driver specific hooks to release
> - * driver private resources.
> - * Will release the bo::reserved lock.
> - */
> -
> -static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
> -{
> -	if (bo->bdev->driver->move_notify)
> -		bo->bdev->driver->move_notify(bo, NULL);
> -
> -	if (bo->ttm) {
> -		ttm_tt_unbind(bo->ttm);
> -		ttm_tt_destroy(bo->ttm);
> -		bo->ttm = NULL;
> -	}
> -	ttm_bo_mem_put(bo, &bo->mem);
> -
> -	atomic_set(&bo->reserved, 0);
> -
> -	/*
> -	 * Make processes trying to reserve really pick it up.
> -	 */
> -	smp_mb__after_atomic_dec();
> -	wake_up_all(&bo->event_queue);
> -}
> -
>   static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
>   {
>   	struct ttm_bo_device *bdev = bo->bdev;
> @@ -523,8 +502,9 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
>   		spin_unlock(&bdev->fence_lock);
>   		put_count = ttm_bo_del_from_lru(bo);
>   
> +		atomic_set(&bo->reserved, 0);
> +		wake_up_all(&bo->event_queue);
>   		spin_unlock(&glob->lru_lock);
> -		ttm_bo_cleanup_memtype_use(bo);
>   
>   		ttm_bo_list_ref_sub(bo, put_count, true);
>   
> @@ -619,8 +599,9 @@ retry_reserve:
>   	list_del_init(&bo->ddestroy);
>   	++put_count;
>   
> +	atomic_set(&bo->reserved, 0);
> +	wake_up_all(&bo->event_queue);
>   	spin_unlock(&glob->lru_lock);
> -	ttm_bo_cleanup_memtype_use(bo);
>   
>   	ttm_bo_list_ref_sub(bo, put_count, true);
>   

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 03/10] drm/ttm: do not check if list is empty in ttm_bo_force_list_clean
  2012-11-12 14:00 ` [PATCH 03/10] drm/ttm: do not check if list is empty in ttm_bo_force_list_clean Maarten Lankhorst
@ 2012-11-19 13:33   ` Thomas Hellstrom
  2012-11-19 14:10     ` Maarten Lankhorst
  0 siblings, 1 reply; 33+ messages in thread
From: Thomas Hellstrom @ 2012-11-19 13:33 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: dri-devel

On 11/12/2012 03:00 PM, Maarten Lankhorst wrote:
> Just use the return error from ttm_mem_evict_first instead.

Here drivers need to be able to evict a memory type completely, because 
they might shut down
the memory type or clear it for some legacy usage, suspending or 
whatever, so returning 0 on -EBUSY isn't sufficient,
we need at least a list empty check, and a shared reservation at this 
point is illegal.

This is a point where the mechanism to exclude other reservers is 
needed, as we discussed previously.
vmwgfx is using the ttm lock, but will of course adapt if a new 
mechanism is emerging.

Thanks,
/Thomas


>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c | 27 ++++++++-------------------
>   1 file changed, 8 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 74d6e7c..a3383a7 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -1302,29 +1302,18 @@ EXPORT_SYMBOL(ttm_bo_create);
>   static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
>   					unsigned mem_type, bool allow_errors)
>   {
> -	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
> -	struct ttm_bo_global *glob = bdev->glob;
> -	int ret;
> -
> -	/*
> -	 * Can't use standard list traversal since we're unlocking.
> -	 */
> +	int ret = 0;
>   
> -	spin_lock(&glob->lru_lock);
> -	while (!list_empty(&man->lru)) {
> -		spin_unlock(&glob->lru_lock);
> +	while (!ret) {
>   		ret = ttm_mem_evict_first(bdev, mem_type, false, false, false);
> -		if (ret) {
> -			if (allow_errors) {
> -				return ret;
> -			} else {
> -				pr_err("Cleanup eviction failed\n");
> -			}
> +		if (ret == -EBUSY)
> +			return 0;
> +		else if (ret && !allow_errors) {
> +			pr_err("Cleanup eviction failed\n");
> +			ret = 0;
>   		}
> -		spin_lock(&glob->lru_lock);
>   	}
> -	spin_unlock(&glob->lru_lock);
> -	return 0;
> +	return ret;
>   }
>   
>   int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type)

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 02/10] drm/ttm: remove ttm_bo_cleanup_memtype_use
  2012-11-19 13:26   ` Thomas Hellstrom
@ 2012-11-19 14:03     ` Maarten Lankhorst
  2012-11-19 14:12       ` Thomas Hellstrom
  0 siblings, 1 reply; 33+ messages in thread
From: Maarten Lankhorst @ 2012-11-19 14:03 UTC (permalink / raw)
  To: Thomas Hellstrom; +Cc: dri-devel

On 19-11-12 14:26, Thomas Hellstrom wrote:
> Hi,
>
> On 11/12/2012 03:00 PM, Maarten Lankhorst wrote:
>> move to release_list instead
>
> Can you describe why this change is made? cleanup? reorder locks in a later patch?
> Also please describe why you need move_notify and ttm unbind / destroy to be outside of
> reservation, because that's the main change in this patch and it's not even mentioned in the
> commit message.
Ok, is a reword enough? In that case I'll resend.

I moved all the destruction to happen when release_list refcount drops to 0.
This removes the special handling of ttm_bo_cleanup_memtype_use, and
makes it part of the normal bo destruction instead.
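
Concretely, the final teardown in ttm_bo_release_list now looks like
this (taken from the patch):

	if (bo->bdev->driver->move_notify)
		bo->bdev->driver->move_notify(bo, NULL);

	if (bo->ttm) {
		ttm_tt_unbind(bo->ttm);
		ttm_tt_destroy(bo->ttm);
		bo->ttm = NULL;
	}
	ttm_bo_mem_put(bo, &bo->mem);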

It also meant that move_notify and unbind/destroy were done without a reservation, simply
because they now happen during normal destruction instead. At that point you may no longer
hold a reservation, but you can already be sure you're the only one touching it.

It is optional and I can drop this patch if the behavior change is unwanted.

~Maarten

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 03/10] drm/ttm: do not check if list is empty in ttm_bo_force_list_clean
  2012-11-19 13:33   ` Thomas Hellstrom
@ 2012-11-19 14:10     ` Maarten Lankhorst
  2012-11-20  7:42       ` Thomas Hellstrom
  0 siblings, 1 reply; 33+ messages in thread
From: Maarten Lankhorst @ 2012-11-19 14:10 UTC (permalink / raw)
  To: Thomas Hellstrom; +Cc: dri-devel

On 19-11-12 14:33, Thomas Hellstrom wrote:
> On 11/12/2012 03:00 PM, Maarten Lankhorst wrote:
>> Just use the return error from ttm_mem_evict_first instead.
>
> Here driver need to be able to evict a memory type completely, because they might shut down
> the memory type or clear it for some legacy usage, suspending or whatever, so returning 0 on -EBUSY isn't sufficient,
> we need at least a list empty check, and a shared reservation at this point is illegal.
>
> This is a point where the mechanism to exclude other reservers is needed, as we discussed previously.
> vmwgfx is using the ttm lock, but will of course adapt if a new mechanism is emerging.
Normally ttm_mem_evict_first only returns -EBUSY if the list is empty and no_wait = false,
so I thought using the return code would be equivalent.

We could do spin_lock(&glob->lru_lock); WARN_ON(!list_empty(&man->lru)); spin_unlock(&glob->lru_lock); to handle this after -EBUSY.
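
Something like this in ttm_bo_force_list_clean (sketch only, untested;
it also needs the man pointer that patch 3 dropped):

	ret = ttm_mem_evict_first(bdev, mem_type, false, false, false);
	if (ret == -EBUSY) {
		/* -EBUSY should only mean the lru list is empty here */
		spin_lock(&glob->lru_lock);
		WARN_ON(!list_empty(&man->lru));
		spin_unlock(&glob->lru_lock);
		return 0;
	}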

With a lot of objects on the lru list, this would save taking lru_lock twice for each object.

~Maarten

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 02/10] drm/ttm: remove ttm_bo_cleanup_memtype_use
  2012-11-19 14:03     ` Maarten Lankhorst
@ 2012-11-19 14:12       ` Thomas Hellstrom
  0 siblings, 0 replies; 33+ messages in thread
From: Thomas Hellstrom @ 2012-11-19 14:12 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: dri-devel

On 11/19/2012 03:03 PM, Maarten Lankhorst wrote:
> On 19-11-12 14:26, Thomas Hellstrom wrote:
>> Hi,
>>
>> On 11/12/2012 03:00 PM, Maarten Lankhorst wrote:
>>> move to release_list instead
>> Can you describe why this change is made? cleanup? reorder locks in a later patch?
>> Also please describe why you need move_notify and ttm unbind / destroy to be outside of
>> reservation, because that's the main change in this patch and it's not even mentioned in the
>> commit message.
> Ok is a reword enough? In that case I'll resend.
>
> I moved all the destruction to happen when release_list refcount drops to 0.
> This removes the special handling of ttm_bo_cleanup_memtype_use, and
> makes it part of the normal bo destruction instead.
>
> It also meant that move_notify and unbind/destroy was without reservation, simply
> because it was done during normal destruction instead. At that point you may no longer
> hold a reservation, but you can already be sure you're the only one touching it.

That's true, but even if we are the only users it would be good to keep 
reservation
when calling move_notify and the unbind stuff, simply because drivers 
can easily detect
reservation bugs if these functions are always called reserved.

If we can keep the reservation here without too much trouble, I'd like 
us to do so.

Thanks,
Thomas

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 04/10] drm/ttm: change fence_lock to inner lock, v3
  2012-11-12 14:00 ` [PATCH 04/10] drm/ttm: change fence_lock to inner lock, v3 Maarten Lankhorst
@ 2012-11-19 14:17   ` Thomas Hellstrom
  2012-11-19 15:04     ` Thomas Hellstrom
  0 siblings, 1 reply; 33+ messages in thread
From: Thomas Hellstrom @ 2012-11-19 14:17 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: dri-devel

Hi,

This patch looks mostly good, although I think ttm_bo_cleanup_refs 
becomes overly complicated:
Could this do, or am I missing something?


static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
                    bool interruptible,
                    bool no_wait_reserve,
                    bool no_wait_gpu)
{
     struct ttm_bo_device *bdev = bo->bdev;
     struct ttm_bo_global *glob = bo->glob;
     int put_count;
     int ret = 0;

     /*
      * First, reserve while making sure we're still on the
      * ddestroy list.
      */
retry_reserve:
     spin_lock(&glob->lru_lock);

     if (unlikely(list_empty(&bo->ddestroy))) {
         spin_unlock(&glob->lru_lock);
         return 0;
     }

     ret = ttm_bo_reserve_locked(bo, false, true, false, 0);

     if (unlikely(ret == -EBUSY)) {
         spin_unlock(&glob->lru_lock);
         if (likely(!no_wait_reserve))
             ret = ttm_bo_wait_unreserved(bo, interruptible);
         if (unlikely(ret != 0))
             return ret;

         goto retry_reserve;
     }

     BUG_ON(ret != 0);

     spin_lock(&bdev->fence_lock);
     ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
     spin_unlock(&bdev->fence_lock);

     if (unlikely(ret != 0)) {
         atomic_set(&bo->reserved, 0);
         wake_up_all(&bo->event_queue);
         spin_unlock(&glob->lru_lock);
         return ret;
     }

     put_count = ttm_bo_del_from_lru(bo);
     list_del_init(&bo->ddestroy);
     ++put_count;

     spin_unlock(&glob->lru_lock);
     ttm_bo_cleanup_memtype_use(bo);

     atomic_set(&bo->reserved, 0);
     wake_up_all(&bo->event_queue);
     ttm_bo_list_ref_sub(bo, put_count, true);

     return 0;
}


On 11/12/2012 03:00 PM, Maarten Lankhorst wrote:
> I changed the hierarchy to make fence_lock the most inner lock,
> instead of outer lock. This will simplify things slightly, and
> hopefully makes it easier to make fence_lock global at one point
> should it be needed.
>
> To make things clearer, I change the order around in ttm_bo_cleanup_refs
> and ttm_bo_cleanup_refs_or_queue.
>
> A reservation is taken first, then fence lock is taken and a wait is attempted.
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
>
> v2:
>   - fix conflict with upstream race fix, simplifies ttm_bo_cleanup_refs
> v3:
>   - change removal of fence_lock to making it a inner lock instead
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c           | 95 ++++++++++++++++------------------
>   drivers/gpu/drm/ttm/ttm_execbuf_util.c |  4 +-
>   2 files changed, 48 insertions(+), 51 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index a3383a7..70285ff 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -478,28 +478,26 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
>   {
>   	struct ttm_bo_device *bdev = bo->bdev;
>   	struct ttm_bo_global *glob = bo->glob;
> -	struct ttm_bo_driver *driver;
> +	struct ttm_bo_driver *driver = bdev->driver;
>   	void *sync_obj = NULL;
>   	int put_count;
>   	int ret;
>   
> -	spin_lock(&bdev->fence_lock);
> -	(void) ttm_bo_wait(bo, false, false, true);
> -	if (!bo->sync_obj) {
> -
> -		spin_lock(&glob->lru_lock);
> -
> -		/**
> -		 * Lock inversion between bo:reserve and bdev::fence_lock here,
> -		 * but that's OK, since we're only trylocking.
> -		 */
> -
> -		ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
> +	spin_lock(&glob->lru_lock);
> +	ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
> +	if (!ret) {
> +		spin_lock(&bdev->fence_lock);
> +		ret = ttm_bo_wait(bo, false, false, true);
>   
> -		if (unlikely(ret == -EBUSY))
> +		if (unlikely(ret == -EBUSY)) {
> +			sync_obj = driver->sync_obj_ref(bo->sync_obj);
> +			spin_unlock(&bdev->fence_lock);
> +			atomic_set(&bo->reserved, 0);
> +			wake_up_all(&bo->event_queue);
>   			goto queue;
> -
> +		}
>   		spin_unlock(&bdev->fence_lock);
> +
>   		put_count = ttm_bo_del_from_lru(bo);
>   
>   		atomic_set(&bo->reserved, 0);
> @@ -509,18 +507,11 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
>   		ttm_bo_list_ref_sub(bo, put_count, true);
>   
>   		return;
> -	} else {
> -		spin_lock(&glob->lru_lock);
>   	}
>   queue:
> -	driver = bdev->driver;
> -	if (bo->sync_obj)
> -		sync_obj = driver->sync_obj_ref(bo->sync_obj);
> -
>   	kref_get(&bo->list_kref);
>   	list_add_tail(&bo->ddestroy, &bdev->ddestroy);
>   	spin_unlock(&glob->lru_lock);
> -	spin_unlock(&bdev->fence_lock);
>   
>   	if (sync_obj) {
>   		driver->sync_obj_flush(sync_obj);
> @@ -546,54 +537,60 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>   			       bool no_wait_gpu)
>   {
>   	struct ttm_bo_device *bdev = bo->bdev;
> +	struct ttm_bo_driver *driver = bdev->driver;
>   	struct ttm_bo_global *glob = bo->glob;
>   	int put_count;
>   	int ret = 0;
> +	void *sync_obj;
>   
>   retry:
> -	spin_lock(&bdev->fence_lock);
> -	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
> -	spin_unlock(&bdev->fence_lock);
> +	spin_lock(&glob->lru_lock);
>   
> -	if (unlikely(ret != 0))
> -		return ret;
> +	ret = ttm_bo_reserve_locked(bo, interruptible,
> +				    no_wait_reserve, false, 0);
>   
> -retry_reserve:
> -	spin_lock(&glob->lru_lock);
> +	if (unlikely(ret)) {
> +		spin_unlock(&glob->lru_lock);
> +		return ret;
> +	}
>   
>   	if (unlikely(list_empty(&bo->ddestroy))) {
> +		atomic_set(&bo->reserved, 0);
> +		wake_up_all(&bo->event_queue);
>   		spin_unlock(&glob->lru_lock);
>   		return 0;
>   	}
>   
> -	ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
> -
> -	if (unlikely(ret == -EBUSY)) {
> -		spin_unlock(&glob->lru_lock);
> -		if (likely(!no_wait_reserve))
> -			ret = ttm_bo_wait_unreserved(bo, interruptible);
> -		if (unlikely(ret != 0))
> +	spin_lock(&bdev->fence_lock);
> +	ret = ttm_bo_wait(bo, false, false, true);
> +	if (ret) {
> +		if (no_wait_gpu) {
> +			spin_unlock(&bdev->fence_lock);
> +			atomic_set(&bo->reserved, 0);
> +			wake_up_all(&bo->event_queue);
> +			spin_unlock(&glob->lru_lock);
>   			return ret;
> +		}
>   
> -		goto retry_reserve;
> -	}
> -
> -	BUG_ON(ret != 0);
> -
> -	/**
> -	 * We can re-check for sync object without taking
> -	 * the bo::lock since setting the sync object requires
> -	 * also bo::reserved. A busy object at this point may
> -	 * be caused by another thread recently starting an accelerated
> -	 * eviction.
> -	 */
> +		/**
> +		 * Take a reference to the fence and unreserve, if the wait
> +		 * was succesful and no new sync_obj was attached,
> +		 * ttm_bo_wait in retry will return ret = 0, and end the loop.
> +		 */
>   
> -	if (unlikely(bo->sync_obj)) {
> +		sync_obj = driver->sync_obj_ref(&bo->sync_obj);
> +		spin_unlock(&bdev->fence_lock);
>   		atomic_set(&bo->reserved, 0);
>   		wake_up_all(&bo->event_queue);
>   		spin_unlock(&glob->lru_lock);
> +
> +		ret = driver->sync_obj_wait(bo->sync_obj, false, interruptible);
> +		driver->sync_obj_unref(&sync_obj);
> +		if (ret)
> +			return ret;
>   		goto retry;
>   	}
> +	spin_unlock(&bdev->fence_lock);
>   
>   	put_count = ttm_bo_del_from_lru(bo);
>   	list_del_init(&bo->ddestroy);
> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> index 1986d00..cd9e452 100644
> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> @@ -213,8 +213,8 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
>   	driver = bdev->driver;
>   	glob = bo->glob;
>   
> -	spin_lock(&bdev->fence_lock);
>   	spin_lock(&glob->lru_lock);
> +	spin_lock(&bdev->fence_lock);
>   
>   	list_for_each_entry(entry, list, head) {
>   		bo = entry->bo;
> @@ -223,8 +223,8 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
>   		ttm_bo_unreserve_locked(bo);
>   		entry->reserved = false;
>   	}
> -	spin_unlock(&glob->lru_lock);
>   	spin_unlock(&bdev->fence_lock);
> +	spin_unlock(&glob->lru_lock);
>   
>   	list_for_each_entry(entry, list, head) {
>   		if (entry->old_sync_obj)

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 04/10] drm/ttm: change fence_lock to inner lock, v3
  2012-11-19 14:17   ` Thomas Hellstrom
@ 2012-11-19 15:04     ` Thomas Hellstrom
  2012-11-19 15:33       ` Maarten Lankhorst
  0 siblings, 1 reply; 33+ messages in thread
From: Thomas Hellstrom @ 2012-11-19 15:04 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: dri-devel

On 11/19/2012 03:17 PM, Thomas Hellstrom wrote:
> Hi,
>
> This patch looks mostly good, although I think ttm_bo_cleanup_refs 
> becomes overly complicated:
> Could this do, or am I missing something?
>

Actually, my version is bad, because ttm_bo_wait() is called with the 
lru lock held.

/Thomas


>
> static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>                    bool interruptible,
>                    bool no_wait_reserve,
>                    bool no_wait_gpu)
> {
>     struct ttm_bo_device *bdev = bo->bdev;
>     struct ttm_bo_global *glob = bo->glob;
>     int put_count;
>     int ret = 0;
>
>     /*
>      * First, reserve while making sure we're still on the
>      * ddestroy list.
>      */
> retry_reserve:
>     spin_lock(&glob->lru_lock);
>
>     if (unlikely(list_empty(&bo->ddestroy))) {
>         spin_unlock(&glob->lru_lock);
>         return 0;
>     }
>
>     ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
>
>     if (unlikely(ret == -EBUSY)) {
>         spin_unlock(&glob->lru_lock);
>         if (likely(!no_wait_reserve))
>             ret = ttm_bo_wait_unreserved(bo, interruptible);
>         if (unlikely(ret != 0))
>             return ret;
>
>         goto retry_reserve;
>     }
>
>     BUG_ON(ret != 0);
>
>     spin_lock(&bdev->fence_lock);
>     ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
>     spin_unlock(&bdev->fence_lock);
>
>     if (unlikely(ret != 0)) {
>         atomic_set(&bo->reserved, 0);
>         wake_up_all(&bo->event_queue);
>         spin_unlock(&glob->lru_lock);
>         return ret;
>     }
>
>     put_count = ttm_bo_del_from_lru(bo);
>     list_del_init(&bo->ddestroy);
>     ++put_count;
>
>     spin_unlock(&glob->lru_lock);
>     ttm_bo_cleanup_memtype_use(bo);
>
>     atomic_set(&bo_reserved, 0);
>     wake_up_all(&bo->event_queue);
>     ttm_bo_list_ref_sub(bo, put_count, true);
>
>     return 0;
> }
>
>
> On 11/12/2012 03:00 PM, Maarten Lankhorst wrote:
>> I changed the hierarchy to make fence_lock the most inner lock,
>> instead of outer lock. This will simplify things slightly, and
>> hopefully makes it easier to make fence_lock global at one point
>> should it be needed.
>>
>> To make things clearer, I change the order around in ttm_bo_cleanup_refs
>> and ttm_bo_cleanup_refs_or_queue.
>>
>> A reservation is taken first, then fence lock is taken and a wait is 
>> attempted.
>>
>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
>>
>> v2:
>>   - fix conflict with upstream race fix, simplifies ttm_bo_cleanup_refs
>> v3:
>>   - change removal of fence_lock to making it a inner lock instead
>> ---
>>   drivers/gpu/drm/ttm/ttm_bo.c           | 95 
>> ++++++++++++++++------------------
>>   drivers/gpu/drm/ttm/ttm_execbuf_util.c |  4 +-
>>   2 files changed, 48 insertions(+), 51 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index a3383a7..70285ff 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -478,28 +478,26 @@ static void ttm_bo_cleanup_refs_or_queue(struct 
>> ttm_buffer_object *bo)
>>   {
>>       struct ttm_bo_device *bdev = bo->bdev;
>>       struct ttm_bo_global *glob = bo->glob;
>> -    struct ttm_bo_driver *driver;
>> +    struct ttm_bo_driver *driver = bdev->driver;
>>       void *sync_obj = NULL;
>>       int put_count;
>>       int ret;
>>   -    spin_lock(&bdev->fence_lock);
>> -    (void) ttm_bo_wait(bo, false, false, true);
>> -    if (!bo->sync_obj) {
>> -
>> -        spin_lock(&glob->lru_lock);
>> -
>> -        /**
>> -         * Lock inversion between bo:reserve and bdev::fence_lock here,
>> -         * but that's OK, since we're only trylocking.
>> -         */
>> -
>> -        ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
>> +    spin_lock(&glob->lru_lock);
>> +    ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
>> +    if (!ret) {
>> +        spin_lock(&bdev->fence_lock);
>> +        ret = ttm_bo_wait(bo, false, false, true);
>>   -        if (unlikely(ret == -EBUSY))
>> +        if (unlikely(ret == -EBUSY)) {
>> +            sync_obj = driver->sync_obj_ref(bo->sync_obj);
>> +            spin_unlock(&bdev->fence_lock);
>> +            atomic_set(&bo->reserved, 0);
>> +            wake_up_all(&bo->event_queue);
>>               goto queue;
>> -
>> +        }
>>           spin_unlock(&bdev->fence_lock);
>> +
>>           put_count = ttm_bo_del_from_lru(bo);
>>             atomic_set(&bo->reserved, 0);
>> @@ -509,18 +507,11 @@ static void ttm_bo_cleanup_refs_or_queue(struct 
>> ttm_buffer_object *bo)
>>           ttm_bo_list_ref_sub(bo, put_count, true);
>>             return;
>> -    } else {
>> -        spin_lock(&glob->lru_lock);
>>       }
>>   queue:
>> -    driver = bdev->driver;
>> -    if (bo->sync_obj)
>> -        sync_obj = driver->sync_obj_ref(bo->sync_obj);
>> -
>>       kref_get(&bo->list_kref);
>>       list_add_tail(&bo->ddestroy, &bdev->ddestroy);
>>       spin_unlock(&glob->lru_lock);
>> -    spin_unlock(&bdev->fence_lock);
>>         if (sync_obj) {
>>           driver->sync_obj_flush(sync_obj);
>> @@ -546,54 +537,60 @@ static int ttm_bo_cleanup_refs(struct 
>> ttm_buffer_object *bo,
>>                      bool no_wait_gpu)
>>   {
>>       struct ttm_bo_device *bdev = bo->bdev;
>> +    struct ttm_bo_driver *driver = bdev->driver;
>>       struct ttm_bo_global *glob = bo->glob;
>>       int put_count;
>>       int ret = 0;
>> +    void *sync_obj;
>>     retry:
>> -    spin_lock(&bdev->fence_lock);
>> -    ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
>> -    spin_unlock(&bdev->fence_lock);
>> +    spin_lock(&glob->lru_lock);
>>   -    if (unlikely(ret != 0))
>> -        return ret;
>> +    ret = ttm_bo_reserve_locked(bo, interruptible,
>> +                    no_wait_reserve, false, 0);
>>   -retry_reserve:
>> -    spin_lock(&glob->lru_lock);
>> +    if (unlikely(ret)) {
>> +        spin_unlock(&glob->lru_lock);
>> +        return ret;
>> +    }
>>         if (unlikely(list_empty(&bo->ddestroy))) {
>> +        atomic_set(&bo->reserved, 0);
>> +        wake_up_all(&bo->event_queue);
>>           spin_unlock(&glob->lru_lock);
>>           return 0;
>>       }
>>   -    ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
>> -
>> -    if (unlikely(ret == -EBUSY)) {
>> -        spin_unlock(&glob->lru_lock);
>> -        if (likely(!no_wait_reserve))
>> -            ret = ttm_bo_wait_unreserved(bo, interruptible);
>> -        if (unlikely(ret != 0))
>> +    spin_lock(&bdev->fence_lock);
>> +    ret = ttm_bo_wait(bo, false, false, true);
>> +    if (ret) {
>> +        if (no_wait_gpu) {
>> +            spin_unlock(&bdev->fence_lock);
>> +            atomic_set(&bo->reserved, 0);
>> +            wake_up_all(&bo->event_queue);
>> +            spin_unlock(&glob->lru_lock);
>>               return ret;
>> +        }
>>   -        goto retry_reserve;
>> -    }
>> -
>> -    BUG_ON(ret != 0);
>> -
>> -    /**
>> -     * We can re-check for sync object without taking
>> -     * the bo::lock since setting the sync object requires
>> -     * also bo::reserved. A busy object at this point may
>> -     * be caused by another thread recently starting an accelerated
>> -     * eviction.
>> -     */
>> +        /**
>> +         * Take a reference to the fence and unreserve, if the wait
>> +         * was succesful and no new sync_obj was attached,
>> +         * ttm_bo_wait in retry will return ret = 0, and end the loop.
>> +         */
>>   -    if (unlikely(bo->sync_obj)) {
>> +        sync_obj = driver->sync_obj_ref(&bo->sync_obj);
>> +        spin_unlock(&bdev->fence_lock);
>>           atomic_set(&bo->reserved, 0);
>>           wake_up_all(&bo->event_queue);
>>           spin_unlock(&glob->lru_lock);
>> +
>> +        ret = driver->sync_obj_wait(bo->sync_obj, false, 
>> interruptible);
>> +        driver->sync_obj_unref(&sync_obj);
>> +        if (ret)
>> +            return ret;
>>           goto retry;
>>       }
>> +    spin_unlock(&bdev->fence_lock);
>>         put_count = ttm_bo_del_from_lru(bo);
>>       list_del_init(&bo->ddestroy);
>> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c 
>> b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> index 1986d00..cd9e452 100644
>> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> @@ -213,8 +213,8 @@ void ttm_eu_fence_buffer_objects(struct list_head 
>> *list, void *sync_obj)
>>       driver = bdev->driver;
>>       glob = bo->glob;
>>   -    spin_lock(&bdev->fence_lock);
>>       spin_lock(&glob->lru_lock);
>> +    spin_lock(&bdev->fence_lock);
>>         list_for_each_entry(entry, list, head) {
>>           bo = entry->bo;
>> @@ -223,8 +223,8 @@ void ttm_eu_fence_buffer_objects(struct list_head 
>> *list, void *sync_obj)
>>           ttm_bo_unreserve_locked(bo);
>>           entry->reserved = false;
>>       }
>> -    spin_unlock(&glob->lru_lock);
>>       spin_unlock(&bdev->fence_lock);
>> +    spin_unlock(&glob->lru_lock);
>>         list_for_each_entry(entry, list, head) {
>>           if (entry->old_sync_obj)
>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 04/10] drm/ttm: change fence_lock to inner lock, v3
  2012-11-19 15:04     ` Thomas Hellstrom
@ 2012-11-19 15:33       ` Maarten Lankhorst
  2012-11-20  7:48         ` Thomas Hellstrom
  0 siblings, 1 reply; 33+ messages in thread
From: Maarten Lankhorst @ 2012-11-19 15:33 UTC (permalink / raw)
  To: Thomas Hellstrom; +Cc: dri-devel

On 19-11-12 16:04, Thomas Hellstrom wrote:
> On 11/19/2012 03:17 PM, Thomas Hellstrom wrote:
>> Hi,
>>
>> This patch looks mostly good, although I think ttm_bo_cleanup_refs becomes overly complicated:
>> Could this do, or am I missing something?
>>
>
> Actually, my version is bad, because ttm_bo_wait() is called with the lru lock held.
>
> /Thomas
Oh, digging through it made me remember why I had to release the reservation early and
had to allow move_notify to be called without reservation.

Fortunately move_notify has a NULL parameter, which is the only time that happens,
so you can still do BUG_ON(mem != NULL && !ttm_bo_reserved(bo)); in your
move_notify handler.
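
In a driver's move_notify that would look roughly like this (hypothetical
handler name, just illustrating the check above):

	static void driver_move_notify(struct ttm_buffer_object *bo,
				       struct ttm_mem_reg *new_mem)
	{
		/* new_mem == NULL only happens on final destruction,
		 * where the reservation is no longer held */
		BUG_ON(new_mem != NULL && !ttm_bo_reserved(bo));

		/* ... normal driver bookkeeping ... */
	}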

05/10 removed the loop and assumed no new fence could be attached after the driver has
declared the bo dead.

However, at that point it may no longer hold a reservation to confirm this, that's why
I moved the cleanup to be done in the release_list handler. It could still be done in
ttm_bo_release, but we no longer have a reservation after we waited. Getting
a reservation can fail if the bo is imported for example.

While it's true that in that case a new fence may be attached as well, that
would be less harmful since that operation wouldn't involve this device, so the
ttm bo can still be removed in that case. When that time comes I should probably
fix up that WARN_ON(ret) in ttm_bo_cleanup_refs. :-)

I did add a WARN_ON(!atomic_read(&bo->kref.refcount)); to
ttm_bo_reserve and ttm_eu_reserve_buffers to be sure nothing is done on the device
itself. If that is too paranoid, those WARN_ON's could be dropped. I prefer to leave them
in for a kernel release or two. But according to the rules, that would be the only time you
could attach a new fence and trigger the WARN_ON, for now.

~Maarten

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 03/10] drm/ttm: do not check if list is empty in ttm_bo_force_list_clean
  2012-11-19 14:10     ` Maarten Lankhorst
@ 2012-11-20  7:42       ` Thomas Hellstrom
  0 siblings, 0 replies; 33+ messages in thread
From: Thomas Hellstrom @ 2012-11-20  7:42 UTC (permalink / raw)
  To: dri-devel, Maarten Lankhorst

On 11/19/2012 03:10 PM, Maarten Lankhorst wrote:
> On 19-11-12 14:33, Thomas Hellstrom wrote:
>> On 11/12/2012 03:00 PM, Maarten Lankhorst wrote:
>>> Just use the return error from ttm_mem_evict_first instead.
>> Here driver need to be able to evict a memory type completely, because they might shut down
>> the memory type or clear it for some legacy usage, suspending or whatever, so returning 0 on -EBUSY isn't sufficient,
>> we need at least a list empty check, and a shared reservation at this point is illegal.
>>
>> This is a point where the mechanism to exclude other reservers is needed, as we discussed previously.
>> vmwgfx is using the ttm lock, but will of course adapt if a new mechanism is emerging.
> Normally ttm_mem_evict_first only returns -EBUSY if the list is empty and no_wait = false,
> so I thought using the return code would be equivalent.
>
> We could do spin_lock(&glob->lru_lock); WARN_ON(!list_empty(&man->lru_lock)); spin_unlock(&glob->lru_lock); to handle this after -EBUSY.
>
> With a lot of objects on the lru list, this would save taking lru_lock twice for each object.
>
> ~Maarten

Sure, and in the allow_errors case we should return an error if the list 
isn't empty, to allow careful drivers to deal with that.
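
Something along these lines in ttm_bo_force_list_clean(), I mean (a rough sketch of the idea
only; the ttm_mem_evict_first argument list is from memory and may not match after this series):

    struct ttm_bo_global *glob = bdev->glob;
    struct ttm_mem_type_manager *man = &bdev->man[mem_type];
    int ret;

    do {
            ret = ttm_mem_evict_first(bdev, mem_type, false, false, false);
    } while (ret == 0);

    if (ret != -EBUSY)
            return ret;

    /* -EBUSY normally means the lru list was empty; verify that. */
    spin_lock(&glob->lru_lock);
    if (!list_empty(&man->lru)) {
            spin_unlock(&glob->lru_lock);
            if (allow_errors)
                    return -EBUSY;
            pr_err("Buffers still on the lru list for memory type %u\n",
                   mem_type);
            return 0;
    }
    spin_unlock(&glob->lru_lock);
    return 0;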

/Thomas



>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 04/10] drm/ttm: change fence_lock to inner lock, v3
  2012-11-19 15:33       ` Maarten Lankhorst
@ 2012-11-20  7:48         ` Thomas Hellstrom
  2012-11-20 11:33           ` Maarten Lankhorst
  0 siblings, 1 reply; 33+ messages in thread
From: Thomas Hellstrom @ 2012-11-20  7:48 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: Thomas Hellstrom, dri-devel

On 11/19/2012 04:33 PM, Maarten Lankhorst wrote:
> Op 19-11-12 16:04, Thomas Hellstrom schreef:
>> On 11/19/2012 03:17 PM, Thomas Hellstrom wrote:
>>> Hi,
>>>
>>> This patch looks mostly good, although I think ttm_bo_cleanup_refs becomes overly complicated:
>>> Could this do, or am I missing something?
>>>
>> Actually, my version is bad, because ttm_bo_wait() is called with the lru lock held.
>>
>> /Thomas
> Oh digging through it made me remember why I had to release the reservation early and
> had to allow move_notify to be called without reservation.
>
> Fortunately move_notify has a NULL parameter, which is the only time that happens,
> so you can still check do BUG_ON(mem != NULL && !ttm_bo_reserved(bo)); in your
> move_notify handler.
>
> 05/10 removed the loop and assumed no new fence could be attached after the driver has
> declared the bo dead.
>
> However, at that point it may no longer hold a reservation to confirm this, that's why
> I moved the cleanup to be done in the release_list handler. It could still be done in
> ttm_bo_release, but we no longer have a reservation after we waited. Getting
> a reservation can fail if the bo is imported for example.
>
> While it would be true that in that case a new fence may be attached as well, that
> would be less harmful since that operation wouldn't involve this device, so the
> ttm bo can still be removed in that case. When that time comes I should probably
> fix up that WARN_ON(ret) in ttm_bo_cleanup_refs. :-)
>
> I did add a WARN_ON(!atomic_read(&bo->kref.refcount)); to
> ttm_bo_reserve and ttm_eu_reserve_buffers to be sure nothing is done on the device
> itself. If that is too paranoid, those WARN_ON's could be dropped. I prefer to leave them
> in for a kernel release or 2. But according to the rules that would be the only time you
> could attach a new fence and trigger the WARN_ON for now..

Hmm, I'd appreciate it if you could group patches with functional changes that depend on
each other together, and add a "this is done because ..." to each, which makes it much
easier to review (and to follow the commit history in case something goes terribly wrong
and we need to revert).

Meanwhile I'll take a look at the final ttm_bo.c and see if I can spot any culprits.

In general, as long as a bo is on an LRU list, we must be able to attach fences because
of accelerated eviction.

/Thomas

> ~Maarten
>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 04/10] drm/ttm: change fence_lock to inner lock, v3
  2012-11-20  7:48         ` Thomas Hellstrom
@ 2012-11-20 11:33           ` Maarten Lankhorst
  2012-11-20 11:59             ` Maarten Lankhorst
  2012-11-20 12:03             ` Thomas Hellstrom
  0 siblings, 2 replies; 33+ messages in thread
From: Maarten Lankhorst @ 2012-11-20 11:33 UTC (permalink / raw)
  To: Thomas Hellstrom; +Cc: Maarten Lankhorst, Thomas Hellstrom, dri-devel

Op 20-11-12 08:48, Thomas Hellstrom schreef:
> On 11/19/2012 04:33 PM, Maarten Lankhorst wrote:
>> Op 19-11-12 16:04, Thomas Hellstrom schreef:
>>> On 11/19/2012 03:17 PM, Thomas Hellstrom wrote:
>>>> Hi,
>>>>
>>>> This patch looks mostly good, although I think ttm_bo_cleanup_refs becomes overly complicated:
>>>> Could this do, or am I missing something?
>>>>
>>> Actually, my version is bad, because ttm_bo_wait() is called with the lru lock held.
>>>
>>> /Thomas
>> Oh digging through it made me remember why I had to release the reservation early and
>> had to allow move_notify to be called without reservation.
>>
>> Fortunately move_notify has a NULL parameter, which is the only time that happens,
>> so you can still check do BUG_ON(mem != NULL && !ttm_bo_reserved(bo)); in your
>> move_notify handler.
>>
>> 05/10 removed the loop and assumed no new fence could be attached after the driver has
>> declared the bo dead.
>>
>> However, at that point it may no longer hold a reservation to confirm this, that's why
>> I moved the cleanup to be done in the release_list handler. It could still be done in
>> ttm_bo_release, but we no longer have a reservation after we waited. Getting
>> a reservation can fail if the bo is imported for example.
>>
>> While it would be true that in that case a new fence may be attached as well, that
>> would be less harmful since that operation wouldn't involve this device, so the
>> ttm bo can still be removed in that case. When that time comes I should probably
>> fix up that WARN_ON(ret) in ttm_bo_cleanup_refs. :-)
>>
>> I did add a WARN_ON(!atomic_read(&bo->kref.refcount)); to
>> ttm_bo_reserve and ttm_eu_reserve_buffers to be sure nothing is done on the device
>> itself. If that is too paranoid, those WARN_ON's could be dropped. I prefer to leave them
>> in for a kernel release or 2. But according to the rules that would be the only time you
>> could attach a new fence and trigger the WARN_ON for now..
>
> Hmm, I'd appreciate if you could group patches with functional changes that depend on eachother togeteher,
> and "this is done because ...", which makes it much easier to review, (and to follow the commit history in case
> something goes terribly wrong and we need to revert).
>
> Meanwhile I'll take a look at the final ttm_bo.c and see if I can spot any culprits.
>
> In general, as long as a bo is on a LRU list, we must be able to attach fences because of accelerated eviction.
I thought it was deliberately designed so that the bo is kept on the lru list,
but since it's also on the ddestroy list, accelerated eviction won't start:
eviction branches into cleanup_refs early, and lru_lock still protects all the list entries.

Of course any previously started acceleration may still be in flight, but since we take a
reservation before waiting, we're already sure that any previous acceleration command has
finished fencing, and no new one can start, because the bo appears on the ddestroy list,
which would force the same wait.
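
Roughly, the branch I mean in ttm_mem_evict_first (from memory, so the details may not
match drm-next exactly):

    bo = list_first_entry(&man->lru, struct ttm_buffer_object, lru);
    kref_get(&bo->list_kref);

    if (!list_empty(&bo->ddestroy)) {
            /* Dead bo: wait for it and tear it down instead of
             * queueing a new accelerated eviction. */
            spin_unlock(&glob->lru_lock);
            ret = ttm_bo_cleanup_refs(bo, interruptible,
                                      no_wait_reserve, no_wait_gpu);
            kref_put(&bo->list_kref, ttm_bo_release_list);
            return ret;
    }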

The wait is legal, and no new fences can be attached.

I do agree all those patches probably need much longer commit messages to explain this, though. :-)

~Maarten

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 04/10] drm/ttm: change fence_lock to inner lock, v3
  2012-11-20 11:33           ` Maarten Lankhorst
@ 2012-11-20 11:59             ` Maarten Lankhorst
  2012-11-20 12:03             ` Thomas Hellstrom
  1 sibling, 0 replies; 33+ messages in thread
From: Maarten Lankhorst @ 2012-11-20 11:59 UTC (permalink / raw)
  To: Thomas Hellstrom; +Cc: Maarten Lankhorst, Thomas Hellstrom, dri-devel

Op 20-11-12 12:33, Maarten Lankhorst schreef:
> Op 20-11-12 08:48, Thomas Hellstrom schreef:
>> On 11/19/2012 04:33 PM, Maarten Lankhorst wrote:
>>> Op 19-11-12 16:04, Thomas Hellstrom schreef:
>>>> On 11/19/2012 03:17 PM, Thomas Hellstrom wrote:
>>>>> Hi,
>>>>>
>>>>> This patch looks mostly good, although I think ttm_bo_cleanup_refs becomes overly complicated:
>>>>> Could this do, or am I missing something?
>>>>>
>>>> Actually, my version is bad, because ttm_bo_wait() is called with the lru lock held.
>>>>
>>>> /Thomas
>>> Oh digging through it made me remember why I had to release the reservation early and
>>> had to allow move_notify to be called without reservation.
>>>
>>> Fortunately move_notify has a NULL parameter, which is the only time that happens,
>>> so you can still check do BUG_ON(mem != NULL && !ttm_bo_reserved(bo)); in your
>>> move_notify handler.
>>>
>>> 05/10 removed the loop and assumed no new fence could be attached after the driver has
>>> declared the bo dead.
>>>
>>> However, at that point it may no longer hold a reservation to confirm this, that's why
>>> I moved the cleanup to be done in the release_list handler. It could still be done in
>>> ttm_bo_release, but we no longer have a reservation after we waited. Getting
>>> a reservation can fail if the bo is imported for example.
>>>
>>> While it would be true that in that case a new fence may be attached as well, that
>>> would be less harmful since that operation wouldn't involve this device, so the
>>> ttm bo can still be removed in that case. When that time comes I should probably
>>> fix up that WARN_ON(ret) in ttm_bo_cleanup_refs. :-)
>>>
>>> I did add a WARN_ON(!atomic_read(&bo->kref.refcount)); to
>>> ttm_bo_reserve and ttm_eu_reserve_buffers to be sure nothing is done on the device
>>> itself. If that is too paranoid, those WARN_ON's could be dropped. I prefer to leave them
>>> in for a kernel release or 2. But according to the rules that would be the only time you
>>> could attach a new fence and trigger the WARN_ON for now..
>> Hmm, I'd appreciate if you could group patches with functional changes that depend on eachother togeteher,
>> and "this is done because ...", which makes it much easier to review, (and to follow the commit history in case
>> something goes terribly wrong and we need to revert).
>>
>> Meanwhile I'll take a look at the final ttm_bo.c and see if I can spot any culprits.
>>
>> In general, as long as a bo is on a LRU list, we must be able to attach fences because of accelerated eviction.
> I thought it was deliberately designed in such a way that it was kept on the lru list,
> but since it's also on the ddestroy list it won't start accelerated eviction,
> since it branches into cleanup_refs early, and lru_lock still protects all the list entries.
>
> Of course any previous acceleration may still happen, but since we take a reservation first before waiting,
> we're already sure that any previous acceleration command has finished fencing, and no new one can
> start since it appears on the ddestroy list which would force it to perform the same wait.
>
> The wait is legal, and no new fences can be attached.
>
> I do agree all those patches probably needs a lot longer commit message to explain it though. :-)
>
Or maybe an alternative patch...

We could move the checks. There are only two places that are allowed to hold
reservations at that point, right?

ttm_bo_swapout and evict_mem_first.

If cleanup_refs_or_queue fails because the reservation fails, the bo must be reserved in one
of those two places. If it succeeds, we can remove the bo from the lru and swap lists, and if
the wait fails, move it to the ddestroy list.

The unreserve in swapout doesn't add it back to any lists, so no special handling is needed there.
The unreserve in evict_mem_first does, but we could take the lock before unreserving, and only
re-add the bo to the swap/lru lists when it's not on ddestroy.
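
Something like this in evict_mem_first's unreserve path (a sketch only; the helper names are
from memory and may differ):

    /* Take the lru lock before dropping the reservation, and only put the
     * bo back on the lru list if it is not queued for destruction. */
    spin_lock(&glob->lru_lock);
    if (list_empty(&bo->ddestroy))
            ttm_bo_add_to_lru(bo);
    atomic_set(&bo->reserved, 0);
    wake_up_all(&bo->event_queue);
    spin_unlock(&glob->lru_lock);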

That way we wouldn't need to call ttm_bo_cleanup_refs from multiple places,
and the cleanup would only ever need to be done in ttm_bo_delayed_delete, without races.

I thought it was a feature that a bo still appears on the lru list after death, so evict_mem_first
could wait on it, but if it's an annoyance it could easily be fixed like that.

But even if that's a feature to be preserved, evict_mem_first and swapout could be modified to check
the ddestroy list first for buffers to destroy. In that case those functions would explicitly prefer
waiting for the destruction of bos before queueing new work to swap out or evict bos.

~Maarten

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 04/10] drm/ttm: change fence_lock to inner lock, v3
  2012-11-20 11:33           ` Maarten Lankhorst
  2012-11-20 11:59             ` Maarten Lankhorst
@ 2012-11-20 12:03             ` Thomas Hellstrom
  2012-11-20 13:13               ` Maarten Lankhorst
  1 sibling, 1 reply; 33+ messages in thread
From: Thomas Hellstrom @ 2012-11-20 12:03 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: Maarten Lankhorst, dri-devel

On 11/20/2012 12:33 PM, Maarten Lankhorst wrote:
> Op 20-11-12 08:48, Thomas Hellstrom schreef:
>> On 11/19/2012 04:33 PM, Maarten Lankhorst wrote:
>>> Op 19-11-12 16:04, Thomas Hellstrom schreef:
>>>> On 11/19/2012 03:17 PM, Thomas Hellstrom wrote:
>>>>> Hi,
>>>>>
>>>>> This patch looks mostly good, although I think ttm_bo_cleanup_refs becomes overly complicated:
>>>>> Could this do, or am I missing something?
>>>>>
>>>> Actually, my version is bad, because ttm_bo_wait() is called with the lru lock held.
>>>>
>>>> /Thomas
>>> Oh digging through it made me remember why I had to release the reservation early and
>>> had to allow move_notify to be called without reservation.
>>>
>>> Fortunately move_notify has a NULL parameter, which is the only time that happens,
>>> so you can still check do BUG_ON(mem != NULL && !ttm_bo_reserved(bo)); in your
>>> move_notify handler.
>>>
>>> 05/10 removed the loop and assumed no new fence could be attached after the driver has
>>> declared the bo dead.
>>>
>>> However, at that point it may no longer hold a reservation to confirm this, that's why
>>> I moved the cleanup to be done in the release_list handler. It could still be done in
>>> ttm_bo_release, but we no longer have a reservation after we waited. Getting
>>> a reservation can fail if the bo is imported for example.
>>>
>>> While it would be true that in that case a new fence may be attached as well, that
>>> would be less harmful since that operation wouldn't involve this device, so the
>>> ttm bo can still be removed in that case. When that time comes I should probably
>>> fix up that WARN_ON(ret) in ttm_bo_cleanup_refs. :-)
>>>
>>> I did add a WARN_ON(!atomic_read(&bo->kref.refcount)); to
>>> ttm_bo_reserve and ttm_eu_reserve_buffers to be sure nothing is done on the device
>>> itself. If that is too paranoid, those WARN_ON's could be dropped. I prefer to leave them
>>> in for a kernel release or 2. But according to the rules that would be the only time you
>>> could attach a new fence and trigger the WARN_ON for now..
>> Hmm, I'd appreciate if you could group patches with functional changes that depend on eachother togeteher,
>> and "this is done because ...", which makes it much easier to review, (and to follow the commit history in case
>> something goes terribly wrong and we need to revert).
>>
>> Meanwhile I'll take a look at the final ttm_bo.c and see if I can spot any culprits.
>>
>> In general, as long as a bo is on a LRU list, we must be able to attach fences because of accelerated eviction.
> I thought it was deliberately designed in such a way that it was kept on the lru list,
> but since it's also on the ddestroy list it won't start accelerated eviction,
> since it branches into cleanup_refs early, and lru_lock still protects all the list entries.
I used bad wording. I meant that unbinding might be accelerated, but we currently
(quite inefficiently) do synchronized unbinding, assuming that only the CPU can do that.
When we start to support unsynchronized moves, we need to be able to attach fences at
least at the last move_notify(bo, NULL);

/Thomas

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 04/10] drm/ttm: change fence_lock to inner lock, v3
  2012-11-20 12:03             ` Thomas Hellstrom
@ 2012-11-20 13:13               ` Maarten Lankhorst
  2012-11-20 15:08                 ` Thomas Hellstrom
  0 siblings, 1 reply; 33+ messages in thread
From: Maarten Lankhorst @ 2012-11-20 13:13 UTC (permalink / raw)
  To: Thomas Hellstrom; +Cc: Maarten Lankhorst, dri-devel

Op 20-11-12 13:03, Thomas Hellstrom schreef:
> On 11/20/2012 12:33 PM, Maarten Lankhorst wrote:
>> Op 20-11-12 08:48, Thomas Hellstrom schreef:
>>> On 11/19/2012 04:33 PM, Maarten Lankhorst wrote:
>>>> Op 19-11-12 16:04, Thomas Hellstrom schreef:
>>>>> On 11/19/2012 03:17 PM, Thomas Hellstrom wrote:
>>>>>> Hi,
>>>>>>
>>>>>> This patch looks mostly good, although I think ttm_bo_cleanup_refs becomes overly complicated:
>>>>>> Could this do, or am I missing something?
>>>>>>
>>>>> Actually, my version is bad, because ttm_bo_wait() is called with the lru lock held.
>>>>>
>>>>> /Thomas
>>>> Oh digging through it made me remember why I had to release the reservation early and
>>>> had to allow move_notify to be called without reservation.
>>>>
>>>> Fortunately move_notify has a NULL parameter, which is the only time that happens,
>>>> so you can still check do BUG_ON(mem != NULL && !ttm_bo_reserved(bo)); in your
>>>> move_notify handler.
>>>>
>>>> 05/10 removed the loop and assumed no new fence could be attached after the driver has
>>>> declared the bo dead.
>>>>
>>>> However, at that point it may no longer hold a reservation to confirm this, that's why
>>>> I moved the cleanup to be done in the release_list handler. It could still be done in
>>>> ttm_bo_release, but we no longer have a reservation after we waited. Getting
>>>> a reservation can fail if the bo is imported for example.
>>>>
>>>> While it would be true that in that case a new fence may be attached as well, that
>>>> would be less harmful since that operation wouldn't involve this device, so the
>>>> ttm bo can still be removed in that case. When that time comes I should probably
>>>> fix up that WARN_ON(ret) in ttm_bo_cleanup_refs. :-)
>>>>
>>>> I did add a WARN_ON(!atomic_read(&bo->kref.refcount)); to
>>>> ttm_bo_reserve and ttm_eu_reserve_buffers to be sure nothing is done on the device
>>>> itself. If that is too paranoid, those WARN_ON's could be dropped. I prefer to leave them
>>>> in for a kernel release or 2. But according to the rules that would be the only time you
>>>> could attach a new fence and trigger the WARN_ON for now..
>>> Hmm, I'd appreciate if you could group patches with functional changes that depend on eachother togeteher,
>>> and "this is done because ...", which makes it much easier to review, (and to follow the commit history in case
>>> something goes terribly wrong and we need to revert).
>>>
>>> Meanwhile I'll take a look at the final ttm_bo.c and see if I can spot any culprits.
>>>
>>> In general, as long as a bo is on a LRU list, we must be able to attach fences because of accelerated eviction.
>> I thought it was deliberately designed in such a way that it was kept on the lru list,
>> but since it's also on the ddestroy list it won't start accelerated eviction,
>> since it branches into cleanup_refs early, and lru_lock still protects all the list entries.
> I used bad wording. I meant that unbinding might be accelerated, but  currently (quite inefficiently)
> do synchronized unbinding, assuming that only the CPU can do that. When we start to support
> unsynchronized moves, we need to be able to attach fences at least at the last move_notify(bo, NULL);
Would you, in that case, need to wait for fence_wait to complete before calling move_notify?

If not, you would still only need to perform one wait, but you'd have to make sure move_notify
only gets called by one thread before checking the fence pointer and performing a wait. At that
point you still hold the lru_lock, though, so it shouldn't be too hard to make something safe.

~Maarten

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 04/10] drm/ttm: change fence_lock to inner lock, v3
  2012-11-20 13:13               ` Maarten Lankhorst
@ 2012-11-20 15:08                 ` Thomas Hellstrom
  2012-11-21 11:38                   ` Maarten Lankhorst
  0 siblings, 1 reply; 33+ messages in thread
From: Thomas Hellstrom @ 2012-11-20 15:08 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: Maarten Lankhorst, dri-devel

On 11/20/2012 02:13 PM, Maarten Lankhorst wrote:
> Op 20-11-12 13:03, Thomas Hellstrom schreef:
>> On 11/20/2012 12:33 PM, Maarten Lankhorst wrote:
>>> Op 20-11-12 08:48, Thomas Hellstrom schreef:
>>>> On 11/19/2012 04:33 PM, Maarten Lankhorst wrote:
>>>>> Op 19-11-12 16:04, Thomas Hellstrom schreef:
>>>>>> On 11/19/2012 03:17 PM, Thomas Hellstrom wrote:
>>>>>>> Hi,
>>>>>>>
>>>>>>> This patch looks mostly good, although I think ttm_bo_cleanup_refs becomes overly complicated:
>>>>>>> Could this do, or am I missing something?
>>>>>>>
>>>>>> Actually, my version is bad, because ttm_bo_wait() is called with the lru lock held.
>>>>>>
>>>>>> /Thomas
>>>>> Oh digging through it made me remember why I had to release the reservation early and
>>>>> had to allow move_notify to be called without reservation.
>>>>>
>>>>> Fortunately move_notify has a NULL parameter, which is the only time that happens,
>>>>> so you can still check do BUG_ON(mem != NULL && !ttm_bo_reserved(bo)); in your
>>>>> move_notify handler.
>>>>>
>>>>> 05/10 removed the loop and assumed no new fence could be attached after the driver has
>>>>> declared the bo dead.
>>>>>
>>>>> However, at that point it may no longer hold a reservation to confirm this, that's why
>>>>> I moved the cleanup to be done in the release_list handler. It could still be done in
>>>>> ttm_bo_release, but we no longer have a reservation after we waited. Getting
>>>>> a reservation can fail if the bo is imported for example.
>>>>>
>>>>> While it would be true that in that case a new fence may be attached as well, that
>>>>> would be less harmful since that operation wouldn't involve this device, so the
>>>>> ttm bo can still be removed in that case. When that time comes I should probably
>>>>> fix up that WARN_ON(ret) in ttm_bo_cleanup_refs. :-)
>>>>>
>>>>> I did add a WARN_ON(!atomic_read(&bo->kref.refcount)); to
>>>>> ttm_bo_reserve and ttm_eu_reserve_buffers to be sure nothing is done on the device
>>>>> itself. If that is too paranoid, those WARN_ON's could be dropped. I prefer to leave them
>>>>> in for a kernel release or 2. But according to the rules that would be the only time you
>>>>> could attach a new fence and trigger the WARN_ON for now..
>>>> Hmm, I'd appreciate if you could group patches with functional changes that depend on eachother togeteher,
>>>> and "this is done because ...", which makes it much easier to review, (and to follow the commit history in case
>>>> something goes terribly wrong and we need to revert).
>>>>
>>>> Meanwhile I'll take a look at the final ttm_bo.c and see if I can spot any culprits.
>>>>
>>>> In general, as long as a bo is on a LRU list, we must be able to attach fences because of accelerated eviction.
>>> I thought it was deliberately designed in such a way that it was kept on the lru list,
>>> but since it's also on the ddestroy list it won't start accelerated eviction,
>>> since it branches into cleanup_refs early, and lru_lock still protects all the list entries.
>> I used bad wording. I meant that unbinding might be accelerated, but  currently (quite inefficiently)
>> do synchronized unbinding, assuming that only the CPU can do that. When we start to support
>> unsynchronized moves, we need to be able to attach fences at least at the last move_notify(bo, NULL);
> Would you need to wait in that case on fence_wait being completed before calling move_notify?
>
> If not, you would still only need to perform one wait, but you'd have to make sure move_notify only gets
> called by 1 thread before checking the fence pointer and performing a wait. At that point you still hold the
> lru_lock though, so it shouldn't be too hard to make something safe.

I think typically a driver that wants to implement asynchronous moves doesn't want to
wait before calling move_notify, but may wait in move_notify or move. Typically (upcoming
vmwgfx) it would invalidate the buffer in move_notify(bo, NULL), attach a fence and then
use the normal delayed destroy to wait on that fence before destroying the buffer.
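
Roughly like this (a sketch with made-up driver helpers, not actual vmwgfx code):

    static void driver_bo_move_notify(struct ttm_buffer_object *bo,
                                      struct ttm_mem_reg *new_mem)
    {
            if (new_mem == NULL) {
                    /* Destruction: queue the GPU invalidation and attach the
                     * resulting fence, so the normal delayed-destroy path
                     * waits for it before the bo is freed.  Both helpers are
                     * made-up names; fence_lock handling is elided. */
                    void *fence = driver_invalidate_async(bo);

                    driver_bo_fence_attach(bo, fence);
                    return;
            }
            /* real moves are handled here */
    }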

Otherwise, since binds / unbinds are handled in the GPU command stream, there's never
any need to wait for moves except when there's CPU access.

/Thomas


>
> ~Maarten
>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 04/10] drm/ttm: change fence_lock to inner lock, v3
  2012-11-20 15:08                 ` Thomas Hellstrom
@ 2012-11-21 11:38                   ` Maarten Lankhorst
  2012-11-21 12:42                     ` Thomas Hellstrom
  0 siblings, 1 reply; 33+ messages in thread
From: Maarten Lankhorst @ 2012-11-21 11:38 UTC (permalink / raw)
  To: Thomas Hellstrom; +Cc: Maarten Lankhorst, dri-devel

Hey,

Op 20-11-12 16:08, Thomas Hellstrom schreef:
> On 11/20/2012 02:13 PM, Maarten Lankhorst wrote:
>> Op 20-11-12 13:03, Thomas Hellstrom schreef:
>>> On 11/20/2012 12:33 PM, Maarten Lankhorst wrote:
>>>> Op 20-11-12 08:48, Thomas Hellstrom schreef:
>>>>> On 11/19/2012 04:33 PM, Maarten Lankhorst wrote:
>>>>>> Op 19-11-12 16:04, Thomas Hellstrom schreef:
>>>>>>> On 11/19/2012 03:17 PM, Thomas Hellstrom wrote:
>>>>>>>> Hi,
>>>>>>>>
>>>>>>>> This patch looks mostly good, although I think ttm_bo_cleanup_refs becomes overly complicated:
>>>>>>>> Could this do, or am I missing something?
>>>>>>>>
>>>>>>> Actually, my version is bad, because ttm_bo_wait() is called with the lru lock held.
>>>>>>>
>>>>>>> /Thomas
>>>>>> Oh digging through it made me remember why I had to release the reservation early and
>>>>>> had to allow move_notify to be called without reservation.
>>>>>>
>>>>>> Fortunately move_notify has a NULL parameter, which is the only time that happens,
>>>>>> so you can still check do BUG_ON(mem != NULL && !ttm_bo_reserved(bo)); in your
>>>>>> move_notify handler.
>>>>>>
>>>>>> 05/10 removed the loop and assumed no new fence could be attached after the driver has
>>>>>> declared the bo dead.
>>>>>>
>>>>>> However, at that point it may no longer hold a reservation to confirm this, that's why
>>>>>> I moved the cleanup to be done in the release_list handler. It could still be done in
>>>>>> ttm_bo_release, but we no longer have a reservation after we waited. Getting
>>>>>> a reservation can fail if the bo is imported for example.
>>>>>>
>>>>>> While it would be true that in that case a new fence may be attached as well, that
>>>>>> would be less harmful since that operation wouldn't involve this device, so the
>>>>>> ttm bo can still be removed in that case. When that time comes I should probably
>>>>>> fix up that WARN_ON(ret) in ttm_bo_cleanup_refs. :-)
>>>>>>
>>>>>> I did add a WARN_ON(!atomic_read(&bo->kref.refcount)); to
>>>>>> ttm_bo_reserve and ttm_eu_reserve_buffers to be sure nothing is done on the device
>>>>>> itself. If that is too paranoid, those WARN_ON's could be dropped. I prefer to leave them
>>>>>> in for a kernel release or 2. But according to the rules that would be the only time you
>>>>>> could attach a new fence and trigger the WARN_ON for now..
>>>>> Hmm, I'd appreciate if you could group patches with functional changes that depend on eachother togeteher,
>>>>> and "this is done because ...", which makes it much easier to review, (and to follow the commit history in case
>>>>> something goes terribly wrong and we need to revert).
>>>>>
>>>>> Meanwhile I'll take a look at the final ttm_bo.c and see if I can spot any culprits.
>>>>>
>>>>> In general, as long as a bo is on a LRU list, we must be able to attach fences because of accelerated eviction.
>>>> I thought it was deliberately designed in such a way that it was kept on the lru list,
>>>> but since it's also on the ddestroy list it won't start accelerated eviction,
>>>> since it branches into cleanup_refs early, and lru_lock still protects all the list entries.
>>> I used bad wording. I meant that unbinding might be accelerated, but  currently (quite inefficiently)
>>> do synchronized unbinding, assuming that only the CPU can do that. When we start to support
>>> unsynchronized moves, we need to be able to attach fences at least at the last move_notify(bo, NULL);
>> Would you need to wait in that case on fence_wait being completed before calling move_notify?
>>
>> If not, you would still only need to perform one wait, but you'd have to make sure move_notify only gets
>> called by 1 thread before checking the fence pointer and performing a wait. At that point you still hold the
>> lru_lock though, so it shouldn't be too hard to make something safe.
>
> I think typically a driver that wants to implement asynchronous moves don't want to wait before calling
> move_notify, but may wait in move_notify or move. Typically (upcoming vmwgfx) it would invalidate the buffer in move_notify(bo, NULL), attach a fence and then use the normal delayed destroy to wait on that fence before destroying the buffer.
>
> Otherwise, since binds / unbinds are handled in the GPU command stream there's never any need to wait for moves except when there's a CPU
> access.
Well, nouveau actually needs fence_wait to finish first, since VM changes are out of band.
But I guess it should be possible to attach that as work to run when the fence signals, and I
may want to do something like that in a different place already for performance reasons,
so I guess it doesn't matter.

Is calling move_notify(bo, NULL) legal, and a no-op the second time? That would save a flag
in the bo to track whether it has already been called,
although I suppose we could always define a TTM_BO_PRIV_FLAG_* for it otherwise.
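
If it isn't, a guard along these lines would do (the flag name and bit value are made up,
just to illustrate):

    #define TTM_BO_PRIV_FLAG_DEAD   2       /* hypothetical */

    static void driver_bo_move_notify(struct ttm_buffer_object *bo,
                                      struct ttm_mem_reg *new_mem)
    {
            if (new_mem == NULL) {
                    if (test_and_set_bit(TTM_BO_PRIV_FLAG_DEAD,
                                         &bo->priv_flags))
                            return; /* final unmap already done */
                    /* ... do the actual final unmap/unbind ... */
                    return;
            }
            /* real moves are handled here */
    }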

move_notify might end up being called with the lru_lock held, but that shouldn't be a problem.

~Maarten

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 04/10] drm/ttm: change fence_lock to inner lock, v3
  2012-11-21 11:38                   ` Maarten Lankhorst
@ 2012-11-21 12:42                     ` Thomas Hellstrom
  2012-11-21 13:12                       ` Maarten Lankhorst
  0 siblings, 1 reply; 33+ messages in thread
From: Thomas Hellstrom @ 2012-11-21 12:42 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: Maarten Lankhorst, dri-devel

On 11/21/2012 12:38 PM, Maarten Lankhorst wrote:
> Hey,
>
> Op 20-11-12 16:08, Thomas Hellstrom schreef:
>> On 11/20/2012 02:13 PM, Maarten Lankhorst wrote:
>>> Op 20-11-12 13:03, Thomas Hellstrom schreef:
>>>> On 11/20/2012 12:33 PM, Maarten Lankhorst wrote:
>>>>> Op 20-11-12 08:48, Thomas Hellstrom schreef:
>>>>>> On 11/19/2012 04:33 PM, Maarten Lankhorst wrote:
>>>>>>> Op 19-11-12 16:04, Thomas Hellstrom schreef:
>>>>>>>> On 11/19/2012 03:17 PM, Thomas Hellstrom wrote:
>>>>>>>>> Hi,
>>>>>>>>>
>>>>>>>>> This patch looks mostly good, although I think ttm_bo_cleanup_refs becomes overly complicated:
>>>>>>>>> Could this do, or am I missing something?
>>>>>>>>>
>>>>>>>> Actually, my version is bad, because ttm_bo_wait() is called with the lru lock held.
>>>>>>>>
>>>>>>>> /Thomas
>>>>>>> Oh digging through it made me remember why I had to release the reservation early and
>>>>>>> had to allow move_notify to be called without reservation.
>>>>>>>
>>>>>>> Fortunately move_notify has a NULL parameter, which is the only time that happens,
>>>>>>> so you can still check do BUG_ON(mem != NULL && !ttm_bo_reserved(bo)); in your
>>>>>>> move_notify handler.
>>>>>>>
>>>>>>> 05/10 removed the loop and assumed no new fence could be attached after the driver has
>>>>>>> declared the bo dead.
>>>>>>>
>>>>>>> However, at that point it may no longer hold a reservation to confirm this, that's why
>>>>>>> I moved the cleanup to be done in the release_list handler. It could still be done in
>>>>>>> ttm_bo_release, but we no longer have a reservation after we waited. Getting
>>>>>>> a reservation can fail if the bo is imported for example.
>>>>>>>
>>>>>>> While it would be true that in that case a new fence may be attached as well, that
>>>>>>> would be less harmful since that operation wouldn't involve this device, so the
>>>>>>> ttm bo can still be removed in that case. When that time comes I should probably
>>>>>>> fix up that WARN_ON(ret) in ttm_bo_cleanup_refs. :-)
>>>>>>>
>>>>>>> I did add a WARN_ON(!atomic_read(&bo->kref.refcount)); to
>>>>>>> ttm_bo_reserve and ttm_eu_reserve_buffers to be sure nothing is done on the device
>>>>>>> itself. If that is too paranoid, those WARN_ON's could be dropped. I prefer to leave them
>>>>>>> in for a kernel release or 2. But according to the rules that would be the only time you
>>>>>>> could attach a new fence and trigger the WARN_ON for now..
>>>>>> Hmm, I'd appreciate if you could group patches with functional changes that depend on eachother togeteher,
>>>>>> and "this is done because ...", which makes it much easier to review, (and to follow the commit history in case
>>>>>> something goes terribly wrong and we need to revert).
>>>>>>
>>>>>> Meanwhile I'll take a look at the final ttm_bo.c and see if I can spot any culprits.
>>>>>>
>>>>>> In general, as long as a bo is on a LRU list, we must be able to attach fences because of accelerated eviction.
>>>>> I thought it was deliberately designed in such a way that it was kept on the lru list,
>>>>> but since it's also on the ddestroy list it won't start accelerated eviction,
>>>>> since it branches into cleanup_refs early, and lru_lock still protects all the list entries.
>>>> I used bad wording. I meant that unbinding might be accelerated, but  currently (quite inefficiently)
>>>> do synchronized unbinding, assuming that only the CPU can do that. When we start to support
>>>> unsynchronized moves, we need to be able to attach fences at least at the last move_notify(bo, NULL);
>>> Would you need to wait in that case on fence_wait being completed before calling move_notify?
>>>
>>> If not, you would still only need to perform one wait, but you'd have to make sure move_notify only gets
>>> called by 1 thread before checking the fence pointer and performing a wait. At that point you still hold the
>>> lru_lock though, so it shouldn't be too hard to make something safe.
>> I think typically a driver that wants to implement asynchronous moves don't want to wait before calling
>> move_notify, but may wait in move_notify or move. Typically (upcoming vmwgfx) it would invalidate the buffer in move_notify(bo, NULL), attach a fence and then use the normal delayed destroy to wait on that fence before destroying the buffer.
>>
>> Otherwise, since binds / unbinds are handled in the GPU command stream there's never any need to wait for moves except when there's a CPU
>> access.
> Well, nouveau actually needs fence_wait to finish first, since vm changes are out of band.
> But I guess it should be possible to attach it as work to the fence when it's signaled, and I
> may want to do something like that already for performance reasons in a different place,
> so I guess it doesn't matter.

Actions to be performed on fence signaling tend to be very CPU consuming, I think due to
the context switches involved. We had to replace that in the old psb driver and batch
things like TTM does instead.

Also remember that TTM fences are not required to signal in finite time
unless fence_flush is called.

I think nouveau doesn't use fence IRQs to signal its fences.

>
> Is calling move_notify(bo, NULL) legal and a noop the second time?

I see no fundamental reason why it shouldn't be OK, although we might 
need to patch drivers to cope with it.

>   That would save a flag in the bo to check if it's called already,
> although I suppose we could always define a TTM_BO_PRIV_FLAG_* for it otherwise.
>
> move_notify might end up being called with the lru_lock held, but that shouldn't be a problem.

I don't think that's a good idea. Drivers sleeping in move_notify will 
need to release the spinlock, and that means it's
better to release it before move_notify is called.

/Thomas


>
> ~Maarten
>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 04/10] drm/ttm: change fence_lock to inner lock, v3
  2012-11-21 12:42                     ` Thomas Hellstrom
@ 2012-11-21 13:12                       ` Maarten Lankhorst
  2012-11-21 13:27                         ` Thomas Hellstrom
  0 siblings, 1 reply; 33+ messages in thread
From: Maarten Lankhorst @ 2012-11-21 13:12 UTC (permalink / raw)
  To: Thomas Hellstrom; +Cc: Maarten Lankhorst, dri-devel

Op 21-11-12 13:42, Thomas Hellstrom schreef:
> On 11/21/2012 12:38 PM, Maarten Lankhorst wrote:
>> Hey,
>>
>> Op 20-11-12 16:08, Thomas Hellstrom schreef:
>>> On 11/20/2012 02:13 PM, Maarten Lankhorst wrote:
>>>> Op 20-11-12 13:03, Thomas Hellstrom schreef:
>>>>> On 11/20/2012 12:33 PM, Maarten Lankhorst wrote:
>>>>>> Op 20-11-12 08:48, Thomas Hellstrom schreef:
>>>>>>> On 11/19/2012 04:33 PM, Maarten Lankhorst wrote:
>>>>>>>> Op 19-11-12 16:04, Thomas Hellstrom schreef:
>>>>>>>>> On 11/19/2012 03:17 PM, Thomas Hellstrom wrote:
>>>>>>>>>> Hi,
>>>>>>>>>>
>>>>>>>>>> This patch looks mostly good, although I think ttm_bo_cleanup_refs becomes overly complicated:
>>>>>>>>>> Could this do, or am I missing something?
>>>>>>>>>>
>>>>>>>>> Actually, my version is bad, because ttm_bo_wait() is called with the lru lock held.
>>>>>>>>>
>>>>>>>>> /Thomas
>>>>>>>> Oh digging through it made me remember why I had to release the reservation early and
>>>>>>>> had to allow move_notify to be called without reservation.
>>>>>>>>
>>>>>>>> Fortunately move_notify has a NULL parameter, which is the only time that happens,
>>>>>>>> so you can still check do BUG_ON(mem != NULL && !ttm_bo_reserved(bo)); in your
>>>>>>>> move_notify handler.
>>>>>>>>
>>>>>>>> 05/10 removed the loop and assumed no new fence could be attached after the driver has
>>>>>>>> declared the bo dead.
>>>>>>>>
>>>>>>>> However, at that point it may no longer hold a reservation to confirm this, that's why
>>>>>>>> I moved the cleanup to be done in the release_list handler. It could still be done in
>>>>>>>> ttm_bo_release, but we no longer have a reservation after we waited. Getting
>>>>>>>> a reservation can fail if the bo is imported for example.
>>>>>>>>
>>>>>>>> While it would be true that in that case a new fence may be attached as well, that
>>>>>>>> would be less harmful since that operation wouldn't involve this device, so the
>>>>>>>> ttm bo can still be removed in that case. When that time comes I should probably
>>>>>>>> fix up that WARN_ON(ret) in ttm_bo_cleanup_refs. :-)
>>>>>>>>
>>>>>>>> I did add a WARN_ON(!atomic_read(&bo->kref.refcount)); to
>>>>>>>> ttm_bo_reserve and ttm_eu_reserve_buffers to be sure nothing is done on the device
>>>>>>>> itself. If that is too paranoid, those WARN_ON's could be dropped. I prefer to leave them
>>>>>>>> in for a kernel release or 2. But according to the rules that would be the only time you
>>>>>>>> could attach a new fence and trigger the WARN_ON for now..
>>>>>>> Hmm, I'd appreciate if you could group patches with functional changes that depend on eachother togeteher,
>>>>>>> and "this is done because ...", which makes it much easier to review, (and to follow the commit history in case
>>>>>>> something goes terribly wrong and we need to revert).
>>>>>>>
>>>>>>> Meanwhile I'll take a look at the final ttm_bo.c and see if I can spot any culprits.
>>>>>>>
>>>>>>> In general, as long as a bo is on a LRU list, we must be able to attach fences because of accelerated eviction.
>>>>>> I thought it was deliberately designed in such a way that it was kept on the lru list,
>>>>>> but since it's also on the ddestroy list it won't start accelerated eviction,
>>>>>> since it branches into cleanup_refs early, and lru_lock still protects all the list entries.
>>>>> I used bad wording. I meant that unbinding might be accelerated, but  currently (quite inefficiently)
>>>>> do synchronized unbinding, assuming that only the CPU can do that. When we start to support
>>>>> unsynchronized moves, we need to be able to attach fences at least at the last move_notify(bo, NULL);
>>>> Would you need to wait in that case on fence_wait being completed before calling move_notify?
>>>>
>>>> If not, you would still only need to perform one wait, but you'd have to make sure move_notify only gets
>>>> called by 1 thread before checking the fence pointer and performing a wait. At that point you still hold the
>>>> lru_lock though, so it shouldn't be too hard to make something safe.
>>> I think typically a driver that wants to implement asynchronous moves don't want to wait before calling
>>> move_notify, but may wait in move_notify or move. Typically (upcoming vmwgfx) it would invalidate the buffer in move_notify(bo, NULL), attach a fence and then use the normal delayed destroy to wait on that fence before destroying the buffer.
>>>
>>> Otherwise, since binds / unbinds are handled in the GPU command stream there's never any need to wait for moves except when there's a CPU
>>> access.
>> Well, nouveau actually needs fence_wait to finish first, since vm changes are out of band.
>> But I guess it should be possible to attach it as work to the fence when it's signaled, and I
>> may want to do something like that already for performance reasons in a different place,
>> so I guess it doesn't matter.
>
> Actions to be performed on fence signaling tend to be very cpu consuming, I think due to the context switches involved.
> We had to replace that in the old psb driver and batch things like TTM does instead.
>
> Also remember that TTM fences are not required to signal in finite time unless fence_flush is called.
>
> I think nouveau doesn't use fence irqs to signal its fences.
>
>>
>> Is calling move_notify(bo, NULL) legal and a noop the second time?
>
> I see no fundamental reason why it shouldn't be OK, although we might need to patch drivers to cope with it.
>
>>   That would save a flag in the bo to check if it's called already,
>> although I suppose we could always define a TTM_BO_PRIV_FLAG_* for it otherwise.
>>
>> move_notify might end up being called with the lru_lock held, but that shouldn't be a problem.
>
> I don't think that's a good idea. Drivers sleeping in move_notify will need to release the spinlock, and that means it's
> better to release it before move_notify is called.
Is the only sleeping being done on fences? In that case we might wish to split move_notify
into two pieces for destruction: a piece that runs immediately, and a piece that runs after
the new fence has signaled (the current behavior).

Nouveau needs the final move_notify unmap to be called after the object is idle, like it is
now. It doesn't need to attach a new fence.

~Maarten

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 04/10] drm/ttm: change fence_lock to inner lock, v3
  2012-11-21 13:12                       ` Maarten Lankhorst
@ 2012-11-21 13:27                         ` Thomas Hellstrom
  2012-11-22 15:51                           ` Maarten Lankhorst
  0 siblings, 1 reply; 33+ messages in thread
From: Thomas Hellstrom @ 2012-11-21 13:27 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: Maarten Lankhorst, dri-devel

On 11/21/2012 02:12 PM, Maarten Lankhorst wrote:
> Op 21-11-12 13:42, Thomas Hellstrom schreef:
>> On 11/21/2012 12:38 PM, Maarten Lankhorst wrote:
>>> Hey,
>>>
>>> Op 20-11-12 16:08, Thomas Hellstrom schreef:
>>>> On 11/20/2012 02:13 PM, Maarten Lankhorst wrote:
>>>>> Op 20-11-12 13:03, Thomas Hellstrom schreef:
>>>>>> On 11/20/2012 12:33 PM, Maarten Lankhorst wrote:
>>>>>>> Op 20-11-12 08:48, Thomas Hellstrom schreef:
>>>>>>>> On 11/19/2012 04:33 PM, Maarten Lankhorst wrote:
>>>>>>>>> Op 19-11-12 16:04, Thomas Hellstrom schreef:
>>>>>>>>>> On 11/19/2012 03:17 PM, Thomas Hellstrom wrote:
>>>>>>>>>>> Hi,
>>>>>>>>>>>
>>>>>>>>>>> This patch looks mostly good, although I think ttm_bo_cleanup_refs becomes overly complicated:
>>>>>>>>>>> Could this do, or am I missing something?
>>>>>>>>>>>
>>>>>>>>>> Actually, my version is bad, because ttm_bo_wait() is called with the lru lock held.
>>>>>>>>>>
>>>>>>>>>> /Thomas
>>>>>>>>> Oh digging through it made me remember why I had to release the reservation early and
>>>>>>>>> had to allow move_notify to be called without reservation.
>>>>>>>>>
>>>>>>>>> Fortunately move_notify has a NULL parameter, which is the only time that happens,
>>>>>>>>> so you can still check do BUG_ON(mem != NULL && !ttm_bo_reserved(bo)); in your
>>>>>>>>> move_notify handler.
>>>>>>>>>
>>>>>>>>> 05/10 removed the loop and assumed no new fence could be attached after the driver has
>>>>>>>>> declared the bo dead.
>>>>>>>>>
>>>>>>>>> However, at that point it may no longer hold a reservation to confirm this, that's why
>>>>>>>>> I moved the cleanup to be done in the release_list handler. It could still be done in
>>>>>>>>> ttm_bo_release, but we no longer have a reservation after we waited. Getting
>>>>>>>>> a reservation can fail if the bo is imported for example.
>>>>>>>>>
>>>>>>>>> While it would be true that in that case a new fence may be attached as well, that
>>>>>>>>> would be less harmful since that operation wouldn't involve this device, so the
>>>>>>>>> ttm bo can still be removed in that case. When that time comes I should probably
>>>>>>>>> fix up that WARN_ON(ret) in ttm_bo_cleanup_refs. :-)
>>>>>>>>>
>>>>>>>>> I did add a WARN_ON(!atomic_read(&bo->kref.refcount)); to
>>>>>>>>> ttm_bo_reserve and ttm_eu_reserve_buffers to be sure nothing is done on the device
>>>>>>>>> itself. If that is too paranoid, those WARN_ON's could be dropped. I prefer to leave them
>>>>>>>>> in for a kernel release or 2. But according to the rules that would be the only time you
>>>>>>>>> could attach a new fence and trigger the WARN_ON for now..
>>>>>>>> Hmm, I'd appreciate if you could group patches with functional changes that depend on eachother togeteher,
>>>>>>>> and "this is done because ...", which makes it much easier to review, (and to follow the commit history in case
>>>>>>>> something goes terribly wrong and we need to revert).
>>>>>>>>
>>>>>>>> Meanwhile I'll take a look at the final ttm_bo.c and see if I can spot any culprits.
>>>>>>>>
>>>>>>>> In general, as long as a bo is on a LRU list, we must be able to attach fences because of accelerated eviction.
>>>>>>> I thought it was deliberately designed in such a way that it was kept on the lru list,
>>>>>>> but since it's also on the ddestroy list it won't start accelerated eviction,
>>>>>>> since it branches into cleanup_refs early, and lru_lock still protects all the list entries.
>>>>>> I used bad wording. I meant that unbinding might be accelerated, but  currently (quite inefficiently)
>>>>>> do synchronized unbinding, assuming that only the CPU can do that. When we start to support
>>>>>> unsynchronized moves, we need to be able to attach fences at least at the last move_notify(bo, NULL);
>>>>> Would you need to wait in that case on fence_wait being completed before calling move_notify?
>>>>>
>>>>> If not, you would still only need to perform one wait, but you'd have to make sure move_notify only gets
>>>>> called by 1 thread before checking the fence pointer and performing a wait. At that point you still hold the
>>>>> lru_lock though, so it shouldn't be too hard to make something safe.
>>>> I think typically a driver that wants to implement asynchronous moves don't want to wait before calling
>>>> move_notify, but may wait in move_notify or move. Typically (upcoming vmwgfx) it would invalidate the buffer in move_notify(bo, NULL), attach a fence and then use the normal delayed destroy to wait on that fence before destroying the buffer.
>>>>
>>>> Otherwise, since binds / unbinds are handled in the GPU command stream there's never any need to wait for moves except when there's a CPU
>>>> access.
>>> Well, nouveau actually needs fence_wait to finish first, since vm changes are out of band.
>>> But I guess it should be possible to attach it as work to the fence when it's signaled, and I
>>> may want to do something like that already for performance reasons in a different place,
>>> so I guess it doesn't matter.
>> Actions to be performed on fence signaling tend to be very cpu consuming, I think due to the context switches involved.
>> We had to replace that in the old psb driver and batch things like TTM does instead.
>>
>> Also remember that TTM fences are not required to signal in finite time unless fence_flush is called.
>>
>> I think nouveau doesn't use fence irqs to signal its fences.
>>
>>> Is calling move_notify(bo, NULL) legal and a noop the second time?
>> I see no fundamental reason why it shouldn't be OK, although we might need to patch drivers to cope with it.
>>
>>>    That would save a flag in the bo to check if it's called already,
>>> although I suppose we could always define a TTM_BO_PRIV_FLAG_* for it otherwise.
>>>
>>> move_notify might end up being called with the lru_lock held, but that shouldn't be a problem.
>> I don't think that's a good idea. Drivers sleeping in move_notify will need to release the spinlock, and that means it's
>> better to release it before move_notify is called.
> Is the only sleeping being done on fences? In that case we might wish to split it up in 2 pieces for destruction,
> the piece that runs immediately, and a piece to run after the new fence has signaled (current behavior).
>
> Nouveau needs the final move_notify unmap to be called after object is idle, like it is now. It doesn't need
> to attach a new fence.

In that case it might be best to worry about asynchronous stuff later?
We will eventually implement it on the new vmwgfx hardware revision, but 
it's not ready yet.

/Thomas



> ~Maarten
>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 04/10] drm/ttm: change fence_lock to inner lock, v3
  2012-11-21 13:27                         ` Thomas Hellstrom
@ 2012-11-22 15:51                           ` Maarten Lankhorst
  2012-11-22 20:29                             ` Thomas Hellstrom
  0 siblings, 1 reply; 33+ messages in thread
From: Maarten Lankhorst @ 2012-11-22 15:51 UTC (permalink / raw)
  To: Thomas Hellstrom; +Cc: Maarten Lankhorst, dri-devel

Op 21-11-12 14:27, Thomas Hellstrom schreef:
> On 11/21/2012 02:12 PM, Maarten Lankhorst wrote:
>> Op 21-11-12 13:42, Thomas Hellstrom schreef:
>>> On 11/21/2012 12:38 PM, Maarten Lankhorst wrote:
>>>> Hey,
>>>>
>>>> Op 20-11-12 16:08, Thomas Hellstrom schreef:
>>>>> On 11/20/2012 02:13 PM, Maarten Lankhorst wrote:
>>>>>> Op 20-11-12 13:03, Thomas Hellstrom schreef:
>>>>>>> On 11/20/2012 12:33 PM, Maarten Lankhorst wrote:
>>>>>>>> Op 20-11-12 08:48, Thomas Hellstrom schreef:
>>>>>>>>> On 11/19/2012 04:33 PM, Maarten Lankhorst wrote:
>>>>>>>>>> Op 19-11-12 16:04, Thomas Hellstrom schreef:
>>>>>>>>>>> On 11/19/2012 03:17 PM, Thomas Hellstrom wrote:
>>>>>>>>>>>> Hi,
>>>>>>>>>>>>
>>>>>>>>>>>> This patch looks mostly good, although I think ttm_bo_cleanup_refs becomes overly complicated:
>>>>>>>>>>>> Could this do, or am I missing something?
>>>>>>>>>>>>
>>>>>>>>>>> Actually, my version is bad, because ttm_bo_wait() is called with the lru lock held.
>>>>>>>>>>>
>>>>>>>>>>> /Thomas
>>>>>>>>>> Oh digging through it made me remember why I had to release the reservation early and
>>>>>>>>>> had to allow move_notify to be called without reservation.
>>>>>>>>>>
>>>>>>>>>> Fortunately move_notify has a NULL parameter, which is the only time that happens,
>>>>>>>>>> so you can still check do BUG_ON(mem != NULL && !ttm_bo_reserved(bo)); in your
>>>>>>>>>> move_notify handler.
>>>>>>>>>>
>>>>>>>>>> 05/10 removed the loop and assumed no new fence could be attached after the driver has
>>>>>>>>>> declared the bo dead.
>>>>>>>>>>
>>>>>>>>>> However, at that point it may no longer hold a reservation to confirm this, that's why
>>>>>>>>>> I moved the cleanup to be done in the release_list handler. It could still be done in
>>>>>>>>>> ttm_bo_release, but we no longer have a reservation after we waited. Getting
>>>>>>>>>> a reservation can fail if the bo is imported for example.
>>>>>>>>>>
>>>>>>>>>> While it would be true that in that case a new fence may be attached as well, that
>>>>>>>>>> would be less harmful since that operation wouldn't involve this device, so the
>>>>>>>>>> ttm bo can still be removed in that case. When that time comes I should probably
>>>>>>>>>> fix up that WARN_ON(ret) in ttm_bo_cleanup_refs. :-)
>>>>>>>>>>
>>>>>>>>>> I did add a WARN_ON(!atomic_read(&bo->kref.refcount)); to
>>>>>>>>>> ttm_bo_reserve and ttm_eu_reserve_buffers to be sure nothing is done on the device
>>>>>>>>>> itself. If that is too paranoid, those WARN_ON's could be dropped. I prefer to leave them
>>>>>>>>>> in for a kernel release or 2. But according to the rules that would be the only time you
>>>>>>>>>> could attach a new fence and trigger the WARN_ON for now..
>>>>>>>>> Hmm, I'd appreciate if you could group patches with functional changes that depend on eachother togeteher,
>>>>>>>>> and "this is done because ...", which makes it much easier to review, (and to follow the commit history in case
>>>>>>>>> something goes terribly wrong and we need to revert).
>>>>>>>>>
>>>>>>>>> Meanwhile I'll take a look at the final ttm_bo.c and see if I can spot any culprits.
>>>>>>>>>
>>>>>>>>> In general, as long as a bo is on a LRU list, we must be able to attach fences because of accelerated eviction.
>>>>>>>> I thought it was deliberately designed in such a way that it was kept on the lru list,
>>>>>>>> but since it's also on the ddestroy list it won't start accelerated eviction,
>>>>>>>> since it branches into cleanup_refs early, and lru_lock still protects all the list entries.
>>>>>>> I used bad wording. I meant that unbinding might be accelerated, but  currently (quite inefficiently)
>>>>>>> do synchronized unbinding, assuming that only the CPU can do that. When we start to support
>>>>>>> unsynchronized moves, we need to be able to attach fences at least at the last move_notify(bo, NULL);
>>>>>> Would you need to wait in that case on fence_wait being completed before calling move_notify?
>>>>>>
>>>>>> If not, you would still only need to perform one wait, but you'd have to make sure move_notify only gets
>>>>>> called by 1 thread before checking the fence pointer and performing a wait. At that point you still hold the
>>>>>> lru_lock though, so it shouldn't be too hard to make something safe.
>>>>> I think typically a driver that wants to implement asynchronous moves don't want to wait before calling
>>>>> move_notify, but may wait in move_notify or move. Typically (upcoming vmwgfx) it would invalidate the buffer in move_notify(bo, NULL), attach a fence and then use the normal delayed destroy to wait on that fence before destroying the buffer.
>>>>>
>>>>> Otherwise, since binds / unbinds are handled in the GPU command stream there's never any need to wait for moves except when there's a CPU
>>>>> access.
>>>> Well, nouveau actually needs fence_wait to finish first, since vm changes are out of band.
>>>> But I guess it should be possible to attach it as work to the fence when it's signaled, and I
>>>> may want to do something like that already for performance reasons in a different place,
>>>> so I guess it doesn't matter.
>>> Actions to be performed on fence signaling tend to be very cpu consuming, I think due to the context switches involved.
>>> We had to replace that in the old psb driver and batch things like TTM does instead.
>>>
>>> Also remember that TTM fences are not required to signal in finite time unless fence_flush is called.
>>>
>>> I think nouveau doesn't use fence irqs to signal its fences.
>>>
>>>> Is calling move_notify(bo, NULL) legal and a noop the second time?
>>> I see no fundamental reason why it shouldn't be OK, although we might need to patch drivers to cope with it.
>>>
>>>>    That would save a flag in the bo to check if it's called already,
>>>> although I suppose we could always define a TTM_BO_PRIV_FLAG_* for it otherwise.
>>>>
>>>> move_notify might end up being called with the lru_lock held, but that shouldn't be a problem.
>>> I don't think that's a good idea. Drivers sleeping in move_notify will need to release the spinlock, and that means it's
>>> better to release it before move_notify is called.
>> Is the only sleeping being done on fences? In that case we might wish to split it up in 2 pieces for destruction,
>> the piece that runs immediately, and a piece to run after the new fence has signaled (current behavior).
>>
>> Nouveau needs the final move_notify unmap to be called after the object is idle, like it is now. It doesn't need
>> to attach a new fence.
>
> In that case it might be best to worry about asynchronous stuff later?
> We will eventually implement it on the new vmwgfx hardware revision, but it's not ready yet.
>
> /Thomas
Ok sounds good.

In that case what do you want me to change from the first 4 patches apart from more verbose commit messages?
- 03/10 I got that I need to re-add the list_empty check after -EBUSY was returned in evict_mem_first.

Also PATCH 05/10 cleans up the spinning in ttm_bo_cleanup_refs, so I hope it's OK that it's a bit
ugly in 04/10, as long as it doesn't result in any new bugs being introduced.

~Maarten

PS: I did a plain rebase of my git tree to deal with the conflicts in drm-next.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 04/10] drm/ttm: change fence_lock to inner lock, v3
  2012-11-22 15:51                           ` Maarten Lankhorst
@ 2012-11-22 20:29                             ` Thomas Hellstrom
  2012-11-27 12:35                               ` Maarten Lankhorst
  0 siblings, 1 reply; 33+ messages in thread
From: Thomas Hellstrom @ 2012-11-22 20:29 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: Maarten Lankhorst, Thomas Hellstrom, dri-devel

On 11/22/2012 04:51 PM, Maarten Lankhorst wrote:
> On 21-11-12 14:27, Thomas Hellstrom wrote:
>> On 11/21/2012 02:12 PM, Maarten Lankhorst wrote:
>>> On 21-11-12 13:42, Thomas Hellstrom wrote:
>>>> On 11/21/2012 12:38 PM, Maarten Lankhorst wrote:
>>>>> Hey,
>>>>>
>>>>> On 20-11-12 16:08, Thomas Hellstrom wrote:
>>>>>> On 11/20/2012 02:13 PM, Maarten Lankhorst wrote:
>>>>>>> On 20-11-12 13:03, Thomas Hellstrom wrote:
>>>>>>>> On 11/20/2012 12:33 PM, Maarten Lankhorst wrote:
>>>>>>>>> On 20-11-12 08:48, Thomas Hellstrom wrote:
>>>>>>>>>> On 11/19/2012 04:33 PM, Maarten Lankhorst wrote:
>>>>>>>>>>> On 19-11-12 16:04, Thomas Hellstrom wrote:
>>>>>>>>>>>> On 11/19/2012 03:17 PM, Thomas Hellstrom wrote:
>>>>>>>>>>>>> Hi,
>>>>>>>>>>>>>
>>>>>>>>>>>>> This patch looks mostly good, although I think ttm_bo_cleanup_refs becomes overly complicated:
>>>>>>>>>>>>> Could this do, or am I missing something?
>>>>>>>>>>>>>
>>>>>>>>>>>> Actually, my version is bad, because ttm_bo_wait() is called with the lru lock held.
>>>>>>>>>>>>
>>>>>>>>>>>> /Thomas
>>>>>>>>>>> Oh digging through it made me remember why I had to release the reservation early and
>>>>>>>>>>> had to allow move_notify to be called without reservation.
>>>>>>>>>>>
>>>>>>>>>>> Fortunately move_notify has a NULL parameter, which is the only time that happens,
>>>>>>>>>>> so you can still do BUG_ON(mem != NULL && !ttm_bo_reserved(bo)); in your
>>>>>>>>>>> move_notify handler.
>>>>>>>>>>>
>>>>>>>>>>> 05/10 removed the loop and assumed no new fence could be attached after the driver has
>>>>>>>>>>> declared the bo dead.
>>>>>>>>>>>
>>>>>>>>>>> However, at that point it may no longer hold a reservation to confirm this, that's why
>>>>>>>>>>> I moved the cleanup to be done in the release_list handler. It could still be done in
>>>>>>>>>>> ttm_bo_release, but we no longer have a reservation after we waited. Getting
>>>>>>>>>>> a reservation can fail if the bo is imported for example.
>>>>>>>>>>>
>>>>>>>>>>> While it would be true that in that case a new fence may be attached as well, that
>>>>>>>>>>> would be less harmful since that operation wouldn't involve this device, so the
>>>>>>>>>>> ttm bo can still be removed in that case. When that time comes I should probably
>>>>>>>>>>> fix up that WARN_ON(ret) in ttm_bo_cleanup_refs. :-)
>>>>>>>>>>>
>>>>>>>>>>> I did add a WARN_ON(!atomic_read(&bo->kref.refcount)); to
>>>>>>>>>>> ttm_bo_reserve and ttm_eu_reserve_buffers to be sure nothing is done on the device
>>>>>>>>>>> itself. If that is too paranoid, those WARN_ON's could be dropped. I prefer to leave them
>>>>>>>>>>> in for a kernel release or 2. But according to the rules that would be the only time you
>>>>>>>>>>> could attach a new fence and trigger the WARN_ON for now..
>>>>>>>>>> Hmm, I'd appreciate it if you could group patches with functional changes that depend on each other together,
>>>>>>>>>> and "this is done because ...", which makes it much easier to review, (and to follow the commit history in case
>>>>>>>>>> something goes terribly wrong and we need to revert).
>>>>>>>>>>
>>>>>>>>>> Meanwhile I'll take a look at the final ttm_bo.c and see if I can spot any culprits.
>>>>>>>>>>
>>>>>>>>>> In general, as long as a bo is on an LRU list, we must be able to attach fences because of accelerated eviction.
>>>>>>>>> I thought it was deliberately designed in such a way that it was kept on the lru list,
>>>>>>>>> but since it's also on the ddestroy list it won't start accelerated eviction,
>>>>>>>>> since it branches into cleanup_refs early, and lru_lock still protects all the list entries.
>>>>>>>> I used bad wording. I meant that unbinding might be accelerated, but we currently (quite inefficiently)
>>>>>>>> do synchronized unbinding, assuming that only the CPU can do that. When we start to support
>>>>>>>> unsynchronized moves, we need to be able to attach fences at least at the last move_notify(bo, NULL);
>>>>>>> Would you, in that case, need to wait for fence_wait to complete before calling move_notify?
>>>>>>>
>>>>>>> If not, you would still only need to perform one wait, but you'd have to make sure move_notify only gets
>>>>>>> called by 1 thread before checking the fence pointer and performing a wait. At that point you still hold the
>>>>>>> lru_lock though, so it shouldn't be too hard to make something safe.
>>>>>> I think typically a driver that wants to implement asynchronous moves doesn't want to wait before calling
>>>>>> move_notify, but may wait in move_notify or move. Typically (upcoming vmwgfx) it would invalidate the buffer in move_notify(bo, NULL), attach a fence and then use the normal delayed destroy to wait on that fence before destroying the buffer.
>>>>>>
>>>>>> Otherwise, since binds / unbinds are handled in the GPU command stream there's never any need to wait for moves except when there's a CPU
>>>>>> access.
>>>>> Well, nouveau actually needs fence_wait to finish first, since vm changes are out of band.
>>>>> But I guess it should be possible to attach it as work to the fence when it's signaled, and I
>>>>> may want to do something like that already for performance reasons in a different place,
>>>>> so I guess it doesn't matter.
>>>> Actions to be performed on fence signaling tend to be very cpu consuming, I think due to the context switches involved.
>>>> We had to replace that in the old psb driver and batch things like TTM does instead.
>>>>
>>>> Also remember that TTM fences are not required to signal in finite time unless fence_flush is called.
>>>>
>>>> I think nouveau doesn't use fence irqs to signal its fences.
>>>>
>>>>> Is calling move_notify(bo, NULL) legal and a noop the second time?
>>>> I see no fundamental reason why it shouldn't be OK, although we might need to patch drivers to cope with it.
>>>>
>>>>>     That would save a flag in the bo to check if it's called already,
>>>>> although I suppose we could always define a TTM_BO_PRIV_FLAG_* for it otherwise.
>>>>>
>>>>> move_notify might end up being called with the lru_lock held, but that shouldn't be a problem.
>>>> I don't think that's a good idea. Drivers sleeping in move_notify will need to release the spinlock, and that means it's
>>>> better to release it before move_notify is called.
>>> Is the only sleeping being done on fences? In that case we might wish to split it up in 2 pieces for destruction,
>>> the piece that runs immediately, and a piece to run after the new fence has signaled (current behavior).
>>>
>>> Nouveau needs the final move_notify unmap to be called after the object is idle, like it is now. It doesn't need
>>> to attach a new fence.
>> In that case it might be best to worry about asynchronous stuff later?
>> We will eventually implement it on the new vmwgfx hardware revision, but it's not ready yet.
>>
>> /Thomas
> Ok sounds good.
>
> In that case what do you want me to change from the first 4 patches apart from more verbose commit messages?
> - 03/10 I got that I need to re-add the list_empty check after -EBUSY was returned in evict_mem_first.
>
> Also PATCH 05/10 cleans up the spinning in ttm_bo_cleanup_refs, so I hope it's OK that it's a bit
> ugly in 04/10, as long as it doesn't result in any new bugs being introduced.
>
> ~Maarten
>
> PS: I did a plain rebase of my git tree to deal with the conflicts in drm-next.
>

Maarten, it seems to me the purposes of the patches are the following 
(not necessarily in the correct order).

1) Change fence lock locking order w.r.t. LRU lock - should be a trivial 
and very small change.
2) Change reservations from lists to always be trylock, skipping already 
reserved bos.
3) Remove the lru lock around reservations.
4) Various optimizations / cleanups.

If you could reorganize and make 4 patch series like this, it would be 
much easier to follow what happens and why, and would make it much 
easier for me to review. It seems to me if patch series 1-3 focus on the 
intended changes and the intended changes only, they would be quite small?

/Thomas

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 04/10] drm/ttm: change fence_lock to inner lock, v3
  2012-11-22 20:29                             ` Thomas Hellstrom
@ 2012-11-27 12:35                               ` Maarten Lankhorst
  0 siblings, 0 replies; 33+ messages in thread
From: Maarten Lankhorst @ 2012-11-27 12:35 UTC (permalink / raw)
  To: Thomas Hellstrom; +Cc: Maarten Lankhorst, dri-devel

On 22-11-12 21:29, Thomas Hellstrom wrote:
> On 11/22/2012 04:51 PM, Maarten Lankhorst wrote:
>> On 21-11-12 14:27, Thomas Hellstrom wrote:
>>> On 11/21/2012 02:12 PM, Maarten Lankhorst wrote:
>>>> On 21-11-12 13:42, Thomas Hellstrom wrote:
>>>>> On 11/21/2012 12:38 PM, Maarten Lankhorst wrote:
>>>>>> Hey,
>>>>>>
>>>>>> On 20-11-12 16:08, Thomas Hellstrom wrote:
>>>>>>> On 11/20/2012 02:13 PM, Maarten Lankhorst wrote:
>>>>>>>> On 20-11-12 13:03, Thomas Hellstrom wrote:
>>>>>>>>> On 11/20/2012 12:33 PM, Maarten Lankhorst wrote:
>>>>>>>>>> On 20-11-12 08:48, Thomas Hellstrom wrote:
>>>>>>>>>>> On 11/19/2012 04:33 PM, Maarten Lankhorst wrote:
>>>>>>>>>>>> On 19-11-12 16:04, Thomas Hellstrom wrote:
>>>>>>>>>>>>> On 11/19/2012 03:17 PM, Thomas Hellstrom wrote:
>>>>>>>>>>>>>> Hi,
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> This patch looks mostly good, although I think ttm_bo_cleanup_refs becomes overly complicated:
>>>>>>>>>>>>>> Could this do, or am I missing something?
>>>>>>>>>>>>>>
>>>>>>>>>>>>> Actually, my version is bad, because ttm_bo_wait() is called with the lru lock held.
>>>>>>>>>>>>>
>>>>>>>>>>>>> /Thomas
>>>>>>>>>>>> Oh digging through it made me remember why I had to release the reservation early and
>>>>>>>>>>>> had to allow move_notify to be called without reservation.
>>>>>>>>>>>>
>>>>>>>>>>>> Fortunately move_notify has a NULL parameter, which is the only time that happens,
>>>>>>>>>>>> so you can still do BUG_ON(mem != NULL && !ttm_bo_reserved(bo)); in your
>>>>>>>>>>>> move_notify handler.
>>>>>>>>>>>>
>>>>>>>>>>>> 05/10 removed the loop and assumed no new fence could be attached after the driver has
>>>>>>>>>>>> declared the bo dead.
>>>>>>>>>>>>
>>>>>>>>>>>> However, at that point it may no longer hold a reservation to confirm this, that's why
>>>>>>>>>>>> I moved the cleanup to be done in the release_list handler. It could still be done in
>>>>>>>>>>>> ttm_bo_release, but we no longer have a reservation after we waited. Getting
>>>>>>>>>>>> a reservation can fail if the bo is imported for example.
>>>>>>>>>>>>
>>>>>>>>>>>> While it would be true that in that case a new fence may be attached as well, that
>>>>>>>>>>>> would be less harmful since that operation wouldn't involve this device, so the
>>>>>>>>>>>> ttm bo can still be removed in that case. When that time comes I should probably
>>>>>>>>>>>> fix up that WARN_ON(ret) in ttm_bo_cleanup_refs. :-)
>>>>>>>>>>>>
>>>>>>>>>>>> I did add a WARN_ON(!atomic_read(&bo->kref.refcount)); to
>>>>>>>>>>>> ttm_bo_reserve and ttm_eu_reserve_buffers to be sure nothing is done on the device
>>>>>>>>>>>> itself. If that is too paranoid, those WARN_ON's could be dropped. I prefer to leave them
>>>>>>>>>>>> in for a kernel release or 2. But according to the rules that would be the only time you
>>>>>>>>>>>> could attach a new fence and trigger the WARN_ON for now..
>>>>>>>>>>> Hmm, I'd appreciate it if you could group patches with functional changes that depend on each other together,
>>>>>>>>>>> and "this is done because ...", which makes it much easier to review, (and to follow the commit history in case
>>>>>>>>>>> something goes terribly wrong and we need to revert).
>>>>>>>>>>>
>>>>>>>>>>> Meanwhile I'll take a look at the final ttm_bo.c and see if I can spot any culprits.
>>>>>>>>>>>
>>>>>>>>>>> In general, as long as a bo is on an LRU list, we must be able to attach fences because of accelerated eviction.
>>>>>>>>>> I thought it was deliberately designed in such a way that it was kept on the lru list,
>>>>>>>>>> but since it's also on the ddestroy list it won't start accelerated eviction,
>>>>>>>>>> since it branches into cleanup_refs early, and lru_lock still protects all the list entries.
>>>>>>>>> I used bad wording. I meant that unbinding might be accelerated, but we currently (quite inefficiently)
>>>>>>>>> do synchronized unbinding, assuming that only the CPU can do that. When we start to support
>>>>>>>>> unsynchronized moves, we need to be able to attach fences at least at the last move_notify(bo, NULL);
>>>>>>>> Would you, in that case, need to wait for fence_wait to complete before calling move_notify?
>>>>>>>>
>>>>>>>> If not, you would still only need to perform one wait, but you'd have to make sure move_notify only gets
>>>>>>>> called by 1 thread before checking the fence pointer and performing a wait. At that point you still hold the
>>>>>>>> lru_lock though, so it shouldn't be too hard to make something safe.
>>>>>>> I think typically a driver that wants to implement asynchronous moves doesn't want to wait before calling
>>>>>>> move_notify, but may wait in move_notify or move. Typically (upcoming vmwgfx) it would invalidate the buffer in move_notify(bo, NULL), attach a fence and then use the normal delayed destroy to wait on that fence before destroying the buffer.
>>>>>>>
>>>>>>> Otherwise, since binds / unbinds are handled in the GPU command stream there's never any need to wait for moves except when there's a CPU
>>>>>>> access.
>>>>>> Well, nouveau actually needs fence_wait to finish first, since vm changes are out of band.
>>>>>> But I guess it should be possible to attach it as work to the fence when it's signaled, and I
>>>>>> may want to do something like that already for performance reasons in a different place,
>>>>>> so I guess it doesn't matter.
>>>>> Actions to be performed on fence signaling tend to be very cpu consuming, I think due to the context switches involved.
>>>>> We had to replace that in the old psb driver and batch things like TTM does instead.
>>>>>
>>>>> Also remember that TTM fences are not required to signal in finite time unless fence_flush is called.
>>>>>
>>>>> I think nouveau doesn't use fence irqs to signal its fences.
>>>>>
>>>>>> Is calling move_notify(bo, NULL) legal and a noop the second time?
>>>>> I see no fundamental reason why it shouldn't be OK, although we might need to patch drivers to cope with it.
>>>>>
>>>>>>     That would save a flag in the bo to check if it's called already,
>>>>>> although I suppose we could always define a TTM_BO_PRIV_FLAG_* for it otherwise.
>>>>>>
>>>>>> move_notify might end up being called with the lru_lock held, but that shouldn't be a problem.
>>>>> I don't think that's a good idea. Drivers sleeping in move_notify will need to release the spinlock, and that means it's
>>>>> better to release it before move_notify is called.
>>>> Is the only sleeping being done on fences? In that case we might wish to split it up in 2 pieces for destruction,
>>>> the piece that runs immediately, and a piece to run after the new fence has signaled (current behavior).
>>>>
>>>> Nouveau needs the final move_notify unmap to be called after the object is idle, like it is now. It doesn't need
>>>> to attach a new fence.
>>> In that case it might be best to worry about asynchronous stuff later?
>>> We will eventually implement it on the new vmwgfx hardware revision, but it's not ready yet.
>>>
>>> /Thomas
>> Ok sounds good.
>>
>> In that case what do you want me to change from the first 4 patches apart from more verbose commit messages?
>> - 03/10 I got that I need to re-add the list_empty check after -EBUSY was returned in evict_mem_first.
>>
>> Also PATCH 05/10 cleans up the spinning in ttm_bo_cleanup_refs, so I hope it's OK that it's a bit
>> ugly in 04/10, as long as it doesn't result in any new bugs being introduced.
>>
>> ~Maarten
>>
>> PS: I did a plain rebase of my git tree to deal with the conflicts in drm-next.
>>
>
> Maarten, it seems to me the purposes of the patches are the following (not necessarily in the correct order).
>
> 1) Change fence lock locking order w.r.t. LRU lock - should be a trivial and very small change.
Hm yeah, this seems to be small in itself if I only do that.

> 2) Change reservations from lists to always be trylock, skipping already reserved bos.
Yeah, but unfortunately this was easier to do after some of the cleanups.

> 3) Remove the lru lock around reservations.
This is a separate patch, but unfortunately dependent on all previous optimizations/cleanups.
> 4) Various optimizations / cleanups.
This was a bit harder; some of the changes are a lot easier with the cleanups/optimizations done first.

I need the cleanup_refs changes before the reservation trylock change, since they cause cleanup_refs to be
called with the reservation and lru lock held; this prevents ever blocking inside that function on anything
other than the wait.

The real reason those patches are in this order is that some patches can only be done after
previous changes have been made first. However, I can decrease the amount of changes slightly;
I was thinking of this:

0. change the order of the reservation and wait check in cleanup_refs_or_queue
1. fence_lock <-> lru_lock nesting change, needed for patch 3
- small patch if I only focus on the inversion itself; I fear there is no race-free way to do this
without squashing it with patch 0. There will be a race otherwise where we waited on the
previous fence and a new fence was attached between unlocking the fence lock and locking the lru lock
(a sketch of the nesting I have in mind follows after this list).
Both patches only touch ttm_bo_cleanup_refs_or_queue (and 2 lines in ttm_eu_fence_buffer_objects),
so it won't affect reviewability much if it's done in 1 patch only.

2. fix radeon move_notify to be callable without reservation, needed for patch 3
- small patch

3. call ttm_bo_cleanup_refs with the reservation and lru lock held, drop the looping in ttm_bo_cleanup_refs, makes patches 4 and 5 easier
- instead of touching this function multiple times, just change it to its final, cleaner form first and be done with it.

4. loop the trylocking in swapout
5. loop the trylocking in ttm_mem_evict_first
6. drop now unused no_wait_reserve argument from ttm_mem_evict_first
- separate patch since it's just touching a lot of functions without any functional change

Bonus series; these are probably independent patches, but might depend on the above being applied first:
7. cleanup ttm_bo_force_list_clean's taking lru lock twice for every bo
- depends loosely on the no_wait_reserve argument being dropped to apply cleanly, no real dependency otherwise
8. lru lock is now no longer needed to protect reservations, cleanup
- depends on the whole series
9. replace calls to ttm_bo_wait_unreserved with ttm_bo_reserve_slowpath, which maps a lot better to mutexes
- Probably best if I split this one up into 4 patches: first introduce the new function, make nouveau/ttm_eu use it, then drop the old function.
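
To make the nesting in 1 a bit more concrete, here is a minimal sketch of the idle check I have in mind
for ttm_bo_cleanup_refs_or_queue (as if it lived inside ttm_bo.c). This is illustrative only and untested,
not the actual patch; the helper name is made up, and it assumes the paths that attach a new fence and put
the bo back on the LRU (ttm_eu_fence_buffer_objects) keep taking both locks:

static bool cleanup_idle_check_sketch(struct ttm_buffer_object *bo)
{
        struct ttm_bo_device *bdev = bo->bdev;
        struct ttm_bo_global *glob = bo->glob;
        bool idle;

        spin_lock(&glob->lru_lock);             /* outer lock */
        spin_lock(&bdev->fence_lock);           /* fence_lock becomes the inner lock */

        /* non-blocking check whether the last fence on bo->sync_obj has signaled */
        idle = (ttm_bo_wait(bo, false, false, true) == 0);
        spin_unlock(&bdev->fence_lock);

        if (idle) {
                /*
                 * Still holding lru_lock here; since attaching a new fence
                 * and re-adding the bo to the LRU takes both locks, the bo
                 * cannot become busy again before it is taken off the lists.
                 */
                list_del_init(&bo->lru);
                list_del_init(&bo->ddestroy);
        }
        spin_unlock(&glob->lru_lock);

        return idle;
}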
> If you could reorganize and make 4 patch series like this, it would be much easier to follow what happens and why, and would make it much easier for me to review. It seems to me if patch series 1-3 focus on the intended changes and the intended changes only, they would be quite small?
Would the above patches 1 to 6 in one series be OK too?

If so, a completely untested version is up at my git tree http://cgit.freedesktop.org/~mlankhorst/linux/log/

"drm/ttm: change fence_lock to inner lock" up to "drm/ttm: remove no_wait_reserve, v2"

At this point those patches are not even boot-tested, so I'll do some testing first before
resubmitting those.

The last 3 changes can be reviewed independently, but the last 2 changes
are probably best committed in that order, or else I need to fix up reserve_slowpath too.
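
For the ttm_bo_wait_unreserved -> ttm_bo_reserve_slowpath change (9 above), the caller-side difference I
mean is roughly the following. This is only a sketch of the idea: the function name is made up, the
ttm_bo_reserve_slowpath signature is my assumption here and not final, and backing off the other
reservations is left to the caller as it is today:

/* try to take the reservation without blocking; fall back to sleeping
 * for it once all other reservations have been backed off */
static int reserve_contended_sketch(struct ttm_buffer_object *bo)
{
        int ret;

        ret = ttm_bo_reserve(bo, true, true, false, 0); /* no_wait trylock */
        if (ret != -EBUSY)
                return ret;

        /* ... caller backs off all of its other reservations here ... */

        /*
         * Old pattern: ttm_bo_wait_unreserved() and then race with everyone
         * else to re-reserve the bo.  New pattern: block until the bo is
         * unreserved and take the reservation in the same step, which is
         * exactly how a sleeping mutex behaves.
         */
        return ttm_bo_reserve_slowpath(bo, true, 0);    /* assumed signature */
}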

~Maarten

^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2012-11-27 12:35 UTC | newest]

Thread overview: 33+ messages
2012-11-12 14:00 [PATCH 01/10] drm/radeon: allow move_notify to be called without reservation Maarten Lankhorst
2012-11-12 14:00 ` [PATCH 02/10] drm/ttm: remove ttm_bo_cleanup_memtype_use Maarten Lankhorst
2012-11-19 13:26   ` Thomas Hellstrom
2012-11-19 14:03     ` Maarten Lankhorst
2012-11-19 14:12       ` Thomas Hellstrom
2012-11-12 14:00 ` [PATCH 03/10] drm/ttm: do not check if list is empty in ttm_bo_force_list_clean Maarten Lankhorst
2012-11-19 13:33   ` Thomas Hellstrom
2012-11-19 14:10     ` Maarten Lankhorst
2012-11-20  7:42       ` Thomas Hellstrom
2012-11-12 14:00 ` [PATCH 04/10] drm/ttm: change fence_lock to inner lock, v3 Maarten Lankhorst
2012-11-19 14:17   ` Thomas Hellstrom
2012-11-19 15:04     ` Thomas Hellstrom
2012-11-19 15:33       ` Maarten Lankhorst
2012-11-20  7:48         ` Thomas Hellstrom
2012-11-20 11:33           ` Maarten Lankhorst
2012-11-20 11:59             ` Maarten Lankhorst
2012-11-20 12:03             ` Thomas Hellstrom
2012-11-20 13:13               ` Maarten Lankhorst
2012-11-20 15:08                 ` Thomas Hellstrom
2012-11-21 11:38                   ` Maarten Lankhorst
2012-11-21 12:42                     ` Thomas Hellstrom
2012-11-21 13:12                       ` Maarten Lankhorst
2012-11-21 13:27                         ` Thomas Hellstrom
2012-11-22 15:51                           ` Maarten Lankhorst
2012-11-22 20:29                             ` Thomas Hellstrom
2012-11-27 12:35                               ` Maarten Lankhorst
2012-11-12 14:00 ` [PATCH 05/10] drm/ttm: add sense to ttm_bo_cleanup_refs, v4 Maarten Lankhorst
2012-11-12 14:00 ` [PATCH 06/10] drm/ttm: remove no_wait_reserve, v2 Maarten Lankhorst
2012-11-12 14:00 ` [PATCH 07/10] drm/ttm: cope with reserved buffers on swap list in ttm_bo_swapout Maarten Lankhorst
2012-11-12 14:00 ` [PATCH 08/10] drm/ttm: cope with reserved buffers on lru list in ttm_mem_evict_first Maarten Lankhorst
2012-11-12 14:00 ` [PATCH 09/10] drm/ttm: remove lru_lock around ttm_bo_reserve Maarten Lankhorst
2012-11-12 14:00 ` [PATCH 10/10] drm/ttm: remove reliance on ttm_bo_wait_unreserved Maarten Lankhorst
2012-11-12 14:03 ` [PATCH 01/10] drm/radeon: allow move_notify to be called without reservation Maarten Lankhorst
