From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH 08/18] drm/i915: Protect request retirement with timeline->mutex
Date: Mon, 12 Aug 2019 14:39:05 +0100
Message-ID: <20190812133915.18824-8-chris@chris-wilson.co.uk>
In-Reply-To: <20190812133915.18824-1-chris@chris-wilson.co.uk>

Forgo the struct_mutex requirement for request retirement, as we have
been transitioning to using only the timeline->mutex to control the
lifetime of a request on its timeline.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
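The core contract change: i915_request_retire() and
i915_request_retire_upto() now assert timeline->mutex rather than
struct_mutex, and requests are tracked on tl->requests via rq->link
instead of a per-ring list. A minimal sketch of the resulting caller
pattern, mirroring the selftest_context.c hunk below (illustrative
only; retire_completed is a made-up name, the helpers are the ones
used in this series):

	static void retire_completed(struct i915_request *rq)
	{
		struct intel_timeline *tl = rq->timeline;

		mutex_lock(&tl->mutex);	/* was: struct_mutex */
		if (i915_request_completed(rq))
			i915_request_retire_upto(rq); /* asserts tl->mutex held */
		mutex_unlock(&tl->mutex);
	}

i915_retire_requests() takes the same mutex opportunistically with
mutex_trylock() while walking gt.timelines.active_list, bumping
tl->active_count to pin each list element so the spinlock can be
dropped across the retire.
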
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 183 ++++++++++--------
 drivers/gpu/drm/i915/gt/intel_context.h       |  18 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   1 -
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   3 -
 drivers/gpu/drm/i915/gt/intel_gt.c            |   2 -
 drivers/gpu/drm/i915/gt/intel_gt_types.h      |   2 -
 drivers/gpu/drm/i915/gt/intel_lrc.c           |   1 +
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |  19 +-
 drivers/gpu/drm/i915/gt/mock_engine.c         |   1 -
 drivers/gpu/drm/i915/gt/selftest_context.c    |   9 +-
 drivers/gpu/drm/i915/i915_request.c           | 156 +++++++--------
 drivers/gpu/drm/i915/i915_request.h           |   3 -
 12 files changed, 209 insertions(+), 189 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 533db2b1fae9..b8432c3437e9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -735,63 +735,6 @@ static int eb_select_context(struct i915_execbuffer *eb)
 	return 0;
 }
 
-static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
-{
-	struct i915_request *rq;
-
-	/*
-	 * Completely unscientific finger-in-the-air estimates for suitable
-	 * maximum user request size (to avoid blocking) and then backoff.
-	 */
-	if (intel_ring_update_space(ring) >= PAGE_SIZE)
-		return NULL;
-
-	/*
-	 * Find a request that after waiting upon, there will be at least half
-	 * the ring available. The hysteresis allows us to compete for the
-	 * shared ring and should mean that we sleep less often prior to
-	 * claiming our resources, but not so long that the ring completely
-	 * drains before we can submit our next request.
-	 */
-	list_for_each_entry(rq, &ring->request_list, ring_link) {
-		if (__intel_ring_space(rq->postfix,
-				       ring->emit, ring->size) > ring->size / 2)
-			break;
-	}
-	if (&rq->ring_link == &ring->request_list)
-		return NULL; /* weird, we will check again later for real */
-
-	return i915_request_get(rq);
-}
-
-static int eb_wait_for_ring(const struct i915_execbuffer *eb)
-{
-	struct i915_request *rq;
-	int ret = 0;
-
-	/*
-	 * Apply a light amount of backpressure to prevent excessive hogs
-	 * from blocking waiting for space whilst holding struct_mutex and
-	 * keeping all of their resources pinned.
-	 */
-
-	rq = __eb_wait_for_ring(eb->context->ring);
-	if (rq) {
-		mutex_unlock(&eb->i915->drm.struct_mutex);
-
-		if (i915_request_wait(rq,
-				      I915_WAIT_INTERRUPTIBLE,
-				      MAX_SCHEDULE_TIMEOUT) < 0)
-			ret = -EINTR;
-
-		i915_request_put(rq);
-
-		mutex_lock(&eb->i915->drm.struct_mutex);
-	}
-
-	return ret;
-}
-
 static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
 	struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma;
@@ -2132,10 +2075,75 @@ static const enum intel_engine_id user_ring_map[] = {
 	[I915_EXEC_VEBOX]	= VECS0
 };
 
-static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
+static struct i915_request *eb_throttle(struct intel_context *ce)
+{
+	struct intel_ring *ring = ce->ring;
+	struct intel_timeline *tl = ce->timeline;
+	struct i915_request *rq;
+
+	/*
+	 * Completely unscientific finger-in-the-air estimates for suitable
+	 * maximum user request size (to avoid blocking) and then backoff.
+	 */
+	if (intel_ring_update_space(ring) >= PAGE_SIZE)
+		return NULL;
+
+	/*
+	 * Find a request that after waiting upon, there will be at least half
+	 * the ring available. The hysteresis allows us to compete for the
+	 * shared ring and should mean that we sleep less often prior to
+	 * claiming our resources, but not so long that the ring completely
+	 * drains before we can submit our next request.
+	 */
+	list_for_each_entry(rq, &tl->requests, link) {
+		if (rq->ring != ring)
+			continue;
+
+		if (__intel_ring_space(rq->postfix,
+				       ring->emit, ring->size) > ring->size / 2)
+			break;
+	}
+	if (&rq->link == &tl->requests)
+		return NULL; /* weird, we will check again later for real */
+
+	return i915_request_get(rq);
+}
+
+static int
+__eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
 {
 	int err;
 
+	if (likely(atomic_inc_not_zero(&ce->pin_count)))
+		return 0;
+
+	err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex);
+	if (err)
+		return err;
+
+	err = __intel_context_do_pin(ce);
+	mutex_unlock(&eb->i915->drm.struct_mutex);
+
+	return err;
+}
+
+static void
+__eb_unpin_context(struct i915_execbuffer *eb, struct intel_context *ce)
+{
+	if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
+		return;
+
+	mutex_lock(&eb->i915->drm.struct_mutex);
+	intel_context_unpin(ce);
+	mutex_unlock(&eb->i915->drm.struct_mutex);
+}
+
+static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
+{
+	struct intel_timeline *tl;
+	struct i915_request *rq;
+	int err;
+
 	/*
 	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
 	 * EIO if the GPU is already wedged.
@@ -2149,7 +2157,7 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
 	 * GGTT space, so do this first before we reserve a seqno for
 	 * ourselves.
 	 */
-	err = intel_context_pin(ce);
+	err = __eb_pin_context(eb, ce);
 	if (err)
 		return err;
 
@@ -2161,23 +2169,43 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
 	 * until the timeline is idle, which in turn releases the wakeref
 	 * taken on the engine, and the parent device.
 	 */
-	err = intel_context_timeline_lock(ce);
-	if (err)
+	tl = intel_context_timeline_lock(ce);
+	if (IS_ERR(tl)) {
+		err = PTR_ERR(tl);
 		goto err_unpin;
+	}
 
 	intel_context_enter(ce);
-	intel_context_timeline_unlock(ce);
+	rq = eb_throttle(ce);
+
+	intel_context_timeline_unlock(tl);
+
+	if (rq) {
+		if (i915_request_wait(rq,
+				      I915_WAIT_INTERRUPTIBLE,
+				      MAX_SCHEDULE_TIMEOUT) < 0) {
+			i915_request_put(rq);
+			err = -EINTR;
+			goto err_exit;
+		}
+
+		i915_request_put(rq);
+	}
 
 	eb->engine = ce->engine;
 	eb->context = ce;
 	return 0;
 
+err_exit:
+	mutex_lock(&tl->mutex);
+	intel_context_exit(ce);
+	intel_context_timeline_unlock(tl);
 err_unpin:
-	intel_context_unpin(ce);
+	__eb_unpin_context(eb, ce);
 	return err;
 }
 
-static void eb_unpin_context(struct i915_execbuffer *eb)
+static void eb_unpin_engine(struct i915_execbuffer *eb)
 {
 	struct intel_context *ce = eb->context;
 	struct intel_timeline *tl = ce->timeline;
@@ -2186,7 +2214,7 @@ static void eb_unpin_context(struct i915_execbuffer *eb)
 	intel_context_exit(ce);
 	mutex_unlock(&tl->mutex);
 
-	intel_context_unpin(ce);
+	__eb_unpin_context(eb, ce);
 }
 
 static unsigned int
@@ -2231,9 +2259,9 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
 }
 
 static int
-eb_select_engine(struct i915_execbuffer *eb,
-		 struct drm_file *file,
-		 struct drm_i915_gem_execbuffer2 *args)
+eb_pin_engine(struct i915_execbuffer *eb,
+	      struct drm_file *file,
+	      struct drm_i915_gem_execbuffer2 *args)
 {
 	struct intel_context *ce;
 	unsigned int idx;
@@ -2248,7 +2276,7 @@ eb_select_engine(struct i915_execbuffer *eb,
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
-	err = eb_pin_context(eb, ce);
+	err = __eb_pin_engine(eb, ce);
 	intel_context_put(ce);
 
 	return err;
@@ -2466,16 +2494,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_destroy;
 
-	err = i915_mutex_lock_interruptible(dev);
-	if (err)
-		goto err_context;
-
-	err = eb_select_engine(&eb, file, args);
+	err = eb_pin_engine(&eb, file, args);
 	if (unlikely(err))
-		goto err_unlock;
+		goto err_context;
 
-	err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
-	if (unlikely(err))
+	err = i915_mutex_lock_interruptible(dev);
+	if (err)
 		goto err_engine;
 
 	err = eb_relocate(&eb);
@@ -2633,10 +2657,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_vma:
 	if (eb.exec)
 		eb_release_vmas(&eb);
-err_engine:
-	eb_unpin_context(&eb);
-err_unlock:
 	mutex_unlock(&dev->struct_mutex);
+err_engine:
+	eb_unpin_engine(&eb);
 err_context:
 	i915_gem_context_put(eb.gem_context);
 err_destroy:
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 9fa8b588f18e..053a1307ecb4 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -12,6 +12,7 @@
 #include "i915_active.h"
 #include "intel_context_types.h"
 #include "intel_engine_types.h"
+#include "intel_timeline_types.h"
 
 void intel_context_init(struct intel_context *ce,
 			struct i915_gem_context *ctx,
@@ -118,17 +119,24 @@ static inline void intel_context_put(struct intel_context *ce)
 	kref_put(&ce->ref, ce->ops->destroy);
 }
 
-static inline int __must_check
+static inline struct intel_timeline *__must_check
 intel_context_timeline_lock(struct intel_context *ce)
 	__acquires(&ce->timeline->mutex)
 {
-	return mutex_lock_interruptible(&ce->timeline->mutex);
+	struct intel_timeline *tl = ce->timeline;
+	int err;
+
+	err = mutex_lock_interruptible(&tl->mutex);
+	if (err)
+		return ERR_PTR(err);
+
+	return tl;
 }
 
-static inline void intel_context_timeline_unlock(struct intel_context *ce)
-	__releases(&ce->timeline->mutex)
+static inline void intel_context_timeline_unlock(struct intel_timeline *tl)
+	__releases(&tl->mutex)
 {
-	mutex_unlock(&ce->timeline->mutex);
+	mutex_unlock(&tl->mutex);
 }
 
 int intel_context_prepare_remote_request(struct intel_context *ce,
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 13a569907c3d..8a9d3cd2c31c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -679,7 +679,6 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
 				engine->status_page.vma))
 		goto out_frame;
 
-	INIT_LIST_HEAD(&frame->ring.request_list);
 	frame->ring.vaddr = frame->cs;
 	frame->ring.size = sizeof(frame->cs);
 	frame->ring.effective_size = frame->ring.size;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index a0f372807dd4..9965a32601d6 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -69,9 +69,6 @@ struct intel_ring {
 	struct i915_vma *vma;
 	void *vaddr;
 
-	struct list_head request_list;
-	struct list_head active_link;
-
 	/*
 	 * As we have two types of rings, one global to the engine used
 	 * by ringbuffer submission and those that are exclusive to a
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index c543467a8a1c..b5277632137c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -13,9 +13,7 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 	gt->i915 = i915;
 	gt->uncore = &i915->uncore;
 
-	INIT_LIST_HEAD(&gt->active_rings);
 	INIT_LIST_HEAD(&gt->closed_vma);
-
 	spin_lock_init(&gt->closed_lock);
 
 	intel_gt_init_hangcheck(gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 3b34b658de3f..8da7b9f1f46e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -48,8 +48,6 @@ struct intel_gt {
 		struct list_head hwsp_free_list;
 	} timelines;
 
-	struct list_head active_rings;
-
 	struct intel_wakeref wakeref;
 
 	struct list_head closed_vma;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index b5618e6c1361..71c69381a1aa 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1619,6 +1619,7 @@ static void execlists_context_unpin(struct intel_context *ce)
 {
 	i915_gem_context_unpin_hw_id(ce->gem_context);
 	i915_gem_object_unpin_map(ce->state->obj);
+	intel_ring_reset(ce->ring, ce->ring->tail);
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index be170b10d92f..8ce5a55427c1 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1248,7 +1248,7 @@ void intel_ring_unpin(struct intel_ring *ring)
 		return;
 
 	/* Discard any unused bytes beyond that submitted to hw. */
-	intel_ring_reset(ring, ring->tail);
+	intel_ring_reset(ring, ring->emit);
 
 	i915_vma_unset_ggtt_write(vma);
 	if (i915_vma_is_map_and_fenceable(vma))
@@ -1309,7 +1309,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 		return ERR_PTR(-ENOMEM);
 
 	kref_init(&ring->ref);
-	INIT_LIST_HEAD(&ring->request_list);
 
 	ring->size = size;
 	/* Workaround an erratum on the i830 which causes a hang if
@@ -1863,7 +1862,10 @@ static int ring_request_alloc(struct i915_request *request)
 	return 0;
 }
 
-static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
+static noinline int
+wait_for_space(struct intel_ring *ring,
+	       struct intel_timeline *tl,
+	       unsigned int bytes)
 {
 	struct i915_request *target;
 	long timeout;
@@ -1871,15 +1873,18 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
 	if (intel_ring_update_space(ring) >= bytes)
 		return 0;
 
-	GEM_BUG_ON(list_empty(&ring->request_list));
-	list_for_each_entry(target, &ring->request_list, ring_link) {
+	GEM_BUG_ON(list_empty(&tl->requests));
+	list_for_each_entry(target, &tl->requests, link) {
+		if (target->ring != ring)
+			continue;
+
 		/* Would completion of this request free enough space? */
 		if (bytes <= __intel_ring_space(target->postfix,
 						ring->emit, ring->size))
 			break;
 	}
 
-	if (WARN_ON(&target->ring_link == &ring->request_list))
+	if (GEM_WARN_ON(&target->link == &tl->requests))
 		return -ENOSPC;
 
 	timeout = i915_request_wait(target,
@@ -1946,7 +1951,7 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
 		 */
 		GEM_BUG_ON(!rq->reserved_space);
 
-		ret = wait_for_space(ring, total_bytes);
+		ret = wait_for_space(ring, rq->timeline, total_bytes);
 		if (unlikely(ret))
 			return ERR_PTR(ret);
 	}
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 54a11dde3076..5d43cbc3f345 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -58,7 +58,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 	ring->vaddr = (void *)(ring + 1);
 	atomic_set(&ring->pin_count, 1);
 
-	INIT_LIST_HEAD(&ring->request_list);
 	intel_ring_update_space(ring);
 
 	return ring;
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index da9c49e2adaf..6fbc72bc290e 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -20,10 +20,13 @@ static int request_sync(struct i915_request *rq)
 
 	i915_request_add(rq);
 	timeout = i915_request_wait(rq, 0, HZ / 10);
-	if (timeout < 0)
+	if (timeout < 0) {
 		err = timeout;
-	else
+	} else {
+		mutex_lock(&rq->timeline->mutex);
 		i915_request_retire_upto(rq);
+		mutex_unlock(&rq->timeline->mutex);
+	}
 
 	i915_request_put(rq);
 
@@ -35,6 +38,7 @@ static int context_sync(struct intel_context *ce)
 	struct intel_timeline *tl = ce->timeline;
 	int err = 0;
 
+	mutex_lock(&tl->mutex);
 	do {
 		struct i915_request *rq;
 		long timeout;
@@ -55,6 +59,7 @@ static int context_sync(struct intel_context *ce)
 
 		i915_request_put(rq);
 	} while (!err);
+	mutex_unlock(&tl->mutex);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 4703aab3ae21..74b611ab60a2 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -181,40 +181,6 @@ i915_request_remove_from_client(struct i915_request *request)
 	spin_unlock(&file_priv->mm.lock);
 }
 
-static void advance_ring(struct i915_request *request)
-{
-	struct intel_ring *ring = request->ring;
-	unsigned int tail;
-
-	/*
-	 * We know the GPU must have read the request to have
-	 * sent us the seqno + interrupt, so use the position
-	 * of tail of the request to update the last known position
-	 * of the GPU head.
-	 *
-	 * Note this requires that we are always called in request
-	 * completion order.
-	 */
-	GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list));
-	if (list_is_last(&request->ring_link, &ring->request_list)) {
-		/*
-		 * We may race here with execlists resubmitting this request
-		 * as we retire it. The resubmission will move the ring->tail
-		 * forwards (to request->wa_tail). We either read the
-		 * current value that was written to hw, or the value that
-		 * is just about to be. Either works, if we miss the last two
-		 * noops - they are safe to be replayed on a reset.
-		 */
-		tail = READ_ONCE(request->tail);
-		list_del(&ring->active_link);
-	} else {
-		tail = request->postfix;
-	}
-	list_del_init(&request->ring_link);
-
-	ring->head = tail;
-}
-
 static void free_capture_list(struct i915_request *request)
 {
 	struct i915_capture_list *capture;
@@ -232,7 +198,7 @@ static bool i915_request_retire(struct i915_request *rq)
 {
 	struct i915_active_request *active, *next;
 
-	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	lockdep_assert_held(&rq->timeline->mutex);
 	if (!i915_request_completed(rq))
 		return false;
 
@@ -244,7 +210,17 @@ static bool i915_request_retire(struct i915_request *rq)
 	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
 	trace_i915_request_retire(rq);
 
-	advance_ring(rq);
+	/*
+	 * We know the GPU must have read the request to have
+	 * sent us the seqno + interrupt, so use the position
+	 * of tail of the request to update the last known position
+	 * of the GPU head.
+	 *
+	 * Note this requires that we are always called in request
+	 * completion order.
+	 */
+	GEM_BUG_ON(!list_is_first(&rq->link, &rq->timeline->requests));
+	rq->ring->head = rq->postfix;
 
 	/*
 	 * Walk through the active list, calling retire on each. This allows
@@ -321,7 +297,7 @@ static bool i915_request_retire(struct i915_request *rq)
 
 void i915_request_retire_upto(struct i915_request *rq)
 {
-	struct intel_ring *ring = rq->ring;
+	struct intel_timeline * const tl = rq->timeline;
 	struct i915_request *tmp;
 
 	GEM_TRACE("%s fence %llx:%lld, current %d\n",
@@ -329,15 +305,11 @@ void i915_request_retire_upto(struct i915_request *rq)
 		  rq->fence.context, rq->fence.seqno,
 		  hwsp_seqno(rq));
 
-	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	lockdep_assert_held(&tl->mutex);
 	GEM_BUG_ON(!i915_request_completed(rq));
 
-	if (list_empty(&rq->ring_link))
-		return;
-
 	do {
-		tmp = list_first_entry(&ring->request_list,
-				       typeof(*tmp), ring_link);
+		tmp = list_first_entry(&tl->requests, typeof(*tmp), link);
 	} while (i915_request_retire(tmp) && tmp != rq);
 }
 
@@ -564,29 +536,28 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
-static void ring_retire_requests(struct intel_ring *ring)
+static void retire_requests(struct intel_timeline *tl)
 {
 	struct i915_request *rq, *rn;
 
-	list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link)
+	list_for_each_entry_safe(rq, rn, &tl->requests, link)
 		if (!i915_request_retire(rq))
 			break;
 }
 
 static noinline struct i915_request *
-request_alloc_slow(struct intel_context *ce, gfp_t gfp)
+request_alloc_slow(struct intel_timeline *tl, gfp_t gfp)
 {
-	struct intel_ring *ring = ce->ring;
 	struct i915_request *rq;
 
-	if (list_empty(&ring->request_list))
+	if (list_empty(&tl->requests))
 		goto out;
 
 	if (!gfpflags_allow_blocking(gfp))
 		goto out;
 
 	/* Move our oldest request to the slab-cache (if not in use!) */
-	rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link);
+	rq = list_first_entry(&tl->requests, typeof(*rq), link);
 	i915_request_retire(rq);
 
 	rq = kmem_cache_alloc(global.slab_requests,
@@ -595,11 +566,11 @@ request_alloc_slow(struct intel_context *ce, gfp_t gfp)
 		return rq;
 
 	/* Ratelimit ourselves to prevent oom from malicious clients */
-	rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link);
+	rq = list_last_entry(&tl->requests, typeof(*rq), link);
 	cond_synchronize_rcu(rq->rcustate);
 
 	/* Retire our old requests in the hope that we free some */
-	ring_retire_requests(ring);
+	retire_requests(tl);
 
 out:
 	return kmem_cache_alloc(global.slab_requests, gfp);
@@ -650,7 +621,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	rq = kmem_cache_alloc(global.slab_requests,
 			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 	if (unlikely(!rq)) {
-		rq = request_alloc_slow(ce, gfp);
+		rq = request_alloc_slow(tl, gfp);
 		if (!rq) {
 			ret = -ENOMEM;
 			goto err_unreserve;
@@ -742,15 +713,15 @@ struct i915_request *
 i915_request_create(struct intel_context *ce)
 {
 	struct i915_request *rq;
-	int err;
+	struct intel_timeline *tl;
 
-	err = intel_context_timeline_lock(ce);
-	if (err)
-		return ERR_PTR(err);
+	tl = intel_context_timeline_lock(ce);
+	if (IS_ERR(tl))
+		return ERR_CAST(tl);
 
 	/* Move our oldest request to the slab-cache (if not in use!) */
-	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
-	if (!list_is_last(&rq->ring_link, &ce->ring->request_list))
+	rq = list_first_entry(&tl->requests, typeof(*rq), link);
+	if (!list_is_last(&rq->link, &tl->requests))
 		i915_request_retire(rq);
 
 	intel_context_enter(ce);
@@ -760,22 +731,22 @@ i915_request_create(struct intel_context *ce)
 		goto err_unlock;
 
 	/* Check that we do not interrupt ourselves with a new request */
-	rq->cookie = lockdep_pin_lock(&ce->timeline->mutex);
+	rq->cookie = lockdep_pin_lock(&tl->mutex);
 
 	return rq;
 
 err_unlock:
-	intel_context_timeline_unlock(ce);
+	intel_context_timeline_unlock(tl);
 	return rq;
 }
 
 static int
 i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
 {
-	if (list_is_first(&signal->ring_link, &signal->ring->request_list))
+	if (list_is_first(&signal->link, &signal->timeline->requests))
 		return 0;
 
-	signal = list_prev_entry(signal, ring_link);
+	signal = list_prev_entry(signal, link);
 	if (intel_timeline_sync_is_later(rq->timeline, &signal->fence))
 		return 0;
 
@@ -1155,7 +1126,6 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 {
 	struct intel_engine_cs *engine = rq->engine;
 	struct intel_ring *ring = rq->ring;
-	struct i915_request *prev;
 	u32 *cs;
 
 	GEM_TRACE("%s fence %llx:%lld\n",
@@ -1168,6 +1138,7 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 	 */
 	GEM_BUG_ON(rq->reserved_space > ring->space);
 	rq->reserved_space = 0;
+	rq->emitted_jiffies = jiffies;
 
 	/*
 	 * Record the position of the start of the breadcrumb so that
@@ -1179,14 +1150,7 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 	GEM_BUG_ON(IS_ERR(cs));
 	rq->postfix = intel_ring_offset(rq, cs);
 
-	prev = __i915_request_add_to_timeline(rq);
-
-	list_add_tail(&rq->ring_link, &ring->request_list);
-	if (list_is_first(&rq->ring_link, &ring->request_list))
-		list_add(&ring->active_link, &rq->i915->gt.active_rings);
-	rq->emitted_jiffies = jiffies;
-
-	return prev;
+	return __i915_request_add_to_timeline(rq);
 }
 
 void __i915_request_queue(struct i915_request *rq,
@@ -1214,10 +1178,11 @@ void __i915_request_queue(struct i915_request *rq,
 void i915_request_add(struct i915_request *rq)
 {
 	struct i915_sched_attr attr = rq->gem_context->sched;
+	struct intel_timeline * const tl = rq->timeline;
 	struct i915_request *prev;
 
-	lockdep_assert_held(&rq->timeline->mutex);
-	lockdep_unpin_lock(&rq->timeline->mutex, rq->cookie);
+	lockdep_assert_held(&tl->mutex);
+	lockdep_unpin_lock(&tl->mutex, rq->cookie);
 
 	trace_i915_request_add(rq);
 
@@ -1266,10 +1231,10 @@ void i915_request_add(struct i915_request *rq)
 	 * work on behalf of others -- but instead we should benefit from
 	 * improved resource management. (Well, that's the theory at least.)
 	 */
-	if (prev && i915_request_completed(prev))
+	if (prev && i915_request_completed(prev) && prev->timeline == tl)
 		i915_request_retire_upto(prev);
 
-	mutex_unlock(&rq->timeline->mutex);
+	mutex_unlock(&tl->mutex);
 }
 
 static unsigned long local_clock_us(unsigned int *cpu)
@@ -1489,18 +1454,43 @@ long i915_request_wait(struct i915_request *rq,
 
 bool i915_retire_requests(struct drm_i915_private *i915)
 {
-	struct intel_ring *ring, *tmp;
+	struct intel_gt_timelines *timelines = &i915->gt.timelines;
+	struct intel_timeline *tl, *tn;
+	LIST_HEAD(free);
+
+	spin_lock(&timelines->lock);
+	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
+		if (!mutex_trylock(&tl->mutex))
+			continue;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
+		intel_timeline_get(tl);
+		GEM_BUG_ON(!tl->active_count);
+		tl->active_count++; /* pin the list element */
+		spin_unlock(&timelines->lock);
 
-	list_for_each_entry_safe(ring, tmp,
-				 &i915->gt.active_rings, active_link) {
-		intel_ring_get(ring); /* last rq holds reference! */
-		ring_retire_requests(ring);
-		intel_ring_put(ring);
+		retire_requests(tl);
+
+		spin_lock(&timelines->lock);
+
+		/* Restart iteration after dropping lock */
+		list_safe_reset_next(tl, tn, link);
+		if (!--tl->active_count)
+			list_del(&tl->link);
+
+		mutex_unlock(&tl->mutex);
+
+		/* Defer the final release to after the spinlock */
+		if (refcount_dec_and_test(&tl->kref.refcount)) {
+			GEM_BUG_ON(tl->active_count);
+			list_add(&tl->link, &free);
+		}
 	}
+	spin_unlock(&timelines->lock);
+
+	list_for_each_entry_safe(tl, tn, &free, link)
+		__intel_timeline_free(&tl->kref);
 
-	return !list_empty(&i915->gt.active_rings);
+	return !list_empty(&timelines->active_list);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index fec1d5f17c94..8ac6e1226a56 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -223,9 +223,6 @@ struct i915_request {
 	/** timeline->request entry for this request */
 	struct list_head link;
 
-	/** ring->request_list entry for this request */
-	struct list_head ring_link;
-
 	struct drm_i915_file_private *file_priv;
 	/** file_priv list entry for this request */
 	struct list_head client_link;
-- 
2.23.0.rc1

Thread overview: 27+ messages
2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
2019-08-12 13:38 ` [PATCH 02/18] drm/i915: Push the wakeref->count deferral to the backend Chris Wilson
2019-08-12 13:39 ` [PATCH 03/18] drm/i915/gt: Save/restore interrupts around breadcrumb disable Chris Wilson
2019-08-12 13:39 ` [PATCH 04/18] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock Chris Wilson
2019-08-12 13:39 ` [PATCH 05/18] drm/i915/gt: Track timeline activeness in enter/exit Chris Wilson
2019-08-12 13:39 ` [PATCH 06/18] drm/i915/gt: Convert timeline tracking to spinlock Chris Wilson
2019-08-12 13:39 ` [PATCH 07/18] drm/i915/gt: Guard timeline pinning with its own mutex Chris Wilson
2019-08-12 13:39 ` [PATCH 08/18] drm/i915: Protect request retirement with timeline->mutex Chris Wilson [this message]
2019-08-12 13:39 ` [PATCH 09/18] drm/i915/gt: Mark context->active_count as protected by timeline->mutex Chris Wilson
2019-08-12 13:39 ` [PATCH 10/18] drm/i915: Forgo last_fence active request tracking Chris Wilson
2019-08-12 13:39 ` [PATCH 11/18] drm/i915/overlay: Switch to using i915_active tracking Chris Wilson
2019-08-12 15:38   ` Matthew Auld
2019-08-12 13:39 ` [PATCH 12/18] drm/i915: Extract intel_frontbuffer active tracking Chris Wilson
2019-08-12 13:39 ` [PATCH 13/18] drm/i915: Markup expected timeline locks for i915_active Chris Wilson
2019-08-12 13:39 ` [PATCH 14/18] drm/i915: Remove logical HW ID Chris Wilson
2019-08-12 13:39 ` [PATCH 15/18] drm/i915: Move context management under GEM Chris Wilson
2019-08-12 13:39 ` [PATCH 16/18] drm/i915/pmu: Use GT parked for estimating RC6 while asleep Chris Wilson
2019-08-12 13:39 ` [PATCH 17/18] drm/i915: Drop GEM context as a direct link from i915_request Chris Wilson
2019-08-12 13:39 ` [PATCH 18/18] drm/i915: Push the use-semaphore marker onto the intel_context Chris Wilson
2019-08-12 14:11 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/18] drm/i915/guc: Use a local cancel_port_requests Patchwork
2019-08-12 14:21 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-08-12 14:31 ` ✓ Fi.CI.BAT: success " Patchwork
2019-08-12 20:23 ` [PATCH 01/18] " Daniele Ceraolo Spurio
2019-08-12 20:36   ` [PATCH] " Chris Wilson
2019-08-19 10:06   ` [PATCH 01/18] " Janusz Krzysztofik
2019-08-12 20:59 ` ✗ Fi.CI.IGT: failure for series starting with [01/18] " Patchwork
2019-08-12 22:00 ` ✗ Fi.CI.BAT: failure for series starting with drm/i915/guc: Use a local cancel_port_requests (rev2) Patchwork
