All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Cc: matthew.auld@intel.com
Subject: [PATCH 14/15] drm/i915: Protect request retirement with timeline->mutex
Date: Wed,  3 Jul 2019 10:17:25 +0100	[thread overview]
Message-ID: <20190703091726.11690-14-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20190703091726.11690-1-chris@chris-wilson.co.uk>

Forgo the struct_mutex requirement for request retirement as we have
been transitioning over to only using the timeline->mutex for
controlling the lifetime of a request on that timeline.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 192 ++++++++++--------
 drivers/gpu/drm/i915/gt/intel_context.h       |  25 +--
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   1 -
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   2 -
 drivers/gpu/drm/i915/gt/intel_gt.c            |   1 -
 drivers/gpu/drm/i915/gt/intel_gt_types.h      |   2 -
 drivers/gpu/drm/i915/gt/intel_lrc.c           |   1 +
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |  13 +-
 drivers/gpu/drm/i915/gt/mock_engine.c         |   1 -
 drivers/gpu/drm/i915/i915_request.c           | 151 +++++++-------
 drivers/gpu/drm/i915/i915_request.h           |   3 -
 11 files changed, 203 insertions(+), 189 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index f43eaaa5db5f..80c9c57a302f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -739,63 +739,6 @@ static int eb_select_context(struct i915_execbuffer *eb)
 	return 0;
 }
 
-static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
-{
-	struct i915_request *rq;
-
-	/*
-	 * Completely unscientific finger-in-the-air estimates for suitable
-	 * maximum user request size (to avoid blocking) and then backoff.
-	 */
-	if (intel_ring_update_space(ring) >= PAGE_SIZE)
-		return NULL;
-
-	/*
-	 * Find a request that after waiting upon, there will be at least half
-	 * the ring available. The hysteresis allows us to compete for the
-	 * shared ring and should mean that we sleep less often prior to
-	 * claiming our resources, but not so long that the ring completely
-	 * drains before we can submit our next request.
-	 */
-	list_for_each_entry(rq, &ring->request_list, ring_link) {
-		if (__intel_ring_space(rq->postfix,
-				       ring->emit, ring->size) > ring->size / 2)
-			break;
-	}
-	if (&rq->ring_link == &ring->request_list)
-		return NULL; /* weird, we will check again later for real */
-
-	return i915_request_get(rq);
-}
-
-static int eb_wait_for_ring(const struct i915_execbuffer *eb)
-{
-	struct i915_request *rq;
-	int ret = 0;
-
-	/*
-	 * Apply a light amount of backpressure to prevent excessive hogs
-	 * from blocking waiting for space whilst holding struct_mutex and
-	 * keeping all of their resources pinned.
-	 */
-
-	rq = __eb_wait_for_ring(eb->context->ring);
-	if (rq) {
-		mutex_unlock(&eb->i915->drm.struct_mutex);
-
-		if (i915_request_wait(rq,
-				      I915_WAIT_INTERRUPTIBLE,
-				      MAX_SCHEDULE_TIMEOUT) < 0)
-			ret = -EINTR;
-
-		i915_request_put(rq);
-
-		mutex_lock(&eb->i915->drm.struct_mutex);
-	}
-
-	return ret;
-}
-
 static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
 	struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma;
@@ -2122,10 +2065,75 @@ static const enum intel_engine_id user_ring_map[] = {
 	[I915_EXEC_VEBOX]	= VECS0
 };
 
-static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
+static struct i915_request *eb_throttle(struct intel_context *ce)
+{
+	struct intel_ring *ring = ce->ring;
+	struct intel_timeline *tl = ring->timeline;
+	struct i915_request *rq;
+
+	/*
+	 * Completely unscientific finger-in-the-air estimates for suitable
+	 * maximum user request size (to avoid blocking) and then backoff.
+	 */
+	if (intel_ring_update_space(ring) >= PAGE_SIZE)
+		return NULL;
+
+	/*
+	 * Find a request that after waiting upon, there will be at least half
+	 * the ring available. The hysteresis allows us to compete for the
+	 * shared ring and should mean that we sleep less often prior to
+	 * claiming our resources, but not so long that the ring completely
+	 * drains before we can submit our next request.
+	 */
+	list_for_each_entry(rq, &tl->requests, link) {
+		if (rq->ring != ring)
+			continue;
+
+		if (__intel_ring_space(rq->postfix,
+				       ring->emit, ring->size) > ring->size / 2)
+			break;
+	}
+	if (&rq->link == &tl->requests)
+		return NULL; /* weird, we will check again later for real */
+
+	return i915_request_get(rq);
+}
+
+static int
+__eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
 {
 	int err;
 
+	if (likely(atomic_inc_not_zero(&ce->pin_count)))
+		return 0;
+
+	err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex);
+	if (err)
+		return err;
+
+	err = __intel_context_do_pin(ce);
+	mutex_unlock(&eb->i915->drm.struct_mutex);
+
+	return err;
+}
+
+static void
+__eb_unpin_context(struct i915_execbuffer *eb, struct intel_context *ce)
+{
+	if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
+		return;
+
+	mutex_lock(&eb->i915->drm.struct_mutex);
+	intel_context_unpin(ce);
+	mutex_unlock(&eb->i915->drm.struct_mutex);
+}
+
+static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
+{
+	struct intel_timeline *tl;
+	struct i915_request *rq;
+	int err;
+
 	/*
 	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
 	 * EIO if the GPU is already wedged.
@@ -2139,7 +2147,7 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
 	 * GGTT space, so do this first before we reserve a seqno for
 	 * ourselves.
 	 */
-	err = intel_context_pin(ce);
+	err = __eb_pin_context(eb, ce);
 	if (err)
 		return err;
 
@@ -2150,29 +2158,52 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
 	 * wakeref that we hold until the GPU has been idle for at least
 	 * 100ms.
 	 */
-	err = intel_context_timeline_lock(ce);
-	if (err)
+	tl = intel_context_timeline_lock(ce);
+	if (IS_ERR(tl)) {
+		err = PTR_ERR(tl);
 		goto err_unpin;
+	}
 
 	intel_context_enter(ce);
-	intel_context_timeline_unlock(ce);
+	rq = eb_throttle(ce);
+
+	intel_context_timeline_unlock(tl);
+
+	if (rq) {
+		if (i915_request_wait(rq,
+				      I915_WAIT_INTERRUPTIBLE,
+				      MAX_SCHEDULE_TIMEOUT) < 0) {
+			i915_request_put(rq);
+			err = -EINTR;
+			goto err_exit;
+		}
+
+		i915_request_put(rq);
+	}
 
 	eb->engine = ce->engine;
 	eb->context = ce;
 	return 0;
 
+err_exit:
+	mutex_lock(&tl->mutex);
+	intel_context_exit(ce);
+	intel_context_timeline_unlock(tl);
 err_unpin:
-	intel_context_unpin(ce);
+	__eb_unpin_context(eb, ce);
 	return err;
 }
 
-static void eb_unpin_context(struct i915_execbuffer *eb)
+static void eb_unpin_engine(struct i915_execbuffer *eb)
 {
-	__intel_context_timeline_lock(eb->context);
-	intel_context_exit(eb->context);
-	intel_context_timeline_unlock(eb->context);
+	struct intel_context *ce = eb->context;
+	struct intel_timeline *tl = ce->ring->timeline;
+
+	mutex_lock(&tl->mutex);
+	intel_context_exit(ce);
+	intel_context_timeline_unlock(tl);
 
-	intel_context_unpin(eb->context);
+	__eb_unpin_context(eb, ce);
 }
 
 static unsigned int
@@ -2217,9 +2248,9 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
 }
 
 static int
-eb_select_engine(struct i915_execbuffer *eb,
-		 struct drm_file *file,
-		 struct drm_i915_gem_execbuffer2 *args)
+eb_pin_engine(struct i915_execbuffer *eb,
+	      struct drm_file *file,
+	      struct drm_i915_gem_execbuffer2 *args)
 {
 	struct intel_context *ce;
 	unsigned int idx;
@@ -2234,7 +2265,7 @@ eb_select_engine(struct i915_execbuffer *eb,
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
-	err = eb_pin_context(eb, ce);
+	err = __eb_pin_engine(eb, ce);
 	intel_context_put(ce);
 
 	return err;
@@ -2452,16 +2483,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_destroy;
 
-	err = i915_mutex_lock_interruptible(dev);
-	if (err)
-		goto err_context;
-
-	err = eb_select_engine(&eb, file, args);
+	err = eb_pin_engine(&eb, file, args);
 	if (unlikely(err))
-		goto err_unlock;
+		goto err_context;
 
-	err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
-	if (unlikely(err))
+	err = i915_mutex_lock_interruptible(dev);
+	if (err)
 		goto err_engine;
 
 	err = eb_relocate(&eb);
@@ -2615,10 +2642,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_vma:
 	if (eb.exec)
 		eb_release_vmas(&eb);
-err_engine:
-	eb_unpin_context(&eb);
-err_unlock:
 	mutex_unlock(&dev->struct_mutex);
+err_engine:
+	eb_unpin_engine(&eb);
 err_context:
 	i915_gem_context_put(eb.gem_context);
 err_destroy:
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 065ba4ac4e87..38b60cbf2592 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -12,6 +12,7 @@
 #include "i915_active.h"
 #include "intel_context_types.h"
 #include "intel_engine_types.h"
+#include "intel_timeline_types.h"
 
 void intel_context_init(struct intel_context *ce,
 			struct i915_gem_context *ctx,
@@ -126,24 +127,24 @@ static inline void intel_context_put(struct intel_context *ce)
 	kref_put(&ce->ref, ce->ops->destroy);
 }
 
-static inline void
-__intel_context_timeline_lock(struct intel_context *ce)
-	__acquires(&ce->ring->timeline->mutex)
-{
-	mutex_lock(&ce->ring->timeline->mutex);
-}
-
-static inline int __must_check
+static inline struct intel_timeline *__must_check
 intel_context_timeline_lock(struct intel_context *ce)
 	__acquires(&ce->ring->timeline->mutex)
 {
-	return mutex_lock_interruptible(&ce->ring->timeline->mutex);
+	struct intel_timeline *tl = ce->ring->timeline;
+	int err;
+
+	err = mutex_lock_interruptible(&tl->mutex);
+	if (err)
+		return ERR_PTR(err);
+
+	return tl;
 }
 
-static inline void intel_context_timeline_unlock(struct intel_context *ce)
-	__releases(&ce->ring->timeline->mutex)
+static inline void intel_context_timeline_unlock(struct intel_timeline *tl)
+	__releases(&tl->mutex)
 {
-	mutex_unlock(&ce->ring->timeline->mutex);
+	mutex_unlock(&tl->mutex);
 }
 
 struct i915_request *intel_context_create_request(struct intel_context *ce);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index d1508f0b4c84..b27fc555fe09 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -745,7 +745,6 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
 				engine->status_page.vma))
 		goto out_frame;
 
-	INIT_LIST_HEAD(&frame->ring.request_list);
 	frame->ring.timeline = &frame->timeline;
 	frame->ring.vaddr = frame->cs;
 	frame->ring.size = sizeof(frame->cs);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 7e056114344e..0dde7e04b102 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -69,8 +69,6 @@ struct intel_ring {
 	void *vaddr;
 
 	struct intel_timeline *timeline;
-	struct list_head request_list;
-	struct list_head active_link;
 
 	/*
 	 * As we have two types of rings, one global to the engine used
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 8cca6b22b386..46d24d9d62ac 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -14,7 +14,6 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 	gt->i915 = i915;
 	gt->uncore = &i915->uncore;
 
-	INIT_LIST_HEAD(&gt->active_rings);
 	INIT_LIST_HEAD(&gt->closed_vma);
 
 	spin_lock_init(&gt->closed_lock);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index cfd41e6c54e1..f43ea830b1e8 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -34,8 +34,6 @@ struct intel_gt {
 		struct list_head hwsp_free_list;
 	} timelines;
 
-	struct list_head active_rings;
-
 	struct intel_wakeref wakeref;
 
 	struct list_head closed_vma;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 72c22b95cd9c..9ebd4f258227 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1535,6 +1535,7 @@ static void execlists_context_unpin(struct intel_context *ce)
 {
 	i915_gem_context_unpin_hw_id(ce->gem_context);
 	i915_gem_object_unpin_map(ce->state->obj);
+	intel_ring_reset(ce->ring, ce->ring->tail);
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 81f9b0422e6a..b771170eb56a 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1227,7 +1227,7 @@ void intel_ring_unpin(struct intel_ring *ring)
 	GEM_TRACE("ring:%llx unpin\n", ring->timeline->fence_context);
 
 	/* Discard any unused bytes beyond that submitted to hw. */
-	intel_ring_reset(ring, ring->tail);
+	intel_ring_reset(ring, ring->emit);
 
 	GEM_BUG_ON(!ring->vma);
 	i915_vma_unset_ggtt_write(ring->vma);
@@ -1293,7 +1293,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine,
 		return ERR_PTR(-ENOMEM);
 
 	kref_init(&ring->ref);
-	INIT_LIST_HEAD(&ring->request_list);
 	ring->timeline = intel_timeline_get(timeline);
 
 	ring->size = size;
@@ -1817,21 +1816,25 @@ static int ring_request_alloc(struct i915_request *request)
 
 static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
 {
+	struct intel_timeline *tl = ring->timeline;
 	struct i915_request *target;
 	long timeout;
 
 	if (intel_ring_update_space(ring) >= bytes)
 		return 0;
 
-	GEM_BUG_ON(list_empty(&ring->request_list));
-	list_for_each_entry(target, &ring->request_list, ring_link) {
+	GEM_BUG_ON(list_empty(&tl->requests));
+	list_for_each_entry(target, &tl->requests, link) {
+		if (target->ring != ring)
+			continue;
+
 		/* Would completion of this request free enough space? */
 		if (bytes <= __intel_ring_space(target->postfix,
 						ring->emit, ring->size))
 			break;
 	}
 
-	if (WARN_ON(&target->ring_link == &ring->request_list))
+	if (GEM_WARN_ON(&target->link == &tl->requests))
 		return -ENOSPC;
 
 	timeout = i915_request_wait(target,
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index a48b36d31e65..5bcb461b8372 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -68,7 +68,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 	ring->base.timeline = &ring->timeline;
 	atomic_set(&ring->base.pin_count, 1);
 
-	INIT_LIST_HEAD(&ring->base.request_list);
 	intel_ring_update_space(&ring->base);
 
 	return &ring->base;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 5ff87c4a0cd5..2aabed8ed594 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -180,40 +180,6 @@ i915_request_remove_from_client(struct i915_request *request)
 	spin_unlock(&file_priv->mm.lock);
 }
 
-static void advance_ring(struct i915_request *request)
-{
-	struct intel_ring *ring = request->ring;
-	unsigned int tail;
-
-	/*
-	 * We know the GPU must have read the request to have
-	 * sent us the seqno + interrupt, so use the position
-	 * of tail of the request to update the last known position
-	 * of the GPU head.
-	 *
-	 * Note this requires that we are always called in request
-	 * completion order.
-	 */
-	GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list));
-	if (list_is_last(&request->ring_link, &ring->request_list)) {
-		/*
-		 * We may race here with execlists resubmitting this request
-		 * as we retire it. The resubmission will move the ring->tail
-		 * forwards (to request->wa_tail). We either read the
-		 * current value that was written to hw, or the value that
-		 * is just about to be. Either works, if we miss the last two
-		 * noops - they are safe to be replayed on a reset.
-		 */
-		tail = READ_ONCE(request->tail);
-		list_del(&ring->active_link);
-	} else {
-		tail = request->postfix;
-	}
-	list_del_init(&request->ring_link);
-
-	ring->head = tail;
-}
-
 static void free_capture_list(struct i915_request *request)
 {
 	struct i915_capture_list *capture;
@@ -231,7 +197,7 @@ static bool i915_request_retire(struct i915_request *rq)
 {
 	struct i915_active_request *active, *next;
 
-	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	lockdep_assert_held(&rq->timeline->mutex);
 	if (!i915_request_completed(rq))
 		return false;
 
@@ -243,7 +209,17 @@ static bool i915_request_retire(struct i915_request *rq)
 	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
 	trace_i915_request_retire(rq);
 
-	advance_ring(rq);
+	/*
+	 * We know the GPU must have read the request to have
+	 * sent us the seqno + interrupt, so use the position
+	 * of tail of the request to update the last known position
+	 * of the GPU head.
+	 *
+	 * Note this requires that we are always called in request
+	 * completion order.
+	 */
+	GEM_BUG_ON(!list_is_first(&rq->link, &rq->timeline->requests));
+	rq->ring->head = rq->postfix;
 
 	/*
 	 * Walk through the active list, calling retire on each. This allows
@@ -320,7 +296,7 @@ static bool i915_request_retire(struct i915_request *rq)
 
 void i915_request_retire_upto(struct i915_request *rq)
 {
-	struct intel_ring *ring = rq->ring;
+	struct intel_timeline * const tl = rq->timeline;
 	struct i915_request *tmp;
 
 	GEM_TRACE("%s fence %llx:%lld, current %d\n",
@@ -328,15 +304,11 @@ void i915_request_retire_upto(struct i915_request *rq)
 		  rq->fence.context, rq->fence.seqno,
 		  hwsp_seqno(rq));
 
-	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	lockdep_assert_held(&tl->mutex);
 	GEM_BUG_ON(!i915_request_completed(rq));
 
-	if (list_empty(&rq->ring_link))
-		return;
-
 	do {
-		tmp = list_first_entry(&ring->request_list,
-				       typeof(*tmp), ring_link);
+		tmp = list_first_entry(&tl->requests, typeof(*tmp), link);
 	} while (i915_request_retire(tmp) && tmp != rq);
 }
 
@@ -563,29 +535,28 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
-static void ring_retire_requests(struct intel_ring *ring)
+static void retire_requests(struct intel_timeline *tl)
 {
 	struct i915_request *rq, *rn;
 
-	list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link)
+	list_for_each_entry_safe(rq, rn, &tl->requests, link)
 		if (!i915_request_retire(rq))
 			break;
 }
 
 static noinline struct i915_request *
-request_alloc_slow(struct intel_context *ce, gfp_t gfp)
+request_alloc_slow(struct intel_timeline *tl, gfp_t gfp)
 {
-	struct intel_ring *ring = ce->ring;
 	struct i915_request *rq;
 
-	if (list_empty(&ring->request_list))
+	if (list_empty(&tl->requests))
 		goto out;
 
 	if (!gfpflags_allow_blocking(gfp))
 		goto out;
 
 	/* Move our oldest request to the slab-cache (if not in use!) */
-	rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link);
+	rq = list_first_entry(&tl->requests, typeof(*rq), link);
 	i915_request_retire(rq);
 
 	rq = kmem_cache_alloc(global.slab_requests,
@@ -594,11 +565,11 @@ request_alloc_slow(struct intel_context *ce, gfp_t gfp)
 		return rq;
 
 	/* Ratelimit ourselves to prevent oom from malicious clients */
-	rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link);
+	rq = list_last_entry(&tl->requests, typeof(*rq), link);
 	cond_synchronize_rcu(rq->rcustate);
 
 	/* Retire our old requests in the hope that we free some */
-	ring_retire_requests(ring);
+	retire_requests(tl);
 
 out:
 	return kmem_cache_alloc(global.slab_requests, gfp);
@@ -649,7 +620,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	rq = kmem_cache_alloc(global.slab_requests,
 			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 	if (unlikely(!rq)) {
-		rq = request_alloc_slow(ce, gfp);
+		rq = request_alloc_slow(tl, gfp);
 		if (!rq) {
 			ret = -ENOMEM;
 			goto err_unreserve;
@@ -741,15 +712,15 @@ struct i915_request *
 i915_request_create(struct intel_context *ce)
 {
 	struct i915_request *rq;
-	int err;
+	struct intel_timeline *tl;
 
-	err = intel_context_timeline_lock(ce);
-	if (err)
-		return ERR_PTR(err);
+	tl = intel_context_timeline_lock(ce);
+	if (IS_ERR(tl))
+		return ERR_CAST(tl);
 
 	/* Move our oldest request to the slab-cache (if not in use!) */
-	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
-	if (!list_is_last(&rq->ring_link, &ce->ring->request_list))
+	rq = list_first_entry(&tl->requests, typeof(*rq), link);
+	if (!list_is_last(&rq->link, &tl->requests))
 		i915_request_retire(rq);
 
 	intel_context_enter(ce);
@@ -759,22 +730,22 @@ i915_request_create(struct intel_context *ce)
 		goto err_unlock;
 
 	/* Check that we do not interrupt ourselves with a new request */
-	rq->cookie = lockdep_pin_lock(&ce->ring->timeline->mutex);
+	rq->cookie = lockdep_pin_lock(&tl->mutex);
 
 	return rq;
 
 err_unlock:
-	intel_context_timeline_unlock(ce);
+	intel_context_timeline_unlock(tl);
 	return rq;
 }
 
 static int
 i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
 {
-	if (list_is_first(&signal->ring_link, &signal->ring->request_list))
+	if (list_is_first(&signal->link, &signal->ring->timeline->requests))
 		return 0;
 
-	signal = list_prev_entry(signal, ring_link);
+	signal = list_prev_entry(signal, link);
 	if (intel_timeline_sync_is_later(rq->timeline, &signal->fence))
 		return 0;
 
@@ -1167,6 +1138,7 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 	 */
 	GEM_BUG_ON(rq->reserved_space > ring->space);
 	rq->reserved_space = 0;
+	rq->emitted_jiffies = jiffies;
 
 	/*
 	 * Record the position of the start of the breadcrumb so that
@@ -1180,11 +1152,6 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 
 	prev = __i915_request_add_to_timeline(rq);
 
-	list_add_tail(&rq->ring_link, &ring->request_list);
-	if (list_is_first(&rq->ring_link, &ring->request_list))
-		list_add(&ring->active_link, &rq->i915->gt.active_rings);
-	rq->emitted_jiffies = jiffies;
-
 	/*
 	 * Let the backend know a new request has arrived that may need
 	 * to adjust the existing execution schedule due to a high priority
@@ -1237,10 +1204,11 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 
 void i915_request_add(struct i915_request *rq)
 {
+	struct intel_timeline * const tl = rq->timeline;
 	struct i915_request *prev;
 
-	lockdep_assert_held(&rq->timeline->mutex);
-	lockdep_unpin_lock(&rq->timeline->mutex, rq->cookie);
+	lockdep_assert_held(&tl->mutex);
+	lockdep_unpin_lock(&tl->mutex, rq->cookie);
 
 	trace_i915_request_add(rq);
 
@@ -1263,10 +1231,10 @@ void i915_request_add(struct i915_request *rq)
 	 * work on behalf of others -- but instead we should benefit from
 	 * improved resource management. (Well, that's the theory at least.)
 	 */
-	if (prev && i915_request_completed(prev))
+	if (prev && i915_request_completed(prev) && prev->timeline == tl)
 		i915_request_retire_upto(prev);
 
-	mutex_unlock(&rq->timeline->mutex);
+	mutex_unlock(&tl->mutex);
 }
 
 static unsigned long local_clock_us(unsigned int *cpu)
@@ -1487,18 +1455,43 @@ long i915_request_wait(struct i915_request *rq,
 
 bool i915_retire_requests(struct drm_i915_private *i915)
 {
-	struct intel_ring *ring, *tmp;
+	struct intel_gt_timelines *timelines = &i915->gt.timelines;
+	struct intel_timeline *tl, *tn;
+	LIST_HEAD(free);
+
+	spin_lock(&timelines->lock);
+	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
+		if (!mutex_trylock(&tl->mutex))
+			continue;
+
+		intel_timeline_get(tl);
+		GEM_BUG_ON(!tl->active_count);
+		tl->active_count++; /* pin the list element */
+		spin_unlock(&timelines->lock);
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
+		retire_requests(tl);
 
-	list_for_each_entry_safe(ring, tmp,
-				 &i915->gt.active_rings, active_link) {
-		intel_ring_get(ring); /* last rq holds reference! */
-		ring_retire_requests(ring);
-		intel_ring_put(ring);
+		spin_lock(&timelines->lock);
+
+		/* Restart iteration after dropping lock */
+		list_safe_reset_next(tl, tn, link);
+		if (!--tl->active_count)
+			list_del(&tl->link);
+
+		mutex_unlock(&tl->mutex);
+
+		/* Defer the final release to after the spinlock */
+		if (refcount_dec_and_test(&tl->kref.refcount)) {
+			GEM_BUG_ON(tl->active_count);
+			list_add(&tl->link, &free);
+		}
 	}
+	spin_unlock(&timelines->lock);
+
+	list_for_each_entry_safe(tl, tn, &free, link)
+		__intel_timeline_free(&tl->kref);
 
-	return !list_empty(&i915->gt.active_rings);
+	return !list_empty(&timelines->active_list);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index b58ceef92e20..a6b1e5f43949 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -221,9 +221,6 @@ struct i915_request {
 	/** timeline->request entry for this request */
 	struct list_head link;
 
-	/** ring->request_list entry for this request */
-	struct list_head ring_link;
-
 	struct drm_i915_file_private *file_priv;
 	/** file_priv list entry for this request */
 	struct list_head client_link;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2019-07-03  9:17 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-03  9:17 [PATCH 01/15] drm/i915/selftests: Common live setup/teardown Chris Wilson
2019-07-03  9:17 ` [PATCH 02/15] drm/i915/selftests: Lock the drm_mm while modifying Chris Wilson
2019-07-03  9:50   ` Matthew Auld
2019-07-03  9:17 ` [PATCH 03/15] drm/i915/gt: Use caller provided forcewake for intel_mocs_init_engine Chris Wilson
2019-07-03 11:23   ` Tvrtko Ursulin
2019-07-03 11:47     ` Chris Wilson
2019-07-03 12:01       ` Tvrtko Ursulin
2019-07-03  9:17 ` [PATCH 04/15] drm/i915/gt: Assume we hold forcewake for execlists resume Chris Wilson
2019-07-03 12:03   ` Tvrtko Ursulin
2019-07-03  9:17 ` [PATCH 05/15] drm/i915/gt: Ignore forcewake acquisition for posting_reads Chris Wilson
2019-07-03 11:26   ` Tvrtko Ursulin
2019-07-03 11:44     ` Chris Wilson
2019-07-03  9:17 ` [PATCH 06/15] drm/i915/gem: Free pages before rcu-freeing the object Chris Wilson
2019-07-03 10:37   ` Matthew Auld
2019-07-03  9:17 ` [PATCH 07/15] drm/i915: Markup potential lock for i915_active Chris Wilson
2019-07-03 10:38   ` Matthew Auld
2019-07-03  9:17 ` [PATCH 08/15] drm/i915: Mark up vma->active as safe for use inside shrinkers Chris Wilson
2019-07-03 10:53   ` Matthew Auld
2019-07-03  9:17 ` [PATCH 09/15] drm/i915/execlists: Hesitate before slicing Chris Wilson
2019-07-03  9:17 ` [PATCH 10/15] drm/i915: Teach execbuffer to take the engine wakeref not GT Chris Wilson
2019-07-03  9:17 ` [PATCH 11/15] drm/i915/gt: Track timeline activeness in enter/exit Chris Wilson
2019-07-03  9:17 ` [PATCH 12/15] drm/i915/gt: Convert timeline tracking to spinlock Chris Wilson
2019-07-03  9:17 ` [PATCH 13/15] drm/i915/gt: Guard timeline pinning with its own mutex Chris Wilson
2019-07-03  9:17 ` Chris Wilson [this message]
2019-07-03  9:17 ` [PATCH 15/15] drm/i915: Replace struct_mutex for batch pool serialisation Chris Wilson
2019-07-03  9:41 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/15] drm/i915/selftests: Common live setup/teardown Patchwork
2019-07-03  9:47 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-07-03  9:48 ` [PATCH 01/15] " Matthew Auld
2019-07-03 10:04   ` Chris Wilson
2019-07-03  9:59 ` ✓ Fi.CI.BAT: success for series starting with [01/15] " Patchwork
2019-07-04  3:22 ` ✗ Fi.CI.IGT: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190703091726.11690-14-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=matthew.auld@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.