All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH 07/21] drm/i915: Spin after waking up for an interrupt
Date: Fri,  3 Jun 2016 17:08:39 +0100	[thread overview]
Message-ID: <1464970133-29859-8-git-send-email-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <1464970133-29859-1-git-send-email-chris@chris-wilson.co.uk>

When waiting for an interrupt (waiting for the GPU to complete some
work), we know we are the single waiter for the GPU. We also know when
the GPU has nearly completed our request (or at least started processing
it), so after being woken and we detect that the GPU is almost finished,
allow the bottom-half to spin for a very short while to reduce client
latencies.

The impact is minimal, there was an improvement to the realtime-vs-many
clients case, but exporting the function proves useful later.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c  |  2 +-
 drivers/gpu/drm/i915/i915_drv.h      | 26 +++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem.c      | 40 +++++++++++++++++++++---------------
 drivers/gpu/drm/i915/intel_display.c |  2 +-
 drivers/gpu/drm/i915/intel_pm.c      |  4 ++--
 5 files changed, 45 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 48683538b4e2..0c287bf0d230 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -663,7 +663,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
 					   i915_gem_request_get_seqno(work->flip_queued_req),
 					   dev_priv->next_seqno,
 					   engine->get_seqno(engine),
-					   i915_gem_request_completed(work->flip_queued_req, true));
+					   i915_gem_request_completed(work->flip_queued_req));
 			} else
 				seq_printf(m, "Flip not associated with any ring\n");
 			seq_printf(m, "Flip queued on frame %d, (was ready on frame %d), now %d\n",
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 68b383d98457..b0460eda2113 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3219,24 +3219,27 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
 	return (int32_t)(seq1 - seq2) >= 0;
 }
 
-static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
-					   bool lazy_coherency)
+static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req)
 {
-	if (!lazy_coherency && req->engine->irq_seqno_barrier)
-		req->engine->irq_seqno_barrier(req->engine);
 	return i915_seqno_passed(req->engine->get_seqno(req->engine),
 				 req->previous_seqno);
 }
 
-static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
-					      bool lazy_coherency)
+static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req)
 {
-	if (!lazy_coherency && req->engine->irq_seqno_barrier)
-		req->engine->irq_seqno_barrier(req->engine);
 	return i915_seqno_passed(req->engine->get_seqno(req->engine),
 				 req->seqno);
 }
 
+bool __i915_spin_request(const struct drm_i915_gem_request *request,
+			 int state, unsigned long timeout_us);
+static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
+				     int state, unsigned long timeout_us)
+{
+	return (i915_gem_request_started(request) &&
+		__i915_spin_request(request, state, timeout_us));
+}
+
 int __must_check i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno);
 int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
 
@@ -3913,6 +3916,8 @@ static inline void i915_trace_irq_get(struct intel_engine_cs *engine,
 
 static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
 {
+	struct intel_engine_cs *engine = req->engine;
+
 	/* Ensure our read of the seqno is coherent so that we
 	 * do not "miss an interrupt" (i.e. if this is the last
 	 * request and the seqno write from the GPU is not visible
@@ -3924,7 +3929,10 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
 	 * but it is easier and safer to do it every time the waiter
 	 * is woken.
 	 */
-	if (i915_gem_request_completed(req, false))
+	if (engine->irq_seqno_barrier)
+		engine->irq_seqno_barrier(engine);
+
+	if (i915_gem_request_completed(req))
 		return true;
 
 	/* We need to check whether any gpu reset happened in between
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d08edb3d16f1..bf5c93f2bd81 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1155,9 +1155,9 @@ static bool busywait_stop(unsigned long timeout, unsigned cpu)
 	return this_cpu != cpu;
 }
 
-static bool __i915_spin_request(struct drm_i915_gem_request *req, int state)
+bool __i915_spin_request(const struct drm_i915_gem_request *req,
+			 int state, unsigned long timeout_us)
 {
-	unsigned long timeout;
 	unsigned cpu;
 
 	/* When waiting for high frequency requests, e.g. during synchronous
@@ -1170,19 +1170,15 @@ static bool __i915_spin_request(struct drm_i915_gem_request *req, int state)
 	 * takes to sleep on a request, on the order of a microsecond.
 	 */
 
-	/* Only spin if we know the GPU is processing this request */
-	if (!i915_gem_request_started(req, true))
-		return false;
-
-	timeout = local_clock_us(&cpu) + 5;
+	timeout_us += local_clock_us(&cpu);
 	do {
-		if (i915_gem_request_completed(req, true))
+		if (i915_gem_request_completed(req))
 			return true;
 
 		if (signal_pending_state(state, current))
 			break;
 
-		if (busywait_stop(timeout, cpu))
+		if (busywait_stop(timeout_us, cpu))
 			break;
 
 		cpu_relax_lowlatency();
@@ -1224,7 +1220,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	if (list_empty(&req->list))
 		return 0;
 
-	if (i915_gem_request_completed(req, true))
+	if (i915_gem_request_completed(req))
 		return 0;
 
 	timeout_remain = MAX_SCHEDULE_TIMEOUT;
@@ -1249,7 +1245,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
 
 	/* Optimistic spin for the next ~jiffie before touching IRQs */
-	if (__i915_spin_request(req, state))
+	if (i915_spin_request(req, state, 5))
 		goto complete;
 
 	intel_wait_init(&wait, req->seqno);
@@ -1290,6 +1286,10 @@ wakeup:
 		 */
 		if (__i915_request_irq_complete(req))
 			break;
+
+		/* Only spin if we know the GPU is processing this request */
+		if (i915_spin_request(req, state, 2))
+			break;
 	}
 	remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
 
@@ -2805,8 +2805,16 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *request;
 
+	/* We are called by the error capture and reset at a random
+	 * point in time. In particular, note that neither is crucially
+	 * ordered with an interrupt. After a hang, the GPU is dead and we
+	 * assume that no more writes can happen (we waited long enough for
+	 * all writes that were in transaction to be flushed) - adding an
+	 * extra delay for a recent interrupt is pointless. Hence, we do
+	 * not need an engine->irq_seqno_barrier() before the seqno reads.
+	 */
 	list_for_each_entry(request, &engine->request_list, list) {
-		if (i915_gem_request_completed(request, false))
+		if (i915_gem_request_completed(request))
 			continue;
 
 		return request;
@@ -2937,7 +2945,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
 					   struct drm_i915_gem_request,
 					   list);
 
-		if (!i915_gem_request_completed(request, true))
+		if (!i915_gem_request_completed(request))
 			break;
 
 		i915_gem_request_retire(request);
@@ -2961,7 +2969,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
 	}
 
 	if (unlikely(engine->trace_irq_req &&
-		     i915_gem_request_completed(engine->trace_irq_req, true))) {
+		     i915_gem_request_completed(engine->trace_irq_req))) {
 		engine->irq_put(engine);
 		i915_gem_request_assign(&engine->trace_irq_req, NULL);
 	}
@@ -3058,7 +3066,7 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
 		if (req == NULL)
 			continue;
 
-		if (i915_gem_request_completed(req, true))
+		if (i915_gem_request_completed(req))
 			i915_gem_object_retire__read(obj, i);
 	}
 
@@ -3164,7 +3172,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
 	if (to == from)
 		return 0;
 
-	if (i915_gem_request_completed(from_req, true))
+	if (i915_gem_request_completed(from_req))
 		return 0;
 
 	if (!i915_semaphore_is_enabled(to_i915(obj->base.dev))) {
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 2bc291ac7243..bb09ee6d1a3f 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -11590,7 +11590,7 @@ static bool __pageflip_stall_check_cs(struct drm_i915_private *dev_priv,
 	vblank = intel_crtc_get_vblank_counter(intel_crtc);
 	if (work->flip_ready_vblank == 0) {
 		if (work->flip_queued_req &&
-		    !i915_gem_request_completed(work->flip_queued_req, true))
+		    !i915_gem_request_completed(work->flip_queued_req))
 			return false;
 
 		work->flip_ready_vblank = vblank;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 657a64fc2780..712bd0debb91 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -7687,7 +7687,7 @@ static void __intel_rps_boost_work(struct work_struct *work)
 	struct request_boost *boost = container_of(work, struct request_boost, work);
 	struct drm_i915_gem_request *req = boost->req;
 
-	if (!i915_gem_request_completed(req, true))
+	if (!i915_gem_request_completed(req))
 		gen6_rps_boost(req->i915, NULL, req->emitted_jiffies);
 
 	i915_gem_request_unreference(req);
@@ -7701,7 +7701,7 @@ void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req)
 	if (req == NULL || INTEL_GEN(req->i915) < 6)
 		return;
 
-	if (i915_gem_request_completed(req, true))
+	if (i915_gem_request_completed(req))
 		return;
 
 	boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2016-06-03 16:09 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-03 16:08 Breadcrumbs, again Chris Wilson
2016-06-03 16:08 ` [PATCH 01/21] drm/i915/shrinker: Flush active on objects before counting Chris Wilson
2016-06-03 16:08 ` [PATCH 02/21] drm/i915: Delay queuing hangcheck to wait-request Chris Wilson
2016-06-08  8:42   ` Daniel Vetter
2016-06-08  9:13     ` Chris Wilson
2016-06-03 16:08 ` [PATCH 03/21] drm/i915: Remove the dedicated hangcheck workqueue Chris Wilson
2016-06-06 12:52   ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 04/21] drm/i915: Make queueing the hangcheck work inline Chris Wilson
2016-06-03 16:08 ` [PATCH 05/21] drm/i915: Separate GPU hang waitqueue from advance Chris Wilson
2016-06-06 13:00   ` Tvrtko Ursulin
2016-06-07 12:11     ` Arun Siluvery
2016-06-03 16:08 ` [PATCH 06/21] drm/i915: Slaughter the thundering i915_wait_request herd Chris Wilson
2016-06-06 13:58   ` Tvrtko Ursulin
2016-06-03 16:08 ` Chris Wilson [this message]
2016-06-06 14:39   ` [PATCH 07/21] drm/i915: Spin after waking up for an interrupt Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 08/21] drm/i915: Use HWS for seqno tracking everywhere Chris Wilson
2016-06-06 14:55   ` Tvrtko Ursulin
2016-06-08  9:24     ` Chris Wilson
2016-06-03 16:08 ` [PATCH 09/21] drm/i915: Stop mapping the scratch page into CPU space Chris Wilson
2016-06-06 15:03   ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 10/21] drm/i915: Allocate scratch page from stolen Chris Wilson
2016-06-06 15:05   ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 11/21] drm/i915: Refactor scratch object allocation for gen2 w/a buffer Chris Wilson
2016-06-06 15:09   ` Tvrtko Ursulin
2016-06-08  9:27     ` Chris Wilson
2016-06-03 16:08 ` [PATCH 12/21] drm/i915: Add a delay between interrupt and inspecting the final seqno (ilk) Chris Wilson
2016-06-03 16:08 ` [PATCH 13/21] drm/i915: Check the CPU cached value of seqno after waking the waiter Chris Wilson
2016-06-06 15:10   ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 14/21] drm/i915: Only apply one barrier after a breadcrumb interrupt is posted Chris Wilson
2016-06-06 15:34   ` Tvrtko Ursulin
2016-06-08  9:35     ` Chris Wilson
2016-06-08  9:57       ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 15/21] drm/i915: Stop setting wraparound seqno on initialisation Chris Wilson
2016-06-08  8:54   ` Daniel Vetter
2016-06-03 16:08 ` [PATCH 16/21] drm/i915: Only query timestamp when measuring elapsed time Chris Wilson
2016-06-06 13:50   ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 17/21] drm/i915: Convert trace-irq to the breadcrumb waiter Chris Wilson
2016-06-07 12:04   ` Tvrtko Ursulin
2016-06-08  9:48     ` Chris Wilson
2016-06-08 10:16       ` Tvrtko Ursulin
2016-06-08 11:24         ` Chris Wilson
2016-06-08 11:47           ` Tvrtko Ursulin
2016-06-08 12:34             ` Chris Wilson
2016-06-08 12:44               ` Tvrtko Ursulin
2016-06-08 13:47                 ` Chris Wilson
2016-06-03 16:08 ` [PATCH 18/21] drm/i915: Embed signaling node into the GEM request Chris Wilson
2016-06-07 12:31   ` Tvrtko Ursulin
2016-06-08  9:54     ` Chris Wilson
2016-06-03 16:08 ` [PATCH 19/21] drm/i915: Move the get/put irq locking into the caller Chris Wilson
2016-06-07 12:46   ` Tvrtko Ursulin
2016-06-08 10:01     ` Chris Wilson
2016-06-08 10:18       ` Tvrtko Ursulin
2016-06-08 11:10         ` Chris Wilson
2016-06-08 11:49           ` Tvrtko Ursulin
2016-06-08 12:54             ` Chris Wilson
2016-06-03 16:08 ` [PATCH 20/21] drm/i915: Simplify enabling user-interrupts with L3-remapping Chris Wilson
2016-06-07 12:50   ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 21/21] drm/i915: Remove debug noise on detecting fault-injection of missed interrupts Chris Wilson
2016-06-07 12:51   ` Tvrtko Ursulin
2016-06-03 16:35 ` ✗ Ro.CI.BAT: failure for series starting with [01/21] drm/i915/shrinker: Flush active on objects before counting Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1464970133-29859-8-git-send-email-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.