All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH v2 03/11] drm/i915: Defer transfer onto execution timeline to actual hw submission
Date: Mon,  7 Nov 2016 13:59:42 +0000	[thread overview]
Message-ID: <20161107135950.28861-4-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20161107135950.28861-1-chris@chris-wilson.co.uk>

Defer the transfer from the client's timeline onto the execution
timeline from the point of readiness to the point of actual submission.
For example, in execlists, a request is finally submitted to hardware
when the hardware is ready, and only put onto the hardware queue when
the request is ready. By deferring the transfer, we ensure that the
timeline is maintained in retirement order if we decide to queue the
requests onto the hardware in a different order than fifo.

v2: Rebased onto distinct global/user timeline lock classes.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c    | 31 +++++++++++++++++-------------
 drivers/gpu/drm/i915/i915_gem_request.h    |  2 ++
 drivers/gpu/drm/i915/i915_guc_submission.c | 14 +++++++++++++-
 drivers/gpu/drm/i915/intel_lrc.c           | 23 +++++++++++++---------
 drivers/gpu/drm/i915/intel_ringbuffer.c    |  2 ++
 5 files changed, 49 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index e41d51a68ed8..19c29fafb07a 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -307,25 +307,16 @@ static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
 	return atomic_inc_return(&tl->next_seqno);
 }
 
-static int __i915_sw_fence_call
-submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+void __i915_gem_request_submit(struct drm_i915_gem_request *request)
 {
-	struct drm_i915_gem_request *request =
-		container_of(fence, typeof(*request), submit);
 	struct intel_engine_cs *engine = request->engine;
 	struct intel_timeline *timeline;
-	unsigned long flags;
 	u32 seqno;
 
-	if (state != FENCE_COMPLETE)
-		return NOTIFY_DONE;
-
 	/* Transfer from per-context onto the global per-engine timeline */
 	timeline = engine->timeline;
 	GEM_BUG_ON(timeline == request->timeline);
-
-	/* Will be called from irq-context when using foreign DMA fences */
-	spin_lock_irqsave(&timeline->lock, flags);
+	assert_spin_locked(&timeline->lock);
 
 	seqno = timeline_get_seqno(timeline->common);
 	GEM_BUG_ON(!seqno);
@@ -345,15 +336,29 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	GEM_BUG_ON(!request->global_seqno);
 	engine->emit_breadcrumb(request,
 				request->ring->vaddr + request->postfix);
-	engine->submit_request(request);
 
 	spin_lock(&request->timeline->lock);
 	list_move_tail(&request->link, &timeline->requests);
 	spin_unlock(&request->timeline->lock);
 
 	i915_sw_fence_commit(&request->execute);
+}
 
-	spin_unlock_irqrestore(&timeline->lock, flags);
+static int __i915_sw_fence_call
+submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+	if (state == FENCE_COMPLETE) {
+		struct drm_i915_gem_request *request =
+			container_of(fence, typeof(*request), submit);
+		struct intel_engine_cs *engine = request->engine;
+		unsigned long flags;
+
+		/* Will be called from irq-context when using foreign fences. */
+		spin_lock_irqsave_nested(&engine->timeline->lock, flags,
+					 SINGLE_DEPTH_NESTING);
+		engine->submit_request(request);
+		spin_unlock_irqrestore(&engine->timeline->lock, flags);
+	}
 
 	return NOTIFY_DONE;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index c8547d9b9004..d8904863d3d9 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -241,6 +241,8 @@ void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
 #define i915_add_request_no_flush(req) \
 	__i915_add_request(req, false)
 
+void __i915_gem_request_submit(struct drm_i915_gem_request *request);
+
 struct intel_rps_client;
 #define NO_WAITBOOST ERR_PTR(-1)
 #define IS_RPS_CLIENT(p) (!IS_ERR(p))
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 666dab7a675a..83438c6a8864 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -629,11 +629,23 @@ static int guc_ring_doorbell(struct i915_guc_client *gc)
 static void i915_guc_submit(struct drm_i915_gem_request *rq)
 {
 	struct drm_i915_private *dev_priv = rq->i915;
-	unsigned int engine_id = rq->engine->id;
+	struct intel_engine_cs *engine = rq->engine;
+	unsigned int engine_id = engine->id;
 	struct intel_guc *guc = &rq->i915->guc;
 	struct i915_guc_client *client = guc->execbuf_client;
 	int b_ret;
 
+	/* We keep the previous context alive until we retire the following
+	 * request. This ensures that any the context object is still pinned
+	 * for any residual writes the HW makes into it on the context switch
+	 * into the next object following the breadcrumb. Otherwise, we may
+	 * retire the context too early.
+	 */
+	rq->previous_context = engine->last_context;
+	engine->last_context = rq->ctx;
+
+	__i915_gem_request_submit(rq);
+
 	spin_lock(&client->wq_lock);
 	guc_wq_item_append(client, rq);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index fa3012c342cc..fa4920e72e93 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -434,6 +434,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *cursor, *last;
 	struct execlist_port *port = engine->execlist_port;
+	unsigned long flags;
 	bool submit = false;
 
 	last = port->request;
@@ -469,6 +470,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * and context switches) submission.
 	 */
 
+	spin_lock_irqsave(&engine->timeline->lock, flags);
 	spin_lock(&engine->execlist_lock);
 	list_for_each_entry(cursor, &engine->execlist_queue, execlist_link) {
 		/* Can we combine this request with the current port? It has to
@@ -501,6 +503,17 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			i915_gem_request_assign(&port->request, last);
 			port++;
 		}
+
+		/* We keep the previous context alive until we retire the
+		 * following request. This ensures that any the context object
+		 * is still pinned for any residual writes the HW makes into it
+		 * on the context switch into the next object following the
+		 * breadcrumb. Otherwise, we may retire the context too early.
+		 */
+		cursor->previous_context = engine->last_context;
+		engine->last_context = cursor->ctx;
+
+		__i915_gem_request_submit(cursor);
 		last = cursor;
 		submit = true;
 	}
@@ -512,6 +525,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		i915_gem_request_assign(&port->request, last);
 	}
 	spin_unlock(&engine->execlist_lock);
+	spin_unlock_irqrestore(&engine->timeline->lock, flags);
 
 	if (submit)
 		execlists_submit_ports(engine);
@@ -599,15 +613,6 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
 
 	spin_lock_irqsave(&engine->execlist_lock, flags);
 
-	/* We keep the previous context alive until we retire the following
-	 * request. This ensures that any the context object is still pinned
-	 * for any residual writes the HW makes into it on the context switch
-	 * into the next object following the breadcrumb. Otherwise, we may
-	 * retire the context too early.
-	 */
-	request->previous_context = engine->last_context;
-	engine->last_context = request->ctx;
-
 	list_add_tail(&request->execlist_link, &engine->execlist_queue);
 	if (execlists_elsp_idle(engine))
 		tasklet_hi_schedule(&engine->irq_tasklet);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 700e93d80616..f91ee24e2763 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1294,6 +1294,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request)
 {
 	struct drm_i915_private *dev_priv = request->i915;
 
+	__i915_gem_request_submit(request);
+
 	I915_WRITE_TAIL(request->engine, request->tail);
 }
 
-- 
2.10.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2016-11-07 14:00 UTC|newest]

Thread overview: 82+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-11-07 13:59 Trivial scheduler, take 2 Chris Wilson
2016-11-07 13:59 ` [PATCH v2 01/11] drm/i915: Create distinct lockclasses for execution vs user timelines Chris Wilson
2016-11-08  7:43   ` Joonas Lahtinen
2016-11-08  8:50     ` Chris Wilson
2016-11-07 13:59 ` [PATCH v2 02/11] drm/i915: Split request submit/execute phase into two Chris Wilson
2016-11-08  9:06   ` Joonas Lahtinen
2016-11-07 13:59 ` Chris Wilson [this message]
2016-11-10 10:43   ` [PATCH v2 03/11] drm/i915: Defer transfer onto execution timeline to actual hw submission Tvrtko Ursulin
2016-11-10 11:11     ` Chris Wilson
2016-11-10 11:51       ` Tvrtko Ursulin
2016-11-10 14:43         ` Chris Wilson
2016-11-10 11:23     ` [PATCH v3] " Chris Wilson
2016-11-07 13:59 ` [PATCH v2 04/11] drm/i915: Remove engine->execlist_lock Chris Wilson
2016-11-07 13:59 ` [PATCH v2 05/11] drm/i915/scheduler: Signal the arrival of a new request Chris Wilson
2016-11-07 13:59 ` [PATCH v2 06/11] drm/i915/scheduler: Record all dependencies upon request construction Chris Wilson
2016-11-08 12:20   ` Chris Wilson
2016-11-10 10:44     ` Tvrtko Ursulin
2016-11-10 10:55       ` Chris Wilson
2016-11-10 11:54         ` Tvrtko Ursulin
2016-11-10 12:10           ` Chris Wilson
2016-11-10 14:45   ` Tvrtko Ursulin
2016-11-10 15:01     ` Chris Wilson
2016-11-10 15:36       ` Tvrtko Ursulin
2016-11-10 15:55         ` Chris Wilson
2016-11-07 13:59 ` [PATCH v2 07/11] drm/i915/scheduler: Boost priorities for flips Chris Wilson
2016-11-10 10:52   ` Tvrtko Ursulin
2016-11-07 13:59 ` [PATCH v2 08/11] HACK drm/i915/scheduler: emulate a scheduler for guc Chris Wilson
2016-11-07 13:59 ` [PATCH v2 09/11] drm/i915/scheduler: Support user-defined priorities Chris Wilson
2016-11-10 13:02   ` Tvrtko Ursulin
2016-11-10 13:10     ` Chris Wilson
2016-11-07 13:59 ` [PATCH v2 10/11] drm/i915: Enable userspace to opt-out of implicit fencing Chris Wilson
2016-11-07 13:59 ` [PATCH v2 11/11] drm/i915: Support explicit fencing for execbuf Chris Wilson
2016-11-07 15:18 ` ✓ Fi.CI.BAT: success for series starting with [v2,01/11] drm/i915: Create distinct lockclasses for execution vs user timelines Patchwork
2016-11-10 11:45 ` ✓ Fi.CI.BAT: success for series starting with [v2,01/11] drm/i915: Create distinct lockclasses for execution vs user timelines (rev2) Patchwork
2016-11-10 12:04   ` Saarinen, Jani
2016-11-14  8:56 ` [PATCH v3 01/14] drm/i915: Give each sw_fence its own lockclass Chris Wilson
2016-11-14  8:56   ` [PATCH v3 02/14] drm/i915: Create distinct lockclasses for execution vs user timelines Chris Wilson
2016-11-14  8:56   ` [PATCH v3 03/14] drm/i915: Split request submit/execute phase into two Chris Wilson
2016-11-14  8:56   ` [PATCH v3 04/14] drm/i915: Defer transfer onto execution timeline to actual hw submission Chris Wilson
2016-11-14 10:59     ` Tvrtko Ursulin
2016-11-14  8:56   ` [PATCH v3 05/14] drm/i915: Remove engine->execlist_lock Chris Wilson
2016-11-14  8:56   ` [PATCH v3 06/14] drm/i915/scheduler: Signal the arrival of a new request Chris Wilson
2016-11-14  8:56   ` [PATCH v3 07/14] drm/i915/scheduler: Record all dependencies upon request construction Chris Wilson
2016-11-14 11:09     ` Tvrtko Ursulin
2016-11-14  8:56   ` [PATCH v3 08/14] drm/i915/scheduler: Execute requests in order of priorities Chris Wilson
2016-11-14 11:15     ` Tvrtko Ursulin
2016-11-14 11:41       ` Chris Wilson
2016-11-14 11:48         ` Tvrtko Ursulin
2016-11-14 14:25           ` Chris Wilson
2016-11-14  8:56   ` [PATCH v3 09/14] drm/i915: Store the execution priority on the context Chris Wilson
2016-11-14 11:16     ` Tvrtko Ursulin
2016-11-14  8:56   ` [PATCH v3 10/14] drm/i915/scheduler: Boost priorities for flips Chris Wilson
2016-11-14  8:57   ` [PATCH v3 11/14] HACK drm/i915/scheduler: emulate a scheduler for guc Chris Wilson
2016-11-14 11:31     ` Tvrtko Ursulin
2016-11-14 14:40       ` Chris Wilson
2016-12-01 10:45     ` Tvrtko Ursulin
2016-12-01 11:18       ` Chris Wilson
2016-12-01 12:45         ` Tvrtko Ursulin
2016-12-01 13:01           ` Chris Wilson
2016-11-14  8:57   ` [PATCH v3 12/14] drm/i915/scheduler: Support user-defined priorities Chris Wilson
2016-11-14 11:32     ` Tvrtko Ursulin
2016-11-14  8:57   ` [PATCH v3 13/14] drm/i915: Enable userspace to opt-out of implicit fencing Chris Wilson
2017-01-25 20:38     ` Chad Versace
2017-01-26 10:32       ` Chris Wilson
2017-01-26 10:58         ` [PATCH] i965: Share the workaround bo between all contexts Chris Wilson
2017-01-26 17:39           ` [Mesa-dev] " Chad Versace
2017-01-26 18:05             ` Chris Wilson
2017-01-26 23:40               ` Chad Versace
2017-01-26 18:46             ` Chris Wilson
2017-01-27  0:01             ` Chad Versace
2017-01-27 18:20               ` [Intel-gfx] " Emil Velikov
2017-01-27 18:30                 ` [Mesa-dev] " Chris Wilson
2017-01-27 18:37                   ` [Intel-gfx] " Emil Velikov
2017-01-27  0:07         ` [PATCH v3 13/14] drm/i915: Enable userspace to opt-out of implicit fencing Chad Versace
2016-11-14  8:57   ` [PATCH v3 14/14] drm/i915: Support explicit fencing for execbuf Chris Wilson
2016-11-14 22:29     ` Rafael Antognolli
2017-01-25 20:27     ` Chad Versace
2016-11-14  9:01   ` [PATCH v3 01/14] drm/i915: Give each sw_fence its own lockclass Tvrtko Ursulin
2016-11-14  9:05     ` Chris Wilson
2016-11-14 10:57   ` Tvrtko Ursulin
2016-11-14 14:48   ` Joonas Lahtinen
2016-11-14 15:13     ` Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161107135950.28861-4-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.