All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Subject: [Intel-gfx] [PATCH 05/12] drm/i915/execlists: Defer schedule_out until after the next dequeue
Date: Wed, 17 Jun 2020 15:16:49 +0100	[thread overview]
Message-ID: <20200617141656.24384-5-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20200617141656.24384-1-chris@chris-wilson.co.uk>

Inside schedule_out, we do extra work upon idling the context, such as
updating the runtime, kicking off retires, and kicking virtual engines.
However, if we are processing a series of single requests per context,
we may find ourselves scheduling out the context, only to
immediately schedule it back in during dequeue. This is just extra work
that we can avoid if we keep the context marked as inflight across the
dequeue. This becomes more significant later on for minimising virtual
engine misses.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_context_types.h |  4 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  2 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h  | 13 +++++
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 47 ++++++++++++++-----
 4 files changed, 51 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 4954b0df4864..b63db45bab7b 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -45,8 +45,8 @@ struct intel_context {
 
 	struct intel_engine_cs *engine;
 	struct intel_engine_cs *inflight;
-#define intel_context_inflight(ce) ptr_mask_bits(READ_ONCE((ce)->inflight), 2)
-#define intel_context_inflight_count(ce) ptr_unmask_bits(READ_ONCE((ce)->inflight), 2)
+#define intel_context_inflight(ce) ptr_mask_bits(READ_ONCE((ce)->inflight), 3)
+#define intel_context_inflight_count(ce) ptr_unmask_bits(READ_ONCE((ce)->inflight), 3)
 
 	struct i915_address_space *vm;
 	struct i915_gem_context __rcu *gem_context;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index f30cdd591c8c..c8255611573e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -515,6 +515,8 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine)
 	memset(execlists->pending, 0, sizeof(execlists->pending));
 	execlists->active =
 		memset(execlists->inflight, 0, sizeof(execlists->inflight));
+	execlists->inactive =
+		memset(execlists->post, 0, sizeof(execlists->post));
 
 	execlists->queue_priority_hint = INT_MIN;
 	execlists->queue = RB_ROOT_CACHED;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 073c3769e8cc..31cf60cef5a8 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -208,6 +208,10 @@ struct intel_engine_execlists {
 	 * @active: the currently known context executing on HW
 	 */
 	struct i915_request * const *active;
+	/**
+	 * @inactive: the current vacancy of completed CS
+	 */
+	struct i915_request **inactive;
 	/**
 	 * @inflight: the set of contexts submitted and acknowleged by HW
 	 *
@@ -225,6 +229,15 @@ struct intel_engine_execlists {
 	 * preemption or idle-to-active event.
 	 */
 	struct i915_request *pending[EXECLIST_MAX_PORTS + 1];
+	/**
+	 * @post: the set of completed context switches
+	 *
+	 * Since we may want to stagger the processing of the CS switches
+	 * with the next submission, so that the contexts are notionally
+	 * kept in flight across the dequeue, we defer scheduling out of
+	 * the completed context switches.
+	 */
+	struct i915_request *post[2 * EXECLIST_MAX_PORTS + 1];
 
 	/**
 	 * @port_mask: number of execlist ports - 1
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index c26f3fe17ebb..0fd8a6741b06 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1387,6 +1387,8 @@ __execlists_schedule_in(struct i915_request *rq)
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
 	intel_engine_context_in(engine);
 
+	CE_TRACE(ce, "schedule-in, ccid:%x\n", ce->lrc.ccid);
+
 	return engine;
 }
 
@@ -1433,6 +1435,8 @@ __execlists_schedule_out(struct i915_request *rq,
 	 * refrain from doing non-trivial work here.
 	 */
 
+	CE_TRACE(ce, "schedule-out, ccid:%x\n", ccid);
+
 	/*
 	 * If we have just completed this context, the engine may now be
 	 * idle and we want to re-enter powersaving.
@@ -2057,9 +2061,10 @@ static void set_preempt_timeout(struct intel_engine_cs *engine,
 		     active_preempt_timeout(engine, rq));
 }
 
-static inline void clear_ports(struct i915_request **ports, int count)
+static inline struct i915_request **
+clear_ports(struct i915_request **ports, int count)
 {
-	memset_p((void **)ports, NULL, count);
+	return memset_p((void **)ports, NULL, count);
 }
 
 static void execlists_dequeue(struct intel_engine_cs *engine)
@@ -2435,7 +2440,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		if (!memcmp(active, execlists->pending,
 			    (port - execlists->pending + 1) * sizeof(*port))) {
 			do
-				execlists_schedule_out(fetch_and_zero(port));
+				*execlists->inactive++ = *port;
 			while (port-- != execlists->pending);
 
 			goto skip_submit;
@@ -2449,6 +2454,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		start_timeslice(engine, execlists->queue_priority_hint);
 skip_submit:
 		ring_set_paused(engine, 0);
+		execlists->pending[0] = NULL;
 	}
 }
 
@@ -2458,12 +2464,12 @@ cancel_port_requests(struct intel_engine_execlists * const execlists)
 	struct i915_request * const *port;
 
 	for (port = execlists->pending; *port; port++)
-		execlists_schedule_out(*port);
+		*execlists->inactive++ = *port;
 	clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
 
 	/* Mark the end of active before we overwrite *active */
 	for (port = xchg(&execlists->active, execlists->pending); *port; port++)
-		execlists_schedule_out(*port);
+		*execlists->inactive++ = *port;
 	clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
 
 	smp_wmb(); /* complete the seqlock for execlists_active() */
@@ -2624,7 +2630,7 @@ static void process_csb(struct intel_engine_cs *engine)
 			/* cancel old inflight, prepare for switch */
 			trace_ports(execlists, "preempted", old);
 			while (*old)
-				execlists_schedule_out(*old++);
+				*execlists->inactive++ = *old++;
 
 			/* switch pending to inflight */
 			GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
@@ -2681,7 +2687,7 @@ static void process_csb(struct intel_engine_cs *engine)
 					     regs[CTX_RING_TAIL]);
 			}
 
-			execlists_schedule_out(*execlists->active++);
+			*execlists->inactive++ = *execlists->active++;
 
 			GEM_BUG_ON(execlists->active - execlists->inflight >
 				   execlists_num_ports(execlists));
@@ -2705,6 +2711,20 @@ static void process_csb(struct intel_engine_cs *engine)
 	invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
 }
 
+static void post_process_csb(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists * const el = &engine->execlists;
+	struct i915_request **port;
+
+	if (!el->post[0])
+		return;
+
+	GEM_BUG_ON(el->post[2 * EXECLIST_MAX_PORTS]);
+	for (port = el->post; *port; port++)
+		execlists_schedule_out(*port);
+	el->inactive = clear_ports(el->post, port - el->post);
+}
+
 static void __execlists_hold(struct i915_request *rq)
 {
 	LIST_HEAD(list);
@@ -2973,8 +2993,8 @@ active_context(struct intel_engine_cs *engine, u32 ccid)
 	for (port = el->active; (rq = *port); port++) {
 		if (rq->context->lrc.ccid == ccid) {
 			ENGINE_TRACE(engine,
-				     "ccid found at active:%zd\n",
-				     port - el->active);
+				     "ccid:%x found at active:%zd\n",
+				     ccid, port - el->active);
 			return rq;
 		}
 	}
@@ -2982,8 +3002,8 @@ active_context(struct intel_engine_cs *engine, u32 ccid)
 	for (port = el->pending; (rq = *port); port++) {
 		if (rq->context->lrc.ccid == ccid) {
 			ENGINE_TRACE(engine,
-				     "ccid found at pending:%zd\n",
-				     port - el->pending);
+				     "ccid:%x found at pending:%zd\n",
+				     ccid, port - el->pending);
 			return rq;
 		}
 	}
@@ -3125,6 +3145,8 @@ static void execlists_submission_tasklet(unsigned long data)
 		spin_unlock_irqrestore(&engine->active.lock, flags);
 		rcu_read_unlock();
 	}
+
+	post_process_csb(engine);
 }
 
 static void __execlists_kick(struct intel_engine_execlists *execlists)
@@ -4063,8 +4085,6 @@ static void enable_execlists(struct intel_engine_cs *engine)
 	ENGINE_POSTING_READ(engine, RING_HWS_PGA);
 
 	enable_error_interrupt(engine);
-
-	engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
 }
 
 static bool unexpected_starting_state(struct intel_engine_cs *engine)
@@ -5107,6 +5127,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 	else
 		execlists->csb_size = GEN11_CSB_ENTRIES;
 
+	engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
 	if (INTEL_GEN(engine->i915) >= 11) {
 		execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
 		execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2020-06-17 14:17 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-17 14:16 [Intel-gfx] [PATCH 01/12] drm/i915/selftests: Enable selftesting of busy-stats Chris Wilson
2020-06-17 14:16 ` [Intel-gfx] [PATCH 02/12] drm/i915/gt: Always report the sample time for busy-stats Chris Wilson
2020-06-17 14:16 ` [Intel-gfx] [PATCH 03/12] drm/i915/gt: Decouple completed requests on unwind Chris Wilson
2020-06-17 14:16 ` [Intel-gfx] [PATCH 04/12] drm/i915/execlists: Replace direct submit with direct call to tasklet Chris Wilson
2020-06-17 14:16 ` Chris Wilson [this message]
2020-06-17 14:16 ` [Intel-gfx] [PATCH 06/12] drm/i915/gt: ce->inflight updates are now serialised Chris Wilson
2020-06-17 14:16 ` [Intel-gfx] [PATCH 07/12] drm/i915/gt: Drop atomic for engine->fw_active tracking Chris Wilson
2020-06-17 14:16 ` [Intel-gfx] [PATCH 08/12] drm/i915/gt: Extract busy-stats for ring-scheduler Chris Wilson
2020-06-17 14:16 ` [Intel-gfx] [PATCH 09/12] drm/i915/gt: Convert stats.active to plain unsigned int Chris Wilson
2020-06-17 14:16 ` [Intel-gfx] [PATCH 10/12] drm/i915/gt: Use virtual_engine during execlists_dequeue Chris Wilson
2020-06-17 14:16 ` [Intel-gfx] [PATCH 11/12] drm/i915/gt: Decouple inflight virtual engines Chris Wilson
2020-06-17 14:16 ` [Intel-gfx] [PATCH 12/12] drm/i915/gt: Resubmit the virtual engine on schedule-out Chris Wilson
2020-06-17 14:41 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/12] drm/i915/selftests: Enable selftesting of busy-stats Patchwork
2020-06-17 14:43 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2020-06-17 15:11 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200617141656.24384-5-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.