All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-gfx] [CI 01/12] drm/i915/gt: Move engine setup out of set_default_submission
@ 2021-02-03 11:17 Chris Wilson
  2021-02-03 11:17 ` [Intel-gfx] [CI 02/12] drm/i915/gt: Move submission_method into intel_gt Chris Wilson
                   ` (11 more replies)
  0 siblings, 12 replies; 13+ messages in thread
From: Chris Wilson @ 2021-02-03 11:17 UTC (permalink / raw)
  To: intel-gfx

Now that we no longer switch back and forth between guc and execlists,
we no longer need to restore the backend's vfunc and can leave them set
after initialisation. The only catch is that we lose the submission on
wedging and still need to reset the submit_request vfunc on unwedging.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 .../drm/i915/gt/intel_execlists_submission.c  | 46 ++++++++---------
 .../gpu/drm/i915/gt/intel_ring_submission.c   |  4 --
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 50 ++++++++-----------
 3 files changed, 44 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index b8bd3d48b345..c98fdeb94dba 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -3069,29 +3069,6 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->submit_request = execlists_submit_request;
 	engine->schedule = i915_schedule;
 	engine->execlists.tasklet.callback = execlists_submission_tasklet;
-
-	engine->reset.prepare = execlists_reset_prepare;
-	engine->reset.rewind = execlists_reset_rewind;
-	engine->reset.cancel = execlists_reset_cancel;
-	engine->reset.finish = execlists_reset_finish;
-
-	engine->park = execlists_park;
-	engine->unpark = NULL;
-
-	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
-	if (!intel_vgpu_active(engine->i915)) {
-		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
-		if (can_preempt(engine)) {
-			engine->flags |= I915_ENGINE_HAS_PREEMPTION;
-			if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
-				engine->flags |= I915_ENGINE_HAS_TIMESLICES;
-		}
-	}
-
-	if (intel_engine_has_preemption(engine))
-		engine->emit_bb_start = gen8_emit_bb_start;
-	else
-		engine->emit_bb_start = gen8_emit_bb_start_noarb;
 }
 
 static void execlists_shutdown(struct intel_engine_cs *engine)
@@ -3122,6 +3099,14 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 	engine->cops = &execlists_context_ops;
 	engine->request_alloc = execlists_request_alloc;
 
+	engine->reset.prepare = execlists_reset_prepare;
+	engine->reset.rewind = execlists_reset_rewind;
+	engine->reset.cancel = execlists_reset_cancel;
+	engine->reset.finish = execlists_reset_finish;
+
+	engine->park = execlists_park;
+	engine->unpark = NULL;
+
 	engine->emit_flush = gen8_emit_flush_xcs;
 	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
 	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
@@ -3142,6 +3127,21 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 		 * until a more refined solution exists.
 		 */
 	}
+
+	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
+	if (!intel_vgpu_active(engine->i915)) {
+		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
+		if (can_preempt(engine)) {
+			engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+			if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+				engine->flags |= I915_ENGINE_HAS_TIMESLICES;
+		}
+	}
+
+	if (intel_engine_has_preemption(engine))
+		engine->emit_bb_start = gen8_emit_bb_start;
+	else
+		engine->emit_bb_start = gen8_emit_bb_start_noarb;
 }
 
 static void logical_ring_default_irqs(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 9c2c605d7a92..3cb2ce503544 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -969,14 +969,10 @@ static void gen6_bsd_submit_request(struct i915_request *request)
 static void i9xx_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->submit_request = i9xx_submit_request;
-
-	engine->park = NULL;
-	engine->unpark = NULL;
 }
 
 static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
 {
-	i9xx_set_default_submission(engine);
 	engine->submit_request = gen6_bsd_submit_request;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 92688a9b6717..f72faa0b8339 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -608,35 +608,6 @@ static int guc_resume(struct intel_engine_cs *engine)
 static void guc_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->submit_request = guc_submit_request;
-	engine->schedule = i915_schedule;
-	engine->execlists.tasklet.callback = guc_submission_tasklet;
-
-	engine->reset.prepare = guc_reset_prepare;
-	engine->reset.rewind = guc_reset_rewind;
-	engine->reset.cancel = guc_reset_cancel;
-	engine->reset.finish = guc_reset_finish;
-
-	engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
-	engine->flags |= I915_ENGINE_HAS_PREEMPTION;
-
-	/*
-	 * TODO: GuC supports timeslicing and semaphores as well, but they're
-	 * handled by the firmware so some minor tweaks are required before
-	 * enabling.
-	 *
-	 * engine->flags |= I915_ENGINE_HAS_TIMESLICES;
-	 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
-	 */
-
-	engine->emit_bb_start = gen8_emit_bb_start;
-
-	/*
-	 * For the breadcrumb irq to work we need the interrupts to stay
-	 * enabled. However, on all platforms on which we'll have support for
-	 * GuC submission we don't allow disabling the interrupts at runtime, so
-	 * we're always safe with the current flow.
-	 */
-	GEM_BUG_ON(engine->irq_enable || engine->irq_disable);
 }
 
 static void guc_release(struct intel_engine_cs *engine)
@@ -658,6 +629,13 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
 	engine->cops = &guc_context_ops;
 	engine->request_alloc = guc_request_alloc;
 
+	engine->schedule = i915_schedule;
+
+	engine->reset.prepare = guc_reset_prepare;
+	engine->reset.rewind = guc_reset_rewind;
+	engine->reset.cancel = guc_reset_cancel;
+	engine->reset.finish = guc_reset_finish;
+
 	engine->emit_flush = gen8_emit_flush_xcs;
 	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
 	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
@@ -666,6 +644,20 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
 		engine->emit_flush = gen12_emit_flush_xcs;
 	}
 	engine->set_default_submission = guc_set_default_submission;
+
+	engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
+	engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+
+	/*
+	 * TODO: GuC supports timeslicing and semaphores as well, but they're
+	 * handled by the firmware so some minor tweaks are required before
+	 * enabling.
+	 *
+	 * engine->flags |= I915_ENGINE_HAS_TIMESLICES;
+	 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
+	 */
+
+	engine->emit_bb_start = gen8_emit_bb_start;
 }
 
 static void rcs_submission_override(struct intel_engine_cs *engine)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [Intel-gfx] [CI 02/12] drm/i915/gt: Move submission_method into intel_gt
  2021-02-03 11:17 [Intel-gfx] [CI 01/12] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
@ 2021-02-03 11:17 ` Chris Wilson
  2021-02-03 11:17 ` [Intel-gfx] [CI 03/12] drm/i915/gt: Move CS interrupt handler to the backend Chris Wilson
                   ` (10 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2021-02-03 11:17 UTC (permalink / raw)
  To: intel-gfx

Since we setup the submission method for the engines once, it is easy to
assign an enum and use that instead of probing into the backends.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine.h               |  8 +++++++-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c            | 12 ++++++++----
 drivers/gpu/drm/i915/gt/intel_execlists_submission.c |  8 --------
 drivers/gpu/drm/i915/gt/intel_execlists_submission.h |  3 ---
 drivers/gpu/drm/i915/gt/intel_gt_types.h             |  7 +++++++
 drivers/gpu/drm/i915/gt/intel_reset.c                |  7 +++----
 drivers/gpu/drm/i915/gt/selftest_execlists.c         |  2 +-
 drivers/gpu/drm/i915/gt/selftest_ring_submission.c   |  2 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c    |  5 -----
 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h    |  1 -
 drivers/gpu/drm/i915/i915_perf.c                     | 10 +++++-----
 11 files changed, 32 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 47ee8578e511..8d9184920c51 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -13,8 +13,9 @@
 #include "i915_reg.h"
 #include "i915_request.h"
 #include "i915_selftest.h"
-#include "gt/intel_timeline.h"
 #include "intel_engine_types.h"
+#include "intel_gt_types.h"
+#include "intel_timeline.h"
 #include "intel_workarounds.h"
 
 struct drm_printer;
@@ -262,6 +263,11 @@ void intel_engine_init_active(struct intel_engine_cs *engine,
 #define ENGINE_MOCK	1
 #define ENGINE_VIRTUAL	2
 
+static inline bool intel_engine_uses_guc(const struct intel_engine_cs *engine)
+{
+	return engine->gt->submission_method >= INTEL_SUBMISSION_GUC;
+}
+
 static inline bool
 intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 56fb9cece71b..dab8d734e272 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -891,12 +891,16 @@ int intel_engines_init(struct intel_gt *gt)
 	enum intel_engine_id id;
 	int err;
 
-	if (intel_uc_uses_guc_submission(&gt->uc))
+	if (intel_uc_uses_guc_submission(&gt->uc)) {
+		gt->submission_method = INTEL_SUBMISSION_GUC;
 		setup = intel_guc_submission_setup;
-	else if (HAS_EXECLISTS(gt->i915))
+	} else if (HAS_EXECLISTS(gt->i915)) {
+		gt->submission_method = INTEL_SUBMISSION_ELSP;
 		setup = intel_execlists_submission_setup;
-	else
+	} else {
+		gt->submission_method = INTEL_SUBMISSION_RING;
 		setup = intel_ring_submission_setup;
+	}
 
 	for_each_engine(engine, gt, id) {
 		err = engine_setup_common(engine);
@@ -1467,7 +1471,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
 		drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
 	}
 
-	if (intel_engine_in_guc_submission_mode(engine)) {
+	if (intel_engine_uses_guc(engine)) {
 		/* nothing to print yet */
 	} else if (HAS_EXECLISTS(dev_priv)) {
 		struct i915_request * const *port, *rq;
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index c98fdeb94dba..8dc52cc43f27 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -1757,7 +1757,6 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
 	 */
 	GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
 		   !reset_in_progress(execlists));
-	GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
 
 	/*
 	 * Note that csb_write, csb_status may be either in HWSP or mmio.
@@ -3877,13 +3876,6 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
-bool
-intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
-{
-	return engine->set_default_submission ==
-	       execlists_set_default_submission;
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftest_execlists.c"
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h
index a8fd7adefd82..f7bd3fccfee8 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h
@@ -41,7 +41,4 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
 				     const struct intel_engine_cs *master,
 				     const struct intel_engine_cs *sibling);
 
-bool
-intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
-
 #endif /* __INTEL_EXECLISTS_SUBMISSION_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 91d20daca536..626af37c7790 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -29,6 +29,12 @@ struct i915_ggtt;
 struct intel_engine_cs;
 struct intel_uncore;
 
+enum intel_submission_method {
+	INTEL_SUBMISSION_RING,
+	INTEL_SUBMISSION_ELSP,
+	INTEL_SUBMISSION_GUC,
+};
+
 struct intel_gt {
 	struct drm_i915_private *i915;
 	struct intel_uncore *uncore;
@@ -108,6 +114,7 @@ struct intel_gt {
 	struct intel_engine_cs *engine[I915_NUM_ENGINES];
 	struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1]
 					    [MAX_ENGINE_INSTANCE + 1];
+	enum intel_submission_method submission_method;
 
 	/*
 	 * Default address space (either GGTT or ppGTT depending on arch).
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 7638fb2a45f4..bf5b9f303a68 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1113,7 +1113,6 @@ static int intel_gt_reset_engine(struct intel_engine_cs *engine)
 int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
 {
 	struct intel_gt *gt = engine->gt;
-	bool uses_guc = intel_engine_in_guc_submission_mode(engine);
 	int ret;
 
 	ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags);
@@ -1129,10 +1128,10 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
 			   "Resetting %s for %s\n", engine->name, msg);
 	atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);
 
-	if (!uses_guc)
-		ret = intel_gt_reset_engine(engine);
-	else
+	if (intel_engine_uses_guc(engine))
 		ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine);
+	else
+		ret = intel_gt_reset_engine(engine);
 	if (ret) {
 		/* If we fail here, we expect to fallback to a global reset */
 		ENGINE_TRACE(engine, "Failed to reset, err: %d\n", ret);
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 5d7fac383add..9304a35384aa 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -4715,7 +4715,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_virtual_reset),
 	};
 
-	if (!HAS_EXECLISTS(i915))
+	if (i915->gt.submission_method != INTEL_SUBMISSION_ELSP)
 		return 0;
 
 	if (intel_gt_is_wedged(&i915->gt))
diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
index 3350e7c995bc..6cd9f6bc240c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
@@ -291,7 +291,7 @@ int intel_ring_submission_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_ctx_switch_wa),
 	};
 
-	if (HAS_EXECLISTS(i915))
+	if (i915->gt.submission_method > INTEL_SUBMISSION_RING)
 		return 0;
 
 	return intel_gt_live_subtests(tests, &i915->gt);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index f72faa0b8339..17b551a0c89f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -745,8 +745,3 @@ void intel_guc_submission_init_early(struct intel_guc *guc)
 {
 	guc->submission_selected = __guc_submission_selected(guc);
 }
-
-bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine)
-{
-	return engine->set_default_submission == guc_set_default_submission;
-}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
index 5f7b9e6347d0..3f7005018939 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
@@ -20,7 +20,6 @@ void intel_guc_submission_fini(struct intel_guc *guc);
 int intel_guc_preempt_work_create(struct intel_guc *guc);
 void intel_guc_preempt_work_destroy(struct intel_guc *guc);
 int intel_guc_submission_setup(struct intel_engine_cs *engine);
-bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine);
 
 static inline bool intel_guc_submission_is_supported(struct intel_guc *guc)
 {
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 112ba5f2ce90..89665e14ab01 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1273,11 +1273,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 	case 8:
 	case 9:
 	case 10:
-		if (intel_engine_in_execlists_submission_mode(ce->engine)) {
-			stream->specific_ctx_id_mask =
-				(1U << GEN8_CTX_ID_WIDTH) - 1;
-			stream->specific_ctx_id = stream->specific_ctx_id_mask;
-		} else {
+		if (intel_engine_uses_guc(ce->engine)) {
 			/*
 			 * When using GuC, the context descriptor we write in
 			 * i915 is read by GuC and rewritten before it's
@@ -1296,6 +1292,10 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 			 */
 			stream->specific_ctx_id_mask =
 				(1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
+		} else {
+			stream->specific_ctx_id_mask =
+				(1U << GEN8_CTX_ID_WIDTH) - 1;
+			stream->specific_ctx_id = stream->specific_ctx_id_mask;
 		}
 		break;
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [Intel-gfx] [CI 03/12] drm/i915/gt: Move CS interrupt handler to the backend
  2021-02-03 11:17 [Intel-gfx] [CI 01/12] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
  2021-02-03 11:17 ` [Intel-gfx] [CI 02/12] drm/i915/gt: Move submission_method into intel_gt Chris Wilson
@ 2021-02-03 11:17 ` Chris Wilson
  2021-02-03 11:17 ` [Intel-gfx] [CI 04/12] drm/i915: Replace engine->schedule() with a known request operation Chris Wilson
                   ` (9 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2021-02-03 11:17 UTC (permalink / raw)
  To: intel-gfx

The different submission backends each have their own preferred
behaviour and interrupt setup. Let each handle their own interrupts.

This becomes more useful later as we to extract the use of auxiliary
state in the interrupt handler that is backend specific.

v2: An overabundance of caution is always justified; put a barrier on
updating the irq handler so that we know that the next interrupt will
be redirected towards ourselves.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  7 ++
 drivers/gpu/drm/i915/gt/intel_engine_types.h  | 14 +---
 .../drm/i915/gt/intel_execlists_submission.c  | 41 ++++++++++
 drivers/gpu/drm/i915/gt/intel_gt_irq.c        | 82 ++++++-------------
 drivers/gpu/drm/i915/gt/intel_gt_irq.h        | 23 ++++++
 .../gpu/drm/i915/gt/intel_ring_submission.c   |  8 ++
 drivers/gpu/drm/i915/gt/intel_rps.c           |  2 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 11 ++-
 drivers/gpu/drm/i915/i915_irq.c               | 10 ++-
 9 files changed, 124 insertions(+), 74 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index dab8d734e272..13ef5725ef51 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -255,6 +255,11 @@ static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
 	intel_engine_set_hwsp_writemask(engine, ~0u);
 }
 
+static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir)
+{
+	GEM_DEBUG_WARN_ON(iir);
+}
+
 static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 {
 	const struct engine_info *info = &intel_engines[id];
@@ -292,6 +297,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	engine->hw_id = info->hw_id;
 	engine->guc_id = MAKE_GUC_ID(info->class, info->instance);
 
+	engine->irq_handler = nop_irq_handler;
+
 	engine->class = info->class;
 	engine->instance = info->instance;
 	__sprint_engine_name(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 9d59de5c559a..c9974de2dd00 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -402,6 +402,7 @@ struct intel_engine_cs {
 	u32		irq_enable_mask; /* bitmask to enable ring interrupt */
 	void		(*irq_enable)(struct intel_engine_cs *engine);
 	void		(*irq_disable)(struct intel_engine_cs *engine);
+	void		(*irq_handler)(struct intel_engine_cs *engine, u16 iir);
 
 	void		(*sanitize)(struct intel_engine_cs *engine);
 	int		(*resume)(struct intel_engine_cs *engine);
@@ -481,10 +482,9 @@ struct intel_engine_cs {
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
 #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
 #define I915_ENGINE_HAS_TIMESLICES   BIT(4)
-#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5)
-#define I915_ENGINE_IS_VIRTUAL       BIT(6)
-#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7)
-#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8)
+#define I915_ENGINE_IS_VIRTUAL       BIT(5)
+#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
 	unsigned int flags;
 
 	/*
@@ -588,12 +588,6 @@ intel_engine_has_timeslices(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_HAS_TIMESLICES;
 }
 
-static inline bool
-intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
-{
-	return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
-}
-
 static inline bool
 intel_engine_is_virtual(const struct intel_engine_cs *engine)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 8dc52cc43f27..554eaaa268a7 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -118,6 +118,7 @@
 #include "intel_engine_stats.h"
 #include "intel_execlists_submission.h"
 #include "intel_gt.h"
+#include "intel_gt_irq.h"
 #include "intel_gt_pm.h"
 #include "intel_gt_requests.h"
 #include "intel_lrc.h"
@@ -2394,6 +2395,45 @@ static void execlists_submission_tasklet(struct tasklet_struct *t)
 	rcu_read_unlock();
 }
 
+static void execlists_irq_handler(struct intel_engine_cs *engine, u16 iir)
+{
+	bool tasklet = false;
+
+	if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) {
+		u32 eir;
+
+		/* Upper 16b are the enabling mask, rsvd for internal errors */
+		eir = ENGINE_READ(engine, RING_EIR) & GENMASK(15, 0);
+		ENGINE_TRACE(engine, "CS error: %x\n", eir);
+
+		/* Disable the error interrupt until after the reset */
+		if (likely(eir)) {
+			ENGINE_WRITE(engine, RING_EMR, ~0u);
+			ENGINE_WRITE(engine, RING_EIR, eir);
+			WRITE_ONCE(engine->execlists.error_interrupt, eir);
+			tasklet = true;
+		}
+	}
+
+	if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
+		WRITE_ONCE(engine->execlists.yield,
+			   ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
+		ENGINE_TRACE(engine, "semaphore yield: %08x\n",
+			     engine->execlists.yield);
+		if (del_timer(&engine->execlists.timer))
+			tasklet = true;
+	}
+
+	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
+		tasklet = true;
+
+	if (iir & GT_RENDER_USER_INTERRUPT)
+		intel_engine_signal_breadcrumbs(engine);
+
+	if (tasklet)
+		tasklet_hi_schedule(&engine->execlists.tasklet);
+}
+
 static void __execlists_kick(struct intel_engine_execlists *execlists)
 {
 	/* Kick the tasklet for some interrupt coalescing and reset handling */
@@ -3126,6 +3166,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 		 * until a more refined solution exists.
 		 */
 	}
+	intel_engine_set_irq_handler(engine, execlists_irq_handler);
 
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
 	if (!intel_vgpu_active(engine->i915)) {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index 9fc6c912a4e5..d29126c458ba 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -20,48 +20,6 @@ static void guc_irq_handler(struct intel_guc *guc, u16 iir)
 		intel_guc_to_host_event_handler(guc);
 }
 
-static void
-cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
-{
-	bool tasklet = false;
-
-	if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) {
-		u32 eir;
-
-		/* Upper 16b are the enabling mask, rsvd for internal errors */
-		eir = ENGINE_READ(engine, RING_EIR) & GENMASK(15, 0);
-		ENGINE_TRACE(engine, "CS error: %x\n", eir);
-
-		/* Disable the error interrupt until after the reset */
-		if (likely(eir)) {
-			ENGINE_WRITE(engine, RING_EMR, ~0u);
-			ENGINE_WRITE(engine, RING_EIR, eir);
-			WRITE_ONCE(engine->execlists.error_interrupt, eir);
-			tasklet = true;
-		}
-	}
-
-	if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
-		WRITE_ONCE(engine->execlists.yield,
-			   ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
-		ENGINE_TRACE(engine, "semaphore yield: %08x\n",
-			     engine->execlists.yield);
-		if (del_timer(&engine->execlists.timer))
-			tasklet = true;
-	}
-
-	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
-		tasklet = true;
-
-	if (iir & GT_RENDER_USER_INTERRUPT) {
-		intel_engine_signal_breadcrumbs(engine);
-		tasklet |= intel_engine_needs_breadcrumb_tasklet(engine);
-	}
-
-	if (tasklet)
-		tasklet_hi_schedule(&engine->execlists.tasklet);
-}
-
 static u32
 gen11_gt_engine_identity(struct intel_gt *gt,
 			 const unsigned int bank, const unsigned int bit)
@@ -122,7 +80,7 @@ gen11_engine_irq_handler(struct intel_gt *gt, const u8 class,
 		engine = NULL;
 
 	if (likely(engine))
-		return cs_irq_handler(engine, iir);
+		return intel_engine_cs_irq(engine, iir);
 
 	WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n",
 		  class, instance);
@@ -275,9 +233,12 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
 void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
 {
 	if (gt_iir & GT_RENDER_USER_INTERRUPT)
-		intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]);
+		intel_engine_cs_irq(gt->engine_class[RENDER_CLASS][0],
+				    gt_iir);
+
 	if (gt_iir & ILK_BSD_USER_INTERRUPT)
-		intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]);
+		intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0],
+				    gt_iir);
 }
 
 static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
@@ -301,11 +262,16 @@ static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
 void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
 {
 	if (gt_iir & GT_RENDER_USER_INTERRUPT)
-		intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]);
+		intel_engine_cs_irq(gt->engine_class[RENDER_CLASS][0],
+				    gt_iir);
+
 	if (gt_iir & GT_BSD_USER_INTERRUPT)
-		intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]);
+		intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0],
+				    gt_iir >> 12);
+
 	if (gt_iir & GT_BLT_USER_INTERRUPT)
-		intel_engine_signal_breadcrumbs(gt->engine_class[COPY_ENGINE_CLASS][0]);
+		intel_engine_cs_irq(gt->engine_class[COPY_ENGINE_CLASS][0],
+				    gt_iir >> 22);
 
 	if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
 		      GT_BSD_CS_ERROR_INTERRUPT |
@@ -324,10 +290,10 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
 	if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
 		iir = raw_reg_read(regs, GEN8_GT_IIR(0));
 		if (likely(iir)) {
-			cs_irq_handler(gt->engine_class[RENDER_CLASS][0],
-				       iir >> GEN8_RCS_IRQ_SHIFT);
-			cs_irq_handler(gt->engine_class[COPY_ENGINE_CLASS][0],
-				       iir >> GEN8_BCS_IRQ_SHIFT);
+			intel_engine_cs_irq(gt->engine_class[RENDER_CLASS][0],
+					    iir >> GEN8_RCS_IRQ_SHIFT);
+			intel_engine_cs_irq(gt->engine_class[COPY_ENGINE_CLASS][0],
+					    iir >> GEN8_BCS_IRQ_SHIFT);
 			raw_reg_write(regs, GEN8_GT_IIR(0), iir);
 		}
 	}
@@ -335,10 +301,10 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
 	if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) {
 		iir = raw_reg_read(regs, GEN8_GT_IIR(1));
 		if (likely(iir)) {
-			cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][0],
-				       iir >> GEN8_VCS0_IRQ_SHIFT);
-			cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][1],
-				       iir >> GEN8_VCS1_IRQ_SHIFT);
+			intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0],
+					    iir >> GEN8_VCS0_IRQ_SHIFT);
+			intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][1],
+					    iir >> GEN8_VCS1_IRQ_SHIFT);
 			raw_reg_write(regs, GEN8_GT_IIR(1), iir);
 		}
 	}
@@ -346,8 +312,8 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
 	if (master_ctl & GEN8_GT_VECS_IRQ) {
 		iir = raw_reg_read(regs, GEN8_GT_IIR(3));
 		if (likely(iir)) {
-			cs_irq_handler(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0],
-				       iir >> GEN8_VECS_IRQ_SHIFT);
+			intel_engine_cs_irq(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0],
+					    iir >> GEN8_VECS_IRQ_SHIFT);
 			raw_reg_write(regs, GEN8_GT_IIR(3), iir);
 		}
 	}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.h b/drivers/gpu/drm/i915/gt/intel_gt_irq.h
index f667e976fb2b..41cad38668c5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.h
@@ -8,6 +8,8 @@
 
 #include <linux/types.h>
 
+#include "intel_engine_types.h"
+
 struct intel_gt;
 
 #define GEN8_GT_IRQS (GEN8_GT_RCS_IRQ | \
@@ -39,4 +41,25 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl);
 void gen8_gt_irq_reset(struct intel_gt *gt);
 void gen8_gt_irq_postinstall(struct intel_gt *gt);
 
+static inline void intel_engine_cs_irq(struct intel_engine_cs *engine, u16 iir)
+{
+	if (iir)
+		engine->irq_handler(engine, iir);
+}
+
+static inline void
+intel_engine_set_irq_handler(struct intel_engine_cs *engine,
+			     void (*fn)(struct intel_engine_cs *engine,
+					u16 iir))
+{
+	/*
+	 * As the interrupt is live as allocate and setup the engines,
+	 * err on the side of caution and apply barriers to updating
+	 * the irq handler callback. This assures that when we do use
+	 * the engine, we will receive interrupts only to ourselves,
+	 * and not lose any.
+	 */
+	smp_store_mb(engine->irq_handler, fn);
+}
+
 #endif /* INTEL_GT_IRQ_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 3cb2ce503544..aa9cfb4dcbca 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -12,6 +12,7 @@
 #include "intel_breadcrumbs.h"
 #include "intel_context.h"
 #include "intel_gt.h"
+#include "intel_gt_irq.h"
 #include "intel_reset.h"
 #include "intel_ring.h"
 #include "shmem_utils.h"
@@ -997,10 +998,17 @@ static void ring_release(struct intel_engine_cs *engine)
 	intel_timeline_put(engine->legacy.timeline);
 }
 
+static void irq_handler(struct intel_engine_cs *engine, u16 iir)
+{
+	intel_engine_signal_breadcrumbs(engine);
+}
+
 static void setup_irq(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *i915 = engine->i915;
 
+	intel_engine_set_irq_handler(engine, irq_handler);
+
 	if (INTEL_GEN(i915) >= 6) {
 		engine->irq_enable = gen6_irq_enable;
 		engine->irq_disable = gen6_irq_disable;
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 405d814e9040..97cab1b99871 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1774,7 +1774,7 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
 		return;
 
 	if (pm_iir & PM_VEBOX_USER_INTERRUPT)
-		intel_engine_signal_breadcrumbs(gt->engine[VECS0]);
+		intel_engine_cs_irq(gt->engine[VECS0], pm_iir >> 10);
 
 	if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
 		DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 17b551a0c89f..335719f17490 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -11,6 +11,7 @@
 #include "gt/intel_context.h"
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_irq.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_lrc.h"
 #include "gt/intel_mocs.h"
@@ -264,6 +265,14 @@ static void guc_submission_tasklet(struct tasklet_struct *t)
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
+static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
+{
+	if (iir & GT_RENDER_USER_INTERRUPT) {
+		intel_engine_signal_breadcrumbs(engine);
+		tasklet_hi_schedule(&engine->execlists.tasklet);
+	}
+}
+
 static void guc_reset_prepare(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -645,7 +654,6 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
 	}
 	engine->set_default_submission = guc_set_default_submission;
 
-	engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
 	engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 
 	/*
@@ -681,6 +689,7 @@ static void rcs_submission_override(struct intel_engine_cs *engine)
 static inline void guc_default_irqs(struct intel_engine_cs *engine)
 {
 	engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT;
+	intel_engine_set_irq_handler(engine, cs_irq_handler);
 }
 
 int intel_guc_submission_setup(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 9d47da8ec86d..0ff3f64c4bb5 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3954,7 +3954,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
 		intel_uncore_write16(&dev_priv->uncore, GEN2_IIR, iir);
 
 		if (iir & I915_USER_INTERRUPT)
-			intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]);
+			intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir);
 
 		if (iir & I915_MASTER_ERROR_INTERRUPT)
 			i8xx_error_irq_handler(dev_priv, eir, eir_stuck);
@@ -4062,7 +4062,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
 		intel_uncore_write(&dev_priv->uncore, GEN2_IIR, iir);
 
 		if (iir & I915_USER_INTERRUPT)
-			intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]);
+			intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir);
 
 		if (iir & I915_MASTER_ERROR_INTERRUPT)
 			i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
@@ -4207,10 +4207,12 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
 		intel_uncore_write(&dev_priv->uncore, GEN2_IIR, iir);
 
 		if (iir & I915_USER_INTERRUPT)
-			intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]);
+			intel_engine_cs_irq(dev_priv->gt.engine[RCS0],
+					    iir);
 
 		if (iir & I915_BSD_USER_INTERRUPT)
-			intel_engine_signal_breadcrumbs(dev_priv->gt.engine[VCS0]);
+			intel_engine_cs_irq(dev_priv->gt.engine[VCS0],
+					    iir >> 25);
 
 		if (iir & I915_MASTER_ERROR_INTERRUPT)
 			i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [Intel-gfx] [CI 04/12] drm/i915: Replace engine->schedule() with a known request operation
  2021-02-03 11:17 [Intel-gfx] [CI 01/12] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
  2021-02-03 11:17 ` [Intel-gfx] [CI 02/12] drm/i915/gt: Move submission_method into intel_gt Chris Wilson
  2021-02-03 11:17 ` [Intel-gfx] [CI 03/12] drm/i915/gt: Move CS interrupt handler to the backend Chris Wilson
@ 2021-02-03 11:17 ` Chris Wilson
  2021-02-03 11:17 ` [Intel-gfx] [CI 05/12] drm/i915: Restructure priority inheritance Chris Wilson
                   ` (8 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2021-02-03 11:17 UTC (permalink / raw)
  To: intel-gfx

Looking to the future, we want to set the scheduling attributes
explicitly and so replace the generic engine->schedule() with the more
direct i915_request_set_priority()

What it loses in removing the 'schedule' name from the function, it
gains in having an explicit entry point with a stated goal.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c  |  5 ++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |  5 ++-
 drivers/gpu/drm/i915/gem/i915_gem_wait.c      | 29 +++++-----------
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  3 --
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |  4 +--
 drivers/gpu/drm/i915/gt/intel_engine_types.h  | 27 ++++++++-------
 drivers/gpu/drm/i915/gt/intel_engine_user.c   |  2 +-
 .../drm/i915/gt/intel_execlists_submission.c  |  3 +-
 drivers/gpu/drm/i915/gt/selftest_execlists.c  | 33 +++++--------------
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  | 11 +++----
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  3 +-
 drivers/gpu/drm/i915/i915_request.c           | 10 +++---
 drivers/gpu/drm/i915/i915_request.h           |  5 +++
 drivers/gpu/drm/i915/i915_scheduler.c         | 15 +++++----
 drivers/gpu/drm/i915/i915_scheduler.h         |  3 +-
 15 files changed, 64 insertions(+), 94 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index d8f10589e09e..aca964f7ba72 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -13662,7 +13662,6 @@ int
 intel_prepare_plane_fb(struct drm_plane *_plane,
 		       struct drm_plane_state *_new_plane_state)
 {
-	struct i915_sched_attr attr = { .priority = I915_PRIORITY_DISPLAY };
 	struct intel_plane *plane = to_intel_plane(_plane);
 	struct intel_plane_state *new_plane_state =
 		to_intel_plane_state(_new_plane_state);
@@ -13703,7 +13702,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
 
 	if (new_plane_state->uapi.fence) { /* explicit fencing */
 		i915_gem_fence_wait_priority(new_plane_state->uapi.fence,
-					     &attr);
+					     I915_PRIORITY_DISPLAY);
 		ret = i915_sw_fence_await_dma_fence(&state->commit_ready,
 						    new_plane_state->uapi.fence,
 						    i915_fence_timeout(dev_priv),
@@ -13725,7 +13724,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
 	if (ret)
 		return ret;
 
-	i915_gem_object_wait_priority(obj, 0, &attr);
+	i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY);
 	i915_gem_object_flush_frontbuffer(obj, ORIGIN_DIRTYFB);
 
 	if (!new_plane_state->uapi.fence) { /* implicit fencing */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 3411ad197fa6..325766abca21 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -549,15 +549,14 @@ static inline void __start_cpu_write(struct drm_i915_gem_object *obj)
 		obj->cache_dirty = true;
 }
 
-void i915_gem_fence_wait_priority(struct dma_fence *fence,
-				  const struct i915_sched_attr *attr);
+void i915_gem_fence_wait_priority(struct dma_fence *fence, int prio);
 
 int i915_gem_object_wait(struct drm_i915_gem_object *obj,
 			 unsigned int flags,
 			 long timeout);
 int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 				  unsigned int flags,
-				  const struct i915_sched_attr *attr);
+				  int prio);
 
 void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
 					 enum fb_op_origin origin);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
index 4b9856d5ba14..d79bf16083bd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
@@ -91,22 +91,12 @@ i915_gem_object_wait_reservation(struct dma_resv *resv,
 	return timeout;
 }
 
-static void fence_set_priority(struct dma_fence *fence,
-			       const struct i915_sched_attr *attr)
+static void fence_set_priority(struct dma_fence *fence, int prio)
 {
-	struct i915_request *rq;
-	struct intel_engine_cs *engine;
-
 	if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
 		return;
 
-	rq = to_request(fence);
-	engine = rq->engine;
-
-	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
-	if (engine->schedule)
-		engine->schedule(rq, attr);
-	rcu_read_unlock();
+	i915_request_set_priority(to_request(fence), prio);
 }
 
 static inline bool __dma_fence_is_chain(const struct dma_fence *fence)
@@ -114,8 +104,7 @@ static inline bool __dma_fence_is_chain(const struct dma_fence *fence)
 	return fence->ops == &dma_fence_chain_ops;
 }
 
-void i915_gem_fence_wait_priority(struct dma_fence *fence,
-				  const struct i915_sched_attr *attr)
+void i915_gem_fence_wait_priority(struct dma_fence *fence, int prio)
 {
 	if (dma_fence_is_signaled(fence))
 		return;
@@ -128,19 +117,19 @@ void i915_gem_fence_wait_priority(struct dma_fence *fence,
 		int i;
 
 		for (i = 0; i < array->num_fences; i++)
-			fence_set_priority(array->fences[i], attr);
+			fence_set_priority(array->fences[i], prio);
 	} else if (__dma_fence_is_chain(fence)) {
 		struct dma_fence *iter;
 
 		/* The chain is ordered; if we boost the last, we boost all */
 		dma_fence_chain_for_each(iter, fence) {
 			fence_set_priority(to_dma_fence_chain(iter)->fence,
-					   attr);
+					   prio);
 			break;
 		}
 		dma_fence_put(iter);
 	} else {
-		fence_set_priority(fence, attr);
+		fence_set_priority(fence, prio);
 	}
 
 	local_bh_enable(); /* kick the tasklets if queues were reprioritised */
@@ -149,7 +138,7 @@ void i915_gem_fence_wait_priority(struct dma_fence *fence,
 int
 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 			      unsigned int flags,
-			      const struct i915_sched_attr *attr)
+			      int prio)
 {
 	struct dma_fence *excl;
 
@@ -164,7 +153,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 			return ret;
 
 		for (i = 0; i < count; i++) {
-			i915_gem_fence_wait_priority(shared[i], attr);
+			i915_gem_fence_wait_priority(shared[i], prio);
 			dma_fence_put(shared[i]);
 		}
 
@@ -174,7 +163,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 	}
 
 	if (excl) {
-		i915_gem_fence_wait_priority(excl, attr);
+		i915_gem_fence_wait_priority(excl, prio);
 		dma_fence_put(excl);
 	}
 	return 0;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 13ef5725ef51..2be0db187ee6 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -326,9 +326,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	if (engine->context_size)
 		DRIVER_CAPS(i915)->has_logical_contexts = true;
 
-	/* Nothing to do here, execute in order of dependencies */
-	engine->schedule = NULL;
-
 	ewma__engine_latency_init(&engine->latency);
 
 	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 778bcae5ef2c..0b026cde9f09 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -114,7 +114,7 @@ static void heartbeat(struct work_struct *wrk)
 			 * but all other contexts, including the kernel
 			 * context are stuck waiting for the signal.
 			 */
-		} else if (engine->schedule &&
+		} else if (intel_engine_has_scheduler(engine) &&
 			   rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
 			/*
 			 * Gradually raise the priority of the heartbeat to
@@ -129,7 +129,7 @@ static void heartbeat(struct work_struct *wrk)
 				attr.priority = I915_PRIORITY_BARRIER;
 
 			local_bh_disable();
-			engine->schedule(rq, &attr);
+			i915_request_set_priority(rq, attr.priority);
 			local_bh_enable();
 		} else {
 			if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index c9974de2dd00..7cc5f4ca3f56 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -454,14 +454,6 @@ struct intel_engine_cs {
 	void            (*bond_execute)(struct i915_request *rq,
 					struct dma_fence *signal);
 
-	/*
-	 * Call when the priority on a request has changed and it and its
-	 * dependencies may need rescheduling. Note the request itself may
-	 * not be ready to run!
-	 */
-	void		(*schedule)(struct i915_request *request,
-				    const struct i915_sched_attr *attr);
-
 	void		(*release)(struct intel_engine_cs *engine);
 
 	struct intel_engine_execlists execlists;
@@ -479,12 +471,13 @@ struct intel_engine_cs {
 
 #define I915_ENGINE_USING_CMD_PARSER BIT(0)
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
-#define I915_ENGINE_HAS_PREEMPTION   BIT(2)
-#define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
-#define I915_ENGINE_HAS_TIMESLICES   BIT(4)
-#define I915_ENGINE_IS_VIRTUAL       BIT(5)
-#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
-#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
+#define I915_ENGINE_HAS_SCHEDULER    BIT(2)
+#define I915_ENGINE_HAS_PREEMPTION   BIT(3)
+#define I915_ENGINE_HAS_SEMAPHORES   BIT(4)
+#define I915_ENGINE_HAS_TIMESLICES   BIT(5)
+#define I915_ENGINE_IS_VIRTUAL       BIT(6)
+#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8)
 	unsigned int flags;
 
 	/*
@@ -567,6 +560,12 @@ intel_engine_supports_stats(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
 }
 
+static inline bool
+intel_engine_has_scheduler(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_HAS_SCHEDULER;
+}
+
 static inline bool
 intel_engine_has_preemption(const struct intel_engine_cs *engine)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 1cbd84eb24e4..64eccdf32a22 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -107,7 +107,7 @@ static void set_scheduler_caps(struct drm_i915_private *i915)
 	for_each_uabi_engine(engine, i915) { /* all engines must agree! */
 		int i;
 
-		if (engine->schedule)
+		if (intel_engine_has_scheduler(engine))
 			enabled |= (I915_SCHEDULER_CAP_ENABLED |
 				    I915_SCHEDULER_CAP_PRIORITY);
 		else
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 554eaaa268a7..f95a35383f48 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -3106,7 +3106,6 @@ static bool can_preempt(struct intel_engine_cs *engine)
 static void execlists_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->submit_request = execlists_submit_request;
-	engine->schedule = i915_schedule;
 	engine->execlists.tasklet.callback = execlists_submission_tasklet;
 }
 
@@ -3168,6 +3167,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 	}
 	intel_engine_set_irq_handler(engine, execlists_irq_handler);
 
+	engine->flags |= I915_ENGINE_HAS_SCHEDULER;
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
 	if (!intel_vgpu_active(engine->i915)) {
 		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
@@ -3680,7 +3680,6 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
 	ve->base.cops = &virtual_context_ops;
 	ve->base.request_alloc = execlists_request_alloc;
 
-	ve->base.schedule = i915_schedule;
 	ve->base.submit_request = virtual_submit_request;
 	ve->base.bond_execute = virtual_bond_execute;
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 9304a35384aa..951e2bf867e1 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -268,12 +268,8 @@ static int live_unlite_restore(struct intel_gt *gt, int prio)
 		i915_request_put(rq[0]);
 
 		if (prio) {
-			struct i915_sched_attr attr = {
-				.priority = prio,
-			};
-
 			/* Alternatively preempt the spinner with ce[1] */
-			engine->schedule(rq[1], &attr);
+			i915_request_set_priority(rq[1], prio);
 		}
 
 		/* And switch back to ce[0] for good measure */
@@ -873,9 +869,6 @@ release_queue(struct intel_engine_cs *engine,
 	      struct i915_vma *vma,
 	      int idx, int prio)
 {
-	struct i915_sched_attr attr = {
-		.priority = prio,
-	};
 	struct i915_request *rq;
 	u32 *cs;
 
@@ -900,7 +893,7 @@ release_queue(struct intel_engine_cs *engine,
 	i915_request_add(rq);
 
 	local_bh_disable();
-	engine->schedule(rq, &attr);
+	i915_request_set_priority(rq, prio);
 	local_bh_enable(); /* kick tasklet */
 
 	i915_request_put(rq);
@@ -1310,7 +1303,6 @@ static int live_timeslice_queue(void *arg)
 		goto err_pin;
 
 	for_each_engine(engine, gt, id) {
-		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
 		struct i915_request *rq, *nop;
 
 		if (!intel_engine_has_preemption(engine))
@@ -1325,7 +1317,7 @@ static int live_timeslice_queue(void *arg)
 			err = PTR_ERR(rq);
 			goto err_heartbeat;
 		}
-		engine->schedule(rq, &attr);
+		i915_request_set_priority(rq, I915_PRIORITY_MAX);
 		err = wait_for_submit(engine, rq, HZ / 2);
 		if (err) {
 			pr_err("%s: Timed out trying to submit semaphores\n",
@@ -1806,7 +1798,6 @@ static int live_late_preempt(void *arg)
 	struct i915_gem_context *ctx_hi, *ctx_lo;
 	struct igt_spinner spin_hi, spin_lo;
 	struct intel_engine_cs *engine;
-	struct i915_sched_attr attr = {};
 	enum intel_engine_id id;
 	int err = -ENOMEM;
 
@@ -1866,8 +1857,7 @@ static int live_late_preempt(void *arg)
 			goto err_wedged;
 		}
 
-		attr.priority = I915_PRIORITY_MAX;
-		engine->schedule(rq, &attr);
+		i915_request_set_priority(rq, I915_PRIORITY_MAX);
 
 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
 			pr_err("High priority context failed to preempt the low priority context\n");
@@ -2412,7 +2402,6 @@ static int live_preempt_cancel(void *arg)
 
 static int live_suppress_self_preempt(void *arg)
 {
-	struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
 	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
 	struct preempt_client a, b;
@@ -2480,7 +2469,7 @@ static int live_suppress_self_preempt(void *arg)
 			i915_request_add(rq_b);
 
 			GEM_BUG_ON(i915_request_completed(rq_a));
-			engine->schedule(rq_a, &attr);
+			i915_request_set_priority(rq_a, I915_PRIORITY_MAX);
 			igt_spinner_end(&a.spin);
 
 			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
@@ -2545,7 +2534,6 @@ static int live_chain_preempt(void *arg)
 		goto err_client_hi;
 
 	for_each_engine(engine, gt, id) {
-		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
 		struct igt_live_test t;
 		struct i915_request *rq;
 		int ring_size, count, i;
@@ -2612,7 +2600,7 @@ static int live_chain_preempt(void *arg)
 
 			i915_request_get(rq);
 			i915_request_add(rq);
-			engine->schedule(rq, &attr);
+			i915_request_set_priority(rq, I915_PRIORITY_MAX);
 
 			igt_spinner_end(&hi.spin);
 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
@@ -2964,14 +2952,12 @@ static int live_preempt_gang(void *arg)
 			return -EIO;
 
 		do {
-			struct i915_sched_attr attr = { .priority = prio++ };
-
 			err = create_gang(engine, &rq);
 			if (err)
 				break;
 
 			/* Submit each spinner at increasing priority */
-			engine->schedule(rq, &attr);
+			i915_request_set_priority(rq, prio++);
 		} while (prio <= I915_PRIORITY_MAX &&
 			 !__igt_timeout(end_time, NULL));
 		pr_debug("%s: Preempt chain of %d requests\n",
@@ -3192,9 +3178,6 @@ static int preempt_user(struct intel_engine_cs *engine,
 			struct i915_vma *global,
 			int id)
 {
-	struct i915_sched_attr attr = {
-		.priority = I915_PRIORITY_MAX
-	};
 	struct i915_request *rq;
 	int err = 0;
 	u32 *cs;
@@ -3219,7 +3202,7 @@ static int preempt_user(struct intel_engine_cs *engine,
 	i915_request_get(rq);
 	i915_request_add(rq);
 
-	engine->schedule(rq, &attr);
+	i915_request_set_priority(rq, I915_PRIORITY_MAX);
 
 	if (i915_request_wait(rq, 0, HZ / 2) < 0)
 		err = -ETIME;
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index d6ce4075602c..8cad102922e7 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -858,12 +858,11 @@ static int active_engine(void *data)
 		rq[idx] = i915_request_get(new);
 		i915_request_add(new);
 
-		if (engine->schedule && arg->flags & TEST_PRIORITY) {
-			struct i915_sched_attr attr = {
-				.priority =
-					i915_prandom_u32_max_state(512, &prng),
-			};
-			engine->schedule(rq[idx], &attr);
+		if (intel_engine_has_scheduler(engine) &&
+		    arg->flags & TEST_PRIORITY) {
+			int prio = i915_prandom_u32_max_state(512, &prng);
+
+			i915_request_set_priority(rq[idx], prio);
 		}
 
 		err = active_request_put(old);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 335719f17490..2f5c39091d49 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -638,8 +638,6 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
 	engine->cops = &guc_context_ops;
 	engine->request_alloc = guc_request_alloc;
 
-	engine->schedule = i915_schedule;
-
 	engine->reset.prepare = guc_reset_prepare;
 	engine->reset.rewind = guc_reset_rewind;
 	engine->reset.cancel = guc_reset_cancel;
@@ -654,6 +652,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
 	}
 	engine->set_default_submission = guc_set_default_submission;
 
+	engine->flags |= I915_ENGINE_HAS_SCHEDULER;
 	engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 
 	/*
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index a336d6c40d8b..916e74fbab6c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1223,7 +1223,7 @@ __i915_request_await_execution(struct i915_request *to,
 	}
 
 	/* Couple the dependency tree for PI on this exposed to->fence */
-	if (to->engine->schedule) {
+	if (i915_request_use_scheduler(to)) {
 		err = i915_sched_node_add_dependency(&to->sched,
 						     &from->sched,
 						     I915_DEPENDENCY_WEAK);
@@ -1364,7 +1364,7 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
 		return 0;
 	}
 
-	if (to->engine->schedule) {
+	if (i915_request_use_scheduler(to)) {
 		ret = i915_sched_node_add_dependency(&to->sched,
 						     &from->sched,
 						     I915_DEPENDENCY_EXTERNAL);
@@ -1551,7 +1551,7 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 			__i915_sw_fence_await_dma_fence(&rq->submit,
 							&prev->fence,
 							&rq->dmaq);
-		if (rq->engine->schedule)
+		if (i915_request_use_scheduler(rq))
 			__i915_sched_node_add_dependency(&rq->sched,
 							 &prev->sched,
 							 &rq->dep,
@@ -1623,8 +1623,8 @@ void __i915_request_queue(struct i915_request *rq,
 	 * decide whether to preempt the entire chain so that it is ready to
 	 * run at the earliest possible convenience.
 	 */
-	if (attr && rq->engine->schedule)
-		rq->engine->schedule(rq, attr);
+	if (attr)
+		i915_request_set_priority(rq, attr->priority);
 
 	local_bh_disable();
 	__i915_request_queue_bh(rq);
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index c0bd4cb8786a..9ce074ffc1dd 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -616,4 +616,9 @@ i915_request_active_timeline(const struct i915_request *rq)
 					 lockdep_is_held(&rq->engine->active.lock));
 }
 
+static inline bool i915_request_use_scheduler(const struct i915_request *rq)
+{
+	return intel_engine_has_scheduler(rq->engine);
+}
+
 #endif /* I915_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 85d18037a915..84a55df88687 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -227,10 +227,8 @@ static void kick_submission(struct intel_engine_cs *engine,
 	rcu_read_unlock();
 }
 
-static void __i915_schedule(struct i915_sched_node *node,
-			    const struct i915_sched_attr *attr)
+static void __i915_schedule(struct i915_sched_node *node, int prio)
 {
-	const int prio = max(attr->priority, node->attr.priority);
 	struct intel_engine_cs *engine;
 	struct i915_dependency *dep, *p;
 	struct i915_dependency stack;
@@ -244,6 +242,8 @@ static void __i915_schedule(struct i915_sched_node *node,
 	if (node_signaled(node))
 		return;
 
+	prio = max(prio, node->attr.priority);
+
 	stack.signaler = node;
 	list_add(&stack.dfs_link, &dfs);
 
@@ -297,7 +297,7 @@ static void __i915_schedule(struct i915_sched_node *node,
 	 */
 	if (node->attr.priority == I915_PRIORITY_INVALID) {
 		GEM_BUG_ON(!list_empty(&node->link));
-		node->attr = *attr;
+		node->attr.priority = prio;
 
 		if (stack.dfs_link.next == stack.dfs_link.prev)
 			return;
@@ -352,10 +352,13 @@ static void __i915_schedule(struct i915_sched_node *node,
 	spin_unlock(&engine->active.lock);
 }
 
-void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
+void i915_request_set_priority(struct i915_request *rq, int prio)
 {
+	if (!i915_request_use_scheduler(rq))
+		return;
+
 	spin_lock_irq(&schedule_lock);
-	__i915_schedule(&rq->sched, attr);
+	__i915_schedule(&rq->sched, prio);
 	spin_unlock_irq(&schedule_lock);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 8c5ed6fe0994..a045be784c67 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -35,8 +35,7 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node,
 
 void i915_sched_node_retire(struct i915_sched_node *node);
 
-void i915_schedule(struct i915_request *request,
-		   const struct i915_sched_attr *attr);
+void i915_request_set_priority(struct i915_request *request, int prio);
 
 struct list_head *
 i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [Intel-gfx] [CI 05/12] drm/i915: Restructure priority inheritance
  2021-02-03 11:17 [Intel-gfx] [CI 01/12] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
                   ` (2 preceding siblings ...)
  2021-02-03 11:17 ` [Intel-gfx] [CI 04/12] drm/i915: Replace engine->schedule() with a known request operation Chris Wilson
@ 2021-02-03 11:17 ` Chris Wilson
  2021-02-03 11:18 ` [Intel-gfx] [CI 06/12] drm/i915/selftests: Measure set-priority duration Chris Wilson
                   ` (7 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2021-02-03 11:17 UTC (permalink / raw)
  To: intel-gfx

In anticipation of wanting to be able to call pi from underneath an
engine's active.lock, rework the priority inheritance to primarily work
along an engine's priority queue, delegating any other engine that the
chain may traverse to a worker. This reduces the global spinlock from
governing the entire multi-engine priority inheritance depth-first search,
to a smaller lock on each engine around a single list on that engine.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   2 +
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |   3 +-
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   3 +
 drivers/gpu/drm/i915/i915_scheduler.c         | 356 +++++++++++-------
 drivers/gpu/drm/i915/i915_scheduler.h         |   3 +
 drivers/gpu/drm/i915/i915_scheduler_types.h   |  23 +-
 6 files changed, 249 insertions(+), 141 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 2be0db187ee6..b65a411aa7ce 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -582,6 +582,8 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine)
 
 	execlists->queue_priority_hint = INT_MIN;
 	execlists->queue = RB_ROOT_CACHED;
+
+	i915_sched_init_ipi(&execlists->ipi);
 }
 
 static void cleanup_status_page(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 0b026cde9f09..48a91c0dbad6 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -114,8 +114,7 @@ static void heartbeat(struct work_struct *wrk)
 			 * but all other contexts, including the kernel
 			 * context are stuck waiting for the signal.
 			 */
-		} else if (intel_engine_has_scheduler(engine) &&
-			   rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
+		} else if (rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
 			/*
 			 * Gradually raise the priority of the heartbeat to
 			 * give high priority work [which presumably desires
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 7cc5f4ca3f56..7159f9575e65 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -20,6 +20,7 @@
 #include "i915_gem.h"
 #include "i915_pmu.h"
 #include "i915_priolist_types.h"
+#include "i915_scheduler_types.h"
 #include "i915_selftest.h"
 #include "intel_breadcrumbs_types.h"
 #include "intel_sseu.h"
@@ -257,6 +258,8 @@ struct intel_engine_execlists {
 	struct rb_root_cached queue;
 	struct rb_root_cached virtual;
 
+	struct i915_sched_ipi ipi;
+
 	/**
 	 * @csb_write: control register for Context Switch buffer
 	 *
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 84a55df88687..035e4be5d573 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -17,7 +17,25 @@ static struct i915_global_scheduler {
 	struct kmem_cache *slab_priorities;
 } global;
 
-static DEFINE_SPINLOCK(schedule_lock);
+/*
+ * Virtual engines complicate acquiring the engine timeline lock,
+ * as their rq->engine pointer is not stable until under that
+ * engine lock. The simple ploy we use is to take the lock then
+ * check that the rq still belongs to the newly locked engine.
+ */
+#define lock_engine_irqsave(rq, flags) ({ \
+	struct i915_request * const rq__ = (rq); \
+	struct intel_engine_cs *engine__ = READ_ONCE(rq__->engine); \
+\
+	spin_lock_irqsave(&engine__->active.lock, (flags)); \
+	while (engine__ != READ_ONCE((rq__)->engine)) { \
+		spin_unlock(&engine__->active.lock); \
+		engine__ = READ_ONCE(rq__->engine); \
+		spin_lock(&engine__->active.lock); \
+	} \
+\
+	engine__; \
+})
 
 static struct i915_sched_node *node_get(struct i915_sched_node *node)
 {
@@ -30,17 +48,104 @@ static void node_put(struct i915_sched_node *node)
 	i915_request_put(container_of(node, struct i915_request, sched));
 }
 
+static inline int rq_prio(const struct i915_request *rq)
+{
+	return READ_ONCE(rq->sched.attr.priority);
+}
+
+static int ipi_get_prio(struct i915_request *rq)
+{
+	if (READ_ONCE(rq->sched.ipi_priority) == I915_PRIORITY_INVALID)
+		return I915_PRIORITY_INVALID;
+
+	return xchg(&rq->sched.ipi_priority, I915_PRIORITY_INVALID);
+}
+
+static void ipi_schedule(struct work_struct *wrk)
+{
+	struct i915_sched_ipi *ipi = container_of(wrk, typeof(*ipi), work);
+	struct i915_request *rq = xchg(&ipi->list, NULL);
+
+	do {
+		struct i915_request *rn = xchg(&rq->sched.ipi_link, NULL);
+		int prio;
+
+		prio = ipi_get_prio(rq);
+
+		/*
+		 * For cross-engine scheduling to work we rely on one of two
+		 * things:
+		 *
+		 * a) The requests are using dma-fence fences and so will not
+		 * be scheduled until the previous engine is completed, and
+		 * so we cannot cross back onto the original engine and end up
+		 * queuing an earlier request after the first (due to the
+		 * interrupted DFS).
+		 *
+		 * b) The requests are using semaphores and so may be already
+		 * be in flight, in which case if we cross back onto the same
+		 * engine, we will already have put the interrupted DFS into
+		 * the priolist, and the continuation will now be queued
+		 * afterwards [out-of-order]. However, since we are using
+		 * semaphores in this case, we also perform yield on semaphore
+		 * waits and so will reorder the requests back into the correct
+		 * sequence. This occurrence (of promoting a request chain
+		 * that crosses the engines using semaphores back unto itself)
+		 * should be unlikely enough that it probably does not matter...
+		 */
+		local_bh_disable();
+		i915_request_set_priority(rq, prio);
+		local_bh_enable();
+
+		i915_request_put(rq);
+		rq = ptr_mask_bits(rn, 1);
+	} while (rq);
+}
+
+void i915_sched_init_ipi(struct i915_sched_ipi *ipi)
+{
+	INIT_WORK(&ipi->work, ipi_schedule);
+	ipi->list = NULL;
+}
+
+static void __ipi_add(struct i915_request *rq)
+{
+#define STUB ((struct i915_request *)1)
+	struct intel_engine_cs *engine = READ_ONCE(rq->engine);
+	struct i915_request *first;
+
+	if (!i915_request_get_rcu(rq))
+		return;
+
+	/*
+	 * We only want to add the request once into the ipi.list (or else
+	 * the chain will be broken). The worker must be guaranteed to run
+	 * at least once for every call to ipi_add, but it is allowed to
+	 * coalesce multiple ipi_add into a single pass using the final
+	 * property value.
+	 */
+	if (__i915_request_is_complete(rq) ||
+	    cmpxchg(&rq->sched.ipi_link, NULL, STUB)) { /* already queued */
+		i915_request_put(rq);
+		return;
+	}
+
+	/* Carefully insert ourselves into the head of the llist */
+	first = READ_ONCE(engine->execlists.ipi.list);
+	do {
+		rq->sched.ipi_link = ptr_pack_bits(first, 1, 1);
+	} while (!try_cmpxchg(&engine->execlists.ipi.list, &first, rq));
+
+	if (!first)
+		queue_work(system_unbound_wq, &engine->execlists.ipi.work);
+}
+
 static const struct i915_request *
 node_to_request(const struct i915_sched_node *node)
 {
 	return container_of(node, const struct i915_request, sched);
 }
 
-static inline bool node_started(const struct i915_sched_node *node)
-{
-	return i915_request_started(node_to_request(node));
-}
-
 static inline bool node_signaled(const struct i915_sched_node *node)
 {
 	return i915_request_completed(node_to_request(node));
@@ -137,42 +242,6 @@ void __i915_priolist_free(struct i915_priolist *p)
 	kmem_cache_free(global.slab_priorities, p);
 }
 
-struct sched_cache {
-	struct list_head *priolist;
-};
-
-static struct intel_engine_cs *
-sched_lock_engine(const struct i915_sched_node *node,
-		  struct intel_engine_cs *locked,
-		  struct sched_cache *cache)
-{
-	const struct i915_request *rq = node_to_request(node);
-	struct intel_engine_cs *engine;
-
-	GEM_BUG_ON(!locked);
-
-	/*
-	 * Virtual engines complicate acquiring the engine timeline lock,
-	 * as their rq->engine pointer is not stable until under that
-	 * engine lock. The simple ploy we use is to take the lock then
-	 * check that the rq still belongs to the newly locked engine.
-	 */
-	while (locked != (engine = READ_ONCE(rq->engine))) {
-		spin_unlock(&locked->active.lock);
-		memset(cache, 0, sizeof(*cache));
-		spin_lock(&engine->active.lock);
-		locked = engine;
-	}
-
-	GEM_BUG_ON(locked != engine);
-	return locked;
-}
-
-static inline int rq_prio(const struct i915_request *rq)
-{
-	return rq->sched.attr.priority;
-}
-
 static inline bool need_preempt(int prio, int active)
 {
 	/*
@@ -198,19 +267,17 @@ static void kick_submission(struct intel_engine_cs *engine,
 	if (prio <= engine->execlists.queue_priority_hint)
 		return;
 
-	rcu_read_lock();
-
 	/* Nothing currently active? We're overdue for a submission! */
 	inflight = execlists_active(&engine->execlists);
 	if (!inflight)
-		goto unlock;
+		return;
 
 	/*
 	 * If we are already the currently executing context, don't
 	 * bother evaluating if we should preempt ourselves.
 	 */
 	if (inflight->context == rq->context)
-		goto unlock;
+		return;
 
 	ENGINE_TRACE(engine,
 		     "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n",
@@ -222,30 +289,28 @@ static void kick_submission(struct intel_engine_cs *engine,
 	engine->execlists.queue_priority_hint = prio;
 	if (need_preempt(prio, rq_prio(inflight)))
 		tasklet_hi_schedule(&engine->execlists.tasklet);
-
-unlock:
-	rcu_read_unlock();
 }
 
-static void __i915_schedule(struct i915_sched_node *node, int prio)
+static void ipi_priority(struct i915_request *rq, int prio)
 {
-	struct intel_engine_cs *engine;
-	struct i915_dependency *dep, *p;
-	struct i915_dependency stack;
-	struct sched_cache cache;
+	int old = READ_ONCE(rq->sched.ipi_priority);
+
+	do {
+		if (prio <= old)
+			return;
+	} while (!try_cmpxchg(&rq->sched.ipi_priority, &old, prio));
+
+	__ipi_add(rq);
+}
+
+static void __i915_request_set_priority(struct i915_request *rq, int prio)
+{
+	struct intel_engine_cs *engine = rq->engine;
+	struct i915_request *rn;
+	struct list_head *plist;
 	LIST_HEAD(dfs);
 
-	/* Needed in order to use the temporary link inside i915_dependency */
-	lockdep_assert_held(&schedule_lock);
-	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
-
-	if (node_signaled(node))
-		return;
-
-	prio = max(prio, node->attr.priority);
-
-	stack.signaler = node;
-	list_add(&stack.dfs_link, &dfs);
+	list_add(&rq->sched.dfs, &dfs);
 
 	/*
 	 * Recursively bump all dependent priorities to match the new request.
@@ -265,66 +330,41 @@ static void __i915_schedule(struct i915_sched_node *node, int prio)
 	 * end result is a topological list of requests in reverse order, the
 	 * last element in the list is the request we must execute first.
 	 */
-	list_for_each_entry(dep, &dfs, dfs_link) {
-		struct i915_sched_node *node = dep->signaler;
+	list_for_each_entry(rq, &dfs, sched.dfs) {
+		struct i915_dependency *p;
 
-		/* If we are already flying, we know we have no signalers */
-		if (node_started(node))
-			continue;
+		/* Also release any children on this engine that are ready */
+		GEM_BUG_ON(rq->engine != engine);
 
-		/*
-		 * Within an engine, there can be no cycle, but we may
-		 * refer to the same dependency chain multiple times
-		 * (redundant dependencies are not eliminated) and across
-		 * engines.
-		 */
-		list_for_each_entry(p, &node->signalers_list, signal_link) {
-			GEM_BUG_ON(p == dep); /* no cycles! */
+		for_each_signaler(p, rq) {
+			struct i915_request *s =
+				container_of(p->signaler, typeof(*s), sched);
 
-			if (node_signaled(p->signaler))
+			GEM_BUG_ON(s == rq);
+
+			if (rq_prio(s) >= prio)
 				continue;
 
-			if (prio > READ_ONCE(p->signaler->attr.priority))
-				list_move_tail(&p->dfs_link, &dfs);
+			if (__i915_request_is_complete(s))
+				continue;
+
+			if (s->engine != rq->engine) {
+				ipi_priority(s, prio);
+				continue;
+			}
+
+			list_move_tail(&s->sched.dfs, &dfs);
 		}
 	}
 
-	/*
-	 * If we didn't need to bump any existing priorities, and we haven't
-	 * yet submitted this request (i.e. there is no potential race with
-	 * execlists_submit_request()), we can set our own priority and skip
-	 * acquiring the engine locks.
-	 */
-	if (node->attr.priority == I915_PRIORITY_INVALID) {
-		GEM_BUG_ON(!list_empty(&node->link));
-		node->attr.priority = prio;
+	plist = i915_sched_lookup_priolist(engine, prio);
 
-		if (stack.dfs_link.next == stack.dfs_link.prev)
-			return;
+	/* Fifo and depth-first replacement ensure our deps execute first */
+	list_for_each_entry_safe_reverse(rq, rn, &dfs, sched.dfs) {
+		GEM_BUG_ON(rq->engine != engine);
 
-		__list_del_entry(&stack.dfs_link);
-	}
-
-	memset(&cache, 0, sizeof(cache));
-	engine = node_to_request(node)->engine;
-	spin_lock(&engine->active.lock);
-
-	/* Fifo and depth-first replacement ensure our deps execute before us */
-	engine = sched_lock_engine(node, engine, &cache);
-	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
-		INIT_LIST_HEAD(&dep->dfs_link);
-
-		node = dep->signaler;
-		engine = sched_lock_engine(node, engine, &cache);
-		lockdep_assert_held(&engine->active.lock);
-
-		/* Recheck after acquiring the engine->timeline.lock */
-		if (prio <= node->attr.priority || node_signaled(node))
-			continue;
-
-		GEM_BUG_ON(node_to_request(node)->engine != engine);
-
-		WRITE_ONCE(node->attr.priority, prio);
+		INIT_LIST_HEAD(&rq->sched.dfs);
+		WRITE_ONCE(rq->sched.attr.priority, prio);
 
 		/*
 		 * Once the request is ready, it will be placed into the
@@ -334,32 +374,79 @@ static void __i915_schedule(struct i915_sched_node *node, int prio)
 		 * any preemption required, be dealt with upon submission.
 		 * See engine->submit_request()
 		 */
-		if (list_empty(&node->link))
+		if (!i915_request_is_ready(rq))
 			continue;
 
-		if (i915_request_in_priority_queue(node_to_request(node))) {
-			if (!cache.priolist)
-				cache.priolist =
-					i915_sched_lookup_priolist(engine,
-								   prio);
-			list_move_tail(&node->link, cache.priolist);
-		}
+		if (i915_request_in_priority_queue(rq))
+			list_move_tail(&rq->sched.link, plist);
 
-		/* Defer (tasklet) submission until after all of our updates. */
-		kick_submission(engine, node_to_request(node), prio);
+		/* Defer (tasklet) submission until after all updates. */
+		kick_submission(engine, rq, prio);
 	}
-
-	spin_unlock(&engine->active.lock);
 }
 
+#define all_signalers_checked(p, rq) \
+	list_entry_is_head(p, &(rq)->sched.signalers_list, signal_link)
+
 void i915_request_set_priority(struct i915_request *rq, int prio)
 {
-	if (!i915_request_use_scheduler(rq))
+	struct intel_engine_cs *engine;
+	unsigned long flags;
+
+	if (prio <= rq_prio(rq))
 		return;
 
-	spin_lock_irq(&schedule_lock);
-	__i915_schedule(&rq->sched, prio);
-	spin_unlock_irq(&schedule_lock);
+	/*
+	 * If we are setting the priority before being submitted, see if we
+	 * can quickly adjust our own priority in-situ and avoid taking
+	 * the contended engine->active.lock. If we need priority inheritance,
+	 * take the slow route.
+	 */
+	if (rq_prio(rq) == I915_PRIORITY_INVALID) {
+		struct i915_dependency *p;
+
+		rcu_read_lock();
+		for_each_signaler(p, rq) {
+			struct i915_request *s =
+				container_of(p->signaler, typeof(*s), sched);
+
+			if (rq_prio(s) >= prio)
+				continue;
+
+			if (__i915_request_is_complete(s))
+				continue;
+
+			break;
+		}
+		rcu_read_unlock();
+
+		/* Update priority in place if no PI required */
+		if (all_signalers_checked(p, rq) &&
+		    cmpxchg(&rq->sched.attr.priority,
+			    I915_PRIORITY_INVALID,
+			    prio) == I915_PRIORITY_INVALID)
+			return;
+	}
+
+	engine = lock_engine_irqsave(rq, flags);
+	if (prio <= rq_prio(rq))
+		goto unlock;
+
+	if (__i915_request_is_complete(rq))
+		goto unlock;
+
+	if (!intel_engine_has_scheduler(engine)) {
+		rq->sched.attr.priority = prio;
+		goto unlock;
+	}
+
+	rcu_read_lock();
+	__i915_request_set_priority(rq, prio);
+	rcu_read_unlock();
+	GEM_BUG_ON(rq_prio(rq) != prio);
+
+unlock:
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 void i915_sched_node_init(struct i915_sched_node *node)
@@ -369,6 +456,9 @@ void i915_sched_node_init(struct i915_sched_node *node)
 	INIT_LIST_HEAD(&node->signalers_list);
 	INIT_LIST_HEAD(&node->waiters_list);
 	INIT_LIST_HEAD(&node->link);
+	INIT_LIST_HEAD(&node->dfs);
+
+	node->ipi_link = NULL;
 
 	i915_sched_node_reinit(node);
 }
@@ -379,6 +469,9 @@ void i915_sched_node_reinit(struct i915_sched_node *node)
 	node->semaphores = 0;
 	node->flags = 0;
 
+	GEM_BUG_ON(node->ipi_link);
+	node->ipi_priority = I915_PRIORITY_INVALID;
+
 	GEM_BUG_ON(!list_empty(&node->signalers_list));
 	GEM_BUG_ON(!list_empty(&node->waiters_list));
 	GEM_BUG_ON(!list_empty(&node->link));
@@ -414,7 +507,6 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 	spin_lock(&signal->lock);
 
 	if (!node_signaled(signal)) {
-		INIT_LIST_HEAD(&dep->dfs_link);
 		dep->signaler = signal;
 		dep->waiter = node_get(node);
 		dep->flags = flags;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index a045be784c67..2870fa3e089e 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -14,6 +14,7 @@
 #include "i915_scheduler_types.h"
 
 struct drm_printer;
+struct intel_engine_cs;
 
 #define priolist_for_each_request(it, plist) \
 	list_for_each_entry(it, &(plist)->requests, sched.link)
@@ -35,6 +36,8 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node,
 
 void i915_sched_node_retire(struct i915_sched_node *node);
 
+void i915_sched_init_ipi(struct i915_sched_ipi *ipi);
+
 void i915_request_set_priority(struct i915_request *request, int prio);
 
 struct list_head *
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 623bf41fcf35..2a5265d9aff1 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -8,13 +8,17 @@
 #define _I915_SCHEDULER_TYPES_H_
 
 #include <linux/list.h>
+#include <linux/workqueue.h>
 
-#include "gt/intel_engine_types.h"
 #include "i915_priolist_types.h"
 
-struct drm_i915_private;
 struct i915_request;
-struct intel_engine_cs;
+
+/* Inter-engine scheduling delegation */
+struct i915_sched_ipi {
+	struct i915_request *list;
+	struct work_struct work;
+};
 
 struct i915_sched_attr {
 	/**
@@ -61,13 +65,19 @@ struct i915_sched_attr {
  */
 struct i915_sched_node {
 	spinlock_t lock; /* protect the lists */
+
 	struct list_head signalers_list; /* those before us, we depend upon */
 	struct list_head waiters_list; /* those after us, they depend upon us */
-	struct list_head link;
+	struct list_head link; /* guarded by engine->active.lock */
+	struct list_head dfs; /* guarded by engine->active.lock */
 	struct i915_sched_attr attr;
-	unsigned int flags;
+	unsigned long flags;
 #define I915_SCHED_HAS_EXTERNAL_CHAIN	BIT(0)
-	intel_engine_mask_t semaphores;
+	unsigned long semaphores;
+
+	/* handle being scheduled for PI from outside of our active.lock */
+	struct i915_request *ipi_link;
+	int ipi_priority;
 };
 
 struct i915_dependency {
@@ -75,7 +85,6 @@ struct i915_dependency {
 	struct i915_sched_node *waiter;
 	struct list_head signal_link;
 	struct list_head wait_link;
-	struct list_head dfs_link;
 	struct rcu_head rcu;
 	unsigned long flags;
 #define I915_DEPENDENCY_ALLOC		BIT(0)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [Intel-gfx] [CI 06/12] drm/i915/selftests: Measure set-priority duration
  2021-02-03 11:17 [Intel-gfx] [CI 01/12] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
                   ` (3 preceding siblings ...)
  2021-02-03 11:17 ` [Intel-gfx] [CI 05/12] drm/i915: Restructure priority inheritance Chris Wilson
@ 2021-02-03 11:18 ` Chris Wilson
  2021-02-03 11:18 ` [Intel-gfx] [CI 07/12] drm/i915/selftests: Exercise priority inheritance around an engine loop Chris Wilson
                   ` (6 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2021-02-03 11:18 UTC (permalink / raw)
  To: intel-gfx

As a topological sort, we expect it to run in linear graph time,
O(V+E). In removing the recursion, it is no longer a DFS but rather a
BFS, and performs as O(VE). Let's demonstrate how bad this is with a few
examples, and build a few test cases to verify a potential fix.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_scheduler.c         |   4 +
 .../drm/i915/selftests/i915_live_selftests.h  |   1 +
 .../drm/i915/selftests/i915_perf_selftests.h  |   1 +
 .../gpu/drm/i915/selftests/i915_scheduler.c   | 673 ++++++++++++++++++
 4 files changed, 679 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/selftests/i915_scheduler.c

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 035e4be5d573..27bda7617b29 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -609,6 +609,10 @@ void i915_request_show_with_schedule(struct drm_printer *m,
 	rcu_read_unlock();
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_scheduler.c"
+#endif
+
 static void i915_global_scheduler_shrink(void)
 {
 	kmem_cache_shrink(global.slab_dependencies);
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index a92c0e9b7e6b..2200a5baa68e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -26,6 +26,7 @@ selftest(gt_mocs, intel_mocs_live_selftests)
 selftest(gt_pm, intel_gt_pm_live_selftests)
 selftest(gt_heartbeat, intel_heartbeat_live_selftests)
 selftest(requests, i915_request_live_selftests)
+selftest(scheduler, i915_scheduler_live_selftests)
 selftest(active, i915_active_live_selftests)
 selftest(objects, i915_gem_object_live_selftests)
 selftest(mman, i915_gem_mman_live_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
index c2389f8a257d..137e35283fee 100644
--- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
@@ -17,5 +17,6 @@
  */
 selftest(engine_cs, intel_engine_cs_perf_selftests)
 selftest(request, i915_request_perf_selftests)
+selftest(scheduler, i915_scheduler_perf_selftests)
 selftest(blt, i915_gem_object_blt_perf_selftests)
 selftest(region, intel_memory_region_perf_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_scheduler.c b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
new file mode 100644
index 000000000000..9700d4e3302b
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
@@ -0,0 +1,673 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+
+#include "gt/intel_context.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/selftest_engine_heartbeat.h"
+#include "selftests/igt_spinner.h"
+#include "selftests/i915_random.h"
+
+static void scheduling_disable(struct intel_engine_cs *engine)
+{
+	engine->props.preempt_timeout_ms = 0;
+	engine->props.timeslice_duration_ms = 0;
+
+	st_engine_heartbeat_disable(engine);
+}
+
+static void scheduling_enable(struct intel_engine_cs *engine)
+{
+	st_engine_heartbeat_enable(engine);
+
+	engine->props.preempt_timeout_ms =
+		engine->defaults.preempt_timeout_ms;
+	engine->props.timeslice_duration_ms =
+		engine->defaults.timeslice_duration_ms;
+}
+
+static int first_engine(struct drm_i915_private *i915,
+			int (*chain)(struct intel_engine_cs *engine,
+				     unsigned long param,
+				     bool (*fn)(struct i915_request *rq,
+						unsigned long v,
+						unsigned long e)),
+			unsigned long param,
+			bool (*fn)(struct i915_request *rq,
+				   unsigned long v, unsigned long e))
+{
+	struct intel_engine_cs *engine;
+
+	for_each_uabi_engine(engine, i915) {
+		if (!intel_engine_has_scheduler(engine))
+			continue;
+
+		return chain(engine, param, fn);
+	}
+
+	return 0;
+}
+
+static int all_engines(struct drm_i915_private *i915,
+		       int (*chain)(struct intel_engine_cs *engine,
+				    unsigned long param,
+				    bool (*fn)(struct i915_request *rq,
+					       unsigned long v,
+					       unsigned long e)),
+		       unsigned long param,
+		       bool (*fn)(struct i915_request *rq,
+				  unsigned long v, unsigned long e))
+{
+	struct intel_engine_cs *engine;
+	int err;
+
+	for_each_uabi_engine(engine, i915) {
+		if (!intel_engine_has_scheduler(engine))
+			continue;
+
+		err = chain(engine, param, fn);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static bool check_context_order(struct intel_engine_cs *engine)
+{
+	u64 last_seqno, last_context;
+	unsigned long count;
+	bool result = false;
+	struct rb_node *rb;
+	int last_prio;
+
+	/* We expect the execution order to follow ascending fence-context */
+	spin_lock_irq(&engine->active.lock);
+
+	count = 0;
+	last_context = 0;
+	last_seqno = 0;
+	last_prio = 0;
+	for (rb = rb_first_cached(&engine->execlists.queue); rb; rb = rb_next(rb)) {
+		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+		struct i915_request *rq;
+
+		priolist_for_each_request(rq, p) {
+			if (rq->fence.context < last_context ||
+			    (rq->fence.context == last_context &&
+			     rq->fence.seqno < last_seqno)) {
+				pr_err("[%lu] %llx:%lld [prio:%d] after %llx:%lld [prio:%d]\n",
+				       count,
+				       rq->fence.context,
+				       rq->fence.seqno,
+				       rq_prio(rq),
+				       last_context,
+				       last_seqno,
+				       last_prio);
+				goto out_unlock;
+			}
+
+			last_context = rq->fence.context;
+			last_seqno = rq->fence.seqno;
+			last_prio = rq_prio(rq);
+			count++;
+		}
+	}
+	result = true;
+out_unlock:
+	spin_unlock_irq(&engine->active.lock);
+
+	return result;
+}
+
+static int __single_chain(struct intel_engine_cs *engine, unsigned long length,
+			  bool (*fn)(struct i915_request *rq,
+				     unsigned long v, unsigned long e))
+{
+	struct intel_context *ce;
+	struct igt_spinner spin;
+	struct i915_request *rq;
+	unsigned long count;
+	unsigned long min;
+	int err = 0;
+
+	if (!intel_engine_can_store_dword(engine))
+		return 0;
+
+	scheduling_disable(engine);
+
+	if (igt_spinner_init(&spin, engine->gt)) {
+		err = -ENOMEM;
+		goto err_heartbeat;
+	}
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce)) {
+		err = PTR_ERR(ce);
+		goto err_spin;
+	}
+	ce->ring = __intel_context_ring_size(SZ_512K);
+
+	rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_context;
+	}
+	i915_request_add(rq);
+	min = ce->ring->size - ce->ring->space;
+
+	count = 1;
+	while (count < length && ce->ring->space > min) {
+		rq = intel_context_create_request(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+		i915_request_add(rq);
+		count++;
+	}
+	intel_engine_flush_submission(engine);
+
+	execlists_active_lock_bh(&engine->execlists);
+	if (fn(rq, count, count - 1) && !check_context_order(engine))
+		err = -EINVAL;
+	execlists_active_unlock_bh(&engine->execlists);
+
+	igt_spinner_end(&spin);
+err_context:
+	intel_context_put(ce);
+err_spin:
+	igt_spinner_fini(&spin);
+err_heartbeat:
+	scheduling_enable(engine);
+	return err;
+}
+
+static int __wide_chain(struct intel_engine_cs *engine, unsigned long width,
+			bool (*fn)(struct i915_request *rq,
+				   unsigned long v, unsigned long e))
+{
+	struct intel_context **ce;
+	struct i915_request **rq;
+	struct igt_spinner spin;
+	unsigned long count;
+	unsigned long i, j;
+	int err = 0;
+
+	if (!intel_engine_can_store_dword(engine))
+		return 0;
+
+	scheduling_disable(engine);
+
+	if (igt_spinner_init(&spin, engine->gt)) {
+		err = -ENOMEM;
+		goto err_heartbeat;
+	}
+
+	ce = kmalloc_array(width, sizeof(*ce), GFP_KERNEL);
+	if (!ce) {
+		err = -ENOMEM;
+		goto err_spin;
+	}
+
+	for (i = 0; i < width; i++) {
+		ce[i] = intel_context_create(engine);
+		if (IS_ERR(ce[i])) {
+			err = PTR_ERR(ce[i]);
+			width = i;
+			goto err_context;
+		}
+	}
+
+	rq = kmalloc_array(width, sizeof(*rq), GFP_KERNEL);
+	if (!rq) {
+		err = -ENOMEM;
+		goto err_context;
+	}
+
+	rq[0] = igt_spinner_create_request(&spin, ce[0], MI_NOOP);
+	if (IS_ERR(rq[0])) {
+		err = PTR_ERR(rq[0]);
+		goto err_free;
+	}
+	i915_request_add(rq[0]);
+
+	count = 0;
+	for (i = 1; i < width; i++) {
+		GEM_BUG_ON(i915_request_completed(rq[0]));
+
+		rq[i] = intel_context_create_request(ce[i]);
+		if (IS_ERR(rq[i])) {
+			err = PTR_ERR(rq[i]);
+			break;
+		}
+		for (j = 0; j < i; j++) {
+			err = i915_request_await_dma_fence(rq[i],
+							   &rq[j]->fence);
+			if (err)
+				break;
+			count++;
+		}
+		i915_request_add(rq[i]);
+	}
+	intel_engine_flush_submission(engine);
+
+	execlists_active_lock_bh(&engine->execlists);
+	if (fn(rq[i - 1], i, count) && !check_context_order(engine))
+		err = -EINVAL;
+	execlists_active_unlock_bh(&engine->execlists);
+
+	igt_spinner_end(&spin);
+err_free:
+	kfree(rq);
+err_context:
+	for (i = 0; i < width; i++)
+		intel_context_put(ce[i]);
+	kfree(ce);
+err_spin:
+	igt_spinner_fini(&spin);
+err_heartbeat:
+	scheduling_enable(engine);
+	return err;
+}
+
+static int __inv_chain(struct intel_engine_cs *engine, unsigned long width,
+		       bool (*fn)(struct i915_request *rq,
+				  unsigned long v, unsigned long e))
+{
+	struct intel_context **ce;
+	struct i915_request **rq;
+	struct igt_spinner spin;
+	unsigned long count;
+	unsigned long i, j;
+	int err = 0;
+
+	if (!intel_engine_can_store_dword(engine))
+		return 0;
+
+	scheduling_disable(engine);
+
+	if (igt_spinner_init(&spin, engine->gt)) {
+		err = -ENOMEM;
+		goto err_heartbeat;
+	}
+
+	ce = kmalloc_array(width, sizeof(*ce), GFP_KERNEL);
+	if (!ce) {
+		err = -ENOMEM;
+		goto err_spin;
+	}
+
+	for (i = 0; i < width; i++) {
+		ce[i] = intel_context_create(engine);
+		if (IS_ERR(ce[i])) {
+			err = PTR_ERR(ce[i]);
+			width = i;
+			goto err_context;
+		}
+	}
+
+	rq = kmalloc_array(width, sizeof(*rq), GFP_KERNEL);
+	if (!rq) {
+		err = -ENOMEM;
+		goto err_context;
+	}
+
+	rq[0] = igt_spinner_create_request(&spin, ce[0], MI_NOOP);
+	if (IS_ERR(rq[0])) {
+		err = PTR_ERR(rq[0]);
+		goto err_free;
+	}
+	i915_request_add(rq[0]);
+
+	count = 0;
+	for (i = 1; i < width; i++) {
+		GEM_BUG_ON(i915_request_completed(rq[0]));
+
+		rq[i] = intel_context_create_request(ce[i]);
+		if (IS_ERR(rq[i])) {
+			err = PTR_ERR(rq[i]);
+			break;
+		}
+		for (j = i; j > 0; j--) {
+			err = i915_request_await_dma_fence(rq[i],
+							   &rq[j - 1]->fence);
+			if (err)
+				break;
+			count++;
+		}
+		i915_request_add(rq[i]);
+	}
+	intel_engine_flush_submission(engine);
+
+	execlists_active_lock_bh(&engine->execlists);
+	if (fn(rq[i - 1], i, count) && !check_context_order(engine))
+		err = -EINVAL;
+	execlists_active_unlock_bh(&engine->execlists);
+
+	igt_spinner_end(&spin);
+err_free:
+	kfree(rq);
+err_context:
+	for (i = 0; i < width; i++)
+		intel_context_put(ce[i]);
+	kfree(ce);
+err_spin:
+	igt_spinner_fini(&spin);
+err_heartbeat:
+	scheduling_enable(engine);
+	return err;
+}
+
+static int __sparse_chain(struct intel_engine_cs *engine, unsigned long width,
+			  bool (*fn)(struct i915_request *rq,
+				     unsigned long v, unsigned long e))
+{
+	struct intel_context **ce;
+	struct i915_request **rq;
+	struct igt_spinner spin;
+	I915_RND_STATE(prng);
+	unsigned long count;
+	unsigned long i, j;
+	int err = 0;
+
+	if (!intel_engine_can_store_dword(engine))
+		return 0;
+
+	scheduling_disable(engine);
+
+	if (igt_spinner_init(&spin, engine->gt)) {
+		err = -ENOMEM;
+		goto err_heartbeat;
+	}
+
+	ce = kmalloc_array(width, sizeof(*ce), GFP_KERNEL);
+	if (!ce) {
+		err = -ENOMEM;
+		goto err_spin;
+	}
+
+	for (i = 0; i < width; i++) {
+		ce[i] = intel_context_create(engine);
+		if (IS_ERR(ce[i])) {
+			err = PTR_ERR(ce[i]);
+			width = i;
+			goto err_context;
+		}
+	}
+
+	rq = kmalloc_array(width, sizeof(*rq), GFP_KERNEL);
+	if (!rq) {
+		err = -ENOMEM;
+		goto err_context;
+	}
+
+	rq[0] = igt_spinner_create_request(&spin, ce[0], MI_NOOP);
+	if (IS_ERR(rq[0])) {
+		err = PTR_ERR(rq[0]);
+		goto err_free;
+	}
+	i915_request_add(rq[0]);
+
+	count = 0;
+	for (i = 1; i < width; i++) {
+		GEM_BUG_ON(i915_request_completed(rq[0]));
+
+		rq[i] = intel_context_create_request(ce[i]);
+		if (IS_ERR(rq[i])) {
+			err = PTR_ERR(rq[i]);
+			break;
+		}
+
+		if (err == 0 && i > 1) {
+			j = i915_prandom_u32_max_state(i - 1, &prng);
+			err = i915_request_await_dma_fence(rq[i],
+							   &rq[j]->fence);
+			count++;
+		}
+
+		if (err == 0) {
+			err = i915_request_await_dma_fence(rq[i],
+							   &rq[i - 1]->fence);
+			count++;
+		}
+
+		if (err == 0 && i > 2) {
+			j = i915_prandom_u32_max_state(i - 2, &prng);
+			err = i915_request_await_dma_fence(rq[i],
+							   &rq[j]->fence);
+			count++;
+		}
+
+		i915_request_add(rq[i]);
+		if (err)
+			break;
+	}
+	intel_engine_flush_submission(engine);
+
+	execlists_active_lock_bh(&engine->execlists);
+	if (fn(rq[i - 1], i, count) && !check_context_order(engine))
+		err = -EINVAL;
+	execlists_active_unlock_bh(&engine->execlists);
+
+	igt_spinner_end(&spin);
+err_free:
+	kfree(rq);
+err_context:
+	for (i = 0; i < width; i++)
+		intel_context_put(ce[i]);
+	kfree(ce);
+err_spin:
+	igt_spinner_fini(&spin);
+err_heartbeat:
+	scheduling_enable(engine);
+	return err;
+}
+
+static int igt_schedule_chains(struct drm_i915_private *i915,
+			       bool (*fn)(struct i915_request *rq,
+					  unsigned long v, unsigned long e))
+{
+	static int (* const chains[])(struct intel_engine_cs *engine,
+				      unsigned long length,
+				      bool (*fn)(struct i915_request *rq,
+						 unsigned long v, unsigned long e)) = {
+		__single_chain,
+		__wide_chain,
+		__inv_chain,
+		__sparse_chain,
+	};
+	int n, err;
+
+	for (n = 0; n < ARRAY_SIZE(chains); n++) {
+		err = all_engines(i915, chains[n], 17, fn);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static bool igt_priority(struct i915_request *rq,
+			 unsigned long v, unsigned long e)
+{
+	i915_request_set_priority(rq, I915_PRIORITY_BARRIER);
+	GEM_BUG_ON(rq_prio(rq) != I915_PRIORITY_BARRIER);
+	return true;
+}
+
+static int igt_priority_chains(void *arg)
+{
+	return igt_schedule_chains(arg, igt_priority);
+}
+
+int i915_scheduler_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_priority_chains),
+	};
+
+	return i915_subtests(tests, i915);
+}
+
+static int chains(struct drm_i915_private *i915,
+		  int (*chain)(struct drm_i915_private *i915,
+			       unsigned long length,
+			       bool (*fn)(struct i915_request *rq,
+					  unsigned long v, unsigned long e)),
+		  bool (*fn)(struct i915_request *rq,
+			     unsigned long v, unsigned long e))
+{
+	unsigned long x[] = { 1, 4, 16, 64, 128, 256, 512, 1024, 4096 };
+	int i, err;
+
+	for (i = 0; i < ARRAY_SIZE(x); i++) {
+		IGT_TIMEOUT(end_time);
+
+		err = chain(i915, x[i], fn);
+		if (err)
+			return err;
+
+		if (__igt_timeout(end_time, NULL))
+			break;
+	}
+
+	return 0;
+}
+
+static int single_chain(struct drm_i915_private *i915,
+			unsigned long length,
+			bool (*fn)(struct i915_request *rq,
+				   unsigned long v, unsigned long e))
+{
+	return first_engine(i915, __single_chain, length, fn);
+}
+
+static int single(struct drm_i915_private *i915,
+		  bool (*fn)(struct i915_request *rq,
+			     unsigned long v, unsigned long e))
+{
+	return chains(i915, single_chain, fn);
+}
+
+static int wide_chain(struct drm_i915_private *i915,
+		      unsigned long width,
+		      bool (*fn)(struct i915_request *rq,
+				 unsigned long v, unsigned long e))
+{
+	return first_engine(i915, __wide_chain, width, fn);
+}
+
+static int wide(struct drm_i915_private *i915,
+		bool (*fn)(struct i915_request *rq,
+			   unsigned long v, unsigned long e))
+{
+	return chains(i915, wide_chain, fn);
+}
+
+static int inv_chain(struct drm_i915_private *i915,
+		     unsigned long width,
+		     bool (*fn)(struct i915_request *rq,
+				unsigned long v, unsigned long e))
+{
+	return first_engine(i915, __inv_chain, width, fn);
+}
+
+static int inv(struct drm_i915_private *i915,
+	       bool (*fn)(struct i915_request *rq,
+			  unsigned long v, unsigned long e))
+{
+	return chains(i915, inv_chain, fn);
+}
+
+static int sparse_chain(struct drm_i915_private *i915,
+			unsigned long width,
+			bool (*fn)(struct i915_request *rq,
+				   unsigned long v, unsigned long e))
+{
+	return first_engine(i915, __sparse_chain, width, fn);
+}
+
+static int sparse(struct drm_i915_private *i915,
+		  bool (*fn)(struct i915_request *rq,
+			     unsigned long v, unsigned long e))
+{
+	return chains(i915, sparse_chain, fn);
+}
+
+static void report(const char *what, unsigned long v, unsigned long e, u64 dt)
+{
+	pr_info("(%4lu, %7lu), %s:%10lluns\n", v, e, what, dt);
+}
+
+static u64 __set_priority(struct i915_request *rq, int prio)
+{
+	u64 dt;
+
+	preempt_disable();
+	dt = ktime_get_raw_fast_ns();
+	i915_request_set_priority(rq, prio);
+	dt = ktime_get_raw_fast_ns() - dt;
+	preempt_enable();
+
+	return dt;
+}
+
+static bool set_priority(struct i915_request *rq,
+			 unsigned long v, unsigned long e)
+{
+	report("set-priority", v, e, __set_priority(rq, I915_PRIORITY_BARRIER));
+	return true;
+}
+
+static int single_priority(void *arg)
+{
+	return single(arg, set_priority);
+}
+
+static int wide_priority(void *arg)
+{
+	return wide(arg, set_priority);
+}
+
+static int inv_priority(void *arg)
+{
+	return inv(arg, set_priority);
+}
+
+static int sparse_priority(void *arg)
+{
+	return sparse(arg, set_priority);
+}
+
+int i915_scheduler_perf_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(single_priority),
+		SUBTEST(wide_priority),
+		SUBTEST(inv_priority),
+		SUBTEST(sparse_priority),
+	};
+	static const struct {
+		const char *name;
+		size_t sz;
+	} types[] = {
+#define T(t) { #t, sizeof(struct t) }
+		T(i915_dependency),
+		T(i915_priolist),
+		T(i915_sched_attr),
+		T(i915_sched_node),
+		T(i915_request),
+#undef T
+		{}
+	};
+	typeof(*types) *t;
+
+	for (t = types; t->name; t++)
+		pr_info("sizeof(%s): %zd\n", t->name, t->sz);
+
+	return i915_subtests(tests, i915);
+}
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [Intel-gfx] [CI 07/12] drm/i915/selftests: Exercise priority inheritance around an engine loop
  2021-02-03 11:17 [Intel-gfx] [CI 01/12] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
                   ` (4 preceding siblings ...)
  2021-02-03 11:18 ` [Intel-gfx] [CI 06/12] drm/i915/selftests: Measure set-priority duration Chris Wilson
@ 2021-02-03 11:18 ` Chris Wilson
  2021-02-03 11:18 ` [Intel-gfx] [CI 08/12] drm/i915: Improve DFS for priority inheritance Chris Wilson
                   ` (5 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2021-02-03 11:18 UTC (permalink / raw)
  To: intel-gfx

Exercise rescheduling priority inheritance around a sequence of requests
that wrap around all the engines.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 .../gpu/drm/i915/selftests/i915_scheduler.c   | 215 ++++++++++++++++++
 1 file changed, 215 insertions(+)

diff --git a/drivers/gpu/drm/i915/selftests/i915_scheduler.c b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
index 9700d4e3302b..298c31ee550c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
@@ -7,6 +7,7 @@
 
 #include "gt/intel_context.h"
 #include "gt/intel_gpu_commands.h"
+#include "gt/intel_ring.h"
 #include "gt/selftest_engine_heartbeat.h"
 #include "selftests/igt_spinner.h"
 #include "selftests/i915_random.h"
@@ -504,10 +505,224 @@ static int igt_priority_chains(void *arg)
 	return igt_schedule_chains(arg, igt_priority);
 }
 
+static struct i915_request *
+__write_timestamp(struct intel_engine_cs *engine,
+		  struct drm_i915_gem_object *obj,
+		  int slot,
+		  struct i915_request *prev)
+{
+	struct i915_request *rq = ERR_PTR(-EINVAL);
+	bool use_64b = INTEL_GEN(engine->i915) >= 8;
+	struct intel_context *ce;
+	struct i915_vma *vma;
+	int err = 0;
+	u32 *cs;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return ERR_CAST(ce);
+
+	vma = i915_vma_instance(obj, ce->vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto out_ce;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (err)
+		goto out_ce;
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_unpin;
+	}
+
+	i915_vma_lock(vma);
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
+	if (err)
+		goto out_request;
+
+	if (prev) {
+		err = i915_request_await_dma_fence(rq, &prev->fence);
+		if (err)
+			goto out_request;
+	}
+
+	if (engine->emit_init_breadcrumb) {
+		err = engine->emit_init_breadcrumb(rq);
+		if (err)
+			goto out_request;
+	}
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto out_request;
+	}
+
+	*cs++ = MI_STORE_REGISTER_MEM + use_64b;
+	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(engine->mmio_base));
+	*cs++ = lower_32_bits(vma->node.start) + sizeof(u32) * slot;
+	*cs++ = upper_32_bits(vma->node.start);
+	intel_ring_advance(rq, cs);
+
+	i915_request_get(rq);
+out_request:
+	i915_request_add(rq);
+out_unpin:
+	i915_vma_unpin(vma);
+out_ce:
+	intel_context_put(ce);
+	i915_request_put(prev);
+	return err ? ERR_PTR(err) : rq;
+}
+
+static struct i915_request *create_spinner(struct drm_i915_private *i915,
+					   struct igt_spinner *spin)
+{
+	struct intel_engine_cs *engine;
+
+	for_each_uabi_engine(engine, i915) {
+		struct intel_context *ce;
+		struct i915_request *rq;
+
+		if (igt_spinner_init(spin, engine->gt))
+			return ERR_PTR(-ENOMEM);
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce))
+			return ERR_CAST(ce);
+
+		rq = igt_spinner_create_request(spin, ce, MI_NOOP);
+		intel_context_put(ce);
+		if (rq == ERR_PTR(-ENODEV))
+			continue;
+		if (IS_ERR(rq))
+			return rq;
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+		return rq;
+	}
+
+	return ERR_PTR(-ENODEV);
+}
+
+static bool has_timestamp(const struct drm_i915_private *i915)
+{
+	return INTEL_GEN(i915) >= 7;
+}
+
+static int __igt_schedule_cycle(struct drm_i915_private *i915,
+				bool (*fn)(struct i915_request *rq,
+					   unsigned long v, unsigned long e))
+{
+	struct intel_engine_cs *engine;
+	struct drm_i915_gem_object *obj;
+	struct igt_spinner spin;
+	struct i915_request *rq;
+	unsigned long count, n;
+	u32 *time, last;
+	int err, loop;
+
+	/*
+	 * Queue a bunch of ordered requests (each waiting on the previous)
+	 * around the engines a couple of times. Each request will write
+	 * the timestamp it executes at into the scratch, with the expectation
+	 * that the timestamp will be in our desired execution order.
+	 */
+
+	if (!i915->caps.scheduler || !has_timestamp(i915))
+		return 0;
+
+	obj = i915_gem_object_create_internal(i915, SZ_64K);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	time = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(time)) {
+		err = PTR_ERR(time);
+		goto out_obj;
+	}
+
+	rq = create_spinner(i915, &spin);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_obj;
+	}
+
+	err = 0;
+	count = 0;
+	for (loop = 0; !err && loop < 3; loop++) {
+		for_each_uabi_engine(engine, i915) {
+			if (!intel_engine_has_scheduler(engine))
+				continue;
+
+			rq = __write_timestamp(engine, obj, count, rq);
+			if (IS_ERR(rq)) {
+				err = PTR_ERR(rq);
+				break;
+			}
+
+			count++;
+		}
+	}
+	GEM_BUG_ON(count * sizeof(u32) > obj->base.size);
+	if (err || !count)
+		goto out_spin;
+
+	fn(rq, count + 1, count);
+	igt_spinner_end(&spin);
+
+	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
+		err = -ETIME;
+		goto out_request;
+	}
+
+	last = time[0];
+	for (n = 1; n < count; n++) {
+		if (i915_seqno_passed(last, time[n])) {
+			pr_err("Timestamp[%lu] %x before previous %x\n",
+			       n, time[n], last);
+			err = -EINVAL;
+			break;
+		}
+		last = time[n];
+	}
+
+out_request:
+	i915_request_put(rq);
+out_spin:
+	igt_spinner_fini(&spin);
+out_obj:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+static bool noop(struct i915_request *rq, unsigned long v, unsigned long e)
+{
+	return true;
+}
+
+static int igt_schedule_cycle(void *arg)
+{
+	return __igt_schedule_cycle(arg, noop);
+}
+
+static int igt_priority_cycle(void *arg)
+{
+	return __igt_schedule_cycle(arg, igt_priority);
+}
+
 int i915_scheduler_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_priority_chains),
+
+		SUBTEST(igt_schedule_cycle),
+		SUBTEST(igt_priority_cycle),
 	};
 
 	return i915_subtests(tests, i915);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [Intel-gfx] [CI 08/12] drm/i915: Improve DFS for priority inheritance
  2021-02-03 11:17 [Intel-gfx] [CI 01/12] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
                   ` (5 preceding siblings ...)
  2021-02-03 11:18 ` [Intel-gfx] [CI 07/12] drm/i915/selftests: Exercise priority inheritance around an engine loop Chris Wilson
@ 2021-02-03 11:18 ` Chris Wilson
  2021-02-03 11:18 ` [Intel-gfx] [CI 09/12] drm/i915: Extract request submission from execlists Chris Wilson
                   ` (4 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2021-02-03 11:18 UTC (permalink / raw)
  To: intel-gfx

The core of the scheduling algorithm is that we compute the topological
order of the fence DAG. Knowing that we have a DAG, we should be able to
use a DFS to compute the topological sort in linear time. However,
during the conversion of the recursive algorithm into an iterative one,
the memoization of how far we had progressed down a branch was
forgotten. The result was that instead of running in linear time, it was
running in geometric time and could easily run for a few hundred
milliseconds given a wide enough graph, not the microseconds as required.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Andi Shyti <andi.shyti@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_scheduler.c       | 58 ++++++++++++---------
 drivers/gpu/drm/i915/i915_scheduler_types.h |  6 ++-
 2 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 27bda7617b29..9e88417bf451 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -242,6 +242,26 @@ void __i915_priolist_free(struct i915_priolist *p)
 	kmem_cache_free(global.slab_priorities, p);
 }
 
+static struct i915_request *
+stack_push(struct i915_request *rq,
+	   struct i915_request *prev,
+	   struct list_head *pos)
+{
+	prev->sched.dfs.pos = pos;
+	rq->sched.dfs.prev = prev;
+	return rq;
+}
+
+static struct i915_request *
+stack_pop(struct i915_request *rq,
+	  struct list_head **pos)
+{
+	rq = rq->sched.dfs.prev;
+	if (rq)
+		*pos = rq->sched.dfs.pos;
+	return rq;
+}
+
 static inline bool need_preempt(int prio, int active)
 {
 	/*
@@ -306,11 +326,10 @@ static void ipi_priority(struct i915_request *rq, int prio)
 static void __i915_request_set_priority(struct i915_request *rq, int prio)
 {
 	struct intel_engine_cs *engine = rq->engine;
-	struct i915_request *rn;
+	struct list_head *pos = &rq->sched.signalers_list;
 	struct list_head *plist;
-	LIST_HEAD(dfs);
 
-	list_add(&rq->sched.dfs, &dfs);
+	plist = i915_sched_lookup_priolist(engine, prio);
 
 	/*
 	 * Recursively bump all dependent priorities to match the new request.
@@ -330,40 +349,31 @@ static void __i915_request_set_priority(struct i915_request *rq, int prio)
 	 * end result is a topological list of requests in reverse order, the
 	 * last element in the list is the request we must execute first.
 	 */
-	list_for_each_entry(rq, &dfs, sched.dfs) {
-		struct i915_dependency *p;
-
-		/* Also release any children on this engine that are ready */
-		GEM_BUG_ON(rq->engine != engine);
-
-		for_each_signaler(p, rq) {
+	rq->sched.dfs.prev = NULL;
+	do {
+		list_for_each_continue(pos, &rq->sched.signalers_list) {
+			struct i915_dependency *p =
+				list_entry(pos, typeof(*p), signal_link);
 			struct i915_request *s =
 				container_of(p->signaler, typeof(*s), sched);
 
-			GEM_BUG_ON(s == rq);
-
 			if (rq_prio(s) >= prio)
 				continue;
 
 			if (__i915_request_is_complete(s))
 				continue;
 
-			if (s->engine != rq->engine) {
+			if (s->engine != engine) {
 				ipi_priority(s, prio);
 				continue;
 			}
 
-			list_move_tail(&s->sched.dfs, &dfs);
+			/* Remember our position along this branch */
+			rq = stack_push(s, rq, pos);
+			pos = &rq->sched.signalers_list;
 		}
-	}
 
-	plist = i915_sched_lookup_priolist(engine, prio);
-
-	/* Fifo and depth-first replacement ensure our deps execute first */
-	list_for_each_entry_safe_reverse(rq, rn, &dfs, sched.dfs) {
-		GEM_BUG_ON(rq->engine != engine);
-
-		INIT_LIST_HEAD(&rq->sched.dfs);
+		RQ_TRACE(rq, "set-priority:%d\n", prio);
 		WRITE_ONCE(rq->sched.attr.priority, prio);
 
 		/*
@@ -377,12 +387,13 @@ static void __i915_request_set_priority(struct i915_request *rq, int prio)
 		if (!i915_request_is_ready(rq))
 			continue;
 
+		GEM_BUG_ON(rq->engine != engine);
 		if (i915_request_in_priority_queue(rq))
 			list_move_tail(&rq->sched.link, plist);
 
 		/* Defer (tasklet) submission until after all updates. */
 		kick_submission(engine, rq, prio);
-	}
+	} while ((rq = stack_pop(rq, &pos)));
 }
 
 #define all_signalers_checked(p, rq) \
@@ -456,7 +467,6 @@ void i915_sched_node_init(struct i915_sched_node *node)
 	INIT_LIST_HEAD(&node->signalers_list);
 	INIT_LIST_HEAD(&node->waiters_list);
 	INIT_LIST_HEAD(&node->link);
-	INIT_LIST_HEAD(&node->dfs);
 
 	node->ipi_link = NULL;
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 2a5265d9aff1..28138c3fcc81 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -69,7 +69,11 @@ struct i915_sched_node {
 	struct list_head signalers_list; /* those before us, we depend upon */
 	struct list_head waiters_list; /* those after us, they depend upon us */
 	struct list_head link; /* guarded by engine->active.lock */
-	struct list_head dfs; /* guarded by engine->active.lock */
+	struct i915_sched_stack {
+		/* Branch memoization used during depth-first search */
+		struct i915_request *prev;
+		struct list_head *pos;
+	} dfs; /* guarded by engine->active.lock */
 	struct i915_sched_attr attr;
 	unsigned long flags;
 #define I915_SCHED_HAS_EXTERNAL_CHAIN	BIT(0)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [Intel-gfx] [CI 09/12] drm/i915: Extract request submission from execlists
  2021-02-03 11:17 [Intel-gfx] [CI 01/12] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
                   ` (6 preceding siblings ...)
  2021-02-03 11:18 ` [Intel-gfx] [CI 08/12] drm/i915: Improve DFS for priority inheritance Chris Wilson
@ 2021-02-03 11:18 ` Chris Wilson
  2021-02-03 11:18 ` [Intel-gfx] [CI 10/12] drm/i915: Extract request rewinding " Chris Wilson
                   ` (3 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2021-02-03 11:18 UTC (permalink / raw)
  To: intel-gfx

In the process of preparing to reuse the request submission logic for
other backends, lift it out of the execlists backend. It already
operates on the common structs, so just a matter of moving and renaming.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 .../drm/i915/gt/intel_execlists_submission.c  | 55 +------------
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 30 +------
 drivers/gpu/drm/i915/i915_scheduler.c         | 82 +++++++++++++++++++
 drivers/gpu/drm/i915/i915_scheduler.h         |  2 +
 4 files changed, 86 insertions(+), 83 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index f95a35383f48..7b426e461ce3 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2453,59 +2453,6 @@ static void execlists_preempt(struct timer_list *timer)
 	execlists_kick(timer, preempt);
 }
 
-static void queue_request(struct intel_engine_cs *engine,
-			  struct i915_request *rq)
-{
-	GEM_BUG_ON(!list_empty(&rq->sched.link));
-	list_add_tail(&rq->sched.link,
-		      i915_sched_lookup_priolist(engine, rq_prio(rq)));
-	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
-}
-
-static bool submit_queue(struct intel_engine_cs *engine,
-			 const struct i915_request *rq)
-{
-	struct intel_engine_execlists *execlists = &engine->execlists;
-
-	if (rq_prio(rq) <= execlists->queue_priority_hint)
-		return false;
-
-	execlists->queue_priority_hint = rq_prio(rq);
-	return true;
-}
-
-static bool ancestor_on_hold(const struct intel_engine_cs *engine,
-			     const struct i915_request *rq)
-{
-	GEM_BUG_ON(i915_request_on_hold(rq));
-	return !list_empty(&engine->active.hold) && hold_request(rq);
-}
-
-static void execlists_submit_request(struct i915_request *request)
-{
-	struct intel_engine_cs *engine = request->engine;
-	unsigned long flags;
-
-	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->active.lock, flags);
-
-	if (unlikely(ancestor_on_hold(engine, request))) {
-		RQ_TRACE(request, "ancestor on hold\n");
-		list_add_tail(&request->sched.link, &engine->active.hold);
-		i915_request_set_hold(request);
-	} else {
-		queue_request(engine, request);
-
-		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
-		GEM_BUG_ON(list_empty(&request->sched.link));
-
-		if (submit_queue(engine, request))
-			__execlists_kick(&engine->execlists);
-	}
-
-	spin_unlock_irqrestore(&engine->active.lock, flags);
-}
-
 static int execlists_context_pre_pin(struct intel_context *ce,
 				     struct i915_gem_ww_ctx *ww,
 				     void **vaddr)
@@ -3105,7 +3052,7 @@ static bool can_preempt(struct intel_engine_cs *engine)
 
 static void execlists_set_default_submission(struct intel_engine_cs *engine)
 {
-	engine->submit_request = execlists_submit_request;
+	engine->submit_request = i915_request_enqueue;
 	engine->execlists.tasklet.callback = execlists_submission_tasklet;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 2f5c39091d49..aa34a2f3d6d5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -520,34 +520,6 @@ static int guc_request_alloc(struct i915_request *request)
 	return 0;
 }
 
-static inline void queue_request(struct intel_engine_cs *engine,
-				 struct i915_request *rq,
-				 int prio)
-{
-	GEM_BUG_ON(!list_empty(&rq->sched.link));
-	list_add_tail(&rq->sched.link,
-		      i915_sched_lookup_priolist(engine, prio));
-	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
-}
-
-static void guc_submit_request(struct i915_request *rq)
-{
-	struct intel_engine_cs *engine = rq->engine;
-	unsigned long flags;
-
-	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->active.lock, flags);
-
-	queue_request(engine, rq, rq_prio(rq));
-
-	GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
-	GEM_BUG_ON(list_empty(&rq->sched.link));
-
-	tasklet_hi_schedule(&engine->execlists.tasklet);
-
-	spin_unlock_irqrestore(&engine->active.lock, flags);
-}
-
 static void sanitize_hwsp(struct intel_engine_cs *engine)
 {
 	struct intel_timeline *tl;
@@ -616,7 +588,7 @@ static int guc_resume(struct intel_engine_cs *engine)
 
 static void guc_set_default_submission(struct intel_engine_cs *engine)
 {
-	engine->submit_request = guc_submit_request;
+	engine->submit_request = i915_request_enqueue;
 }
 
 static void guc_release(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 9e88417bf451..a56252d3546a 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -460,6 +460,88 @@ void i915_request_set_priority(struct i915_request *rq, int prio)
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
+static void queue_request(struct intel_engine_cs *engine,
+			  struct i915_request *rq)
+{
+	GEM_BUG_ON(!list_empty(&rq->sched.link));
+	list_add_tail(&rq->sched.link,
+		      i915_sched_lookup_priolist(engine, rq_prio(rq)));
+	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+}
+
+static bool submit_queue(struct intel_engine_cs *engine,
+			 const struct i915_request *rq)
+{
+	struct intel_engine_execlists *execlists = &engine->execlists;
+
+	if (rq_prio(rq) <= execlists->queue_priority_hint)
+		return false;
+
+	execlists->queue_priority_hint = rq_prio(rq);
+	return true;
+}
+
+static bool hold_request(const struct i915_request *rq)
+{
+	struct i915_dependency *p;
+	bool result = false;
+
+	/*
+	 * If one of our ancestors is on hold, we must also be put on hold,
+	 * otherwise we will bypass it and execute before it.
+	 */
+	rcu_read_lock();
+	for_each_signaler(p, rq) {
+		const struct i915_request *s =
+			container_of(p->signaler, typeof(*s), sched);
+
+		if (s->engine != rq->engine)
+			continue;
+
+		result = i915_request_on_hold(s);
+		if (result)
+			break;
+	}
+	rcu_read_unlock();
+
+	return result;
+}
+
+static bool ancestor_on_hold(const struct intel_engine_cs *engine,
+			     const struct i915_request *rq)
+{
+	GEM_BUG_ON(i915_request_on_hold(rq));
+	return unlikely(!list_empty(&engine->active.hold)) && hold_request(rq);
+}
+
+void i915_request_enqueue(struct i915_request *rq)
+{
+	struct intel_engine_cs *engine = rq->engine;
+	unsigned long flags;
+	bool kick = false;
+
+	/* Will be called from irq-context when using foreign fences. */
+	spin_lock_irqsave(&engine->active.lock, flags);
+	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags));
+
+	if (unlikely(ancestor_on_hold(engine, rq))) {
+		RQ_TRACE(rq, "ancestor on hold\n");
+		list_add_tail(&rq->sched.link, &engine->active.hold);
+		i915_request_set_hold(rq);
+	} else {
+		queue_request(engine, rq);
+
+		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
+
+		kick = submit_queue(engine, rq);
+	}
+
+	GEM_BUG_ON(list_empty(&rq->sched.link));
+	spin_unlock_irqrestore(&engine->active.lock, flags);
+	if (kick)
+		tasklet_hi_schedule(&engine->execlists.tasklet);
+}
+
 void i915_sched_node_init(struct i915_sched_node *node)
 {
 	spin_lock_init(&node->lock);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 2870fa3e089e..89d998f226e0 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -40,6 +40,8 @@ void i915_sched_init_ipi(struct i915_sched_ipi *ipi);
 
 void i915_request_set_priority(struct i915_request *request, int prio);
 
+void i915_request_enqueue(struct i915_request *request);
+
 struct list_head *
 i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [Intel-gfx] [CI 10/12] drm/i915: Extract request rewinding from execlists
  2021-02-03 11:17 [Intel-gfx] [CI 01/12] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
                   ` (7 preceding siblings ...)
  2021-02-03 11:18 ` [Intel-gfx] [CI 09/12] drm/i915: Extract request submission from execlists Chris Wilson
@ 2021-02-03 11:18 ` Chris Wilson
  2021-02-03 11:18 ` [Intel-gfx] [CI 11/12] drm/i915: Extract request suspension from the execlists Chris Wilson
                   ` (2 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2021-02-03 11:18 UTC (permalink / raw)
  To: intel-gfx

In the process of preparing to reuse the request submission logic for
other backends, lift it out of the execlists backend.

While this operates on the common structs, we do have a bit of backend
knowledge, which is harmless for !lrc but still unsightly.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine.h        |  3 -
 .../drm/i915/gt/intel_execlists_submission.c  | 58 ++-----------------
 drivers/gpu/drm/i915/gt/intel_lrc_reg.h       |  3 +
 drivers/gpu/drm/i915/gt/selftest_execlists.c  |  2 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  3 +-
 drivers/gpu/drm/i915/i915_scheduler.c         | 44 ++++++++++++++
 drivers/gpu/drm/i915/i915_scheduler.h         |  3 +
 7 files changed, 56 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 8d9184920c51..cc2df80eb449 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -137,9 +137,6 @@ execlists_active_unlock_bh(struct intel_engine_execlists *execlists)
 	local_bh_enable(); /* restore softirq, and kick ksoftirqd! */
 }
 
-struct i915_request *
-execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
-
 static inline u32
 intel_read_status_page(const struct intel_engine_cs *engine, int reg)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 7b426e461ce3..9fe9df794126 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -360,56 +360,6 @@ assert_priority_queue(const struct i915_request *prev,
 	return rq_prio(prev) >= rq_prio(next);
 }
 
-static struct i915_request *
-__unwind_incomplete_requests(struct intel_engine_cs *engine)
-{
-	struct i915_request *rq, *rn, *active = NULL;
-	struct list_head *pl;
-	int prio = I915_PRIORITY_INVALID;
-
-	lockdep_assert_held(&engine->active.lock);
-
-	list_for_each_entry_safe_reverse(rq, rn,
-					 &engine->active.requests,
-					 sched.link) {
-		if (__i915_request_is_complete(rq)) {
-			list_del_init(&rq->sched.link);
-			continue;
-		}
-
-		__i915_request_unsubmit(rq);
-
-		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
-		if (rq_prio(rq) != prio) {
-			prio = rq_prio(rq);
-			pl = i915_sched_lookup_priolist(engine, prio);
-		}
-		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
-
-		list_move(&rq->sched.link, pl);
-		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
-
-		/* Check in case we rollback so far we wrap [size/2] */
-		if (intel_ring_direction(rq->ring,
-					 rq->tail,
-					 rq->ring->tail + 8) > 0)
-			rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
-
-		active = rq;
-	}
-
-	return active;
-}
-
-struct i915_request *
-execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
-{
-	struct intel_engine_cs *engine =
-		container_of(execlists, typeof(*engine), execlists);
-
-	return __unwind_incomplete_requests(engine);
-}
-
 static void
 execlists_context_status_change(struct i915_request *rq, unsigned long status)
 {
@@ -1081,7 +1031,7 @@ static void defer_active(struct intel_engine_cs *engine)
 {
 	struct i915_request *rq;
 
-	rq = __unwind_incomplete_requests(engine);
+	rq = __i915_sched_rewind_requests(engine);
 	if (!rq)
 		return;
 
@@ -1293,7 +1243,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 * the preemption, some of the unwound requests may
 			 * complete!
 			 */
-			__unwind_incomplete_requests(engine);
+			__i915_sched_rewind_requests(engine);
 
 			last = NULL;
 		} else if (timeslice_expired(engine, last)) {
@@ -2288,7 +2238,7 @@ static void execlists_capture(struct intel_engine_cs *engine)
 	 * which we return it to the queue for signaling.
 	 *
 	 * By removing them from the execlists queue, we also remove the
-	 * requests from being processed by __unwind_incomplete_requests()
+	 * requests from being processed by __intel_engine_rewind_requests()
 	 * during the intel_engine_reset(), and so they will *not* be replayed
 	 * afterwards.
 	 *
@@ -2898,7 +2848,7 @@ static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
 	/* Push back any incomplete requests for replay after the reset. */
 	rcu_read_lock();
 	spin_lock_irqsave(&engine->active.lock, flags);
-	__unwind_incomplete_requests(engine);
+	__i915_sched_rewind_requests(engine);
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 	rcu_read_unlock();
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index 41e5350a7a05..364656bedec7 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -92,4 +92,7 @@
 /* in Gen12 ID 0x7FF is reserved to indicate idle */
 #define GEN12_MAX_CONTEXT_HW_ID	(GEN11_MAX_CONTEXT_HW_ID - 1)
 
+#define CTX_DESC_RELOAD_PD BIT_ULL(1)
+#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
+
 #endif /* _INTEL_LRC_REG_H_ */
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 951e2bf867e1..9ec720b23a5f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -4582,7 +4582,7 @@ static int reset_virtual_engine(struct intel_gt *gt,
 
 	/* Fake a preemption event; failed of course */
 	spin_lock_irq(&engine->active.lock);
-	__unwind_incomplete_requests(engine);
+	__i915_sched_rewind_requests(engine);
 	spin_unlock_irq(&engine->active.lock);
 	GEM_BUG_ON(rq->engine != engine);
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index aa34a2f3d6d5..186c8601689e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -315,14 +315,13 @@ static void guc_reset_state(struct intel_context *ce,
 
 static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled)
 {
-	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_request *rq;
 	unsigned long flags;
 
 	spin_lock_irqsave(&engine->active.lock, flags);
 
 	/* Push back any incomplete requests for replay after the reset. */
-	rq = execlists_unwind_incomplete_requests(execlists);
+	rq = __i915_sched_rewind_requests(engine);
 	if (!rq)
 		goto out_unlock;
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index a56252d3546a..6c0654bc49a8 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -6,6 +6,9 @@
 
 #include <linux/mutex.h>
 
+#include "gt/intel_ring.h"
+#include "gt/intel_lrc_reg.h"
+
 #include "i915_drv.h"
 #include "i915_globals.h"
 #include "i915_request.h"
@@ -542,6 +545,47 @@ void i915_request_enqueue(struct i915_request *rq)
 		tasklet_hi_schedule(&engine->execlists.tasklet);
 }
 
+struct i915_request *
+__i915_sched_rewind_requests(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq, *rn, *active = NULL;
+	struct list_head *pl;
+	int prio = I915_PRIORITY_INVALID;
+
+	lockdep_assert_held(&engine->active.lock);
+
+	list_for_each_entry_safe_reverse(rq, rn,
+					 &engine->active.requests,
+					 sched.link) {
+		if (__i915_request_is_complete(rq)) {
+			list_del_init(&rq->sched.link);
+			continue;
+		}
+
+		__i915_request_unsubmit(rq);
+
+		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
+		if (rq_prio(rq) != prio) {
+			prio = rq_prio(rq);
+			pl = i915_sched_lookup_priolist(engine, prio);
+		}
+		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
+
+		list_move(&rq->sched.link, pl);
+		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+
+		/* Check in case we rollback so far we wrap [size/2] */
+		if (intel_ring_direction(rq->ring,
+					 rq->tail,
+					 rq->ring->tail + 8) > 0)
+			rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
+
+		active = rq;
+	}
+
+	return active;
+}
+
 void i915_sched_node_init(struct i915_sched_node *node)
 {
 	spin_lock_init(&node->lock);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 89d998f226e0..d3984f65b3a6 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -42,6 +42,9 @@ void i915_request_set_priority(struct i915_request *request, int prio);
 
 void i915_request_enqueue(struct i915_request *request);
 
+struct i915_request *
+__i915_sched_rewind_requests(struct intel_engine_cs *engine);
+
 struct list_head *
 i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [Intel-gfx] [CI 11/12] drm/i915: Extract request suspension from the execlists
  2021-02-03 11:17 [Intel-gfx] [CI 01/12] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
                   ` (8 preceding siblings ...)
  2021-02-03 11:18 ` [Intel-gfx] [CI 10/12] drm/i915: Extract request rewinding " Chris Wilson
@ 2021-02-03 11:18 ` Chris Wilson
  2021-02-03 11:18 ` [Intel-gfx] [CI 12/12] drm/i915: Extract the ability to defer and rerun a request later Chris Wilson
  2021-02-03 17:16 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for series starting with [CI,01/12] drm/i915/gt: Move engine setup out of set_default_submission Patchwork
  11 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2021-02-03 11:18 UTC (permalink / raw)
  To: intel-gfx

Make the ability to suspend and resume a request and its dependents
generic.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 .../drm/i915/gt/intel_execlists_submission.c  | 167 +-----------------
 drivers/gpu/drm/i915/gt/selftest_execlists.c  |   8 +-
 drivers/gpu/drm/i915/i915_scheduler.c         | 153 ++++++++++++++++
 drivers/gpu/drm/i915/i915_scheduler.h         |  10 ++
 4 files changed, 169 insertions(+), 169 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 9fe9df794126..310c248efa0b 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -1922,169 +1922,6 @@ static void post_process_csb(struct i915_request **port,
 		execlists_schedule_out(*port++);
 }
 
-static void __execlists_hold(struct i915_request *rq)
-{
-	LIST_HEAD(list);
-
-	do {
-		struct i915_dependency *p;
-
-		if (i915_request_is_active(rq))
-			__i915_request_unsubmit(rq);
-
-		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
-		list_move_tail(&rq->sched.link, &rq->engine->active.hold);
-		i915_request_set_hold(rq);
-		RQ_TRACE(rq, "on hold\n");
-
-		for_each_waiter(p, rq) {
-			struct i915_request *w =
-				container_of(p->waiter, typeof(*w), sched);
-
-			if (p->flags & I915_DEPENDENCY_WEAK)
-				continue;
-
-			/* Leave semaphores spinning on the other engines */
-			if (w->engine != rq->engine)
-				continue;
-
-			if (!i915_request_is_ready(w))
-				continue;
-
-			if (__i915_request_is_complete(w))
-				continue;
-
-			if (i915_request_on_hold(w))
-				continue;
-
-			list_move_tail(&w->sched.link, &list);
-		}
-
-		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
-	} while (rq);
-}
-
-static bool execlists_hold(struct intel_engine_cs *engine,
-			   struct i915_request *rq)
-{
-	if (i915_request_on_hold(rq))
-		return false;
-
-	spin_lock_irq(&engine->active.lock);
-
-	if (__i915_request_is_complete(rq)) { /* too late! */
-		rq = NULL;
-		goto unlock;
-	}
-
-	/*
-	 * Transfer this request onto the hold queue to prevent it
-	 * being resumbitted to HW (and potentially completed) before we have
-	 * released it. Since we may have already submitted following
-	 * requests, we need to remove those as well.
-	 */
-	GEM_BUG_ON(i915_request_on_hold(rq));
-	GEM_BUG_ON(rq->engine != engine);
-	__execlists_hold(rq);
-	GEM_BUG_ON(list_empty(&engine->active.hold));
-
-unlock:
-	spin_unlock_irq(&engine->active.lock);
-	return rq;
-}
-
-static bool hold_request(const struct i915_request *rq)
-{
-	struct i915_dependency *p;
-	bool result = false;
-
-	/*
-	 * If one of our ancestors is on hold, we must also be on hold,
-	 * otherwise we will bypass it and execute before it.
-	 */
-	rcu_read_lock();
-	for_each_signaler(p, rq) {
-		const struct i915_request *s =
-			container_of(p->signaler, typeof(*s), sched);
-
-		if (s->engine != rq->engine)
-			continue;
-
-		result = i915_request_on_hold(s);
-		if (result)
-			break;
-	}
-	rcu_read_unlock();
-
-	return result;
-}
-
-static void __execlists_unhold(struct i915_request *rq)
-{
-	LIST_HEAD(list);
-
-	do {
-		struct i915_dependency *p;
-
-		RQ_TRACE(rq, "hold release\n");
-
-		GEM_BUG_ON(!i915_request_on_hold(rq));
-		GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
-
-		i915_request_clear_hold(rq);
-		list_move_tail(&rq->sched.link,
-			       i915_sched_lookup_priolist(rq->engine,
-							  rq_prio(rq)));
-		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
-
-		/* Also release any children on this engine that are ready */
-		for_each_waiter(p, rq) {
-			struct i915_request *w =
-				container_of(p->waiter, typeof(*w), sched);
-
-			if (p->flags & I915_DEPENDENCY_WEAK)
-				continue;
-
-			/* Propagate any change in error status */
-			if (rq->fence.error)
-				i915_request_set_error_once(w, rq->fence.error);
-
-			if (w->engine != rq->engine)
-				continue;
-
-			if (!i915_request_on_hold(w))
-				continue;
-
-			/* Check that no other parents are also on hold */
-			if (hold_request(w))
-				continue;
-
-			list_move_tail(&w->sched.link, &list);
-		}
-
-		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
-	} while (rq);
-}
-
-static void execlists_unhold(struct intel_engine_cs *engine,
-			     struct i915_request *rq)
-{
-	spin_lock_irq(&engine->active.lock);
-
-	/*
-	 * Move this request back to the priority queue, and all of its
-	 * children and grandchildren that were suspended along with it.
-	 */
-	__execlists_unhold(rq);
-
-	if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
-		engine->execlists.queue_priority_hint = rq_prio(rq);
-		tasklet_hi_schedule(&engine->execlists.tasklet);
-	}
-
-	spin_unlock_irq(&engine->active.lock);
-}
-
 struct execlists_capture {
 	struct work_struct work;
 	struct i915_request *rq;
@@ -2117,7 +1954,7 @@ static void execlists_capture_work(struct work_struct *work)
 	i915_gpu_coredump_put(cap->error);
 
 	/* Return this request and all that depend upon it for signaling */
-	execlists_unhold(engine, cap->rq);
+	i915_sched_resume_request(engine, cap->rq);
 	i915_request_put(cap->rq);
 
 	kfree(cap);
@@ -2251,7 +2088,7 @@ static void execlists_capture(struct intel_engine_cs *engine)
 	 * simply hold that request accountable for being non-preemptible
 	 * long enough to force the reset.
 	 */
-	if (!execlists_hold(engine, cap->rq))
+	if (!i915_sched_suspend_request(engine, cap->rq))
 		goto err_rq;
 
 	INIT_WORK(&cap->work, execlists_capture_work);
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 9ec720b23a5f..7bab147e4421 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -608,7 +608,7 @@ static int live_hold_reset(void *arg)
 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
 
 		i915_request_get(rq);
-		execlists_hold(engine, rq);
+		i915_sched_suspend_request(engine, rq);
 		GEM_BUG_ON(!i915_request_on_hold(rq));
 
 		__intel_engine_reset_bh(engine, NULL);
@@ -630,7 +630,7 @@ static int live_hold_reset(void *arg)
 		GEM_BUG_ON(!i915_request_on_hold(rq));
 
 		/* But is resubmitted on release */
-		execlists_unhold(engine, rq);
+		i915_sched_resume_request(engine, rq);
 		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 			pr_err("%s: held request did not complete!\n",
 			       engine->name);
@@ -4587,7 +4587,7 @@ static int reset_virtual_engine(struct intel_gt *gt,
 	GEM_BUG_ON(rq->engine != engine);
 
 	/* Reset the engine while keeping our active request on hold */
-	execlists_hold(engine, rq);
+	i915_sched_suspend_request(engine, rq);
 	GEM_BUG_ON(!i915_request_on_hold(rq));
 
 	__intel_engine_reset_bh(engine, NULL);
@@ -4610,7 +4610,7 @@ static int reset_virtual_engine(struct intel_gt *gt,
 	GEM_BUG_ON(!i915_request_on_hold(rq));
 
 	/* But is resubmitted on release */
-	execlists_unhold(engine, rq);
+	i915_sched_resume_request(engine, rq);
 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 		pr_err("%s: held request did not complete!\n",
 		       engine->name);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 6c0654bc49a8..a5df27061c3c 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -586,6 +586,159 @@ __i915_sched_rewind_requests(struct intel_engine_cs *engine)
 	return active;
 }
 
+bool __i915_sched_suspend_request(struct intel_engine_cs *engine,
+				  struct i915_request *rq)
+{
+	LIST_HEAD(list);
+
+	lockdep_assert_held(&engine->active.lock);
+	GEM_BUG_ON(rq->engine != engine);
+
+	if (__i915_request_is_complete(rq)) /* too late! */
+		return false;
+
+	if (i915_request_on_hold(rq))
+		return false;
+
+	ENGINE_TRACE(engine, "suspending request %llx:%lld\n",
+		     rq->fence.context, rq->fence.seqno);
+
+	/*
+	 * Transfer this request onto the hold queue to prevent it
+	 * being resumbitted to HW (and potentially completed) before we have
+	 * released it. Since we may have already submitted following
+	 * requests, we need to remove those as well.
+	 */
+	do {
+		struct i915_dependency *p;
+
+		if (i915_request_is_active(rq))
+			__i915_request_unsubmit(rq);
+
+		list_move_tail(&rq->sched.link, &engine->active.hold);
+		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+		i915_request_set_hold(rq);
+		RQ_TRACE(rq, "on hold\n");
+
+		for_each_waiter(p, rq) {
+			struct i915_request *w =
+				container_of(p->waiter, typeof(*w), sched);
+
+			if (p->flags & I915_DEPENDENCY_WEAK)
+				continue;
+
+			/* Leave semaphores spinning on the other engines */
+			if (w->engine != engine)
+				continue;
+
+			if (!i915_request_is_ready(w))
+				continue;
+
+			if (__i915_request_is_complete(w))
+				continue;
+
+			if (i915_request_on_hold(w)) /* acts as a visited bit */
+				continue;
+
+			list_move_tail(&w->sched.link, &list);
+		}
+
+		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
+	} while (rq);
+
+	GEM_BUG_ON(list_empty(&engine->active.hold));
+
+	return true;
+}
+
+bool i915_sched_suspend_request(struct intel_engine_cs *engine,
+				struct i915_request *rq)
+{
+	bool result;
+
+	if (i915_request_on_hold(rq))
+		return false;
+
+	spin_lock_irq(&engine->active.lock);
+	result = __i915_sched_suspend_request(engine, rq);
+	spin_unlock_irq(&engine->active.lock);
+
+	return result;
+}
+
+void __i915_sched_resume_request(struct intel_engine_cs *engine,
+				 struct i915_request *rq)
+{
+	LIST_HEAD(list);
+
+	lockdep_assert_held(&engine->active.lock);
+
+	if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
+		engine->execlists.queue_priority_hint = rq_prio(rq);
+		tasklet_hi_schedule(&engine->execlists.tasklet);
+	}
+
+	if (!i915_request_on_hold(rq))
+		return;
+
+	ENGINE_TRACE(engine, "resuming request %llx:%lld\n",
+		     rq->fence.context, rq->fence.seqno);
+
+	/*
+	 * Move this request back to the priority queue, and all of its
+	 * children and grandchildren that were suspended along with it.
+	 */
+	do {
+		struct i915_dependency *p;
+
+		RQ_TRACE(rq, "hold release\n");
+
+		GEM_BUG_ON(!i915_request_on_hold(rq));
+		GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
+
+		i915_request_clear_hold(rq);
+		list_del_init(&rq->sched.link);
+
+		queue_request(engine, rq);
+
+		/* Also release any children on this engine that are ready */
+		for_each_waiter(p, rq) {
+			struct i915_request *w =
+				container_of(p->waiter, typeof(*w), sched);
+
+			if (p->flags & I915_DEPENDENCY_WEAK)
+				continue;
+
+			/* Propagate any change in error status */
+			if (rq->fence.error)
+				i915_request_set_error_once(w, rq->fence.error);
+
+			if (w->engine != engine)
+				continue;
+
+			/* We also treat the on-hold status as a visited bit */
+			if (!i915_request_on_hold(w))
+				continue;
+
+			/* Check that no other parents are also on hold [BFS] */
+			if (hold_request(w))
+				continue;
+
+			list_move_tail(&w->sched.link, &list);
+		}
+
+		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
+	} while (rq);
+}
+
+void i915_sched_resume_request(struct intel_engine_cs *engine,
+			       struct i915_request *rq)
+{
+	spin_lock_irq(&engine->active.lock);
+	__i915_sched_resume_request(engine, rq);
+	spin_unlock_irq(&engine->active.lock);
+}
+
 void i915_sched_node_init(struct i915_sched_node *node)
 {
 	spin_lock_init(&node->lock);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index d3984f65b3a6..9860459fedb1 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -45,6 +45,16 @@ void i915_request_enqueue(struct i915_request *request);
 struct i915_request *
 __i915_sched_rewind_requests(struct intel_engine_cs *engine);
 
+bool __i915_sched_suspend_request(struct intel_engine_cs *engine,
+				  struct i915_request *rq);
+void __i915_sched_resume_request(struct intel_engine_cs *engine,
+				 struct i915_request *request);
+
+bool i915_sched_suspend_request(struct intel_engine_cs *engine,
+				struct i915_request *request);
+void i915_sched_resume_request(struct intel_engine_cs *engine,
+			       struct i915_request *rq);
+
 struct list_head *
 i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [Intel-gfx] [CI 12/12] drm/i915: Extract the ability to defer and rerun a request later
  2021-02-03 11:17 [Intel-gfx] [CI 01/12] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
                   ` (9 preceding siblings ...)
  2021-02-03 11:18 ` [Intel-gfx] [CI 11/12] drm/i915: Extract request suspension from the execlists Chris Wilson
@ 2021-02-03 11:18 ` Chris Wilson
  2021-02-03 17:16 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for series starting with [CI,01/12] drm/i915/gt: Move engine setup out of set_default_submission Patchwork
  11 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2021-02-03 11:18 UTC (permalink / raw)
  To: intel-gfx

Lift the ability to defer a request until later from execlists into the
common layer.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 .../drm/i915/gt/intel_execlists_submission.c  | 57 +++--------------
 drivers/gpu/drm/i915/i915_scheduler.c         | 63 +++++++++++++++++--
 drivers/gpu/drm/i915/i915_scheduler.h         |  5 +-
 3 files changed, 67 insertions(+), 58 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 310c248efa0b..9d24d91f5ae7 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -979,54 +979,6 @@ static void virtual_xfer_context(struct virtual_engine *ve,
 	}
 }
 
-static void defer_request(struct i915_request *rq, struct list_head * const pl)
-{
-	LIST_HEAD(list);
-
-	/*
-	 * We want to move the interrupted request to the back of
-	 * the round-robin list (i.e. its priority level), but
-	 * in doing so, we must then move all requests that were in
-	 * flight and were waiting for the interrupted request to
-	 * be run after it again.
-	 */
-	do {
-		struct i915_dependency *p;
-
-		GEM_BUG_ON(i915_request_is_active(rq));
-		list_move_tail(&rq->sched.link, pl);
-
-		for_each_waiter(p, rq) {
-			struct i915_request *w =
-				container_of(p->waiter, typeof(*w), sched);
-
-			if (p->flags & I915_DEPENDENCY_WEAK)
-				continue;
-
-			/* Leave semaphores spinning on the other engines */
-			if (w->engine != rq->engine)
-				continue;
-
-			/* No waiter should start before its signaler */
-			GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) &&
-				   __i915_request_has_started(w) &&
-				   !__i915_request_is_complete(rq));
-
-			if (!i915_request_is_ready(w))
-				continue;
-
-			if (rq_prio(w) < rq_prio(rq))
-				continue;
-
-			GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
-			GEM_BUG_ON(i915_request_is_active(w));
-			list_move_tail(&w->sched.link, &list);
-		}
-
-		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
-	} while (rq);
-}
-
 static void defer_active(struct intel_engine_cs *engine)
 {
 	struct i915_request *rq;
@@ -1035,7 +987,14 @@ static void defer_active(struct intel_engine_cs *engine)
 	if (!rq)
 		return;
 
-	defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
+	/*
+	 * We want to move the interrupted request to the back of
+	 * the round-robin list (i.e. its priority level), but
+	 * in doing so, we must then move all requests that were in
+	 * flight and were waiting for the interrupted request to
+	 * be run after it again.
+	 */
+	__i915_sched_defer_request(engine, rq);
 }
 
 static bool
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index a5df27061c3c..641141f3ce10 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -179,8 +179,8 @@ static void assert_priolists(struct intel_engine_execlists * const execlists)
 	}
 }
 
-struct list_head *
-i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
+static struct list_head *
+lookup_priolist(struct intel_engine_cs *engine, int prio)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_priolist *p;
@@ -332,7 +332,7 @@ static void __i915_request_set_priority(struct i915_request *rq, int prio)
 	struct list_head *pos = &rq->sched.signalers_list;
 	struct list_head *plist;
 
-	plist = i915_sched_lookup_priolist(engine, prio);
+	plist = lookup_priolist(engine, prio);
 
 	/*
 	 * Recursively bump all dependent priorities to match the new request.
@@ -463,12 +463,63 @@ void i915_request_set_priority(struct i915_request *rq, int prio)
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
+void __i915_sched_defer_request(struct intel_engine_cs *engine,
+				struct i915_request *rq)
+{
+	struct list_head *pl;
+	LIST_HEAD(list);
+
+	lockdep_assert_held(&engine->active.lock);
+	GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags));
+
+	/*
+	 * When we defer a request, we must maintain its order with respect
+	 * to those that are waiting upon it. So we traverse its chain of
+	 * waiters and move any that are earlier than the request to after it.
+	 */
+	pl = lookup_priolist(engine, rq_prio(rq));
+	do {
+		struct i915_dependency *p;
+
+		GEM_BUG_ON(i915_request_is_active(rq));
+		list_move_tail(&rq->sched.link, pl);
+
+		for_each_waiter(p, rq) {
+			struct i915_request *w =
+				container_of(p->waiter, typeof(*w), sched);
+
+			if (p->flags & I915_DEPENDENCY_WEAK)
+				continue;
+
+			/* Leave semaphores spinning on the other engines */
+			if (w->engine != engine)
+				continue;
+
+			/* No waiter should start before its signaler */
+			GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) &&
+				   __i915_request_has_started(w) &&
+				   !__i915_request_is_complete(rq));
+
+			if (!i915_request_is_ready(w))
+				continue;
+
+			if (rq_prio(w) < rq_prio(rq))
+				continue;
+
+			GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
+			GEM_BUG_ON(i915_request_is_active(w));
+			list_move_tail(&w->sched.link, &list);
+		}
+
+		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
+	} while (rq);
+}
+
 static void queue_request(struct intel_engine_cs *engine,
 			  struct i915_request *rq)
 {
 	GEM_BUG_ON(!list_empty(&rq->sched.link));
-	list_add_tail(&rq->sched.link,
-		      i915_sched_lookup_priolist(engine, rq_prio(rq)));
+	list_add_tail(&rq->sched.link, lookup_priolist(engine, rq_prio(rq)));
 	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
 }
 
@@ -567,7 +618,7 @@ __i915_sched_rewind_requests(struct intel_engine_cs *engine)
 		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
 		if (rq_prio(rq) != prio) {
 			prio = rq_prio(rq);
-			pl = i915_sched_lookup_priolist(engine, prio);
+			pl = lookup_priolist(engine, prio);
 		}
 		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 9860459fedb1..00ce0a9d519d 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -44,6 +44,8 @@ void i915_request_enqueue(struct i915_request *request);
 
 struct i915_request *
 __i915_sched_rewind_requests(struct intel_engine_cs *engine);
+void __i915_sched_defer_request(struct intel_engine_cs *engine,
+				struct i915_request *request);
 
 bool __i915_sched_suspend_request(struct intel_engine_cs *engine,
 				  struct i915_request *rq);
@@ -55,9 +57,6 @@ bool i915_sched_suspend_request(struct intel_engine_cs *engine,
 void i915_sched_resume_request(struct intel_engine_cs *engine,
 			       struct i915_request *rq);
 
-struct list_head *
-i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
-
 void __i915_priolist_free(struct i915_priolist *p);
 static inline void i915_priolist_free(struct i915_priolist *p)
 {
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [Intel-gfx] ✗ Fi.CI.BUILD: failure for series starting with [CI,01/12] drm/i915/gt: Move engine setup out of set_default_submission
  2021-02-03 11:17 [Intel-gfx] [CI 01/12] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
                   ` (10 preceding siblings ...)
  2021-02-03 11:18 ` [Intel-gfx] [CI 12/12] drm/i915: Extract the ability to defer and rerun a request later Chris Wilson
@ 2021-02-03 17:16 ` Patchwork
  11 siblings, 0 replies; 13+ messages in thread
From: Patchwork @ 2021-02-03 17:16 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [CI,01/12] drm/i915/gt: Move engine setup out of set_default_submission
URL   : https://patchwork.freedesktop.org/series/86639/
State : failure

== Summary ==

Applying: drm/i915/gt: Move engine setup out of set_default_submission
Using index info to reconstruct a base tree...
M	drivers/gpu/drm/i915/gt/intel_execlists_submission.c
M	drivers/gpu/drm/i915/gt/intel_ring_submission.c
M	drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
Falling back to patching base and 3-way merge...
Auto-merging drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
Auto-merging drivers/gpu/drm/i915/gt/intel_ring_submission.c
Auto-merging drivers/gpu/drm/i915/gt/intel_execlists_submission.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/gt/intel_execlists_submission.c
error: Failed to merge in the changes.
hint: Use 'git am --show-current-patch=diff' to see the failed patch
Patch failed at 0001 drm/i915/gt: Move engine setup out of set_default_submission
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2021-02-03 17:16 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-03 11:17 [Intel-gfx] [CI 01/12] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
2021-02-03 11:17 ` [Intel-gfx] [CI 02/12] drm/i915/gt: Move submission_method into intel_gt Chris Wilson
2021-02-03 11:17 ` [Intel-gfx] [CI 03/12] drm/i915/gt: Move CS interrupt handler to the backend Chris Wilson
2021-02-03 11:17 ` [Intel-gfx] [CI 04/12] drm/i915: Replace engine->schedule() with a known request operation Chris Wilson
2021-02-03 11:17 ` [Intel-gfx] [CI 05/12] drm/i915: Restructure priority inheritance Chris Wilson
2021-02-03 11:18 ` [Intel-gfx] [CI 06/12] drm/i915/selftests: Measure set-priority duration Chris Wilson
2021-02-03 11:18 ` [Intel-gfx] [CI 07/12] drm/i915/selftests: Exercise priority inheritance around an engine loop Chris Wilson
2021-02-03 11:18 ` [Intel-gfx] [CI 08/12] drm/i915: Improve DFS for priority inheritance Chris Wilson
2021-02-03 11:18 ` [Intel-gfx] [CI 09/12] drm/i915: Extract request submission from execlists Chris Wilson
2021-02-03 11:18 ` [Intel-gfx] [CI 10/12] drm/i915: Extract request rewinding " Chris Wilson
2021-02-03 11:18 ` [Intel-gfx] [CI 11/12] drm/i915: Extract request suspension from the execlists Chris Wilson
2021-02-03 11:18 ` [Intel-gfx] [CI 12/12] drm/i915: Extract the ability to defer and rerun a request later Chris Wilson
2021-02-03 17:16 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for series starting with [CI,01/12] drm/i915/gt: Move engine setup out of set_default_submission Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.