* [Intel-gfx] [CI 01/14] drm/i915/gt: Move engine setup out of set_default_submission
@ 2021-02-02 15:14 Chris Wilson
  2021-02-02 15:14 ` [Intel-gfx] [CI 02/14] drm/i915/gt: Move submission_method into intel_gt Chris Wilson
                   ` (13 more replies)
  0 siblings, 14 replies; 28+ messages in thread
From: Chris Wilson @ 2021-02-02 15:14 UTC (permalink / raw)
  To: intel-gfx

Now that we no longer switch back and forth between GuC and execlists,
we no longer need to restore the backend's vfuncs and can leave them set
after initialisation. The only catch is that we still lose submission on
wedging, and so still need to reset the submit_request vfunc on unwedging.
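
For illustration, a minimal sketch of the wedge/unwedge flow that keeps
submit_request special (engine_set_wedged/engine_unset_wedged are
hypothetical stand-ins for the intel_reset.c paths, which are not part
of this patch):

	static void nop_submit_request(struct i915_request *rq)
	{
		/* Sketch: on a wedged GT the request is failed with -EIO
		 * and signalled immediately, never touching the hardware.
		 */
	}

	static void engine_set_wedged(struct intel_engine_cs *engine)
	{
		/* Wedging replaces only the submission entry point. */
		engine->submit_request = nop_submit_request;
	}

	static void engine_unset_wedged(struct intel_engine_cs *engine)
	{
		/* Everything else survived wedging untouched, so restoring
		 * submit_request is all set_default_submission() does now.
		 */
		engine->set_default_submission(engine);
	}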

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 .../drm/i915/gt/intel_execlists_submission.c  | 46 ++++++++---------
 .../gpu/drm/i915/gt/intel_ring_submission.c   |  4 --
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 50 ++++++++-----------
 3 files changed, 44 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 45a8ac152b88..5d824e1cfcba 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -3089,29 +3089,6 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->submit_request = execlists_submit_request;
 	engine->schedule = i915_schedule;
 	engine->execlists.tasklet.callback = execlists_submission_tasklet;
-
-	engine->reset.prepare = execlists_reset_prepare;
-	engine->reset.rewind = execlists_reset_rewind;
-	engine->reset.cancel = execlists_reset_cancel;
-	engine->reset.finish = execlists_reset_finish;
-
-	engine->park = execlists_park;
-	engine->unpark = NULL;
-
-	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
-	if (!intel_vgpu_active(engine->i915)) {
-		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
-		if (can_preempt(engine)) {
-			engine->flags |= I915_ENGINE_HAS_PREEMPTION;
-			if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
-				engine->flags |= I915_ENGINE_HAS_TIMESLICES;
-		}
-	}
-
-	if (intel_engine_has_preemption(engine))
-		engine->emit_bb_start = gen8_emit_bb_start;
-	else
-		engine->emit_bb_start = gen8_emit_bb_start_noarb;
 }
 
 static void execlists_shutdown(struct intel_engine_cs *engine)
@@ -3142,6 +3119,14 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 	engine->cops = &execlists_context_ops;
 	engine->request_alloc = execlists_request_alloc;
 
+	engine->reset.prepare = execlists_reset_prepare;
+	engine->reset.rewind = execlists_reset_rewind;
+	engine->reset.cancel = execlists_reset_cancel;
+	engine->reset.finish = execlists_reset_finish;
+
+	engine->park = execlists_park;
+	engine->unpark = NULL;
+
 	engine->emit_flush = gen8_emit_flush_xcs;
 	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
 	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
@@ -3162,6 +3147,21 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 		 * until a more refined solution exists.
 		 */
 	}
+
+	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
+	if (!intel_vgpu_active(engine->i915)) {
+		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
+		if (can_preempt(engine)) {
+			engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+			if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+				engine->flags |= I915_ENGINE_HAS_TIMESLICES;
+		}
+	}
+
+	if (intel_engine_has_preemption(engine))
+		engine->emit_bb_start = gen8_emit_bb_start;
+	else
+		engine->emit_bb_start = gen8_emit_bb_start_noarb;
 }
 
 static void logical_ring_default_irqs(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 9c2c605d7a92..3cb2ce503544 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -969,14 +969,10 @@ static void gen6_bsd_submit_request(struct i915_request *request)
 static void i9xx_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->submit_request = i9xx_submit_request;
-
-	engine->park = NULL;
-	engine->unpark = NULL;
 }
 
 static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
 {
-	i9xx_set_default_submission(engine);
 	engine->submit_request = gen6_bsd_submit_request;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 92688a9b6717..f72faa0b8339 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -608,35 +608,6 @@ static int guc_resume(struct intel_engine_cs *engine)
 static void guc_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->submit_request = guc_submit_request;
-	engine->schedule = i915_schedule;
-	engine->execlists.tasklet.callback = guc_submission_tasklet;
-
-	engine->reset.prepare = guc_reset_prepare;
-	engine->reset.rewind = guc_reset_rewind;
-	engine->reset.cancel = guc_reset_cancel;
-	engine->reset.finish = guc_reset_finish;
-
-	engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
-	engine->flags |= I915_ENGINE_HAS_PREEMPTION;
-
-	/*
-	 * TODO: GuC supports timeslicing and semaphores as well, but they're
-	 * handled by the firmware so some minor tweaks are required before
-	 * enabling.
-	 *
-	 * engine->flags |= I915_ENGINE_HAS_TIMESLICES;
-	 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
-	 */
-
-	engine->emit_bb_start = gen8_emit_bb_start;
-
-	/*
-	 * For the breadcrumb irq to work we need the interrupts to stay
-	 * enabled. However, on all platforms on which we'll have support for
-	 * GuC submission we don't allow disabling the interrupts at runtime, so
-	 * we're always safe with the current flow.
-	 */
-	GEM_BUG_ON(engine->irq_enable || engine->irq_disable);
 }
 
 static void guc_release(struct intel_engine_cs *engine)
@@ -658,6 +629,13 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
 	engine->cops = &guc_context_ops;
 	engine->request_alloc = guc_request_alloc;
 
+	engine->schedule = i915_schedule;
+
+	engine->reset.prepare = guc_reset_prepare;
+	engine->reset.rewind = guc_reset_rewind;
+	engine->reset.cancel = guc_reset_cancel;
+	engine->reset.finish = guc_reset_finish;
+
 	engine->emit_flush = gen8_emit_flush_xcs;
 	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
 	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
@@ -666,6 +644,20 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
 		engine->emit_flush = gen12_emit_flush_xcs;
 	}
 	engine->set_default_submission = guc_set_default_submission;
+
+	engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
+	engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+
+	/*
+	 * TODO: GuC supports timeslicing and semaphores as well, but they're
+	 * handled by the firmware so some minor tweaks are required before
+	 * enabling.
+	 *
+	 * engine->flags |= I915_ENGINE_HAS_TIMESLICES;
+	 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
+	 */
+
+	engine->emit_bb_start = gen8_emit_bb_start;
 }
 
 static void rcs_submission_override(struct intel_engine_cs *engine)
-- 
2.20.1

* [Intel-gfx] [CI 02/14] drm/i915/gt: Move submission_method into intel_gt
  2021-02-02 15:14 [Intel-gfx] [CI 01/14] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
@ 2021-02-02 15:14 ` Chris Wilson
  2021-02-02 15:14 ` [Intel-gfx] [CI 03/14] drm/i915/gt: Move CS interrupt handler to the backend Chris Wilson
                   ` (12 subsequent siblings)
  13 siblings, 0 replies; 28+ messages in thread
From: Chris Wilson @ 2021-02-02 15:14 UTC (permalink / raw)
  To: intel-gfx

Since we set up the submission method for the engines only once, it is
easy to record it as an enum and use that instead of probing into the
backends.
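
A minimal sketch of why the enum ordering matters (the enum and the
first helper are taken from this patch; intel_gt_has_execlists is
hypothetical, shown only to illustrate the range checks such as the
`> INTEL_SUBMISSION_RING` test used in the selftests):

	enum intel_submission_method {
		INTEL_SUBMISSION_RING,	/* legacy ringbuffer */
		INTEL_SUBMISSION_ELSP,	/* execlists */
		INTEL_SUBMISSION_GUC,	/* GuC firmware scheduler */
	};

	static inline bool intel_engine_uses_guc(const struct intel_engine_cs *engine)
	{
		return engine->gt->submission_method >= INTEL_SUBMISSION_GUC;
	}

	/* hypothetical: both execlists and GuC build on the ELSP machinery */
	static inline bool intel_gt_has_execlists(const struct intel_gt *gt)
	{
		return gt->submission_method > INTEL_SUBMISSION_RING;
	}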

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine.h               |  8 +++++++-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c            | 12 ++++++++----
 drivers/gpu/drm/i915/gt/intel_execlists_submission.c |  8 --------
 drivers/gpu/drm/i915/gt/intel_execlists_submission.h |  3 ---
 drivers/gpu/drm/i915/gt/intel_gt_types.h             |  7 +++++++
 drivers/gpu/drm/i915/gt/intel_reset.c                |  7 +++----
 drivers/gpu/drm/i915/gt/selftest_execlists.c         |  2 +-
 drivers/gpu/drm/i915/gt/selftest_ring_submission.c   |  2 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c    |  5 -----
 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h    |  1 -
 drivers/gpu/drm/i915/i915_perf.c                     | 10 +++++-----
 11 files changed, 32 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 47ee8578e511..8d9184920c51 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -13,8 +13,9 @@
 #include "i915_reg.h"
 #include "i915_request.h"
 #include "i915_selftest.h"
-#include "gt/intel_timeline.h"
 #include "intel_engine_types.h"
+#include "intel_gt_types.h"
+#include "intel_timeline.h"
 #include "intel_workarounds.h"
 
 struct drm_printer;
@@ -262,6 +263,11 @@ void intel_engine_init_active(struct intel_engine_cs *engine,
 #define ENGINE_MOCK	1
 #define ENGINE_VIRTUAL	2
 
+static inline bool intel_engine_uses_guc(const struct intel_engine_cs *engine)
+{
+	return engine->gt->submission_method >= INTEL_SUBMISSION_GUC;
+}
+
 static inline bool
 intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 56fb9cece71b..dab8d734e272 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -891,12 +891,16 @@ int intel_engines_init(struct intel_gt *gt)
 	enum intel_engine_id id;
 	int err;
 
-	if (intel_uc_uses_guc_submission(&gt->uc))
+	if (intel_uc_uses_guc_submission(&gt->uc)) {
+		gt->submission_method = INTEL_SUBMISSION_GUC;
 		setup = intel_guc_submission_setup;
-	else if (HAS_EXECLISTS(gt->i915))
+	} else if (HAS_EXECLISTS(gt->i915)) {
+		gt->submission_method = INTEL_SUBMISSION_ELSP;
 		setup = intel_execlists_submission_setup;
-	else
+	} else {
+		gt->submission_method = INTEL_SUBMISSION_RING;
 		setup = intel_ring_submission_setup;
+	}
 
 	for_each_engine(engine, gt, id) {
 		err = engine_setup_common(engine);
@@ -1467,7 +1471,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
 		drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
 	}
 
-	if (intel_engine_in_guc_submission_mode(engine)) {
+	if (intel_engine_uses_guc(engine)) {
 		/* nothing to print yet */
 	} else if (HAS_EXECLISTS(dev_priv)) {
 		struct i915_request * const *port, *rq;
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 5d824e1cfcba..4ddd2099a931 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -1757,7 +1757,6 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
 	 */
 	GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
 		   !reset_in_progress(execlists));
-	GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
 
 	/*
 	 * Note that csb_write, csb_status may be either in HWSP or mmio.
@@ -3897,13 +3896,6 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
-bool
-intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
-{
-	return engine->set_default_submission ==
-	       execlists_set_default_submission;
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftest_execlists.c"
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h
index a8fd7adefd82..f7bd3fccfee8 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h
@@ -41,7 +41,4 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
 				     const struct intel_engine_cs *master,
 				     const struct intel_engine_cs *sibling);
 
-bool
-intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
-
 #endif /* __INTEL_EXECLISTS_SUBMISSION_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 91d20daca536..626af37c7790 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -29,6 +29,12 @@ struct i915_ggtt;
 struct intel_engine_cs;
 struct intel_uncore;
 
+enum intel_submission_method {
+	INTEL_SUBMISSION_RING,
+	INTEL_SUBMISSION_ELSP,
+	INTEL_SUBMISSION_GUC,
+};
+
 struct intel_gt {
 	struct drm_i915_private *i915;
 	struct intel_uncore *uncore;
@@ -108,6 +114,7 @@ struct intel_gt {
 	struct intel_engine_cs *engine[I915_NUM_ENGINES];
 	struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1]
 					    [MAX_ENGINE_INSTANCE + 1];
+	enum intel_submission_method submission_method;
 
 	/*
 	 * Default address space (either GGTT or ppGTT depending on arch).
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index c8cf3981ad7f..3577d55401f7 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1113,7 +1113,6 @@ static int intel_gt_reset_engine(struct intel_engine_cs *engine)
 int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
 {
 	struct intel_gt *gt = engine->gt;
-	bool uses_guc = intel_engine_in_guc_submission_mode(engine);
 	int ret;
 
 	ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags);
@@ -1129,10 +1128,10 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
 			   "Resetting %s for %s\n", engine->name, msg);
 	atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);
 
-	if (!uses_guc)
-		ret = intel_gt_reset_engine(engine);
-	else
+	if (intel_engine_uses_guc(engine))
 		ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine);
+	else
+		ret = intel_gt_reset_engine(engine);
 	if (ret) {
 		/* If we fail here, we expect to fallback to a global reset */
 		ENGINE_TRACE(engine, "Failed to reset, err: %d\n", ret);
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 5d7fac383add..9304a35384aa 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -4715,7 +4715,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_virtual_reset),
 	};
 
-	if (!HAS_EXECLISTS(i915))
+	if (i915->gt.submission_method != INTEL_SUBMISSION_ELSP)
 		return 0;
 
 	if (intel_gt_is_wedged(&i915->gt))
diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
index 3350e7c995bc..6cd9f6bc240c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
@@ -291,7 +291,7 @@ int intel_ring_submission_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_ctx_switch_wa),
 	};
 
-	if (HAS_EXECLISTS(i915))
+	if (i915->gt.submission_method > INTEL_SUBMISSION_RING)
 		return 0;
 
 	return intel_gt_live_subtests(tests, &i915->gt);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index f72faa0b8339..17b551a0c89f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -745,8 +745,3 @@ void intel_guc_submission_init_early(struct intel_guc *guc)
 {
 	guc->submission_selected = __guc_submission_selected(guc);
 }
-
-bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine)
-{
-	return engine->set_default_submission == guc_set_default_submission;
-}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
index 5f7b9e6347d0..3f7005018939 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
@@ -20,7 +20,6 @@ void intel_guc_submission_fini(struct intel_guc *guc);
 int intel_guc_preempt_work_create(struct intel_guc *guc);
 void intel_guc_preempt_work_destroy(struct intel_guc *guc);
 int intel_guc_submission_setup(struct intel_engine_cs *engine);
-bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine);
 
 static inline bool intel_guc_submission_is_supported(struct intel_guc *guc)
 {
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 112ba5f2ce90..89665e14ab01 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1273,11 +1273,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 	case 8:
 	case 9:
 	case 10:
-		if (intel_engine_in_execlists_submission_mode(ce->engine)) {
-			stream->specific_ctx_id_mask =
-				(1U << GEN8_CTX_ID_WIDTH) - 1;
-			stream->specific_ctx_id = stream->specific_ctx_id_mask;
-		} else {
+		if (intel_engine_uses_guc(ce->engine)) {
 			/*
 			 * When using GuC, the context descriptor we write in
 			 * i915 is read by GuC and rewritten before it's
@@ -1296,6 +1292,10 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 			 */
 			stream->specific_ctx_id_mask =
 				(1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
+		} else {
+			stream->specific_ctx_id_mask =
+				(1U << GEN8_CTX_ID_WIDTH) - 1;
+			stream->specific_ctx_id = stream->specific_ctx_id_mask;
 		}
 		break;
 
-- 
2.20.1

* [Intel-gfx] [CI 03/14] drm/i915/gt: Move CS interrupt handler to the backend
  2021-02-02 15:14 [Intel-gfx] [CI 01/14] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
  2021-02-02 15:14 ` [Intel-gfx] [CI 02/14] drm/i915/gt: Move submission_method into intel_gt Chris Wilson
@ 2021-02-02 15:14 ` Chris Wilson
  2021-02-02 15:49   ` Tvrtko Ursulin
  2021-02-02 16:15   ` [Intel-gfx] [PATCH v2] " Chris Wilson
  2021-02-02 15:14 ` [Intel-gfx] [CI 04/14] drm/i915: Replace engine->schedule() with a known request operation Chris Wilson
                   ` (11 subsequent siblings)
  13 siblings, 2 replies; 28+ messages in thread
From: Chris Wilson @ 2021-02-02 15:14 UTC (permalink / raw)
  To: intel-gfx

The different submission backends each have their own preferred
behaviour and interrupt setup. Let each handle its own interrupts.

This becomes more useful later as we extract the backend-specific
auxiliary state used by the interrupt handler.
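
In outline (a sketch condensed from the hunks below), setup installs a
per-backend handler once, and the GT interrupt code then dispatches
through a single indirection without knowing the backend:

	/* Chosen once at engine setup; each backend installs exactly one of: */
	engine->irq_handler = execlists_irq_handler;	/* execlists: CS events + tasklet */
	engine->irq_handler = cs_irq_handler;		/* GuC: breadcrumbs + tasklet */
	engine->irq_handler = irq_handler;		/* ring: breadcrumbs only */

	/* Backend-agnostic dispatch from the GT interrupt code: */
	static inline void intel_engine_cs_irq(struct intel_engine_cs *engine, u32 iir)
	{
		engine->irq_handler(engine, iir);
	}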

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  7 ++
 drivers/gpu/drm/i915/gt/intel_engine_types.h  | 14 +---
 .../drm/i915/gt/intel_execlists_submission.c  | 40 +++++++++
 drivers/gpu/drm/i915/gt/intel_gt_irq.c        | 82 ++++++-------------
 drivers/gpu/drm/i915/gt/intel_gt_irq.h        |  7 ++
 .../gpu/drm/i915/gt/intel_ring_submission.c   |  7 ++
 drivers/gpu/drm/i915/gt/intel_rps.c           |  2 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 10 ++-
 drivers/gpu/drm/i915/i915_irq.c               |  8 +-
 9 files changed, 103 insertions(+), 74 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index dab8d734e272..2a453ba5f25a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -255,6 +255,11 @@ static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
 	intel_engine_set_hwsp_writemask(engine, ~0u);
 }
 
+static void nop_irq_handler(struct intel_engine_cs *engine, u32 iir)
+{
+	GEM_DEBUG_WARN_ON(iir);
+}
+
 static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 {
 	const struct engine_info *info = &intel_engines[id];
@@ -292,6 +297,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	engine->hw_id = info->hw_id;
 	engine->guc_id = MAKE_GUC_ID(info->class, info->instance);
 
+	engine->irq_handler = nop_irq_handler;
+
 	engine->class = info->class;
 	engine->instance = info->instance;
 	__sprint_engine_name(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 9d59de5c559a..7fd035d45263 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -402,6 +402,7 @@ struct intel_engine_cs {
 	u32		irq_enable_mask; /* bitmask to enable ring interrupt */
 	void		(*irq_enable)(struct intel_engine_cs *engine);
 	void		(*irq_disable)(struct intel_engine_cs *engine);
+	void		(*irq_handler)(struct intel_engine_cs *engine, u32 iir);
 
 	void		(*sanitize)(struct intel_engine_cs *engine);
 	int		(*resume)(struct intel_engine_cs *engine);
@@ -481,10 +482,9 @@ struct intel_engine_cs {
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
 #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
 #define I915_ENGINE_HAS_TIMESLICES   BIT(4)
-#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5)
-#define I915_ENGINE_IS_VIRTUAL       BIT(6)
-#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7)
-#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8)
+#define I915_ENGINE_IS_VIRTUAL       BIT(5)
+#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
 	unsigned int flags;
 
 	/*
@@ -588,12 +588,6 @@ intel_engine_has_timeslices(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_HAS_TIMESLICES;
 }
 
-static inline bool
-intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
-{
-	return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
-}
-
 static inline bool
 intel_engine_is_virtual(const struct intel_engine_cs *engine)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 4ddd2099a931..ed62e4b549d2 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2394,6 +2394,45 @@ static void execlists_submission_tasklet(struct tasklet_struct *t)
 	rcu_read_unlock();
 }
 
+static void execlists_irq_handler(struct intel_engine_cs *engine, u32 iir)
+{
+	bool tasklet = false;
+
+	if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) {
+		u32 eir;
+
+		/* Upper 16b are the enabling mask, rsvd for internal errors */
+		eir = ENGINE_READ(engine, RING_EIR) & GENMASK(15, 0);
+		ENGINE_TRACE(engine, "CS error: %x\n", eir);
+
+		/* Disable the error interrupt until after the reset */
+		if (likely(eir)) {
+			ENGINE_WRITE(engine, RING_EMR, ~0u);
+			ENGINE_WRITE(engine, RING_EIR, eir);
+			WRITE_ONCE(engine->execlists.error_interrupt, eir);
+			tasklet = true;
+		}
+	}
+
+	if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
+		WRITE_ONCE(engine->execlists.yield,
+			   ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
+		ENGINE_TRACE(engine, "semaphore yield: %08x\n",
+			     engine->execlists.yield);
+		if (del_timer(&engine->execlists.timer))
+			tasklet = true;
+	}
+
+	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
+		tasklet = true;
+
+	if (iir & GT_RENDER_USER_INTERRUPT)
+		intel_engine_signal_breadcrumbs(engine);
+
+	if (tasklet)
+		tasklet_hi_schedule(&engine->execlists.tasklet);
+}
+
 static void __execlists_kick(struct intel_engine_execlists *execlists)
 {
 	/* Kick the tasklet for some interrupt coalescing and reset handling */
@@ -3146,6 +3185,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 		 * until a more refined solution exists.
 		 */
 	}
+	engine->irq_handler = execlists_irq_handler;
 
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
 	if (!intel_vgpu_active(engine->i915)) {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index 9fc6c912a4e5..f5aa31ae8f6c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -20,48 +20,6 @@ static void guc_irq_handler(struct intel_guc *guc, u16 iir)
 		intel_guc_to_host_event_handler(guc);
 }
 
-static void
-cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
-{
-	bool tasklet = false;
-
-	if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) {
-		u32 eir;
-
-		/* Upper 16b are the enabling mask, rsvd for internal errors */
-		eir = ENGINE_READ(engine, RING_EIR) & GENMASK(15, 0);
-		ENGINE_TRACE(engine, "CS error: %x\n", eir);
-
-		/* Disable the error interrupt until after the reset */
-		if (likely(eir)) {
-			ENGINE_WRITE(engine, RING_EMR, ~0u);
-			ENGINE_WRITE(engine, RING_EIR, eir);
-			WRITE_ONCE(engine->execlists.error_interrupt, eir);
-			tasklet = true;
-		}
-	}
-
-	if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
-		WRITE_ONCE(engine->execlists.yield,
-			   ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
-		ENGINE_TRACE(engine, "semaphore yield: %08x\n",
-			     engine->execlists.yield);
-		if (del_timer(&engine->execlists.timer))
-			tasklet = true;
-	}
-
-	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
-		tasklet = true;
-
-	if (iir & GT_RENDER_USER_INTERRUPT) {
-		intel_engine_signal_breadcrumbs(engine);
-		tasklet |= intel_engine_needs_breadcrumb_tasklet(engine);
-	}
-
-	if (tasklet)
-		tasklet_hi_schedule(&engine->execlists.tasklet);
-}
-
 static u32
 gen11_gt_engine_identity(struct intel_gt *gt,
 			 const unsigned int bank, const unsigned int bit)
@@ -122,7 +80,7 @@ gen11_engine_irq_handler(struct intel_gt *gt, const u8 class,
 		engine = NULL;
 
 	if (likely(engine))
-		return cs_irq_handler(engine, iir);
+		return intel_engine_cs_irq(engine, iir);
 
 	WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n",
 		  class, instance);
@@ -275,9 +233,12 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
 void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
 {
 	if (gt_iir & GT_RENDER_USER_INTERRUPT)
-		intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]);
+		intel_engine_cs_irq(gt->engine_class[RENDER_CLASS][0],
+				    gt_iir);
+
 	if (gt_iir & ILK_BSD_USER_INTERRUPT)
-		intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]);
+		intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0],
+				    gt_iir);
 }
 
 static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
@@ -301,11 +262,16 @@ static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
 void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
 {
 	if (gt_iir & GT_RENDER_USER_INTERRUPT)
-		intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]);
+		intel_engine_cs_irq(gt->engine_class[RENDER_CLASS][0],
+				    gt_iir);
+
 	if (gt_iir & GT_BSD_USER_INTERRUPT)
-		intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]);
+		intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0],
+				    gt_iir);
+
 	if (gt_iir & GT_BLT_USER_INTERRUPT)
-		intel_engine_signal_breadcrumbs(gt->engine_class[COPY_ENGINE_CLASS][0]);
+		intel_engine_cs_irq(gt->engine_class[COPY_ENGINE_CLASS][0],
+				    gt_iir);
 
 	if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
 		      GT_BSD_CS_ERROR_INTERRUPT |
@@ -324,10 +290,10 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
 	if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
 		iir = raw_reg_read(regs, GEN8_GT_IIR(0));
 		if (likely(iir)) {
-			cs_irq_handler(gt->engine_class[RENDER_CLASS][0],
-				       iir >> GEN8_RCS_IRQ_SHIFT);
-			cs_irq_handler(gt->engine_class[COPY_ENGINE_CLASS][0],
-				       iir >> GEN8_BCS_IRQ_SHIFT);
+			intel_engine_cs_irq(gt->engine_class[RENDER_CLASS][0],
+					    iir >> GEN8_RCS_IRQ_SHIFT);
+			intel_engine_cs_irq(gt->engine_class[COPY_ENGINE_CLASS][0],
+					    iir >> GEN8_BCS_IRQ_SHIFT);
 			raw_reg_write(regs, GEN8_GT_IIR(0), iir);
 		}
 	}
@@ -335,10 +301,10 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
 	if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) {
 		iir = raw_reg_read(regs, GEN8_GT_IIR(1));
 		if (likely(iir)) {
-			cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][0],
-				       iir >> GEN8_VCS0_IRQ_SHIFT);
-			cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][1],
-				       iir >> GEN8_VCS1_IRQ_SHIFT);
+			intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0],
+					    iir >> GEN8_VCS0_IRQ_SHIFT);
+			intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][1],
+					    iir >> GEN8_VCS1_IRQ_SHIFT);
 			raw_reg_write(regs, GEN8_GT_IIR(1), iir);
 		}
 	}
@@ -346,8 +312,8 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
 	if (master_ctl & GEN8_GT_VECS_IRQ) {
 		iir = raw_reg_read(regs, GEN8_GT_IIR(3));
 		if (likely(iir)) {
-			cs_irq_handler(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0],
-				       iir >> GEN8_VECS_IRQ_SHIFT);
+			intel_engine_cs_irq(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0],
+					    iir >> GEN8_VECS_IRQ_SHIFT);
 			raw_reg_write(regs, GEN8_GT_IIR(3), iir);
 		}
 	}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.h b/drivers/gpu/drm/i915/gt/intel_gt_irq.h
index f667e976fb2b..601473fe9df9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.h
@@ -8,6 +8,8 @@
 
 #include <linux/types.h>
 
+#include "intel_engine_types.h"
+
 struct intel_gt;
 
 #define GEN8_GT_IRQS (GEN8_GT_RCS_IRQ | \
@@ -39,4 +41,9 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl);
 void gen8_gt_irq_reset(struct intel_gt *gt);
 void gen8_gt_irq_postinstall(struct intel_gt *gt);
 
+static inline void intel_engine_cs_irq(struct intel_engine_cs *engine, u32 iir)
+{
+	engine->irq_handler(engine, iir);
+}
+
 #endif /* INTEL_GT_IRQ_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 3cb2ce503544..9b5bfbe79347 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -997,10 +997,17 @@ static void ring_release(struct intel_engine_cs *engine)
 	intel_timeline_put(engine->legacy.timeline);
 }
 
+static void irq_handler(struct intel_engine_cs *engine, u32 iir)
+{
+	intel_engine_signal_breadcrumbs(engine);
+}
+
 static void setup_irq(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *i915 = engine->i915;
 
+	engine->irq_handler = irq_handler;
+
 	if (INTEL_GEN(i915) >= 6) {
 		engine->irq_enable = gen6_irq_enable;
 		engine->irq_disable = gen6_irq_disable;
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 405d814e9040..4ba6a33f65cf 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1774,7 +1774,7 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
 		return;
 
 	if (pm_iir & PM_VEBOX_USER_INTERRUPT)
-		intel_engine_signal_breadcrumbs(gt->engine[VECS0]);
+		intel_engine_cs_irq(gt->engine[VECS0], pm_iir);
 
 	if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
 		DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 17b551a0c89f..96a38466299e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -264,6 +264,14 @@ static void guc_submission_tasklet(struct tasklet_struct *t)
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
+static void cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
+{
+	if (iir & GT_RENDER_USER_INTERRUPT) {
+		intel_engine_signal_breadcrumbs(engine);
+		tasklet_hi_schedule(&engine->execlists.tasklet);
+	}
+}
+
 static void guc_reset_prepare(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -645,7 +653,6 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
 	}
 	engine->set_default_submission = guc_set_default_submission;
 
-	engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
 	engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 
 	/*
@@ -681,6 +688,7 @@ static void rcs_submission_override(struct intel_engine_cs *engine)
 static inline void guc_default_irqs(struct intel_engine_cs *engine)
 {
 	engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT;
+	engine->irq_handler = cs_irq_handler;
 }
 
 int intel_guc_submission_setup(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 9d47da8ec86d..37a48402adc1 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3954,7 +3954,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
 		intel_uncore_write16(&dev_priv->uncore, GEN2_IIR, iir);
 
 		if (iir & I915_USER_INTERRUPT)
-			intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]);
+			intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir);
 
 		if (iir & I915_MASTER_ERROR_INTERRUPT)
 			i8xx_error_irq_handler(dev_priv, eir, eir_stuck);
@@ -4062,7 +4062,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
 		intel_uncore_write(&dev_priv->uncore, GEN2_IIR, iir);
 
 		if (iir & I915_USER_INTERRUPT)
-			intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]);
+			intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir);
 
 		if (iir & I915_MASTER_ERROR_INTERRUPT)
 			i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
@@ -4207,10 +4207,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
 		intel_uncore_write(&dev_priv->uncore, GEN2_IIR, iir);
 
 		if (iir & I915_USER_INTERRUPT)
-			intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]);
+			intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir);
 
 		if (iir & I915_BSD_USER_INTERRUPT)
-			intel_engine_signal_breadcrumbs(dev_priv->gt.engine[VCS0]);
+			intel_engine_cs_irq(dev_priv->gt.engine[VCS0], iir);
 
 		if (iir & I915_MASTER_ERROR_INTERRUPT)
 			i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
-- 
2.20.1

* [Intel-gfx] [CI 04/14] drm/i915: Replace engine->schedule() with a known request operation
  2021-02-02 15:14 [Intel-gfx] [CI 01/14] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
  2021-02-02 15:14 ` [Intel-gfx] [CI 02/14] drm/i915/gt: Move submission_method into intel_gt Chris Wilson
  2021-02-02 15:14 ` [Intel-gfx] [CI 03/14] drm/i915/gt: Move CS interrupt handler to the backend Chris Wilson
@ 2021-02-02 15:14 ` Chris Wilson
  2021-02-02 15:14 ` [Intel-gfx] [CI 05/14] drm/i915: Restructure priority inheritance Chris Wilson
                   ` (10 subsequent siblings)
  13 siblings, 0 replies; 28+ messages in thread
From: Chris Wilson @ 2021-02-02 15:14 UTC (permalink / raw)
  To: intel-gfx

Looking to the future, we want to set the scheduling attributes
explicitly, and so replace the generic engine->schedule() with the more
direct i915_request_set_priority().

What it loses in removing the 'schedule' name from the function, it
gains in having an explicit entry point with a stated goal.
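
A before/after sketch of a typical call site (condensed from the
selftest hunks below):

	/* before: NULL-checked vfunc taking a one-field attr struct */
	struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
	if (engine->schedule)
		engine->schedule(rq, &attr);

	/* after: a direct entry point; the capability check moves inside,
	 * via i915_request_use_scheduler(rq)
	 */
	i915_request_set_priority(rq, I915_PRIORITY_MAX);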

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c  |  5 ++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |  5 ++-
 drivers/gpu/drm/i915/gem/i915_gem_wait.c      | 29 +++++-----------
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  3 --
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |  4 +--
 drivers/gpu/drm/i915/gt/intel_engine_types.h  | 27 ++++++++-------
 drivers/gpu/drm/i915/gt/intel_engine_user.c   |  2 +-
 .../drm/i915/gt/intel_execlists_submission.c  |  3 +-
 drivers/gpu/drm/i915/gt/selftest_execlists.c  | 33 +++++--------------
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  | 11 +++----
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  3 +-
 drivers/gpu/drm/i915/i915_request.c           | 10 +++---
 drivers/gpu/drm/i915/i915_request.h           |  5 +++
 drivers/gpu/drm/i915/i915_scheduler.c         | 15 +++++----
 drivers/gpu/drm/i915/i915_scheduler.h         |  3 +-
 15 files changed, 64 insertions(+), 94 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index d8f10589e09e..aca964f7ba72 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -13662,7 +13662,6 @@ int
 intel_prepare_plane_fb(struct drm_plane *_plane,
 		       struct drm_plane_state *_new_plane_state)
 {
-	struct i915_sched_attr attr = { .priority = I915_PRIORITY_DISPLAY };
 	struct intel_plane *plane = to_intel_plane(_plane);
 	struct intel_plane_state *new_plane_state =
 		to_intel_plane_state(_new_plane_state);
@@ -13703,7 +13702,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
 
 	if (new_plane_state->uapi.fence) { /* explicit fencing */
 		i915_gem_fence_wait_priority(new_plane_state->uapi.fence,
-					     &attr);
+					     I915_PRIORITY_DISPLAY);
 		ret = i915_sw_fence_await_dma_fence(&state->commit_ready,
 						    new_plane_state->uapi.fence,
 						    i915_fence_timeout(dev_priv),
@@ -13725,7 +13724,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
 	if (ret)
 		return ret;
 
-	i915_gem_object_wait_priority(obj, 0, &attr);
+	i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY);
 	i915_gem_object_flush_frontbuffer(obj, ORIGIN_DIRTYFB);
 
 	if (!new_plane_state->uapi.fence) { /* implicit fencing */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 3411ad197fa6..325766abca21 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -549,15 +549,14 @@ static inline void __start_cpu_write(struct drm_i915_gem_object *obj)
 		obj->cache_dirty = true;
 }
 
-void i915_gem_fence_wait_priority(struct dma_fence *fence,
-				  const struct i915_sched_attr *attr);
+void i915_gem_fence_wait_priority(struct dma_fence *fence, int prio);
 
 int i915_gem_object_wait(struct drm_i915_gem_object *obj,
 			 unsigned int flags,
 			 long timeout);
 int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 				  unsigned int flags,
-				  const struct i915_sched_attr *attr);
+				  int prio);
 
 void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
 					 enum fb_op_origin origin);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
index 4b9856d5ba14..d79bf16083bd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
@@ -91,22 +91,12 @@ i915_gem_object_wait_reservation(struct dma_resv *resv,
 	return timeout;
 }
 
-static void fence_set_priority(struct dma_fence *fence,
-			       const struct i915_sched_attr *attr)
+static void fence_set_priority(struct dma_fence *fence, int prio)
 {
-	struct i915_request *rq;
-	struct intel_engine_cs *engine;
-
 	if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
 		return;
 
-	rq = to_request(fence);
-	engine = rq->engine;
-
-	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
-	if (engine->schedule)
-		engine->schedule(rq, attr);
-	rcu_read_unlock();
+	i915_request_set_priority(to_request(fence), prio);
 }
 
 static inline bool __dma_fence_is_chain(const struct dma_fence *fence)
@@ -114,8 +104,7 @@ static inline bool __dma_fence_is_chain(const struct dma_fence *fence)
 	return fence->ops == &dma_fence_chain_ops;
 }
 
-void i915_gem_fence_wait_priority(struct dma_fence *fence,
-				  const struct i915_sched_attr *attr)
+void i915_gem_fence_wait_priority(struct dma_fence *fence, int prio)
 {
 	if (dma_fence_is_signaled(fence))
 		return;
@@ -128,19 +117,19 @@ void i915_gem_fence_wait_priority(struct dma_fence *fence,
 		int i;
 
 		for (i = 0; i < array->num_fences; i++)
-			fence_set_priority(array->fences[i], attr);
+			fence_set_priority(array->fences[i], prio);
 	} else if (__dma_fence_is_chain(fence)) {
 		struct dma_fence *iter;
 
 		/* The chain is ordered; if we boost the last, we boost all */
 		dma_fence_chain_for_each(iter, fence) {
 			fence_set_priority(to_dma_fence_chain(iter)->fence,
-					   attr);
+					   prio);
 			break;
 		}
 		dma_fence_put(iter);
 	} else {
-		fence_set_priority(fence, attr);
+		fence_set_priority(fence, prio);
 	}
 
 	local_bh_enable(); /* kick the tasklets if queues were reprioritised */
@@ -149,7 +138,7 @@ void i915_gem_fence_wait_priority(struct dma_fence *fence,
 int
 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 			      unsigned int flags,
-			      const struct i915_sched_attr *attr)
+			      int prio)
 {
 	struct dma_fence *excl;
 
@@ -164,7 +153,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 			return ret;
 
 		for (i = 0; i < count; i++) {
-			i915_gem_fence_wait_priority(shared[i], attr);
+			i915_gem_fence_wait_priority(shared[i], prio);
 			dma_fence_put(shared[i]);
 		}
 
@@ -174,7 +163,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 	}
 
 	if (excl) {
-		i915_gem_fence_wait_priority(excl, attr);
+		i915_gem_fence_wait_priority(excl, prio);
 		dma_fence_put(excl);
 	}
 	return 0;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 2a453ba5f25a..92a3c8a43e14 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -326,9 +326,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	if (engine->context_size)
 		DRIVER_CAPS(i915)->has_logical_contexts = true;
 
-	/* Nothing to do here, execute in order of dependencies */
-	engine->schedule = NULL;
-
 	ewma__engine_latency_init(&engine->latency);
 
 	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 778bcae5ef2c..0b026cde9f09 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -114,7 +114,7 @@ static void heartbeat(struct work_struct *wrk)
 			 * but all other contexts, including the kernel
 			 * context are stuck waiting for the signal.
 			 */
-		} else if (engine->schedule &&
+		} else if (intel_engine_has_scheduler(engine) &&
 			   rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
 			/*
 			 * Gradually raise the priority of the heartbeat to
@@ -129,7 +129,7 @@ static void heartbeat(struct work_struct *wrk)
 				attr.priority = I915_PRIORITY_BARRIER;
 
 			local_bh_disable();
-			engine->schedule(rq, &attr);
+			i915_request_set_priority(rq, attr.priority);
 			local_bh_enable();
 		} else {
 			if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 7fd035d45263..cb81f0d93189 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -454,14 +454,6 @@ struct intel_engine_cs {
 	void            (*bond_execute)(struct i915_request *rq,
 					struct dma_fence *signal);
 
-	/*
-	 * Call when the priority on a request has changed and it and its
-	 * dependencies may need rescheduling. Note the request itself may
-	 * not be ready to run!
-	 */
-	void		(*schedule)(struct i915_request *request,
-				    const struct i915_sched_attr *attr);
-
 	void		(*release)(struct intel_engine_cs *engine);
 
 	struct intel_engine_execlists execlists;
@@ -479,12 +471,13 @@ struct intel_engine_cs {
 
 #define I915_ENGINE_USING_CMD_PARSER BIT(0)
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
-#define I915_ENGINE_HAS_PREEMPTION   BIT(2)
-#define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
-#define I915_ENGINE_HAS_TIMESLICES   BIT(4)
-#define I915_ENGINE_IS_VIRTUAL       BIT(5)
-#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
-#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
+#define I915_ENGINE_HAS_SCHEDULER    BIT(2)
+#define I915_ENGINE_HAS_PREEMPTION   BIT(3)
+#define I915_ENGINE_HAS_SEMAPHORES   BIT(4)
+#define I915_ENGINE_HAS_TIMESLICES   BIT(5)
+#define I915_ENGINE_IS_VIRTUAL       BIT(6)
+#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8)
 	unsigned int flags;
 
 	/*
@@ -567,6 +560,12 @@ intel_engine_supports_stats(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
 }
 
+static inline bool
+intel_engine_has_scheduler(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_HAS_SCHEDULER;
+}
+
 static inline bool
 intel_engine_has_preemption(const struct intel_engine_cs *engine)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 1cbd84eb24e4..64eccdf32a22 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -107,7 +107,7 @@ static void set_scheduler_caps(struct drm_i915_private *i915)
 	for_each_uabi_engine(engine, i915) { /* all engines must agree! */
 		int i;
 
-		if (engine->schedule)
+		if (intel_engine_has_scheduler(engine))
 			enabled |= (I915_SCHEDULER_CAP_ENABLED |
 				    I915_SCHEDULER_CAP_PRIORITY);
 		else
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index ed62e4b549d2..6b8984c64b60 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -3125,7 +3125,6 @@ static bool can_preempt(struct intel_engine_cs *engine)
 static void execlists_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->submit_request = execlists_submit_request;
-	engine->schedule = i915_schedule;
 	engine->execlists.tasklet.callback = execlists_submission_tasklet;
 }
 
@@ -3187,6 +3186,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 	}
 	engine->irq_handler = execlists_irq_handler;
 
+	engine->flags |= I915_ENGINE_HAS_SCHEDULER;
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
 	if (!intel_vgpu_active(engine->i915)) {
 		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
@@ -3699,7 +3699,6 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
 	ve->base.cops = &virtual_context_ops;
 	ve->base.request_alloc = execlists_request_alloc;
 
-	ve->base.schedule = i915_schedule;
 	ve->base.submit_request = virtual_submit_request;
 	ve->base.bond_execute = virtual_bond_execute;
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 9304a35384aa..951e2bf867e1 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -268,12 +268,8 @@ static int live_unlite_restore(struct intel_gt *gt, int prio)
 		i915_request_put(rq[0]);
 
 		if (prio) {
-			struct i915_sched_attr attr = {
-				.priority = prio,
-			};
-
 			/* Alternatively preempt the spinner with ce[1] */
-			engine->schedule(rq[1], &attr);
+			i915_request_set_priority(rq[1], prio);
 		}
 
 		/* And switch back to ce[0] for good measure */
@@ -873,9 +869,6 @@ release_queue(struct intel_engine_cs *engine,
 	      struct i915_vma *vma,
 	      int idx, int prio)
 {
-	struct i915_sched_attr attr = {
-		.priority = prio,
-	};
 	struct i915_request *rq;
 	u32 *cs;
 
@@ -900,7 +893,7 @@ release_queue(struct intel_engine_cs *engine,
 	i915_request_add(rq);
 
 	local_bh_disable();
-	engine->schedule(rq, &attr);
+	i915_request_set_priority(rq, prio);
 	local_bh_enable(); /* kick tasklet */
 
 	i915_request_put(rq);
@@ -1310,7 +1303,6 @@ static int live_timeslice_queue(void *arg)
 		goto err_pin;
 
 	for_each_engine(engine, gt, id) {
-		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
 		struct i915_request *rq, *nop;
 
 		if (!intel_engine_has_preemption(engine))
@@ -1325,7 +1317,7 @@ static int live_timeslice_queue(void *arg)
 			err = PTR_ERR(rq);
 			goto err_heartbeat;
 		}
-		engine->schedule(rq, &attr);
+		i915_request_set_priority(rq, I915_PRIORITY_MAX);
 		err = wait_for_submit(engine, rq, HZ / 2);
 		if (err) {
 			pr_err("%s: Timed out trying to submit semaphores\n",
@@ -1806,7 +1798,6 @@ static int live_late_preempt(void *arg)
 	struct i915_gem_context *ctx_hi, *ctx_lo;
 	struct igt_spinner spin_hi, spin_lo;
 	struct intel_engine_cs *engine;
-	struct i915_sched_attr attr = {};
 	enum intel_engine_id id;
 	int err = -ENOMEM;
 
@@ -1866,8 +1857,7 @@ static int live_late_preempt(void *arg)
 			goto err_wedged;
 		}
 
-		attr.priority = I915_PRIORITY_MAX;
-		engine->schedule(rq, &attr);
+		i915_request_set_priority(rq, I915_PRIORITY_MAX);
 
 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
 			pr_err("High priority context failed to preempt the low priority context\n");
@@ -2412,7 +2402,6 @@ static int live_preempt_cancel(void *arg)
 
 static int live_suppress_self_preempt(void *arg)
 {
-	struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
 	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
 	struct preempt_client a, b;
@@ -2480,7 +2469,7 @@ static int live_suppress_self_preempt(void *arg)
 			i915_request_add(rq_b);
 
 			GEM_BUG_ON(i915_request_completed(rq_a));
-			engine->schedule(rq_a, &attr);
+			i915_request_set_priority(rq_a, I915_PRIORITY_MAX);
 			igt_spinner_end(&a.spin);
 
 			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
@@ -2545,7 +2534,6 @@ static int live_chain_preempt(void *arg)
 		goto err_client_hi;
 
 	for_each_engine(engine, gt, id) {
-		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
 		struct igt_live_test t;
 		struct i915_request *rq;
 		int ring_size, count, i;
@@ -2612,7 +2600,7 @@ static int live_chain_preempt(void *arg)
 
 			i915_request_get(rq);
 			i915_request_add(rq);
-			engine->schedule(rq, &attr);
+			i915_request_set_priority(rq, I915_PRIORITY_MAX);
 
 			igt_spinner_end(&hi.spin);
 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
@@ -2964,14 +2952,12 @@ static int live_preempt_gang(void *arg)
 			return -EIO;
 
 		do {
-			struct i915_sched_attr attr = { .priority = prio++ };
-
 			err = create_gang(engine, &rq);
 			if (err)
 				break;
 
 			/* Submit each spinner at increasing priority */
-			engine->schedule(rq, &attr);
+			i915_request_set_priority(rq, prio++);
 		} while (prio <= I915_PRIORITY_MAX &&
 			 !__igt_timeout(end_time, NULL));
 		pr_debug("%s: Preempt chain of %d requests\n",
@@ -3192,9 +3178,6 @@ static int preempt_user(struct intel_engine_cs *engine,
 			struct i915_vma *global,
 			int id)
 {
-	struct i915_sched_attr attr = {
-		.priority = I915_PRIORITY_MAX
-	};
 	struct i915_request *rq;
 	int err = 0;
 	u32 *cs;
@@ -3219,7 +3202,7 @@ static int preempt_user(struct intel_engine_cs *engine,
 	i915_request_get(rq);
 	i915_request_add(rq);
 
-	engine->schedule(rq, &attr);
+	i915_request_set_priority(rq, I915_PRIORITY_MAX);
 
 	if (i915_request_wait(rq, 0, HZ / 2) < 0)
 		err = -ETIME;
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index d6ce4075602c..8cad102922e7 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -858,12 +858,11 @@ static int active_engine(void *data)
 		rq[idx] = i915_request_get(new);
 		i915_request_add(new);
 
-		if (engine->schedule && arg->flags & TEST_PRIORITY) {
-			struct i915_sched_attr attr = {
-				.priority =
-					i915_prandom_u32_max_state(512, &prng),
-			};
-			engine->schedule(rq[idx], &attr);
+		if (intel_engine_has_scheduler(engine) &&
+		    arg->flags & TEST_PRIORITY) {
+			int prio = i915_prandom_u32_max_state(512, &prng);
+
+			i915_request_set_priority(rq[idx], prio);
 		}
 
 		err = active_request_put(old);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 96a38466299e..7db2c9decf21 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -637,8 +637,6 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
 	engine->cops = &guc_context_ops;
 	engine->request_alloc = guc_request_alloc;
 
-	engine->schedule = i915_schedule;
-
 	engine->reset.prepare = guc_reset_prepare;
 	engine->reset.rewind = guc_reset_rewind;
 	engine->reset.cancel = guc_reset_cancel;
@@ -653,6 +651,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
 	}
 	engine->set_default_submission = guc_set_default_submission;
 
+	engine->flags |= I915_ENGINE_HAS_SCHEDULER;
 	engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 
 	/*
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index a336d6c40d8b..916e74fbab6c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1223,7 +1223,7 @@ __i915_request_await_execution(struct i915_request *to,
 	}
 
 	/* Couple the dependency tree for PI on this exposed to->fence */
-	if (to->engine->schedule) {
+	if (i915_request_use_scheduler(to)) {
 		err = i915_sched_node_add_dependency(&to->sched,
 						     &from->sched,
 						     I915_DEPENDENCY_WEAK);
@@ -1364,7 +1364,7 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
 		return 0;
 	}
 
-	if (to->engine->schedule) {
+	if (i915_request_use_scheduler(to)) {
 		ret = i915_sched_node_add_dependency(&to->sched,
 						     &from->sched,
 						     I915_DEPENDENCY_EXTERNAL);
@@ -1551,7 +1551,7 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 			__i915_sw_fence_await_dma_fence(&rq->submit,
 							&prev->fence,
 							&rq->dmaq);
-		if (rq->engine->schedule)
+		if (i915_request_use_scheduler(rq))
 			__i915_sched_node_add_dependency(&rq->sched,
 							 &prev->sched,
 							 &rq->dep,
@@ -1623,8 +1623,8 @@ void __i915_request_queue(struct i915_request *rq,
 	 * decide whether to preempt the entire chain so that it is ready to
 	 * run at the earliest possible convenience.
 	 */
-	if (attr && rq->engine->schedule)
-		rq->engine->schedule(rq, attr);
+	if (attr)
+		i915_request_set_priority(rq, attr->priority);
 
 	local_bh_disable();
 	__i915_request_queue_bh(rq);
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index c0bd4cb8786a..9ce074ffc1dd 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -616,4 +616,9 @@ i915_request_active_timeline(const struct i915_request *rq)
 					 lockdep_is_held(&rq->engine->active.lock));
 }
 
+static inline bool i915_request_use_scheduler(const struct i915_request *rq)
+{
+	return intel_engine_has_scheduler(rq->engine);
+}
+
 #endif /* I915_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 85d18037a915..84a55df88687 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -227,10 +227,8 @@ static void kick_submission(struct intel_engine_cs *engine,
 	rcu_read_unlock();
 }
 
-static void __i915_schedule(struct i915_sched_node *node,
-			    const struct i915_sched_attr *attr)
+static void __i915_schedule(struct i915_sched_node *node, int prio)
 {
-	const int prio = max(attr->priority, node->attr.priority);
 	struct intel_engine_cs *engine;
 	struct i915_dependency *dep, *p;
 	struct i915_dependency stack;
@@ -244,6 +242,8 @@ static void __i915_schedule(struct i915_sched_node *node,
 	if (node_signaled(node))
 		return;
 
+	prio = max(prio, node->attr.priority);
+
 	stack.signaler = node;
 	list_add(&stack.dfs_link, &dfs);
 
@@ -297,7 +297,7 @@ static void __i915_schedule(struct i915_sched_node *node,
 	 */
 	if (node->attr.priority == I915_PRIORITY_INVALID) {
 		GEM_BUG_ON(!list_empty(&node->link));
-		node->attr = *attr;
+		node->attr.priority = prio;
 
 		if (stack.dfs_link.next == stack.dfs_link.prev)
 			return;
@@ -352,10 +352,13 @@ static void __i915_schedule(struct i915_sched_node *node,
 	spin_unlock(&engine->active.lock);
 }
 
-void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
+void i915_request_set_priority(struct i915_request *rq, int prio)
 {
+	if (!i915_request_use_scheduler(rq))
+		return;
+
 	spin_lock_irq(&schedule_lock);
-	__i915_schedule(&rq->sched, attr);
+	__i915_schedule(&rq->sched, prio);
 	spin_unlock_irq(&schedule_lock);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 8c5ed6fe0994..a045be784c67 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -35,8 +35,7 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node,
 
 void i915_sched_node_retire(struct i915_sched_node *node);
 
-void i915_schedule(struct i915_request *request,
-		   const struct i915_sched_attr *attr);
+void i915_request_set_priority(struct i915_request *request, int prio);
 
 struct list_head *
 i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [Intel-gfx] [CI 05/14] drm/i915: Restructure priority inheritance
  2021-02-02 15:14 [Intel-gfx] [CI 01/14] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
                   ` (2 preceding siblings ...)
  2021-02-02 15:14 ` [Intel-gfx] [CI 04/14] drm/i915: Replace engine->schedule() with a known request operation Chris Wilson
@ 2021-02-02 15:14 ` Chris Wilson
  2021-02-02 15:14 ` [Intel-gfx] [CI 06/14] drm/i915/selftests: Measure set-priority duration Chris Wilson
                   ` (9 subsequent siblings)
  13 siblings, 0 replies; 28+ messages in thread
From: Chris Wilson @ 2021-02-02 15:14 UTC (permalink / raw)
  To: intel-gfx

In anticipation of wanting to be able to call PI (priority
inheritance) from underneath an engine's active.lock, rework the
priority inheritance to work primarily along an engine's priority
queue, delegating to a worker any other engine that the chain may
traverse. This replaces the single global spinlock, which governed the
entire multi-engine priority inheritance depth-first search, with a
smaller lock on each engine around a single list on that engine.
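
As an illustration only, here is a minimal userspace C sketch of the
delegation scheme (a sketch under stated assumptions, not the kernel
code: stdatomic stands in for the kernel's cmpxchg/xchg, and the node
type, ipi_head list and schedule_worker() stub are hypothetical). A
waiter records the highest pending priority on the request, pushes it
onto a lock-free list at most once, and the first insertion kicks the
worker that later drains the whole list:

#include <stdatomic.h>
#include <stddef.h>

struct node {
	struct node *next;	/* link in the pending list */
	_Atomic int queued;	/* guard: one insertion until drained */
	_Atomic int prio;	/* highest pending priority, -1 == none */
};

static struct node *_Atomic ipi_head;

/* Stub: the kernel would queue_work() the drain pass from here. */
static void schedule_worker(void)
{
}

/* Raise the pending priority; kick the worker on first insertion. */
static void ipi_add(struct node *n, int prio)
{
	int old = atomic_load(&n->prio);
	struct node *first;

	do {
		if (prio <= old)
			return;	/* an equal or higher bump is pending */
	} while (!atomic_compare_exchange_weak(&n->prio, &old, prio));

	if (atomic_exchange(&n->queued, 1))
		return;	/* already listed; the worker sees the new prio */

	first = atomic_load(&ipi_head);
	do {
		n->next = first;
	} while (!atomic_compare_exchange_weak(&ipi_head, &first, n));

	if (!first)
		schedule_worker();
}

/* Worker: atomically steal the whole list, then apply each priority. */
static void ipi_drain(void (*apply)(struct node *, int))
{
	struct node *n = atomic_exchange(&ipi_head, NULL);

	while (n) {
		struct node *next = n->next;

		atomic_store(&n->queued, 0);
		apply(n, atomic_exchange(&n->prio, -1));
		n = next;
	}
}

Stealing the list with a single exchange lets the worker coalesce many
ipi_add() calls into one pass using only the final priority value.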

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   2 +
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |   3 +-
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   3 +
 drivers/gpu/drm/i915/i915_scheduler.c         | 356 +++++++++++-------
 drivers/gpu/drm/i915/i915_scheduler.h         |   3 +
 drivers/gpu/drm/i915/i915_scheduler_types.h   |  23 +-
 6 files changed, 249 insertions(+), 141 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 92a3c8a43e14..36c6b8d7287d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -582,6 +582,8 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine)
 
 	execlists->queue_priority_hint = INT_MIN;
 	execlists->queue = RB_ROOT_CACHED;
+
+	i915_sched_init_ipi(&execlists->ipi);
 }
 
 static void cleanup_status_page(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 0b026cde9f09..48a91c0dbad6 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -114,8 +114,7 @@ static void heartbeat(struct work_struct *wrk)
 			 * but all other contexts, including the kernel
 			 * context are stuck waiting for the signal.
 			 */
-		} else if (intel_engine_has_scheduler(engine) &&
-			   rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
+		} else if (rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
 			/*
 			 * Gradually raise the priority of the heartbeat to
 			 * give high priority work [which presumably desires
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index cb81f0d93189..1b404fef40a6 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -20,6 +20,7 @@
 #include "i915_gem.h"
 #include "i915_pmu.h"
 #include "i915_priolist_types.h"
+#include "i915_scheduler_types.h"
 #include "i915_selftest.h"
 #include "intel_breadcrumbs_types.h"
 #include "intel_sseu.h"
@@ -257,6 +258,8 @@ struct intel_engine_execlists {
 	struct rb_root_cached queue;
 	struct rb_root_cached virtual;
 
+	struct i915_sched_ipi ipi;
+
 	/**
 	 * @csb_write: control register for Context Switch buffer
 	 *
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 84a55df88687..035e4be5d573 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -17,7 +17,25 @@ static struct i915_global_scheduler {
 	struct kmem_cache *slab_priorities;
 } global;
 
-static DEFINE_SPINLOCK(schedule_lock);
+/*
+ * Virtual engines complicate acquiring the engine timeline lock,
+ * as their rq->engine pointer is not stable until under that
+ * engine lock. The simple ploy we use is to take the lock then
+ * check that the rq still belongs to the newly locked engine.
+ */
+#define lock_engine_irqsave(rq, flags) ({ \
+	struct i915_request * const rq__ = (rq); \
+	struct intel_engine_cs *engine__ = READ_ONCE(rq__->engine); \
+\
+	spin_lock_irqsave(&engine__->active.lock, (flags)); \
+	while (engine__ != READ_ONCE((rq__)->engine)) { \
+		spin_unlock(&engine__->active.lock); \
+		engine__ = READ_ONCE(rq__->engine); \
+		spin_lock(&engine__->active.lock); \
+	} \
+\
+	engine__; \
+})
 
 static struct i915_sched_node *node_get(struct i915_sched_node *node)
 {
@@ -30,17 +48,104 @@ static void node_put(struct i915_sched_node *node)
 	i915_request_put(container_of(node, struct i915_request, sched));
 }
 
+static inline int rq_prio(const struct i915_request *rq)
+{
+	return READ_ONCE(rq->sched.attr.priority);
+}
+
+static int ipi_get_prio(struct i915_request *rq)
+{
+	if (READ_ONCE(rq->sched.ipi_priority) == I915_PRIORITY_INVALID)
+		return I915_PRIORITY_INVALID;
+
+	return xchg(&rq->sched.ipi_priority, I915_PRIORITY_INVALID);
+}
+
+static void ipi_schedule(struct work_struct *wrk)
+{
+	struct i915_sched_ipi *ipi = container_of(wrk, typeof(*ipi), work);
+	struct i915_request *rq = xchg(&ipi->list, NULL);
+
+	do {
+		struct i915_request *rn = xchg(&rq->sched.ipi_link, NULL);
+		int prio;
+
+		prio = ipi_get_prio(rq);
+
+		/*
+		 * For cross-engine scheduling to work we rely on one of two
+		 * things:
+		 *
+		 * a) The requests are using dma-fence fences and so will not
+		 * be scheduled until the previous engine is completed, and
+		 * so we cannot cross back onto the original engine and end up
+		 * queuing an earlier request after the first (due to the
+		 * interrupted DFS).
+		 *
+		 * b) The requests are using semaphores and so may already
+		 * be in flight, in which case if we cross back onto the same
+		 * engine, we will already have put the interrupted DFS into
+		 * the priolist, and the continuation will now be queued
+		 * afterwards [out-of-order]. However, since we are using
+		 * semaphores in this case, we also perform yield on semaphore
+		 * waits and so will reorder the requests back into the correct
+		 * sequence. This occurrence (of promoting a request chain
+		 * that crosses the engines using semaphores back onto itself)
+		 * should be unlikely enough that it probably does not matter...
+		 */
+		local_bh_disable();
+		i915_request_set_priority(rq, prio);
+		local_bh_enable();
+
+		i915_request_put(rq);
+		rq = ptr_mask_bits(rn, 1);
+	} while (rq);
+}
+
+void i915_sched_init_ipi(struct i915_sched_ipi *ipi)
+{
+	INIT_WORK(&ipi->work, ipi_schedule);
+	ipi->list = NULL;
+}
+
+static void __ipi_add(struct i915_request *rq)
+{
+#define STUB ((struct i915_request *)1)
+	struct intel_engine_cs *engine = READ_ONCE(rq->engine);
+	struct i915_request *first;
+
+	if (!i915_request_get_rcu(rq))
+		return;
+
+	/*
+	 * We only want to add the request once into the ipi.list (or else
+	 * the chain will be broken). The worker must be guaranteed to run
+	 * at least once for every call to ipi_add, but it is allowed to
+	 * coalesce multiple ipi_add into a single pass using the final
+	 * priority value.
+	 */
+	if (__i915_request_is_complete(rq) ||
+	    cmpxchg(&rq->sched.ipi_link, NULL, STUB)) { /* already queued */
+		i915_request_put(rq);
+		return;
+	}
+
+	/* Carefully insert ourselves into the head of the llist */
+	first = READ_ONCE(engine->execlists.ipi.list);
+	do {
+		rq->sched.ipi_link = ptr_pack_bits(first, 1, 1);
+	} while (!try_cmpxchg(&engine->execlists.ipi.list, &first, rq));
+
+	if (!first)
+		queue_work(system_unbound_wq, &engine->execlists.ipi.work);
+}
+
 static const struct i915_request *
 node_to_request(const struct i915_sched_node *node)
 {
 	return container_of(node, const struct i915_request, sched);
 }
 
-static inline bool node_started(const struct i915_sched_node *node)
-{
-	return i915_request_started(node_to_request(node));
-}
-
 static inline bool node_signaled(const struct i915_sched_node *node)
 {
 	return i915_request_completed(node_to_request(node));
@@ -137,42 +242,6 @@ void __i915_priolist_free(struct i915_priolist *p)
 	kmem_cache_free(global.slab_priorities, p);
 }
 
-struct sched_cache {
-	struct list_head *priolist;
-};
-
-static struct intel_engine_cs *
-sched_lock_engine(const struct i915_sched_node *node,
-		  struct intel_engine_cs *locked,
-		  struct sched_cache *cache)
-{
-	const struct i915_request *rq = node_to_request(node);
-	struct intel_engine_cs *engine;
-
-	GEM_BUG_ON(!locked);
-
-	/*
-	 * Virtual engines complicate acquiring the engine timeline lock,
-	 * as their rq->engine pointer is not stable until under that
-	 * engine lock. The simple ploy we use is to take the lock then
-	 * check that the rq still belongs to the newly locked engine.
-	 */
-	while (locked != (engine = READ_ONCE(rq->engine))) {
-		spin_unlock(&locked->active.lock);
-		memset(cache, 0, sizeof(*cache));
-		spin_lock(&engine->active.lock);
-		locked = engine;
-	}
-
-	GEM_BUG_ON(locked != engine);
-	return locked;
-}
-
-static inline int rq_prio(const struct i915_request *rq)
-{
-	return rq->sched.attr.priority;
-}
-
 static inline bool need_preempt(int prio, int active)
 {
 	/*
@@ -198,19 +267,17 @@ static void kick_submission(struct intel_engine_cs *engine,
 	if (prio <= engine->execlists.queue_priority_hint)
 		return;
 
-	rcu_read_lock();
-
 	/* Nothing currently active? We're overdue for a submission! */
 	inflight = execlists_active(&engine->execlists);
 	if (!inflight)
-		goto unlock;
+		return;
 
 	/*
 	 * If we are already the currently executing context, don't
 	 * bother evaluating if we should preempt ourselves.
 	 */
 	if (inflight->context == rq->context)
-		goto unlock;
+		return;
 
 	ENGINE_TRACE(engine,
 		     "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n",
@@ -222,30 +289,28 @@ static void kick_submission(struct intel_engine_cs *engine,
 	engine->execlists.queue_priority_hint = prio;
 	if (need_preempt(prio, rq_prio(inflight)))
 		tasklet_hi_schedule(&engine->execlists.tasklet);
-
-unlock:
-	rcu_read_unlock();
 }
 
-static void __i915_schedule(struct i915_sched_node *node, int prio)
+static void ipi_priority(struct i915_request *rq, int prio)
 {
-	struct intel_engine_cs *engine;
-	struct i915_dependency *dep, *p;
-	struct i915_dependency stack;
-	struct sched_cache cache;
+	int old = READ_ONCE(rq->sched.ipi_priority);
+
+	do {
+		if (prio <= old)
+			return;
+	} while (!try_cmpxchg(&rq->sched.ipi_priority, &old, prio));
+
+	__ipi_add(rq);
+}
+
+static void __i915_request_set_priority(struct i915_request *rq, int prio)
+{
+	struct intel_engine_cs *engine = rq->engine;
+	struct i915_request *rn;
+	struct list_head *plist;
 	LIST_HEAD(dfs);
 
-	/* Needed in order to use the temporary link inside i915_dependency */
-	lockdep_assert_held(&schedule_lock);
-	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
-
-	if (node_signaled(node))
-		return;
-
-	prio = max(prio, node->attr.priority);
-
-	stack.signaler = node;
-	list_add(&stack.dfs_link, &dfs);
+	list_add(&rq->sched.dfs, &dfs);
 
 	/*
 	 * Recursively bump all dependent priorities to match the new request.
@@ -265,66 +330,41 @@ static void __i915_schedule(struct i915_sched_node *node, int prio)
 	 * end result is a topological list of requests in reverse order, the
 	 * last element in the list is the request we must execute first.
 	 */
-	list_for_each_entry(dep, &dfs, dfs_link) {
-		struct i915_sched_node *node = dep->signaler;
+	list_for_each_entry(rq, &dfs, sched.dfs) {
+		struct i915_dependency *p;
 
-		/* If we are already flying, we know we have no signalers */
-		if (node_started(node))
-			continue;
+		/* Also release any children on this engine that are ready */
+		GEM_BUG_ON(rq->engine != engine);
 
-		/*
-		 * Within an engine, there can be no cycle, but we may
-		 * refer to the same dependency chain multiple times
-		 * (redundant dependencies are not eliminated) and across
-		 * engines.
-		 */
-		list_for_each_entry(p, &node->signalers_list, signal_link) {
-			GEM_BUG_ON(p == dep); /* no cycles! */
+		for_each_signaler(p, rq) {
+			struct i915_request *s =
+				container_of(p->signaler, typeof(*s), sched);
 
-			if (node_signaled(p->signaler))
+			GEM_BUG_ON(s == rq);
+
+			if (rq_prio(s) >= prio)
 				continue;
 
-			if (prio > READ_ONCE(p->signaler->attr.priority))
-				list_move_tail(&p->dfs_link, &dfs);
+			if (__i915_request_is_complete(s))
+				continue;
+
+			if (s->engine != rq->engine) {
+				ipi_priority(s, prio);
+				continue;
+			}
+
+			list_move_tail(&s->sched.dfs, &dfs);
 		}
 	}
 
-	/*
-	 * If we didn't need to bump any existing priorities, and we haven't
-	 * yet submitted this request (i.e. there is no potential race with
-	 * execlists_submit_request()), we can set our own priority and skip
-	 * acquiring the engine locks.
-	 */
-	if (node->attr.priority == I915_PRIORITY_INVALID) {
-		GEM_BUG_ON(!list_empty(&node->link));
-		node->attr.priority = prio;
+	plist = i915_sched_lookup_priolist(engine, prio);
 
-		if (stack.dfs_link.next == stack.dfs_link.prev)
-			return;
+	/* Fifo and depth-first replacement ensure our deps execute first */
+	list_for_each_entry_safe_reverse(rq, rn, &dfs, sched.dfs) {
+		GEM_BUG_ON(rq->engine != engine);
 
-		__list_del_entry(&stack.dfs_link);
-	}
-
-	memset(&cache, 0, sizeof(cache));
-	engine = node_to_request(node)->engine;
-	spin_lock(&engine->active.lock);
-
-	/* Fifo and depth-first replacement ensure our deps execute before us */
-	engine = sched_lock_engine(node, engine, &cache);
-	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
-		INIT_LIST_HEAD(&dep->dfs_link);
-
-		node = dep->signaler;
-		engine = sched_lock_engine(node, engine, &cache);
-		lockdep_assert_held(&engine->active.lock);
-
-		/* Recheck after acquiring the engine->timeline.lock */
-		if (prio <= node->attr.priority || node_signaled(node))
-			continue;
-
-		GEM_BUG_ON(node_to_request(node)->engine != engine);
-
-		WRITE_ONCE(node->attr.priority, prio);
+		INIT_LIST_HEAD(&rq->sched.dfs);
+		WRITE_ONCE(rq->sched.attr.priority, prio);
 
 		/*
 		 * Once the request is ready, it will be placed into the
@@ -334,32 +374,79 @@ static void __i915_schedule(struct i915_sched_node *node, int prio)
 		 * any preemption required, be dealt with upon submission.
 		 * See engine->submit_request()
 		 */
-		if (list_empty(&node->link))
+		if (!i915_request_is_ready(rq))
 			continue;
 
-		if (i915_request_in_priority_queue(node_to_request(node))) {
-			if (!cache.priolist)
-				cache.priolist =
-					i915_sched_lookup_priolist(engine,
-								   prio);
-			list_move_tail(&node->link, cache.priolist);
-		}
+		if (i915_request_in_priority_queue(rq))
+			list_move_tail(&rq->sched.link, plist);
 
-		/* Defer (tasklet) submission until after all of our updates. */
-		kick_submission(engine, node_to_request(node), prio);
+		/* Defer (tasklet) submission until after all updates. */
+		kick_submission(engine, rq, prio);
 	}
-
-	spin_unlock(&engine->active.lock);
 }
 
+#define all_signalers_checked(p, rq) \
+	list_entry_is_head(p, &(rq)->sched.signalers_list, signal_link)
+
 void i915_request_set_priority(struct i915_request *rq, int prio)
 {
-	if (!i915_request_use_scheduler(rq))
+	struct intel_engine_cs *engine;
+	unsigned long flags;
+
+	if (prio <= rq_prio(rq))
 		return;
 
-	spin_lock_irq(&schedule_lock);
-	__i915_schedule(&rq->sched, prio);
-	spin_unlock_irq(&schedule_lock);
+	/*
+	 * If we are setting the priority before being submitted, see if we
+	 * can quickly adjust our own priority in-situ and avoid taking
+	 * the contended engine->active.lock. If we need priority inheritance,
+	 * take the slow route.
+	 */
+	if (rq_prio(rq) == I915_PRIORITY_INVALID) {
+		struct i915_dependency *p;
+
+		rcu_read_lock();
+		for_each_signaler(p, rq) {
+			struct i915_request *s =
+				container_of(p->signaler, typeof(*s), sched);
+
+			if (rq_prio(s) >= prio)
+				continue;
+
+			if (__i915_request_is_complete(s))
+				continue;
+
+			break;
+		}
+		rcu_read_unlock();
+
+		/* Update priority in place if no PI required */
+		if (all_signalers_checked(p, rq) &&
+		    cmpxchg(&rq->sched.attr.priority,
+			    I915_PRIORITY_INVALID,
+			    prio) == I915_PRIORITY_INVALID)
+			return;
+	}
+
+	engine = lock_engine_irqsave(rq, flags);
+	if (prio <= rq_prio(rq))
+		goto unlock;
+
+	if (__i915_request_is_complete(rq))
+		goto unlock;
+
+	if (!intel_engine_has_scheduler(engine)) {
+		rq->sched.attr.priority = prio;
+		goto unlock;
+	}
+
+	rcu_read_lock();
+	__i915_request_set_priority(rq, prio);
+	rcu_read_unlock();
+	GEM_BUG_ON(rq_prio(rq) != prio);
+
+unlock:
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 void i915_sched_node_init(struct i915_sched_node *node)
@@ -369,6 +456,9 @@ void i915_sched_node_init(struct i915_sched_node *node)
 	INIT_LIST_HEAD(&node->signalers_list);
 	INIT_LIST_HEAD(&node->waiters_list);
 	INIT_LIST_HEAD(&node->link);
+	INIT_LIST_HEAD(&node->dfs);
+
+	node->ipi_link = NULL;
 
 	i915_sched_node_reinit(node);
 }
@@ -379,6 +469,9 @@ void i915_sched_node_reinit(struct i915_sched_node *node)
 	node->semaphores = 0;
 	node->flags = 0;
 
+	GEM_BUG_ON(node->ipi_link);
+	node->ipi_priority = I915_PRIORITY_INVALID;
+
 	GEM_BUG_ON(!list_empty(&node->signalers_list));
 	GEM_BUG_ON(!list_empty(&node->waiters_list));
 	GEM_BUG_ON(!list_empty(&node->link));
@@ -414,7 +507,6 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 	spin_lock(&signal->lock);
 
 	if (!node_signaled(signal)) {
-		INIT_LIST_HEAD(&dep->dfs_link);
 		dep->signaler = signal;
 		dep->waiter = node_get(node);
 		dep->flags = flags;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index a045be784c67..2870fa3e089e 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -14,6 +14,7 @@
 #include "i915_scheduler_types.h"
 
 struct drm_printer;
+struct intel_engine_cs;
 
 #define priolist_for_each_request(it, plist) \
 	list_for_each_entry(it, &(plist)->requests, sched.link)
@@ -35,6 +36,8 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node,
 
 void i915_sched_node_retire(struct i915_sched_node *node);
 
+void i915_sched_init_ipi(struct i915_sched_ipi *ipi);
+
 void i915_request_set_priority(struct i915_request *request, int prio);
 
 struct list_head *
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 623bf41fcf35..2a5265d9aff1 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -8,13 +8,17 @@
 #define _I915_SCHEDULER_TYPES_H_
 
 #include <linux/list.h>
+#include <linux/workqueue.h>
 
-#include "gt/intel_engine_types.h"
 #include "i915_priolist_types.h"
 
-struct drm_i915_private;
 struct i915_request;
-struct intel_engine_cs;
+
+/* Inter-engine scheduling delegation */
+struct i915_sched_ipi {
+	struct i915_request *list;
+	struct work_struct work;
+};
 
 struct i915_sched_attr {
 	/**
@@ -61,13 +65,19 @@ struct i915_sched_attr {
  */
 struct i915_sched_node {
 	spinlock_t lock; /* protect the lists */
+
 	struct list_head signalers_list; /* those before us, we depend upon */
 	struct list_head waiters_list; /* those after us, they depend upon us */
-	struct list_head link;
+	struct list_head link; /* guarded by engine->active.lock */
+	struct list_head dfs; /* guarded by engine->active.lock */
 	struct i915_sched_attr attr;
-	unsigned int flags;
+	unsigned long flags;
 #define I915_SCHED_HAS_EXTERNAL_CHAIN	BIT(0)
-	intel_engine_mask_t semaphores;
+	unsigned long semaphores;
+
+	/* handle being scheduled for PI from outside of our active.lock */
+	struct i915_request *ipi_link;
+	int ipi_priority;
 };
 
 struct i915_dependency {
@@ -75,7 +85,6 @@ struct i915_dependency {
 	struct i915_sched_node *waiter;
 	struct list_head signal_link;
 	struct list_head wait_link;
-	struct list_head dfs_link;
 	struct rcu_head rcu;
 	unsigned long flags;
 #define I915_DEPENDENCY_ALLOC		BIT(0)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [Intel-gfx] [CI 06/14] drm/i915/selftests: Measure set-priority duration
  2021-02-02 15:14 [Intel-gfx] [CI 01/14] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
                   ` (3 preceding siblings ...)
  2021-02-02 15:14 ` [Intel-gfx] [CI 05/14] drm/i915: Restructure priority inheritance Chris Wilson
@ 2021-02-02 15:14 ` Chris Wilson
  2021-02-02 16:49   ` Tvrtko Ursulin
  2021-02-02 15:14 ` [Intel-gfx] [CI 07/14] drm/i915/selftests: Exercise priority inheritance around an engine loop Chris Wilson
                   ` (8 subsequent siblings)
  13 siblings, 1 reply; 28+ messages in thread
From: Chris Wilson @ 2021-02-02 15:14 UTC (permalink / raw)
  To: intel-gfx

As a topological sort, we expect it to run in linear graph time,
O(V+E). In removing the recursion, the iterative version is no longer
a DFS but rather a BFS, and so runs in O(VE). Let's demonstrate how
bad this is with a few examples, and build a few test cases to verify
a potential fix.
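
To put rough numbers on that (assuming ~1ns per node visit): in the
wide chain below, where request w depends on all w - 1 requests before
it, V = w and E = w(w - 1)/2. At w = 1024 that is ~524k edges, so an
O(V+E) pass is ~525k visits (microseconds), while an O(VE) pass is on
the order of 5x10^8 visits (hundreds of milliseconds).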

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_scheduler.c         |   4 +
 .../drm/i915/selftests/i915_live_selftests.h  |   1 +
 .../drm/i915/selftests/i915_perf_selftests.h  |   1 +
 .../gpu/drm/i915/selftests/i915_scheduler.c   | 672 ++++++++++++++++++
 4 files changed, 678 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/selftests/i915_scheduler.c

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 035e4be5d573..27bda7617b29 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -609,6 +609,10 @@ void i915_request_show_with_schedule(struct drm_printer *m,
 	rcu_read_unlock();
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_scheduler.c"
+#endif
+
 static void i915_global_scheduler_shrink(void)
 {
 	kmem_cache_shrink(global.slab_dependencies);
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index a92c0e9b7e6b..2200a5baa68e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -26,6 +26,7 @@ selftest(gt_mocs, intel_mocs_live_selftests)
 selftest(gt_pm, intel_gt_pm_live_selftests)
 selftest(gt_heartbeat, intel_heartbeat_live_selftests)
 selftest(requests, i915_request_live_selftests)
+selftest(scheduler, i915_scheduler_live_selftests)
 selftest(active, i915_active_live_selftests)
 selftest(objects, i915_gem_object_live_selftests)
 selftest(mman, i915_gem_mman_live_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
index c2389f8a257d..137e35283fee 100644
--- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
@@ -17,5 +17,6 @@
  */
 selftest(engine_cs, intel_engine_cs_perf_selftests)
 selftest(request, i915_request_perf_selftests)
+selftest(scheduler, i915_scheduler_perf_selftests)
 selftest(blt, i915_gem_object_blt_perf_selftests)
 selftest(region, intel_memory_region_perf_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_scheduler.c b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
new file mode 100644
index 000000000000..d095fab2ccec
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
@@ -0,0 +1,672 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+
+#include "gt/intel_context.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/selftest_engine_heartbeat.h"
+#include "selftests/igt_spinner.h"
+#include "selftests/i915_random.h"
+
+static void scheduling_disable(struct intel_engine_cs *engine)
+{
+	engine->props.preempt_timeout_ms = 0;
+	engine->props.timeslice_duration_ms = 0;
+
+	st_engine_heartbeat_disable(engine);
+}
+
+static void scheduling_enable(struct intel_engine_cs *engine)
+{
+	st_engine_heartbeat_enable(engine);
+
+	engine->props.preempt_timeout_ms =
+		engine->defaults.preempt_timeout_ms;
+	engine->props.timeslice_duration_ms =
+		engine->defaults.timeslice_duration_ms;
+}
+
+static int first_engine(struct drm_i915_private *i915,
+			int (*chain)(struct intel_engine_cs *engine,
+				     unsigned long param,
+				     bool (*fn)(struct i915_request *rq,
+						unsigned long v,
+						unsigned long e)),
+			unsigned long param,
+			bool (*fn)(struct i915_request *rq,
+				   unsigned long v, unsigned long e))
+{
+	struct intel_engine_cs *engine;
+
+	for_each_uabi_engine(engine, i915) {
+		if (!intel_engine_has_scheduler(engine))
+			continue;
+
+		return chain(engine, param, fn);
+	}
+
+	return 0;
+}
+
+static int all_engines(struct drm_i915_private *i915,
+		       int (*chain)(struct intel_engine_cs *engine,
+				    unsigned long param,
+				    bool (*fn)(struct i915_request *rq,
+					       unsigned long v,
+					       unsigned long e)),
+		       unsigned long param,
+		       bool (*fn)(struct i915_request *rq,
+				  unsigned long v, unsigned long e))
+{
+	struct intel_engine_cs *engine;
+	int err;
+
+	for_each_uabi_engine(engine, i915) {
+		if (!intel_engine_has_scheduler(engine))
+			continue;
+
+		err = chain(engine, param, fn);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static bool check_context_order(struct intel_engine_cs *engine)
+{
+	u64 last_seqno, last_context;
+	unsigned long count;
+	bool result = false;
+	struct rb_node *rb;
+	int last_prio;
+
+	/* We expect the execution order to follow ascending fence-context */
+	spin_lock_irq(&engine->active.lock);
+
+	count = 0;
+	last_context = 0;
+	last_seqno = 0;
+	last_prio = 0;
+	for (rb = rb_first_cached(&engine->execlists.queue); rb; rb = rb_next(rb)) {
+		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+		struct i915_request *rq;
+
+		priolist_for_each_request(rq, p) {
+			if (rq->fence.context < last_context ||
+			    (rq->fence.context == last_context &&
+			     rq->fence.seqno < last_seqno)) {
+				pr_err("[%lu] %llx:%lld [prio:%d] after %llx:%lld [prio:%d]\n",
+				       count,
+				       rq->fence.context,
+				       rq->fence.seqno,
+				       rq_prio(rq),
+				       last_context,
+				       last_seqno,
+				       last_prio);
+				goto out_unlock;
+			}
+
+			last_context = rq->fence.context;
+			last_seqno = rq->fence.seqno;
+			last_prio = rq_prio(rq);
+			count++;
+		}
+	}
+	result = true;
+out_unlock:
+	spin_unlock_irq(&engine->active.lock);
+
+	return result;
+}
+
+static int __single_chain(struct intel_engine_cs *engine, unsigned long length,
+			  bool (*fn)(struct i915_request *rq,
+				     unsigned long v, unsigned long e))
+{
+	struct intel_context *ce;
+	struct igt_spinner spin;
+	struct i915_request *rq;
+	unsigned long count;
+	unsigned long min;
+	int err = 0;
+
+	if (!intel_engine_can_store_dword(engine))
+		return 0;
+
+	scheduling_disable(engine);
+
+	if (igt_spinner_init(&spin, engine->gt)) {
+		err = -ENOMEM;
+		goto err_heartbeat;
+	}
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce)) {
+		err = PTR_ERR(ce);
+		goto err_spin;
+	}
+	ce->ring = __intel_context_ring_size(SZ_512K);
+
+	rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_context;
+	}
+	i915_request_add(rq);
+	min = ce->ring->size - ce->ring->space;
+
+	count = 1;
+	while (count < length && ce->ring->space > min) {
+		rq = intel_context_create_request(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+		i915_request_add(rq);
+		count++;
+	}
+	intel_engine_flush_submission(engine);
+
+	execlists_active_lock_bh(&engine->execlists);
+	if (fn(rq, count, count - 1) && !check_context_order(engine))
+		err = -EINVAL;
+	execlists_active_unlock_bh(&engine->execlists);
+
+	igt_spinner_end(&spin);
+err_context:
+	intel_context_put(ce);
+err_spin:
+	igt_spinner_fini(&spin);
+err_heartbeat:
+	scheduling_enable(engine);
+	return err;
+}
+
+static int __wide_chain(struct intel_engine_cs *engine, unsigned long width,
+			bool (*fn)(struct i915_request *rq,
+				   unsigned long v, unsigned long e))
+{
+	struct intel_context **ce;
+	struct i915_request **rq;
+	struct igt_spinner spin;
+	unsigned long count;
+	unsigned long i, j;
+	int err = 0;
+
+	if (!intel_engine_can_store_dword(engine))
+		return 0;
+
+	scheduling_disable(engine);
+
+	if (igt_spinner_init(&spin, engine->gt)) {
+		err = -ENOMEM;
+		goto err_heartbeat;
+	}
+
+	ce = kmalloc_array(width, sizeof(*ce), GFP_KERNEL);
+	if (!ce) {
+		err = -ENOMEM;
+		goto err_spin;
+	}
+
+	for (i = 0; i < width; i++) {
+		ce[i] = intel_context_create(engine);
+		if (IS_ERR(ce[i])) {
+			err = PTR_ERR(ce[i]);
+			width = i;
+			goto err_context;
+		}
+	}
+
+	rq = kmalloc_array(width, sizeof(*rq), GFP_KERNEL);
+	if (!rq) {
+		err = -ENOMEM;
+		goto err_context;
+	}
+
+	rq[0] = igt_spinner_create_request(&spin, ce[0], MI_NOOP);
+	if (IS_ERR(rq[0])) {
+		err = PTR_ERR(rq[0]);
+		goto err_free;
+	}
+	i915_request_add(rq[0]);
+
+	count = 0;
+	for (i = 1; i < width; i++) {
+		GEM_BUG_ON(i915_request_completed(rq[0]));
+
+		rq[i] = intel_context_create_request(ce[i]);
+		if (IS_ERR(rq[i])) {
+			err = PTR_ERR(rq[i]);
+			break;
+		}
+		for (j = 0; j < i; j++) {
+			err = i915_request_await_dma_fence(rq[i],
+							   &rq[j]->fence);
+			if (err)
+				break;
+			count++;
+		}
+		i915_request_add(rq[i]);
+	}
+	intel_engine_flush_submission(engine);
+
+	execlists_active_lock_bh(&engine->execlists);
+	if (fn(rq[i - 1], i, count) && !check_context_order(engine))
+		err = -EINVAL;
+	execlists_active_unlock_bh(&engine->execlists);
+
+	igt_spinner_end(&spin);
+err_free:
+	kfree(rq);
+err_context:
+	for (i = 0; i < width; i++)
+		intel_context_put(ce[i]);
+	kfree(ce);
+err_spin:
+	igt_spinner_fini(&spin);
+err_heartbeat:
+	scheduling_enable(engine);
+	return err;
+}
+
+static int __inv_chain(struct intel_engine_cs *engine, unsigned long width,
+		       bool (*fn)(struct i915_request *rq,
+				  unsigned long v, unsigned long e))
+{
+	struct intel_context **ce;
+	struct i915_request **rq;
+	struct igt_spinner spin;
+	unsigned long count;
+	unsigned long i, j;
+	int err = 0;
+
+	if (!intel_engine_can_store_dword(engine))
+		return 0;
+
+	scheduling_disable(engine);
+
+	if (igt_spinner_init(&spin, engine->gt)) {
+		err = -ENOMEM;
+		goto err_heartbeat;
+	}
+
+	ce = kmalloc_array(width, sizeof(*ce), GFP_KERNEL);
+	if (!ce) {
+		err = -ENOMEM;
+		goto err_spin;
+	}
+
+	for (i = 0; i < width; i++) {
+		ce[i] = intel_context_create(engine);
+		if (IS_ERR(ce[i])) {
+			err = PTR_ERR(ce[i]);
+			width = i;
+			goto err_context;
+		}
+	}
+
+	rq = kmalloc_array(width, sizeof(*rq), GFP_KERNEL);
+	if (!rq) {
+		err = -ENOMEM;
+		goto err_context;
+	}
+
+	rq[0] = igt_spinner_create_request(&spin, ce[0], MI_NOOP);
+	if (IS_ERR(rq[0])) {
+		err = PTR_ERR(rq[0]);
+		goto err_free;
+	}
+	i915_request_add(rq[0]);
+
+	count = 0;
+	for (i = 1; i < width; i++) {
+		GEM_BUG_ON(i915_request_completed(rq[0]));
+
+		rq[i] = intel_context_create_request(ce[i]);
+		if (IS_ERR(rq[i])) {
+			err = PTR_ERR(rq[i]);
+			break;
+		}
+		for (j = i; j > 0; j--) {
+			err = i915_request_await_dma_fence(rq[i],
+							   &rq[j - 1]->fence);
+			if (err)
+				break;
+			count++;
+		}
+		i915_request_add(rq[i]);
+	}
+	intel_engine_flush_submission(engine);
+
+	execlists_active_lock_bh(&engine->execlists);
+	if (fn(rq[i - 1], i, count) && !check_context_order(engine))
+		err = -EINVAL;
+	execlists_active_unlock_bh(&engine->execlists);
+
+	igt_spinner_end(&spin);
+err_free:
+	kfree(rq);
+err_context:
+	for (i = 0; i < width; i++)
+		intel_context_put(ce[i]);
+	kfree(ce);
+err_spin:
+	igt_spinner_fini(&spin);
+err_heartbeat:
+	scheduling_enable(engine);
+	return err;
+}
+
+static int __sparse_chain(struct intel_engine_cs *engine, unsigned long width,
+			  bool (*fn)(struct i915_request *rq,
+				     unsigned long v, unsigned long e))
+{
+	struct intel_context **ce;
+	struct i915_request **rq;
+	struct igt_spinner spin;
+	I915_RND_STATE(prng);
+	unsigned long count;
+	unsigned long i, j;
+	int err = 0;
+
+	if (!intel_engine_can_store_dword(engine))
+		return 0;
+
+	scheduling_disable(engine);
+
+	if (igt_spinner_init(&spin, engine->gt)) {
+		err = -ENOMEM;
+		goto err_heartbeat;
+	}
+
+	ce = kmalloc_array(width, sizeof(*ce), GFP_KERNEL);
+	if (!ce) {
+		err = -ENOMEM;
+		goto err_spin;
+	}
+
+	for (i = 0; i < width; i++) {
+		ce[i] = intel_context_create(engine);
+		if (IS_ERR(ce[i])) {
+			err = PTR_ERR(ce[i]);
+			width = i;
+			goto err_context;
+		}
+	}
+
+	rq = kmalloc_array(width, sizeof(*rq), GFP_KERNEL);
+	if (!rq) {
+		err = -ENOMEM;
+		goto err_context;
+	}
+
+	rq[0] = igt_spinner_create_request(&spin, ce[0], MI_NOOP);
+	if (IS_ERR(rq[0])) {
+		err = PTR_ERR(rq[0]);
+		goto err_free;
+	}
+	i915_request_add(rq[0]);
+
+	count = 0;
+	for (i = 1; i < width; i++) {
+		GEM_BUG_ON(i915_request_completed(rq[0]));
+
+		rq[i] = intel_context_create_request(ce[i]);
+		if (IS_ERR(rq[i])) {
+			err = PTR_ERR(rq[i]);
+			break;
+		}
+
+		if (err == 0 && i > 1) {
+			j = i915_prandom_u32_max_state(i - 1, &prng);
+			err = i915_request_await_dma_fence(rq[i],
+							   &rq[j]->fence);
+			count++;
+		}
+
+		if (err == 0) {
+			err = i915_request_await_dma_fence(rq[i],
+							   &rq[i - 1]->fence);
+			count++;
+		}
+
+		if (err == 0 && i > 2) {
+			j = i915_prandom_u32_max_state(i - 2, &prng);
+			err = i915_request_await_dma_fence(rq[i],
+							   &rq[j]->fence);
+			count++;
+		}
+
+		i915_request_add(rq[i]);
+		if (err)
+			break;
+	}
+	intel_engine_flush_submission(engine);
+
+	execlists_active_lock_bh(&engine->execlists);
+	if (fn(rq[i - 1], i, count) && !check_context_order(engine))
+		err = -EINVAL;
+	execlists_active_unlock_bh(&engine->execlists);
+
+	igt_spinner_end(&spin);
+err_free:
+	kfree(rq);
+err_context:
+	for (i = 0; i < width; i++)
+		intel_context_put(ce[i]);
+	kfree(ce);
+err_spin:
+	igt_spinner_fini(&spin);
+err_heartbeat:
+	scheduling_enable(engine);
+	return err;
+}
+
+static int igt_schedule_chains(struct drm_i915_private *i915,
+			       bool (*fn)(struct i915_request *rq,
+					  unsigned long v, unsigned long e))
+{
+	static int (* const chains[])(struct intel_engine_cs *engine,
+				      unsigned long length,
+				      bool (*fn)(struct i915_request *rq,
+						 unsigned long v, unsigned long e)) = {
+		__single_chain,
+		__wide_chain,
+		__inv_chain,
+		__sparse_chain,
+	};
+	int n, err;
+
+	for (n = 0; n < ARRAY_SIZE(chains); n++) {
+		err = all_engines(i915, chains[n], 17, fn);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static bool igt_priority(struct i915_request *rq,
+			 unsigned long v, unsigned long e)
+{
+	i915_request_set_priority(rq, I915_PRIORITY_BARRIER);
+	GEM_BUG_ON(rq_prio(rq) != I915_PRIORITY_BARRIER);
+	return true;
+}
+
+static int igt_priority_chains(void *arg)
+{
+	return igt_schedule_chains(arg, igt_priority);
+}
+
+int i915_scheduler_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_priority_chains),
+	};
+
+	return i915_subtests(tests, i915);
+}
+
+static int chains(struct drm_i915_private *i915,
+		  int (*chain)(struct drm_i915_private *i915,
+			       unsigned long length,
+			       bool (*fn)(struct i915_request *rq,
+					  unsigned long v, unsigned long e)),
+		  bool (*fn)(struct i915_request *rq,
+			     unsigned long v, unsigned long e))
+{
+	unsigned long x[] = { 1, 4, 16, 64, 128, 256, 512, 1024, 4096 };
+	int i, err;
+
+	for (i = 0; i < ARRAY_SIZE(x); i++) {
+		IGT_TIMEOUT(end_time);
+
+		err = chain(i915, x[i], fn);
+		if (err)
+			return err;
+
+		if (__igt_timeout(end_time, NULL))
+			break;
+	}
+
+	return 0;
+}
+
+static int single_chain(struct drm_i915_private *i915,
+			unsigned long length,
+			bool (*fn)(struct i915_request *rq,
+				   unsigned long v, unsigned long e))
+{
+	return first_engine(i915, __single_chain, length, fn);
+}
+
+static int single(struct drm_i915_private *i915,
+		  bool (*fn)(struct i915_request *rq,
+			     unsigned long v, unsigned long e))
+{
+	return chains(i915, single_chain, fn);
+}
+
+static int wide_chain(struct drm_i915_private *i915,
+		      unsigned long width,
+		      bool (*fn)(struct i915_request *rq,
+				 unsigned long v, unsigned long e))
+{
+	return first_engine(i915, __wide_chain, width, fn);
+}
+
+static int wide(struct drm_i915_private *i915,
+		bool (*fn)(struct i915_request *rq,
+			   unsigned long v, unsigned long e))
+{
+	return chains(i915, wide_chain, fn);
+}
+
+static int inv_chain(struct drm_i915_private *i915,
+		     unsigned long width,
+		     bool (*fn)(struct i915_request *rq,
+				unsigned long v, unsigned long e))
+{
+	return first_engine(i915, __inv_chain, width, fn);
+}
+
+static int inv(struct drm_i915_private *i915,
+	       bool (*fn)(struct i915_request *rq,
+			  unsigned long v, unsigned long e))
+{
+	return chains(i915, inv_chain, fn);
+}
+
+static int sparse_chain(struct drm_i915_private *i915,
+			unsigned long width,
+			bool (*fn)(struct i915_request *rq,
+				   unsigned long v, unsigned long e))
+{
+	return first_engine(i915, __sparse_chain, width, fn);
+}
+
+static int sparse(struct drm_i915_private *i915,
+		  bool (*fn)(struct i915_request *rq,
+			     unsigned long v, unsigned long e))
+{
+	return chains(i915, sparse_chain, fn);
+}
+
+static void report(const char *what, unsigned long v, unsigned long e, u64 dt)
+{
+	pr_info("(%4lu, %7lu), %s:%10lluns\n", v, e, what, dt);
+}
+
+static u64 __set_priority(struct i915_request *rq, int prio)
+{
+	u64 dt;
+
+	preempt_disable();
+	dt = ktime_get_raw_fast_ns();
+	i915_request_set_priority(rq, prio);
+	dt = ktime_get_raw_fast_ns() - dt;
+	preempt_enable();
+
+	return dt;
+}
+
+static bool set_priority(struct i915_request *rq,
+			 unsigned long v, unsigned long e)
+{
+	report("set-priority", v, e, __set_priority(rq, I915_PRIORITY_BARRIER));
+	return true;
+}
+
+static int single_priority(void *arg)
+{
+	return single(arg, set_priority);
+}
+
+static int wide_priority(void *arg)
+{
+	return wide(arg, set_priority);
+}
+
+static int inv_priority(void *arg)
+{
+	return inv(arg, set_priority);
+}
+
+static int sparse_priority(void *arg)
+{
+	return sparse(arg, set_priority);
+}
+
+int i915_scheduler_perf_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(single_priority),
+		SUBTEST(wide_priority),
+		SUBTEST(inv_priority),
+		SUBTEST(sparse_priority),
+	};
+	static const struct {
+		const char *name;
+		size_t sz;
+	} types[] = {
+#define T(t) { #t, sizeof(struct t) }
+		T(i915_priolist),
+		T(i915_sched_attr),
+		T(i915_sched_node),
+		T(i915_dependency),
+#undef T
+		{}
+	};
+	typeof(*types) *t;
+
+	for (t = types; t->name; t++)
+		pr_info("sizeof(%s): %zd\n", t->name, t->sz);
+
+	return i915_subtests(tests, i915);
+}
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [Intel-gfx] [CI 07/14] drm/i915/selftests: Exercise priority inheritance around an engine loop
  2021-02-02 15:14 [Intel-gfx] [CI 01/14] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
                   ` (4 preceding siblings ...)
  2021-02-02 15:14 ` [Intel-gfx] [CI 06/14] drm/i915/selftests: Measure set-priority duration Chris Wilson
@ 2021-02-02 15:14 ` Chris Wilson
  2021-02-02 16:44   ` Tvrtko Ursulin
  2021-02-02 15:14 ` [Intel-gfx] [CI 08/14] drm/i915/selftests: Force a rewind if at first we don't succeed Chris Wilson
                   ` (7 subsequent siblings)
  13 siblings, 1 reply; 28+ messages in thread
From: Chris Wilson @ 2021-02-02 15:14 UTC (permalink / raw)
  To: intel-gfx

Exercise rescheduling priority inheritance around a sequence of requests
that wrap around all the engines.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/selftests/i915_scheduler.c   | 225 ++++++++++++++++++
 1 file changed, 225 insertions(+)

diff --git a/drivers/gpu/drm/i915/selftests/i915_scheduler.c b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
index d095fab2ccec..acc666f755d7 100644
--- a/drivers/gpu/drm/i915/selftests/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
@@ -7,6 +7,7 @@
 
 #include "gt/intel_context.h"
 #include "gt/intel_gpu_commands.h"
+#include "gt/intel_ring.h"
 #include "gt/selftest_engine_heartbeat.h"
 #include "selftests/igt_spinner.h"
 #include "selftests/i915_random.h"
@@ -504,10 +505,234 @@ static int igt_priority_chains(void *arg)
 	return igt_schedule_chains(arg, igt_priority);
 }
 
+static struct i915_request *
+__write_timestamp(struct intel_engine_cs *engine,
+		  struct drm_i915_gem_object *obj,
+		  int slot,
+		  struct i915_request *prev)
+{
+	struct i915_request *rq = ERR_PTR(-EINVAL);
+	bool use_64b = INTEL_GEN(engine->i915) >= 8;
+	struct intel_context *ce;
+	struct i915_vma *vma;
+	int err = 0;
+	u32 *cs;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return ERR_CAST(ce);
+
+	vma = i915_vma_instance(obj, ce->vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto out_ce;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (err)
+		goto out_ce;
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_unpin;
+	}
+
+	i915_vma_lock(vma);
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
+	if (err)
+		goto out_request;
+
+	if (prev) {
+		err = i915_request_await_dma_fence(rq, &prev->fence);
+		if (err)
+			goto out_request;
+	}
+
+	if (engine->emit_init_breadcrumb) {
+		err = engine->emit_init_breadcrumb(rq);
+		if (err)
+			goto out_request;
+	}
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto out_request;
+	}
+
+	*cs++ = MI_STORE_REGISTER_MEM + use_64b;
+	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(engine->mmio_base));
+	*cs++ = lower_32_bits(vma->node.start) + sizeof(u32) * slot;
+	*cs++ = upper_32_bits(vma->node.start);
+	intel_ring_advance(rq, cs);
+
+	i915_request_get(rq);
+out_request:
+	i915_request_add(rq);
+out_unpin:
+	i915_vma_unpin(vma);
+out_ce:
+	intel_context_put(ce);
+	i915_request_put(prev);
+	return err ? ERR_PTR(err) : rq;
+}
+
+static struct i915_request *create_spinner(struct drm_i915_private *i915,
+					   struct igt_spinner *spin)
+{
+	struct intel_engine_cs *engine;
+
+	for_each_uabi_engine(engine, i915) {
+		struct intel_context *ce;
+		struct i915_request *rq;
+
+		if (igt_spinner_init(spin, engine->gt))
+			return ERR_PTR(-ENOMEM);
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce))
+			return ERR_CAST(ce);
+
+		rq = igt_spinner_create_request(spin, ce, MI_NOOP);
+		intel_context_put(ce);
+		if (rq == ERR_PTR(-ENODEV))
+			continue;
+		if (IS_ERR(rq))
+			return rq;
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+		return rq;
+	}
+
+	return ERR_PTR(-ENODEV);
+}
+
+static bool has_timestamp(const struct drm_i915_private *i915)
+{
+	return INTEL_GEN(i915) >= 7;
+}
+
+static int __igt_schedule_cycle(struct drm_i915_private *i915,
+				bool (*fn)(struct i915_request *rq,
+					   unsigned long v, unsigned long e))
+{
+	struct intel_engine_cs *engine;
+	struct drm_i915_gem_object *obj;
+	struct igt_spinner spin;
+	struct i915_request *rq;
+	unsigned long count, n;
+	u32 *time, last;
+	int err;
+
+	/*
+	 * Queue a bunch of ordered requests (each waiting on the previous)
+	 * around the engines a couple of times. Each request will write
+	 * the timestamp it executes at into the scratch, with the expectation
+	 * that the timestamp will be in our desired execution order.
+	 */
+
+	if (!i915->caps.scheduler || !has_timestamp(i915))
+		return 0;
+
+	obj = i915_gem_object_create_internal(i915, SZ_64K);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	time = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(time)) {
+		err = PTR_ERR(time);
+		goto out_obj;
+	}
+
+	rq = create_spinner(i915, &spin);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_obj;
+	}
+
+	err = 0;
+	count = 0;
+	for_each_uabi_engine(engine, i915) {
+		if (!intel_engine_has_scheduler(engine))
+			continue;
+
+		rq = __write_timestamp(engine, obj, count, rq);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		count++;
+	}
+	for_each_uabi_engine(engine, i915) {
+		if (!intel_engine_has_scheduler(engine))
+			continue;
+
+		rq = __write_timestamp(engine, obj, count, rq);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		count++;
+	}
+	GEM_BUG_ON(count * sizeof(u32) > obj->base.size);
+	if (err || !count)
+		goto out_spin;
+
+	fn(rq, count + 1, count);
+	igt_spinner_end(&spin);
+
+	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
+		err = -ETIME;
+		goto out_request;
+	}
+
+	last = time[0];
+	for (n = 1; n < count; n++) {
+		if (i915_seqno_passed(last, time[n])) {
+			pr_err("Timestamp[%lu] %x before previous %x\n",
+			       n, time[n], last);
+			err = -EINVAL;
+			break;
+		}
+		last = time[n];
+	}
+
+out_request:
+	i915_request_put(rq);
+out_spin:
+	igt_spinner_fini(&spin);
+out_obj:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+static bool noop(struct i915_request *rq, unsigned long v, unsigned long e)
+{
+	return true;
+}
+
+static int igt_schedule_cycle(void *arg)
+{
+	return __igt_schedule_cycle(arg, noop);
+}
+
+static int igt_priority_cycle(void *arg)
+{
+	return __igt_schedule_cycle(arg, igt_priority);
+}
+
 int i915_scheduler_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_priority_chains),
+
+		SUBTEST(igt_schedule_cycle),
+		SUBTEST(igt_priority_cycle),
 	};
 
 	return i915_subtests(tests, i915);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [Intel-gfx] [CI 08/14] drm/i915/selftests: Force a rewind if at first we don't succeed
  2021-02-02 15:14 [Intel-gfx] [CI 01/14] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
                   ` (5 preceding siblings ...)
  2021-02-02 15:14 ` [Intel-gfx] [CI 07/14] drm/i915/selftests: Exercise priority inheritance around an engine loop Chris Wilson
@ 2021-02-02 15:14 ` Chris Wilson
  2021-02-02 16:52   ` Tvrtko Ursulin
  2021-02-02 15:14 ` [Intel-gfx] [CI 09/14] drm/i915: Improve DFS for priority inheritance Chris Wilson
                   ` (6 subsequent siblings)
  13 siblings, 1 reply; 28+ messages in thread
From: Chris Wilson @ 2021-02-02 15:14 UTC (permalink / raw)
  To: intel-gfx

live_timeslice_rewind assumes a particular traversal and reordering
after the first timeslice yield. However, the outcome can be either
(A1, A2, B1) or (A1, B1, A2) depending on the path taken through the
dependency graph. So if we do not get the outcome we need at first, give
it a priority kick to force a rewind.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/selftest_execlists.c | 21 +++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 951e2bf867e1..68e1398704a4 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -1107,6 +1107,7 @@ static int live_timeslice_rewind(void *arg)
 		struct i915_request *rq[3] = {};
 		struct intel_context *ce;
 		unsigned long timeslice;
+		unsigned long timeout;
 		int i, err = 0;
 		u32 *slot;
 
@@ -1173,11 +1174,29 @@ static int live_timeslice_rewind(void *arg)
 
 		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
 		ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
-		while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
+		i = 0;
+		timeout = jiffies + HZ;
+		while (i915_request_is_active(rq[A2]) &&
+		       time_before(jiffies, timeout)) { /* semaphore yield! */
 			/* Wait for the timeslice to kick in */
 			del_timer(&engine->execlists.timer);
 			tasklet_hi_schedule(&engine->execlists.tasklet);
 			intel_engine_flush_submission(engine);
+
+			/*
+			 * Unfortunately this assumes that during the
+			 * search of the wait tree it sees the requests
+			 * in a particular order. That order is not
+			 * strictly determined and it may pick either
+			 * A2 or B1 to immediately follow A1.
+			 *
+			 * Break the tie with a set-priority. This defeats
+			 * the goal of trying to cause a rewind with a
+			 * timeslice, but alas, a rewind is better than
+			 * none.
+			 */
+			if (i++)
+				i915_request_set_priority(rq[B1], 1);
 		}
 		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
 		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [Intel-gfx] [CI 09/14] drm/i915: Improve DFS for priority inheritance
  2021-02-02 15:14 [Intel-gfx] [CI 01/14] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
                   ` (6 preceding siblings ...)
  2021-02-02 15:14 ` [Intel-gfx] [CI 08/14] drm/i915/selftests: Force a rewind if at first we don't succeed Chris Wilson
@ 2021-02-02 15:14 ` Chris Wilson
  2021-02-02 15:14 ` [Intel-gfx] [CI 10/14] drm/i915: Extract request submission from execlists Chris Wilson
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 28+ messages in thread
From: Chris Wilson @ 2021-02-02 15:14 UTC (permalink / raw)
  To: intel-gfx

The core of the scheduling algorithm is that we compute the topological
order of the fence DAG. Knowing that we have a DAG, we should be able to
use a DFS to compute the topological sort in linear time. However,
during the conversion of the recursive algorithm into an iterative one,
the memoization of how far we had progressed down a branch was
forgotten. The result was that instead of running in linear time, it
was running in quadratic, O(VE), time and could easily run for a few
hundred milliseconds given a wide enough graph, not the microseconds
required.
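
For reference, a minimal userspace C sketch of the memoized iterative
DFS (the vert/edge types are hypothetical stand-ins for the request
graph, with the prev/pos pair playing the role of the i915_sched_stack
added below): each vertex remembers its position in the parent's edge
list when we descend, so popping back resumes exactly where we left
off and every edge is examined once, restoring the O(V+E) bound.

#include <stddef.h>

struct vert;

struct edge {
	struct edge *next;
	struct vert *to;
};

struct vert {
	struct edge *edges;	/* adjacency (signalers) list */
	struct vert *prev;	/* branch memoization: parent, and... */
	struct edge *pos;	/* ...resume point in the parent's list */
	int visited;
};

static void dfs(struct vert *v)
{
	struct edge *pos = v->edges;

	v->prev = NULL;
	v->visited = 1;
	do {
		while (pos) {
			struct vert *s = pos->to;

			pos = pos->next;
			if (s->visited)
				continue;

			/* remember how far down this branch we got */
			v->pos = pos;
			s->prev = v;
			s->visited = 1;
			v = s;
			pos = v->edges;
		}

		/* pop: resume the parent where it left off */
		v = v->prev;
		if (v)
			pos = v->pos;
	} while (v);
}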

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Andi Shyti <andi.shyti@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_scheduler.c       | 58 ++++++++++++---------
 drivers/gpu/drm/i915/i915_scheduler_types.h |  6 ++-
 2 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 27bda7617b29..9e88417bf451 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -242,6 +242,26 @@ void __i915_priolist_free(struct i915_priolist *p)
 	kmem_cache_free(global.slab_priorities, p);
 }
 
+static struct i915_request *
+stack_push(struct i915_request *rq,
+	   struct i915_request *prev,
+	   struct list_head *pos)
+{
+	prev->sched.dfs.pos = pos;
+	rq->sched.dfs.prev = prev;
+	return rq;
+}
+
+static struct i915_request *
+stack_pop(struct i915_request *rq,
+	  struct list_head **pos)
+{
+	rq = rq->sched.dfs.prev;
+	if (rq)
+		*pos = rq->sched.dfs.pos;
+	return rq;
+}
+
 static inline bool need_preempt(int prio, int active)
 {
 	/*
@@ -306,11 +326,10 @@ static void ipi_priority(struct i915_request *rq, int prio)
 static void __i915_request_set_priority(struct i915_request *rq, int prio)
 {
 	struct intel_engine_cs *engine = rq->engine;
-	struct i915_request *rn;
+	struct list_head *pos = &rq->sched.signalers_list;
 	struct list_head *plist;
-	LIST_HEAD(dfs);
 
-	list_add(&rq->sched.dfs, &dfs);
+	plist = i915_sched_lookup_priolist(engine, prio);
 
 	/*
 	 * Recursively bump all dependent priorities to match the new request.
@@ -330,40 +349,31 @@ static void __i915_request_set_priority(struct i915_request *rq, int prio)
 	 * end result is a topological list of requests in reverse order, the
 	 * last element in the list is the request we must execute first.
 	 */
-	list_for_each_entry(rq, &dfs, sched.dfs) {
-		struct i915_dependency *p;
-
-		/* Also release any children on this engine that are ready */
-		GEM_BUG_ON(rq->engine != engine);
-
-		for_each_signaler(p, rq) {
+	rq->sched.dfs.prev = NULL;
+	do {
+		list_for_each_continue(pos, &rq->sched.signalers_list) {
+			struct i915_dependency *p =
+				list_entry(pos, typeof(*p), signal_link);
 			struct i915_request *s =
 				container_of(p->signaler, typeof(*s), sched);
 
-			GEM_BUG_ON(s == rq);
-
 			if (rq_prio(s) >= prio)
 				continue;
 
 			if (__i915_request_is_complete(s))
 				continue;
 
-			if (s->engine != rq->engine) {
+			if (s->engine != engine) {
 				ipi_priority(s, prio);
 				continue;
 			}
 
-			list_move_tail(&s->sched.dfs, &dfs);
+			/* Remember our position along this branch */
+			rq = stack_push(s, rq, pos);
+			pos = &rq->sched.signalers_list;
 		}
-	}
 
-	plist = i915_sched_lookup_priolist(engine, prio);
-
-	/* Fifo and depth-first replacement ensure our deps execute first */
-	list_for_each_entry_safe_reverse(rq, rn, &dfs, sched.dfs) {
-		GEM_BUG_ON(rq->engine != engine);
-
-		INIT_LIST_HEAD(&rq->sched.dfs);
+		RQ_TRACE(rq, "set-priority:%d\n", prio);
 		WRITE_ONCE(rq->sched.attr.priority, prio);
 
 		/*
@@ -377,12 +387,13 @@ static void __i915_request_set_priority(struct i915_request *rq, int prio)
 		if (!i915_request_is_ready(rq))
 			continue;
 
+		GEM_BUG_ON(rq->engine != engine);
 		if (i915_request_in_priority_queue(rq))
 			list_move_tail(&rq->sched.link, plist);
 
 		/* Defer (tasklet) submission until after all updates. */
 		kick_submission(engine, rq, prio);
-	}
+	} while ((rq = stack_pop(rq, &pos)));
 }
 
 #define all_signalers_checked(p, rq) \
@@ -456,7 +467,6 @@ void i915_sched_node_init(struct i915_sched_node *node)
 	INIT_LIST_HEAD(&node->signalers_list);
 	INIT_LIST_HEAD(&node->waiters_list);
 	INIT_LIST_HEAD(&node->link);
-	INIT_LIST_HEAD(&node->dfs);
 
 	node->ipi_link = NULL;
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 2a5265d9aff1..28138c3fcc81 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -69,7 +69,11 @@ struct i915_sched_node {
 	struct list_head signalers_list; /* those before us, we depend upon */
 	struct list_head waiters_list; /* those after us, they depend upon us */
 	struct list_head link; /* guarded by engine->active.lock */
-	struct list_head dfs; /* guarded by engine->active.lock */
+	struct i915_sched_stack {
+		/* Branch memoization used during depth-first search */
+		struct i915_request *prev;
+		struct list_head *pos;
+	} dfs; /* guarded by engine->active.lock */
 	struct i915_sched_attr attr;
 	unsigned long flags;
 #define I915_SCHED_HAS_EXTERNAL_CHAIN	BIT(0)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [Intel-gfx] [CI 10/14] drm/i915: Extract request submission from execlists
  2021-02-02 15:14 [Intel-gfx] [CI 01/14] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
                   ` (7 preceding siblings ...)
  2021-02-02 15:14 ` [Intel-gfx] [CI 09/14] drm/i915: Improve DFS for priority inheritance Chris Wilson
@ 2021-02-02 15:14 ` Chris Wilson
  2021-02-02 15:14 ` [Intel-gfx] [CI 11/14] drm/i915: Extract request rewinding " Chris Wilson
                   ` (4 subsequent siblings)
  13 siblings, 0 replies; 28+ messages in thread
From: Chris Wilson @ 2021-02-02 15:14 UTC (permalink / raw)
  To: intel-gfx

In the process of preparing to reuse the request submission logic for
other backends, lift it out of the execlists backend. It already
operates on the common structs, so it is just a matter of moving and
renaming.
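
As a condensed sketch of the resulting flow (see i915_request_enqueue()
in the diff for the full function), note that the kick now happens after
the engine lock is dropped, whereas execlists_submit_request() kicked
while still holding it and the GuC path kicked unconditionally:

	spin_lock_irqsave(&engine->active.lock, flags);
	/* hold check and queue_request(), exactly as in the diff */
	kick = submit_queue(engine, rq);
	spin_unlock_irqrestore(&engine->active.lock, flags);
	if (kick)
		tasklet_hi_schedule(&engine->execlists.tasklet);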

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 .../drm/i915/gt/intel_execlists_submission.c  | 55 +------------
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 30 +------
 drivers/gpu/drm/i915/i915_scheduler.c         | 82 +++++++++++++++++++
 drivers/gpu/drm/i915/i915_scheduler.h         |  2 +
 4 files changed, 86 insertions(+), 83 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 6b8984c64b60..62e83acc7221 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2452,59 +2452,6 @@ static void execlists_preempt(struct timer_list *timer)
 	execlists_kick(timer, preempt);
 }
 
-static void queue_request(struct intel_engine_cs *engine,
-			  struct i915_request *rq)
-{
-	GEM_BUG_ON(!list_empty(&rq->sched.link));
-	list_add_tail(&rq->sched.link,
-		      i915_sched_lookup_priolist(engine, rq_prio(rq)));
-	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
-}
-
-static bool submit_queue(struct intel_engine_cs *engine,
-			 const struct i915_request *rq)
-{
-	struct intel_engine_execlists *execlists = &engine->execlists;
-
-	if (rq_prio(rq) <= execlists->queue_priority_hint)
-		return false;
-
-	execlists->queue_priority_hint = rq_prio(rq);
-	return true;
-}
-
-static bool ancestor_on_hold(const struct intel_engine_cs *engine,
-			     const struct i915_request *rq)
-{
-	GEM_BUG_ON(i915_request_on_hold(rq));
-	return !list_empty(&engine->active.hold) && hold_request(rq);
-}
-
-static void execlists_submit_request(struct i915_request *request)
-{
-	struct intel_engine_cs *engine = request->engine;
-	unsigned long flags;
-
-	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->active.lock, flags);
-
-	if (unlikely(ancestor_on_hold(engine, request))) {
-		RQ_TRACE(request, "ancestor on hold\n");
-		list_add_tail(&request->sched.link, &engine->active.hold);
-		i915_request_set_hold(request);
-	} else {
-		queue_request(engine, request);
-
-		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
-		GEM_BUG_ON(list_empty(&request->sched.link));
-
-		if (submit_queue(engine, request))
-			__execlists_kick(&engine->execlists);
-	}
-
-	spin_unlock_irqrestore(&engine->active.lock, flags);
-}
-
 static int execlists_context_pre_pin(struct intel_context *ce,
 				     struct i915_gem_ww_ctx *ww,
 				     void **vaddr)
@@ -3124,7 +3071,7 @@ static bool can_preempt(struct intel_engine_cs *engine)
 
 static void execlists_set_default_submission(struct intel_engine_cs *engine)
 {
-	engine->submit_request = execlists_submit_request;
+	engine->submit_request = i915_request_enqueue;
 	engine->execlists.tasklet.callback = execlists_submission_tasklet;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 7db2c9decf21..f5b8f89d30bc 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -519,34 +519,6 @@ static int guc_request_alloc(struct i915_request *request)
 	return 0;
 }
 
-static inline void queue_request(struct intel_engine_cs *engine,
-				 struct i915_request *rq,
-				 int prio)
-{
-	GEM_BUG_ON(!list_empty(&rq->sched.link));
-	list_add_tail(&rq->sched.link,
-		      i915_sched_lookup_priolist(engine, prio));
-	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
-}
-
-static void guc_submit_request(struct i915_request *rq)
-{
-	struct intel_engine_cs *engine = rq->engine;
-	unsigned long flags;
-
-	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->active.lock, flags);
-
-	queue_request(engine, rq, rq_prio(rq));
-
-	GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
-	GEM_BUG_ON(list_empty(&rq->sched.link));
-
-	tasklet_hi_schedule(&engine->execlists.tasklet);
-
-	spin_unlock_irqrestore(&engine->active.lock, flags);
-}
-
 static void sanitize_hwsp(struct intel_engine_cs *engine)
 {
 	struct intel_timeline *tl;
@@ -615,7 +587,7 @@ static int guc_resume(struct intel_engine_cs *engine)
 
 static void guc_set_default_submission(struct intel_engine_cs *engine)
 {
-	engine->submit_request = guc_submit_request;
+	engine->submit_request = i915_request_enqueue;
 }
 
 static void guc_release(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 9e88417bf451..a56252d3546a 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -460,6 +460,88 @@ void i915_request_set_priority(struct i915_request *rq, int prio)
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
+static void queue_request(struct intel_engine_cs *engine,
+			  struct i915_request *rq)
+{
+	GEM_BUG_ON(!list_empty(&rq->sched.link));
+	list_add_tail(&rq->sched.link,
+		      i915_sched_lookup_priolist(engine, rq_prio(rq)));
+	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+}
+
+static bool submit_queue(struct intel_engine_cs *engine,
+			 const struct i915_request *rq)
+{
+	struct intel_engine_execlists *execlists = &engine->execlists;
+
+	if (rq_prio(rq) <= execlists->queue_priority_hint)
+		return false;
+
+	execlists->queue_priority_hint = rq_prio(rq);
+	return true;
+}
+
+static bool hold_request(const struct i915_request *rq)
+{
+	struct i915_dependency *p;
+	bool result = false;
+
+	/*
+	 * If one of our ancestors is on hold, we must also be put on hold,
+	 * otherwise we will bypass it and execute before it.
+	 */
+	rcu_read_lock();
+	for_each_signaler(p, rq) {
+		const struct i915_request *s =
+			container_of(p->signaler, typeof(*s), sched);
+
+		if (s->engine != rq->engine)
+			continue;
+
+		result = i915_request_on_hold(s);
+		if (result)
+			break;
+	}
+	rcu_read_unlock();
+
+	return result;
+}
+
+static bool ancestor_on_hold(const struct intel_engine_cs *engine,
+			     const struct i915_request *rq)
+{
+	GEM_BUG_ON(i915_request_on_hold(rq));
+	return unlikely(!list_empty(&engine->active.hold)) && hold_request(rq);
+}
+
+void i915_request_enqueue(struct i915_request *rq)
+{
+	struct intel_engine_cs *engine = rq->engine;
+	unsigned long flags;
+	bool kick = false;
+
+	/* Will be called from irq-context when using foreign fences. */
+	spin_lock_irqsave(&engine->active.lock, flags);
+	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags));
+
+	if (unlikely(ancestor_on_hold(engine, rq))) {
+		RQ_TRACE(rq, "ancestor on hold\n");
+		list_add_tail(&rq->sched.link, &engine->active.hold);
+		i915_request_set_hold(rq);
+	} else {
+		queue_request(engine, rq);
+
+		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
+
+		kick = submit_queue(engine, rq);
+	}
+
+	GEM_BUG_ON(list_empty(&rq->sched.link));
+	spin_unlock_irqrestore(&engine->active.lock, flags);
+	if (kick)
+		tasklet_hi_schedule(&engine->execlists.tasklet);
+}
+
 void i915_sched_node_init(struct i915_sched_node *node)
 {
 	spin_lock_init(&node->lock);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 2870fa3e089e..89d998f226e0 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -40,6 +40,8 @@ void i915_sched_init_ipi(struct i915_sched_ipi *ipi);
 
 void i915_request_set_priority(struct i915_request *request, int prio);
 
+void i915_request_enqueue(struct i915_request *request);
+
 struct list_head *
 i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
 
-- 
2.20.1


* [Intel-gfx] [CI 11/14] drm/i915: Extract request rewinding from execlists
  2021-02-02 15:14 [Intel-gfx] [CI 01/14] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
                   ` (8 preceding siblings ...)
  2021-02-02 15:14 ` [Intel-gfx] [CI 10/14] drm/i915: Extract request submission from execlists Chris Wilson
@ 2021-02-02 15:14 ` Chris Wilson
  2021-02-02 15:14 ` [Intel-gfx] [CI 12/14] drm/i915: Extract request suspension from the execlists Chris Wilson
                   ` (3 subsequent siblings)
  13 siblings, 0 replies; 28+ messages in thread
From: Chris Wilson @ 2021-02-02 15:14 UTC (permalink / raw)
  To: intel-gfx

In the process of preparing to reuse the request submission logic for
other backends, lift the request rewinding out of the execlists backend.

While this operates on the common structs, we do have a bit of backend
knowledge (the lrc context descriptor), which is harmless for !lrc but
still unsightly.
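
A minimal usage sketch, mirroring the reset paths in the diff (nothing
new is introduced here): the helper requires the engine lock and returns
the oldest incomplete request that was moved back to the priority queue,
or NULL if there was nothing to rewind:

	spin_lock_irqsave(&engine->active.lock, flags);
	rq = __i915_sched_rewind_requests(engine);	/* may be NULL */
	spin_unlock_irqrestore(&engine->active.lock, flags);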

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine.h        |  3 -
 .../drm/i915/gt/intel_execlists_submission.c  | 58 ++-----------------
 drivers/gpu/drm/i915/gt/intel_lrc_reg.h       |  3 +
 drivers/gpu/drm/i915/gt/selftest_execlists.c  |  2 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  3 +-
 drivers/gpu/drm/i915/i915_scheduler.c         | 44 ++++++++++++++
 drivers/gpu/drm/i915/i915_scheduler.h         |  3 +
 7 files changed, 56 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 8d9184920c51..cc2df80eb449 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -137,9 +137,6 @@ execlists_active_unlock_bh(struct intel_engine_execlists *execlists)
 	local_bh_enable(); /* restore softirq, and kick ksoftirqd! */
 }
 
-struct i915_request *
-execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
-
 static inline u32
 intel_read_status_page(const struct intel_engine_cs *engine, int reg)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 62e83acc7221..4add205ec30e 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -359,56 +359,6 @@ assert_priority_queue(const struct i915_request *prev,
 	return rq_prio(prev) >= rq_prio(next);
 }
 
-static struct i915_request *
-__unwind_incomplete_requests(struct intel_engine_cs *engine)
-{
-	struct i915_request *rq, *rn, *active = NULL;
-	struct list_head *pl;
-	int prio = I915_PRIORITY_INVALID;
-
-	lockdep_assert_held(&engine->active.lock);
-
-	list_for_each_entry_safe_reverse(rq, rn,
-					 &engine->active.requests,
-					 sched.link) {
-		if (__i915_request_is_complete(rq)) {
-			list_del_init(&rq->sched.link);
-			continue;
-		}
-
-		__i915_request_unsubmit(rq);
-
-		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
-		if (rq_prio(rq) != prio) {
-			prio = rq_prio(rq);
-			pl = i915_sched_lookup_priolist(engine, prio);
-		}
-		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
-
-		list_move(&rq->sched.link, pl);
-		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
-
-		/* Check in case we rollback so far we wrap [size/2] */
-		if (intel_ring_direction(rq->ring,
-					 rq->tail,
-					 rq->ring->tail + 8) > 0)
-			rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
-
-		active = rq;
-	}
-
-	return active;
-}
-
-struct i915_request *
-execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
-{
-	struct intel_engine_cs *engine =
-		container_of(execlists, typeof(*engine), execlists);
-
-	return __unwind_incomplete_requests(engine);
-}
-
 static void
 execlists_context_status_change(struct i915_request *rq, unsigned long status)
 {
@@ -1080,7 +1030,7 @@ static void defer_active(struct intel_engine_cs *engine)
 {
 	struct i915_request *rq;
 
-	rq = __unwind_incomplete_requests(engine);
+	rq = __i915_sched_rewind_requests(engine);
 	if (!rq)
 		return;
 
@@ -1292,7 +1242,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 * the preemption, some of the unwound requests may
 			 * complete!
 			 */
-			__unwind_incomplete_requests(engine);
+			__i915_sched_rewind_requests(engine);
 
 			last = NULL;
 		} else if (timeslice_expired(engine, last)) {
@@ -2287,7 +2237,7 @@ static void execlists_capture(struct intel_engine_cs *engine)
 	 * which we return it to the queue for signaling.
 	 *
 	 * By removing them from the execlists queue, we also remove the
-	 * requests from being processed by __unwind_incomplete_requests()
+	 * requests from being processed by __intel_engine_rewind_requests()
 	 * during the intel_engine_reset(), and so they will *not* be replayed
 	 * afterwards.
 	 *
@@ -2917,7 +2867,7 @@ static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
 	/* Push back any incomplete requests for replay after the reset. */
 	rcu_read_lock();
 	spin_lock_irqsave(&engine->active.lock, flags);
-	__unwind_incomplete_requests(engine);
+	__i915_sched_rewind_requests(engine);
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 	rcu_read_unlock();
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index 41e5350a7a05..364656bedec7 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -92,4 +92,7 @@
 /* in Gen12 ID 0x7FF is reserved to indicate idle */
 #define GEN12_MAX_CONTEXT_HW_ID	(GEN11_MAX_CONTEXT_HW_ID - 1)
 
+#define CTX_DESC_RELOAD_PD BIT_ULL(1)
+#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
+
 #endif /* _INTEL_LRC_REG_H_ */
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 68e1398704a4..73340a96548f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -4601,7 +4601,7 @@ static int reset_virtual_engine(struct intel_gt *gt,
 
 	/* Fake a preemption event; failed of course */
 	spin_lock_irq(&engine->active.lock);
-	__unwind_incomplete_requests(engine);
+	__i915_sched_rewind_requests(engine);
 	spin_unlock_irq(&engine->active.lock);
 	GEM_BUG_ON(rq->engine != engine);
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index f5b8f89d30bc..3a1126764484 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -314,14 +314,13 @@ static void guc_reset_state(struct intel_context *ce,
 
 static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled)
 {
-	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_request *rq;
 	unsigned long flags;
 
 	spin_lock_irqsave(&engine->active.lock, flags);
 
 	/* Push back any incomplete requests for replay after the reset. */
-	rq = execlists_unwind_incomplete_requests(execlists);
+	rq = __i915_sched_rewind_requests(engine);
 	if (!rq)
 		goto out_unlock;
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index a56252d3546a..6c0654bc49a8 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -6,6 +6,9 @@
 
 #include <linux/mutex.h>
 
+#include "gt/intel_ring.h"
+#include "gt/intel_lrc_reg.h"
+
 #include "i915_drv.h"
 #include "i915_globals.h"
 #include "i915_request.h"
@@ -542,6 +545,47 @@ void i915_request_enqueue(struct i915_request *rq)
 		tasklet_hi_schedule(&engine->execlists.tasklet);
 }
 
+struct i915_request *
+__i915_sched_rewind_requests(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq, *rn, *active = NULL;
+	struct list_head *pl;
+	int prio = I915_PRIORITY_INVALID;
+
+	lockdep_assert_held(&engine->active.lock);
+
+	list_for_each_entry_safe_reverse(rq, rn,
+					 &engine->active.requests,
+					 sched.link) {
+		if (__i915_request_is_complete(rq)) {
+			list_del_init(&rq->sched.link);
+			continue;
+		}
+
+		__i915_request_unsubmit(rq);
+
+		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
+		if (rq_prio(rq) != prio) {
+			prio = rq_prio(rq);
+			pl = i915_sched_lookup_priolist(engine, prio);
+		}
+		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
+
+		list_move(&rq->sched.link, pl);
+		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+
+		/* Check in case we rollback so far we wrap [size/2] */
+		if (intel_ring_direction(rq->ring,
+					 rq->tail,
+					 rq->ring->tail + 8) > 0)
+			rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
+
+		active = rq;
+	}
+
+	return active;
+}
+
 void i915_sched_node_init(struct i915_sched_node *node)
 {
 	spin_lock_init(&node->lock);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 89d998f226e0..d3984f65b3a6 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -42,6 +42,9 @@ void i915_request_set_priority(struct i915_request *request, int prio);
 
 void i915_request_enqueue(struct i915_request *request);
 
+struct i915_request *
+__i915_sched_rewind_requests(struct intel_engine_cs *engine);
+
 struct list_head *
 i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
 
-- 
2.20.1


* [Intel-gfx] [CI 12/14] drm/i915: Extract request suspension from the execlists
  2021-02-02 15:14 [Intel-gfx] [CI 01/14] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
                   ` (9 preceding siblings ...)
  2021-02-02 15:14 ` [Intel-gfx] [CI 11/14] drm/i915: Extract request rewinding " Chris Wilson
@ 2021-02-02 15:14 ` Chris Wilson
  2021-02-02 15:14 ` [Intel-gfx] [CI 13/14] drm/i915: Extract the ability to defer and rerun a request later Chris Wilson
                   ` (2 subsequent siblings)
  13 siblings, 0 replies; 28+ messages in thread
From: Chris Wilson @ 2021-02-02 15:14 UTC (permalink / raw)
  To: intel-gfx

Make the ability to suspend and resume a request and its dependents
generic.
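
As an illustrative sketch of the resulting pairing, modelled on the
error-capture path in the diff (the capture step itself is elided):
suspend parks the request, and any of its ready dependents on this
engine, on engine->active.hold, and resume returns the whole group to
the priority queue:

	i915_request_get(rq);
	if (i915_sched_suspend_request(engine, rq)) {
		/* rq is immune to resubmission; reset/capture safely */
		i915_sched_resume_request(engine, rq);
	}
	i915_request_put(rq);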

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 .../drm/i915/gt/intel_execlists_submission.c  | 167 +-----------------
 drivers/gpu/drm/i915/gt/selftest_execlists.c  |   8 +-
 drivers/gpu/drm/i915/i915_scheduler.c         | 153 ++++++++++++++++
 drivers/gpu/drm/i915/i915_scheduler.h         |  10 ++
 4 files changed, 169 insertions(+), 169 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 4add205ec30e..a971b3bee532 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -1921,169 +1921,6 @@ static void post_process_csb(struct i915_request **port,
 		execlists_schedule_out(*port++);
 }
 
-static void __execlists_hold(struct i915_request *rq)
-{
-	LIST_HEAD(list);
-
-	do {
-		struct i915_dependency *p;
-
-		if (i915_request_is_active(rq))
-			__i915_request_unsubmit(rq);
-
-		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
-		list_move_tail(&rq->sched.link, &rq->engine->active.hold);
-		i915_request_set_hold(rq);
-		RQ_TRACE(rq, "on hold\n");
-
-		for_each_waiter(p, rq) {
-			struct i915_request *w =
-				container_of(p->waiter, typeof(*w), sched);
-
-			if (p->flags & I915_DEPENDENCY_WEAK)
-				continue;
-
-			/* Leave semaphores spinning on the other engines */
-			if (w->engine != rq->engine)
-				continue;
-
-			if (!i915_request_is_ready(w))
-				continue;
-
-			if (__i915_request_is_complete(w))
-				continue;
-
-			if (i915_request_on_hold(w))
-				continue;
-
-			list_move_tail(&w->sched.link, &list);
-		}
-
-		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
-	} while (rq);
-}
-
-static bool execlists_hold(struct intel_engine_cs *engine,
-			   struct i915_request *rq)
-{
-	if (i915_request_on_hold(rq))
-		return false;
-
-	spin_lock_irq(&engine->active.lock);
-
-	if (__i915_request_is_complete(rq)) { /* too late! */
-		rq = NULL;
-		goto unlock;
-	}
-
-	/*
-	 * Transfer this request onto the hold queue to prevent it
-	 * being resumbitted to HW (and potentially completed) before we have
-	 * released it. Since we may have already submitted following
-	 * requests, we need to remove those as well.
-	 */
-	GEM_BUG_ON(i915_request_on_hold(rq));
-	GEM_BUG_ON(rq->engine != engine);
-	__execlists_hold(rq);
-	GEM_BUG_ON(list_empty(&engine->active.hold));
-
-unlock:
-	spin_unlock_irq(&engine->active.lock);
-	return rq;
-}
-
-static bool hold_request(const struct i915_request *rq)
-{
-	struct i915_dependency *p;
-	bool result = false;
-
-	/*
-	 * If one of our ancestors is on hold, we must also be on hold,
-	 * otherwise we will bypass it and execute before it.
-	 */
-	rcu_read_lock();
-	for_each_signaler(p, rq) {
-		const struct i915_request *s =
-			container_of(p->signaler, typeof(*s), sched);
-
-		if (s->engine != rq->engine)
-			continue;
-
-		result = i915_request_on_hold(s);
-		if (result)
-			break;
-	}
-	rcu_read_unlock();
-
-	return result;
-}
-
-static void __execlists_unhold(struct i915_request *rq)
-{
-	LIST_HEAD(list);
-
-	do {
-		struct i915_dependency *p;
-
-		RQ_TRACE(rq, "hold release\n");
-
-		GEM_BUG_ON(!i915_request_on_hold(rq));
-		GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
-
-		i915_request_clear_hold(rq);
-		list_move_tail(&rq->sched.link,
-			       i915_sched_lookup_priolist(rq->engine,
-							  rq_prio(rq)));
-		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
-
-		/* Also release any children on this engine that are ready */
-		for_each_waiter(p, rq) {
-			struct i915_request *w =
-				container_of(p->waiter, typeof(*w), sched);
-
-			if (p->flags & I915_DEPENDENCY_WEAK)
-				continue;
-
-			/* Propagate any change in error status */
-			if (rq->fence.error)
-				i915_request_set_error_once(w, rq->fence.error);
-
-			if (w->engine != rq->engine)
-				continue;
-
-			if (!i915_request_on_hold(w))
-				continue;
-
-			/* Check that no other parents are also on hold */
-			if (hold_request(w))
-				continue;
-
-			list_move_tail(&w->sched.link, &list);
-		}
-
-		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
-	} while (rq);
-}
-
-static void execlists_unhold(struct intel_engine_cs *engine,
-			     struct i915_request *rq)
-{
-	spin_lock_irq(&engine->active.lock);
-
-	/*
-	 * Move this request back to the priority queue, and all of its
-	 * children and grandchildren that were suspended along with it.
-	 */
-	__execlists_unhold(rq);
-
-	if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
-		engine->execlists.queue_priority_hint = rq_prio(rq);
-		tasklet_hi_schedule(&engine->execlists.tasklet);
-	}
-
-	spin_unlock_irq(&engine->active.lock);
-}
-
 struct execlists_capture {
 	struct work_struct work;
 	struct i915_request *rq;
@@ -2116,7 +1953,7 @@ static void execlists_capture_work(struct work_struct *work)
 	i915_gpu_coredump_put(cap->error);
 
 	/* Return this request and all that depend upon it for signaling */
-	execlists_unhold(engine, cap->rq);
+	i915_sched_resume_request(engine, cap->rq);
 	i915_request_put(cap->rq);
 
 	kfree(cap);
@@ -2250,7 +2087,7 @@ static void execlists_capture(struct intel_engine_cs *engine)
 	 * simply hold that request accountable for being non-preemptible
 	 * long enough to force the reset.
 	 */
-	if (!execlists_hold(engine, cap->rq))
+	if (!i915_sched_suspend_request(engine, cap->rq))
 		goto err_rq;
 
 	INIT_WORK(&cap->work, execlists_capture_work);
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 73340a96548f..64f6a49a5c22 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -608,7 +608,7 @@ static int live_hold_reset(void *arg)
 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
 
 		i915_request_get(rq);
-		execlists_hold(engine, rq);
+		i915_sched_suspend_request(engine, rq);
 		GEM_BUG_ON(!i915_request_on_hold(rq));
 
 		__intel_engine_reset_bh(engine, NULL);
@@ -630,7 +630,7 @@ static int live_hold_reset(void *arg)
 		GEM_BUG_ON(!i915_request_on_hold(rq));
 
 		/* But is resubmitted on release */
-		execlists_unhold(engine, rq);
+		i915_sched_resume_request(engine, rq);
 		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 			pr_err("%s: held request did not complete!\n",
 			       engine->name);
@@ -4606,7 +4606,7 @@ static int reset_virtual_engine(struct intel_gt *gt,
 	GEM_BUG_ON(rq->engine != engine);
 
 	/* Reset the engine while keeping our active request on hold */
-	execlists_hold(engine, rq);
+	i915_sched_suspend_request(engine, rq);
 	GEM_BUG_ON(!i915_request_on_hold(rq));
 
 	__intel_engine_reset_bh(engine, NULL);
@@ -4629,7 +4629,7 @@ static int reset_virtual_engine(struct intel_gt *gt,
 	GEM_BUG_ON(!i915_request_on_hold(rq));
 
 	/* But is resubmitted on release */
-	execlists_unhold(engine, rq);
+	i915_sched_resume_request(engine, rq);
 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 		pr_err("%s: held request did not complete!\n",
 		       engine->name);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 6c0654bc49a8..a5df27061c3c 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -586,6 +586,159 @@ __i915_sched_rewind_requests(struct intel_engine_cs *engine)
 	return active;
 }
 
+bool __i915_sched_suspend_request(struct intel_engine_cs *engine,
+				  struct i915_request *rq)
+{
+	LIST_HEAD(list);
+
+	lockdep_assert_held(&engine->active.lock);
+	GEM_BUG_ON(rq->engine != engine);
+
+	if (__i915_request_is_complete(rq)) /* too late! */
+		return false;
+
+	if (i915_request_on_hold(rq))
+		return false;
+
+	ENGINE_TRACE(engine, "suspending request %llx:%lld\n",
+		     rq->fence.context, rq->fence.seqno);
+
+	/*
+	 * Transfer this request onto the hold queue to prevent it
+	 * being resubmitted to HW (and potentially completed) before we have
+	 * released it. Since we may have already submitted following
+	 * requests, we need to remove those as well.
+	 */
+	do {
+		struct i915_dependency *p;
+
+		if (i915_request_is_active(rq))
+			__i915_request_unsubmit(rq);
+
+		list_move_tail(&rq->sched.link, &engine->active.hold);
+		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+		i915_request_set_hold(rq);
+		RQ_TRACE(rq, "on hold\n");
+
+		for_each_waiter(p, rq) {
+			struct i915_request *w =
+				container_of(p->waiter, typeof(*w), sched);
+
+			if (p->flags & I915_DEPENDENCY_WEAK)
+				continue;
+
+			/* Leave semaphores spinning on the other engines */
+			if (w->engine != engine)
+				continue;
+
+			if (!i915_request_is_ready(w))
+				continue;
+
+			if (__i915_request_is_complete(w))
+				continue;
+
+			if (i915_request_on_hold(w)) /* acts as a visited bit */
+				continue;
+
+			list_move_tail(&w->sched.link, &list);
+		}
+
+		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
+	} while (rq);
+
+	GEM_BUG_ON(list_empty(&engine->active.hold));
+
+	return true;
+}
+
+bool i915_sched_suspend_request(struct intel_engine_cs *engine,
+				struct i915_request *rq)
+{
+	bool result;
+
+	if (i915_request_on_hold(rq))
+		return false;
+
+	spin_lock_irq(&engine->active.lock);
+	result = __i915_sched_suspend_request(engine, rq);
+	spin_unlock_irq(&engine->active.lock);
+
+	return result;
+}
+
+void __i915_sched_resume_request(struct intel_engine_cs *engine,
+				 struct i915_request *rq)
+{
+	LIST_HEAD(list);
+
+	lockdep_assert_held(&engine->active.lock);
+
+	if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
+		engine->execlists.queue_priority_hint = rq_prio(rq);
+		tasklet_hi_schedule(&engine->execlists.tasklet);
+	}
+
+	if (!i915_request_on_hold(rq))
+		return;
+
+	ENGINE_TRACE(engine, "resuming request %llx:%lld\n",
+		     rq->fence.context, rq->fence.seqno);
+
+	/*
+	 * Move this request back to the priority queue, and all of its
+	 * children and grandchildren that were suspended along with it.
+	 */
+	do {
+		struct i915_dependency *p;
+
+		RQ_TRACE(rq, "hold release\n");
+
+		GEM_BUG_ON(!i915_request_on_hold(rq));
+		GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
+
+		i915_request_clear_hold(rq);
+		list_del_init(&rq->sched.link);
+
+		queue_request(engine, rq);
+
+		/* Also release any children on this engine that are ready */
+		for_each_waiter(p, rq) {
+			struct i915_request *w =
+				container_of(p->waiter, typeof(*w), sched);
+
+			if (p->flags & I915_DEPENDENCY_WEAK)
+				continue;
+
+			/* Propagate any change in error status */
+			if (rq->fence.error)
+				i915_request_set_error_once(w, rq->fence.error);
+
+			if (w->engine != engine)
+				continue;
+
+			/* We also treat the on-hold status as a visited bit */
+			if (!i915_request_on_hold(w))
+				continue;
+
+			/* Check that no other parents are also on hold [BFS] */
+			if (hold_request(w))
+				continue;
+
+			list_move_tail(&w->sched.link, &list);
+		}
+
+		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
+	} while (rq);
+}
+
+void i915_sched_resume_request(struct intel_engine_cs *engine,
+			       struct i915_request *rq)
+{
+	spin_lock_irq(&engine->active.lock);
+	__i915_sched_resume_request(engine, rq);
+	spin_unlock_irq(&engine->active.lock);
+}
+
 void i915_sched_node_init(struct i915_sched_node *node)
 {
 	spin_lock_init(&node->lock);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index d3984f65b3a6..9860459fedb1 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -45,6 +45,16 @@ void i915_request_enqueue(struct i915_request *request);
 struct i915_request *
 __i915_sched_rewind_requests(struct intel_engine_cs *engine);
 
+bool __i915_sched_suspend_request(struct intel_engine_cs *engine,
+				  struct i915_request *rq);
+void __i915_sched_resume_request(struct intel_engine_cs *engine,
+				 struct i915_request *request);
+
+bool i915_sched_suspend_request(struct intel_engine_cs *engine,
+				struct i915_request *request);
+void i915_sched_resume_request(struct intel_engine_cs *engine,
+			       struct i915_request *rq);
+
 struct list_head *
 i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
 
-- 
2.20.1


* [Intel-gfx] [CI 13/14] drm/i915: Extract the ability to defer and rerun a request later
  2021-02-02 15:14 [Intel-gfx] [CI 01/14] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
                   ` (10 preceding siblings ...)
  2021-02-02 15:14 ` [Intel-gfx] [CI 12/14] drm/i915: Extract request suspension from the execlists Chris Wilson
@ 2021-02-02 15:14 ` Chris Wilson
  2021-02-02 15:14 ` [Intel-gfx] [CI 14/14] drm/i915: Fix the iterative dfs for deferring requests Chris Wilson
  2021-02-02 18:37 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for series starting with [CI,01/14] drm/i915/gt: Move engine setup out of set_default_submission (rev2) Patchwork
  13 siblings, 0 replies; 28+ messages in thread
From: Chris Wilson @ 2021-02-02 15:14 UTC (permalink / raw)
  To: intel-gfx

Lift the ability to defer a request until later from execlists into the
common layer.
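
The caller side stays tiny; a sketch matching defer_active() in the
diff, which assumes the engine lock is already held:

	lockdep_assert_held(&engine->active.lock);
	rq = __i915_sched_rewind_requests(engine);
	if (rq)
		/* requeue rq, and its in-flight waiters, behind its prio */
		__i915_sched_defer_request(engine, rq);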

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 .../drm/i915/gt/intel_execlists_submission.c  | 57 +++--------------
 drivers/gpu/drm/i915/i915_scheduler.c         | 63 +++++++++++++++++--
 drivers/gpu/drm/i915/i915_scheduler.h         |  5 +-
 3 files changed, 67 insertions(+), 58 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index a971b3bee532..b1761d937a5f 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -978,54 +978,6 @@ static void virtual_xfer_context(struct virtual_engine *ve,
 	}
 }
 
-static void defer_request(struct i915_request *rq, struct list_head * const pl)
-{
-	LIST_HEAD(list);
-
-	/*
-	 * We want to move the interrupted request to the back of
-	 * the round-robin list (i.e. its priority level), but
-	 * in doing so, we must then move all requests that were in
-	 * flight and were waiting for the interrupted request to
-	 * be run after it again.
-	 */
-	do {
-		struct i915_dependency *p;
-
-		GEM_BUG_ON(i915_request_is_active(rq));
-		list_move_tail(&rq->sched.link, pl);
-
-		for_each_waiter(p, rq) {
-			struct i915_request *w =
-				container_of(p->waiter, typeof(*w), sched);
-
-			if (p->flags & I915_DEPENDENCY_WEAK)
-				continue;
-
-			/* Leave semaphores spinning on the other engines */
-			if (w->engine != rq->engine)
-				continue;
-
-			/* No waiter should start before its signaler */
-			GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) &&
-				   __i915_request_has_started(w) &&
-				   !__i915_request_is_complete(rq));
-
-			if (!i915_request_is_ready(w))
-				continue;
-
-			if (rq_prio(w) < rq_prio(rq))
-				continue;
-
-			GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
-			GEM_BUG_ON(i915_request_is_active(w));
-			list_move_tail(&w->sched.link, &list);
-		}
-
-		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
-	} while (rq);
-}
-
 static void defer_active(struct intel_engine_cs *engine)
 {
 	struct i915_request *rq;
@@ -1034,7 +986,14 @@ static void defer_active(struct intel_engine_cs *engine)
 	if (!rq)
 		return;
 
-	defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
+	/*
+	 * We want to move the interrupted request to the back of
+	 * the round-robin list (i.e. its priority level), but
+	 * in doing so, we must then move all requests that were in
+	 * flight and were waiting for the interrupted request to
+	 * be run after it again.
+	 */
+	__i915_sched_defer_request(engine, rq);
 }
 
 static bool
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index a5df27061c3c..641141f3ce10 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -179,8 +179,8 @@ static void assert_priolists(struct intel_engine_execlists * const execlists)
 	}
 }
 
-struct list_head *
-i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
+static struct list_head *
+lookup_priolist(struct intel_engine_cs *engine, int prio)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_priolist *p;
@@ -332,7 +332,7 @@ static void __i915_request_set_priority(struct i915_request *rq, int prio)
 	struct list_head *pos = &rq->sched.signalers_list;
 	struct list_head *plist;
 
-	plist = i915_sched_lookup_priolist(engine, prio);
+	plist = lookup_priolist(engine, prio);
 
 	/*
 	 * Recursively bump all dependent priorities to match the new request.
@@ -463,12 +463,63 @@ void i915_request_set_priority(struct i915_request *rq, int prio)
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
+void __i915_sched_defer_request(struct intel_engine_cs *engine,
+				struct i915_request *rq)
+{
+	struct list_head *pl;
+	LIST_HEAD(list);
+
+	lockdep_assert_held(&engine->active.lock);
+	GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags));
+
+	/*
+	 * When we defer a request, we must maintain its order with respect
+	 * to those that are waiting upon it. So we traverse its chain of
+	 * waiters and move any that are earlier than the request to after it.
+	 */
+	pl = lookup_priolist(engine, rq_prio(rq));
+	do {
+		struct i915_dependency *p;
+
+		GEM_BUG_ON(i915_request_is_active(rq));
+		list_move_tail(&rq->sched.link, pl);
+
+		for_each_waiter(p, rq) {
+			struct i915_request *w =
+				container_of(p->waiter, typeof(*w), sched);
+
+			if (p->flags & I915_DEPENDENCY_WEAK)
+				continue;
+
+			/* Leave semaphores spinning on the other engines */
+			if (w->engine != engine)
+				continue;
+
+			/* No waiter should start before its signaler */
+			GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) &&
+				   __i915_request_has_started(w) &&
+				   !__i915_request_is_complete(rq));
+
+			if (!i915_request_is_ready(w))
+				continue;
+
+			if (rq_prio(w) < rq_prio(rq))
+				continue;
+
+			GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
+			GEM_BUG_ON(i915_request_is_active(w));
+			list_move_tail(&w->sched.link, &list);
+		}
+
+		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
+	} while (rq);
+}
+
 static void queue_request(struct intel_engine_cs *engine,
 			  struct i915_request *rq)
 {
 	GEM_BUG_ON(!list_empty(&rq->sched.link));
-	list_add_tail(&rq->sched.link,
-		      i915_sched_lookup_priolist(engine, rq_prio(rq)));
+	list_add_tail(&rq->sched.link, lookup_priolist(engine, rq_prio(rq)));
 	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
 }
 
@@ -567,7 +618,7 @@ __i915_sched_rewind_requests(struct intel_engine_cs *engine)
 		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
 		if (rq_prio(rq) != prio) {
 			prio = rq_prio(rq);
-			pl = i915_sched_lookup_priolist(engine, prio);
+			pl = lookup_priolist(engine, prio);
 		}
 		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 9860459fedb1..00ce0a9d519d 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -44,6 +44,8 @@ void i915_request_enqueue(struct i915_request *request);
 
 struct i915_request *
 __i915_sched_rewind_requests(struct intel_engine_cs *engine);
+void __i915_sched_defer_request(struct intel_engine_cs *engine,
+				struct i915_request *request);
 
 bool __i915_sched_suspend_request(struct intel_engine_cs *engine,
 				  struct i915_request *rq);
@@ -55,9 +57,6 @@ bool i915_sched_suspend_request(struct intel_engine_cs *engine,
 void i915_sched_resume_request(struct intel_engine_cs *engine,
 			       struct i915_request *rq);
 
-struct list_head *
-i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
-
 void __i915_priolist_free(struct i915_priolist *p);
 static inline void i915_priolist_free(struct i915_priolist *p)
 {
-- 
2.20.1


* [Intel-gfx] [CI 14/14] drm/i915: Fix the iterative dfs for deferring requests
  2021-02-02 15:14 [Intel-gfx] [CI 01/14] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
                   ` (11 preceding siblings ...)
  2021-02-02 15:14 ` [Intel-gfx] [CI 13/14] drm/i915: Extract the ability to defer and rerun a request later Chris Wilson
@ 2021-02-02 15:14 ` Chris Wilson
  2021-02-02 18:37 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for series starting with [CI,01/14] drm/i915/gt: Move engine setup out of set_default_submission (rev2) Patchwork
  13 siblings, 0 replies; 28+ messages in thread
From: Chris Wilson @ 2021-02-02 15:14 UTC (permalink / raw)
  To: intel-gfx

The current implementation of walking the children of a deferred
request lacks the backtracking required to reduce the dfs to linear
time. Having pulled it from execlists into the common layer, we can
reuse the dfs code already used for priority inheritance.
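
For reference, stack_push()/stack_pop() below are the branch-memoization
helpers introduced with the priority-inheritance dfs: they stash the
walk position in each request's sched.dfs slot, so backtracking is O(1)
and needs no allocation. Their exact definitions live in the earlier
patch; the bodies here are an illustrative reconstruction:

	static struct i915_request *
	stack_push(struct i915_request *rq,
		   struct i915_request *prev,
		   struct list_head *pos)
	{
		prev->sched.dfs.pos = pos;	/* resume point in prev's list */
		rq->sched.dfs.prev = prev;	/* branch we descended from */
		return rq;
	}

	static struct i915_request *
	stack_pop(struct i915_request *rq, struct list_head **pos)
	{
		rq = rq->sched.dfs.prev;	/* backtrack one level */
		if (rq)
			*pos = rq->sched.dfs.pos;
		return rq;
	}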

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_scheduler.c | 56 +++++++++++++++++++--------
 1 file changed, 40 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 641141f3ce10..8dd999f09412 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -466,8 +466,10 @@ void i915_request_set_priority(struct i915_request *rq, int prio)
 void __i915_sched_defer_request(struct intel_engine_cs *engine,
 				struct i915_request *rq)
 {
-	struct list_head *pl;
-	LIST_HEAD(list);
+	struct list_head *pos = &rq->sched.waiters_list;
+	const int prio = rq_prio(rq);
+	struct i915_request *rn;
+	LIST_HEAD(dfs);
 
 	lockdep_assert_held(&engine->active.lock);
 	GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags));
@@ -477,14 +479,11 @@ void __i915_sched_defer_request(struct intel_engine_cs *engine,
 	 * to those that are waiting upon it. So we traverse its chain of
 	 * waiters and move any that are earlier than the request to after it.
 	 */
-	pl = lookup_priolist(engine, rq_prio(rq));
+	rq->sched.dfs.prev = NULL;
 	do {
-		struct i915_dependency *p;
-
-		GEM_BUG_ON(i915_request_is_active(rq));
-		list_move_tail(&rq->sched.link, pl);
-
-		for_each_waiter(p, rq) {
+		list_for_each_continue(pos, &rq->sched.waiters_list) {
+			struct i915_dependency *p =
+				list_entry(pos, typeof(*p), wait_link);
 			struct i915_request *w =
 				container_of(p->waiter, typeof(*w), sched);
 
@@ -500,19 +499,44 @@ void __i915_sched_defer_request(struct intel_engine_cs *engine,
 				   __i915_request_has_started(w) &&
 				   !__i915_request_is_complete(rq));
 
-			if (!i915_request_is_ready(w))
+			if (!i915_request_in_priority_queue(w))
 				continue;
 
-			if (rq_prio(w) < rq_prio(rq))
+			/*
+			 * We also need to reorder within the same priority.
+			 *
+			 * This is unlike priority-inheritance, where if the
+			 * signaler already has a higher priority [earlier
+			 * deadline] than us, we can ignore as it will be
+			 * scheduled first. If a waiter already has the
+			 * same priority, we still have to push it to the end
+			 * of the list. This unfortunately means we cannot
+			 * use the rq_deadline() itself as a 'visited' bit.
+			 */
+			if (rq_prio(w) < prio)
 				continue;
 
-			GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
-			GEM_BUG_ON(i915_request_is_active(w));
-			list_move_tail(&w->sched.link, &list);
+			GEM_BUG_ON(rq_prio(w) != prio);
+
+			/* Remember our position along this branch */
+			rq = stack_push(w, rq, pos);
+			pos = &rq->sched.waiters_list;
 		}
 
-		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
-	} while (rq);
+		/* Note list is reversed for waiters wrt signal hierarchy */
+		GEM_BUG_ON(rq->engine != engine);
+		GEM_BUG_ON(!i915_request_in_priority_queue(rq));
+		list_move(&rq->sched.link, &dfs);
+
+		/* Track our visit, and prevent duplicate processing */
+		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+	} while ((rq = stack_pop(rq, &pos)));
+
+	pos = lookup_priolist(engine, prio);
+	list_for_each_entry_safe(rq, rn, &dfs, sched.link) {
+		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+		list_add_tail(&rq->sched.link, pos);
+	}
 }
 
 static void queue_request(struct intel_engine_cs *engine,
-- 
2.20.1


* Re: [Intel-gfx] [CI 03/14] drm/i915/gt: Move CS interrupt handler to the backend
  2021-02-02 15:14 ` [Intel-gfx] [CI 03/14] drm/i915/gt: Move CS interrupt handler to the backend Chris Wilson
@ 2021-02-02 15:49   ` Tvrtko Ursulin
  2021-02-02 15:53     ` Chris Wilson
  2021-02-02 16:15   ` [Intel-gfx] [PATCH v2] " Chris Wilson
  1 sibling, 1 reply; 28+ messages in thread
From: Tvrtko Ursulin @ 2021-02-02 15:49 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 02/02/2021 15:14, Chris Wilson wrote:
> The different submission backends each have their own preferred
> behaviour and interrupt setup. Let each handle their own interrupts.
> 
> This becomes more useful later as we extract the use of auxiliary
> state in the interrupt handler that is backend specific.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  7 ++
>   drivers/gpu/drm/i915/gt/intel_engine_types.h  | 14 +---
>   .../drm/i915/gt/intel_execlists_submission.c  | 40 +++++++++
>   drivers/gpu/drm/i915/gt/intel_gt_irq.c        | 82 ++++++-------------
>   drivers/gpu/drm/i915/gt/intel_gt_irq.h        |  7 ++
>   .../gpu/drm/i915/gt/intel_ring_submission.c   |  7 ++
>   drivers/gpu/drm/i915/gt/intel_rps.c           |  2 +-
>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 10 ++-
>   drivers/gpu/drm/i915/i915_irq.c               |  8 +-
>   9 files changed, 103 insertions(+), 74 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index dab8d734e272..2a453ba5f25a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -255,6 +255,11 @@ static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
>   	intel_engine_set_hwsp_writemask(engine, ~0u);
>   }
>   
> +static void nop_irq_handler(struct intel_engine_cs *engine, u32 iir)
> +{
> +	GEM_DEBUG_WARN_ON(iir);
> +}
> +
>   static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
>   {
>   	const struct engine_info *info = &intel_engines[id];
> @@ -292,6 +297,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
>   	engine->hw_id = info->hw_id;
>   	engine->guc_id = MAKE_GUC_ID(info->class, info->instance);
>   
> +	engine->irq_handler = nop_irq_handler;
> +
>   	engine->class = info->class;
>   	engine->instance = info->instance;
>   	__sprint_engine_name(engine);
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 9d59de5c559a..7fd035d45263 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -402,6 +402,7 @@ struct intel_engine_cs {
>   	u32		irq_enable_mask; /* bitmask to enable ring interrupt */
>   	void		(*irq_enable)(struct intel_engine_cs *engine);
>   	void		(*irq_disable)(struct intel_engine_cs *engine);
> +	void		(*irq_handler)(struct intel_engine_cs *engine, u32 iir);
>   
>   	void		(*sanitize)(struct intel_engine_cs *engine);
>   	int		(*resume)(struct intel_engine_cs *engine);
> @@ -481,10 +482,9 @@ struct intel_engine_cs {
>   #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
>   #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
>   #define I915_ENGINE_HAS_TIMESLICES   BIT(4)
> -#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5)
> -#define I915_ENGINE_IS_VIRTUAL       BIT(6)
> -#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7)
> -#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8)
> +#define I915_ENGINE_IS_VIRTUAL       BIT(5)
> +#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
> +#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
>   	unsigned int flags;
>   
>   	/*
> @@ -588,12 +588,6 @@ intel_engine_has_timeslices(const struct intel_engine_cs *engine)
>   	return engine->flags & I915_ENGINE_HAS_TIMESLICES;
>   }
>   
> -static inline bool
> -intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
> -{
> -	return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
> -}
> -
>   static inline bool
>   intel_engine_is_virtual(const struct intel_engine_cs *engine)
>   {
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index 4ddd2099a931..ed62e4b549d2 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -2394,6 +2394,45 @@ static void execlists_submission_tasklet(struct tasklet_struct *t)
>   	rcu_read_unlock();
>   }
>   
> +static void execlists_irq_handler(struct intel_engine_cs *engine, u32 iir)
> +{
> +	bool tasklet = false;
> +
> +	if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) {
> +		u32 eir;
> +
> +		/* Upper 16b are the enabling mask, rsvd for internal errors */
> +		eir = ENGINE_READ(engine, RING_EIR) & GENMASK(15, 0);
> +		ENGINE_TRACE(engine, "CS error: %x\n", eir);
> +
> +		/* Disable the error interrupt until after the reset */
> +		if (likely(eir)) {
> +			ENGINE_WRITE(engine, RING_EMR, ~0u);
> +			ENGINE_WRITE(engine, RING_EIR, eir);
> +			WRITE_ONCE(engine->execlists.error_interrupt, eir);
> +			tasklet = true;
> +		}
> +	}
> +
> +	if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
> +		WRITE_ONCE(engine->execlists.yield,
> +			   ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
> +		ENGINE_TRACE(engine, "semaphore yield: %08x\n",
> +			     engine->execlists.yield);
> +		if (del_timer(&engine->execlists.timer))
> +			tasklet = true;
> +	}
> +
> +	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
> +		tasklet = true;
> +
> +	if (iir & GT_RENDER_USER_INTERRUPT)
> +		intel_engine_signal_breadcrumbs(engine);
> +
> +	if (tasklet)
> +		tasklet_hi_schedule(&engine->execlists.tasklet);
> +}
> +
>   static void __execlists_kick(struct intel_engine_execlists *execlists)
>   {
>   	/* Kick the tasklet for some interrupt coalescing and reset handling */
> @@ -3146,6 +3185,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
>   		 * until a more refined solution exists.
>   		 */
>   	}
> +	engine->irq_handler = execlists_irq_handler;
>   
>   	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
>   	if (!intel_vgpu_active(engine->i915)) {
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> index 9fc6c912a4e5..f5aa31ae8f6c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> @@ -20,48 +20,6 @@ static void guc_irq_handler(struct intel_guc *guc, u16 iir)
>   		intel_guc_to_host_event_handler(guc);
>   }
>   
> -static void
> -cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
> -{
> -	bool tasklet = false;
> -
> -	if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) {
> -		u32 eir;
> -
> -		/* Upper 16b are the enabling mask, rsvd for internal errors */
> -		eir = ENGINE_READ(engine, RING_EIR) & GENMASK(15, 0);
> -		ENGINE_TRACE(engine, "CS error: %x\n", eir);
> -
> -		/* Disable the error interrupt until after the reset */
> -		if (likely(eir)) {
> -			ENGINE_WRITE(engine, RING_EMR, ~0u);
> -			ENGINE_WRITE(engine, RING_EIR, eir);
> -			WRITE_ONCE(engine->execlists.error_interrupt, eir);
> -			tasklet = true;
> -		}
> -	}
> -
> -	if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
> -		WRITE_ONCE(engine->execlists.yield,
> -			   ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
> -		ENGINE_TRACE(engine, "semaphore yield: %08x\n",
> -			     engine->execlists.yield);
> -		if (del_timer(&engine->execlists.timer))
> -			tasklet = true;
> -	}
> -
> -	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
> -		tasklet = true;
> -
> -	if (iir & GT_RENDER_USER_INTERRUPT) {
> -		intel_engine_signal_breadcrumbs(engine);
> -		tasklet |= intel_engine_needs_breadcrumb_tasklet(engine);
> -	}
> -
> -	if (tasklet)
> -		tasklet_hi_schedule(&engine->execlists.tasklet);
> -}
> -
>   static u32
>   gen11_gt_engine_identity(struct intel_gt *gt,
>   			 const unsigned int bank, const unsigned int bit)
> @@ -122,7 +80,7 @@ gen11_engine_irq_handler(struct intel_gt *gt, const u8 class,
>   		engine = NULL;
>   
>   	if (likely(engine))
> -		return cs_irq_handler(engine, iir);
> +		return intel_engine_cs_irq(engine, iir);
>   
>   	WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n",
>   		  class, instance);
> @@ -275,9 +233,12 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
>   void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
>   {
>   	if (gt_iir & GT_RENDER_USER_INTERRUPT)
> -		intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]);
> +		intel_engine_cs_irq(gt->engine_class[RENDER_CLASS][0],
> +				    gt_iir);
> +
>   	if (gt_iir & ILK_BSD_USER_INTERRUPT)
> -		intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]);
> +		intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0],
> +				    gt_iir);
>   }
>   
>   static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
> @@ -301,11 +262,16 @@ static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
>   void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
>   {
>   	if (gt_iir & GT_RENDER_USER_INTERRUPT)
> -		intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]);
> +		intel_engine_cs_irq(gt->engine_class[RENDER_CLASS][0],
> +				    gt_iir);
> +
>   	if (gt_iir & GT_BSD_USER_INTERRUPT)
> -		intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]);
> +		intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0],
> +				    gt_iir);
> +
>   	if (gt_iir & GT_BLT_USER_INTERRUPT)
> -		intel_engine_signal_breadcrumbs(gt->engine_class[COPY_ENGINE_CLASS][0]);
> +		intel_engine_cs_irq(gt->engine_class[COPY_ENGINE_CLASS][0],
> +				    gt_iir);
>   
>   	if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
>   		      GT_BSD_CS_ERROR_INTERRUPT |
> @@ -324,10 +290,10 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
>   	if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
>   		iir = raw_reg_read(regs, GEN8_GT_IIR(0));
>   		if (likely(iir)) {
> -			cs_irq_handler(gt->engine_class[RENDER_CLASS][0],
> -				       iir >> GEN8_RCS_IRQ_SHIFT);
> -			cs_irq_handler(gt->engine_class[COPY_ENGINE_CLASS][0],
> -				       iir >> GEN8_BCS_IRQ_SHIFT);
> +			intel_engine_cs_irq(gt->engine_class[RENDER_CLASS][0],
> +					    iir >> GEN8_RCS_IRQ_SHIFT);
> +			intel_engine_cs_irq(gt->engine_class[COPY_ENGINE_CLASS][0],
> +					    iir >> GEN8_BCS_IRQ_SHIFT);
>   			raw_reg_write(regs, GEN8_GT_IIR(0), iir);
>   		}
>   	}
> @@ -335,10 +301,10 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
>   	if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) {
>   		iir = raw_reg_read(regs, GEN8_GT_IIR(1));
>   		if (likely(iir)) {
> -			cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][0],
> -				       iir >> GEN8_VCS0_IRQ_SHIFT);
> -			cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][1],
> -				       iir >> GEN8_VCS1_IRQ_SHIFT);
> +			intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0],
> +					    iir >> GEN8_VCS0_IRQ_SHIFT);
> +			intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][1],
> +					    iir >> GEN8_VCS1_IRQ_SHIFT);
>   			raw_reg_write(regs, GEN8_GT_IIR(1), iir);
>   		}
>   	}
> @@ -346,8 +312,8 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
>   	if (master_ctl & GEN8_GT_VECS_IRQ) {
>   		iir = raw_reg_read(regs, GEN8_GT_IIR(3));
>   		if (likely(iir)) {
> -			cs_irq_handler(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0],
> -				       iir >> GEN8_VECS_IRQ_SHIFT);
> +			intel_engine_cs_irq(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0],
> +					    iir >> GEN8_VECS_IRQ_SHIFT);
>   			raw_reg_write(regs, GEN8_GT_IIR(3), iir);
>   		}
>   	}
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.h b/drivers/gpu/drm/i915/gt/intel_gt_irq.h
> index f667e976fb2b..601473fe9df9 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.h
> @@ -8,6 +8,8 @@
>   
>   #include <linux/types.h>
>   
> +#include "intel_engine_types.h"
> +
>   struct intel_gt;
>   
>   #define GEN8_GT_IRQS (GEN8_GT_RCS_IRQ | \
> @@ -39,4 +41,9 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl);
>   void gen8_gt_irq_reset(struct intel_gt *gt);
>   void gen8_gt_irq_postinstall(struct intel_gt *gt);
>   
> +static inline void intel_engine_cs_irq(struct intel_engine_cs *engine, u32 iir)
> +{
> +	engine->irq_handler(engine, iir);
> +}
> +
>   #endif /* INTEL_GT_IRQ_H */
> diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> index 3cb2ce503544..9b5bfbe79347 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> @@ -997,10 +997,17 @@ static void ring_release(struct intel_engine_cs *engine)
>   	intel_timeline_put(engine->legacy.timeline);
>   }
>   
> +static void irq_handler(struct intel_engine_cs *engine, u32 iir)
> +{
> +	intel_engine_signal_breadcrumbs(engine);
> +}
> +
>   static void setup_irq(struct intel_engine_cs *engine)
>   {
>   	struct drm_i915_private *i915 = engine->i915;
>   
> +	engine->irq_handler = irq_handler;
> +
>   	if (INTEL_GEN(i915) >= 6) {
>   		engine->irq_enable = gen6_irq_enable;
>   		engine->irq_disable = gen6_irq_disable;
> diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
> index 405d814e9040..4ba6a33f65cf 100644
> --- a/drivers/gpu/drm/i915/gt/intel_rps.c
> +++ b/drivers/gpu/drm/i915/gt/intel_rps.c
> @@ -1774,7 +1774,7 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
>   		return;
>   
>   	if (pm_iir & PM_VEBOX_USER_INTERRUPT)
> -		intel_engine_signal_breadcrumbs(gt->engine[VECS0]);
> +		intel_engine_cs_irq(gt->engine[VECS0], pm_iir);
>   
>   	if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
>   		DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 17b551a0c89f..96a38466299e 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -264,6 +264,14 @@ static void guc_submission_tasklet(struct tasklet_struct *t)
>   	spin_unlock_irqrestore(&engine->active.lock, flags);
>   }
>   
> +static void cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
> +{
> +	if (iir & GT_RENDER_USER_INTERRUPT) {
> +		intel_engine_signal_breadcrumbs(engine);
> +		tasklet_hi_schedule(&engine->execlists.tasklet);
> +	}
> +}
> +
>   static void guc_reset_prepare(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -645,7 +653,6 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
>   	}
>   	engine->set_default_submission = guc_set_default_submission;
>   
> -	engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
>   	engine->flags |= I915_ENGINE_HAS_PREEMPTION;
>   
>   	/*
> @@ -681,6 +688,7 @@ static void rcs_submission_override(struct intel_engine_cs *engine)
>   static inline void guc_default_irqs(struct intel_engine_cs *engine)
>   {
>   	engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT;
> +	engine->irq_handler = cs_irq_handler;
>   }
>   
>   int intel_guc_submission_setup(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 9d47da8ec86d..37a48402adc1 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -3954,7 +3954,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
>   		intel_uncore_write16(&dev_priv->uncore, GEN2_IIR, iir);
>   
>   		if (iir & I915_USER_INTERRUPT)
> -			intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]);
> +			intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir);
>   
>   		if (iir & I915_MASTER_ERROR_INTERRUPT)
>   			i8xx_error_irq_handler(dev_priv, eir, eir_stuck);
> @@ -4062,7 +4062,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
>   		intel_uncore_write(&dev_priv->uncore, GEN2_IIR, iir);
>   
>   		if (iir & I915_USER_INTERRUPT)
> -			intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]);
> +			intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir);
>   
>   		if (iir & I915_MASTER_ERROR_INTERRUPT)
>   			i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
> @@ -4207,10 +4207,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
>   		intel_uncore_write(&dev_priv->uncore, GEN2_IIR, iir);
>   
>   		if (iir & I915_USER_INTERRUPT)
> -			intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]);
> +			intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir);
>   
>   		if (iir & I915_BSD_USER_INTERRUPT)
> -			intel_engine_signal_breadcrumbs(dev_priv->gt.engine[VCS0]);
> +			intel_engine_cs_irq(dev_priv->gt.engine[VCS0], iir);
>   
>   		if (iir & I915_MASTER_ERROR_INTERRUPT)
>   			i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
> 

Looks believable as design cleanup.

Wonder if some barrier is needed after overwriting the nop handler to be 
sure it propagates to all CPUs by the time interrupts get enabled. Maybe 
we have enough sync points between the two events. Or add a paranoid 
setter with some barrier - what would be required to flush the write to 
all cores? smp_store_mb?
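
Something along these lines, perhaps (untested sketch, name made up;
smp_store_mb() is a WRITE_ONCE() followed by a full smp_mb()):

	/* Paranoid setter: publish the new handler to all cores */
	static inline void
	set_irq_handler(struct intel_engine_cs *engine,
			void (*fn)(struct intel_engine_cs *engine, u32 iir))
	{
		smp_store_mb(engine->irq_handler, fn);
	}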

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Intel-gfx] [CI 03/14] drm/i915/gt: Move CS interrupt handler to the backend
  2021-02-02 15:49   ` Tvrtko Ursulin
@ 2021-02-02 15:53     ` Chris Wilson
  2021-02-02 16:08       ` Chris Wilson
  0 siblings, 1 reply; 28+ messages in thread
From: Chris Wilson @ 2021-02-02 15:53 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2021-02-02 15:49:59)
> 
> On 02/02/2021 15:14, Chris Wilson wrote:
> > The different submission backends each have their own preferred
> > behaviour and interrupt setup. Let each handle their own interrupts.
> > 
> > [snip]
> 
> Looks believable as design cleanup.
> 
> Wonder if some barrier is needed after overwriting the nop handler to be 
> sure it propagates to all CPUs by the time interrupts get enabled. Maybe 
> we have enough sync points between the two events. Or add a paranoid 
> setter with some barrier - what would be required to flush the write to 
> all cores? smp_store_mb?

Hmm. Indeed, that seems justified. So justified I expect it's already
taken care of for us on installing the irq handler. Let's have a look.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Intel-gfx] [CI 03/14] drm/i915/gt: Move CS interrupt handler to the backend
  2021-02-02 15:53     ` Chris Wilson
@ 2021-02-02 16:08       ` Chris Wilson
  0 siblings, 0 replies; 28+ messages in thread
From: Chris Wilson @ 2021-02-02 16:08 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Chris Wilson (2021-02-02 15:53:41)
> Quoting Tvrtko Ursulin (2021-02-02 15:49:59)
> > 
> > On 02/02/2021 15:14, Chris Wilson wrote:
> > > The different submission backends each have their own preferred
> > > behaviour and interrupt setup. Let each handle their own interrupts.
> > > 
> > > [snip]
> > 
> > Looks believable as design cleanup.
> > 
> > Wonder if some barrier is needed after overwriting the nop handler to be 
> > sure it propagates to all CPUs by the time interrupts get enabled. Maybe 
> > we have enough sync points between the two events. Or add a paranoid 
> > setter with some barrier - what would be required to flush the write to 
> > all cores? smp_store_mb?
> 
> Hmm. Indeed, that seems justified. So justified I expect it's already
> taken care of for us on installing the irq handler. Let's have a look.

Ok. There are plenty of mutex/spin/mmio serialisation points on the path
to installing an interrupt handler:

i915_driver_mmio_probe -> intel_gt_init_mmio -> intel_engine_init_mmio -> nop_irq_handler

intel_irq_install

i915_gem_init -> intel_gt_init -> intel_engines_init -> assign real engine->irq_handler

So all that is irrelevant. We change the engine->irq_handler while the
interrupts are live. And we may get an interrupt as soon as we unmask
the engines. So erring on the side of smp_store_mb(engine->irq_handler,
real_irq_handler) does not seem wholly paranoid.
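
To spell out the worry, possibly only theoretical given the sync points
above, the ordering to guard against is roughly:

	CPU0 (engine setup)		CPU1 (interrupt)
	engine->irq_handler = fn;
	<unmask engine interrupts>	<interrupt fires>
					engine->irq_handler(engine, iir);

Without a barrier between the store and the unmask, CPU1 could service
the freshly unmasked interrupt while still observing the stale nop
handler; smp_store_mb() publishes the new handler before we proceed to
the unmask.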
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [Intel-gfx] [PATCH v2] drm/i915/gt: Move CS interrupt handler to the backend
  2021-02-02 15:14 ` [Intel-gfx] [CI 03/14] drm/i915/gt: Move CS interrupt handler to the backend Chris Wilson
  2021-02-02 15:49   ` Tvrtko Ursulin
@ 2021-02-02 16:15   ` Chris Wilson
  2021-02-02 16:33     ` Tvrtko Ursulin
  1 sibling, 1 reply; 28+ messages in thread
From: Chris Wilson @ 2021-02-02 16:15 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

The different submission backends each have their own preferred
behaviour and interrupt setup. Let each handle their own interrupts.

This becomes more useful later as we extract the use of auxiliary
state in the interrupt handler that is backend specific.

v2: An overabundance of caution is always justified; put a barrier on
updating the irq handler so that we know that the next interrupt will
be redirected towards ourselves.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  7 ++
 drivers/gpu/drm/i915/gt/intel_engine_types.h  | 14 +---
 .../drm/i915/gt/intel_execlists_submission.c  | 40 +++++++++
 drivers/gpu/drm/i915/gt/intel_gt_irq.c        | 82 ++++++-------------
 drivers/gpu/drm/i915/gt/intel_gt_irq.h        | 22 +++++
 .../gpu/drm/i915/gt/intel_ring_submission.c   |  7 ++
 drivers/gpu/drm/i915/gt/intel_rps.c           |  2 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 10 ++-
 drivers/gpu/drm/i915/i915_irq.c               |  8 +-
 9 files changed, 118 insertions(+), 74 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index c7d17f8767a1..e06ae4ae1710 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -255,6 +255,11 @@ static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
 	intel_engine_set_hwsp_writemask(engine, ~0u);
 }
 
+static void nop_irq_handler(struct intel_engine_cs *engine, u32 iir)
+{
+	GEM_DEBUG_WARN_ON(iir);
+}
+
 static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 {
 	const struct engine_info *info = &intel_engines[id];
@@ -292,6 +297,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	engine->hw_id = info->hw_id;
 	engine->guc_id = MAKE_GUC_ID(info->class, info->instance);
 
+	engine->irq_handler = nop_irq_handler;
+
 	engine->class = info->class;
 	engine->instance = info->instance;
 	__sprint_engine_name(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 9d59de5c559a..7fd035d45263 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -402,6 +402,7 @@ struct intel_engine_cs {
 	u32		irq_enable_mask; /* bitmask to enable ring interrupt */
 	void		(*irq_enable)(struct intel_engine_cs *engine);
 	void		(*irq_disable)(struct intel_engine_cs *engine);
+	void		(*irq_handler)(struct intel_engine_cs *engine, u32 iir);
 
 	void		(*sanitize)(struct intel_engine_cs *engine);
 	int		(*resume)(struct intel_engine_cs *engine);
@@ -481,10 +482,9 @@ struct intel_engine_cs {
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
 #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
 #define I915_ENGINE_HAS_TIMESLICES   BIT(4)
-#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5)
-#define I915_ENGINE_IS_VIRTUAL       BIT(6)
-#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7)
-#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8)
+#define I915_ENGINE_IS_VIRTUAL       BIT(5)
+#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
 	unsigned int flags;
 
 	/*
@@ -588,12 +588,6 @@ intel_engine_has_timeslices(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_HAS_TIMESLICES;
 }
 
-static inline bool
-intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
-{
-	return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
-}
-
 static inline bool
 intel_engine_is_virtual(const struct intel_engine_cs *engine)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 4ddd2099a931..05846f97f1af 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2394,6 +2394,45 @@ static void execlists_submission_tasklet(struct tasklet_struct *t)
 	rcu_read_unlock();
 }
 
+static void execlists_irq_handler(struct intel_engine_cs *engine, u32 iir)
+{
+	bool tasklet = false;
+
+	if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) {
+		u32 eir;
+
+		/* Upper 16b are the enabling mask, rsvd for internal errors */
+		eir = ENGINE_READ(engine, RING_EIR) & GENMASK(15, 0);
+		ENGINE_TRACE(engine, "CS error: %x\n", eir);
+
+		/* Disable the error interrupt until after the reset */
+		if (likely(eir)) {
+			ENGINE_WRITE(engine, RING_EMR, ~0u);
+			ENGINE_WRITE(engine, RING_EIR, eir);
+			WRITE_ONCE(engine->execlists.error_interrupt, eir);
+			tasklet = true;
+		}
+	}
+
+	if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
+		WRITE_ONCE(engine->execlists.yield,
+			   ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
+		ENGINE_TRACE(engine, "semaphore yield: %08x\n",
+			     engine->execlists.yield);
+		if (del_timer(&engine->execlists.timer))
+			tasklet = true;
+	}
+
+	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
+		tasklet = true;
+
+	if (iir & GT_RENDER_USER_INTERRUPT)
+		intel_engine_signal_breadcrumbs(engine);
+
+	if (tasklet)
+		tasklet_hi_schedule(&engine->execlists.tasklet);
+}
+
 static void __execlists_kick(struct intel_engine_execlists *execlists)
 {
 	/* Kick the tasklet for some interrupt coalescing and reset handling */
@@ -3146,6 +3185,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 		 * until a more refined solution exists.
 		 */
 	}
+	intel_engine_set_irq_handler(engine, execlists_irq_handler);
 
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
 	if (!intel_vgpu_active(engine->i915)) {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index 9fc6c912a4e5..f5aa31ae8f6c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -20,48 +20,6 @@ static void guc_irq_handler(struct intel_guc *guc, u16 iir)
 		intel_guc_to_host_event_handler(guc);
 }
 
-static void
-cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
-{
-	bool tasklet = false;
-
-	if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) {
-		u32 eir;
-
-		/* Upper 16b are the enabling mask, rsvd for internal errors */
-		eir = ENGINE_READ(engine, RING_EIR) & GENMASK(15, 0);
-		ENGINE_TRACE(engine, "CS error: %x\n", eir);
-
-		/* Disable the error interrupt until after the reset */
-		if (likely(eir)) {
-			ENGINE_WRITE(engine, RING_EMR, ~0u);
-			ENGINE_WRITE(engine, RING_EIR, eir);
-			WRITE_ONCE(engine->execlists.error_interrupt, eir);
-			tasklet = true;
-		}
-	}
-
-	if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
-		WRITE_ONCE(engine->execlists.yield,
-			   ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
-		ENGINE_TRACE(engine, "semaphore yield: %08x\n",
-			     engine->execlists.yield);
-		if (del_timer(&engine->execlists.timer))
-			tasklet = true;
-	}
-
-	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
-		tasklet = true;
-
-	if (iir & GT_RENDER_USER_INTERRUPT) {
-		intel_engine_signal_breadcrumbs(engine);
-		tasklet |= intel_engine_needs_breadcrumb_tasklet(engine);
-	}
-
-	if (tasklet)
-		tasklet_hi_schedule(&engine->execlists.tasklet);
-}
-
 static u32
 gen11_gt_engine_identity(struct intel_gt *gt,
 			 const unsigned int bank, const unsigned int bit)
@@ -122,7 +80,7 @@ gen11_engine_irq_handler(struct intel_gt *gt, const u8 class,
 		engine = NULL;
 
 	if (likely(engine))
-		return cs_irq_handler(engine, iir);
+		return intel_engine_cs_irq(engine, iir);
 
 	WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n",
 		  class, instance);
@@ -275,9 +233,12 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
 void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
 {
 	if (gt_iir & GT_RENDER_USER_INTERRUPT)
-		intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]);
+		intel_engine_cs_irq(gt->engine_class[RENDER_CLASS][0],
+				    gt_iir);
+
 	if (gt_iir & ILK_BSD_USER_INTERRUPT)
-		intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]);
+		intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0],
+				    gt_iir);
 }
 
 static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
@@ -301,11 +262,16 @@ static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
 void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
 {
 	if (gt_iir & GT_RENDER_USER_INTERRUPT)
-		intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]);
+		intel_engine_cs_irq(gt->engine_class[RENDER_CLASS][0],
+				    gt_iir);
+
 	if (gt_iir & GT_BSD_USER_INTERRUPT)
-		intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]);
+		intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0],
+				    gt_iir);
+
 	if (gt_iir & GT_BLT_USER_INTERRUPT)
-		intel_engine_signal_breadcrumbs(gt->engine_class[COPY_ENGINE_CLASS][0]);
+		intel_engine_cs_irq(gt->engine_class[COPY_ENGINE_CLASS][0],
+				    gt_iir);
 
 	if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
 		      GT_BSD_CS_ERROR_INTERRUPT |
@@ -324,10 +290,10 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
 	if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
 		iir = raw_reg_read(regs, GEN8_GT_IIR(0));
 		if (likely(iir)) {
-			cs_irq_handler(gt->engine_class[RENDER_CLASS][0],
-				       iir >> GEN8_RCS_IRQ_SHIFT);
-			cs_irq_handler(gt->engine_class[COPY_ENGINE_CLASS][0],
-				       iir >> GEN8_BCS_IRQ_SHIFT);
+			intel_engine_cs_irq(gt->engine_class[RENDER_CLASS][0],
+					    iir >> GEN8_RCS_IRQ_SHIFT);
+			intel_engine_cs_irq(gt->engine_class[COPY_ENGINE_CLASS][0],
+					    iir >> GEN8_BCS_IRQ_SHIFT);
 			raw_reg_write(regs, GEN8_GT_IIR(0), iir);
 		}
 	}
@@ -335,10 +301,10 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
 	if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) {
 		iir = raw_reg_read(regs, GEN8_GT_IIR(1));
 		if (likely(iir)) {
-			cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][0],
-				       iir >> GEN8_VCS0_IRQ_SHIFT);
-			cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][1],
-				       iir >> GEN8_VCS1_IRQ_SHIFT);
+			intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0],
+					    iir >> GEN8_VCS0_IRQ_SHIFT);
+			intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][1],
+					    iir >> GEN8_VCS1_IRQ_SHIFT);
 			raw_reg_write(regs, GEN8_GT_IIR(1), iir);
 		}
 	}
@@ -346,8 +312,8 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
 	if (master_ctl & GEN8_GT_VECS_IRQ) {
 		iir = raw_reg_read(regs, GEN8_GT_IIR(3));
 		if (likely(iir)) {
-			cs_irq_handler(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0],
-				       iir >> GEN8_VECS_IRQ_SHIFT);
+			intel_engine_cs_irq(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0],
+					    iir >> GEN8_VECS_IRQ_SHIFT);
 			raw_reg_write(regs, GEN8_GT_IIR(3), iir);
 		}
 	}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.h b/drivers/gpu/drm/i915/gt/intel_gt_irq.h
index f667e976fb2b..894efd471e93 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.h
@@ -8,6 +8,8 @@
 
 #include <linux/types.h>
 
+#include "intel_engine_types.h"
+
 struct intel_gt;
 
 #define GEN8_GT_IRQS (GEN8_GT_RCS_IRQ | \
@@ -39,4 +41,24 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl);
 void gen8_gt_irq_reset(struct intel_gt *gt);
 void gen8_gt_irq_postinstall(struct intel_gt *gt);
 
+static inline void intel_engine_cs_irq(struct intel_engine_cs *engine, u32 iir)
+{
+	engine->irq_handler(engine, iir);
+}
+
+static inline void
+intel_engine_set_irq_handler(struct intel_engine_cs *engine,
+			     void (*fn)(struct intel_engine_cs *engine,
+					u32 iir))
+{
+	/*
+	 * As the interrupt is live while we allocate and set up the engines,
+	 * err on the side of caution and apply barriers when updating
+	 * the irq handler callback, so that we are assured that before
+	 * we use the engine, we will receive its interrupts ourselves.
+	 */
+	smp_store_mb(engine->irq_handler, fn);
+}
+
+
 #endif /* INTEL_GT_IRQ_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 3cb2ce503544..3673f61c0813 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -997,10 +997,17 @@ static void ring_release(struct intel_engine_cs *engine)
 	intel_timeline_put(engine->legacy.timeline);
 }
 
+static void irq_handler(struct intel_engine_cs *engine, u32 iir)
+{
+	intel_engine_signal_breadcrumbs(engine);
+}
+
 static void setup_irq(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *i915 = engine->i915;
 
+	intel_engine_set_irq_handler(engine, irq_handler);
+
 	if (INTEL_GEN(i915) >= 6) {
 		engine->irq_enable = gen6_irq_enable;
 		engine->irq_disable = gen6_irq_disable;
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 405d814e9040..4ba6a33f65cf 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1774,7 +1774,7 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
 		return;
 
 	if (pm_iir & PM_VEBOX_USER_INTERRUPT)
-		intel_engine_signal_breadcrumbs(gt->engine[VECS0]);
+		intel_engine_cs_irq(gt->engine[VECS0], pm_iir);
 
 	if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
 		DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 17b551a0c89f..838e12b42009 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -264,6 +264,14 @@ static void guc_submission_tasklet(struct tasklet_struct *t)
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
+static void cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
+{
+	if (iir & GT_RENDER_USER_INTERRUPT) {
+		intel_engine_signal_breadcrumbs(engine);
+		tasklet_hi_schedule(&engine->execlists.tasklet);
+	}
+}
+
 static void guc_reset_prepare(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -645,7 +653,6 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
 	}
 	engine->set_default_submission = guc_set_default_submission;
 
-	engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
 	engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 
 	/*
@@ -681,6 +688,7 @@ static void rcs_submission_override(struct intel_engine_cs *engine)
 static inline void guc_default_irqs(struct intel_engine_cs *engine)
 {
 	engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT;
+	intel_engine_set_irq_handler(engine, cs_irq_handler);
 }
 
 int intel_guc_submission_setup(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 9d47da8ec86d..37a48402adc1 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3954,7 +3954,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
 		intel_uncore_write16(&dev_priv->uncore, GEN2_IIR, iir);
 
 		if (iir & I915_USER_INTERRUPT)
-			intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]);
+			intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir);
 
 		if (iir & I915_MASTER_ERROR_INTERRUPT)
 			i8xx_error_irq_handler(dev_priv, eir, eir_stuck);
@@ -4062,7 +4062,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
 		intel_uncore_write(&dev_priv->uncore, GEN2_IIR, iir);
 
 		if (iir & I915_USER_INTERRUPT)
-			intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]);
+			intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir);
 
 		if (iir & I915_MASTER_ERROR_INTERRUPT)
 			i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
@@ -4207,10 +4207,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
 		intel_uncore_write(&dev_priv->uncore, GEN2_IIR, iir);
 
 		if (iir & I915_USER_INTERRUPT)
-			intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]);
+			intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir);
 
 		if (iir & I915_BSD_USER_INTERRUPT)
-			intel_engine_signal_breadcrumbs(dev_priv->gt.engine[VCS0]);
+			intel_engine_cs_irq(dev_priv->gt.engine[VCS0], iir);
 
 		if (iir & I915_MASTER_ERROR_INTERRUPT)
 			i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [Intel-gfx] [PATCH v2] drm/i915/gt: Move CS interrupt handler to the backend
  2021-02-02 16:15   ` [Intel-gfx] [PATCH v2] " Chris Wilson
@ 2021-02-02 16:33     ` Tvrtko Ursulin
  0 siblings, 0 replies; 28+ messages in thread
From: Tvrtko Ursulin @ 2021-02-02 16:33 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 02/02/2021 16:15, Chris Wilson wrote:
> The different submission backends each have their own preferred
> behaviour and interrupt setup. Let each handle their own interrupts.
> 
> This becomes more useful later as we extract the backend-specific use
> of auxiliary state in the interrupt handler.
> 
> v2: An overabundance of caution is always justified; put a barrier on
> updating the irq handler so that we know that the next interrupt will
> be redirected towards ourselves.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  7 ++
>   drivers/gpu/drm/i915/gt/intel_engine_types.h  | 14 +---
>   .../drm/i915/gt/intel_execlists_submission.c  | 40 +++++++++
>   drivers/gpu/drm/i915/gt/intel_gt_irq.c        | 82 ++++++-------------
>   drivers/gpu/drm/i915/gt/intel_gt_irq.h        | 22 +++++
>   .../gpu/drm/i915/gt/intel_ring_submission.c   |  7 ++
>   drivers/gpu/drm/i915/gt/intel_rps.c           |  2 +-
>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 10 ++-
>   drivers/gpu/drm/i915/i915_irq.c               |  8 +-
>   9 files changed, 118 insertions(+), 74 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index c7d17f8767a1..e06ae4ae1710 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -255,6 +255,11 @@ static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
>   	intel_engine_set_hwsp_writemask(engine, ~0u);
>   }
>   
> +static void nop_irq_handler(struct intel_engine_cs *engine, u32 iir)
> +{
> +	GEM_DEBUG_WARN_ON(iir);
> +}
> +
>   static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
>   {
>   	const struct engine_info *info = &intel_engines[id];
> @@ -292,6 +297,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
>   	engine->hw_id = info->hw_id;
>   	engine->guc_id = MAKE_GUC_ID(info->class, info->instance);
>   
> +	engine->irq_handler = nop_irq_handler;
> +
>   	engine->class = info->class;
>   	engine->instance = info->instance;
>   	__sprint_engine_name(engine);
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 9d59de5c559a..7fd035d45263 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -402,6 +402,7 @@ struct intel_engine_cs {
>   	u32		irq_enable_mask; /* bitmask to enable ring interrupt */
>   	void		(*irq_enable)(struct intel_engine_cs *engine);
>   	void		(*irq_disable)(struct intel_engine_cs *engine);
> +	void		(*irq_handler)(struct intel_engine_cs *engine, u32 iir);
>   
>   	void		(*sanitize)(struct intel_engine_cs *engine);
>   	int		(*resume)(struct intel_engine_cs *engine);
> @@ -481,10 +482,9 @@ struct intel_engine_cs {
>   #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
>   #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
>   #define I915_ENGINE_HAS_TIMESLICES   BIT(4)
> -#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5)
> -#define I915_ENGINE_IS_VIRTUAL       BIT(6)
> -#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7)
> -#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8)
> +#define I915_ENGINE_IS_VIRTUAL       BIT(5)
> +#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
> +#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
>   	unsigned int flags;
>   
>   	/*
> @@ -588,12 +588,6 @@ intel_engine_has_timeslices(const struct intel_engine_cs *engine)
>   	return engine->flags & I915_ENGINE_HAS_TIMESLICES;
>   }
>   
> -static inline bool
> -intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
> -{
> -	return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
> -}
> -
>   static inline bool
>   intel_engine_is_virtual(const struct intel_engine_cs *engine)
>   {
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index 4ddd2099a931..05846f97f1af 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -2394,6 +2394,45 @@ static void execlists_submission_tasklet(struct tasklet_struct *t)
>   	rcu_read_unlock();
>   }
>   
> +static void execlists_irq_handler(struct intel_engine_cs *engine, u32 iir)
> +{
> +	bool tasklet = false;
> +
> +	if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) {
> +		u32 eir;
> +
> +		/* Upper 16b are the enabling mask, rsvd for internal errors */
> +		eir = ENGINE_READ(engine, RING_EIR) & GENMASK(15, 0);
> +		ENGINE_TRACE(engine, "CS error: %x\n", eir);
> +
> +		/* Disable the error interrupt until after the reset */
> +		if (likely(eir)) {
> +			ENGINE_WRITE(engine, RING_EMR, ~0u);
> +			ENGINE_WRITE(engine, RING_EIR, eir);
> +			WRITE_ONCE(engine->execlists.error_interrupt, eir);
> +			tasklet = true;
> +		}
> +	}
> +
> +	if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
> +		WRITE_ONCE(engine->execlists.yield,
> +			   ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
> +		ENGINE_TRACE(engine, "semaphore yield: %08x\n",
> +			     engine->execlists.yield);
> +		if (del_timer(&engine->execlists.timer))
> +			tasklet = true;
> +	}
> +
> +	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
> +		tasklet = true;
> +
> +	if (iir & GT_RENDER_USER_INTERRUPT)
> +		intel_engine_signal_breadcrumbs(engine);
> +
> +	if (tasklet)
> +		tasklet_hi_schedule(&engine->execlists.tasklet);
> +}
> +
>   static void __execlists_kick(struct intel_engine_execlists *execlists)
>   {
>   	/* Kick the tasklet for some interrupt coalescing and reset handling */
> @@ -3146,6 +3185,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
>   		 * until a more refined solution exists.
>   		 */
>   	}
> +	intel_engine_set_irq_handler(engine, execlists_irq_handler);
>   
>   	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
>   	if (!intel_vgpu_active(engine->i915)) {
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> index 9fc6c912a4e5..f5aa31ae8f6c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> @@ -20,48 +20,6 @@ static void guc_irq_handler(struct intel_guc *guc, u16 iir)
>   		intel_guc_to_host_event_handler(guc);
>   }
>   
> -static void
> -cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
> -{
> -	bool tasklet = false;
> -
> -	if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) {
> -		u32 eir;
> -
> -		/* Upper 16b are the enabling mask, rsvd for internal errors */
> -		eir = ENGINE_READ(engine, RING_EIR) & GENMASK(15, 0);
> -		ENGINE_TRACE(engine, "CS error: %x\n", eir);
> -
> -		/* Disable the error interrupt until after the reset */
> -		if (likely(eir)) {
> -			ENGINE_WRITE(engine, RING_EMR, ~0u);
> -			ENGINE_WRITE(engine, RING_EIR, eir);
> -			WRITE_ONCE(engine->execlists.error_interrupt, eir);
> -			tasklet = true;
> -		}
> -	}
> -
> -	if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
> -		WRITE_ONCE(engine->execlists.yield,
> -			   ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
> -		ENGINE_TRACE(engine, "semaphore yield: %08x\n",
> -			     engine->execlists.yield);
> -		if (del_timer(&engine->execlists.timer))
> -			tasklet = true;
> -	}
> -
> -	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
> -		tasklet = true;
> -
> -	if (iir & GT_RENDER_USER_INTERRUPT) {
> -		intel_engine_signal_breadcrumbs(engine);
> -		tasklet |= intel_engine_needs_breadcrumb_tasklet(engine);
> -	}
> -
> -	if (tasklet)
> -		tasklet_hi_schedule(&engine->execlists.tasklet);
> -}
> -
>   static u32
>   gen11_gt_engine_identity(struct intel_gt *gt,
>   			 const unsigned int bank, const unsigned int bit)
> @@ -122,7 +80,7 @@ gen11_engine_irq_handler(struct intel_gt *gt, const u8 class,
>   		engine = NULL;
>   
>   	if (likely(engine))
> -		return cs_irq_handler(engine, iir);
> +		return intel_engine_cs_irq(engine, iir);
>   
>   	WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n",
>   		  class, instance);
> @@ -275,9 +233,12 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
>   void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
>   {
>   	if (gt_iir & GT_RENDER_USER_INTERRUPT)
> -		intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]);
> +		intel_engine_cs_irq(gt->engine_class[RENDER_CLASS][0],
> +				    gt_iir);
> +
>   	if (gt_iir & ILK_BSD_USER_INTERRUPT)
> -		intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]);
> +		intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0],
> +				    gt_iir);
>   }
>   
>   static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
> @@ -301,11 +262,16 @@ static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
>   void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
>   {
>   	if (gt_iir & GT_RENDER_USER_INTERRUPT)
> -		intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]);
> +		intel_engine_cs_irq(gt->engine_class[RENDER_CLASS][0],
> +				    gt_iir);
> +
>   	if (gt_iir & GT_BSD_USER_INTERRUPT)
> -		intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]);
> +		intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0],
> +				    gt_iir);
> +
>   	if (gt_iir & GT_BLT_USER_INTERRUPT)
> -		intel_engine_signal_breadcrumbs(gt->engine_class[COPY_ENGINE_CLASS][0]);
> +		intel_engine_cs_irq(gt->engine_class[COPY_ENGINE_CLASS][0],
> +				    gt_iir);
>   
>   	if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
>   		      GT_BSD_CS_ERROR_INTERRUPT |
> @@ -324,10 +290,10 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
>   	if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
>   		iir = raw_reg_read(regs, GEN8_GT_IIR(0));
>   		if (likely(iir)) {
> -			cs_irq_handler(gt->engine_class[RENDER_CLASS][0],
> -				       iir >> GEN8_RCS_IRQ_SHIFT);
> -			cs_irq_handler(gt->engine_class[COPY_ENGINE_CLASS][0],
> -				       iir >> GEN8_BCS_IRQ_SHIFT);
> +			intel_engine_cs_irq(gt->engine_class[RENDER_CLASS][0],
> +					    iir >> GEN8_RCS_IRQ_SHIFT);
> +			intel_engine_cs_irq(gt->engine_class[COPY_ENGINE_CLASS][0],
> +					    iir >> GEN8_BCS_IRQ_SHIFT);
>   			raw_reg_write(regs, GEN8_GT_IIR(0), iir);
>   		}
>   	}
> @@ -335,10 +301,10 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
>   	if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) {
>   		iir = raw_reg_read(regs, GEN8_GT_IIR(1));
>   		if (likely(iir)) {
> -			cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][0],
> -				       iir >> GEN8_VCS0_IRQ_SHIFT);
> -			cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][1],
> -				       iir >> GEN8_VCS1_IRQ_SHIFT);
> +			intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0],
> +					    iir >> GEN8_VCS0_IRQ_SHIFT);
> +			intel_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][1],
> +					    iir >> GEN8_VCS1_IRQ_SHIFT);
>   			raw_reg_write(regs, GEN8_GT_IIR(1), iir);
>   		}
>   	}
> @@ -346,8 +312,8 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
>   	if (master_ctl & GEN8_GT_VECS_IRQ) {
>   		iir = raw_reg_read(regs, GEN8_GT_IIR(3));
>   		if (likely(iir)) {
> -			cs_irq_handler(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0],
> -				       iir >> GEN8_VECS_IRQ_SHIFT);
> +			intel_engine_cs_irq(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0],
> +					    iir >> GEN8_VECS_IRQ_SHIFT);
>   			raw_reg_write(regs, GEN8_GT_IIR(3), iir);
>   		}
>   	}
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.h b/drivers/gpu/drm/i915/gt/intel_gt_irq.h
> index f667e976fb2b..894efd471e93 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.h
> @@ -8,6 +8,8 @@
>   
>   #include <linux/types.h>
>   
> +#include "intel_engine_types.h"
> +
>   struct intel_gt;
>   
>   #define GEN8_GT_IRQS (GEN8_GT_RCS_IRQ | \
> @@ -39,4 +41,24 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl);
>   void gen8_gt_irq_reset(struct intel_gt *gt);
>   void gen8_gt_irq_postinstall(struct intel_gt *gt);
>   
> +static inline void intel_engine_cs_irq(struct intel_engine_cs *engine, u32 iir)
> +{
> +	engine->irq_handler(engine, iir);
> +}
> +
> +static inline void
> +intel_engine_set_irq_handler(struct intel_engine_cs *engine,
> +			     void (*fn)(struct intel_engine_cs *engine,
> +					u32 iir))
> +{
> +	/*
> +	 * As the interrupt is live while we allocate and set up the engines,
> +	 * err on the side of caution and apply barriers when updating
> +	 * the irq handler callback, so that we are assured that before
> +	 * we use the engine, we will receive its interrupts ourselves.
> +	 */
> +	smp_store_mb(engine->irq_handler, fn);
> +}
> +
> +
>   #endif /* INTEL_GT_IRQ_H */
> diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> index 3cb2ce503544..3673f61c0813 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> @@ -997,10 +997,17 @@ static void ring_release(struct intel_engine_cs *engine)
>   	intel_timeline_put(engine->legacy.timeline);
>   }
>   
> +static void irq_handler(struct intel_engine_cs *engine, u32 iir)
> +{
> +	intel_engine_signal_breadcrumbs(engine);
> +}
> +
>   static void setup_irq(struct intel_engine_cs *engine)
>   {
>   	struct drm_i915_private *i915 = engine->i915;
>   
> +	intel_engine_set_irq_handler(engine, irq_handler);
> +
>   	if (INTEL_GEN(i915) >= 6) {
>   		engine->irq_enable = gen6_irq_enable;
>   		engine->irq_disable = gen6_irq_disable;
> diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
> index 405d814e9040..4ba6a33f65cf 100644
> --- a/drivers/gpu/drm/i915/gt/intel_rps.c
> +++ b/drivers/gpu/drm/i915/gt/intel_rps.c
> @@ -1774,7 +1774,7 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
>   		return;
>   
>   	if (pm_iir & PM_VEBOX_USER_INTERRUPT)
> -		intel_engine_signal_breadcrumbs(gt->engine[VECS0]);
> +		intel_engine_cs_irq(gt->engine[VECS0], pm_iir);
>   
>   	if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
>   		DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 17b551a0c89f..838e12b42009 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -264,6 +264,14 @@ static void guc_submission_tasklet(struct tasklet_struct *t)
>   	spin_unlock_irqrestore(&engine->active.lock, flags);
>   }
>   
> +static void cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
> +{
> +	if (iir & GT_RENDER_USER_INTERRUPT) {
> +		intel_engine_signal_breadcrumbs(engine);
> +		tasklet_hi_schedule(&engine->execlists.tasklet);
> +	}
> +}
> +
>   static void guc_reset_prepare(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -645,7 +653,6 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
>   	}
>   	engine->set_default_submission = guc_set_default_submission;
>   
> -	engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
>   	engine->flags |= I915_ENGINE_HAS_PREEMPTION;
>   
>   	/*
> @@ -681,6 +688,7 @@ static void rcs_submission_override(struct intel_engine_cs *engine)
>   static inline void guc_default_irqs(struct intel_engine_cs *engine)
>   {
>   	engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT;
> +	intel_engine_set_irq_handler(engine, cs_irq_handler);
>   }
>   
>   int intel_guc_submission_setup(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 9d47da8ec86d..37a48402adc1 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -3954,7 +3954,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
>   		intel_uncore_write16(&dev_priv->uncore, GEN2_IIR, iir);
>   
>   		if (iir & I915_USER_INTERRUPT)
> -			intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]);
> +			intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir);
>   
>   		if (iir & I915_MASTER_ERROR_INTERRUPT)
>   			i8xx_error_irq_handler(dev_priv, eir, eir_stuck);
> @@ -4062,7 +4062,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
>   		intel_uncore_write(&dev_priv->uncore, GEN2_IIR, iir);
>   
>   		if (iir & I915_USER_INTERRUPT)
> -			intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]);
> +			intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir);
>   
>   		if (iir & I915_MASTER_ERROR_INTERRUPT)
>   			i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
> @@ -4207,10 +4207,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
>   		intel_uncore_write(&dev_priv->uncore, GEN2_IIR, iir);
>   
>   		if (iir & I915_USER_INTERRUPT)
> -			intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]);
> +			intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir);
>   
>   		if (iir & I915_BSD_USER_INTERRUPT)
> -			intel_engine_signal_breadcrumbs(dev_priv->gt.engine[VCS0]);
> +			intel_engine_cs_irq(dev_priv->gt.engine[VCS0], iir);
>   
>   		if (iir & I915_MASTER_ERROR_INTERRUPT)
>   			i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
> 
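
The barrier on the handler update looks sufficient to me. For my own
notes, the smp_store_mb() here boils down to roughly the following
(a sketch of the generic expansion, not part of the patch):

	WRITE_ONCE(engine->irq_handler, fn);
	smp_mb(); /* order the store before any later irq enabling */

so by the time we go on to unmask or use the engine, any CPU taking the
interrupt observes the new handler rather than the nop placeholder.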

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Intel-gfx] [CI 07/14] drm/i915/selftests: Exercise priority inheritance around an engine loop
  2021-02-02 15:14 ` [Intel-gfx] [CI 07/14] drm/i915/selftests: Exercise priority inheritance around an engine loop Chris Wilson
@ 2021-02-02 16:44   ` Tvrtko Ursulin
  2021-02-02 17:22     ` Chris Wilson
  0 siblings, 1 reply; 28+ messages in thread
From: Tvrtko Ursulin @ 2021-02-02 16:44 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 02/02/2021 15:14, Chris Wilson wrote:
> Exercise rescheduling priority inheritance around a sequence of requests
> that wrap around all the engines.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   .../gpu/drm/i915/selftests/i915_scheduler.c   | 225 ++++++++++++++++++
>   1 file changed, 225 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/selftests/i915_scheduler.c b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
> index d095fab2ccec..acc666f755d7 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
> @@ -7,6 +7,7 @@
>   
>   #include "gt/intel_context.h"
>   #include "gt/intel_gpu_commands.h"
> +#include "gt/intel_ring.h"
>   #include "gt/selftest_engine_heartbeat.h"
>   #include "selftests/igt_spinner.h"
>   #include "selftests/i915_random.h"
> @@ -504,10 +505,234 @@ static int igt_priority_chains(void *arg)
>   	return igt_schedule_chains(arg, igt_priority);
>   }
>   
> +static struct i915_request *
> +__write_timestamp(struct intel_engine_cs *engine,
> +		  struct drm_i915_gem_object *obj,
> +		  int slot,
> +		  struct i915_request *prev)
> +{
> +	struct i915_request *rq = ERR_PTR(-EINVAL);
> +	bool use_64b = INTEL_GEN(engine->i915) >= 8;
> +	struct intel_context *ce;
> +	struct i915_vma *vma;
> +	int err = 0;
> +	u32 *cs;
> +
> +	ce = intel_context_create(engine);
> +	if (IS_ERR(ce))
> +		return ERR_CAST(ce);
> +
> +	vma = i915_vma_instance(obj, ce->vm, NULL);
> +	if (IS_ERR(vma)) {
> +		err = PTR_ERR(vma);
> +		goto out_ce;
> +	}
> +
> +	err = i915_vma_pin(vma, 0, 0, PIN_USER);
> +	if (err)
> +		goto out_ce;
> +
> +	rq = intel_context_create_request(ce);
> +	if (IS_ERR(rq)) {
> +		err = PTR_ERR(rq);
> +		goto out_unpin;
> +	}
> +
> +	i915_vma_lock(vma);
> +	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> +	i915_vma_unlock(vma);
> +	if (err)
> +		goto out_request;
> +
> +	if (prev) {
> +		err = i915_request_await_dma_fence(rq, &prev->fence);
> +		if (err)
> +			goto out_request;
> +	}
> +
> +	if (engine->emit_init_breadcrumb) {
> +		err = engine->emit_init_breadcrumb(rq);
> +		if (err)
> +			goto out_request;
> +	}
> +
> +	cs = intel_ring_begin(rq, 4);
> +	if (IS_ERR(cs)) {
> +		err = PTR_ERR(cs);
> +		goto out_request;
> +	}
> +
> +	*cs++ = MI_STORE_REGISTER_MEM + use_64b;
> +	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(engine->mmio_base));
> +	*cs++ = lower_32_bits(vma->node.start) + sizeof(u32) * slot;
> +	*cs++ = upper_32_bits(vma->node.start);
> +	intel_ring_advance(rq, cs);
> +
> +	i915_request_get(rq);
> +out_request:
> +	i915_request_add(rq);
> +out_unpin:
> +	i915_vma_unpin(vma);
> +out_ce:
> +	intel_context_put(ce);
> +	i915_request_put(prev);
> +	return err ? ERR_PTR(err) : rq;
> +}
> +
> +static struct i915_request *create_spinner(struct drm_i915_private *i915,
> +					   struct igt_spinner *spin)
> +{
> +	struct intel_engine_cs *engine;
> +
> +	for_each_uabi_engine(engine, i915) {
> +		struct intel_context *ce;
> +		struct i915_request *rq;
> +
> +		if (igt_spinner_init(spin, engine->gt))
> +			return ERR_PTR(-ENOMEM);
> +
> +		ce = intel_context_create(engine);
> +		if (IS_ERR(ce))
> +			return ERR_CAST(ce);
> +
> +		rq = igt_spinner_create_request(spin, ce, MI_NOOP);
> +		intel_context_put(ce);
> +		if (rq == ERR_PTR(-ENODEV))
> +			continue;
> +		if (IS_ERR(rq))
> +			return rq;
> +
> +		i915_request_get(rq);
> +		i915_request_add(rq);
> +		return rq;
> +	}
> +
> +	return ERR_PTR(-ENODEV);
> +}
> +
> +static bool has_timestamp(const struct drm_i915_private *i915)
> +{
> +	return INTEL_GEN(i915) >= 7;
> +}
> +
> +static int __igt_schedule_cycle(struct drm_i915_private *i915,
> +				bool (*fn)(struct i915_request *rq,
> +					   unsigned long v, unsigned long e))
> +{
> +	struct intel_engine_cs *engine;
> +	struct drm_i915_gem_object *obj;
> +	struct igt_spinner spin;
> +	struct i915_request *rq;
> +	unsigned long count, n;
> +	u32 *time, last;
> +	int err;
> +
> +	/*
> +	 * Queue a bunch of ordered requests (each waiting on the previous)
> +	 * around the engines a couple of times. Each request will write
> +	 * the timestamp it executes at into the scratch, with the expectation
> +	 * that the timestamp will be in our desired execution order.
> +	 */
> +
> +	if (!i915->caps.scheduler || !has_timestamp(i915))
> +		return 0;
> +
> +	obj = i915_gem_object_create_internal(i915, SZ_64K);
> +	if (IS_ERR(obj))
> +		return PTR_ERR(obj);
> +
> +	time = i915_gem_object_pin_map(obj, I915_MAP_WC);
> +	if (IS_ERR(time)) {
> +		err = PTR_ERR(time);
> +		goto out_obj;
> +	}
> +
> +	rq = create_spinner(i915, &spin);
> +	if (IS_ERR(rq)) {
> +		err = PTR_ERR(rq);
> +		goto out_obj;
> +	}
> +
> +	err = 0;
> +	count = 0;
> +	for_each_uabi_engine(engine, i915) {
> +		if (!intel_engine_has_scheduler(engine))
> +			continue;
> +
> +		rq = __write_timestamp(engine, obj, count, rq);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			break;
> +		}
> +
> +		count++;
> +	}

^^^^ vvvv - two of the same by copy&paste error or couldn't be bothered 
with outer loop?
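
Something like this would fold the two passes into one (just a sketch,
with a new 'pass' local, assuming the surrounding error handling stays
as it is):

	int pass;

	err = 0;
	count = 0;
	for (pass = 0; pass < 2; pass++) { /* wrap the engine list twice */
		for_each_uabi_engine(engine, i915) {
			if (!intel_engine_has_scheduler(engine))
				continue;

			rq = __write_timestamp(engine, obj, count, rq);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				break;
			}

			count++;
		}
		if (err)
			break;
	}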

> +	for_each_uabi_engine(engine, i915) {
> +		if (!intel_engine_has_scheduler(engine))
> +			continue;
> +
> +		rq = __write_timestamp(engine, obj, count, rq);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			break;
> +		}
> +
> +		count++;
> +	}
> +	GEM_BUG_ON(count * sizeof(u32) > obj->base.size);
> +	if (err || !count)
> +		goto out_spin;
> +
> +	fn(rq, count + 1, count);
> +	igt_spinner_end(&spin);
> +
> +	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
> +		err = -ETIME;
> +		goto out_request;
> +	}
> +
> +	last = time[0];
> +	for (n = 1; n < count; n++) {
> +		if (i915_seqno_passed(last, time[n])) {
> +			pr_err("Timestamp[%lu] %x before previous %x\n",
> +			       n, time[n], last);
> +			err = -EINVAL;
> +			break;
> +		}
> +		last = time[n];
> +	}
> +
> +out_request:
> +	i915_request_put(rq);
> +out_spin:
> +	igt_spinner_fini(&spin);
> +out_obj:
> +	i915_gem_object_put(obj);
> +	return err;
> +}
> +
> +static bool noop(struct i915_request *rq, unsigned long v, unsigned long e)
> +{
> +	return true;
> +}
> +
> +static int igt_schedule_cycle(void *arg)
> +{
> +	return __igt_schedule_cycle(arg, noop);
> +}
> +
> +static int igt_priority_cycle(void *arg)
> +{
> +	return __igt_schedule_cycle(arg, igt_priority);
> +}
> +
>   int i915_scheduler_live_selftests(struct drm_i915_private *i915)
>   {
>   	static const struct i915_subtest tests[] = {
>   		SUBTEST(igt_priority_chains),
> +
> +		SUBTEST(igt_schedule_cycle),
> +		SUBTEST(igt_priority_cycle),
>   	};
>   
>   	return i915_subtests(tests, i915);
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Intel-gfx] [CI 06/14] drm/i915/selftests: Measure set-priority duration
  2021-02-02 15:14 ` [Intel-gfx] [CI 06/14] drm/i915/selftests: Measure set-priority duration Chris Wilson
@ 2021-02-02 16:49   ` Tvrtko Ursulin
  0 siblings, 0 replies; 28+ messages in thread
From: Tvrtko Ursulin @ 2021-02-02 16:49 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 02/02/2021 15:14, Chris Wilson wrote:
> As a topological sort, we expect it to run in linear graph time,
> O(V+E). In removing the recursion, it is no longer a DFS but rather a
> BFS, and performs as O(VE). Let's demonstrate how bad this is with a few
> examples, and build a few test cases to verify a potential fix.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_scheduler.c         |   4 +
>   .../drm/i915/selftests/i915_live_selftests.h  |   1 +
>   .../drm/i915/selftests/i915_perf_selftests.h  |   1 +
>   .../gpu/drm/i915/selftests/i915_scheduler.c   | 672 ++++++++++++++++++
>   4 files changed, 678 insertions(+)
>   create mode 100644 drivers/gpu/drm/i915/selftests/i915_scheduler.c
> 
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 035e4be5d573..27bda7617b29 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -609,6 +609,10 @@ void i915_request_show_with_schedule(struct drm_printer *m,
>   	rcu_read_unlock();
>   }
>   
> +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> +#include "selftests/i915_scheduler.c"
> +#endif
> +
>   static void i915_global_scheduler_shrink(void)
>   {
>   	kmem_cache_shrink(global.slab_dependencies);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> index a92c0e9b7e6b..2200a5baa68e 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> @@ -26,6 +26,7 @@ selftest(gt_mocs, intel_mocs_live_selftests)
>   selftest(gt_pm, intel_gt_pm_live_selftests)
>   selftest(gt_heartbeat, intel_heartbeat_live_selftests)
>   selftest(requests, i915_request_live_selftests)
> +selftest(scheduler, i915_scheduler_live_selftests)
>   selftest(active, i915_active_live_selftests)
>   selftest(objects, i915_gem_object_live_selftests)
>   selftest(mman, i915_gem_mman_live_selftests)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
> index c2389f8a257d..137e35283fee 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
> @@ -17,5 +17,6 @@
>    */
>   selftest(engine_cs, intel_engine_cs_perf_selftests)
>   selftest(request, i915_request_perf_selftests)
> +selftest(scheduler, i915_scheduler_perf_selftests)
>   selftest(blt, i915_gem_object_blt_perf_selftests)
>   selftest(region, intel_memory_region_perf_selftests)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_scheduler.c b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
> new file mode 100644
> index 000000000000..d095fab2ccec
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
> @@ -0,0 +1,672 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2020 Intel Corporation
> + */
> +
> +#include "i915_selftest.h"
> +
> +#include "gt/intel_context.h"
> +#include "gt/intel_gpu_commands.h"
> +#include "gt/selftest_engine_heartbeat.h"
> +#include "selftests/igt_spinner.h"
> +#include "selftests/i915_random.h"
> +
> +static void scheduling_disable(struct intel_engine_cs *engine)
> +{
> +	engine->props.preempt_timeout_ms = 0;
> +	engine->props.timeslice_duration_ms = 0;
> +
> +	st_engine_heartbeat_disable(engine);
> +}
> +
> +static void scheduling_enable(struct intel_engine_cs *engine)
> +{
> +	st_engine_heartbeat_enable(engine);
> +
> +	engine->props.preempt_timeout_ms =
> +		engine->defaults.preempt_timeout_ms;
> +	engine->props.timeslice_duration_ms =
> +		engine->defaults.timeslice_duration_ms;
> +}
> +
> +static int first_engine(struct drm_i915_private *i915,
> +			int (*chain)(struct intel_engine_cs *engine,
> +				     unsigned long param,
> +				     bool (*fn)(struct i915_request *rq,
> +						unsigned long v,
> +						unsigned long e)),
> +			unsigned long param,
> +			bool (*fn)(struct i915_request *rq,
> +				   unsigned long v, unsigned long e))
> +{
> +	struct intel_engine_cs *engine;
> +
> +	for_each_uabi_engine(engine, i915) {
> +		if (!intel_engine_has_scheduler(engine))
> +			continue;
> +
> +		return chain(engine, param, fn);
> +	}
> +
> +	return 0;
> +}
> +
> +static int all_engines(struct drm_i915_private *i915,
> +		       int (*chain)(struct intel_engine_cs *engine,
> +				    unsigned long param,
> +				    bool (*fn)(struct i915_request *rq,
> +					       unsigned long v,
> +					       unsigned long e)),
> +		       unsigned long param,
> +		       bool (*fn)(struct i915_request *rq,
> +				  unsigned long v, unsigned long e))
> +{
> +	struct intel_engine_cs *engine;
> +	int err;
> +
> +	for_each_uabi_engine(engine, i915) {
> +		if (!intel_engine_has_scheduler(engine))
> +			continue;
> +
> +		err = chain(engine, param, fn);
> +		if (err)
> +			return err;
> +	}
> +
> +	return 0;
> +}
> +
> +static bool check_context_order(struct intel_engine_cs *engine)
> +{
> +	u64 last_seqno, last_context;
> +	unsigned long count;
> +	bool result = false;
> +	struct rb_node *rb;
> +	int last_prio;
> +
> +	/* We expect the execution order to follow ascending fence-context */
> +	spin_lock_irq(&engine->active.lock);
> +
> +	count = 0;
> +	last_context = 0;
> +	last_seqno = 0;
> +	last_prio = 0;
> +	for (rb = rb_first_cached(&engine->execlists.queue); rb; rb = rb_next(rb)) {
> +		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> +		struct i915_request *rq;
> +
> +		priolist_for_each_request(rq, p) {
> +			if (rq->fence.context < last_context ||
> +			    (rq->fence.context == last_context &&
> +			     rq->fence.seqno < last_seqno)) {
> +				pr_err("[%lu] %llx:%lld [prio:%d] after %llx:%lld [prio:%d]\n",
> +				       count,
> +				       rq->fence.context,
> +				       rq->fence.seqno,
> +				       rq_prio(rq),
> +				       last_context,
> +				       last_seqno,
> +				       last_prio);
> +				goto out_unlock;
> +			}
> +
> +			last_context = rq->fence.context;
> +			last_seqno = rq->fence.seqno;
> +			last_prio = rq_prio(rq);
> +			count++;
> +		}
> +	}
> +	result = true;
> +out_unlock:
> +	spin_unlock_irq(&engine->active.lock);
> +
> +	return result;
> +}
> +
> +static int __single_chain(struct intel_engine_cs *engine, unsigned long length,
> +			  bool (*fn)(struct i915_request *rq,
> +				     unsigned long v, unsigned long e))
> +{
> +	struct intel_context *ce;
> +	struct igt_spinner spin;
> +	struct i915_request *rq;
> +	unsigned long count;
> +	unsigned long min;
> +	int err = 0;
> +
> +	if (!intel_engine_can_store_dword(engine))
> +		return 0;
> +
> +	scheduling_disable(engine);
> +
> +	if (igt_spinner_init(&spin, engine->gt)) {
> +		err = -ENOMEM;
> +		goto err_heartbeat;
> +	}
> +
> +	ce = intel_context_create(engine);
> +	if (IS_ERR(ce)) {
> +		err = PTR_ERR(ce);
> +		goto err_spin;
> +	}
> +	ce->ring = __intel_context_ring_size(SZ_512K);
> +
> +	rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
> +	if (IS_ERR(rq)) {
> +		err = PTR_ERR(rq);
> +		goto err_context;
> +	}
> +	i915_request_add(rq);
> +	min = ce->ring->size - ce->ring->space;
> +
> +	count = 1;
> +	while (count < length && ce->ring->space > min) {
> +		rq = intel_context_create_request(ce);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			break;
> +		}
> +		i915_request_add(rq);
> +		count++;
> +	}
> +	intel_engine_flush_submission(engine);
> +
> +	execlists_active_lock_bh(&engine->execlists);
> +	if (fn(rq, count, count - 1) && !check_context_order(engine))
> +		err = -EINVAL;
> +	execlists_active_unlock_bh(&engine->execlists);
> +
> +	igt_spinner_end(&spin);
> +err_context:
> +	intel_context_put(ce);
> +err_spin:
> +	igt_spinner_fini(&spin);
> +err_heartbeat:
> +	scheduling_enable(engine);
> +	return err;
> +}
> +
> +static int __wide_chain(struct intel_engine_cs *engine, unsigned long width,
> +			bool (*fn)(struct i915_request *rq,
> +				   unsigned long v, unsigned long e))
> +{
> +	struct intel_context **ce;
> +	struct i915_request **rq;
> +	struct igt_spinner spin;
> +	unsigned long count;
> +	unsigned long i, j;
> +	int err = 0;
> +
> +	if (!intel_engine_can_store_dword(engine))
> +		return 0;
> +
> +	scheduling_disable(engine);
> +
> +	if (igt_spinner_init(&spin, engine->gt)) {
> +		err = -ENOMEM;
> +		goto err_heartbeat;
> +	}
> +
> +	ce = kmalloc_array(width, sizeof(*ce), GFP_KERNEL);
> +	if (!ce) {
> +		err = -ENOMEM;
> +		goto err_spin;
> +	}
> +
> +	for (i = 0; i < width; i++) {
> +		ce[i] = intel_context_create(engine);
> +		if (IS_ERR(ce[i])) {
> +			err = PTR_ERR(ce[i]);
> +			width = i;
> +			goto err_context;
> +		}
> +	}
> +
> +	rq = kmalloc_array(width, sizeof(*rq), GFP_KERNEL);
> +	if (!rq) {
> +		err = -ENOMEM;
> +		goto err_context;
> +	}
> +
> +	rq[0] = igt_spinner_create_request(&spin, ce[0], MI_NOOP);
> +	if (IS_ERR(rq[0])) {
> +		err = PTR_ERR(rq[0]);
> +		goto err_free;
> +	}
> +	i915_request_add(rq[0]);
> +
> +	count = 0;
> +	for (i = 1; i < width; i++) {
> +		GEM_BUG_ON(i915_request_completed(rq[0]));
> +
> +		rq[i] = intel_context_create_request(ce[i]);
> +		if (IS_ERR(rq[i])) {
> +			err = PTR_ERR(rq[i]);
> +			break;
> +		}
> +		for (j = 0; j < i; j++) {
> +			err = i915_request_await_dma_fence(rq[i],
> +							   &rq[j]->fence);
> +			if (err)
> +				break;
> +			count++;
> +		}
> +		i915_request_add(rq[i]);
> +	}
> +	intel_engine_flush_submission(engine);
> +
> +	execlists_active_lock_bh(&engine->execlists);
> +	if (fn(rq[i - 1], i, count) && !check_context_order(engine))
> +		err = -EINVAL;
> +	execlists_active_unlock_bh(&engine->execlists);
> +
> +	igt_spinner_end(&spin);
> +err_free:
> +	kfree(rq);
> +err_context:
> +	for (i = 0; i < width; i++)
> +		intel_context_put(ce[i]);
> +	kfree(ce);
> +err_spin:
> +	igt_spinner_fini(&spin);
> +err_heartbeat:
> +	scheduling_enable(engine);
> +	return err;
> +}
> +
> +static int __inv_chain(struct intel_engine_cs *engine, unsigned long width,
> +		       bool (*fn)(struct i915_request *rq,
> +				  unsigned long v, unsigned long e))
> +{
> +	struct intel_context **ce;
> +	struct i915_request **rq;
> +	struct igt_spinner spin;
> +	unsigned long count;
> +	unsigned long i, j;
> +	int err = 0;
> +
> +	if (!intel_engine_can_store_dword(engine))
> +		return 0;
> +
> +	scheduling_disable(engine);
> +
> +	if (igt_spinner_init(&spin, engine->gt)) {
> +		err = -ENOMEM;
> +		goto err_heartbeat;
> +	}
> +
> +	ce = kmalloc_array(width, sizeof(*ce), GFP_KERNEL);
> +	if (!ce) {
> +		err = -ENOMEM;
> +		goto err_spin;
> +	}
> +
> +	for (i = 0; i < width; i++) {
> +		ce[i] = intel_context_create(engine);
> +		if (IS_ERR(ce[i])) {
> +			err = PTR_ERR(ce[i]);
> +			width = i;
> +			goto err_context;
> +		}
> +	}
> +
> +	rq = kmalloc_array(width, sizeof(*rq), GFP_KERNEL);
> +	if (!rq) {
> +		err = -ENOMEM;
> +		goto err_context;
> +	}
> +
> +	rq[0] = igt_spinner_create_request(&spin, ce[0], MI_NOOP);
> +	if (IS_ERR(rq[0])) {
> +		err = PTR_ERR(rq[0]);
> +		goto err_free;
> +	}
> +	i915_request_add(rq[0]);
> +
> +	count = 0;
> +	for (i = 1; i < width; i++) {
> +		GEM_BUG_ON(i915_request_completed(rq[0]));
> +
> +		rq[i] = intel_context_create_request(ce[i]);
> +		if (IS_ERR(rq[i])) {
> +			err = PTR_ERR(rq[i]);
> +			break;
> +		}
> +		for (j = i; j > 0; j--) {
> +			err = i915_request_await_dma_fence(rq[i],
> +							   &rq[j - 1]->fence);
> +			if (err)
> +				break;
> +			count++;
> +		}
> +		i915_request_add(rq[i]);
> +	}
> +	intel_engine_flush_submission(engine);
> +
> +	execlists_active_lock_bh(&engine->execlists);
> +	if (fn(rq[i - 1], i, count) && !check_context_order(engine))
> +		err = -EINVAL;
> +	execlists_active_unlock_bh(&engine->execlists);
> +
> +	igt_spinner_end(&spin);
> +err_free:
> +	kfree(rq);
> +err_context:
> +	for (i = 0; i < width; i++)
> +		intel_context_put(ce[i]);
> +	kfree(ce);
> +err_spin:
> +	igt_spinner_fini(&spin);
> +err_heartbeat:
> +	scheduling_enable(engine);
> +	return err;
> +}
> +
> +static int __sparse_chain(struct intel_engine_cs *engine, unsigned long width,
> +			  bool (*fn)(struct i915_request *rq,
> +				     unsigned long v, unsigned long e))
> +{
> +	struct intel_context **ce;
> +	struct i915_request **rq;
> +	struct igt_spinner spin;
> +	I915_RND_STATE(prng);
> +	unsigned long count;
> +	unsigned long i, j;
> +	int err = 0;
> +
> +	if (!intel_engine_can_store_dword(engine))
> +		return 0;
> +
> +	scheduling_disable(engine);
> +
> +	if (igt_spinner_init(&spin, engine->gt)) {
> +		err = -ENOMEM;
> +		goto err_heartbeat;
> +	}
> +
> +	ce = kmalloc_array(width, sizeof(*ce), GFP_KERNEL);
> +	if (!ce) {
> +		err = -ENOMEM;
> +		goto err_spin;
> +	}
> +
> +	for (i = 0; i < width; i++) {
> +		ce[i] = intel_context_create(engine);
> +		if (IS_ERR(ce[i])) {
> +			err = PTR_ERR(ce[i]);
> +			width = i;
> +			goto err_context;
> +		}
> +	}
> +
> +	rq = kmalloc_array(width, sizeof(*rq), GFP_KERNEL);
> +	if (!rq) {
> +		err = -ENOMEM;
> +		goto err_context;
> +	}
> +
> +	rq[0] = igt_spinner_create_request(&spin, ce[0], MI_NOOP);
> +	if (IS_ERR(rq[0])) {
> +		err = PTR_ERR(rq[0]);
> +		goto err_free;
> +	}
> +	i915_request_add(rq[0]);
> +
> +	count = 0;
> +	for (i = 1; i < width; i++) {
> +		GEM_BUG_ON(i915_request_completed(rq[0]));
> +
> +		rq[i] = intel_context_create_request(ce[i]);
> +		if (IS_ERR(rq[i])) {
> +			err = PTR_ERR(rq[i]);
> +			break;
> +		}
> +
> +		if (err == 0 && i > 1) {
> +			j = i915_prandom_u32_max_state(i - 1, &prng);
> +			err = i915_request_await_dma_fence(rq[i],
> +							   &rq[j]->fence);
> +			count++;
> +		}
> +
> +		if (err == 0) {
> +			err = i915_request_await_dma_fence(rq[i],
> +							   &rq[i - 1]->fence);
> +			count++;
> +		}
> +
> +		if (err == 0 && i > 2) {
> +			j = i915_prandom_u32_max_state(i - 2, &prng);
> +			err = i915_request_await_dma_fence(rq[i],
> +							   &rq[j]->fence);
> +			count++;
> +		}
> +
> +		i915_request_add(rq[i]);
> +		if (err)
> +			break;
> +	}
> +	intel_engine_flush_submission(engine);
> +
> +	execlists_active_lock_bh(&engine->execlists);
> +	if (fn(rq[i - 1], i, count) && !check_context_order(engine))
> +		err = -EINVAL;
> +	execlists_active_unlock_bh(&engine->execlists);
> +
> +	igt_spinner_end(&spin);
> +err_free:
> +	kfree(rq);
> +err_context:
> +	for (i = 0; i < width; i++)
> +		intel_context_put(ce[i]);
> +	kfree(ce);
> +err_spin:
> +	igt_spinner_fini(&spin);
> +err_heartbeat:
> +	scheduling_enable(engine);
> +	return err;
> +}
> +
> +static int igt_schedule_chains(struct drm_i915_private *i915,
> +			       bool (*fn)(struct i915_request *rq,
> +					  unsigned long v, unsigned long e))
> +{
> +	static int (* const chains[])(struct intel_engine_cs *engine,
> +				      unsigned long length,
> +				      bool (*fn)(struct i915_request *rq,
> +						 unsigned long v, unsigned long e)) = {
> +		__single_chain,
> +		__wide_chain,
> +		__inv_chain,
> +		__sparse_chain,
> +	};
> +	int n, err;
> +
> +	for (n = 0; n < ARRAY_SIZE(chains); n++) {
> +		err = all_engines(i915, chains[n], 17, fn);
> +		if (err)
> +			return err;
> +	}
> +
> +	return 0;
> +}
> +
> +static bool igt_priority(struct i915_request *rq,
> +			 unsigned long v, unsigned long e)
> +{
> +	i915_request_set_priority(rq, I915_PRIORITY_BARRIER);
> +	GEM_BUG_ON(rq_prio(rq) != I915_PRIORITY_BARRIER);
> +	return true;
> +}
> +
> +static int igt_priority_chains(void *arg)
> +{
> +	return igt_schedule_chains(arg, igt_priority);
> +}
> +
> +int i915_scheduler_live_selftests(struct drm_i915_private *i915)
> +{
> +	static const struct i915_subtest tests[] = {
> +		SUBTEST(igt_priority_chains),
> +	};
> +
> +	return i915_subtests(tests, i915);
> +}
> +
> +static int chains(struct drm_i915_private *i915,
> +		  int (*chain)(struct drm_i915_private *i915,
> +			       unsigned long length,
> +			       bool (*fn)(struct i915_request *rq,
> +					  unsigned long v, unsigned long e)),
> +		  bool (*fn)(struct i915_request *rq,
> +			     unsigned long v, unsigned long e))
> +{
> +	unsigned long x[] = { 1, 4, 16, 64, 128, 256, 512, 1024, 4096 };
> +	int i, err;
> +
> +	for (i = 0; i < ARRAY_SIZE(x); i++) {
> +		IGT_TIMEOUT(end_time);
> +
> +		err = chain(i915, x[i], fn);
> +		if (err)
> +			return err;
> +
> +		if (__igt_timeout(end_time, NULL))
> +			break;
> +	}
> +
> +	return 0;
> +}
> +
> +static int single_chain(struct drm_i915_private *i915,
> +			unsigned long length,
> +			bool (*fn)(struct i915_request *rq,
> +				   unsigned long v, unsigned long e))
> +{
> +	return first_engine(i915, __single_chain, length, fn);
> +}
> +
> +static int single(struct drm_i915_private *i915,
> +		  bool (*fn)(struct i915_request *rq,
> +			     unsigned long v, unsigned long e))
> +{
> +	return chains(i915, single_chain, fn);
> +}
> +
> +static int wide_chain(struct drm_i915_private *i915,
> +		      unsigned long width,
> +		      bool (*fn)(struct i915_request *rq,
> +				 unsigned long v, unsigned long e))
> +{
> +	return first_engine(i915, __wide_chain, width, fn);
> +}
> +
> +static int wide(struct drm_i915_private *i915,
> +		bool (*fn)(struct i915_request *rq,
> +			   unsigned long v, unsigned long e))
> +{
> +	return chains(i915, wide_chain, fn);
> +}
> +
> +static int inv_chain(struct drm_i915_private *i915,
> +		     unsigned long width,
> +		     bool (*fn)(struct i915_request *rq,
> +				unsigned long v, unsigned long e))
> +{
> +	return first_engine(i915, __inv_chain, width, fn);
> +}
> +
> +static int inv(struct drm_i915_private *i915,
> +	       bool (*fn)(struct i915_request *rq,
> +			  unsigned long v, unsigned long e))
> +{
> +	return chains(i915, inv_chain, fn);
> +}
> +
> +static int sparse_chain(struct drm_i915_private *i915,
> +			unsigned long width,
> +			bool (*fn)(struct i915_request *rq,
> +				   unsigned long v, unsigned long e))
> +{
> +	return first_engine(i915, __sparse_chain, width, fn);
> +}
> +
> +static int sparse(struct drm_i915_private *i915,
> +		  bool (*fn)(struct i915_request *rq,
> +			     unsigned long v, unsigned long e))
> +{
> +	return chains(i915, sparse_chain, fn);
> +}
> +
> +static void report(const char *what, unsigned long v, unsigned long e, u64 dt)
> +{
> +	pr_info("(%4lu, %7lu), %s:%10lluns\n", v, e, what, dt);
> +}
> +
> +static u64 __set_priority(struct i915_request *rq, int prio)
> +{
> +	u64 dt;
> +
> +	preempt_disable();
> +	dt = ktime_get_raw_fast_ns();
> +	i915_request_set_priority(rq, prio);
> +	dt = ktime_get_raw_fast_ns() - dt;
> +	preempt_enable();
> +
> +	return dt;
> +}
> +
> +static bool set_priority(struct i915_request *rq,
> +			 unsigned long v, unsigned long e)
> +{
> +	report("set-priority", v, e, __set_priority(rq, I915_PRIORITY_BARRIER));
> +	return true;
> +}
> +
> +static int single_priority(void *arg)
> +{
> +	return single(arg, set_priority);
> +}
> +
> +static int wide_priority(void *arg)
> +{
> +	return wide(arg, set_priority);
> +}
> +
> +static int inv_priority(void *arg)
> +{
> +	return inv(arg, set_priority);
> +}
> +
> +static int sparse_priority(void *arg)
> +{
> +	return sparse(arg, set_priority);
> +}
> +
> +int i915_scheduler_perf_selftests(struct drm_i915_private *i915)
> +{
> +	static const struct i915_subtest tests[] = {
> +		SUBTEST(single_priority),
> +		SUBTEST(wide_priority),
> +		SUBTEST(inv_priority),
> +		SUBTEST(sparse_priority),
> +	};
> +	static const struct {
> +		const char *name;
> +		size_t sz;
> +	} types[] = {
> +#define T(t) { #t, sizeof(struct t) }
> +		T(i915_priolist),
> +		T(i915_sched_attr),
> +		T(i915_sched_node),
> +		T(i915_dependency),
> +#undef T
> +		{}
> +	};
> +	typeof(*types) *t;
> +
> +	for (t = types; t->name; t++)
> +		pr_info("sizeof(%s): %zd\n", t->name, t->sz);
> +
> +	return i915_subtests(tests, i915);
> +}
> 

Admirably responsible to write that much test code. :) Looks tidy, but I
could not muster a full review, so if this will cut it:

Acked-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Intel-gfx] [CI 08/14] drm/i915/selftests: Force a rewind if at first we don't succeed
  2021-02-02 15:14 ` [Intel-gfx] [CI 08/14] drm/i915/selftests: Force a rewind if at first we don't succeed Chris Wilson
@ 2021-02-02 16:52   ` Tvrtko Ursulin
  2021-02-02 17:43     ` Chris Wilson
  0 siblings, 1 reply; 28+ messages in thread
From: Tvrtko Ursulin @ 2021-02-02 16:52 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 02/02/2021 15:14, Chris Wilson wrote:
> live_timeslice_rewind assumes a particular traversal and reordering
> after the first timeslice yield. However, the outcome can be either
> (A1, A2, B1) or (A1, B2, A2) depending on the path taken through the
> dependency graph. So if we do not get the outcome we need at first, give
> it a priority kick to force a rewind.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gt/selftest_execlists.c | 21 +++++++++++++++++++-
>   1 file changed, 20 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
> index 951e2bf867e1..68e1398704a4 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
> @@ -1107,6 +1107,7 @@ static int live_timeslice_rewind(void *arg)
>   		struct i915_request *rq[3] = {};
>   		struct intel_context *ce;
>   		unsigned long timeslice;
> +		unsigned long timeout;
>   		int i, err = 0;
>   		u32 *slot;
>   
> @@ -1173,11 +1174,29 @@ static int live_timeslice_rewind(void *arg)
>   
>   		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
>   		ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
> -		while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
> +		i = 0;
> +		timeout = jiffies + HZ;
> +		while (i915_request_is_active(rq[A2]) &&
> +		       time_before(jiffies, timeout)) { /* semaphore yield! */
>   			/* Wait for the timeslice to kick in */
>   			del_timer(&engine->execlists.timer);
>   			tasklet_hi_schedule(&engine->execlists.tasklet);
>   			intel_engine_flush_submission(engine);
> +
> +			/*
> +			 * Unfortunately this assumes that during the
> +			 * search of the wait tree it sees the requests
> +			 * in a particular order. That order is not
> +			 * strictly determined and it may pick either
> +			 * A2 or B1 to immediately follow A1.
> +			 *
> +			 * Break the tie with a set-priority. This defeats
> +			 * the goal of trying to cause a rewind with a
> +			 * timeslice, but alas, a rewind is better than
> +			 * none.
> +			 */
> +			if (i++)
> +				i915_request_set_priority(rq[B1], 1);
>   		}
>   		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
>   		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
> 

Didn't fully get the intricacies of the test, but how about not messing
with priorities and just kicking it for longer until it eventually
re-orders into the desired sequence? Surely if it keeps insisting on the
same order, making no progress, there is a flaw in timeslicing anyway?
Or, if that fails, skip the test.
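
Roughly what I have in mind, as a sketch on top of your version (the
longer bound and the skip reporting are made up for illustration):

	timeout = jiffies + 5 * HZ; /* keep kicking for longer */
	while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
		if (!time_before(jiffies, timeout)) {
			pr_info("%s: timeslice rewind not observed, skipping\n",
				engine->name);
			err = 0; /* treat as a skip rather than a failure */
			break;
		}

		/* Wait for the timeslice to kick in */
		del_timer(&engine->execlists.timer);
		tasklet_hi_schedule(&engine->execlists.tasklet);
		intel_engine_flush_submission(engine);
	}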

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Intel-gfx] [CI 07/14] drm/i915/selftests: Exercise priority inheritance around an engine loop
  2021-02-02 16:44   ` Tvrtko Ursulin
@ 2021-02-02 17:22     ` Chris Wilson
  0 siblings, 0 replies; 28+ messages in thread
From: Chris Wilson @ 2021-02-02 17:22 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2021-02-02 16:44:26)
> 
> On 02/02/2021 15:14, Chris Wilson wrote:
> > +     err = 0;
> > +     count = 0;
> > +     for_each_uabi_engine(engine, i915) {
> > +             if (!intel_engine_has_scheduler(engine))
> > +                     continue;
> > +
> > +             rq = __write_timestamp(engine, obj, count, rq);
> > +             if (IS_ERR(rq)) {
> > +                     err = PTR_ERR(rq);
> > +                     break;
> > +             }
> > +
> > +             count++;
> > +     }
> 
> ^^^^ vvvv - two of the same by copy&paste error or couldn't be bothered 
> with outer loop?

It was just my thought process at the time: I wanted the
A->Z; A->Z pair so that it was clear the sequence was cyclic, and I just
didn't think of putting it inside another loop.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Intel-gfx] [CI 08/14] drm/i915/selftests: Force a rewind if at first we don't succeed
  2021-02-02 16:52   ` Tvrtko Ursulin
@ 2021-02-02 17:43     ` Chris Wilson
  2021-02-02 21:14       ` Chris Wilson
  0 siblings, 1 reply; 28+ messages in thread
From: Chris Wilson @ 2021-02-02 17:43 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2021-02-02 16:52:18)
> 
> On 02/02/2021 15:14, Chris Wilson wrote:
> > live_timeslice_rewind assumes a particular traversal and reordering
> > after the first timeslice yield. However, the outcome can be either
> > (A1, A2, B1) or (A1, B1, A2) depending on the path taken through the
> > dependency graph. So if we do not get the outcome we need at first, give
> > it a priority kick to force a rewind.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/gt/selftest_execlists.c | 21 +++++++++++++++++++-
> >   1 file changed, 20 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
> > index 951e2bf867e1..68e1398704a4 100644
> > --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
> > +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
> > @@ -1107,6 +1107,7 @@ static int live_timeslice_rewind(void *arg)
> >               struct i915_request *rq[3] = {};
> >               struct intel_context *ce;
> >               unsigned long timeslice;
> > +             unsigned long timeout;
> >               int i, err = 0;
> >               u32 *slot;
> >   
> > @@ -1173,11 +1174,29 @@ static int live_timeslice_rewind(void *arg)
> >   
> >               /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
> >               ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
> > -             while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
> > +             i = 0;
> > +             timeout = jiffies + HZ;
> > +             while (i915_request_is_active(rq[A2]) &&
> > +                    time_before(jiffies, timeout)) { /* semaphore yield! */
> >                       /* Wait for the timeslice to kick in */
> >                       del_timer(&engine->execlists.timer);
> >                       tasklet_hi_schedule(&engine->execlists.tasklet);
> >                       intel_engine_flush_submission(engine);
> > +
> > +                     /*
> > +                      * Unfortunately this assumes that during the
> > +                      * search of the wait tree it sees the requests
> > +                      * in a particular order. That order is not
> > +                      * strictly determined and it may pick either
> > +                      * A2 or B1 to immediately follow A1.
> > +                      *
> > +                      * Break the tie with a set-priority. This defeats
> > +                      * the goal of trying to cause a rewind with a
> > +                      * timeslice, but alas, a rewind is better than
> > +                      * none.
> > +                      */
> > +                     if (i++)
> > +                             i915_request_set_priority(rq[B1], 1);
> >               }
> >               /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
> >               GEM_BUG_ON(!i915_request_is_active(rq[A1]));
> > 
> 
> Didn't fully get the intricacies of the test, but how about not messing 
> with priorities and instead just kicking it for longer until it eventually 
> re-orders to the desired sequence? Surely if it keeps insisting on the 
> same order, which is making no progress, there is a flaw in timeslicing 
> anyway? Or, if it fails, skip the test.

Ah. The test is trying to prove that the internals of the ELSP[] behave
in a certain manner, without forcing them to. However, there's no
requirement for them to do anything of the sort.

[What is the test trying to prove? That on a timeslice we are capable of
removing a request from an earlier context to allow early switching to a
second context. This requires us to force the context switch to prevent
the currently executing context from keeping its RING_TAIL (which points
at A2), and to resample it so that it ends at A1. We attempt to prove,
with independent spinners, that if we don't reset A2 then it will remain
executing instead of switching to B1 as we expect.]

So what happens is that we queue

[{A1, A2}, {B1}]

trigger a timeslice [by forcing the timer expiry]

and expect ELSP to be rearranged

as [{A1}, {B1}]

because B1 depends on A1; on every timeslice that pair must be in that
order.

And we are looking for A2 to be back in the queue.

Since A2 has no dependency on B1, and vice versa, their order is a free
variable. Every time we walk the graph, we start with deferring
A1, then A2, then B1. Looking at the graph in the same order every time,
we end up packing {A1, A2} together into the same context submission.

You are right that if we allowed A1 to finish, then the timeslicing would
reverse A2 and B1. However, we don't let spinner A1 finish, so everything
stays in the same order.

Hmm. The problem is that the graph order is determined by the order of
construction. Now, we are free to randomise the order of that graph,
though we need to take different locks. Even if we just cycle the graph
by one element (that would be enough to break the repetition here), we
still need that lock. Hmm.
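
Even the one-element cycle would be just (illustrative only; which list
and which lock protects it are hand-waved stand-ins here):

	spin_lock_irq(&engine->active.lock);
	if (!list_empty(&node->waiters_list))
		list_rotate_left(&node->waiters_list); /* first entry to the tail */
	spin_unlock_irq(&engine->active.lock);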

The other option is to change the order of the graph by reordering the
construction, while keeping the original packing by deferring the
scheduling.

Let's see how horrible it is to cycle elements on defer. (Curse the
irqlock pollution.)
-Chris

* [Intel-gfx] ✗ Fi.CI.BUILD: failure for series starting with [CI,01/14] drm/i915/gt: Move engine setup out of set_default_submission (rev2)
  2021-02-02 15:14 [Intel-gfx] [CI 01/14] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
                   ` (12 preceding siblings ...)
  2021-02-02 15:14 ` [Intel-gfx] [CI 14/14] drm/i915: Fix the iterative dfs for defering requests Chris Wilson
@ 2021-02-02 18:37 ` Patchwork
  13 siblings, 0 replies; 28+ messages in thread
From: Patchwork @ 2021-02-02 18:37 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [CI,01/14] drm/i915/gt: Move engine setup out of set_default_submission (rev2)
URL   : https://patchwork.freedesktop.org/series/86585/
State : failure

== Summary ==

Applying: drm/i915/gt: Move engine setup out of set_default_submission
Applying: drm/i915/gt: Move submission_method into intel_gt
Applying: drm/i915/gt: Move CS interrupt handler to the backend
Applying: drm/i915: Replace engine->schedule() with a known request operation
error: sha1 information is lacking or useless (drivers/gpu/drm/i915/gt/intel_execlists_submission.c).
error: could not build fake ancestor
hint: Use 'git am --show-current-patch=diff' to see the failed patch
Patch failed at 0004 drm/i915: Replace engine->schedule() with a known request operation
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".



* Re: [Intel-gfx] [CI 08/14] drm/i915/selftests: Force a rewind if at first we don't succeed
  2021-02-02 17:43     ` Chris Wilson
@ 2021-02-02 21:14       ` Chris Wilson
  2021-02-02 21:24         ` Chris Wilson
  0 siblings, 1 reply; 28+ messages in thread
From: Chris Wilson @ 2021-02-02 21:14 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Chris Wilson (2021-02-02 17:43:53)
> Let's see how horrible it is to cycle elements on defer. (Curse the
> irqlock pollution.)

While that did work, I do not have a good idea of how to do list
rotation on an RCU list. I can see that it must require a pair of
synchronize_rcu calls, and that spells disaster (at least for handling it
inline).
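
The shape of the problem (an illustrative sketch only; 'node' and 'head'
are stand-ins):

	list_del_rcu(&node->link);
	synchronize_rcu();	/* readers may still be at the old position */
	list_add_tail_rcu(&node->link, &head);
	synchronize_rcu();	/* and again before the node may be moved anew */

And synchronize_rcu() sleeps, which is exactly what we cannot do under
the irq-safe engine lock.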

Another way might be to randomize the deadlines along each branch of the
tree... Except we don't have deadlines at this point, and we can't so
freely change the priorities.

Hmm.
-Chris

* Re: [Intel-gfx] [CI 08/14] drm/i915/selftests: Force a rewind if at first we don't succeed
  2021-02-02 21:14       ` Chris Wilson
@ 2021-02-02 21:24         ` Chris Wilson
  2021-02-02 21:32           ` Chris Wilson
  0 siblings, 1 reply; 28+ messages in thread
From: Chris Wilson @ 2021-02-02 21:24 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Chris Wilson (2021-02-02 21:14:35)
> Quoting Chris Wilson (2021-02-02 17:43:53)
> > Let's see how horrible it is to cycle elements on defer. (Curse the
> > irqlock pollution.)
> 
> While that did work, I do not have a good idea of how to do list
> rotation on an RCU list. I can see that it must require a pair of
> synchronize_rcu calls, and that spells disaster (at least for handling it
> inline).
> 
> Another way might be to randomize the deadlines along each branch of the
> tree... Except we don't have deadlines at this point, and we can't so
> freely change the priorities.

Speaking of which, this is 'fixed' by the deadlines, as with them we will
reorder ELSP as the test expects. (Which is why I didn't notice this for
so long.)
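
With deadlines, independent requests like A2 and B1 are ordered by
earliest virtual deadline rather than by construction order; the flavour
of the comparison (sched.deadline being hypothetical, nothing in the
tree yet):

	static bool rq_deadline_before(const struct i915_request *a,
				       const struct i915_request *b)
	{
		return (s64)(a->sched.deadline - b->sched.deadline) < 0;
	}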
-Chris

* Re: [Intel-gfx] [CI 08/14] drm/i915/selftests: Force a rewind if at first we don't succeed
  2021-02-02 21:24         ` Chris Wilson
@ 2021-02-02 21:32           ` Chris Wilson
  0 siblings, 0 replies; 28+ messages in thread
From: Chris Wilson @ 2021-02-02 21:32 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Chris Wilson (2021-02-02 21:24:16)
> Quoting Chris Wilson (2021-02-02 21:14:35)
> > Quoting Chris Wilson (2021-02-02 17:43:53)
> > > Let's see how horrible it is to cycle elements on defer. (Curse the
> > > irqlock pollution.)
> > 
> > While that did work, I do not have a good idea of how to do list
> > rotation on an RCU list. I can see that it must require a pair of
> > synchronize_rcu calls, and that spells disaster (at least for handling it
> > inline).
> > 
> > Another way might be to randomize the deadlines along each branch of the
> > tree... Except we don't have deadlines at this point, and we can't so
> > freely change the priorities.
> 
> Speaking of which, this is 'fixed' by the deadlines, as with them we will
> reorder ELSP as the test expects. (Which is why I didn't notice this for
> so long.)

And I think that's how I am going to handle this, by deferring the dfs
fix for defer_request until we are ready with deadlines.
-Chris

end of thread

Thread overview: 28+ messages
2021-02-02 15:14 [Intel-gfx] [CI 01/14] drm/i915/gt: Move engine setup out of set_default_submission Chris Wilson
2021-02-02 15:14 ` [Intel-gfx] [CI 02/14] drm/i915/gt: Move submission_method into intel_gt Chris Wilson
2021-02-02 15:14 ` [Intel-gfx] [CI 03/14] drm/i915/gt: Move CS interrupt handler to the backend Chris Wilson
2021-02-02 15:49   ` Tvrtko Ursulin
2021-02-02 15:53     ` Chris Wilson
2021-02-02 16:08       ` Chris Wilson
2021-02-02 16:15   ` [Intel-gfx] [PATCH v2] " Chris Wilson
2021-02-02 16:33     ` Tvrtko Ursulin
2021-02-02 15:14 ` [Intel-gfx] [CI 04/14] drm/i915: Replace engine->schedule() with a known request operation Chris Wilson
2021-02-02 15:14 ` [Intel-gfx] [CI 05/14] drm/i915: Restructure priority inheritance Chris Wilson
2021-02-02 15:14 ` [Intel-gfx] [CI 06/14] drm/i915/selftests: Measure set-priority duration Chris Wilson
2021-02-02 16:49   ` Tvrtko Ursulin
2021-02-02 15:14 ` [Intel-gfx] [CI 07/14] drm/i915/selftests: Exercise priority inheritance around an engine loop Chris Wilson
2021-02-02 16:44   ` Tvrtko Ursulin
2021-02-02 17:22     ` Chris Wilson
2021-02-02 15:14 ` [Intel-gfx] [CI 08/14] drm/i915/selftests: Force a rewind if at first we don't succeed Chris Wilson
2021-02-02 16:52   ` Tvrtko Ursulin
2021-02-02 17:43     ` Chris Wilson
2021-02-02 21:14       ` Chris Wilson
2021-02-02 21:24         ` Chris Wilson
2021-02-02 21:32           ` Chris Wilson
2021-02-02 15:14 ` [Intel-gfx] [CI 09/14] drm/i915: Improve DFS for priority inheritance Chris Wilson
2021-02-02 15:14 ` [Intel-gfx] [CI 10/14] drm/i915: Extract request submission from execlists Chris Wilson
2021-02-02 15:14 ` [Intel-gfx] [CI 11/14] drm/i915: Extract request rewinding " Chris Wilson
2021-02-02 15:14 ` [Intel-gfx] [CI 12/14] drm/i915: Extract request suspension from the execlists Chris Wilson
2021-02-02 15:14 ` [Intel-gfx] [CI 13/14] drm/i915: Extract the ability to defer and rerun a request later Chris Wilson
2021-02-02 15:14 ` [Intel-gfx] [CI 14/14] drm/i915: Fix the iterative dfs for defering requests Chris Wilson
2021-02-02 18:37 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for series starting with [CI,01/14] drm/i915/gt: Move engine setup out of set_default_submission (rev2) Patchwork
