All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] drm/i915: Consolidate TLB invalidation flow
@ 2023-02-01 16:51 ` Tvrtko Ursulin
  0 siblings, 0 replies; 11+ messages in thread
From: Tvrtko Ursulin @ 2023-02-01 16:51 UTC (permalink / raw)
  To: Intel-gfx, dri-devel; +Cc: Andrzej Hajda, Tvrtko Ursulin

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

As the logic for selecting the register and corresponsing values grew, the
code become a bit unsightly. Consolidate by storing the required values at
engine init time in the engine itself, and by doing so minimise the amount
of invariant platform and engine checks during each and every TLB
invalidation.

v2:
 * Fail engine probe if TLB invlidations registers are unknown.

v3:
 * Rebase.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Andrzej Hajda <andrzej.hajda@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Andrzej Hajda <andrzej.hajda@intel.com> # v1
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    |  96 +++++++++++++
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  15 ++
 drivers/gpu/drm/i915/gt/intel_gt.c           | 138 +++----------------
 3 files changed, 133 insertions(+), 116 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index d4e29da74612..e430945743ec 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -9,6 +9,7 @@
 
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_internal.h"
+#include "gt/intel_gt_print.h"
 #include "gt/intel_gt_regs.h"
 
 #include "i915_cmd_parser.h"
@@ -1143,12 +1144,107 @@ static int init_status_page(struct intel_engine_cs *engine)
 	return ret;
 }
 
+static int intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine)
+{
+	static const union intel_engine_tlb_inv_reg gen8_regs[] = {
+		[RENDER_CLASS].reg		= GEN8_RTCR,
+		[VIDEO_DECODE_CLASS].reg	= GEN8_M1TCR, /* , GEN8_M2TCR */
+		[VIDEO_ENHANCEMENT_CLASS].reg	= GEN8_VTCR,
+		[COPY_ENGINE_CLASS].reg		= GEN8_BTCR,
+	};
+	static const union intel_engine_tlb_inv_reg gen12_regs[] = {
+		[RENDER_CLASS].reg		= GEN12_GFX_TLB_INV_CR,
+		[VIDEO_DECODE_CLASS].reg	= GEN12_VD_TLB_INV_CR,
+		[VIDEO_ENHANCEMENT_CLASS].reg	= GEN12_VE_TLB_INV_CR,
+		[COPY_ENGINE_CLASS].reg		= GEN12_BLT_TLB_INV_CR,
+		[COMPUTE_CLASS].reg		= GEN12_COMPCTX_TLB_INV_CR,
+	};
+	static const union intel_engine_tlb_inv_reg xehp_regs[] = {
+		[RENDER_CLASS].mcr_reg		  = XEHP_GFX_TLB_INV_CR,
+		[VIDEO_DECODE_CLASS].mcr_reg	  = XEHP_VD_TLB_INV_CR,
+		[VIDEO_ENHANCEMENT_CLASS].mcr_reg = XEHP_VE_TLB_INV_CR,
+		[COPY_ENGINE_CLASS].mcr_reg	  = XEHP_BLT_TLB_INV_CR,
+		[COMPUTE_CLASS].mcr_reg		  = XEHP_COMPCTX_TLB_INV_CR,
+	};
+	struct drm_i915_private *i915 = engine->i915;
+	const union intel_engine_tlb_inv_reg *regs;
+	union intel_engine_tlb_inv_reg reg;
+	unsigned int class = engine->class;
+	unsigned int num = 0;
+	u32 val;
+
+	/*
+	 * New platforms should not be added with catch-all-newer (>=)
+	 * condition so that any later platform added triggers the below warning
+	 * and in turn mandates a human cross-check of whether the invalidation
+	 * flows have compatible semantics.
+	 *
+	 * For instance with the 11.00 -> 12.00 transition three out of five
+	 * respective engine registers were moved to masked type. Then after the
+	 * 12.00 -> 12.50 transition multi cast handling is required too.
+	 */
+
+	if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
+	    GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
+		regs = xehp_regs;
+		num = ARRAY_SIZE(xehp_regs);
+	} else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
+		   GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
+		regs = gen12_regs;
+		num = ARRAY_SIZE(gen12_regs);
+	} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
+		regs = gen8_regs;
+		num = ARRAY_SIZE(gen8_regs);
+	} else if (GRAPHICS_VER(i915) < 8) {
+		return 0;
+	}
+
+	if (gt_WARN_ONCE(engine->gt, !num,
+			 "Platform does not implement TLB invalidation!"))
+		return -ENODEV;
+
+	if (gt_WARN_ON_ONCE(engine->gt,
+			     class >= num ||
+			     (!regs[class].reg.reg &&
+			      !regs[class].mcr_reg.reg)))
+		return -ERANGE;
+
+	reg = regs[class];
+
+	if (GRAPHICS_VER(i915) == 8 && class == VIDEO_DECODE_CLASS) {
+		reg.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
+		val = 0;
+	} else {
+		val = engine->instance;
+	}
+
+	val = BIT(val);
+
+	engine->tlb_inv.mcr = regs == xehp_regs;
+	engine->tlb_inv.reg = reg;
+	engine->tlb_inv.done = val;
+
+	if (GRAPHICS_VER(i915) >= 12 &&
+	    (engine->class == VIDEO_DECODE_CLASS ||
+	     engine->class == VIDEO_ENHANCEMENT_CLASS ||
+	     engine->class == COMPUTE_CLASS))
+		engine->tlb_inv.request = _MASKED_BIT_ENABLE(val);
+	else
+		engine->tlb_inv.request = val;
+
+	return 0;
+}
+
 static int engine_setup_common(struct intel_engine_cs *engine)
 {
 	int err;
 
 	init_llist_head(&engine->barrier_tasks);
 
+	err = intel_engine_init_tlb_invalidation(engine);
+	if (err)
+		return err;
+
 	err = init_status_page(engine);
 	if (err)
 		return err;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 4fd54fb8810f..8c661fe89314 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -341,6 +341,19 @@ struct intel_engine_guc_stats {
 	u64 start_gt_clk;
 };
 
+union intel_engine_tlb_inv_reg {
+	i915_reg_t	reg;
+	i915_mcr_reg_t	mcr_reg;
+};
+
+struct intel_engine_tlb_inv
+{
+	bool mcr;
+	union intel_engine_tlb_inv_reg reg;
+	u32 request;
+	u32 done;
+};
+
 struct intel_engine_cs {
 	struct drm_i915_private *i915;
 	struct intel_gt *gt;
@@ -372,6 +385,8 @@ struct intel_engine_cs {
 	u32 context_size;
 	u32 mmio_base;
 
+	struct intel_engine_tlb_inv tlb_inv;
+
 	/*
 	 * Some w/a require forcewake to be held (which prevents RC6) while
 	 * a particular engine is active. If so, we set fw_domain to which
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 001a7ec5b861..f7f271708fc7 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -982,35 +982,6 @@ void intel_gt_info_print(const struct intel_gt_info *info,
 	intel_sseu_dump(&info->sseu, p);
 }
 
-struct reg_and_bit {
-	union {
-		i915_reg_t reg;
-		i915_mcr_reg_t mcr_reg;
-	};
-	u32 bit;
-};
-
-static struct reg_and_bit
-get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
-		const i915_reg_t *regs, const unsigned int num)
-{
-	const unsigned int class = engine->class;
-	struct reg_and_bit rb = { };
-
-	if (gt_WARN_ON_ONCE(engine->gt, class >= num || !regs[class].reg))
-		return rb;
-
-	rb.reg = regs[class];
-	if (gen8 && class == VIDEO_DECODE_CLASS)
-		rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
-	else
-		rb.bit = engine->instance;
-
-	rb.bit = BIT(rb.bit);
-
-	return rb;
-}
-
 /*
  * HW architecture suggest typical invalidation time at 40us,
  * with pessimistic cases up to 100us and a recommendation to
@@ -1024,14 +995,20 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
  * but are now considered MCR registers.  Since they exist within a GAM range,
  * the primary instance of the register rolls up the status from each unit.
  */
-static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
+static int wait_for_invalidate(struct intel_engine_cs *engine)
 {
-	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
-		return intel_gt_mcr_wait_for_reg(gt, rb.mcr_reg, rb.bit, 0,
+	if (engine->tlb_inv.mcr)
+		return intel_gt_mcr_wait_for_reg(engine->gt,
+						 engine->tlb_inv.reg.mcr_reg,
+						 engine->tlb_inv.done,
+						 0,
 						 TLB_INVAL_TIMEOUT_US,
 						 TLB_INVAL_TIMEOUT_MS);
 	else
-		return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0,
+		return __intel_wait_for_register_fw(engine->gt->uncore,
+						    engine->tlb_inv.reg.reg,
+						    engine->tlb_inv.done,
+						    0,
 						    TLB_INVAL_TIMEOUT_US,
 						    TLB_INVAL_TIMEOUT_MS,
 						    NULL);
@@ -1039,62 +1016,14 @@ static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
 
 static void mmio_invalidate_full(struct intel_gt *gt)
 {
-	static const i915_reg_t gen8_regs[] = {
-		[RENDER_CLASS]			= GEN8_RTCR,
-		[VIDEO_DECODE_CLASS]		= GEN8_M1TCR, /* , GEN8_M2TCR */
-		[VIDEO_ENHANCEMENT_CLASS]	= GEN8_VTCR,
-		[COPY_ENGINE_CLASS]		= GEN8_BTCR,
-	};
-	static const i915_reg_t gen12_regs[] = {
-		[RENDER_CLASS]			= GEN12_GFX_TLB_INV_CR,
-		[VIDEO_DECODE_CLASS]		= GEN12_VD_TLB_INV_CR,
-		[VIDEO_ENHANCEMENT_CLASS]	= GEN12_VE_TLB_INV_CR,
-		[COPY_ENGINE_CLASS]		= GEN12_BLT_TLB_INV_CR,
-		[COMPUTE_CLASS]			= GEN12_COMPCTX_TLB_INV_CR,
-	};
-	static const i915_mcr_reg_t xehp_regs[] = {
-		[RENDER_CLASS]			= XEHP_GFX_TLB_INV_CR,
-		[VIDEO_DECODE_CLASS]		= XEHP_VD_TLB_INV_CR,
-		[VIDEO_ENHANCEMENT_CLASS]	= XEHP_VE_TLB_INV_CR,
-		[COPY_ENGINE_CLASS]		= XEHP_BLT_TLB_INV_CR,
-		[COMPUTE_CLASS]			= XEHP_COMPCTX_TLB_INV_CR,
-	};
 	struct drm_i915_private *i915 = gt->i915;
 	struct intel_uncore *uncore = gt->uncore;
 	struct intel_engine_cs *engine;
 	intel_engine_mask_t awake, tmp;
 	enum intel_engine_id id;
-	const i915_reg_t *regs;
-	unsigned int num = 0;
 	unsigned long flags;
 
-	/*
-	 * New platforms should not be added with catch-all-newer (>=)
-	 * condition so that any later platform added triggers the below warning
-	 * and in turn mandates a human cross-check of whether the invalidation
-	 * flows have compatible semantics.
-	 *
-	 * For instance with the 11.00 -> 12.00 transition three out of five
-	 * respective engine registers were moved to masked type. Then after the
-	 * 12.00 -> 12.50 transition multi cast handling is required too.
-	 */
-
-	if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
-	    GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
-		regs = NULL;
-		num = ARRAY_SIZE(xehp_regs);
-	} else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
-		   GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
-		regs = gen12_regs;
-		num = ARRAY_SIZE(gen12_regs);
-	} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
-		regs = gen8_regs;
-		num = ARRAY_SIZE(gen8_regs);
-	} else if (GRAPHICS_VER(i915) < 8) {
-		return;
-	}
-
-	if (gt_WARN_ONCE(gt, !num, "Platform does not implement TLB invalidation!"))
+	if (GRAPHICS_VER(i915) < 8)
 		return;
 
 	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
@@ -1104,33 +1033,18 @@ static void mmio_invalidate_full(struct intel_gt *gt)
 
 	awake = 0;
 	for_each_engine(engine, gt, id) {
-		struct reg_and_bit rb;
-
 		if (!intel_engine_pm_is_awake(engine))
 			continue;
 
-		if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
-			u32 val = BIT(engine->instance);
-
-			if (engine->class == VIDEO_DECODE_CLASS ||
-			    engine->class == VIDEO_ENHANCEMENT_CLASS ||
-			    engine->class == COMPUTE_CLASS)
-				val = _MASKED_BIT_ENABLE(val);
+		if (engine->tlb_inv.mcr)
 			intel_gt_mcr_multicast_write_fw(gt,
-							xehp_regs[engine->class],
-							val);
-		} else {
-			rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
-			if (!i915_mmio_reg_offset(rb.reg))
-				continue;
-
-			if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS ||
-			    engine->class == VIDEO_ENHANCEMENT_CLASS ||
-			    engine->class == COMPUTE_CLASS))
-				rb.bit = _MASKED_BIT_ENABLE(rb.bit);
-
-			intel_uncore_write_fw(uncore, rb.reg, rb.bit);
-		}
+							engine->tlb_inv.reg.mcr_reg,
+							engine->tlb_inv.request);
+		else
+			intel_uncore_write_fw(uncore,
+					      engine->tlb_inv.reg.reg,
+					      engine->tlb_inv.request);
+
 		awake |= engine->mask;
 	}
 
@@ -1149,17 +1063,9 @@ static void mmio_invalidate_full(struct intel_gt *gt)
 	intel_gt_mcr_unlock(gt, flags);
 
 	for_each_engine_masked(engine, gt, awake, tmp) {
-		struct reg_and_bit rb;
-
-		if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
-			rb.mcr_reg = xehp_regs[engine->class];
-			rb.bit = BIT(engine->instance);
-		} else {
-			rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
-		}
-
-		if (wait_for_invalidate(gt, rb))
-			gt_err_ratelimited(gt, "%s TLB invalidation did not complete in %ums!\n",
+		if (wait_for_invalidate(engine))
+			gt_err_ratelimited(gt,
+					   "%s TLB invalidation did not complete in %ums!\n",
 					   engine->name, TLB_INVAL_TIMEOUT_MS);
 	}
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Intel-gfx] [PATCH v3] drm/i915: Consolidate TLB invalidation flow
@ 2023-02-01 16:51 ` Tvrtko Ursulin
  0 siblings, 0 replies; 11+ messages in thread
From: Tvrtko Ursulin @ 2023-02-01 16:51 UTC (permalink / raw)
  To: Intel-gfx, dri-devel; +Cc: Andrzej Hajda

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

As the logic for selecting the register and corresponsing values grew, the
code become a bit unsightly. Consolidate by storing the required values at
engine init time in the engine itself, and by doing so minimise the amount
of invariant platform and engine checks during each and every TLB
invalidation.

v2:
 * Fail engine probe if TLB invlidations registers are unknown.

v3:
 * Rebase.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Andrzej Hajda <andrzej.hajda@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Andrzej Hajda <andrzej.hajda@intel.com> # v1
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    |  96 +++++++++++++
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  15 ++
 drivers/gpu/drm/i915/gt/intel_gt.c           | 138 +++----------------
 3 files changed, 133 insertions(+), 116 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index d4e29da74612..e430945743ec 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -9,6 +9,7 @@
 
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_internal.h"
+#include "gt/intel_gt_print.h"
 #include "gt/intel_gt_regs.h"
 
 #include "i915_cmd_parser.h"
@@ -1143,12 +1144,107 @@ static int init_status_page(struct intel_engine_cs *engine)
 	return ret;
 }
 
+static int intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine)
+{
+	static const union intel_engine_tlb_inv_reg gen8_regs[] = {
+		[RENDER_CLASS].reg		= GEN8_RTCR,
+		[VIDEO_DECODE_CLASS].reg	= GEN8_M1TCR, /* , GEN8_M2TCR */
+		[VIDEO_ENHANCEMENT_CLASS].reg	= GEN8_VTCR,
+		[COPY_ENGINE_CLASS].reg		= GEN8_BTCR,
+	};
+	static const union intel_engine_tlb_inv_reg gen12_regs[] = {
+		[RENDER_CLASS].reg		= GEN12_GFX_TLB_INV_CR,
+		[VIDEO_DECODE_CLASS].reg	= GEN12_VD_TLB_INV_CR,
+		[VIDEO_ENHANCEMENT_CLASS].reg	= GEN12_VE_TLB_INV_CR,
+		[COPY_ENGINE_CLASS].reg		= GEN12_BLT_TLB_INV_CR,
+		[COMPUTE_CLASS].reg		= GEN12_COMPCTX_TLB_INV_CR,
+	};
+	static const union intel_engine_tlb_inv_reg xehp_regs[] = {
+		[RENDER_CLASS].mcr_reg		  = XEHP_GFX_TLB_INV_CR,
+		[VIDEO_DECODE_CLASS].mcr_reg	  = XEHP_VD_TLB_INV_CR,
+		[VIDEO_ENHANCEMENT_CLASS].mcr_reg = XEHP_VE_TLB_INV_CR,
+		[COPY_ENGINE_CLASS].mcr_reg	  = XEHP_BLT_TLB_INV_CR,
+		[COMPUTE_CLASS].mcr_reg		  = XEHP_COMPCTX_TLB_INV_CR,
+	};
+	struct drm_i915_private *i915 = engine->i915;
+	const union intel_engine_tlb_inv_reg *regs;
+	union intel_engine_tlb_inv_reg reg;
+	unsigned int class = engine->class;
+	unsigned int num = 0;
+	u32 val;
+
+	/*
+	 * New platforms should not be added with catch-all-newer (>=)
+	 * condition so that any later platform added triggers the below warning
+	 * and in turn mandates a human cross-check of whether the invalidation
+	 * flows have compatible semantics.
+	 *
+	 * For instance with the 11.00 -> 12.00 transition three out of five
+	 * respective engine registers were moved to masked type. Then after the
+	 * 12.00 -> 12.50 transition multi cast handling is required too.
+	 */
+
+	if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
+	    GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
+		regs = xehp_regs;
+		num = ARRAY_SIZE(xehp_regs);
+	} else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
+		   GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
+		regs = gen12_regs;
+		num = ARRAY_SIZE(gen12_regs);
+	} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
+		regs = gen8_regs;
+		num = ARRAY_SIZE(gen8_regs);
+	} else if (GRAPHICS_VER(i915) < 8) {
+		return 0;
+	}
+
+	if (gt_WARN_ONCE(engine->gt, !num,
+			 "Platform does not implement TLB invalidation!"))
+		return -ENODEV;
+
+	if (gt_WARN_ON_ONCE(engine->gt,
+			     class >= num ||
+			     (!regs[class].reg.reg &&
+			      !regs[class].mcr_reg.reg)))
+		return -ERANGE;
+
+	reg = regs[class];
+
+	if (GRAPHICS_VER(i915) == 8 && class == VIDEO_DECODE_CLASS) {
+		reg.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
+		val = 0;
+	} else {
+		val = engine->instance;
+	}
+
+	val = BIT(val);
+
+	engine->tlb_inv.mcr = regs == xehp_regs;
+	engine->tlb_inv.reg = reg;
+	engine->tlb_inv.done = val;
+
+	if (GRAPHICS_VER(i915) >= 12 &&
+	    (engine->class == VIDEO_DECODE_CLASS ||
+	     engine->class == VIDEO_ENHANCEMENT_CLASS ||
+	     engine->class == COMPUTE_CLASS))
+		engine->tlb_inv.request = _MASKED_BIT_ENABLE(val);
+	else
+		engine->tlb_inv.request = val;
+
+	return 0;
+}
+
 static int engine_setup_common(struct intel_engine_cs *engine)
 {
 	int err;
 
 	init_llist_head(&engine->barrier_tasks);
 
+	err = intel_engine_init_tlb_invalidation(engine);
+	if (err)
+		return err;
+
 	err = init_status_page(engine);
 	if (err)
 		return err;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 4fd54fb8810f..8c661fe89314 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -341,6 +341,19 @@ struct intel_engine_guc_stats {
 	u64 start_gt_clk;
 };
 
+union intel_engine_tlb_inv_reg {
+	i915_reg_t	reg;
+	i915_mcr_reg_t	mcr_reg;
+};
+
+struct intel_engine_tlb_inv
+{
+	bool mcr;
+	union intel_engine_tlb_inv_reg reg;
+	u32 request;
+	u32 done;
+};
+
 struct intel_engine_cs {
 	struct drm_i915_private *i915;
 	struct intel_gt *gt;
@@ -372,6 +385,8 @@ struct intel_engine_cs {
 	u32 context_size;
 	u32 mmio_base;
 
+	struct intel_engine_tlb_inv tlb_inv;
+
 	/*
 	 * Some w/a require forcewake to be held (which prevents RC6) while
 	 * a particular engine is active. If so, we set fw_domain to which
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 001a7ec5b861..f7f271708fc7 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -982,35 +982,6 @@ void intel_gt_info_print(const struct intel_gt_info *info,
 	intel_sseu_dump(&info->sseu, p);
 }
 
-struct reg_and_bit {
-	union {
-		i915_reg_t reg;
-		i915_mcr_reg_t mcr_reg;
-	};
-	u32 bit;
-};
-
-static struct reg_and_bit
-get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
-		const i915_reg_t *regs, const unsigned int num)
-{
-	const unsigned int class = engine->class;
-	struct reg_and_bit rb = { };
-
-	if (gt_WARN_ON_ONCE(engine->gt, class >= num || !regs[class].reg))
-		return rb;
-
-	rb.reg = regs[class];
-	if (gen8 && class == VIDEO_DECODE_CLASS)
-		rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
-	else
-		rb.bit = engine->instance;
-
-	rb.bit = BIT(rb.bit);
-
-	return rb;
-}
-
 /*
  * HW architecture suggest typical invalidation time at 40us,
  * with pessimistic cases up to 100us and a recommendation to
@@ -1024,14 +995,20 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
  * but are now considered MCR registers.  Since they exist within a GAM range,
  * the primary instance of the register rolls up the status from each unit.
  */
-static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
+static int wait_for_invalidate(struct intel_engine_cs *engine)
 {
-	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
-		return intel_gt_mcr_wait_for_reg(gt, rb.mcr_reg, rb.bit, 0,
+	if (engine->tlb_inv.mcr)
+		return intel_gt_mcr_wait_for_reg(engine->gt,
+						 engine->tlb_inv.reg.mcr_reg,
+						 engine->tlb_inv.done,
+						 0,
 						 TLB_INVAL_TIMEOUT_US,
 						 TLB_INVAL_TIMEOUT_MS);
 	else
-		return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0,
+		return __intel_wait_for_register_fw(engine->gt->uncore,
+						    engine->tlb_inv.reg.reg,
+						    engine->tlb_inv.done,
+						    0,
 						    TLB_INVAL_TIMEOUT_US,
 						    TLB_INVAL_TIMEOUT_MS,
 						    NULL);
@@ -1039,62 +1016,14 @@ static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
 
 static void mmio_invalidate_full(struct intel_gt *gt)
 {
-	static const i915_reg_t gen8_regs[] = {
-		[RENDER_CLASS]			= GEN8_RTCR,
-		[VIDEO_DECODE_CLASS]		= GEN8_M1TCR, /* , GEN8_M2TCR */
-		[VIDEO_ENHANCEMENT_CLASS]	= GEN8_VTCR,
-		[COPY_ENGINE_CLASS]		= GEN8_BTCR,
-	};
-	static const i915_reg_t gen12_regs[] = {
-		[RENDER_CLASS]			= GEN12_GFX_TLB_INV_CR,
-		[VIDEO_DECODE_CLASS]		= GEN12_VD_TLB_INV_CR,
-		[VIDEO_ENHANCEMENT_CLASS]	= GEN12_VE_TLB_INV_CR,
-		[COPY_ENGINE_CLASS]		= GEN12_BLT_TLB_INV_CR,
-		[COMPUTE_CLASS]			= GEN12_COMPCTX_TLB_INV_CR,
-	};
-	static const i915_mcr_reg_t xehp_regs[] = {
-		[RENDER_CLASS]			= XEHP_GFX_TLB_INV_CR,
-		[VIDEO_DECODE_CLASS]		= XEHP_VD_TLB_INV_CR,
-		[VIDEO_ENHANCEMENT_CLASS]	= XEHP_VE_TLB_INV_CR,
-		[COPY_ENGINE_CLASS]		= XEHP_BLT_TLB_INV_CR,
-		[COMPUTE_CLASS]			= XEHP_COMPCTX_TLB_INV_CR,
-	};
 	struct drm_i915_private *i915 = gt->i915;
 	struct intel_uncore *uncore = gt->uncore;
 	struct intel_engine_cs *engine;
 	intel_engine_mask_t awake, tmp;
 	enum intel_engine_id id;
-	const i915_reg_t *regs;
-	unsigned int num = 0;
 	unsigned long flags;
 
-	/*
-	 * New platforms should not be added with catch-all-newer (>=)
-	 * condition so that any later platform added triggers the below warning
-	 * and in turn mandates a human cross-check of whether the invalidation
-	 * flows have compatible semantics.
-	 *
-	 * For instance with the 11.00 -> 12.00 transition three out of five
-	 * respective engine registers were moved to masked type. Then after the
-	 * 12.00 -> 12.50 transition multi cast handling is required too.
-	 */
-
-	if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
-	    GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
-		regs = NULL;
-		num = ARRAY_SIZE(xehp_regs);
-	} else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
-		   GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
-		regs = gen12_regs;
-		num = ARRAY_SIZE(gen12_regs);
-	} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
-		regs = gen8_regs;
-		num = ARRAY_SIZE(gen8_regs);
-	} else if (GRAPHICS_VER(i915) < 8) {
-		return;
-	}
-
-	if (gt_WARN_ONCE(gt, !num, "Platform does not implement TLB invalidation!"))
+	if (GRAPHICS_VER(i915) < 8)
 		return;
 
 	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
@@ -1104,33 +1033,18 @@ static void mmio_invalidate_full(struct intel_gt *gt)
 
 	awake = 0;
 	for_each_engine(engine, gt, id) {
-		struct reg_and_bit rb;
-
 		if (!intel_engine_pm_is_awake(engine))
 			continue;
 
-		if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
-			u32 val = BIT(engine->instance);
-
-			if (engine->class == VIDEO_DECODE_CLASS ||
-			    engine->class == VIDEO_ENHANCEMENT_CLASS ||
-			    engine->class == COMPUTE_CLASS)
-				val = _MASKED_BIT_ENABLE(val);
+		if (engine->tlb_inv.mcr)
 			intel_gt_mcr_multicast_write_fw(gt,
-							xehp_regs[engine->class],
-							val);
-		} else {
-			rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
-			if (!i915_mmio_reg_offset(rb.reg))
-				continue;
-
-			if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS ||
-			    engine->class == VIDEO_ENHANCEMENT_CLASS ||
-			    engine->class == COMPUTE_CLASS))
-				rb.bit = _MASKED_BIT_ENABLE(rb.bit);
-
-			intel_uncore_write_fw(uncore, rb.reg, rb.bit);
-		}
+							engine->tlb_inv.reg.mcr_reg,
+							engine->tlb_inv.request);
+		else
+			intel_uncore_write_fw(uncore,
+					      engine->tlb_inv.reg.reg,
+					      engine->tlb_inv.request);
+
 		awake |= engine->mask;
 	}
 
@@ -1149,17 +1063,9 @@ static void mmio_invalidate_full(struct intel_gt *gt)
 	intel_gt_mcr_unlock(gt, flags);
 
 	for_each_engine_masked(engine, gt, awake, tmp) {
-		struct reg_and_bit rb;
-
-		if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
-			rb.mcr_reg = xehp_regs[engine->class];
-			rb.bit = BIT(engine->instance);
-		} else {
-			rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
-		}
-
-		if (wait_for_invalidate(gt, rb))
-			gt_err_ratelimited(gt, "%s TLB invalidation did not complete in %ums!\n",
+		if (wait_for_invalidate(engine))
+			gt_err_ratelimited(gt,
+					   "%s TLB invalidation did not complete in %ums!\n",
 					   engine->name, TLB_INVAL_TIMEOUT_MS);
 	}
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Consolidate TLB invalidation flow
  2023-02-01 16:51 ` [Intel-gfx] " Tvrtko Ursulin
  (?)
@ 2023-02-01 18:04 ` Patchwork
  -1 siblings, 0 replies; 11+ messages in thread
From: Patchwork @ 2023-02-01 18:04 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Consolidate TLB invalidation flow
URL   : https://patchwork.freedesktop.org/series/113563/
State : warning

== Summary ==

Error: dim checkpatch failed
c0fc33d4e83f drm/i915: Consolidate TLB invalidation flow
-:99: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#99: FILE: drivers/gpu/drm/i915/gt/intel_engine_cs.c:1207:
+	if (gt_WARN_ON_ONCE(engine->gt,
+			     class >= num ||

-:157: ERROR:OPEN_BRACE: open brace '{' following struct go on the same line
#157: FILE: drivers/gpu/drm/i915/gt/intel_engine_types.h:350:
+struct intel_engine_tlb_inv
+{

total: 1 errors, 0 warnings, 1 checks, 324 lines checked



^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915: Consolidate TLB invalidation flow
  2023-02-01 16:51 ` [Intel-gfx] " Tvrtko Ursulin
  (?)
  (?)
@ 2023-02-01 18:20 ` Patchwork
  -1 siblings, 0 replies; 11+ messages in thread
From: Patchwork @ 2023-02-01 18:20 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 3457 bytes --]

== Series Details ==

Series: drm/i915: Consolidate TLB invalidation flow
URL   : https://patchwork.freedesktop.org/series/113563/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_12680 -> Patchwork_113563v1
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/index.html

Participating hosts (26 -> 25)
------------------------------

  Missing    (1): fi-snb-2520m 

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_113563v1:

### IGT changes ###

#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * {igt@i915_selftest@live@gt_tlb}:
    - fi-kbl-7567u:       [PASS][1] -> [DMESG-FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/fi-kbl-7567u/igt@i915_selftest@live@gt_tlb.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/fi-kbl-7567u/igt@i915_selftest@live@gt_tlb.html
    - {bat-kbl-2}:        [PASS][3] -> [DMESG-FAIL][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/bat-kbl-2/igt@i915_selftest@live@gt_tlb.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/bat-kbl-2/igt@i915_selftest@live@gt_tlb.html
    - fi-cfl-8109u:       [PASS][5] -> [DMESG-FAIL][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/fi-cfl-8109u/igt@i915_selftest@live@gt_tlb.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/fi-cfl-8109u/igt@i915_selftest@live@gt_tlb.html

  * igt@i915_selftest@live@slpc:
    - {bat-rpls-1}:       [DMESG-FAIL][7] ([i915#7996]) -> [DMESG-FAIL][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/bat-rpls-1/igt@i915_selftest@live@slpc.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/bat-rpls-1/igt@i915_selftest@live@slpc.html

  
Known issues
------------

  Here are the changes found in Patchwork_113563v1 that come from known issues:

### IGT changes ###

#### Possible fixes ####

  * igt@i915_selftest@live@migrate:
    - {bat-dg2-11}:       [DMESG-WARN][9] ([i915#7699]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/bat-dg2-11/igt@i915_selftest@live@migrate.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/bat-dg2-11/igt@i915_selftest@live@migrate.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [i915#4983]: https://gitlab.freedesktop.org/drm/intel/issues/4983
  [i915#7699]: https://gitlab.freedesktop.org/drm/intel/issues/7699
  [i915#7996]: https://gitlab.freedesktop.org/drm/intel/issues/7996


Build changes
-------------

  * Linux: CI_DRM_12680 -> Patchwork_113563v1

  CI-20190529: 20190529
  CI_DRM_12680: 06086656e6f03cbcbb4b99273734a7943e923fc9 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_7143: c7b12dcc460fc2348e1fa7f4dcb791bb82e29e44 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_113563v1: 06086656e6f03cbcbb4b99273734a7943e923fc9 @ git://anongit.freedesktop.org/gfx-ci/linux


### Linux commits

eee9e2013646 drm/i915: Consolidate TLB invalidation flow

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/index.html

[-- Attachment #2: Type: text/html, Size: 4104 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Intel-gfx] ✓ Fi.CI.IGT: success for drm/i915: Consolidate TLB invalidation flow
  2023-02-01 16:51 ` [Intel-gfx] " Tvrtko Ursulin
                   ` (2 preceding siblings ...)
  (?)
@ 2023-02-01 19:49 ` Patchwork
  -1 siblings, 0 replies; 11+ messages in thread
From: Patchwork @ 2023-02-01 19:49 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 17723 bytes --]

== Series Details ==

Series: drm/i915: Consolidate TLB invalidation flow
URL   : https://patchwork.freedesktop.org/series/113563/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_12680_full -> Patchwork_113563v1_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/index.html

Participating hosts (11 -> 11)
------------------------------

  No changes in participating hosts

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_113563v1_full:

### IGT changes ###

#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@kms_invalid_mode@bad-vtotal@hdmi-a-4-pipe-a:
    - {shard-dg1}:        [PASS][1] -> [ABORT][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/shard-dg1-15/igt@kms_invalid_mode@bad-vtotal@hdmi-a-4-pipe-a.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/shard-dg1-15/igt@kms_invalid_mode@bad-vtotal@hdmi-a-4-pipe-a.html

  * igt@kms_invalid_mode@bad-vtotal@hdmi-a-4-pipe-d:
    - {shard-dg1}:        [PASS][3] -> [DMESG-WARN][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/shard-dg1-15/igt@kms_invalid_mode@bad-vtotal@hdmi-a-4-pipe-d.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/shard-dg1-15/igt@kms_invalid_mode@bad-vtotal@hdmi-a-4-pipe-d.html

  
Known issues
------------

  Here are the changes found in Patchwork_113563v1_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@kms_flip@2x-plain-flip-fb-recreate@ac-hdmi-a1-hdmi-a2:
    - shard-glk:          [PASS][5] -> [FAIL][6] ([i915#2122])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/shard-glk6/igt@kms_flip@2x-plain-flip-fb-recreate@ac-hdmi-a1-hdmi-a2.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/shard-glk5/igt@kms_flip@2x-plain-flip-fb-recreate@ac-hdmi-a1-hdmi-a2.html

  
#### Possible fixes ####

  * igt@drm_fdinfo@most-busy-check-all@rcs0:
    - {shard-rkl}:        [FAIL][7] ([i915#7742]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/shard-rkl-4/igt@drm_fdinfo@most-busy-check-all@rcs0.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/shard-rkl-1/igt@drm_fdinfo@most-busy-check-all@rcs0.html

  * igt@drm_read@short-buffer-block:
    - {shard-rkl}:        [SKIP][9] ([i915#4098]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/shard-rkl-1/igt@drm_read@short-buffer-block.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/shard-rkl-6/igt@drm_read@short-buffer-block.html

  * igt@fbdev@write:
    - {shard-rkl}:        [SKIP][11] ([i915#2582]) -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/shard-rkl-1/igt@fbdev@write.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/shard-rkl-6/igt@fbdev@write.html

  * igt@feature_discovery@psr2:
    - {shard-rkl}:        [SKIP][13] ([i915#658]) -> [PASS][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/shard-rkl-3/igt@feature_discovery@psr2.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/shard-rkl-6/igt@feature_discovery@psr2.html

  * igt@gem_ctx_persistence@engines-hang@bcs0:
    - {shard-rkl}:        [SKIP][15] ([i915#6252]) -> [PASS][16] +1 similar issue
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/shard-rkl-5/igt@gem_ctx_persistence@engines-hang@bcs0.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/shard-rkl-1/igt@gem_ctx_persistence@engines-hang@bcs0.html

  * igt@gem_exec_endless@dispatch@bcs0:
    - {shard-rkl}:        [SKIP][17] ([i915#6247]) -> [PASS][18]
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/shard-rkl-5/igt@gem_exec_endless@dispatch@bcs0.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/shard-rkl-3/igt@gem_exec_endless@dispatch@bcs0.html

  * igt@gem_exec_reloc@basic-gtt-read-noreloc:
    - {shard-rkl}:        [SKIP][19] ([i915#3281]) -> [PASS][20] +5 similar issues
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/shard-rkl-1/igt@gem_exec_reloc@basic-gtt-read-noreloc.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/shard-rkl-5/igt@gem_exec_reloc@basic-gtt-read-noreloc.html

  * igt@gem_madvise@dontneed-before-exec:
    - {shard-rkl}:        [SKIP][21] ([i915#3282]) -> [PASS][22] +3 similar issues
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/shard-rkl-4/igt@gem_madvise@dontneed-before-exec.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/shard-rkl-5/igt@gem_madvise@dontneed-before-exec.html

  * igt@gen9_exec_parse@bb-start-far:
    - {shard-rkl}:        [SKIP][23] ([i915#2527]) -> [PASS][24] +2 similar issues
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/shard-rkl-4/igt@gen9_exec_parse@bb-start-far.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/shard-rkl-5/igt@gen9_exec_parse@bb-start-far.html

  * igt@i915_pm_dc@dc9-dpms:
    - {shard-rkl}:        [SKIP][25] ([i915#3361]) -> [PASS][26]
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/shard-rkl-5/igt@i915_pm_dc@dc9-dpms.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/shard-rkl-6/igt@i915_pm_dc@dc9-dpms.html

  * igt@i915_pm_rpm@dpms-mode-unset-lpsp:
    - {shard-rkl}:        [SKIP][27] ([i915#1397]) -> [PASS][28] +2 similar issues
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/shard-rkl-3/igt@i915_pm_rpm@dpms-mode-unset-lpsp.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/shard-rkl-6/igt@i915_pm_rpm@dpms-mode-unset-lpsp.html

  * igt@kms_big_fb@x-tiled-32bpp-rotate-0:
    - {shard-rkl}:        [SKIP][29] ([i915#1845] / [i915#4098]) -> [PASS][30] +20 similar issues
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/shard-rkl-3/igt@kms_big_fb@x-tiled-32bpp-rotate-0.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/shard-rkl-6/igt@kms_big_fb@x-tiled-32bpp-rotate-0.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible@a-hdmi-a1:
    - shard-glk:          [FAIL][31] ([i915#79]) -> [PASS][32]
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/shard-glk3/igt@kms_flip@flip-vs-expired-vblank-interruptible@a-hdmi-a1.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/shard-glk7/igt@kms_flip@flip-vs-expired-vblank-interruptible@a-hdmi-a1.html

  * igt@kms_frontbuffer_tracking@fbc-badstride:
    - {shard-rkl}:        [SKIP][33] ([i915#1849] / [i915#4098]) -> [PASS][34] +16 similar issues
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/shard-rkl-5/igt@kms_frontbuffer_tracking@fbc-badstride.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/shard-rkl-6/igt@kms_frontbuffer_tracking@fbc-badstride.html

  * igt@kms_psr@cursor_mmap_gtt:
    - {shard-rkl}:        [SKIP][35] ([i915#1072]) -> [PASS][36] +2 similar issues
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12680/shard-rkl-3/igt@kms_psr@cursor_mmap_gtt.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/shard-rkl-6/igt@kms_psr@cursor_mmap_gtt.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#109274]: https://bugs.freedesktop.org/show_bug.cgi?id=109274
  [fdo#109279]: https://bugs.freedesktop.org/show_bug.cgi?id=109279
  [fdo#109280]: https://bugs.freedesktop.org/show_bug.cgi?id=109280
  [fdo#109283]: https://bugs.freedesktop.org/show_bug.cgi?id=109283
  [fdo#109285]: https://bugs.freedesktop.org/show_bug.cgi?id=109285
  [fdo#109289]: https://bugs.freedesktop.org/show_bug.cgi?id=109289
  [fdo#109295]: https://bugs.freedesktop.org/show_bug.cgi?id=109295
  [fdo#109300]: https://bugs.freedesktop.org/show_bug.cgi?id=109300
  [fdo#109302]: https://bugs.freedesktop.org/show_bug.cgi?id=109302
  [fdo#109309]: https://bugs.freedesktop.org/show_bug.cgi?id=109309
  [fdo#109313]: https://bugs.freedesktop.org/show_bug.cgi?id=109313
  [fdo#109315]: https://bugs.freedesktop.org/show_bug.cgi?id=109315
  [fdo#109506]: https://bugs.freedesktop.org/show_bug.cgi?id=109506
  [fdo#109642]: https://bugs.freedesktop.org/show_bug.cgi?id=109642
  [fdo#110189]: https://bugs.freedesktop.org/show_bug.cgi?id=110189
  [fdo#110723]: https://bugs.freedesktop.org/show_bug.cgi?id=110723
  [fdo#111068]: https://bugs.freedesktop.org/show_bug.cgi?id=111068
  [fdo#111614]: https://bugs.freedesktop.org/show_bug.cgi?id=111614
  [fdo#111615]: https://bugs.freedesktop.org/show_bug.cgi?id=111615
  [fdo#111644]: https://bugs.freedesktop.org/show_bug.cgi?id=111644
  [fdo#111656]: https://bugs.freedesktop.org/show_bug.cgi?id=111656
  [fdo#111825]: https://bugs.freedesktop.org/show_bug.cgi?id=111825
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [fdo#112054]: https://bugs.freedesktop.org/show_bug.cgi?id=112054
  [fdo#112283]: https://bugs.freedesktop.org/show_bug.cgi?id=112283
  [i915#1072]: https://gitlab.freedesktop.org/drm/intel/issues/1072
  [i915#132]: https://gitlab.freedesktop.org/drm/intel/issues/132
  [i915#1397]: https://gitlab.freedesktop.org/drm/intel/issues/1397
  [i915#1825]: https://gitlab.freedesktop.org/drm/intel/issues/1825
  [i915#1839]: https://gitlab.freedesktop.org/drm/intel/issues/1839
  [i915#1845]: https://gitlab.freedesktop.org/drm/intel/issues/1845
  [i915#1849]: https://gitlab.freedesktop.org/drm/intel/issues/1849
  [i915#1902]: https://gitlab.freedesktop.org/drm/intel/issues/1902
  [i915#1937]: https://gitlab.freedesktop.org/drm/intel/issues/1937
  [i915#2122]: https://gitlab.freedesktop.org/drm/intel/issues/2122
  [i915#2437]: https://gitlab.freedesktop.org/drm/intel/issues/2437
  [i915#2527]: https://gitlab.freedesktop.org/drm/intel/issues/2527
  [i915#2575]: https://gitlab.freedesktop.org/drm/intel/issues/2575
  [i915#2582]: https://gitlab.freedesktop.org/drm/intel/issues/2582
  [i915#2587]: https://gitlab.freedesktop.org/drm/intel/issues/2587
  [i915#2658]: https://gitlab.freedesktop.org/drm/intel/issues/2658
  [i915#2672]: https://gitlab.freedesktop.org/drm/intel/issues/2672
  [i915#2681]: https://gitlab.freedesktop.org/drm/intel/issues/2681
  [i915#280]: https://gitlab.freedesktop.org/drm/intel/issues/280
  [i915#284]: https://gitlab.freedesktop.org/drm/intel/issues/284
  [i915#2842]: https://gitlab.freedesktop.org/drm/intel/issues/2842
  [i915#2846]: https://gitlab.freedesktop.org/drm/intel/issues/2846
  [i915#2856]: https://gitlab.freedesktop.org/drm/intel/issues/2856
  [i915#2920]: https://gitlab.freedesktop.org/drm/intel/issues/2920
  [i915#3116]: https://gitlab.freedesktop.org/drm/intel/issues/3116
  [i915#3281]: https://gitlab.freedesktop.org/drm/intel/issues/3281
  [i915#3282]: https://gitlab.freedesktop.org/drm/intel/issues/3282
  [i915#3297]: https://gitlab.freedesktop.org/drm/intel/issues/3297
  [i915#3299]: https://gitlab.freedesktop.org/drm/intel/issues/3299
  [i915#3301]: https://gitlab.freedesktop.org/drm/intel/issues/3301
  [i915#3359]: https://gitlab.freedesktop.org/drm/intel/issues/3359
  [i915#3361]: https://gitlab.freedesktop.org/drm/intel/issues/3361
  [i915#3458]: https://gitlab.freedesktop.org/drm/intel/issues/3458
  [i915#3469]: https://gitlab.freedesktop.org/drm/intel/issues/3469
  [i915#3539]: https://gitlab.freedesktop.org/drm/intel/issues/3539
  [i915#3546]: https://gitlab.freedesktop.org/drm/intel/issues/3546
  [i915#3555]: https://gitlab.freedesktop.org/drm/intel/issues/3555
  [i915#3637]: https://gitlab.freedesktop.org/drm/intel/issues/3637
  [i915#3638]: https://gitlab.freedesktop.org/drm/intel/issues/3638
  [i915#3689]: https://gitlab.freedesktop.org/drm/intel/issues/3689
  [i915#3708]: https://gitlab.freedesktop.org/drm/intel/issues/3708
  [i915#3734]: https://gitlab.freedesktop.org/drm/intel/issues/3734
  [i915#3742]: https://gitlab.freedesktop.org/drm/intel/issues/3742
  [i915#3804]: https://gitlab.freedesktop.org/drm/intel/issues/3804
  [i915#3825]: https://gitlab.freedesktop.org/drm/intel/issues/3825
  [i915#3840]: https://gitlab.freedesktop.org/drm/intel/issues/3840
  [i915#3886]: https://gitlab.freedesktop.org/drm/intel/issues/3886
  [i915#3952]: https://gitlab.freedesktop.org/drm/intel/issues/3952
  [i915#3955]: https://gitlab.freedesktop.org/drm/intel/issues/3955
  [i915#3966]: https://gitlab.freedesktop.org/drm/intel/issues/3966
  [i915#3989]: https://gitlab.freedesktop.org/drm/intel/issues/3989
  [i915#4070]: https://gitlab.freedesktop.org/drm/intel/issues/4070
  [i915#4077]: https://gitlab.freedesktop.org/drm/intel/issues/4077
  [i915#4079]: https://gitlab.freedesktop.org/drm/intel/issues/4079
  [i915#4083]: https://gitlab.freedesktop.org/drm/intel/issues/4083
  [i915#4098]: https://gitlab.freedesktop.org/drm/intel/issues/4098
  [i915#4270]: https://gitlab.freedesktop.org/drm/intel/issues/4270
  [i915#4349]: https://gitlab.freedesktop.org/drm/intel/issues/4349
  [i915#4387]: https://gitlab.freedesktop.org/drm/intel/issues/4387
  [i915#4538]: https://gitlab.freedesktop.org/drm/intel/issues/4538
  [i915#454]: https://gitlab.freedesktop.org/drm/intel/issues/454
  [i915#4565]: https://gitlab.freedesktop.org/drm/intel/issues/4565
  [i915#4613]: https://gitlab.freedesktop.org/drm/intel/issues/4613
  [i915#4767]: https://gitlab.freedesktop.org/drm/intel/issues/4767
  [i915#4771]: https://gitlab.freedesktop.org/drm/intel/issues/4771
  [i915#4812]: https://gitlab.freedesktop.org/drm/intel/issues/4812
  [i915#4833]: https://gitlab.freedesktop.org/drm/intel/issues/4833
  [i915#4852]: https://gitlab.freedesktop.org/drm/intel/issues/4852
  [i915#4859]: https://gitlab.freedesktop.org/drm/intel/issues/4859
  [i915#4860]: https://gitlab.freedesktop.org/drm/intel/issues/4860
  [i915#4874]: https://gitlab.freedesktop.org/drm/intel/issues/4874
  [i915#4879]: https://gitlab.freedesktop.org/drm/intel/issues/4879
  [i915#4880]: https://gitlab.freedesktop.org/drm/intel/issues/4880
  [i915#4884]: https://gitlab.freedesktop.org/drm/intel/issues/4884
  [i915#5176]: https://gitlab.freedesktop.org/drm/intel/issues/5176
  [i915#5235]: https://gitlab.freedesktop.org/drm/intel/issues/5235
  [i915#5286]: https://gitlab.freedesktop.org/drm/intel/issues/5286
  [i915#5288]: https://gitlab.freedesktop.org/drm/intel/issues/5288
  [i915#5289]: https://gitlab.freedesktop.org/drm/intel/issues/5289
  [i915#5325]: https://gitlab.freedesktop.org/drm/intel/issues/5325
  [i915#533]: https://gitlab.freedesktop.org/drm/intel/issues/533
  [i915#5461]: https://gitlab.freedesktop.org/drm/intel/issues/5461
  [i915#5563]: https://gitlab.freedesktop.org/drm/intel/issues/5563
  [i915#6095]: https://gitlab.freedesktop.org/drm/intel/issues/6095
  [i915#6227]: https://gitlab.freedesktop.org/drm/intel/issues/6227
  [i915#6230]: https://gitlab.freedesktop.org/drm/intel/issues/6230
  [i915#6245]: https://gitlab.freedesktop.org/drm/intel/issues/6245
  [i915#6247]: https://gitlab.freedesktop.org/drm/intel/issues/6247
  [i915#6248]: https://gitlab.freedesktop.org/drm/intel/issues/6248
  [i915#6252]: https://gitlab.freedesktop.org/drm/intel/issues/6252
  [i915#6301]: https://gitlab.freedesktop.org/drm/intel/issues/6301
  [i915#6334]: https://gitlab.freedesktop.org/drm/intel/issues/6334
  [i915#6335]: https://gitlab.freedesktop.org/drm/intel/issues/6335
  [i915#6433]: https://gitlab.freedesktop.org/drm/intel/issues/6433
  [i915#6497]: https://gitlab.freedesktop.org/drm/intel/issues/6497
  [i915#6524]: https://gitlab.freedesktop.org/drm/intel/issues/6524
  [i915#658]: https://gitlab.freedesktop.org/drm/intel/issues/658
  [i915#6621]: https://gitlab.freedesktop.org/drm/intel/issues/6621
  [i915#6768]: https://gitlab.freedesktop.org/drm/intel/issues/6768
  [i915#6944]: https://gitlab.freedesktop.org/drm/intel/issues/6944
  [i915#6953]: https://gitlab.freedesktop.org/drm/intel/issues/6953
  [i915#7116]: https://gitlab.freedesktop.org/drm/intel/issues/7116
  [i915#7118]: https://gitlab.freedesktop.org/drm/intel/issues/7118
  [i915#7128]: https://gitlab.freedesktop.org/drm/intel/issues/7128
  [i915#7276]: https://gitlab.freedesktop.org/drm/intel/issues/7276
  [i915#7294]: https://gitlab.freedesktop.org/drm/intel/issues/7294
  [i915#7561]: https://gitlab.freedesktop.org/drm/intel/issues/7561
  [i915#7651]: https://gitlab.freedesktop.org/drm/intel/issues/7651
  [i915#7697]: https://gitlab.freedesktop.org/drm/intel/issues/7697
  [i915#7701]: https://gitlab.freedesktop.org/drm/intel/issues/7701
  [i915#7711]: https://gitlab.freedesktop.org/drm/intel/issues/7711
  [i915#7742]: https://gitlab.freedesktop.org/drm/intel/issues/7742
  [i915#7828]: https://gitlab.freedesktop.org/drm/intel/issues/7828
  [i915#79]: https://gitlab.freedesktop.org/drm/intel/issues/79
  [i915#7949]: https://gitlab.freedesktop.org/drm/intel/issues/7949
  [i915#7975]: https://gitlab.freedesktop.org/drm/intel/issues/7975


Build changes
-------------

  * Linux: CI_DRM_12680 -> Patchwork_113563v1

  CI-20190529: 20190529
  CI_DRM_12680: 06086656e6f03cbcbb4b99273734a7943e923fc9 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_7143: c7b12dcc460fc2348e1fa7f4dcb791bb82e29e44 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_113563v1: 06086656e6f03cbcbb4b99273734a7943e923fc9 @ git://anongit.freedesktop.org/gfx-ci/linux

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_113563v1/index.html

[-- Attachment #2: Type: text/html, Size: 10421 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Intel-gfx] [PATCH v3] drm/i915: Consolidate TLB invalidation flow
  2023-02-01 16:51 ` [Intel-gfx] " Tvrtko Ursulin
                   ` (3 preceding siblings ...)
  (?)
@ 2023-02-02  7:43 ` Andrzej Hajda
  2023-02-02  8:33   ` Tvrtko Ursulin
  -1 siblings, 1 reply; 11+ messages in thread
From: Andrzej Hajda @ 2023-02-02  7:43 UTC (permalink / raw)
  To: Tvrtko Ursulin, Intel-gfx, dri-devel

On 01.02.2023 17:51, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> As the logic for selecting the register and corresponsing values grew, the
> code become a bit unsightly. Consolidate by storing the required values at
> engine init time in the engine itself, and by doing so minimise the amount
> of invariant platform and engine checks during each and every TLB
> invalidation.
> 
> v2:
>   * Fail engine probe if TLB invlidations registers are unknown.
> 
> v3:
>   * Rebase.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Andrzej Hajda <andrzej.hajda@intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Reviewed-by: Andrzej Hajda <andrzej.hajda@intel.com> # v1
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c    |  96 +++++++++++++
>   drivers/gpu/drm/i915/gt/intel_engine_types.h |  15 ++
>   drivers/gpu/drm/i915/gt/intel_gt.c           | 138 +++----------------
>   3 files changed, 133 insertions(+), 116 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index d4e29da74612..e430945743ec 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -9,6 +9,7 @@
>   
>   #include "gem/i915_gem_context.h"
>   #include "gem/i915_gem_internal.h"
> +#include "gt/intel_gt_print.h"
>   #include "gt/intel_gt_regs.h"
>   
>   #include "i915_cmd_parser.h"
> @@ -1143,12 +1144,107 @@ static int init_status_page(struct intel_engine_cs *engine)
>   	return ret;
>   }
>   
> +static int intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine)
> +{
> +	static const union intel_engine_tlb_inv_reg gen8_regs[] = {
> +		[RENDER_CLASS].reg		= GEN8_RTCR,
> +		[VIDEO_DECODE_CLASS].reg	= GEN8_M1TCR, /* , GEN8_M2TCR */
> +		[VIDEO_ENHANCEMENT_CLASS].reg	= GEN8_VTCR,
> +		[COPY_ENGINE_CLASS].reg		= GEN8_BTCR,
> +	};
> +	static const union intel_engine_tlb_inv_reg gen12_regs[] = {
> +		[RENDER_CLASS].reg		= GEN12_GFX_TLB_INV_CR,
> +		[VIDEO_DECODE_CLASS].reg	= GEN12_VD_TLB_INV_CR,
> +		[VIDEO_ENHANCEMENT_CLASS].reg	= GEN12_VE_TLB_INV_CR,
> +		[COPY_ENGINE_CLASS].reg		= GEN12_BLT_TLB_INV_CR,
> +		[COMPUTE_CLASS].reg		= GEN12_COMPCTX_TLB_INV_CR,
> +	};
> +	static const union intel_engine_tlb_inv_reg xehp_regs[] = {
> +		[RENDER_CLASS].mcr_reg		  = XEHP_GFX_TLB_INV_CR,
> +		[VIDEO_DECODE_CLASS].mcr_reg	  = XEHP_VD_TLB_INV_CR,
> +		[VIDEO_ENHANCEMENT_CLASS].mcr_reg = XEHP_VE_TLB_INV_CR,
> +		[COPY_ENGINE_CLASS].mcr_reg	  = XEHP_BLT_TLB_INV_CR,
> +		[COMPUTE_CLASS].mcr_reg		  = XEHP_COMPCTX_TLB_INV_CR,
> +	};
> +	struct drm_i915_private *i915 = engine->i915;
> +	const union intel_engine_tlb_inv_reg *regs;
> +	union intel_engine_tlb_inv_reg reg;
> +	unsigned int class = engine->class;
> +	unsigned int num = 0;
> +	u32 val;
> +
> +	/*
> +	 * New platforms should not be added with catch-all-newer (>=)
> +	 * condition so that any later platform added triggers the below warning
> +	 * and in turn mandates a human cross-check of whether the invalidation
> +	 * flows have compatible semantics.
> +	 *
> +	 * For instance with the 11.00 -> 12.00 transition three out of five
> +	 * respective engine registers were moved to masked type. Then after the
> +	 * 12.00 -> 12.50 transition multi cast handling is required too.
> +	 */
> +
> +	if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
> +	    GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
> +		regs = xehp_regs;
> +		num = ARRAY_SIZE(xehp_regs);
> +	} else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
> +		   GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
> +		regs = gen12_regs;
> +		num = ARRAY_SIZE(gen12_regs);
> +	} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
> +		regs = gen8_regs;
> +		num = ARRAY_SIZE(gen8_regs);
> +	} else if (GRAPHICS_VER(i915) < 8) {
> +		return 0;
> +	}
> +
> +	if (gt_WARN_ONCE(engine->gt, !num,
> +			 "Platform does not implement TLB invalidation!"))
> +		return -ENODEV;
> +
> +	if (gt_WARN_ON_ONCE(engine->gt,
> +			     class >= num ||
> +			     (!regs[class].reg.reg &&
> +			      !regs[class].mcr_reg.reg)))
> +		return -ERANGE;
> +
> +	reg = regs[class];
> +
> +	if (GRAPHICS_VER(i915) == 8 && class == VIDEO_DECODE_CLASS) {

As selftest pointed out it should cover also gen 9-11.
Btw maybe it is worth to convert this pseudo array indexing to direct 
assignment:
if ((GRAPHICS_VER(i915) <= 11 && class == VIDEO_DECODE_CLASS && 
engine->instance == 1) {
	reg.reg = GEN8_M2TCR;
	val = 0;
}

Regards
Andrzej

> +		reg.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
> +		val = 0;
> +	} else {
> +		val = engine->instance;
> +	}
> +
> +	val = BIT(val);
> +
> +	engine->tlb_inv.mcr = regs == xehp_regs;
> +	engine->tlb_inv.reg = reg;
> +	engine->tlb_inv.done = val;
> +
> +	if (GRAPHICS_VER(i915) >= 12 &&
> +	    (engine->class == VIDEO_DECODE_CLASS ||
> +	     engine->class == VIDEO_ENHANCEMENT_CLASS ||
> +	     engine->class == COMPUTE_CLASS))
> +		engine->tlb_inv.request = _MASKED_BIT_ENABLE(val);
> +	else
> +		engine->tlb_inv.request = val;
> +
> +	return 0;
> +}
> +
>   static int engine_setup_common(struct intel_engine_cs *engine)
>   {
>   	int err;
>   
>   	init_llist_head(&engine->barrier_tasks);
>   
> +	err = intel_engine_init_tlb_invalidation(engine);
> +	if (err)
> +		return err;
> +
>   	err = init_status_page(engine);
>   	if (err)
>   		return err;
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 4fd54fb8810f..8c661fe89314 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -341,6 +341,19 @@ struct intel_engine_guc_stats {
>   	u64 start_gt_clk;
>   };
>   
> +union intel_engine_tlb_inv_reg {
> +	i915_reg_t	reg;
> +	i915_mcr_reg_t	mcr_reg;
> +};
> +
> +struct intel_engine_tlb_inv
> +{
> +	bool mcr;
> +	union intel_engine_tlb_inv_reg reg;
> +	u32 request;
> +	u32 done;
> +};
> +
>   struct intel_engine_cs {
>   	struct drm_i915_private *i915;
>   	struct intel_gt *gt;
> @@ -372,6 +385,8 @@ struct intel_engine_cs {
>   	u32 context_size;
>   	u32 mmio_base;
>   
> +	struct intel_engine_tlb_inv tlb_inv;
> +
>   	/*
>   	 * Some w/a require forcewake to be held (which prevents RC6) while
>   	 * a particular engine is active. If so, we set fw_domain to which
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
> index 001a7ec5b861..f7f271708fc7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -982,35 +982,6 @@ void intel_gt_info_print(const struct intel_gt_info *info,
>   	intel_sseu_dump(&info->sseu, p);
>   }
>   
> -struct reg_and_bit {
> -	union {
> -		i915_reg_t reg;
> -		i915_mcr_reg_t mcr_reg;
> -	};
> -	u32 bit;
> -};
> -
> -static struct reg_and_bit
> -get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
> -		const i915_reg_t *regs, const unsigned int num)
> -{
> -	const unsigned int class = engine->class;
> -	struct reg_and_bit rb = { };
> -
> -	if (gt_WARN_ON_ONCE(engine->gt, class >= num || !regs[class].reg))
> -		return rb;
> -
> -	rb.reg = regs[class];
> -	if (gen8 && class == VIDEO_DECODE_CLASS)
> -		rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
> -	else
> -		rb.bit = engine->instance;
> -
> -	rb.bit = BIT(rb.bit);
> -
> -	return rb;
> -}
> -
>   /*
>    * HW architecture suggest typical invalidation time at 40us,
>    * with pessimistic cases up to 100us and a recommendation to
> @@ -1024,14 +995,20 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
>    * but are now considered MCR registers.  Since they exist within a GAM range,
>    * the primary instance of the register rolls up the status from each unit.
>    */
> -static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
> +static int wait_for_invalidate(struct intel_engine_cs *engine)
>   {
> -	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
> -		return intel_gt_mcr_wait_for_reg(gt, rb.mcr_reg, rb.bit, 0,
> +	if (engine->tlb_inv.mcr)
> +		return intel_gt_mcr_wait_for_reg(engine->gt,
> +						 engine->tlb_inv.reg.mcr_reg,
> +						 engine->tlb_inv.done,
> +						 0,
>   						 TLB_INVAL_TIMEOUT_US,
>   						 TLB_INVAL_TIMEOUT_MS);
>   	else
> -		return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0,
> +		return __intel_wait_for_register_fw(engine->gt->uncore,
> +						    engine->tlb_inv.reg.reg,
> +						    engine->tlb_inv.done,
> +						    0,
>   						    TLB_INVAL_TIMEOUT_US,
>   						    TLB_INVAL_TIMEOUT_MS,
>   						    NULL);
> @@ -1039,62 +1016,14 @@ static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
>   
>   static void mmio_invalidate_full(struct intel_gt *gt)
>   {
> -	static const i915_reg_t gen8_regs[] = {
> -		[RENDER_CLASS]			= GEN8_RTCR,
> -		[VIDEO_DECODE_CLASS]		= GEN8_M1TCR, /* , GEN8_M2TCR */
> -		[VIDEO_ENHANCEMENT_CLASS]	= GEN8_VTCR,
> -		[COPY_ENGINE_CLASS]		= GEN8_BTCR,
> -	};
> -	static const i915_reg_t gen12_regs[] = {
> -		[RENDER_CLASS]			= GEN12_GFX_TLB_INV_CR,
> -		[VIDEO_DECODE_CLASS]		= GEN12_VD_TLB_INV_CR,
> -		[VIDEO_ENHANCEMENT_CLASS]	= GEN12_VE_TLB_INV_CR,
> -		[COPY_ENGINE_CLASS]		= GEN12_BLT_TLB_INV_CR,
> -		[COMPUTE_CLASS]			= GEN12_COMPCTX_TLB_INV_CR,
> -	};
> -	static const i915_mcr_reg_t xehp_regs[] = {
> -		[RENDER_CLASS]			= XEHP_GFX_TLB_INV_CR,
> -		[VIDEO_DECODE_CLASS]		= XEHP_VD_TLB_INV_CR,
> -		[VIDEO_ENHANCEMENT_CLASS]	= XEHP_VE_TLB_INV_CR,
> -		[COPY_ENGINE_CLASS]		= XEHP_BLT_TLB_INV_CR,
> -		[COMPUTE_CLASS]			= XEHP_COMPCTX_TLB_INV_CR,
> -	};
>   	struct drm_i915_private *i915 = gt->i915;
>   	struct intel_uncore *uncore = gt->uncore;
>   	struct intel_engine_cs *engine;
>   	intel_engine_mask_t awake, tmp;
>   	enum intel_engine_id id;
> -	const i915_reg_t *regs;
> -	unsigned int num = 0;
>   	unsigned long flags;
>   
> -	/*
> -	 * New platforms should not be added with catch-all-newer (>=)
> -	 * condition so that any later platform added triggers the below warning
> -	 * and in turn mandates a human cross-check of whether the invalidation
> -	 * flows have compatible semantics.
> -	 *
> -	 * For instance with the 11.00 -> 12.00 transition three out of five
> -	 * respective engine registers were moved to masked type. Then after the
> -	 * 12.00 -> 12.50 transition multi cast handling is required too.
> -	 */
> -
> -	if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
> -	    GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
> -		regs = NULL;
> -		num = ARRAY_SIZE(xehp_regs);
> -	} else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
> -		   GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
> -		regs = gen12_regs;
> -		num = ARRAY_SIZE(gen12_regs);
> -	} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
> -		regs = gen8_regs;
> -		num = ARRAY_SIZE(gen8_regs);
> -	} else if (GRAPHICS_VER(i915) < 8) {
> -		return;
> -	}
> -
> -	if (gt_WARN_ONCE(gt, !num, "Platform does not implement TLB invalidation!"))
> +	if (GRAPHICS_VER(i915) < 8)
>   		return;
>   
>   	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
> @@ -1104,33 +1033,18 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>   
>   	awake = 0;
>   	for_each_engine(engine, gt, id) {
> -		struct reg_and_bit rb;
> -
>   		if (!intel_engine_pm_is_awake(engine))
>   			continue;
>   
> -		if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
> -			u32 val = BIT(engine->instance);
> -
> -			if (engine->class == VIDEO_DECODE_CLASS ||
> -			    engine->class == VIDEO_ENHANCEMENT_CLASS ||
> -			    engine->class == COMPUTE_CLASS)
> -				val = _MASKED_BIT_ENABLE(val);
> +		if (engine->tlb_inv.mcr)
>   			intel_gt_mcr_multicast_write_fw(gt,
> -							xehp_regs[engine->class],
> -							val);
> -		} else {
> -			rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
> -			if (!i915_mmio_reg_offset(rb.reg))
> -				continue;
> -
> -			if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS ||
> -			    engine->class == VIDEO_ENHANCEMENT_CLASS ||
> -			    engine->class == COMPUTE_CLASS))
> -				rb.bit = _MASKED_BIT_ENABLE(rb.bit);
> -
> -			intel_uncore_write_fw(uncore, rb.reg, rb.bit);
> -		}
> +							engine->tlb_inv.reg.mcr_reg,
> +							engine->tlb_inv.request);
> +		else
> +			intel_uncore_write_fw(uncore,
> +					      engine->tlb_inv.reg.reg,
> +					      engine->tlb_inv.request);
> +
>   		awake |= engine->mask;
>   	}
>   
> @@ -1149,17 +1063,9 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>   	intel_gt_mcr_unlock(gt, flags);
>   
>   	for_each_engine_masked(engine, gt, awake, tmp) {
> -		struct reg_and_bit rb;
> -
> -		if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
> -			rb.mcr_reg = xehp_regs[engine->class];
> -			rb.bit = BIT(engine->instance);
> -		} else {
> -			rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
> -		}
> -
> -		if (wait_for_invalidate(gt, rb))
> -			gt_err_ratelimited(gt, "%s TLB invalidation did not complete in %ums!\n",
> +		if (wait_for_invalidate(engine))
> +			gt_err_ratelimited(gt,
> +					   "%s TLB invalidation did not complete in %ums!\n",
>   					   engine->name, TLB_INVAL_TIMEOUT_MS);
>   	}
>   


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Intel-gfx] [PATCH v3] drm/i915: Consolidate TLB invalidation flow
  2023-02-02  7:43 ` [Intel-gfx] [PATCH v3] " Andrzej Hajda
@ 2023-02-02  8:33   ` Tvrtko Ursulin
  2023-02-02  9:39     ` Andrzej Hajda
  0 siblings, 1 reply; 11+ messages in thread
From: Tvrtko Ursulin @ 2023-02-02  8:33 UTC (permalink / raw)
  To: Andrzej Hajda, Intel-gfx, dri-devel


On 02/02/2023 07:43, Andrzej Hajda wrote:
> On 01.02.2023 17:51, Tvrtko Ursulin wrote:

[snip]

>> +static int intel_engine_init_tlb_invalidation(struct intel_engine_cs 
>> *engine)
>> +{
>> +    static const union intel_engine_tlb_inv_reg gen8_regs[] = {
>> +        [RENDER_CLASS].reg        = GEN8_RTCR,
>> +        [VIDEO_DECODE_CLASS].reg    = GEN8_M1TCR, /* , GEN8_M2TCR */
>> +        [VIDEO_ENHANCEMENT_CLASS].reg    = GEN8_VTCR,
>> +        [COPY_ENGINE_CLASS].reg        = GEN8_BTCR,
>> +    };
>> +    static const union intel_engine_tlb_inv_reg gen12_regs[] = {
>> +        [RENDER_CLASS].reg        = GEN12_GFX_TLB_INV_CR,
>> +        [VIDEO_DECODE_CLASS].reg    = GEN12_VD_TLB_INV_CR,
>> +        [VIDEO_ENHANCEMENT_CLASS].reg    = GEN12_VE_TLB_INV_CR,
>> +        [COPY_ENGINE_CLASS].reg        = GEN12_BLT_TLB_INV_CR,
>> +        [COMPUTE_CLASS].reg        = GEN12_COMPCTX_TLB_INV_CR,
>> +    };
>> +    static const union intel_engine_tlb_inv_reg xehp_regs[] = {
>> +        [RENDER_CLASS].mcr_reg          = XEHP_GFX_TLB_INV_CR,
>> +        [VIDEO_DECODE_CLASS].mcr_reg      = XEHP_VD_TLB_INV_CR,
>> +        [VIDEO_ENHANCEMENT_CLASS].mcr_reg = XEHP_VE_TLB_INV_CR,
>> +        [COPY_ENGINE_CLASS].mcr_reg      = XEHP_BLT_TLB_INV_CR,
>> +        [COMPUTE_CLASS].mcr_reg          = XEHP_COMPCTX_TLB_INV_CR,
>> +    };
>> +    struct drm_i915_private *i915 = engine->i915;
>> +    const union intel_engine_tlb_inv_reg *regs;
>> +    union intel_engine_tlb_inv_reg reg;
>> +    unsigned int class = engine->class;
>> +    unsigned int num = 0;
>> +    u32 val;
>> +
>> +    /*
>> +     * New platforms should not be added with catch-all-newer (>=)
>> +     * condition so that any later platform added triggers the below 
>> warning
>> +     * and in turn mandates a human cross-check of whether the 
>> invalidation
>> +     * flows have compatible semantics.
>> +     *
>> +     * For instance with the 11.00 -> 12.00 transition three out of five
>> +     * respective engine registers were moved to masked type. Then 
>> after the
>> +     * 12.00 -> 12.50 transition multi cast handling is required too.
>> +     */
>> +
>> +    if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
>> +        GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
>> +        regs = xehp_regs;
>> +        num = ARRAY_SIZE(xehp_regs);
>> +    } else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
>> +           GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
>> +        regs = gen12_regs;
>> +        num = ARRAY_SIZE(gen12_regs);
>> +    } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
>> +        regs = gen8_regs;
>> +        num = ARRAY_SIZE(gen8_regs);
>> +    } else if (GRAPHICS_VER(i915) < 8) {
>> +        return 0;
>> +    }
>> +
>> +    if (gt_WARN_ONCE(engine->gt, !num,
>> +             "Platform does not implement TLB invalidation!"))
>> +        return -ENODEV;
>> +
>> +    if (gt_WARN_ON_ONCE(engine->gt,
>> +                 class >= num ||
>> +                 (!regs[class].reg.reg &&
>> +                  !regs[class].mcr_reg.reg)))
>> +        return -ERANGE;
>> +
>> +    reg = regs[class];
>> +
>> +    if (GRAPHICS_VER(i915) == 8 && class == VIDEO_DECODE_CLASS) {
> 
> As selftest pointed out it should cover also gen 9-11.
> Btw maybe it is worth to convert this pseudo array indexing to direct 
> assignment:
> if ((GRAPHICS_VER(i915) <= 11 && class == VIDEO_DECODE_CLASS && 
> engine->instance == 1) {
>      reg.reg = GEN8_M2TCR;
>      val = 0;
> }

Yes good call, v4 sent.

Btw - do you have any idea why the test is suppressed already?! CI told 
me BAT was a success...

Regards,

Tvrtko

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Intel-gfx] [PATCH v3] drm/i915: Consolidate TLB invalidation flow
  2023-02-02  8:33   ` Tvrtko Ursulin
@ 2023-02-02  9:39     ` Andrzej Hajda
  2023-02-15  8:14       ` Tvrtko Ursulin
  0 siblings, 1 reply; 11+ messages in thread
From: Andrzej Hajda @ 2023-02-02  9:39 UTC (permalink / raw)
  To: Tvrtko Ursulin, Intel-gfx, dri-devel

On 02.02.2023 09:33, Tvrtko Ursulin wrote:
> 
> On 02/02/2023 07:43, Andrzej Hajda wrote:
>> On 01.02.2023 17:51, Tvrtko Ursulin wrote:
> 
> [snip]
> 
>
> 
> Btw - do you have any idea why the test is suppressed already?! CI told 
> me BAT was a success...


Except this patch, igt@i915_selftest@live@gt_tlb always succeeds[1][2]. 
So I guess this is just CI logic which do not trust new tests, sounds 
reasonable. Lets wait few days to see if it changes.

[1]: 
http://gfx-ci.igk.intel.com/cibuglog-ng/results/all?query_key=d3cc1f04e52acd0f911cd54fd855a3f085a40e14
[2]: 
https://lore.kernel.org/intel-gfx/?q=igt%40i915_selftest%40live%40gt_tlb


Regards
Andrzej

> 
> Regards,
> 
> Tvrtko


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3] drm/i915: Consolidate TLB invalidation flow
  2023-02-01 16:51 ` [Intel-gfx] " Tvrtko Ursulin
@ 2023-02-13 18:44   ` Matt Roper
  -1 siblings, 0 replies; 11+ messages in thread
From: Matt Roper @ 2023-02-13 18:44 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx, Andrzej Hajda, dri-devel, Tvrtko Ursulin

On Wed, Feb 01, 2023 at 04:51:46PM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> As the logic for selecting the register and corresponsing values grew, the
> code become a bit unsightly. Consolidate by storing the required values at
> engine init time in the engine itself, and by doing so minimise the amount
> of invariant platform and engine checks during each and every TLB
> invalidation.
> 
> v2:
>  * Fail engine probe if TLB invlidations registers are unknown.
> 
> v3:
>  * Rebase.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Andrzej Hajda <andrzej.hajda@intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Reviewed-by: Andrzej Hajda <andrzej.hajda@intel.com> # v1

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>

> ---
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c    |  96 +++++++++++++
>  drivers/gpu/drm/i915/gt/intel_engine_types.h |  15 ++
>  drivers/gpu/drm/i915/gt/intel_gt.c           | 138 +++----------------
>  3 files changed, 133 insertions(+), 116 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index d4e29da74612..e430945743ec 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -9,6 +9,7 @@
>  
>  #include "gem/i915_gem_context.h"
>  #include "gem/i915_gem_internal.h"
> +#include "gt/intel_gt_print.h"
>  #include "gt/intel_gt_regs.h"
>  
>  #include "i915_cmd_parser.h"
> @@ -1143,12 +1144,107 @@ static int init_status_page(struct intel_engine_cs *engine)
>  	return ret;
>  }
>  
> +static int intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine)
> +{
> +	static const union intel_engine_tlb_inv_reg gen8_regs[] = {
> +		[RENDER_CLASS].reg		= GEN8_RTCR,
> +		[VIDEO_DECODE_CLASS].reg	= GEN8_M1TCR, /* , GEN8_M2TCR */
> +		[VIDEO_ENHANCEMENT_CLASS].reg	= GEN8_VTCR,
> +		[COPY_ENGINE_CLASS].reg		= GEN8_BTCR,
> +	};
> +	static const union intel_engine_tlb_inv_reg gen12_regs[] = {
> +		[RENDER_CLASS].reg		= GEN12_GFX_TLB_INV_CR,
> +		[VIDEO_DECODE_CLASS].reg	= GEN12_VD_TLB_INV_CR,
> +		[VIDEO_ENHANCEMENT_CLASS].reg	= GEN12_VE_TLB_INV_CR,
> +		[COPY_ENGINE_CLASS].reg		= GEN12_BLT_TLB_INV_CR,
> +		[COMPUTE_CLASS].reg		= GEN12_COMPCTX_TLB_INV_CR,
> +	};
> +	static const union intel_engine_tlb_inv_reg xehp_regs[] = {
> +		[RENDER_CLASS].mcr_reg		  = XEHP_GFX_TLB_INV_CR,
> +		[VIDEO_DECODE_CLASS].mcr_reg	  = XEHP_VD_TLB_INV_CR,
> +		[VIDEO_ENHANCEMENT_CLASS].mcr_reg = XEHP_VE_TLB_INV_CR,
> +		[COPY_ENGINE_CLASS].mcr_reg	  = XEHP_BLT_TLB_INV_CR,
> +		[COMPUTE_CLASS].mcr_reg		  = XEHP_COMPCTX_TLB_INV_CR,
> +	};
> +	struct drm_i915_private *i915 = engine->i915;
> +	const union intel_engine_tlb_inv_reg *regs;
> +	union intel_engine_tlb_inv_reg reg;
> +	unsigned int class = engine->class;
> +	unsigned int num = 0;
> +	u32 val;
> +
> +	/*
> +	 * New platforms should not be added with catch-all-newer (>=)
> +	 * condition so that any later platform added triggers the below warning
> +	 * and in turn mandates a human cross-check of whether the invalidation
> +	 * flows have compatible semantics.
> +	 *
> +	 * For instance with the 11.00 -> 12.00 transition three out of five
> +	 * respective engine registers were moved to masked type. Then after the
> +	 * 12.00 -> 12.50 transition multi cast handling is required too.
> +	 */
> +
> +	if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
> +	    GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
> +		regs = xehp_regs;
> +		num = ARRAY_SIZE(xehp_regs);
> +	} else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
> +		   GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
> +		regs = gen12_regs;
> +		num = ARRAY_SIZE(gen12_regs);
> +	} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
> +		regs = gen8_regs;
> +		num = ARRAY_SIZE(gen8_regs);
> +	} else if (GRAPHICS_VER(i915) < 8) {
> +		return 0;
> +	}
> +
> +	if (gt_WARN_ONCE(engine->gt, !num,
> +			 "Platform does not implement TLB invalidation!"))
> +		return -ENODEV;
> +
> +	if (gt_WARN_ON_ONCE(engine->gt,
> +			     class >= num ||
> +			     (!regs[class].reg.reg &&
> +			      !regs[class].mcr_reg.reg)))
> +		return -ERANGE;
> +
> +	reg = regs[class];
> +
> +	if (GRAPHICS_VER(i915) == 8 && class == VIDEO_DECODE_CLASS) {
> +		reg.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
> +		val = 0;
> +	} else {
> +		val = engine->instance;
> +	}
> +
> +	val = BIT(val);
> +
> +	engine->tlb_inv.mcr = regs == xehp_regs;
> +	engine->tlb_inv.reg = reg;
> +	engine->tlb_inv.done = val;
> +
> +	if (GRAPHICS_VER(i915) >= 12 &&
> +	    (engine->class == VIDEO_DECODE_CLASS ||
> +	     engine->class == VIDEO_ENHANCEMENT_CLASS ||
> +	     engine->class == COMPUTE_CLASS))
> +		engine->tlb_inv.request = _MASKED_BIT_ENABLE(val);
> +	else
> +		engine->tlb_inv.request = val;
> +
> +	return 0;
> +}
> +
>  static int engine_setup_common(struct intel_engine_cs *engine)
>  {
>  	int err;
>  
>  	init_llist_head(&engine->barrier_tasks);
>  
> +	err = intel_engine_init_tlb_invalidation(engine);
> +	if (err)
> +		return err;
> +
>  	err = init_status_page(engine);
>  	if (err)
>  		return err;
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 4fd54fb8810f..8c661fe89314 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -341,6 +341,19 @@ struct intel_engine_guc_stats {
>  	u64 start_gt_clk;
>  };
>  
> +union intel_engine_tlb_inv_reg {
> +	i915_reg_t	reg;
> +	i915_mcr_reg_t	mcr_reg;
> +};
> +
> +struct intel_engine_tlb_inv
> +{
> +	bool mcr;
> +	union intel_engine_tlb_inv_reg reg;
> +	u32 request;
> +	u32 done;
> +};
> +
>  struct intel_engine_cs {
>  	struct drm_i915_private *i915;
>  	struct intel_gt *gt;
> @@ -372,6 +385,8 @@ struct intel_engine_cs {
>  	u32 context_size;
>  	u32 mmio_base;
>  
> +	struct intel_engine_tlb_inv tlb_inv;
> +
>  	/*
>  	 * Some w/a require forcewake to be held (which prevents RC6) while
>  	 * a particular engine is active. If so, we set fw_domain to which
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
> index 001a7ec5b861..f7f271708fc7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -982,35 +982,6 @@ void intel_gt_info_print(const struct intel_gt_info *info,
>  	intel_sseu_dump(&info->sseu, p);
>  }
>  
> -struct reg_and_bit {
> -	union {
> -		i915_reg_t reg;
> -		i915_mcr_reg_t mcr_reg;
> -	};
> -	u32 bit;
> -};
> -
> -static struct reg_and_bit
> -get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
> -		const i915_reg_t *regs, const unsigned int num)
> -{
> -	const unsigned int class = engine->class;
> -	struct reg_and_bit rb = { };
> -
> -	if (gt_WARN_ON_ONCE(engine->gt, class >= num || !regs[class].reg))
> -		return rb;
> -
> -	rb.reg = regs[class];
> -	if (gen8 && class == VIDEO_DECODE_CLASS)
> -		rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
> -	else
> -		rb.bit = engine->instance;
> -
> -	rb.bit = BIT(rb.bit);
> -
> -	return rb;
> -}
> -
>  /*
>   * HW architecture suggest typical invalidation time at 40us,
>   * with pessimistic cases up to 100us and a recommendation to
> @@ -1024,14 +995,20 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
>   * but are now considered MCR registers.  Since they exist within a GAM range,
>   * the primary instance of the register rolls up the status from each unit.
>   */
> -static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
> +static int wait_for_invalidate(struct intel_engine_cs *engine)
>  {
> -	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
> -		return intel_gt_mcr_wait_for_reg(gt, rb.mcr_reg, rb.bit, 0,
> +	if (engine->tlb_inv.mcr)
> +		return intel_gt_mcr_wait_for_reg(engine->gt,
> +						 engine->tlb_inv.reg.mcr_reg,
> +						 engine->tlb_inv.done,
> +						 0,
>  						 TLB_INVAL_TIMEOUT_US,
>  						 TLB_INVAL_TIMEOUT_MS);
>  	else
> -		return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0,
> +		return __intel_wait_for_register_fw(engine->gt->uncore,
> +						    engine->tlb_inv.reg.reg,
> +						    engine->tlb_inv.done,
> +						    0,
>  						    TLB_INVAL_TIMEOUT_US,
>  						    TLB_INVAL_TIMEOUT_MS,
>  						    NULL);
> @@ -1039,62 +1016,14 @@ static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
>  
>  static void mmio_invalidate_full(struct intel_gt *gt)
>  {
> -	static const i915_reg_t gen8_regs[] = {
> -		[RENDER_CLASS]			= GEN8_RTCR,
> -		[VIDEO_DECODE_CLASS]		= GEN8_M1TCR, /* , GEN8_M2TCR */
> -		[VIDEO_ENHANCEMENT_CLASS]	= GEN8_VTCR,
> -		[COPY_ENGINE_CLASS]		= GEN8_BTCR,
> -	};
> -	static const i915_reg_t gen12_regs[] = {
> -		[RENDER_CLASS]			= GEN12_GFX_TLB_INV_CR,
> -		[VIDEO_DECODE_CLASS]		= GEN12_VD_TLB_INV_CR,
> -		[VIDEO_ENHANCEMENT_CLASS]	= GEN12_VE_TLB_INV_CR,
> -		[COPY_ENGINE_CLASS]		= GEN12_BLT_TLB_INV_CR,
> -		[COMPUTE_CLASS]			= GEN12_COMPCTX_TLB_INV_CR,
> -	};
> -	static const i915_mcr_reg_t xehp_regs[] = {
> -		[RENDER_CLASS]			= XEHP_GFX_TLB_INV_CR,
> -		[VIDEO_DECODE_CLASS]		= XEHP_VD_TLB_INV_CR,
> -		[VIDEO_ENHANCEMENT_CLASS]	= XEHP_VE_TLB_INV_CR,
> -		[COPY_ENGINE_CLASS]		= XEHP_BLT_TLB_INV_CR,
> -		[COMPUTE_CLASS]			= XEHP_COMPCTX_TLB_INV_CR,
> -	};
>  	struct drm_i915_private *i915 = gt->i915;
>  	struct intel_uncore *uncore = gt->uncore;
>  	struct intel_engine_cs *engine;
>  	intel_engine_mask_t awake, tmp;
>  	enum intel_engine_id id;
> -	const i915_reg_t *regs;
> -	unsigned int num = 0;
>  	unsigned long flags;
>  
> -	/*
> -	 * New platforms should not be added with catch-all-newer (>=)
> -	 * condition so that any later platform added triggers the below warning
> -	 * and in turn mandates a human cross-check of whether the invalidation
> -	 * flows have compatible semantics.
> -	 *
> -	 * For instance with the 11.00 -> 12.00 transition three out of five
> -	 * respective engine registers were moved to masked type. Then after the
> -	 * 12.00 -> 12.50 transition multi cast handling is required too.
> -	 */
> -
> -	if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
> -	    GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
> -		regs = NULL;
> -		num = ARRAY_SIZE(xehp_regs);
> -	} else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
> -		   GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
> -		regs = gen12_regs;
> -		num = ARRAY_SIZE(gen12_regs);
> -	} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
> -		regs = gen8_regs;
> -		num = ARRAY_SIZE(gen8_regs);
> -	} else if (GRAPHICS_VER(i915) < 8) {
> -		return;
> -	}
> -
> -	if (gt_WARN_ONCE(gt, !num, "Platform does not implement TLB invalidation!"))
> +	if (GRAPHICS_VER(i915) < 8)
>  		return;
>  
>  	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
> @@ -1104,33 +1033,18 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>  
>  	awake = 0;
>  	for_each_engine(engine, gt, id) {
> -		struct reg_and_bit rb;
> -
>  		if (!intel_engine_pm_is_awake(engine))
>  			continue;
>  
> -		if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
> -			u32 val = BIT(engine->instance);
> -
> -			if (engine->class == VIDEO_DECODE_CLASS ||
> -			    engine->class == VIDEO_ENHANCEMENT_CLASS ||
> -			    engine->class == COMPUTE_CLASS)
> -				val = _MASKED_BIT_ENABLE(val);
> +		if (engine->tlb_inv.mcr)
>  			intel_gt_mcr_multicast_write_fw(gt,
> -							xehp_regs[engine->class],
> -							val);
> -		} else {
> -			rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
> -			if (!i915_mmio_reg_offset(rb.reg))
> -				continue;
> -
> -			if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS ||
> -			    engine->class == VIDEO_ENHANCEMENT_CLASS ||
> -			    engine->class == COMPUTE_CLASS))
> -				rb.bit = _MASKED_BIT_ENABLE(rb.bit);
> -
> -			intel_uncore_write_fw(uncore, rb.reg, rb.bit);
> -		}
> +							engine->tlb_inv.reg.mcr_reg,
> +							engine->tlb_inv.request);
> +		else
> +			intel_uncore_write_fw(uncore,
> +					      engine->tlb_inv.reg.reg,
> +					      engine->tlb_inv.request);
> +
>  		awake |= engine->mask;
>  	}
>  
> @@ -1149,17 +1063,9 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>  	intel_gt_mcr_unlock(gt, flags);
>  
>  	for_each_engine_masked(engine, gt, awake, tmp) {
> -		struct reg_and_bit rb;
> -
> -		if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
> -			rb.mcr_reg = xehp_regs[engine->class];
> -			rb.bit = BIT(engine->instance);
> -		} else {
> -			rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
> -		}
> -
> -		if (wait_for_invalidate(gt, rb))
> -			gt_err_ratelimited(gt, "%s TLB invalidation did not complete in %ums!\n",
> +		if (wait_for_invalidate(engine))
> +			gt_err_ratelimited(gt,
> +					   "%s TLB invalidation did not complete in %ums!\n",
>  					   engine->name, TLB_INVAL_TIMEOUT_MS);
>  	}
>  
> -- 
> 2.34.1
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Intel-gfx] [PATCH v3] drm/i915: Consolidate TLB invalidation flow
@ 2023-02-13 18:44   ` Matt Roper
  0 siblings, 0 replies; 11+ messages in thread
From: Matt Roper @ 2023-02-13 18:44 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx, Andrzej Hajda, dri-devel

On Wed, Feb 01, 2023 at 04:51:46PM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> As the logic for selecting the register and corresponsing values grew, the
> code become a bit unsightly. Consolidate by storing the required values at
> engine init time in the engine itself, and by doing so minimise the amount
> of invariant platform and engine checks during each and every TLB
> invalidation.
> 
> v2:
>  * Fail engine probe if TLB invlidations registers are unknown.
> 
> v3:
>  * Rebase.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Andrzej Hajda <andrzej.hajda@intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Reviewed-by: Andrzej Hajda <andrzej.hajda@intel.com> # v1

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>

> ---
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c    |  96 +++++++++++++
>  drivers/gpu/drm/i915/gt/intel_engine_types.h |  15 ++
>  drivers/gpu/drm/i915/gt/intel_gt.c           | 138 +++----------------
>  3 files changed, 133 insertions(+), 116 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index d4e29da74612..e430945743ec 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -9,6 +9,7 @@
>  
>  #include "gem/i915_gem_context.h"
>  #include "gem/i915_gem_internal.h"
> +#include "gt/intel_gt_print.h"
>  #include "gt/intel_gt_regs.h"
>  
>  #include "i915_cmd_parser.h"
> @@ -1143,12 +1144,107 @@ static int init_status_page(struct intel_engine_cs *engine)
>  	return ret;
>  }
>  
> +static int intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine)
> +{
> +	static const union intel_engine_tlb_inv_reg gen8_regs[] = {
> +		[RENDER_CLASS].reg		= GEN8_RTCR,
> +		[VIDEO_DECODE_CLASS].reg	= GEN8_M1TCR, /* , GEN8_M2TCR */
> +		[VIDEO_ENHANCEMENT_CLASS].reg	= GEN8_VTCR,
> +		[COPY_ENGINE_CLASS].reg		= GEN8_BTCR,
> +	};
> +	static const union intel_engine_tlb_inv_reg gen12_regs[] = {
> +		[RENDER_CLASS].reg		= GEN12_GFX_TLB_INV_CR,
> +		[VIDEO_DECODE_CLASS].reg	= GEN12_VD_TLB_INV_CR,
> +		[VIDEO_ENHANCEMENT_CLASS].reg	= GEN12_VE_TLB_INV_CR,
> +		[COPY_ENGINE_CLASS].reg		= GEN12_BLT_TLB_INV_CR,
> +		[COMPUTE_CLASS].reg		= GEN12_COMPCTX_TLB_INV_CR,
> +	};
> +	static const union intel_engine_tlb_inv_reg xehp_regs[] = {
> +		[RENDER_CLASS].mcr_reg		  = XEHP_GFX_TLB_INV_CR,
> +		[VIDEO_DECODE_CLASS].mcr_reg	  = XEHP_VD_TLB_INV_CR,
> +		[VIDEO_ENHANCEMENT_CLASS].mcr_reg = XEHP_VE_TLB_INV_CR,
> +		[COPY_ENGINE_CLASS].mcr_reg	  = XEHP_BLT_TLB_INV_CR,
> +		[COMPUTE_CLASS].mcr_reg		  = XEHP_COMPCTX_TLB_INV_CR,
> +	};
> +	struct drm_i915_private *i915 = engine->i915;
> +	const union intel_engine_tlb_inv_reg *regs;
> +	union intel_engine_tlb_inv_reg reg;
> +	unsigned int class = engine->class;
> +	unsigned int num = 0;
> +	u32 val;
> +
> +	/*
> +	 * New platforms should not be added with catch-all-newer (>=)
> +	 * condition so that any later platform added triggers the below warning
> +	 * and in turn mandates a human cross-check of whether the invalidation
> +	 * flows have compatible semantics.
> +	 *
> +	 * For instance with the 11.00 -> 12.00 transition three out of five
> +	 * respective engine registers were moved to masked type. Then after the
> +	 * 12.00 -> 12.50 transition multi cast handling is required too.
> +	 */
> +
> +	if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
> +	    GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
> +		regs = xehp_regs;
> +		num = ARRAY_SIZE(xehp_regs);
> +	} else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
> +		   GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
> +		regs = gen12_regs;
> +		num = ARRAY_SIZE(gen12_regs);
> +	} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
> +		regs = gen8_regs;
> +		num = ARRAY_SIZE(gen8_regs);
> +	} else if (GRAPHICS_VER(i915) < 8) {
> +		return 0;
> +	}
> +
> +	if (gt_WARN_ONCE(engine->gt, !num,
> +			 "Platform does not implement TLB invalidation!"))
> +		return -ENODEV;
> +
> +	if (gt_WARN_ON_ONCE(engine->gt,
> +			     class >= num ||
> +			     (!regs[class].reg.reg &&
> +			      !regs[class].mcr_reg.reg)))
> +		return -ERANGE;
> +
> +	reg = regs[class];
> +
> +	if (GRAPHICS_VER(i915) == 8 && class == VIDEO_DECODE_CLASS) {
> +		reg.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
> +		val = 0;
> +	} else {
> +		val = engine->instance;
> +	}
> +
> +	val = BIT(val);
> +
> +	engine->tlb_inv.mcr = regs == xehp_regs;
> +	engine->tlb_inv.reg = reg;
> +	engine->tlb_inv.done = val;
> +
> +	if (GRAPHICS_VER(i915) >= 12 &&
> +	    (engine->class == VIDEO_DECODE_CLASS ||
> +	     engine->class == VIDEO_ENHANCEMENT_CLASS ||
> +	     engine->class == COMPUTE_CLASS))
> +		engine->tlb_inv.request = _MASKED_BIT_ENABLE(val);
> +	else
> +		engine->tlb_inv.request = val;
> +
> +	return 0;
> +}
> +
>  static int engine_setup_common(struct intel_engine_cs *engine)
>  {
>  	int err;
>  
>  	init_llist_head(&engine->barrier_tasks);
>  
> +	err = intel_engine_init_tlb_invalidation(engine);
> +	if (err)
> +		return err;
> +
>  	err = init_status_page(engine);
>  	if (err)
>  		return err;
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 4fd54fb8810f..8c661fe89314 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -341,6 +341,19 @@ struct intel_engine_guc_stats {
>  	u64 start_gt_clk;
>  };
>  
> +union intel_engine_tlb_inv_reg {
> +	i915_reg_t	reg;
> +	i915_mcr_reg_t	mcr_reg;
> +};
> +
> +struct intel_engine_tlb_inv
> +{
> +	bool mcr;
> +	union intel_engine_tlb_inv_reg reg;
> +	u32 request;
> +	u32 done;
> +};
> +
>  struct intel_engine_cs {
>  	struct drm_i915_private *i915;
>  	struct intel_gt *gt;
> @@ -372,6 +385,8 @@ struct intel_engine_cs {
>  	u32 context_size;
>  	u32 mmio_base;
>  
> +	struct intel_engine_tlb_inv tlb_inv;
> +
>  	/*
>  	 * Some w/a require forcewake to be held (which prevents RC6) while
>  	 * a particular engine is active. If so, we set fw_domain to which
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
> index 001a7ec5b861..f7f271708fc7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -982,35 +982,6 @@ void intel_gt_info_print(const struct intel_gt_info *info,
>  	intel_sseu_dump(&info->sseu, p);
>  }
>  
> -struct reg_and_bit {
> -	union {
> -		i915_reg_t reg;
> -		i915_mcr_reg_t mcr_reg;
> -	};
> -	u32 bit;
> -};
> -
> -static struct reg_and_bit
> -get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
> -		const i915_reg_t *regs, const unsigned int num)
> -{
> -	const unsigned int class = engine->class;
> -	struct reg_and_bit rb = { };
> -
> -	if (gt_WARN_ON_ONCE(engine->gt, class >= num || !regs[class].reg))
> -		return rb;
> -
> -	rb.reg = regs[class];
> -	if (gen8 && class == VIDEO_DECODE_CLASS)
> -		rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
> -	else
> -		rb.bit = engine->instance;
> -
> -	rb.bit = BIT(rb.bit);
> -
> -	return rb;
> -}
> -
>  /*
>   * HW architecture suggest typical invalidation time at 40us,
>   * with pessimistic cases up to 100us and a recommendation to
> @@ -1024,14 +995,20 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
>   * but are now considered MCR registers.  Since they exist within a GAM range,
>   * the primary instance of the register rolls up the status from each unit.
>   */
> -static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
> +static int wait_for_invalidate(struct intel_engine_cs *engine)
>  {
> -	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
> -		return intel_gt_mcr_wait_for_reg(gt, rb.mcr_reg, rb.bit, 0,
> +	if (engine->tlb_inv.mcr)
> +		return intel_gt_mcr_wait_for_reg(engine->gt,
> +						 engine->tlb_inv.reg.mcr_reg,
> +						 engine->tlb_inv.done,
> +						 0,
>  						 TLB_INVAL_TIMEOUT_US,
>  						 TLB_INVAL_TIMEOUT_MS);
>  	else
> -		return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0,
> +		return __intel_wait_for_register_fw(engine->gt->uncore,
> +						    engine->tlb_inv.reg.reg,
> +						    engine->tlb_inv.done,
> +						    0,
>  						    TLB_INVAL_TIMEOUT_US,
>  						    TLB_INVAL_TIMEOUT_MS,
>  						    NULL);
> @@ -1039,62 +1016,14 @@ static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
>  
>  static void mmio_invalidate_full(struct intel_gt *gt)
>  {
> -	static const i915_reg_t gen8_regs[] = {
> -		[RENDER_CLASS]			= GEN8_RTCR,
> -		[VIDEO_DECODE_CLASS]		= GEN8_M1TCR, /* , GEN8_M2TCR */
> -		[VIDEO_ENHANCEMENT_CLASS]	= GEN8_VTCR,
> -		[COPY_ENGINE_CLASS]		= GEN8_BTCR,
> -	};
> -	static const i915_reg_t gen12_regs[] = {
> -		[RENDER_CLASS]			= GEN12_GFX_TLB_INV_CR,
> -		[VIDEO_DECODE_CLASS]		= GEN12_VD_TLB_INV_CR,
> -		[VIDEO_ENHANCEMENT_CLASS]	= GEN12_VE_TLB_INV_CR,
> -		[COPY_ENGINE_CLASS]		= GEN12_BLT_TLB_INV_CR,
> -		[COMPUTE_CLASS]			= GEN12_COMPCTX_TLB_INV_CR,
> -	};
> -	static const i915_mcr_reg_t xehp_regs[] = {
> -		[RENDER_CLASS]			= XEHP_GFX_TLB_INV_CR,
> -		[VIDEO_DECODE_CLASS]		= XEHP_VD_TLB_INV_CR,
> -		[VIDEO_ENHANCEMENT_CLASS]	= XEHP_VE_TLB_INV_CR,
> -		[COPY_ENGINE_CLASS]		= XEHP_BLT_TLB_INV_CR,
> -		[COMPUTE_CLASS]			= XEHP_COMPCTX_TLB_INV_CR,
> -	};
>  	struct drm_i915_private *i915 = gt->i915;
>  	struct intel_uncore *uncore = gt->uncore;
>  	struct intel_engine_cs *engine;
>  	intel_engine_mask_t awake, tmp;
>  	enum intel_engine_id id;
> -	const i915_reg_t *regs;
> -	unsigned int num = 0;
>  	unsigned long flags;
>  
> -	/*
> -	 * New platforms should not be added with catch-all-newer (>=)
> -	 * condition so that any later platform added triggers the below warning
> -	 * and in turn mandates a human cross-check of whether the invalidation
> -	 * flows have compatible semantics.
> -	 *
> -	 * For instance with the 11.00 -> 12.00 transition three out of five
> -	 * respective engine registers were moved to masked type. Then after the
> -	 * 12.00 -> 12.50 transition multi cast handling is required too.
> -	 */
> -
> -	if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
> -	    GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
> -		regs = NULL;
> -		num = ARRAY_SIZE(xehp_regs);
> -	} else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
> -		   GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
> -		regs = gen12_regs;
> -		num = ARRAY_SIZE(gen12_regs);
> -	} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
> -		regs = gen8_regs;
> -		num = ARRAY_SIZE(gen8_regs);
> -	} else if (GRAPHICS_VER(i915) < 8) {
> -		return;
> -	}
> -
> -	if (gt_WARN_ONCE(gt, !num, "Platform does not implement TLB invalidation!"))
> +	if (GRAPHICS_VER(i915) < 8)
>  		return;
>  
>  	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
> @@ -1104,33 +1033,18 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>  
>  	awake = 0;
>  	for_each_engine(engine, gt, id) {
> -		struct reg_and_bit rb;
> -
>  		if (!intel_engine_pm_is_awake(engine))
>  			continue;
>  
> -		if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
> -			u32 val = BIT(engine->instance);
> -
> -			if (engine->class == VIDEO_DECODE_CLASS ||
> -			    engine->class == VIDEO_ENHANCEMENT_CLASS ||
> -			    engine->class == COMPUTE_CLASS)
> -				val = _MASKED_BIT_ENABLE(val);
> +		if (engine->tlb_inv.mcr)
>  			intel_gt_mcr_multicast_write_fw(gt,
> -							xehp_regs[engine->class],
> -							val);
> -		} else {
> -			rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
> -			if (!i915_mmio_reg_offset(rb.reg))
> -				continue;
> -
> -			if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS ||
> -			    engine->class == VIDEO_ENHANCEMENT_CLASS ||
> -			    engine->class == COMPUTE_CLASS))
> -				rb.bit = _MASKED_BIT_ENABLE(rb.bit);
> -
> -			intel_uncore_write_fw(uncore, rb.reg, rb.bit);
> -		}
> +							engine->tlb_inv.reg.mcr_reg,
> +							engine->tlb_inv.request);
> +		else
> +			intel_uncore_write_fw(uncore,
> +					      engine->tlb_inv.reg.reg,
> +					      engine->tlb_inv.request);
> +
>  		awake |= engine->mask;
>  	}
>  
> @@ -1149,17 +1063,9 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>  	intel_gt_mcr_unlock(gt, flags);
>  
>  	for_each_engine_masked(engine, gt, awake, tmp) {
> -		struct reg_and_bit rb;
> -
> -		if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
> -			rb.mcr_reg = xehp_regs[engine->class];
> -			rb.bit = BIT(engine->instance);
> -		} else {
> -			rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
> -		}
> -
> -		if (wait_for_invalidate(gt, rb))
> -			gt_err_ratelimited(gt, "%s TLB invalidation did not complete in %ums!\n",
> +		if (wait_for_invalidate(engine))
> +			gt_err_ratelimited(gt,
> +					   "%s TLB invalidation did not complete in %ums!\n",
>  					   engine->name, TLB_INVAL_TIMEOUT_MS);
>  	}
>  
> -- 
> 2.34.1
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Intel-gfx] [PATCH v3] drm/i915: Consolidate TLB invalidation flow
  2023-02-02  9:39     ` Andrzej Hajda
@ 2023-02-15  8:14       ` Tvrtko Ursulin
  0 siblings, 0 replies; 11+ messages in thread
From: Tvrtko Ursulin @ 2023-02-15  8:14 UTC (permalink / raw)
  To: Andrzej Hajda, Intel-gfx, dri-devel


On 02/02/2023 09:39, Andrzej Hajda wrote:
> On 02.02.2023 09:33, Tvrtko Ursulin wrote:
>>
>> On 02/02/2023 07:43, Andrzej Hajda wrote:
>>> On 01.02.2023 17:51, Tvrtko Ursulin wrote:
>>
>> [snip]
>>
>>
>>
>> Btw - do you have any idea why the test is suppressed already?! CI 
>> told me BAT was a success...
> 
> 
> Except this patch, igt@i915_selftest@live@gt_tlb always succeeds[1][2]. 
> So I guess this is just CI logic which do not trust new tests, sounds 
> reasonable. Lets wait few days to see if it changes.

Did another run and it is all still green. Also have another r-b from 
Matt now. Okay to merge from your point of view?

Regards,

Tvrtko

> [1]: 
> http://gfx-ci.igk.intel.com/cibuglog-ng/results/all?query_key=d3cc1f04e52acd0f911cd54fd855a3f085a40e14
> [2]: 
> https://lore.kernel.org/intel-gfx/?q=igt%40i915_selftest%40live%40gt_tlb
> 
> 
> Regards
> Andrzej
> 
>>
>> Regards,
>>
>> Tvrtko
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2023-02-15  8:14 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-01 16:51 [PATCH v3] drm/i915: Consolidate TLB invalidation flow Tvrtko Ursulin
2023-02-01 16:51 ` [Intel-gfx] " Tvrtko Ursulin
2023-02-01 18:04 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
2023-02-01 18:20 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2023-02-01 19:49 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
2023-02-02  7:43 ` [Intel-gfx] [PATCH v3] " Andrzej Hajda
2023-02-02  8:33   ` Tvrtko Ursulin
2023-02-02  9:39     ` Andrzej Hajda
2023-02-15  8:14       ` Tvrtko Ursulin
2023-02-13 18:44 ` Matt Roper
2023-02-13 18:44   ` [Intel-gfx] " Matt Roper

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.