All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH 1/3] drm/i915: Add per ctx batchbuffer wa for timestamp
@ 2020-04-17 14:44 Mika Kuoppala
  2020-04-17 14:44 ` [Intel-gfx] [PATCH 2/3] drm/i915: Add live selftests for indirect ctx batchbuffers Mika Kuoppala
                   ` (6 more replies)
  0 siblings, 7 replies; 11+ messages in thread
From: Mika Kuoppala @ 2020-04-17 14:44 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Restoration of a previous timestamp can collide
with updating the timestamp, causing a value corruption.

Combat this issue by using indirect ctx bb to
modify the context image during restoring process.

For render engine, we can preload value into
scratch register. From which we then do the actual
write with LRR. LRR is faster and thus less error prone.
For other engines, no scratch is available so we
must do a more complex sequence of sync and async LRMs.
As the LRM is slower, the probablity of racy write
raises and thus we still see corruption sometimes.

References: HSDES#16010904313
Testcase: igt/i915_selftest/gt_lrc
Suggested-by: Joseph Koston <joseph.koston@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

bug on fix
---
 drivers/gpu/drm/i915/gt/intel_context_types.h |   3 +
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h  |   3 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 196 ++++++++++++++----
 drivers/gpu/drm/i915/gt/intel_lrc_reg.h       |   1 +
 4 files changed, 165 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 07cb83a0d017..c7573d565f58 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -70,6 +70,9 @@ struct intel_context {
 
 	u32 *lrc_reg_state;
 	u64 lrc_desc;
+
+	u32 ctx_bb_offset;
+
 	u32 tag; /* cookie passed to HW to track this context on submission */
 
 	/* Time on GPU as tracked by the hw. */
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index f04214a54f75..0c2adb4078a7 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -138,7 +138,7 @@
  */
 #define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x)-1)
 /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
-#define   MI_LRI_CS_MMIO		(1<<19)
+#define   MI_LRI_LRM_CS_MMIO		(1<<19)
 #define   MI_LRI_FORCE_POSTED		(1<<12)
 #define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
 #define MI_STORE_REGISTER_MEM        MI_INSTR(0x24, 1)
@@ -155,6 +155,7 @@
 #define   MI_FLUSH_DW_USE_PPGTT		(0<<2)
 #define MI_LOAD_REGISTER_MEM	   MI_INSTR(0x29, 1)
 #define MI_LOAD_REGISTER_MEM_GEN8  MI_INSTR(0x29, 2)
+#define   MI_LRM_ASYNC			(1<<21)
 #define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
 #define MI_BATCH_BUFFER		MI_INSTR(0x30, 1)
 #define   MI_BATCH_NON_SECURE		(1)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 6fbad5e2343f..531884b9050c 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -234,7 +234,7 @@ static void execlists_init_reg_state(u32 *reg_state,
 				     const struct intel_ring *ring,
 				     bool close);
 static void
-__execlists_update_reg_state(const struct intel_context *ce,
+__execlists_update_reg_state(struct intel_context *ce,
 			     const struct intel_engine_cs *engine,
 			     u32 head);
 
@@ -537,7 +537,7 @@ static void set_offsets(u32 *regs,
 		if (flags & POSTED)
 			*regs |= MI_LRI_FORCE_POSTED;
 		if (INTEL_GEN(engine->i915) >= 11)
-			*regs |= MI_LRI_CS_MMIO;
+			*regs |= MI_LRI_LRM_CS_MMIO;
 		regs++;
 
 		GEM_BUG_ON(!count);
@@ -3142,8 +3142,152 @@ static void execlists_context_unpin(struct intel_context *ce)
 	i915_gem_object_unpin_map(ce->state->obj);
 }
 
+static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
+{
+	u32 indirect_ctx_offset;
+
+	switch (INTEL_GEN(engine->i915)) {
+	default:
+		MISSING_CASE(INTEL_GEN(engine->i915));
+		fallthrough;
+	case 12:
+		indirect_ctx_offset =
+			GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+		break;
+	case 11:
+		indirect_ctx_offset =
+			GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+		break;
+	case 10:
+		indirect_ctx_offset =
+			GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+		break;
+	case 9:
+		indirect_ctx_offset =
+			GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+		break;
+	case 8:
+		indirect_ctx_offset =
+			GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+		break;
+	}
+
+	return indirect_ctx_offset;
+}
+
+static u32 *
+gen12_emit_timestamp_wa_lrm(struct intel_context *ce, u32 *cs)
+{
+	const u32 lrc_offset = i915_ggtt_offset(ce->state) +
+		LRC_STATE_PN * PAGE_SIZE;
+	const u32 lrm = MI_LOAD_REGISTER_MEM_GEN8 |
+		MI_SRM_LRM_GLOBAL_GTT | MI_LRI_LRM_CS_MMIO;
+
+	*cs++ = lrm;
+	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
+	*cs++ = lrc_offset + CTX_TIMESTAMP * sizeof(u32);
+	*cs++ = 0;
+
+	*cs++ = lrm | MI_LRM_ASYNC;
+	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
+	*cs++ = lrc_offset + CTX_TIMESTAMP * sizeof(u32);
+	*cs++ = 0;
+
+	*cs++ = lrm | MI_LRM_ASYNC;
+	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
+	*cs++ = lrc_offset + CTX_TIMESTAMP * sizeof(u32);
+	*cs++ = 0;
+
+	*cs++ = lrm;
+	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
+	*cs++ = lrc_offset + CTX_TIMESTAMP * sizeof(u32);
+	*cs++ = 0;
+
+	return cs;
+}
+
+static u32 *
+gen12_emit_timestamp_wa_lrr(struct intel_context *ce, u32 *cs)
+{
+	const u32 lrc_offset = i915_ggtt_offset(ce->state) +
+		LRC_STATE_PN * PAGE_SIZE;
+	const u32 lrm = MI_LOAD_REGISTER_MEM_GEN8 |
+		MI_SRM_LRM_GLOBAL_GTT | MI_LRI_LRM_CS_MMIO;
+	const u32 scratch_reg = 0x2198;
+
+	*cs++ = lrm;
+	*cs++ = scratch_reg;
+	*cs++ = lrc_offset + CTX_TIMESTAMP * sizeof(u32);
+	*cs++ = 0;
+
+	*cs++ = MI_LOAD_REGISTER_REG | MI_LRI_LRM_CS_MMIO;
+	*cs++ = scratch_reg;
+	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
+
+	*cs++ = MI_LOAD_REGISTER_REG | MI_LRI_LRM_CS_MMIO;
+	*cs++ = scratch_reg;
+	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
+
+	return cs;
+}
+
+static u32 *
+execlists_emit_ctx_bb(struct intel_context *ce,
+		      u32 *(*emit)(struct intel_context *, u32 *))
+{
+	u32 *cs = (void *)(ce->lrc_reg_state) -
+		LRC_STATE_PN * PAGE_SIZE + ce->ctx_bb_offset;
+	const u32 * const batch_start = cs;
+
+	GEM_DEBUG_BUG_ON(!ce->ctx_bb_offset);
+
+	cs = emit(ce, cs);
+
+	GEM_DEBUG_BUG_ON(cs - batch_start >
+			 (I915_GTT_PAGE_SIZE - 4)/sizeof(*cs));
+
+	return cs;
+}
+
+static void
+execlists_setup_indirect_ctx_bb(struct intel_context *ce,
+				u32 *(*emit)(struct intel_context *, u32 *))
+{
+	const u32 indirect_ptr_offset =
+		INTEL_GEN(ce->engine->i915) >= 12 ?
+		GEN12_CTX_BB_PER_CTX_PTR + 2 :
+		CTX_BB_PER_CTX_PTR + 2;
+	const u32 * const start = (void *)(ce->lrc_reg_state) -
+		LRC_STATE_PN * PAGE_SIZE + ce->ctx_bb_offset;
+	u32 *cs;
+
+	cs = execlists_emit_ctx_bb(ce, emit);
+
+	while ((unsigned long)cs % CACHELINE_BYTES)
+		*cs++ = MI_NOOP;
+
+	ce->lrc_reg_state[indirect_ptr_offset] =
+		(i915_ggtt_offset(ce->state) + ce->ctx_bb_offset) |
+		(cs - start) * sizeof(*cs) /
+		CACHELINE_BYTES;
+
+	ce->lrc_reg_state[indirect_ptr_offset + 2] =
+		intel_lr_indirect_ctx_offset(ce->engine) << 6;
+}
+
 static void
-__execlists_update_reg_state(const struct intel_context *ce,
+gen12_setup_timestamp_ctx_wa(struct intel_context *ce)
+{
+	if (ce->engine->class == RENDER_CLASS)
+		execlists_setup_indirect_ctx_bb(ce,
+					gen12_emit_timestamp_wa_lrr);
+	else
+		execlists_setup_indirect_ctx_bb(ce,
+						gen12_emit_timestamp_wa_lrm);
+}
+
+static void
+__execlists_update_reg_state(struct intel_context *ce,
 			     const struct intel_engine_cs *engine,
 			     u32 head)
 {
@@ -3164,6 +3308,13 @@ __execlists_update_reg_state(const struct intel_context *ce,
 			intel_sseu_make_rpcs(engine->i915, &ce->sseu);
 
 		i915_oa_init_reg_state(ce, engine);
+
+	}
+
+	if (ce->ctx_bb_offset) {
+		/* Mutually exclusive wrt to global indirect bb */
+		GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
+		gen12_setup_timestamp_ctx_wa(ce);
 	}
 }
 
@@ -4667,40 +4818,6 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 	return 0;
 }
 
-static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
-{
-	u32 indirect_ctx_offset;
-
-	switch (INTEL_GEN(engine->i915)) {
-	default:
-		MISSING_CASE(INTEL_GEN(engine->i915));
-		/* fall through */
-	case 12:
-		indirect_ctx_offset =
-			GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
-		break;
-	case 11:
-		indirect_ctx_offset =
-			GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
-		break;
-	case 10:
-		indirect_ctx_offset =
-			GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
-		break;
-	case 9:
-		indirect_ctx_offset =
-			GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
-		break;
-	case 8:
-		indirect_ctx_offset =
-			GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
-		break;
-	}
-
-	return indirect_ctx_offset;
-}
-
-
 static void init_common_reg_state(u32 * const regs,
 				  const struct intel_engine_cs *engine,
 				  const struct intel_ring *ring,
@@ -4867,6 +4984,11 @@ static int __execlists_context_alloc(struct intel_context *ce,
 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
 		context_size += I915_GTT_PAGE_SIZE; /* for redzone */
 
+	if (INTEL_GEN(engine->i915) == 12) {
+		ce->ctx_bb_offset = context_size;
+		context_size += PAGE_SIZE;
+	}
+
 	ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
 	if (IS_ERR(ctx_obj))
 		return PTR_ERR(ctx_obj);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index d39b72590e40..bb444614f33b 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -32,6 +32,7 @@
 
 /* GEN12+ Reg State Context */
 #define GEN12_CTX_BB_PER_CTX_PTR		(0x12 + 1)
+#define   CTX_BB_PER_CTX_PTR_VALID		BIT(0)
 
 #define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \
 	u32 *reg_state__ = (reg_state); \
-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Intel-gfx] [PATCH 2/3] drm/i915: Add live selftests for indirect ctx batchbuffers
  2020-04-17 14:44 [Intel-gfx] [PATCH 1/3] drm/i915: Add per ctx batchbuffer wa for timestamp Mika Kuoppala
@ 2020-04-17 14:44 ` Mika Kuoppala
  2020-04-17 15:13   ` Chris Wilson
  2020-04-17 14:44 ` [Intel-gfx] [PATCH 3/3] drm/i915: Use indirect ctx bb to mend CMD_BUF_CCTL Mika Kuoppala
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 11+ messages in thread
From: Mika Kuoppala @ 2020-04-17 14:44 UTC (permalink / raw)
  To: intel-gfx

Add live selftest for indirect ctx context batchbuffers

Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 152 ++++++++++++++++++++++++-
 1 file changed, 151 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 6f5e35afe1b2..cd7543e10813 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -5363,6 +5363,155 @@ static int live_lrc_isolation(void *arg)
 	return err;
 }
 
+static int ctx_bb_submit_req(struct intel_context *ce)
+{
+	struct i915_request *rq;
+	int err;
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	err = i915_request_wait(rq, 0, HZ / 5);
+	if (err < 0)
+		pr_err("%s: request not completed!\n", rq->engine->name);
+
+	i915_request_put(rq);
+
+	return 0;
+}
+
+#define CTX_BB_CANARY_OFFSET (3*1024)
+#define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET/sizeof(u32))
+
+static u32 *
+emit_ctx_bb_canary(struct intel_context *ce, u32 *cs)
+{
+	const u32 ring_start_reg = i915_mmio_reg_offset(RING_START(0));
+	const u32 srm = MI_STORE_REGISTER_MEM_GEN8 |
+		MI_SRM_LRM_GLOBAL_GTT | MI_LRI_LRM_CS_MMIO;
+
+	*cs++ = srm;
+	*cs++ = ring_start_reg;
+	*cs++ = i915_ggtt_offset(ce->state) +
+		ce->ctx_bb_offset + CTX_BB_CANARY_OFFSET;
+	*cs++ = 0;
+
+	return cs;
+}
+
+static void
+ctx_bb_setup(struct intel_context *ce)
+{
+	u32 *cs = (void *)(ce->lrc_reg_state) -
+		LRC_STATE_PN * PAGE_SIZE + ce->ctx_bb_offset;
+
+	cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
+
+	execlists_setup_indirect_ctx_bb(ce, emit_ctx_bb_canary);
+}
+
+static bool check_ring_start(struct intel_context *ce)
+{
+	const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
+		LRC_STATE_PN * PAGE_SIZE + ce->ctx_bb_offset;
+
+	if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
+		return true;
+
+	pr_err("ring start mismatch canary 0x%08x vs state 0x%08x\n",
+	       ctx_bb[CTX_BB_CANARY_INDEX],
+	       ce->lrc_reg_state[CTX_RING_START]);
+
+	return false;
+}
+
+static int ctx_bb_check(struct intel_context *ce)
+{
+	int err;
+
+	err = ctx_bb_submit_req(ce);
+	if (err)
+		return err;
+
+	if (!check_ring_start(ce))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int __per_ctx_bb(struct intel_engine_cs *engine)
+{
+	struct intel_context *ce1, *ce2;
+	int err = 0;
+
+	ce1 = intel_context_create(engine);
+	ce2 = intel_context_create(engine);
+
+	err = intel_context_pin(ce1);
+	if (err)
+		return err;
+
+	err = intel_context_pin(ce2);
+	if (err) {
+		intel_context_put(ce1);
+		return err;
+	}
+
+	if (!ce1->ctx_bb_offset) {
+		GEM_BUG_ON(ce2->ctx_bb_offset);
+		/* must have */
+		GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
+		goto out;
+	}
+
+	/* make the batch to grab ring start and write it to canary spot */
+	ctx_bb_setup(ce1);
+	ctx_bb_setup(ce2);
+
+	err = ctx_bb_check(ce1);
+	if (err)
+		goto out;
+
+	err = ctx_bb_check(ce2);
+out:
+	intel_context_unpin(ce2);
+	intel_context_put(ce2);
+
+	intel_context_unpin(ce1);
+	intel_context_put(ce1);
+
+	return err;
+}
+
+static int live_lrc_indirect_ctx_bb(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = 0;
+
+	for_each_engine(engine, gt, id) {
+
+		intel_engine_pm_get(engine);
+		err = __per_ctx_bb(engine);
+		intel_engine_pm_put(engine);
+
+		if (err)
+			break;
+
+		if (igt_flush_test(gt->i915)) {
+			err = -EIO;
+			break;
+		}
+	}
+
+	return err;
+}
+
 static void garbage_reset(struct intel_engine_cs *engine,
 			  struct i915_request *rq)
 {
@@ -5594,10 +5743,11 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_lrc_fixed),
 		SUBTEST(live_lrc_state),
 		SUBTEST(live_lrc_gpr),
-		SUBTEST(live_lrc_isolation),
+		SUBTEST(live_lrc_indirect_ctx_bb),
 		SUBTEST(live_lrc_timestamp),
 		SUBTEST(live_lrc_garbage),
 		SUBTEST(live_pphwsp_runtime),
+		SUBTEST(live_lrc_isolation),
 	};
 
 	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Intel-gfx] [PATCH 3/3] drm/i915: Use indirect ctx bb to mend CMD_BUF_CCTL
  2020-04-17 14:44 [Intel-gfx] [PATCH 1/3] drm/i915: Add per ctx batchbuffer wa for timestamp Mika Kuoppala
  2020-04-17 14:44 ` [Intel-gfx] [PATCH 2/3] drm/i915: Add live selftests for indirect ctx batchbuffers Mika Kuoppala
@ 2020-04-17 14:44 ` Mika Kuoppala
  2020-04-17 14:55   ` Chris Wilson
  2020-04-17 15:07 ` [Intel-gfx] [PATCH 1/3] drm/i915: Add per ctx batchbuffer wa for timestamp Chris Wilson
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 11+ messages in thread
From: Mika Kuoppala @ 2020-04-17 14:44 UTC (permalink / raw)
  To: intel-gfx

Use indirect ctx bb to load cmd buffer control value
from context image to avoid corruption.

Testcase: igt/i915_selftest/gt_lrc
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c     | 13 +++++++++++--
 drivers/gpu/drm/i915/gt/intel_lrc_reg.h |  1 +
 drivers/gpu/drm/i915/i915_reg.h         |  1 +
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 531884b9050c..45e07209ddf3 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -3207,7 +3207,7 @@ gen12_emit_timestamp_wa_lrm(struct intel_context *ce, u32 *cs)
 }
 
 static u32 *
-gen12_emit_timestamp_wa_lrr(struct intel_context *ce, u32 *cs)
+gen12_emit_render_ctx_wa(struct intel_context *ce, u32 *cs)
 {
 	const u32 lrc_offset = i915_ggtt_offset(ce->state) +
 		LRC_STATE_PN * PAGE_SIZE;
@@ -3228,6 +3228,15 @@ gen12_emit_timestamp_wa_lrr(struct intel_context *ce, u32 *cs)
 	*cs++ = scratch_reg;
 	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
 
+	*cs++ = lrm;
+	*cs++ = scratch_reg;
+	*cs++ = lrc_offset + CTX_CMD_BUF_CCTL * sizeof(u32);
+	*cs++ = 0;
+
+	*cs++ = MI_LOAD_REGISTER_REG | MI_LRI_LRM_CS_MMIO;
+	*cs++ = scratch_reg;
+	*cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));
+
 	return cs;
 }
 
@@ -3280,7 +3289,7 @@ gen12_setup_timestamp_ctx_wa(struct intel_context *ce)
 {
 	if (ce->engine->class == RENDER_CLASS)
 		execlists_setup_indirect_ctx_bb(ce,
-					gen12_emit_timestamp_wa_lrr);
+					gen12_emit_render_ctx_wa);
 	else
 		execlists_setup_indirect_ctx_bb(ce,
 						gen12_emit_timestamp_wa_lrm);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index bb444614f33b..6c81a3a815ac 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -27,6 +27,7 @@
 #define CTX_PDP0_UDW			(0x30 + 1)
 #define CTX_PDP0_LDW			(0x32 + 1)
 #define CTX_R_PWR_CLK_STATE		(0x42 + 1)
+#define CTX_CMD_BUF_CCTL		(0xB6 + 1)
 
 #define GEN9_CTX_RING_MI_MODE		0x54
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index a24e23e9b3d0..b31ca8e4399f 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2657,6 +2657,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define RING_DMA_FADD_UDW(base)	_MMIO((base) + 0x60) /* gen8+ */
 #define RING_INSTPM(base)	_MMIO((base) + 0xc0)
 #define RING_MI_MODE(base)	_MMIO((base) + 0x9c)
+#define RING_CMD_BUF_CCTL(base) _MMIO((base) + 0x84)
 #define INSTPS		_MMIO(0x2070) /* 965+ only */
 #define GEN4_INSTDONE1	_MMIO(0x207c) /* 965+ only, aka INSTDONE_2 on SNB */
 #define ACTHD_I965	_MMIO(0x2074)
-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [Intel-gfx] [PATCH 3/3] drm/i915: Use indirect ctx bb to mend CMD_BUF_CCTL
  2020-04-17 14:44 ` [Intel-gfx] [PATCH 3/3] drm/i915: Use indirect ctx bb to mend CMD_BUF_CCTL Mika Kuoppala
@ 2020-04-17 14:55   ` Chris Wilson
  0 siblings, 0 replies; 11+ messages in thread
From: Chris Wilson @ 2020-04-17 14:55 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2020-04-17 15:44:29)
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> index bb444614f33b..6c81a3a815ac 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> @@ -27,6 +27,7 @@
>  #define CTX_PDP0_UDW                   (0x30 + 1)
>  #define CTX_PDP0_LDW                   (0x32 + 1)
>  #define CTX_R_PWR_CLK_STATE            (0x42 + 1)
> +#define CTX_CMD_BUF_CCTL               (0xB6 + 1)

Can you add this to live_lrc_layout to confirm its location?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Intel-gfx] [PATCH 1/3] drm/i915: Add per ctx batchbuffer wa for timestamp
  2020-04-17 14:44 [Intel-gfx] [PATCH 1/3] drm/i915: Add per ctx batchbuffer wa for timestamp Mika Kuoppala
  2020-04-17 14:44 ` [Intel-gfx] [PATCH 2/3] drm/i915: Add live selftests for indirect ctx batchbuffers Mika Kuoppala
  2020-04-17 14:44 ` [Intel-gfx] [PATCH 3/3] drm/i915: Use indirect ctx bb to mend CMD_BUF_CCTL Mika Kuoppala
@ 2020-04-17 15:07 ` Chris Wilson
  2020-04-21 10:01   ` Mika Kuoppala
  2020-04-17 20:29 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] " Patchwork
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2020-04-17 15:07 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2020-04-17 15:44:27)
> Restoration of a previous timestamp can collide
> with updating the timestamp, causing a value corruption.
> 
> Combat this issue by using indirect ctx bb to
> modify the context image during restoring process.
> 
> For render engine, we can preload value into
> scratch register. From which we then do the actual
> write with LRR. LRR is faster and thus less error prone.
> For other engines, no scratch is available so we
> must do a more complex sequence of sync and async LRMs.
> As the LRM is slower, the probablity of racy write
> raises and thus we still see corruption sometimes.
> 
> References: HSDES#16010904313
> Testcase: igt/i915_selftest/gt_lrc
> Suggested-by: Joseph Koston <joseph.koston@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> 
> bug on fix
> ---
>  drivers/gpu/drm/i915/gt/intel_context_types.h |   3 +
>  drivers/gpu/drm/i915/gt/intel_gpu_commands.h  |   3 +-
>  drivers/gpu/drm/i915/gt/intel_lrc.c           | 196 ++++++++++++++----
>  drivers/gpu/drm/i915/gt/intel_lrc_reg.h       |   1 +
>  4 files changed, 165 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index 07cb83a0d017..c7573d565f58 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -70,6 +70,9 @@ struct intel_context {
>  
>         u32 *lrc_reg_state;
>         u64 lrc_desc;
> +
> +       u32 ctx_bb_offset;
> +
>         u32 tag; /* cookie passed to HW to track this context on submission */
>  
>         /* Time on GPU as tracked by the hw. */
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index f04214a54f75..0c2adb4078a7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -138,7 +138,7 @@
>   */
>  #define MI_LOAD_REGISTER_IMM(x)        MI_INSTR(0x22, 2*(x)-1)
>  /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
> -#define   MI_LRI_CS_MMIO               (1<<19)
> +#define   MI_LRI_LRM_CS_MMIO           (1<<19)
>  #define   MI_LRI_FORCE_POSTED          (1<<12)
>  #define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
>  #define MI_STORE_REGISTER_MEM        MI_INSTR(0x24, 1)
> @@ -155,6 +155,7 @@
>  #define   MI_FLUSH_DW_USE_PPGTT                (0<<2)
>  #define MI_LOAD_REGISTER_MEM      MI_INSTR(0x29, 1)
>  #define MI_LOAD_REGISTER_MEM_GEN8  MI_INSTR(0x29, 2)
> +#define   MI_LRM_ASYNC                 (1<<21)
>  #define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
>  #define MI_BATCH_BUFFER                MI_INSTR(0x30, 1)
>  #define   MI_BATCH_NON_SECURE          (1)
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 6fbad5e2343f..531884b9050c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -234,7 +234,7 @@ static void execlists_init_reg_state(u32 *reg_state,
>                                      const struct intel_ring *ring,
>                                      bool close);
>  static void
> -__execlists_update_reg_state(const struct intel_context *ce,
> +__execlists_update_reg_state(struct intel_context *ce,
>                              const struct intel_engine_cs *engine,
>                              u32 head);
>  
> @@ -537,7 +537,7 @@ static void set_offsets(u32 *regs,
>                 if (flags & POSTED)
>                         *regs |= MI_LRI_FORCE_POSTED;
>                 if (INTEL_GEN(engine->i915) >= 11)
> -                       *regs |= MI_LRI_CS_MMIO;
> +                       *regs |= MI_LRI_LRM_CS_MMIO;
>                 regs++;
>  
>                 GEM_BUG_ON(!count);
> @@ -3142,8 +3142,152 @@ static void execlists_context_unpin(struct intel_context *ce)
>         i915_gem_object_unpin_map(ce->state->obj);
>  }
>  
> +static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
> +{
> +       u32 indirect_ctx_offset;
> +
> +       switch (INTEL_GEN(engine->i915)) {
> +       default:
> +               MISSING_CASE(INTEL_GEN(engine->i915));
> +               fallthrough;
> +       case 12:
> +               indirect_ctx_offset =
> +                       GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
> +               break;
> +       case 11:
> +               indirect_ctx_offset =
> +                       GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
> +               break;
> +       case 10:
> +               indirect_ctx_offset =
> +                       GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
> +               break;
> +       case 9:
> +               indirect_ctx_offset =
> +                       GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
> +               break;
> +       case 8:
> +               indirect_ctx_offset =
> +                       GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
> +               break;
> +       }
> +
> +       return indirect_ctx_offset;
> +}
> +
> +static u32 *
> +gen12_emit_timestamp_wa_lrm(struct intel_context *ce, u32 *cs)
> +{
> +       const u32 lrc_offset = i915_ggtt_offset(ce->state) +
> +               LRC_STATE_PN * PAGE_SIZE;
> +       const u32 lrm = MI_LOAD_REGISTER_MEM_GEN8 |
> +               MI_SRM_LRM_GLOBAL_GTT | MI_LRI_LRM_CS_MMIO;
> +
> +       *cs++ = lrm;
> +       *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
> +       *cs++ = lrc_offset + CTX_TIMESTAMP * sizeof(u32);
> +       *cs++ = 0;
> +
> +       *cs++ = lrm | MI_LRM_ASYNC;
> +       *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
> +       *cs++ = lrc_offset + CTX_TIMESTAMP * sizeof(u32);
> +       *cs++ = 0;
> +
> +       *cs++ = lrm | MI_LRM_ASYNC;
> +       *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
> +       *cs++ = lrc_offset + CTX_TIMESTAMP * sizeof(u32);
> +       *cs++ = 0;
> +
> +       *cs++ = lrm;
> +       *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
> +       *cs++ = lrc_offset + CTX_TIMESTAMP * sizeof(u32);
> +       *cs++ = 0;
> +
> +       return cs;
> +}
> +
> +static u32 *
> +gen12_emit_timestamp_wa_lrr(struct intel_context *ce, u32 *cs)
> +{
> +       const u32 lrc_offset = i915_ggtt_offset(ce->state) +
> +               LRC_STATE_PN * PAGE_SIZE;

Repetitive.

> +       const u32 lrm = MI_LOAD_REGISTER_MEM_GEN8 |
> +               MI_SRM_LRM_GLOBAL_GTT | MI_LRI_LRM_CS_MMIO;

Used once, might as just use the constant inplace.

> +       const u32 scratch_reg = 0x2198;
> +
> +       *cs++ = lrm;
> +       *cs++ = scratch_reg;
> +       *cs++ = lrc_offset + CTX_TIMESTAMP * sizeof(u32);
> +       *cs++ = 0;
> +
> +       *cs++ = MI_LOAD_REGISTER_REG | MI_LRI_LRM_CS_MMIO;
> +       *cs++ = scratch_reg;
> +       *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
> +
> +       *cs++ = MI_LOAD_REGISTER_REG | MI_LRI_LRM_CS_MMIO;
> +       *cs++ = scratch_reg;
> +       *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
> +
> +       return cs;
> +}
> +
> +static u32 *
> +execlists_emit_ctx_bb(struct intel_context *ce,
> +                     u32 *(*emit)(struct intel_context *, u32 *))
> +{
> +       u32 *cs = (void *)(ce->lrc_reg_state) -
> +               LRC_STATE_PN * PAGE_SIZE + ce->ctx_bb_offset;

static u32 *context_indirect_bb(struct intel_context *ce)
{
	void *ptr;

	GEM_BUG_ON(!intel_context_is_pinned(ce));
	GEM_BUG_ON(!ce->ctx_bb_offset);

	ptr = ce->lrc_reg_state;
	ptr -= LRC_STATE_PN * PAGE_SIZE; /* back to start of context image */
	ptr += ce->ctx_bb_offset;

	return ptr;
}

> +       const u32 * const batch_start = cs;
> +
> +       GEM_DEBUG_BUG_ON(!ce->ctx_bb_offset);
> +
> +       cs = emit(ce, cs);
> +
> +       GEM_DEBUG_BUG_ON(cs - batch_start >
> +                        (I915_GTT_PAGE_SIZE - 4)/sizeof(*cs));
> +
> +       return cs;
> +}
> +
> +static void
> +execlists_setup_indirect_ctx_bb(struct intel_context *ce,
> +                               u32 *(*emit)(struct intel_context *, u32 *))
> +{
> +       const u32 indirect_ptr_offset =
> +               INTEL_GEN(ce->engine->i915) >= 12 ?
> +               GEN12_CTX_BB_PER_CTX_PTR + 2 :
> +               CTX_BB_PER_CTX_PTR + 2;

Do we have this in live_lrc_layout yet?

> +       const u32 * const start = (void *)(ce->lrc_reg_state) -
> +               LRC_STATE_PN * PAGE_SIZE + ce->ctx_bb_offset;
> +       u32 *cs;
> +
> +       cs = execlists_emit_ctx_bb(ce, emit);
> +
> +       while ((unsigned long)cs % CACHELINE_BYTES)
> +               *cs++ = MI_NOOP;
> +
> +       ce->lrc_reg_state[indirect_ptr_offset] =
> +               (i915_ggtt_offset(ce->state) + ce->ctx_bb_offset) |
> +               (cs - start) * sizeof(*cs) /
> +               CACHELINE_BYTES;
> +
> +       ce->lrc_reg_state[indirect_ptr_offset + 2] =
> +               intel_lr_indirect_ctx_offset(ce->engine) << 6;
> +}
> +
>  static void
> -__execlists_update_reg_state(const struct intel_context *ce,
> +gen12_setup_timestamp_ctx_wa(struct intel_context *ce)
> +{
> +       if (ce->engine->class == RENDER_CLASS)
> +               execlists_setup_indirect_ctx_bb(ce,

I'd be tempted to drop the execlists_ at this depth.

> +                                       gen12_emit_timestamp_wa_lrr);
> +       else
> +               execlists_setup_indirect_ctx_bb(ce,
> +                                               gen12_emit_timestamp_wa_lrm);

	u32 *(*fn)(struct intel_context *ce, u32 *cs);

	fn = gen12_emit_timestamp_wa_lrm;
	if (ce->engine->class == RENDER_CLASS)
		fn = gen12_emit_timestamp_wa_lrr;

	setup_indirect_ctx_bb(ce, fn);


> +static u32 *
> +gen12_emit_timestamp_wa_lrr(struct intel_context *ce, u32 *cs)
> +}
> +
> +static void
> +__execlists_update_reg_state(struct intel_context *ce,
>                              const struct intel_engine_cs *engine,
>                              u32 head)
>  {
> @@ -3164,6 +3308,13 @@ __execlists_update_reg_state(const struct intel_context *ce,
>                         intel_sseu_make_rpcs(engine->i915, &ce->sseu);
>  
>                 i915_oa_init_reg_state(ce, engine);
> +
> +       }
> +
> +       if (ce->ctx_bb_offset) {
> +               /* Mutually exclusive wrt to global indirect bb */
> +               GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
> +               gen12_setup_timestamp_ctx_wa(ce);
>         }
>  }
>  
> @@ -4667,40 +4818,6 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
>         return 0;
>  }
>  
> -static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
> -{
> -       u32 indirect_ctx_offset;
> -
> -       switch (INTEL_GEN(engine->i915)) {
> -       default:
> -               MISSING_CASE(INTEL_GEN(engine->i915));
> -               /* fall through */
> -       case 12:
> -               indirect_ctx_offset =
> -                       GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
> -               break;
> -       case 11:
> -               indirect_ctx_offset =
> -                       GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
> -               break;
> -       case 10:
> -               indirect_ctx_offset =
> -                       GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
> -               break;
> -       case 9:
> -               indirect_ctx_offset =
> -                       GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
> -               break;
> -       case 8:
> -               indirect_ctx_offset =
> -                       GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
> -               break;
> -       }
> -
> -       return indirect_ctx_offset;
> -}
> -
> -
>  static void init_common_reg_state(u32 * const regs,
>                                   const struct intel_engine_cs *engine,
>                                   const struct intel_ring *ring,
> @@ -4867,6 +4984,11 @@ static int __execlists_context_alloc(struct intel_context *ce,
>         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
>                 context_size += I915_GTT_PAGE_SIZE; /* for redzone */
>  
> +       if (INTEL_GEN(engine->i915) == 12) {
> +               ce->ctx_bb_offset = context_size;
> +               context_size += PAGE_SIZE;
> +       }
> +
>         ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
>         if (IS_ERR(ctx_obj))
>                 return PTR_ERR(ctx_obj);
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> index d39b72590e40..bb444614f33b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> @@ -32,6 +32,7 @@
>  
>  /* GEN12+ Reg State Context */
>  #define GEN12_CTX_BB_PER_CTX_PTR               (0x12 + 1)
> +#define   CTX_BB_PER_CTX_PTR_VALID             BIT(0)
>  
>  #define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \
>         u32 *reg_state__ = (reg_state); \

Was I imagining things, but didn't you devise a selftest to prove we
could use the indirect-ctx-bb to do interesting things?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915: Add live selftests for indirect ctx batchbuffers
  2020-04-17 14:44 ` [Intel-gfx] [PATCH 2/3] drm/i915: Add live selftests for indirect ctx batchbuffers Mika Kuoppala
@ 2020-04-17 15:13   ` Chris Wilson
  0 siblings, 0 replies; 11+ messages in thread
From: Chris Wilson @ 2020-04-17 15:13 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2020-04-17 15:44:28)
> Add live selftest for indirect ctx context batchbuffers

A quick why about what you want to achieve. (Or just pull into the user
as proof that it works.)

> +static int __per_ctx_bb(struct intel_engine_cs *engine)
> +{
> +       struct intel_context *ce1, *ce2;
> +       int err = 0;

Either here or live_lrc_indirect_ctx_bb a quick summary of operations.
What you are testing and how.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915: Add per ctx batchbuffer wa for timestamp
  2020-04-17 14:44 [Intel-gfx] [PATCH 1/3] drm/i915: Add per ctx batchbuffer wa for timestamp Mika Kuoppala
                   ` (2 preceding siblings ...)
  2020-04-17 15:07 ` [Intel-gfx] [PATCH 1/3] drm/i915: Add per ctx batchbuffer wa for timestamp Chris Wilson
@ 2020-04-17 20:29 ` Patchwork
  2020-04-17 20:47 ` [Intel-gfx] ✗ Fi.CI.DOCS: " Patchwork
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 11+ messages in thread
From: Patchwork @ 2020-04-17 20:29 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915: Add per ctx batchbuffer wa for timestamp
URL   : https://patchwork.freedesktop.org/series/76102/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
41bd098fd910 drm/i915: Add per ctx batchbuffer wa for timestamp
-:51: CHECK:SPACING: spaces preferred around that '<<' (ctx:VxV)
#51: FILE: drivers/gpu/drm/i915/gt/intel_gpu_commands.h:141:
+#define   MI_LRI_LRM_CS_MMIO		(1<<19)
                             		  ^

-:59: CHECK:SPACING: spaces preferred around that '<<' (ctx:VxV)
#59: FILE: drivers/gpu/drm/i915/gt/intel_gpu_commands.h:158:
+#define   MI_LRM_ASYNC			(1<<21)
                       			  ^

-:191: CHECK:SPACING: spaces preferred around that '/' (ctx:VxV)
#191: FILE: drivers/gpu/drm/i915/gt/intel_lrc.c:3247:
+			 (I915_GTT_PAGE_SIZE - 4)/sizeof(*cs));
 			                         ^

-:228: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#228: FILE: drivers/gpu/drm/i915/gt/intel_lrc.c:3283:
+		execlists_setup_indirect_ctx_bb(ce,
+					gen12_emit_timestamp_wa_lrr);

-:244: CHECK:BRACES: Blank lines aren't necessary before a close brace '}'
#244: FILE: drivers/gpu/drm/i915/gt/intel_lrc.c:3312:
+
+	}

total: 0 errors, 0 warnings, 5 checks, 264 lines checked
384079efb59c drm/i915: Add live selftests for indirect ctx batchbuffers
-:39: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#39: FILE: drivers/gpu/drm/i915/gt/selftest_lrc.c:5387:
+#define CTX_BB_CANARY_OFFSET (3*1024)
                                ^

-:40: CHECK:SPACING: spaces preferred around that '/' (ctx:VxV)
#40: FILE: drivers/gpu/drm/i915/gt/selftest_lrc.c:5388:
+#define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET/sizeof(u32))
                                                   ^

-:150: CHECK:BRACES: Blank lines aren't necessary after an open brace '{'
#150: FILE: drivers/gpu/drm/i915/gt/selftest_lrc.c:5498:
+	for_each_engine(engine, gt, id) {
+

total: 0 errors, 0 warnings, 3 checks, 167 lines checked
eae7f1fa6697 drm/i915: Use indirect ctx bb to mend CMD_BUF_CCTL

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Intel-gfx] ✗ Fi.CI.DOCS: warning for series starting with [1/3] drm/i915: Add per ctx batchbuffer wa for timestamp
  2020-04-17 14:44 [Intel-gfx] [PATCH 1/3] drm/i915: Add per ctx batchbuffer wa for timestamp Mika Kuoppala
                   ` (3 preceding siblings ...)
  2020-04-17 20:29 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] " Patchwork
@ 2020-04-17 20:47 ` Patchwork
  2020-04-17 20:53 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
  2020-04-19  8:25 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
  6 siblings, 0 replies; 11+ messages in thread
From: Patchwork @ 2020-04-17 20:47 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915: Add per ctx batchbuffer wa for timestamp
URL   : https://patchwork.freedesktop.org/series/76102/
State : warning

== Summary ==

$ make htmldocs 2>&1 > /dev/null | grep i915
/home/cidrm/kernel/Documentation/gpu/i915.rst:610: WARNING: duplicate label gpu/i915:layout, other instance in /home/cidrm/kernel/Documentation/gpu/i915.rst

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/3] drm/i915: Add per ctx batchbuffer wa for timestamp
  2020-04-17 14:44 [Intel-gfx] [PATCH 1/3] drm/i915: Add per ctx batchbuffer wa for timestamp Mika Kuoppala
                   ` (4 preceding siblings ...)
  2020-04-17 20:47 ` [Intel-gfx] ✗ Fi.CI.DOCS: " Patchwork
@ 2020-04-17 20:53 ` Patchwork
  2020-04-19  8:25 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
  6 siblings, 0 replies; 11+ messages in thread
From: Patchwork @ 2020-04-17 20:53 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915: Add per ctx batchbuffer wa for timestamp
URL   : https://patchwork.freedesktop.org/series/76102/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_8319 -> Patchwork_17353
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/index.html


Changes
-------

  No changes found


Participating hosts (51 -> 45)
------------------------------

  Missing    (6): fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_8319 -> Patchwork_17353

  CI-20190529: 20190529
  CI_DRM_8319: 18043327e8a9cba095bca9f80cdc70900a51f92c @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5599: cdb07101dda33e2fcb0f4c2aa199c47159d88f35 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_17353: eae7f1fa669766bec86c492f5ce5500a61febd2a @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

eae7f1fa6697 drm/i915: Use indirect ctx bb to mend CMD_BUF_CCTL
384079efb59c drm/i915: Add live selftests for indirect ctx batchbuffers
41bd098fd910 drm/i915: Add per ctx batchbuffer wa for timestamp

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Intel-gfx] ✓ Fi.CI.IGT: success for series starting with [1/3] drm/i915: Add per ctx batchbuffer wa for timestamp
  2020-04-17 14:44 [Intel-gfx] [PATCH 1/3] drm/i915: Add per ctx batchbuffer wa for timestamp Mika Kuoppala
                   ` (5 preceding siblings ...)
  2020-04-17 20:53 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
@ 2020-04-19  8:25 ` Patchwork
  6 siblings, 0 replies; 11+ messages in thread
From: Patchwork @ 2020-04-19  8:25 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915: Add per ctx batchbuffer wa for timestamp
URL   : https://patchwork.freedesktop.org/series/76102/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_8319_full -> Patchwork_17353_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  

Known issues
------------

  Here are the changes found in Patchwork_17353_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_params@invalid-bsd-ring:
    - shard-iclb:         [PASS][1] -> [SKIP][2] ([fdo#109276])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8319/shard-iclb1/igt@gem_exec_params@invalid-bsd-ring.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/shard-iclb6/igt@gem_exec_params@invalid-bsd-ring.html

  * igt@kms_cursor_crc@pipe-a-cursor-128x42-random:
    - shard-kbl:          [PASS][3] -> [FAIL][4] ([i915#54] / [i915#93] / [i915#95])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8319/shard-kbl6/igt@kms_cursor_crc@pipe-a-cursor-128x42-random.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/shard-kbl7/igt@kms_cursor_crc@pipe-a-cursor-128x42-random.html

  * igt@kms_cursor_crc@pipe-a-cursor-suspend:
    - shard-skl:          [PASS][5] -> [INCOMPLETE][6] ([i915#300])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8319/shard-skl4/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/shard-skl5/igt@kms_cursor_crc@pipe-a-cursor-suspend.html

  * igt@kms_cursor_crc@pipe-b-cursor-256x85-sliding:
    - shard-skl:          [PASS][7] -> [FAIL][8] ([i915#54])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8319/shard-skl8/igt@kms_cursor_crc@pipe-b-cursor-256x85-sliding.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/shard-skl1/igt@kms_cursor_crc@pipe-b-cursor-256x85-sliding.html

  * igt@kms_frontbuffer_tracking@fbc-suspend:
    - shard-kbl:          [PASS][9] -> [INCOMPLETE][10] ([i915#155])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8319/shard-kbl7/igt@kms_frontbuffer_tracking@fbc-suspend.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/shard-kbl4/igt@kms_frontbuffer_tracking@fbc-suspend.html

  * igt@kms_hdr@bpc-switch:
    - shard-skl:          [PASS][11] -> [FAIL][12] ([i915#1188])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8319/shard-skl5/igt@kms_hdr@bpc-switch.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/shard-skl5/igt@kms_hdr@bpc-switch.html

  * igt@kms_mmap_write_crc@main:
    - shard-kbl:          [PASS][13] -> [FAIL][14] ([i915#93] / [i915#95])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8319/shard-kbl7/igt@kms_mmap_write_crc@main.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/shard-kbl6/igt@kms_mmap_write_crc@main.html

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes:
    - shard-kbl:          [PASS][15] -> [DMESG-WARN][16] ([i915#180]) +1 similar issue
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8319/shard-kbl3/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/shard-kbl4/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes.html

  * igt@kms_plane_alpha_blend@pipe-b-coverage-7efc:
    - shard-skl:          [PASS][17] -> [FAIL][18] ([fdo#108145] / [i915#265]) +1 similar issue
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8319/shard-skl4/igt@kms_plane_alpha_blend@pipe-b-coverage-7efc.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/shard-skl5/igt@kms_plane_alpha_blend@pipe-b-coverage-7efc.html

  * igt@kms_setmode@basic:
    - shard-kbl:          [PASS][19] -> [FAIL][20] ([i915#31])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8319/shard-kbl2/igt@kms_setmode@basic.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/shard-kbl4/igt@kms_setmode@basic.html

  * igt@kms_vblank@pipe-a-ts-continuation-suspend:
    - shard-apl:          [PASS][21] -> [DMESG-WARN][22] ([i915#180]) +3 similar issues
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8319/shard-apl8/igt@kms_vblank@pipe-a-ts-continuation-suspend.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/shard-apl1/igt@kms_vblank@pipe-a-ts-continuation-suspend.html

  
#### Possible fixes ####

  * igt@gen9_exec_parse@allowed-all:
    - shard-glk:          [DMESG-WARN][23] ([i915#716]) -> [PASS][24]
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8319/shard-glk6/igt@gen9_exec_parse@allowed-all.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/shard-glk1/igt@gen9_exec_parse@allowed-all.html

  * igt@i915_pm_dc@dc6-psr:
    - shard-iclb:         [FAIL][25] ([i915#454]) -> [PASS][26]
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8319/shard-iclb2/igt@i915_pm_dc@dc6-psr.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/shard-iclb2/igt@i915_pm_dc@dc6-psr.html

  * igt@i915_pm_rc6_residency@rc6-idle:
    - shard-snb:          [FAIL][27] ([i915#1066]) -> [PASS][28]
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8319/shard-snb2/igt@i915_pm_rc6_residency@rc6-idle.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/shard-snb5/igt@i915_pm_rc6_residency@rc6-idle.html

  * igt@kms_cursor_crc@pipe-a-cursor-suspend:
    - shard-kbl:          [DMESG-WARN][29] ([i915#180]) -> [PASS][30] +6 similar issues
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8319/shard-kbl7/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/shard-kbl7/igt@kms_cursor_crc@pipe-a-cursor-suspend.html

  * igt@kms_cursor_crc@pipe-b-cursor-suspend:
    - shard-apl:          [DMESG-WARN][31] ([i915#180]) -> [PASS][32] +1 similar issue
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8319/shard-apl4/igt@kms_cursor_crc@pipe-b-cursor-suspend.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/shard-apl6/igt@kms_cursor_crc@pipe-b-cursor-suspend.html

  * igt@kms_draw_crc@draw-method-rgb565-mmap-wc-untiled:
    - shard-glk:          [FAIL][33] ([i915#52] / [i915#54]) -> [PASS][34]
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8319/shard-glk4/igt@kms_draw_crc@draw-method-rgb565-mmap-wc-untiled.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/shard-glk5/igt@kms_draw_crc@draw-method-rgb565-mmap-wc-untiled.html

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes:
    - shard-skl:          [INCOMPLETE][35] ([i915#69]) -> [PASS][36]
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8319/shard-skl6/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/shard-skl5/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes.html

  * igt@kms_psr@psr2_no_drrs:
    - shard-iclb:         [SKIP][37] ([fdo#109441]) -> [PASS][38]
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8319/shard-iclb5/igt@kms_psr@psr2_no_drrs.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/shard-iclb2/igt@kms_psr@psr2_no_drrs.html

  * {igt@perf@blocking-parameterized}:
    - shard-iclb:         [FAIL][39] ([i915#1542]) -> [PASS][40] +1 similar issue
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8319/shard-iclb1/igt@perf@blocking-parameterized.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/shard-iclb6/igt@perf@blocking-parameterized.html

  
#### Warnings ####

  * igt@i915_pm_dc@dc6-psr:
    - shard-tglb:         [SKIP][41] ([i915#468]) -> [FAIL][42] ([i915#454])
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8319/shard-tglb2/igt@i915_pm_dc@dc6-psr.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/shard-tglb1/igt@i915_pm_dc@dc6-psr.html

  * igt@kms_plane_alpha_blend@pipe-c-alpha-7efc:
    - shard-apl:          [FAIL][43] ([fdo#108145] / [i915#265] / [i915#95]) -> [FAIL][44] ([fdo#108145] / [i915#265])
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8319/shard-apl7/igt@kms_plane_alpha_blend@pipe-c-alpha-7efc.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/shard-apl3/igt@kms_plane_alpha_blend@pipe-c-alpha-7efc.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109276]: https://bugs.freedesktop.org/show_bug.cgi?id=109276
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [i915#1066]: https://gitlab.freedesktop.org/drm/intel/issues/1066
  [i915#1188]: https://gitlab.freedesktop.org/drm/intel/issues/1188
  [i915#1542]: https://gitlab.freedesktop.org/drm/intel/issues/1542
  [i915#155]: https://gitlab.freedesktop.org/drm/intel/issues/155
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#198]: https://gitlab.freedesktop.org/drm/intel/issues/198
  [i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265
  [i915#300]: https://gitlab.freedesktop.org/drm/intel/issues/300
  [i915#31]: https://gitlab.freedesktop.org/drm/intel/issues/31
  [i915#34]: https://gitlab.freedesktop.org/drm/intel/issues/34
  [i915#454]: https://gitlab.freedesktop.org/drm/intel/issues/454
  [i915#468]: https://gitlab.freedesktop.org/drm/intel/issues/468
  [i915#52]: https://gitlab.freedesktop.org/drm/intel/issues/52
  [i915#54]: https://gitlab.freedesktop.org/drm/intel/issues/54
  [i915#69]: https://gitlab.freedesktop.org/drm/intel/issues/69
  [i915#716]: https://gitlab.freedesktop.org/drm/intel/issues/716
  [i915#79]: https://gitlab.freedesktop.org/drm/intel/issues/79
  [i915#93]: https://gitlab.freedesktop.org/drm/intel/issues/93
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (10 -> 10)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_8319 -> Patchwork_17353

  CI-20190529: 20190529
  CI_DRM_8319: 18043327e8a9cba095bca9f80cdc70900a51f92c @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5599: cdb07101dda33e2fcb0f4c2aa199c47159d88f35 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_17353: eae7f1fa669766bec86c492f5ce5500a61febd2a @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17353/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Intel-gfx] [PATCH 1/3] drm/i915: Add per ctx batchbuffer wa for timestamp
  2020-04-17 15:07 ` [Intel-gfx] [PATCH 1/3] drm/i915: Add per ctx batchbuffer wa for timestamp Chris Wilson
@ 2020-04-21 10:01   ` Mika Kuoppala
  0 siblings, 0 replies; 11+ messages in thread
From: Mika Kuoppala @ 2020-04-21 10:01 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2020-04-17 15:44:27)
>> Restoration of a previous timestamp can collide
>> with updating the timestamp, causing a value corruption.
>> 
>> Combat this issue by using indirect ctx bb to
>> modify the context image during restoring process.
>> 
>> For render engine, we can preload value into
>> scratch register. From which we then do the actual
>> write with LRR. LRR is faster and thus less error prone.
>> For other engines, no scratch is available so we
>> must do a more complex sequence of sync and async LRMs.
>> As the LRM is slower, the probablity of racy write
>> raises and thus we still see corruption sometimes.
>> 
>> References: HSDES#16010904313
>> Testcase: igt/i915_selftest/gt_lrc
>> Suggested-by: Joseph Koston <joseph.koston@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>> 
>> bug on fix
>> ---
>>  drivers/gpu/drm/i915/gt/intel_context_types.h |   3 +
>>  drivers/gpu/drm/i915/gt/intel_gpu_commands.h  |   3 +-
>>  drivers/gpu/drm/i915/gt/intel_lrc.c           | 196 ++++++++++++++----
>>  drivers/gpu/drm/i915/gt/intel_lrc_reg.h       |   1 +
>>  4 files changed, 165 insertions(+), 38 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
>> index 07cb83a0d017..c7573d565f58 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
>> @@ -70,6 +70,9 @@ struct intel_context {
>>  
>>         u32 *lrc_reg_state;
>>         u64 lrc_desc;
>> +
>> +       u32 ctx_bb_offset;
>> +
>>         u32 tag; /* cookie passed to HW to track this context on submission */
>>  
>>         /* Time on GPU as tracked by the hw. */
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>> index f04214a54f75..0c2adb4078a7 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>> @@ -138,7 +138,7 @@
>>   */
>>  #define MI_LOAD_REGISTER_IMM(x)        MI_INSTR(0x22, 2*(x)-1)
>>  /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
>> -#define   MI_LRI_CS_MMIO               (1<<19)
>> +#define   MI_LRI_LRM_CS_MMIO           (1<<19)
>>  #define   MI_LRI_FORCE_POSTED          (1<<12)
>>  #define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
>>  #define MI_STORE_REGISTER_MEM        MI_INSTR(0x24, 1)
>> @@ -155,6 +155,7 @@
>>  #define   MI_FLUSH_DW_USE_PPGTT                (0<<2)
>>  #define MI_LOAD_REGISTER_MEM      MI_INSTR(0x29, 1)
>>  #define MI_LOAD_REGISTER_MEM_GEN8  MI_INSTR(0x29, 2)
>> +#define   MI_LRM_ASYNC                 (1<<21)
>>  #define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
>>  #define MI_BATCH_BUFFER                MI_INSTR(0x30, 1)
>>  #define   MI_BATCH_NON_SECURE          (1)
>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> index 6fbad5e2343f..531884b9050c 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> @@ -234,7 +234,7 @@ static void execlists_init_reg_state(u32 *reg_state,
>>                                      const struct intel_ring *ring,
>>                                      bool close);
>>  static void
>> -__execlists_update_reg_state(const struct intel_context *ce,
>> +__execlists_update_reg_state(struct intel_context *ce,
>>                              const struct intel_engine_cs *engine,
>>                              u32 head);
>>  
>> @@ -537,7 +537,7 @@ static void set_offsets(u32 *regs,
>>                 if (flags & POSTED)
>>                         *regs |= MI_LRI_FORCE_POSTED;
>>                 if (INTEL_GEN(engine->i915) >= 11)
>> -                       *regs |= MI_LRI_CS_MMIO;
>> +                       *regs |= MI_LRI_LRM_CS_MMIO;
>>                 regs++;
>>  
>>                 GEM_BUG_ON(!count);
>> @@ -3142,8 +3142,152 @@ static void execlists_context_unpin(struct intel_context *ce)
>>         i915_gem_object_unpin_map(ce->state->obj);
>>  }
>>  
>> +static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
>> +{
>> +       u32 indirect_ctx_offset;
>> +
>> +       switch (INTEL_GEN(engine->i915)) {
>> +       default:
>> +               MISSING_CASE(INTEL_GEN(engine->i915));
>> +               fallthrough;
>> +       case 12:
>> +               indirect_ctx_offset =
>> +                       GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
>> +               break;
>> +       case 11:
>> +               indirect_ctx_offset =
>> +                       GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
>> +               break;
>> +       case 10:
>> +               indirect_ctx_offset =
>> +                       GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
>> +               break;
>> +       case 9:
>> +               indirect_ctx_offset =
>> +                       GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
>> +               break;
>> +       case 8:
>> +               indirect_ctx_offset =
>> +                       GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
>> +               break;
>> +       }
>> +
>> +       return indirect_ctx_offset;
>> +}
>> +
>> +static u32 *
>> +gen12_emit_timestamp_wa_lrm(struct intel_context *ce, u32 *cs)
>> +{
>> +       const u32 lrc_offset = i915_ggtt_offset(ce->state) +
>> +               LRC_STATE_PN * PAGE_SIZE;
>> +       const u32 lrm = MI_LOAD_REGISTER_MEM_GEN8 |
>> +               MI_SRM_LRM_GLOBAL_GTT | MI_LRI_LRM_CS_MMIO;
>> +
>> +       *cs++ = lrm;
>> +       *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
>> +       *cs++ = lrc_offset + CTX_TIMESTAMP * sizeof(u32);
>> +       *cs++ = 0;
>> +
>> +       *cs++ = lrm | MI_LRM_ASYNC;
>> +       *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
>> +       *cs++ = lrc_offset + CTX_TIMESTAMP * sizeof(u32);
>> +       *cs++ = 0;
>> +
>> +       *cs++ = lrm | MI_LRM_ASYNC;
>> +       *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
>> +       *cs++ = lrc_offset + CTX_TIMESTAMP * sizeof(u32);
>> +       *cs++ = 0;
>> +
>> +       *cs++ = lrm;
>> +       *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
>> +       *cs++ = lrc_offset + CTX_TIMESTAMP * sizeof(u32);
>> +       *cs++ = 0;
>> +
>> +       return cs;
>> +}
>> +
>> +static u32 *
>> +gen12_emit_timestamp_wa_lrr(struct intel_context *ce, u32 *cs)
>> +{
>> +       const u32 lrc_offset = i915_ggtt_offset(ce->state) +
>> +               LRC_STATE_PN * PAGE_SIZE;
>
> Repetitive.

I will introduce LRC_STATE_OFFSET driverwide :P

>
>> +       const u32 lrm = MI_LOAD_REGISTER_MEM_GEN8 |
>> +               MI_SRM_LRM_GLOBAL_GTT | MI_LRI_LRM_CS_MMIO;
>
> Used once, might as just use the constant inplace.
>
>> +       const u32 scratch_reg = 0x2198;
>> +
>> +       *cs++ = lrm;
>> +       *cs++ = scratch_reg;
>> +       *cs++ = lrc_offset + CTX_TIMESTAMP * sizeof(u32);
>> +       *cs++ = 0;
>> +
>> +       *cs++ = MI_LOAD_REGISTER_REG | MI_LRI_LRM_CS_MMIO;
>> +       *cs++ = scratch_reg;
>> +       *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
>> +
>> +       *cs++ = MI_LOAD_REGISTER_REG | MI_LRI_LRM_CS_MMIO;
>> +       *cs++ = scratch_reg;
>> +       *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
>> +
>> +       return cs;
>> +}
>> +
>> +static u32 *
>> +execlists_emit_ctx_bb(struct intel_context *ce,
>> +                     u32 *(*emit)(struct intel_context *, u32 *))
>> +{
>> +       u32 *cs = (void *)(ce->lrc_reg_state) -
>> +               LRC_STATE_PN * PAGE_SIZE + ce->ctx_bb_offset;
>
> static u32 *context_indirect_bb(struct intel_context *ce)
> {
> 	void *ptr;
>
> 	GEM_BUG_ON(!intel_context_is_pinned(ce));

This happens during pinning as we need different
contents for each pinned location.

Apparently the pinned state will be set only
after until all these preparations are done.

So I can't use
GEM_BUG_ON(!intel_context_is_pinned(ce));
on this stage.

> 	GEM_BUG_ON(!ce->ctx_bb_offset);
>
> 	ptr = ce->lrc_reg_state;
> 	ptr -= LRC_STATE_PN * PAGE_SIZE; /* back to start of context image */
> 	ptr += ce->ctx_bb_offset;
>
> 	return ptr;
> }
>
>> +       const u32 * const batch_start = cs;
>> +
>> +       GEM_DEBUG_BUG_ON(!ce->ctx_bb_offset);
>> +
>> +       cs = emit(ce, cs);
>> +
>> +       GEM_DEBUG_BUG_ON(cs - batch_start >
>> +                        (I915_GTT_PAGE_SIZE - 4)/sizeof(*cs));
>> +
>> +       return cs;
>> +}
>> +
>> +static void
>> +execlists_setup_indirect_ctx_bb(struct intel_context *ce,
>> +                               u32 *(*emit)(struct intel_context *, u32 *))
>> +{
>> +       const u32 indirect_ptr_offset =
>> +               INTEL_GEN(ce->engine->i915) >= 12 ?
>> +               GEN12_CTX_BB_PER_CTX_PTR + 2 :
>> +               CTX_BB_PER_CTX_PTR + 2;
>
> Do we have this in live_lrc_layout yet?

Yes it is there.

>
>> +       const u32 * const start = (void *)(ce->lrc_reg_state) -
>> +               LRC_STATE_PN * PAGE_SIZE + ce->ctx_bb_offset;
>> +       u32 *cs;
>> +
>> +       cs = execlists_emit_ctx_bb(ce, emit);
>> +
>> +       while ((unsigned long)cs % CACHELINE_BYTES)
>> +               *cs++ = MI_NOOP;
>> +
>> +       ce->lrc_reg_state[indirect_ptr_offset] =
>> +               (i915_ggtt_offset(ce->state) + ce->ctx_bb_offset) |
>> +               (cs - start) * sizeof(*cs) /
>> +               CACHELINE_BYTES;
>> +
>> +       ce->lrc_reg_state[indirect_ptr_offset + 2] =
>> +               intel_lr_indirect_ctx_offset(ce->engine) << 6;
>> +}
>> +
>>  static void
>> -__execlists_update_reg_state(const struct intel_context *ce,
>> +gen12_setup_timestamp_ctx_wa(struct intel_context *ce)
>> +{
>> +       if (ce->engine->class == RENDER_CLASS)
>> +               execlists_setup_indirect_ctx_bb(ce,
>
> I'd be tempted to drop the execlists_ at this depth.

Dropped.

>
>> +                                       gen12_emit_timestamp_wa_lrr);
>> +       else
>> +               execlists_setup_indirect_ctx_bb(ce,
>> +                                               gen12_emit_timestamp_wa_lrm);
>
> 	u32 *(*fn)(struct intel_context *ce, u32 *cs);
>
> 	fn = gen12_emit_timestamp_wa_lrm;
> 	if (ce->engine->class == RENDER_CLASS)
> 		fn = gen12_emit_timestamp_wa_lrr;
>
> 	setup_indirect_ctx_bb(ce, fn);
>
>
>> +static u32 *
>> +gen12_emit_timestamp_wa_lrr(struct intel_context *ce, u32 *cs)
>> +}
>> +
>> +static void
>> +__execlists_update_reg_state(struct intel_context *ce,
>>                              const struct intel_engine_cs *engine,
>>                              u32 head)
>>  {
>> @@ -3164,6 +3308,13 @@ __execlists_update_reg_state(const struct intel_context *ce,
>>                         intel_sseu_make_rpcs(engine->i915, &ce->sseu);
>>  
>>                 i915_oa_init_reg_state(ce, engine);
>> +
>> +       }
>> +
>> +       if (ce->ctx_bb_offset) {
>> +               /* Mutually exclusive wrt to global indirect bb */
>> +               GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
>> +               gen12_setup_timestamp_ctx_wa(ce);
>>         }
>>  }
>>  
>> @@ -4667,40 +4818,6 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
>>         return 0;
>>  }
>>  
>> -static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
>> -{
>> -       u32 indirect_ctx_offset;
>> -
>> -       switch (INTEL_GEN(engine->i915)) {
>> -       default:
>> -               MISSING_CASE(INTEL_GEN(engine->i915));
>> -               /* fall through */
>> -       case 12:
>> -               indirect_ctx_offset =
>> -                       GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
>> -               break;
>> -       case 11:
>> -               indirect_ctx_offset =
>> -                       GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
>> -               break;
>> -       case 10:
>> -               indirect_ctx_offset =
>> -                       GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
>> -               break;
>> -       case 9:
>> -               indirect_ctx_offset =
>> -                       GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
>> -               break;
>> -       case 8:
>> -               indirect_ctx_offset =
>> -                       GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
>> -               break;
>> -       }
>> -
>> -       return indirect_ctx_offset;
>> -}
>> -
>> -
>>  static void init_common_reg_state(u32 * const regs,
>>                                   const struct intel_engine_cs *engine,
>>                                   const struct intel_ring *ring,
>> @@ -4867,6 +4984,11 @@ static int __execlists_context_alloc(struct intel_context *ce,
>>         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
>>                 context_size += I915_GTT_PAGE_SIZE; /* for redzone */
>>  
>> +       if (INTEL_GEN(engine->i915) == 12) {
>> +               ce->ctx_bb_offset = context_size;
>> +               context_size += PAGE_SIZE;
>> +       }
>> +
>>         ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
>>         if (IS_ERR(ctx_obj))
>>                 return PTR_ERR(ctx_obj);
>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
>> index d39b72590e40..bb444614f33b 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
>> @@ -32,6 +32,7 @@
>>  
>>  /* GEN12+ Reg State Context */
>>  #define GEN12_CTX_BB_PER_CTX_PTR               (0x12 + 1)
>> +#define   CTX_BB_PER_CTX_PTR_VALID             BIT(0)
>>  
>>  #define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \
>>         u32 *reg_state__ = (reg_state); \
>
> Was I imagining things, but didn't you devise a selftest to prove we
> could use the indirect-ctx-bb to do interesting things?

I have simple selftest and then some followup patches to show
how we can mold the context image.
-Mika

> -Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2020-04-21 10:03 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-17 14:44 [Intel-gfx] [PATCH 1/3] drm/i915: Add per ctx batchbuffer wa for timestamp Mika Kuoppala
2020-04-17 14:44 ` [Intel-gfx] [PATCH 2/3] drm/i915: Add live selftests for indirect ctx batchbuffers Mika Kuoppala
2020-04-17 15:13   ` Chris Wilson
2020-04-17 14:44 ` [Intel-gfx] [PATCH 3/3] drm/i915: Use indirect ctx bb to mend CMD_BUF_CCTL Mika Kuoppala
2020-04-17 14:55   ` Chris Wilson
2020-04-17 15:07 ` [Intel-gfx] [PATCH 1/3] drm/i915: Add per ctx batchbuffer wa for timestamp Chris Wilson
2020-04-21 10:01   ` Mika Kuoppala
2020-04-17 20:29 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] " Patchwork
2020-04-17 20:47 ` [Intel-gfx] ✗ Fi.CI.DOCS: " Patchwork
2020-04-17 20:53 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-04-19  8:25 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.