intel-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH] drm/i915: Introduce Wa_14011274333
@ 2023-07-12 23:34 Matt Atwood
  2023-07-13  0:04 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for " Patchwork
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Matt Atwood @ 2023-07-12 23:34 UTC (permalink / raw)
  To: intel-gfx, matthew.d.roper

Wa_14011274333 applies to RKL, ADL-S, ADL-P and TGL. ALlocate buffer
pinned to GGTT and add WA to restore impacted registers.

v2: use correct lineage number, more generically apply workarounds for
all registers impacted, move workaround to gt/intel_workarounds.c
(MattR)

Based off patch by Tilak Tangudu.

Signed-off-by: Matt Atwood <matthew.s.atwood@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h     |  5 ++
 drivers/gpu/drm/i915/gt/intel_rc6.c         | 63 +++++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_rc6_types.h   |  3 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 40 +++++++++++++
 4 files changed, 111 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 718cb2c80f79e..eaee35ecbc8d3 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -83,6 +83,11 @@
 #define   MTL_MCR_GROUPID			REG_GENMASK(11, 8)
 #define   MTL_MCR_INSTANCEID			REG_GENMASK(3, 0)
 
+#define CTX_WA_PTR				_MMIO(0x2058)
+#define  CTX_WA_PTR_ADDR_MASK			REG_GENMASK(31,12)
+#define  CTX_WA_TYPE_MASK			REG_GENMASK(4,3)
+#define  CTX_WA_VALID				REG_BIT(0)
+
 #define IPEIR_I965				_MMIO(0x2064)
 #define IPEHR_I965				_MMIO(0x2068)
 
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 58bb1c55294c9..6baa341814da7 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -14,6 +14,7 @@
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
 #include "intel_gt_regs.h"
+#include "intel_gpu_commands.h"
 #include "intel_pcode.h"
 #include "intel_rc6.h"
 
@@ -53,6 +54,65 @@ static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc)
 	return rc6_to_gt(rc)->i915;
 }
 
+static int rc6_wa_bb_ctx_init(struct intel_rc6 *rc6)
+{
+	struct drm_i915_private *i915 = rc6_to_i915(rc6);
+	struct intel_gt *gt = rc6_to_gt(rc6);
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	void *batch;
+	struct i915_gem_ww_ctx ww;
+	int err;
+
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+	if (IS_ERR(vma)){
+		err = PTR_ERR(vma);
+		goto err;
+	}
+	rc6->vma=vma;
+	i915_gem_ww_ctx_init(&ww, true);
+retry:
+	err = i915_gem_object_lock(obj, &ww);
+	if (!err)
+		err = i915_ggtt_pin(rc6->vma, &ww, 0, PIN_HIGH);
+	if (err)
+		goto err_ww_fini;
+
+	batch = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(batch)) {
+		err = PTR_ERR(batch);
+		goto err_unpin;
+	}
+	rc6->rc6_wa_bb = batch;
+	return 0;
+err_unpin:
+	if (err)
+		i915_vma_unpin(rc6->vma);
+err_ww_fini:
+	if (err == -EDEADLK) {
+		err = i915_gem_ww_ctx_backoff(&ww);
+		if (!err)
+			goto retry;
+	}
+	i915_gem_ww_ctx_fini(&ww);
+
+	if (err)
+		i915_vma_put(rc6->vma);
+err:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+void rc6_wa_bb_ctx_wa_fini(struct intel_rc6 *rc6)
+{
+	i915_vma_unpin(rc6->vma);
+	i915_vma_put(rc6->vma);
+}
+
 static void gen11_rc6_enable(struct intel_rc6 *rc6)
 {
 	struct intel_gt *gt = rc6_to_gt(rc6);
@@ -616,6 +676,9 @@ void intel_rc6_init(struct intel_rc6 *rc6)
 		err = chv_rc6_init(rc6);
 	else if (IS_VALLEYVIEW(i915))
 		err = vlv_rc6_init(rc6);
+	else if ((GRAPHICS_VER_FULL(i915) >= IP_VER(12, 0)) &&
+		 (GRAPHICS_VER_FULL(i915) <= IP_VER(12, 70)))
+		err = rc6_wa_bb_ctx_init(rc6);
 	else
 		err = 0;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
index cd4587098162a..643fd4e839ad4 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
@@ -33,6 +33,9 @@ struct intel_rc6 {
 
 	struct drm_i915_gem_object *pctx;
 
+	u32 *rc6_wa_bb;
+	struct i915_vma *vma;
+
 	bool supported : 1;
 	bool enabled : 1;
 	bool manual : 1;
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 4d2dece960115..d20afb318d857 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -14,6 +14,7 @@
 #include "intel_gt_regs.h"
 #include "intel_ring.h"
 #include "intel_workarounds.h"
+#include "intel_rc6.h"
 
 /**
  * DOC: Hardware workarounds
@@ -3132,6 +3133,41 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
 			   true);
 }
 
+static void
+populate_wa_bb_ctx(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+{
+	struct intel_rc6 *rc6 = &engine->gt->rc6;
+	struct intel_uncore *uncore = engine->gt->uncore;
+	const struct i915_wa *wa;
+	int i;
+	u32 *rc6_wa_bb;
+
+	if (!rc6->vma->obj) return;
+
+	rc6_wa_bb = rc6->rc6_wa_bb;
+	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
+		if (i915_mmio_reg_offset(wa->mcr_reg) ==
+		    i915_mmio_reg_offset(GEN8_HALF_SLICE_CHICKEN1) ||
+		    ((i915_mmio_reg_offset(wa->mcr_reg) >=
+		      i915_mmio_reg_offset(HALF_SLICE_CHICKEN2)) &&
+		     (i915_mmio_reg_offset(wa->mcr_reg) <=
+		      i915_mmio_reg_offset(GEN9_HALF_SLICE_CHICKEN7)))) {
+			*rc6_wa_bb++ = MI_NOOP;
+			*rc6_wa_bb++ = MI_LOAD_REGISTER_IMM(1);
+			*rc6_wa_bb++ = i915_mmio_reg_offset(wa->mcr_reg);
+			*rc6_wa_bb++ = wa->set;
+			*rc6_wa_bb++ = MI_NOOP;
+                }
+	}
+	*rc6_wa_bb++ = MI_BATCH_BUFFER_END;
+
+	i915_gem_object_flush_map(rc6->vma->obj);
+	intel_uncore_write(uncore, CTX_WA_PTR,
+			   REG_FIELD_PREP(CTX_WA_PTR_ADDR_MASK,
+					  i915_vma_offset(rc6->vma) & GENMASK(19, 0)) |
+			   CTX_WA_VALID);
+}
+
 static void
 engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 {
@@ -3154,6 +3190,10 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal
 		rcs_engine_wa_init(engine, wal);
 	else
 		xcs_engine_wa_init(engine, wal);
+
+	if ((GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 0)) &&
+	    (GRAPHICS_VER_FULL(engine->i915) <= IP_VER(12, 70)))
+		populate_wa_bb_ctx(engine, wal);
 }
 
 void intel_engine_init_workarounds(struct intel_engine_cs *engine)
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [Intel-gfx] ✗ Fi.CI.BUILD: failure for drm/i915: Introduce Wa_14011274333
  2023-07-12 23:34 [Intel-gfx] [PATCH] drm/i915: Introduce Wa_14011274333 Matt Atwood
@ 2023-07-13  0:04 ` Patchwork
  2023-07-13 12:00 ` [Intel-gfx] [PATCH] " Lionel Landwerlin
  2023-07-13 17:25 ` Matt Roper
  2 siblings, 0 replies; 4+ messages in thread
From: Patchwork @ 2023-07-13  0:04 UTC (permalink / raw)
  To: Matt Atwood; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Introduce Wa_14011274333
URL   : https://patchwork.freedesktop.org/series/120644/
State : failure

== Summary ==

Error: make failed
  CALL    scripts/checksyscalls.sh
  DESCEND objtool
  INSTALL libsubcmd_headers
  CC [M]  drivers/gpu/drm/i915/gt/intel_rc6.o
drivers/gpu/drm/i915/gt/intel_rc6.c:110:6: error: no previous prototype for ‘rc6_wa_bb_ctx_wa_fini’ [-Werror=missing-prototypes]
  110 | void rc6_wa_bb_ctx_wa_fini(struct intel_rc6 *rc6)
      |      ^~~~~~~~~~~~~~~~~~~~~
cc1: all warnings being treated as errors
make[6]: *** [scripts/Makefile.build:243: drivers/gpu/drm/i915/gt/intel_rc6.o] Error 1
make[5]: *** [scripts/Makefile.build:477: drivers/gpu/drm/i915] Error 2
make[4]: *** [scripts/Makefile.build:477: drivers/gpu/drm] Error 2
make[3]: *** [scripts/Makefile.build:477: drivers/gpu] Error 2
make[2]: *** [scripts/Makefile.build:477: drivers] Error 2
make[1]: *** [/home/kbuild/kernel/Makefile:2020: .] Error 2
make: *** [Makefile:234: __sub-make] Error 2
Build failed, no error log produced



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Introduce Wa_14011274333
  2023-07-12 23:34 [Intel-gfx] [PATCH] drm/i915: Introduce Wa_14011274333 Matt Atwood
  2023-07-13  0:04 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for " Patchwork
@ 2023-07-13 12:00 ` Lionel Landwerlin
  2023-07-13 17:25 ` Matt Roper
  2 siblings, 0 replies; 4+ messages in thread
From: Lionel Landwerlin @ 2023-07-13 12:00 UTC (permalink / raw)
  To: Matt Atwood, intel-gfx, matthew.d.roper

On 13/07/2023 02:34, Matt Atwood wrote:
> Wa_14011274333 applies to RKL, ADL-S, ADL-P and TGL. ALlocate buffer
> pinned to GGTT and add WA to restore impacted registers.
>
> v2: use correct lineage number, more generically apply workarounds for
> all registers impacted, move workaround to gt/intel_workarounds.c
> (MattR)
>
> Based off patch by Tilak Tangudu.
>
> Signed-off-by: Matt Atwood <matthew.s.atwood@intel.com>


I applied this patch to drm-tip and as far as I can tell it doesn't fix 
the problem of the SAMPLER_MODE register loosing its bit0 programming.


-Lionel


> ---
>   drivers/gpu/drm/i915/gt/intel_gt_regs.h     |  5 ++
>   drivers/gpu/drm/i915/gt/intel_rc6.c         | 63 +++++++++++++++++++++
>   drivers/gpu/drm/i915/gt/intel_rc6_types.h   |  3 +
>   drivers/gpu/drm/i915/gt/intel_workarounds.c | 40 +++++++++++++
>   4 files changed, 111 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index 718cb2c80f79e..eaee35ecbc8d3 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -83,6 +83,11 @@
>   #define   MTL_MCR_GROUPID			REG_GENMASK(11, 8)
>   #define   MTL_MCR_INSTANCEID			REG_GENMASK(3, 0)
>   
> +#define CTX_WA_PTR				_MMIO(0x2058)
> +#define  CTX_WA_PTR_ADDR_MASK			REG_GENMASK(31,12)
> +#define  CTX_WA_TYPE_MASK			REG_GENMASK(4,3)
> +#define  CTX_WA_VALID				REG_BIT(0)
> +
>   #define IPEIR_I965				_MMIO(0x2064)
>   #define IPEHR_I965				_MMIO(0x2068)
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
> index 58bb1c55294c9..6baa341814da7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_rc6.c
> +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
> @@ -14,6 +14,7 @@
>   #include "intel_gt.h"
>   #include "intel_gt_pm.h"
>   #include "intel_gt_regs.h"
> +#include "intel_gpu_commands.h"
>   #include "intel_pcode.h"
>   #include "intel_rc6.h"
>   
> @@ -53,6 +54,65 @@ static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc)
>   	return rc6_to_gt(rc)->i915;
>   }
>   
> +static int rc6_wa_bb_ctx_init(struct intel_rc6 *rc6)
> +{
> +	struct drm_i915_private *i915 = rc6_to_i915(rc6);
> +	struct intel_gt *gt = rc6_to_gt(rc6);
> +	struct drm_i915_gem_object *obj;
> +	struct i915_vma *vma;
> +	void *batch;
> +	struct i915_gem_ww_ctx ww;
> +	int err;
> +
> +	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
> +	if (IS_ERR(obj))
> +		return PTR_ERR(obj);
> +
> +	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
> +	if (IS_ERR(vma)){
> +		err = PTR_ERR(vma);
> +		goto err;
> +	}
> +	rc6->vma=vma;
> +	i915_gem_ww_ctx_init(&ww, true);
> +retry:
> +	err = i915_gem_object_lock(obj, &ww);
> +	if (!err)
> +		err = i915_ggtt_pin(rc6->vma, &ww, 0, PIN_HIGH);
> +	if (err)
> +		goto err_ww_fini;
> +
> +	batch = i915_gem_object_pin_map(obj, I915_MAP_WB);
> +	if (IS_ERR(batch)) {
> +		err = PTR_ERR(batch);
> +		goto err_unpin;
> +	}
> +	rc6->rc6_wa_bb = batch;
> +	return 0;
> +err_unpin:
> +	if (err)
> +		i915_vma_unpin(rc6->vma);
> +err_ww_fini:
> +	if (err == -EDEADLK) {
> +		err = i915_gem_ww_ctx_backoff(&ww);
> +		if (!err)
> +			goto retry;
> +	}
> +	i915_gem_ww_ctx_fini(&ww);
> +
> +	if (err)
> +		i915_vma_put(rc6->vma);
> +err:
> +	i915_gem_object_put(obj);
> +	return err;
> +}
> +
> +void rc6_wa_bb_ctx_wa_fini(struct intel_rc6 *rc6)
> +{
> +	i915_vma_unpin(rc6->vma);
> +	i915_vma_put(rc6->vma);
> +}
> +
>   static void gen11_rc6_enable(struct intel_rc6 *rc6)
>   {
>   	struct intel_gt *gt = rc6_to_gt(rc6);
> @@ -616,6 +676,9 @@ void intel_rc6_init(struct intel_rc6 *rc6)
>   		err = chv_rc6_init(rc6);
>   	else if (IS_VALLEYVIEW(i915))
>   		err = vlv_rc6_init(rc6);
> +	else if ((GRAPHICS_VER_FULL(i915) >= IP_VER(12, 0)) &&
> +		 (GRAPHICS_VER_FULL(i915) <= IP_VER(12, 70)))
> +		err = rc6_wa_bb_ctx_init(rc6);
>   	else
>   		err = 0;
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
> index cd4587098162a..643fd4e839ad4 100644
> --- a/drivers/gpu/drm/i915/gt/intel_rc6_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
> @@ -33,6 +33,9 @@ struct intel_rc6 {
>   
>   	struct drm_i915_gem_object *pctx;
>   
> +	u32 *rc6_wa_bb;
> +	struct i915_vma *vma;
> +
>   	bool supported : 1;
>   	bool enabled : 1;
>   	bool manual : 1;
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 4d2dece960115..d20afb318d857 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -14,6 +14,7 @@
>   #include "intel_gt_regs.h"
>   #include "intel_ring.h"
>   #include "intel_workarounds.h"
> +#include "intel_rc6.h"
>   
>   /**
>    * DOC: Hardware workarounds
> @@ -3132,6 +3133,41 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
>   			   true);
>   }
>   
> +static void
> +populate_wa_bb_ctx(struct intel_engine_cs *engine, struct i915_wa_list *wal)
> +{
> +	struct intel_rc6 *rc6 = &engine->gt->rc6;
> +	struct intel_uncore *uncore = engine->gt->uncore;
> +	const struct i915_wa *wa;
> +	int i;
> +	u32 *rc6_wa_bb;
> +
> +	if (!rc6->vma->obj) return;
> +
> +	rc6_wa_bb = rc6->rc6_wa_bb;
> +	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
> +		if (i915_mmio_reg_offset(wa->mcr_reg) ==
> +		    i915_mmio_reg_offset(GEN8_HALF_SLICE_CHICKEN1) ||
> +		    ((i915_mmio_reg_offset(wa->mcr_reg) >=
> +		      i915_mmio_reg_offset(HALF_SLICE_CHICKEN2)) &&
> +		     (i915_mmio_reg_offset(wa->mcr_reg) <=
> +		      i915_mmio_reg_offset(GEN9_HALF_SLICE_CHICKEN7)))) {


Guessing this is where SAMPLER_MODE is missing.


> +			*rc6_wa_bb++ = MI_NOOP;
> +			*rc6_wa_bb++ = MI_LOAD_REGISTER_IMM(1);
> +			*rc6_wa_bb++ = i915_mmio_reg_offset(wa->mcr_reg);
> +			*rc6_wa_bb++ = wa->set;
> +			*rc6_wa_bb++ = MI_NOOP;
> +                }
> +	}
> +	*rc6_wa_bb++ = MI_BATCH_BUFFER_END;
> +
> +	i915_gem_object_flush_map(rc6->vma->obj);
> +	intel_uncore_write(uncore, CTX_WA_PTR,
> +			   REG_FIELD_PREP(CTX_WA_PTR_ADDR_MASK,
> +					  i915_vma_offset(rc6->vma) & GENMASK(19, 0)) |
> +			   CTX_WA_VALID);
> +}
> +
>   static void
>   engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>   {
> @@ -3154,6 +3190,10 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal
>   		rcs_engine_wa_init(engine, wal);
>   	else
>   		xcs_engine_wa_init(engine, wal);
> +
> +	if ((GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 0)) &&
> +	    (GRAPHICS_VER_FULL(engine->i915) <= IP_VER(12, 70)))
> +		populate_wa_bb_ctx(engine, wal);
>   }
>   
>   void intel_engine_init_workarounds(struct intel_engine_cs *engine)



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Introduce Wa_14011274333
  2023-07-12 23:34 [Intel-gfx] [PATCH] drm/i915: Introduce Wa_14011274333 Matt Atwood
  2023-07-13  0:04 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for " Patchwork
  2023-07-13 12:00 ` [Intel-gfx] [PATCH] " Lionel Landwerlin
@ 2023-07-13 17:25 ` Matt Roper
  2 siblings, 0 replies; 4+ messages in thread
From: Matt Roper @ 2023-07-13 17:25 UTC (permalink / raw)
  To: Matt Atwood; +Cc: intel-gfx

On Wed, Jul 12, 2023 at 04:34:15PM -0700, Matt Atwood wrote:
> Wa_14011274333 applies to RKL, ADL-S, ADL-P and TGL. ALlocate buffer
> pinned to GGTT and add WA to restore impacted registers.

You should explain the approach you're taking to implement this
workaround since someone reading the workaround description isn't really
going to understand how your code here addresses it.

> 
> v2: use correct lineage number, more generically apply workarounds for
> all registers impacted, move workaround to gt/intel_workarounds.c
> (MattR)
> 
> Based off patch by Tilak Tangudu.
> 
> Signed-off-by: Matt Atwood <matthew.s.atwood@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h     |  5 ++
>  drivers/gpu/drm/i915/gt/intel_rc6.c         | 63 +++++++++++++++++++++
>  drivers/gpu/drm/i915/gt/intel_rc6_types.h   |  3 +
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 40 +++++++++++++
>  4 files changed, 111 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index 718cb2c80f79e..eaee35ecbc8d3 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -83,6 +83,11 @@
>  #define   MTL_MCR_GROUPID			REG_GENMASK(11, 8)
>  #define   MTL_MCR_INSTANCEID			REG_GENMASK(3, 0)
>  
> +#define CTX_WA_PTR				_MMIO(0x2058)

CTX_WA_PTR is a register that exists for every engine, not just the RCS
engine.  This should be a parameterized definition in the engine reg
file, not here.

> +#define  CTX_WA_PTR_ADDR_MASK			REG_GENMASK(31,12)
> +#define  CTX_WA_TYPE_MASK			REG_GENMASK(4,3)
> +#define  CTX_WA_VALID				REG_BIT(0)
> +
>  #define IPEIR_I965				_MMIO(0x2064)
>  #define IPEHR_I965				_MMIO(0x2068)
>  
> diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
> index 58bb1c55294c9..6baa341814da7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_rc6.c
> +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
> @@ -14,6 +14,7 @@
>  #include "intel_gt.h"
>  #include "intel_gt_pm.h"
>  #include "intel_gt_regs.h"
> +#include "intel_gpu_commands.h"
>  #include "intel_pcode.h"
>  #include "intel_rc6.h"
>  
> @@ -53,6 +54,65 @@ static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc)
>  	return rc6_to_gt(rc)->i915;
>  }
>  
> +static int rc6_wa_bb_ctx_init(struct intel_rc6 *rc6)
> +{
> +	struct drm_i915_private *i915 = rc6_to_i915(rc6);
> +	struct intel_gt *gt = rc6_to_gt(rc6);
> +	struct drm_i915_gem_object *obj;
> +	struct i915_vma *vma;
> +	void *batch;
> +	struct i915_gem_ww_ctx ww;
> +	int err;
> +
> +	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);

Should we be using i915_gem_object_create_internal() here?  This is
something that's permanently pinned and we don't really need a swappable
storage, right?

> +	if (IS_ERR(obj))
> +		return PTR_ERR(obj);
> +
> +	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
> +	if (IS_ERR(vma)){
> +		err = PTR_ERR(vma);
> +		goto err;
> +	}
> +	rc6->vma=vma;

Coding style isn't correct on this line.

> +	i915_gem_ww_ctx_init(&ww, true);
> +retry:
> +	err = i915_gem_object_lock(obj, &ww);
> +	if (!err)
> +		err = i915_ggtt_pin(rc6->vma, &ww, 0, PIN_HIGH);
> +	if (err)
> +		goto err_ww_fini;
> +
> +	batch = i915_gem_object_pin_map(obj, I915_MAP_WB);
> +	if (IS_ERR(batch)) {
> +		err = PTR_ERR(batch);
> +		goto err_unpin;
> +	}
> +	rc6->rc6_wa_bb = batch;
> +	return 0;
> +err_unpin:
> +	if (err)

Isn't this redundant?

> +		i915_vma_unpin(rc6->vma);
> +err_ww_fini:
> +	if (err == -EDEADLK) {
> +		err = i915_gem_ww_ctx_backoff(&ww);
> +		if (!err)
> +			goto retry;
> +	}
> +	i915_gem_ww_ctx_fini(&ww);
> +
> +	if (err)

And isn't this redundant too?

> +		i915_vma_put(rc6->vma);
> +err:
> +	i915_gem_object_put(obj);
> +	return err;
> +}
> +
> +void rc6_wa_bb_ctx_wa_fini(struct intel_rc6 *rc6)
> +{
> +	i915_vma_unpin(rc6->vma);
> +	i915_vma_put(rc6->vma);

Don't we need to put the object here too?  Actually, can we use
i915_vma_unpin_and_release here?

Also, is this function ever called anywhere?

> +}
> +
>  static void gen11_rc6_enable(struct intel_rc6 *rc6)
>  {
>  	struct intel_gt *gt = rc6_to_gt(rc6);
> @@ -616,6 +676,9 @@ void intel_rc6_init(struct intel_rc6 *rc6)
>  		err = chv_rc6_init(rc6);
>  	else if (IS_VALLEYVIEW(i915))
>  		err = vlv_rc6_init(rc6);
> +	else if ((GRAPHICS_VER_FULL(i915) >= IP_VER(12, 0)) &&
> +		 (GRAPHICS_VER_FULL(i915) <= IP_VER(12, 70)))

This doesn't match the platforms that the commit message said it should
apply to.

Also, if we have complicated version checks for a workaround that's
spread throughout the code, they should be pulled out to a helper
function/macro so that we can change the condition in just one place if
the bounds change in the future.

> +		err = rc6_wa_bb_ctx_init(rc6);
>  	else
>  		err = 0;
>  
> diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
> index cd4587098162a..643fd4e839ad4 100644
> --- a/drivers/gpu/drm/i915/gt/intel_rc6_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
> @@ -33,6 +33,9 @@ struct intel_rc6 {
>  
>  	struct drm_i915_gem_object *pctx;
>  
> +	u32 *rc6_wa_bb;
> +	struct i915_vma *vma;
> +
>  	bool supported : 1;
>  	bool enabled : 1;
>  	bool manual : 1;
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 4d2dece960115..d20afb318d857 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -14,6 +14,7 @@
>  #include "intel_gt_regs.h"
>  #include "intel_ring.h"
>  #include "intel_workarounds.h"
> +#include "intel_rc6.h"
>  
>  /**
>   * DOC: Hardware workarounds
> @@ -3132,6 +3133,41 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
>  			   true);
>  }
>  
> +static void
> +populate_wa_bb_ctx(struct intel_engine_cs *engine, struct i915_wa_list *wal)
> +{
> +	struct intel_rc6 *rc6 = &engine->gt->rc6;
> +	struct intel_uncore *uncore = engine->gt->uncore;
> +	const struct i915_wa *wa;
> +	int i;
> +	u32 *rc6_wa_bb;
> +
> +	if (!rc6->vma->obj) return;
> +
> +	rc6_wa_bb = rc6->rc6_wa_bb;
> +	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
> +		if (i915_mmio_reg_offset(wa->mcr_reg) ==
> +		    i915_mmio_reg_offset(GEN8_HALF_SLICE_CHICKEN1) ||
> +		    ((i915_mmio_reg_offset(wa->mcr_reg) >=
> +		      i915_mmio_reg_offset(HALF_SLICE_CHICKEN2)) &&
> +		     (i915_mmio_reg_offset(wa->mcr_reg) <=
> +		      i915_mmio_reg_offset(GEN9_HALF_SLICE_CHICKEN7)))) {

I think it's more confusing to use symbolic names for things that are
really supposed to be the bounds of register offset ranges.  I think
it's fine to use numeric offsets as long as it's clear what those
offsets actually represent (in this case the TDL power context): If you
pull reg_in_range_table() out of the perf code and make it available
elsewhere in the driver, then something like this would work:

        static const struct i915_range tdl_pwr_ctx[] = {
                { .start = 0xE100, .end = 0xE100 },
                { .start = 0xE180, .end = 0xE194 },
                {}
        };

        offset = i915_mmio_reg_offset(wa->mcr_reg);
        if (i915_reg_in_range_table(offset, tdl_pwr_ctx)) {
                ...

If this workaround winds up getting extended to other platforms that
have different TDL contexts, then it's also more straightforward to just
provide an additional table for the new platforms.

> +			*rc6_wa_bb++ = MI_NOOP;
> +			*rc6_wa_bb++ = MI_LOAD_REGISTER_IMM(1);
> +			*rc6_wa_bb++ = i915_mmio_reg_offset(wa->mcr_reg);
> +			*rc6_wa_bb++ = wa->set;
> +			*rc6_wa_bb++ = MI_NOOP;

You could potentially use a single MI_LRI to load all of the registers
in one instruction.  But if you want to do them one by one, then you
need to make sure you're handling qword alignment properly.  Depending
on how many registers you find in the list, you might end up with an odd
or even number of dwords.  That means you probably don't want one of
these MI_NOOPs inside the loop.

> +                }
> +	}
> +	*rc6_wa_bb++ = MI_BATCH_BUFFER_END;
> +
> +	i915_gem_object_flush_map(rc6->vma->obj);
> +	intel_uncore_write(uncore, CTX_WA_PTR,
> +			   REG_FIELD_PREP(CTX_WA_PTR_ADDR_MASK,
> +					  i915_vma_offset(rc6->vma) & GENMASK(19, 0)) |

These are the wrong bits.  You want the high bits now the low ones.  The
bottom 12 are guaranteed to be 0's (due to page alignment), so the
hardware doesn't need to know about them.

Also CTX_WA_PTR is a context register, but you're only programming it a
single time per engine in engine_init_workarounds.  You need this to be
part of the golden context instead so that it applies to all clients.


Matt

> +			   CTX_WA_VALID);
> +}
> +
>  static void
>  engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>  {
> @@ -3154,6 +3190,10 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal
>  		rcs_engine_wa_init(engine, wal);
>  	else
>  		xcs_engine_wa_init(engine, wal);
> +
> +	if ((GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 0)) &&
> +	    (GRAPHICS_VER_FULL(engine->i915) <= IP_VER(12, 70)))
> +		populate_wa_bb_ctx(engine, wal);
>  }
>  
>  void intel_engine_init_workarounds(struct intel_engine_cs *engine)
> -- 
> 2.40.1
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-07-13 17:25 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-12 23:34 [Intel-gfx] [PATCH] drm/i915: Introduce Wa_14011274333 Matt Atwood
2023-07-13  0:04 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for " Patchwork
2023-07-13 12:00 ` [Intel-gfx] [PATCH] " Lionel Landwerlin
2023-07-13 17:25 ` Matt Roper

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).