All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] Add BDW workarounds to golden render state
@ 2014-08-08  9:52 arun.siluvery
  2014-08-08  9:52 ` [RFC] drm/i915/bdw: Apply workarounds to the " arun.siluvery
  0 siblings, 1 reply; 7+ messages in thread
From: arun.siluvery @ 2014-08-08  9:52 UTC (permalink / raw)
  To: intel-gfx

From: Arun Siluvery <arun.siluvery@linux.intel.com>

In this patch workarounds for BDW are applied to golden render state.
Only those registers that are part of register state are added to this batch.
The remaining workarounds stay in their current place, init_clock_gating(), as
they are not affected by a gpu reset. I can send another patch that moves them
to the render ring init function, but during testing I found their state
doesn't change after reset; please give your comments on this.

Arun Siluvery (1):
  drm/i915/bdw: Apply workarounds to the golden render state

 drivers/gpu/drm/i915/intel_pm.c               | 50 ---------------------
 drivers/gpu/drm/i915/intel_renderstate_gen8.c | 62 +++++++++++++++++----------
 2 files changed, 39 insertions(+), 73 deletions(-)

-- 
2.0.4

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [RFC] drm/i915/bdw: Apply workarounds to the golden render state
  2014-08-08  9:52 [RFC] Add BDW workarounds to golden render state arun.siluvery
@ 2014-08-08  9:52 ` arun.siluvery
  2014-08-08  9:57   ` Chris Wilson
  2014-08-08 12:20   ` Ville Syrjälä
  0 siblings, 2 replies; 7+ messages in thread
From: arun.siluvery @ 2014-08-08  9:52 UTC (permalink / raw)
  To: intel-gfx

From: Arun Siluvery <arun.siluvery@linux.intel.com>

Workarounds for bdw are currently applied in init_clock_gating() but they
are lost following a gpu reset. Some of the registers are part of register
state context and they are restored with every context switch so initializing
WAs in golden render state ensures that they are applied even when we start
with an uninitialized context or during hw initialization followed by a reset.

Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_pm.c               | 50 ---------------------
 drivers/gpu/drm/i915/intel_renderstate_gen8.c | 62 +++++++++++++++++----------
 2 files changed, 39 insertions(+), 73 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 1ddd4df..ab64b64 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -5402,38 +5402,11 @@ static void gen8_init_clock_gating(struct drm_device *dev)
 	/* FIXME(BDW): Check all the w/a, some might only apply to
 	 * pre-production hw. */
 
-	/* WaDisablePartialInstShootdown:bdw */
-	I915_WRITE(GEN8_ROW_CHICKEN,
-		   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
-
-	/* WaDisableThreadStallDopClockGating:bdw */
-	/* FIXME: Unclear whether we really need this on production bdw. */
-	I915_WRITE(GEN8_ROW_CHICKEN,
-		   _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
-
-	/*
-	 * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
-	 * pre-production hardware
-	 */
-	I915_WRITE(HALF_SLICE_CHICKEN3,
-		   _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS));
-	I915_WRITE(HALF_SLICE_CHICKEN3,
-		   _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
 	I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE));
 
 	I915_WRITE(_3D_CHICKEN3,
 		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2)));
 
-	I915_WRITE(COMMON_SLICE_CHICKEN2,
-		   _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
-
-	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
-		   _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
-
-	/* WaDisableDopClockGating:bdw May not be needed for production */
-	I915_WRITE(GEN7_ROW_CHICKEN2,
-		   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
-
 	/* WaSwitchSolVfFArbitrationPriority:bdw */
 	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
 
@@ -5448,41 +5421,18 @@ static void gen8_init_clock_gating(struct drm_device *dev)
 			   BDW_DPRS_MASK_VBLANK_SRD);
 	}
 
-	/* Use Force Non-Coherent whenever executing a 3D context. This is a
-	 * workaround for for a possible hang in the unlikely event a TLB
-	 * invalidation occurs during a PSD flush.
-	 */
-	I915_WRITE(HDC_CHICKEN0,
-		   I915_READ(HDC_CHICKEN0) |
-		   _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
-
 	/* WaVSRefCountFullforceMissDisable:bdw */
 	/* WaDSRefCountFullforceMissDisable:bdw */
 	I915_WRITE(GEN7_FF_THREAD_MODE,
 		   I915_READ(GEN7_FF_THREAD_MODE) &
 		   ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
 
-	/*
-	 * BSpec recommends 8x4 when MSAA is used,
-	 * however in practice 16x4 seems fastest.
-	 *
-	 * Note that PS/WM thread counts depend on the WIZ hashing
-	 * disable bit, which we don't touch here, but it's good
-	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
-	 */
-	I915_WRITE(GEN7_GT_MODE,
-		   GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
-
 	I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
 		   _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
 
 	/* WaDisableSDEUnitClockGating:bdw */
 	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
 		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
-
-	/* Wa4x4STCOptimizationDisable:bdw */
-	I915_WRITE(CACHE_MODE_1,
-		   _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
 }
 
 static void haswell_init_clock_gating(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen8.c b/drivers/gpu/drm/i915/intel_renderstate_gen8.c
index 75ef1b5..0b26783 100644
--- a/drivers/gpu/drm/i915/intel_renderstate_gen8.c
+++ b/drivers/gpu/drm/i915/intel_renderstate_gen8.c
@@ -1,14 +1,38 @@
 #include "intel_renderstate.h"
 
 static const u32 gen8_null_state_relocs[] = {
-	0x00000048,
-	0x00000050,
-	0x00000060,
-	0x000003ec,
+	0x000000a8,
+	0x000000b0,
+	0x000000c0,
+	0x0000044c,
 	-1,
 };
 
 static const u32 gen8_null_state_batch[] = {
+	0x11000001,
+	0x0000e4f0,
+	0x83208320,
+	0x11000001,
+	0x0000e4f4,
+	0x00010001,
+	0x11000001,
+	0x0000e184,
+	0x01020102,
+	0x11000001,
+	0x0000e100,
+	0x04000400,
+	0x11000001,
+	0x00007014,
+	0x00010001,
+	0x11000001,
+	0x00007300,
+	0x00100010,
+	0x11000001,
+	0x00007004,
+	0x00400040,
+	0x11000001,
+	0x00007008,
+	0x02800200,
 	0x69040000,
 	0x61020001,
 	0x00000000,
@@ -40,9 +64,9 @@ static const u32 gen8_null_state_batch[] = {
 	0xfffff001,
 	0x00001001,
 	0x78230000,
-	0x000006e0,
+	0x00000720,
 	0x78210000,
-	0x00000700,
+	0x00000740,
 	0x78300000,
 	0x08010040,
 	0x78330000,
@@ -52,9 +76,9 @@ static const u32 gen8_null_state_batch[] = {
 	0x78320000,
 	0x08000000,
 	0x78240000,
-	0x00000641,
+	0x00000681,
 	0x780e0000,
-	0x00000601,
+	0x00000641,
 	0x780d0000,
 	0x00000000,
 	0x78180000,
@@ -199,9 +223,9 @@ static const u32 gen8_null_state_batch[] = {
 	0x00000000,
 	0x00000000,
 	0x782a0000,
-	0x00000480,
+	0x000004c0,
 	0x782f0000,
-	0x00000540,
+	0x00000580,
 	0x78140000,
 	0x00000800,
 	0x78170009,
@@ -216,7 +240,7 @@ static const u32 gen8_null_state_batch[] = {
 	0x00000000,
 	0x00000000,
 	0x7820000a,
-	0x00000580,
+	0x000005c0,
 	0x00000000,
 	0x08080000,
 	0x00000000,
@@ -232,7 +256,7 @@ static const u32 gen8_null_state_batch[] = {
 	0x784f0000,
 	0x80000100,
 	0x780f0000,
-	0x00000740,
+	0x00000780,
 	0x78050006,
 	0x00000000,
 	0x00000000,
@@ -260,7 +284,7 @@ static const u32 gen8_null_state_batch[] = {
 	0x00000000,
 	0x78080003,
 	0x00006000,
-	0x000005e0,	 /* reloc */
+	0x00000620,	 /* reloc */
 	0x00000000,
 	0x00000000,
 	0x78090005,
@@ -289,16 +313,8 @@ static const u32 gen8_null_state_batch[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x000004c0,	 /* state start */
-	0x00000500,
+	0x00000500,	 /* state start */
+	0x00000540,
 	0x00000000,
 	0x00000000,
 	0x00000000,
-- 
2.0.4

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [RFC] drm/i915/bdw: Apply workarounds to the golden render state
  2014-08-08  9:52 ` [RFC] drm/i915/bdw: Apply workarounds to the " arun.siluvery
@ 2014-08-08  9:57   ` Chris Wilson
  2014-08-08 10:34     ` Siluvery, Arun
  2014-08-08 12:20   ` Ville Syrjälä
  1 sibling, 1 reply; 7+ messages in thread
From: Chris Wilson @ 2014-08-08  9:57 UTC (permalink / raw)
  To: arun.siluvery; +Cc: intel-gfx

On Fri, Aug 08, 2014 at 10:52:57AM +0100, arun.siluvery@linux.intel.com wrote:
> From: Arun Siluvery <arun.siluvery@linux.intel.com>
> 
> Workarounds for bdw are currently applied in init_clock_gating() but they
> are lost following a gpu reset. Some of the registers are part of register
> state context and they are restored with every context switch so initializing
> WAs in golden render state ensures that they are applied even when we start
> with an uninitialized context or during hw initialization followed by a reset.

Interesting, but let's try to keep the opaque blobs minimal. The
comments for w/a are even more valuable than the code.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC] drm/i915/bdw: Apply workarounds to the golden render state
  2014-08-08  9:57   ` Chris Wilson
@ 2014-08-08 10:34     ` Siluvery, Arun
  2014-08-08 10:39       ` Chris Wilson
  0 siblings, 1 reply; 7+ messages in thread
From: Siluvery, Arun @ 2014-08-08 10:34 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 08/08/2014 10:57, Chris Wilson wrote:
> On Fri, Aug 08, 2014 at 10:52:57AM +0100, arun.siluvery@linux.intel.com wrote:
>> From: Arun Siluvery <arun.siluvery@linux.intel.com>
>>
>> Workarounds for bdw are currently applied in init_clock_gating() but they
>> are lost following a gpu reset. Some of the registers are part of register
>> state context and they are restored with every context switch so initializing
>> WAs in golden render state ensures that they are applied even when we start
>> with an uninitialized context or during hw initialization followed by a reset.
>
> Interesting, but let's try to keep the opaque blobs minimal. The
> comments for w/a are even more valuable than the code.
I agree, I will add comments to each workaround.
We are looking at adding the workarounds to the null batch in the render
state setup function itself. Do you have any comments on that approach?

regards
Arun

> -Chris
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC] drm/i915/bdw: Apply workarounds to the golden render state
  2014-08-08 10:34     ` Siluvery, Arun
@ 2014-08-08 10:39       ` Chris Wilson
  0 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2014-08-08 10:39 UTC (permalink / raw)
  To: Siluvery, Arun; +Cc: intel-gfx

On Fri, Aug 08, 2014 at 11:34:07AM +0100, Siluvery, Arun wrote:
> On 08/08/2014 10:57, Chris Wilson wrote:
> >On Fri, Aug 08, 2014 at 10:52:57AM +0100, arun.siluvery@linux.intel.com wrote:
> >>From: Arun Siluvery <arun.siluvery@linux.intel.com>
> >>
> >>Workarounds for bdw are currently applied in init_clock_gating() but they
> >>are lost following a gpu reset. Some of the registers are part of register
> >>state context and they are restored with every context switch so initializing
> >>WAs in golden render state ensures that they are applied even when we start
> >>with an uninitialized context or during hw initialization followed by a reset.
> >
> >Interesting, but let's try to keep the opaque blobs minimal. The
> >comments for w/a are even more valuable than the code.
> I agree, I will add comments to each workaround.
> We are looking at adding the workarounds to the null batch in the render
> state setup function itself. Do you have any comments on that
> approach?

Other than changing its name, that does actually seem like a useful
juncture to do things since it is the first userspace operation on the
RCS ring within that context. And if it has to be within the context,
that is indeed more troublesome to do without adding to the renderstate.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC] drm/i915/bdw: Apply workarounds to the golden render state
  2014-08-08  9:52 ` [RFC] drm/i915/bdw: Apply workarounds to the " arun.siluvery
  2014-08-08  9:57   ` Chris Wilson
@ 2014-08-08 12:20   ` Ville Syrjälä
  2014-08-08 13:11     ` Siluvery, Arun
  1 sibling, 1 reply; 7+ messages in thread
From: Ville Syrjälä @ 2014-08-08 12:20 UTC (permalink / raw)
  To: arun.siluvery; +Cc: intel-gfx

On Fri, Aug 08, 2014 at 10:52:57AM +0100, arun.siluvery@linux.intel.com wrote:
> From: Arun Siluvery <arun.siluvery@linux.intel.com>
> 
> Workarounds for bdw are currently applied in init_clock_gating() but they
> are lost following a gpu reset. Some of the registers are part of register
> state context and they are restored with every context switch so initializing
> WAs in golden render state ensures that they are applied even when we start
> with an uninitialized context or during hw initialization followed by a reset.

This approach might require separate null states for BDW vs. CHV and IVB
vs. HSW vs. VLV, which seems a bit unfortunate. Might be better to just
issue the w/a register writes via LRIs from the code as part of the null
state load.

Although I don't actually understand how this improves things as opposed
to just applying the w/as via mmio writes. Does it?

> 
> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/intel_pm.c               | 50 ---------------------
>  drivers/gpu/drm/i915/intel_renderstate_gen8.c | 62 +++++++++++++++++----------
>  2 files changed, 39 insertions(+), 73 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 1ddd4df..ab64b64 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -5402,38 +5402,11 @@ static void gen8_init_clock_gating(struct drm_device *dev)
>  	/* FIXME(BDW): Check all the w/a, some might only apply to
>  	 * pre-production hw. */
>  
> -	/* WaDisablePartialInstShootdown:bdw */
> -	I915_WRITE(GEN8_ROW_CHICKEN,
> -		   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
> -
> -	/* WaDisableThreadStallDopClockGating:bdw */
> -	/* FIXME: Unclear whether we really need this on production bdw. */
> -	I915_WRITE(GEN8_ROW_CHICKEN,
> -		   _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
> -
> -	/*
> -	 * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
> -	 * pre-production hardware
> -	 */
> -	I915_WRITE(HALF_SLICE_CHICKEN3,
> -		   _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS));
> -	I915_WRITE(HALF_SLICE_CHICKEN3,
> -		   _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
>  	I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE));
>  
>  	I915_WRITE(_3D_CHICKEN3,
>  		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2)));
>  
> -	I915_WRITE(COMMON_SLICE_CHICKEN2,
> -		   _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
> -
> -	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
> -		   _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
> -
> -	/* WaDisableDopClockGating:bdw May not be needed for production */
> -	I915_WRITE(GEN7_ROW_CHICKEN2,
> -		   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
> -
>  	/* WaSwitchSolVfFArbitrationPriority:bdw */
>  	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
>  
> @@ -5448,41 +5421,18 @@ static void gen8_init_clock_gating(struct drm_device *dev)
>  			   BDW_DPRS_MASK_VBLANK_SRD);
>  	}
>  
> -	/* Use Force Non-Coherent whenever executing a 3D context. This is a
> -	 * workaround for for a possible hang in the unlikely event a TLB
> -	 * invalidation occurs during a PSD flush.
> -	 */
> -	I915_WRITE(HDC_CHICKEN0,
> -		   I915_READ(HDC_CHICKEN0) |
> -		   _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
> -
>  	/* WaVSRefCountFullforceMissDisable:bdw */
>  	/* WaDSRefCountFullforceMissDisable:bdw */
>  	I915_WRITE(GEN7_FF_THREAD_MODE,
>  		   I915_READ(GEN7_FF_THREAD_MODE) &
>  		   ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
>  
> -	/*
> -	 * BSpec recommends 8x4 when MSAA is used,
> -	 * however in practice 16x4 seems fastest.
> -	 *
> -	 * Note that PS/WM thread counts depend on the WIZ hashing
> -	 * disable bit, which we don't touch here, but it's good
> -	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
> -	 */
> -	I915_WRITE(GEN7_GT_MODE,
> -		   GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
> -
>  	I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
>  		   _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
>  
>  	/* WaDisableSDEUnitClockGating:bdw */
>  	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
>  		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
> -
> -	/* Wa4x4STCOptimizationDisable:bdw */
> -	I915_WRITE(CACHE_MODE_1,
> -		   _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
>  }
>  
>  static void haswell_init_clock_gating(struct drm_device *dev)
> diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen8.c b/drivers/gpu/drm/i915/intel_renderstate_gen8.c
> index 75ef1b5..0b26783 100644
> --- a/drivers/gpu/drm/i915/intel_renderstate_gen8.c
> +++ b/drivers/gpu/drm/i915/intel_renderstate_gen8.c
> @@ -1,14 +1,38 @@
>  #include "intel_renderstate.h"
>  
>  static const u32 gen8_null_state_relocs[] = {
> -	0x00000048,
> -	0x00000050,
> -	0x00000060,
> -	0x000003ec,
> +	0x000000a8,
> +	0x000000b0,
> +	0x000000c0,
> +	0x0000044c,
>  	-1,
>  };
>  
>  static const u32 gen8_null_state_batch[] = {
> +	0x11000001,
> +	0x0000e4f0,
> +	0x83208320,
> +	0x11000001,
> +	0x0000e4f4,
> +	0x00010001,
> +	0x11000001,
> +	0x0000e184,
> +	0x01020102,
> +	0x11000001,
> +	0x0000e100,
> +	0x04000400,
> +	0x11000001,
> +	0x00007014,
> +	0x00010001,
> +	0x11000001,
> +	0x00007300,
> +	0x00100010,
> +	0x11000001,
> +	0x00007004,
> +	0x00400040,
> +	0x11000001,
> +	0x00007008,
> +	0x02800200,
>  	0x69040000,
>  	0x61020001,
>  	0x00000000,
> @@ -40,9 +64,9 @@ static const u32 gen8_null_state_batch[] = {
>  	0xfffff001,
>  	0x00001001,
>  	0x78230000,
> -	0x000006e0,
> +	0x00000720,
>  	0x78210000,
> -	0x00000700,
> +	0x00000740,
>  	0x78300000,
>  	0x08010040,
>  	0x78330000,
> @@ -52,9 +76,9 @@ static const u32 gen8_null_state_batch[] = {
>  	0x78320000,
>  	0x08000000,
>  	0x78240000,
> -	0x00000641,
> +	0x00000681,
>  	0x780e0000,
> -	0x00000601,
> +	0x00000641,
>  	0x780d0000,
>  	0x00000000,
>  	0x78180000,
> @@ -199,9 +223,9 @@ static const u32 gen8_null_state_batch[] = {
>  	0x00000000,
>  	0x00000000,
>  	0x782a0000,
> -	0x00000480,
> +	0x000004c0,
>  	0x782f0000,
> -	0x00000540,
> +	0x00000580,
>  	0x78140000,
>  	0x00000800,
>  	0x78170009,
> @@ -216,7 +240,7 @@ static const u32 gen8_null_state_batch[] = {
>  	0x00000000,
>  	0x00000000,
>  	0x7820000a,
> -	0x00000580,
> +	0x000005c0,
>  	0x00000000,
>  	0x08080000,
>  	0x00000000,
> @@ -232,7 +256,7 @@ static const u32 gen8_null_state_batch[] = {
>  	0x784f0000,
>  	0x80000100,
>  	0x780f0000,
> -	0x00000740,
> +	0x00000780,
>  	0x78050006,
>  	0x00000000,
>  	0x00000000,
> @@ -260,7 +284,7 @@ static const u32 gen8_null_state_batch[] = {
>  	0x00000000,
>  	0x78080003,
>  	0x00006000,
> -	0x000005e0,	 /* reloc */
> +	0x00000620,	 /* reloc */
>  	0x00000000,
>  	0x00000000,
>  	0x78090005,
> @@ -289,16 +313,8 @@ static const u32 gen8_null_state_batch[] = {
>  	0x00000000,
>  	0x00000000,
>  	0x00000000,
> -	0x00000000,
> -	0x00000000,
> -	0x00000000,
> -	0x00000000,
> -	0x00000000,
> -	0x00000000,
> -	0x00000000,
> -	0x00000000,
> -	0x000004c0,	 /* state start */
> -	0x00000500,
> +	0x00000500,	 /* state start */
> +	0x00000540,
>  	0x00000000,
>  	0x00000000,
>  	0x00000000,
> -- 
> 2.0.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC] drm/i915/bdw: Apply workarounds to the golden render state
  2014-08-08 12:20   ` Ville Syrjälä
@ 2014-08-08 13:11     ` Siluvery, Arun
  0 siblings, 0 replies; 7+ messages in thread
From: Siluvery, Arun @ 2014-08-08 13:11 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx

On 08/08/2014 13:20, Ville Syrjälä wrote:
> On Fri, Aug 08, 2014 at 10:52:57AM +0100, arun.siluvery@linux.intel.com wrote:
>> From: Arun Siluvery <arun.siluvery@linux.intel.com>
>>
>> Workarounds for bdw are currently applied in init_clock_gating() but they
>> are lost following a gpu reset. Some of the registers are part of register
>> state context and they are restored with every context switch so initializing
>> WAs in golden render state ensures that they are applied even when we start
>> with an uninitialized context or during hw initialization followed by a reset.
>
> This approach might require separate null states for BDW vs. CHV and IVB
> vs. HSW vs. VLV, which seems a bit unfortunate. Might be better to just
> issue the w/a register writes via LRIs from the code as part of the null
> state load.
>
Yes this is a better approach, I am currently changing the code to 
achieve this, not sure how easy it would be.

> Although I don't actually understand how this improves things as opposed
> to just applying the w/as via mmio writes. Does it?
>
I observed random behaviour with CACHE_MODE_1, which would simply lose the
applied workaround on the first context switch even though it is loaded with
inhibit==1; register values are not supposed to change, but this one was changing.

I think it is better to add them in null batch to ensure hardware starts 
with WAs applied.

regards
Arun

>>
>> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/intel_pm.c               | 50 ---------------------
>>   drivers/gpu/drm/i915/intel_renderstate_gen8.c | 62 +++++++++++++++++----------
>>   2 files changed, 39 insertions(+), 73 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
>> index 1ddd4df..ab64b64 100644
>> --- a/drivers/gpu/drm/i915/intel_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> @@ -5402,38 +5402,11 @@ static void gen8_init_clock_gating(struct drm_device *dev)
>>   	/* FIXME(BDW): Check all the w/a, some might only apply to
>>   	 * pre-production hw. */
>>
>> -	/* WaDisablePartialInstShootdown:bdw */
>> -	I915_WRITE(GEN8_ROW_CHICKEN,
>> -		   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
>> -
>> -	/* WaDisableThreadStallDopClockGating:bdw */
>> -	/* FIXME: Unclear whether we really need this on production bdw. */
>> -	I915_WRITE(GEN8_ROW_CHICKEN,
>> -		   _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
>> -
>> -	/*
>> -	 * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
>> -	 * pre-production hardware
>> -	 */
>> -	I915_WRITE(HALF_SLICE_CHICKEN3,
>> -		   _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS));
>> -	I915_WRITE(HALF_SLICE_CHICKEN3,
>> -		   _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
>>   	I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE));
>>
>>   	I915_WRITE(_3D_CHICKEN3,
>>   		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2)));
>>
>> -	I915_WRITE(COMMON_SLICE_CHICKEN2,
>> -		   _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
>> -
>> -	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
>> -		   _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
>> -
>> -	/* WaDisableDopClockGating:bdw May not be needed for production */
>> -	I915_WRITE(GEN7_ROW_CHICKEN2,
>> -		   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
>> -
>>   	/* WaSwitchSolVfFArbitrationPriority:bdw */
>>   	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
>>
>> @@ -5448,41 +5421,18 @@ static void gen8_init_clock_gating(struct drm_device *dev)
>>   			   BDW_DPRS_MASK_VBLANK_SRD);
>>   	}
>>
>> -	/* Use Force Non-Coherent whenever executing a 3D context. This is a
>> -	 * workaround for for a possible hang in the unlikely event a TLB
>> -	 * invalidation occurs during a PSD flush.
>> -	 */
>> -	I915_WRITE(HDC_CHICKEN0,
>> -		   I915_READ(HDC_CHICKEN0) |
>> -		   _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
>> -
>>   	/* WaVSRefCountFullforceMissDisable:bdw */
>>   	/* WaDSRefCountFullforceMissDisable:bdw */
>>   	I915_WRITE(GEN7_FF_THREAD_MODE,
>>   		   I915_READ(GEN7_FF_THREAD_MODE) &
>>   		   ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
>>
>> -	/*
>> -	 * BSpec recommends 8x4 when MSAA is used,
>> -	 * however in practice 16x4 seems fastest.
>> -	 *
>> -	 * Note that PS/WM thread counts depend on the WIZ hashing
>> -	 * disable bit, which we don't touch here, but it's good
>> -	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
>> -	 */
>> -	I915_WRITE(GEN7_GT_MODE,
>> -		   GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
>> -
>>   	I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
>>   		   _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
>>
>>   	/* WaDisableSDEUnitClockGating:bdw */
>>   	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
>>   		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
>> -
>> -	/* Wa4x4STCOptimizationDisable:bdw */
>> -	I915_WRITE(CACHE_MODE_1,
>> -		   _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
>>   }
>>
>>   static void haswell_init_clock_gating(struct drm_device *dev)
>> diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen8.c b/drivers/gpu/drm/i915/intel_renderstate_gen8.c
>> index 75ef1b5..0b26783 100644
>> --- a/drivers/gpu/drm/i915/intel_renderstate_gen8.c
>> +++ b/drivers/gpu/drm/i915/intel_renderstate_gen8.c
>> @@ -1,14 +1,38 @@
>>   #include "intel_renderstate.h"
>>
>>   static const u32 gen8_null_state_relocs[] = {
>> -	0x00000048,
>> -	0x00000050,
>> -	0x00000060,
>> -	0x000003ec,
>> +	0x000000a8,
>> +	0x000000b0,
>> +	0x000000c0,
>> +	0x0000044c,
>>   	-1,
>>   };
>>
>>   static const u32 gen8_null_state_batch[] = {
>> +	0x11000001,
>> +	0x0000e4f0,
>> +	0x83208320,
>> +	0x11000001,
>> +	0x0000e4f4,
>> +	0x00010001,
>> +	0x11000001,
>> +	0x0000e184,
>> +	0x01020102,
>> +	0x11000001,
>> +	0x0000e100,
>> +	0x04000400,
>> +	0x11000001,
>> +	0x00007014,
>> +	0x00010001,
>> +	0x11000001,
>> +	0x00007300,
>> +	0x00100010,
>> +	0x11000001,
>> +	0x00007004,
>> +	0x00400040,
>> +	0x11000001,
>> +	0x00007008,
>> +	0x02800200,
>>   	0x69040000,
>>   	0x61020001,
>>   	0x00000000,
>> @@ -40,9 +64,9 @@ static const u32 gen8_null_state_batch[] = {
>>   	0xfffff001,
>>   	0x00001001,
>>   	0x78230000,
>> -	0x000006e0,
>> +	0x00000720,
>>   	0x78210000,
>> -	0x00000700,
>> +	0x00000740,
>>   	0x78300000,
>>   	0x08010040,
>>   	0x78330000,
>> @@ -52,9 +76,9 @@ static const u32 gen8_null_state_batch[] = {
>>   	0x78320000,
>>   	0x08000000,
>>   	0x78240000,
>> -	0x00000641,
>> +	0x00000681,
>>   	0x780e0000,
>> -	0x00000601,
>> +	0x00000641,
>>   	0x780d0000,
>>   	0x00000000,
>>   	0x78180000,
>> @@ -199,9 +223,9 @@ static const u32 gen8_null_state_batch[] = {
>>   	0x00000000,
>>   	0x00000000,
>>   	0x782a0000,
>> -	0x00000480,
>> +	0x000004c0,
>>   	0x782f0000,
>> -	0x00000540,
>> +	0x00000580,
>>   	0x78140000,
>>   	0x00000800,
>>   	0x78170009,
>> @@ -216,7 +240,7 @@ static const u32 gen8_null_state_batch[] = {
>>   	0x00000000,
>>   	0x00000000,
>>   	0x7820000a,
>> -	0x00000580,
>> +	0x000005c0,
>>   	0x00000000,
>>   	0x08080000,
>>   	0x00000000,
>> @@ -232,7 +256,7 @@ static const u32 gen8_null_state_batch[] = {
>>   	0x784f0000,
>>   	0x80000100,
>>   	0x780f0000,
>> -	0x00000740,
>> +	0x00000780,
>>   	0x78050006,
>>   	0x00000000,
>>   	0x00000000,
>> @@ -260,7 +284,7 @@ static const u32 gen8_null_state_batch[] = {
>>   	0x00000000,
>>   	0x78080003,
>>   	0x00006000,
>> -	0x000005e0,	 /* reloc */
>> +	0x00000620,	 /* reloc */
>>   	0x00000000,
>>   	0x00000000,
>>   	0x78090005,
>> @@ -289,16 +313,8 @@ static const u32 gen8_null_state_batch[] = {
>>   	0x00000000,
>>   	0x00000000,
>>   	0x00000000,
>> -	0x00000000,
>> -	0x00000000,
>> -	0x00000000,
>> -	0x00000000,
>> -	0x00000000,
>> -	0x00000000,
>> -	0x00000000,
>> -	0x00000000,
>> -	0x000004c0,	 /* state start */
>> -	0x00000500,
>> +	0x00000500,	 /* state start */
>> +	0x00000540,
>>   	0x00000000,
>>   	0x00000000,
>>   	0x00000000,
>> --
>> 2.0.4
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2014-08-08 13:11 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-08-08  9:52 [RFC] Add BDW workarounds to golden render state arun.siluvery
2014-08-08  9:52 ` [RFC] drm/i915/bdw: Apply workarounds to the " arun.siluvery
2014-08-08  9:57   ` Chris Wilson
2014-08-08 10:34     ` Siluvery, Arun
2014-08-08 10:39       ` Chris Wilson
2014-08-08 12:20   ` Ville Syrjälä
2014-08-08 13:11     ` Siluvery, Arun

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.