* [PATCH 1/8] drm/i915: implement WaDisableL3CacheAging on VLV
@ 2012-10-18 18:07 Jesse Barnes
2012-10-18 18:07 ` [PATCH 2/8] drm/i915: implement WaDisableDopClockGatingisable on VLV and IVB Jesse Barnes
` (6 more replies)
0 siblings, 7 replies; 14+ messages in thread
From: Jesse Barnes @ 2012-10-18 18:07 UTC (permalink / raw)
To: intel-gfx
Needs to be set on every context restore as well, so set it as part of
the initial state so we can save/restore it. Note this removes the IVB
workaround value from VLV and uses the default value, just adding in the
L3 cache aging disable bit, since the IVB value is wrong for VLV.
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
drivers/gpu/drm/i915/i915_reg.h | 1 +
drivers/gpu/drm/i915/intel_pm.c | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c31ee5b..876ef96 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3449,6 +3449,7 @@
#define GEN7_L3CNTLREG1 0xB01C
#define GEN7_WA_FOR_GEN7_L3_CONTROL 0x3C4FFF8C
+#define GEN7_L3AGDIS (1<<19)
#define GEN7_L3_CHICKEN_MODE_REGISTER 0xB030
#define GEN7_WA_L3_CHICKEN_MODE 0x20000000
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 2b3cddf..049c7e2 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3613,7 +3613,7 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
/* WaApplyL3ControlAndL3ChickenMode requires those two on Ivy Bridge */
- I915_WRITE(GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
+ I915_WRITE(GEN7_L3CNTLREG1, I915_READ(GEN7_L3CNTLREG1) | GEN7_L3AGDIS);
I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
/* WaForceL3Serialization */
--
1.7.9.5
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 2/8] drm/i915: implement WaDisableDopClockGatingisable on VLV and IVB
2012-10-18 18:07 [PATCH 1/8] drm/i915: implement WaDisableL3CacheAging on VLV Jesse Barnes
@ 2012-10-18 18:07 ` Jesse Barnes
2012-10-18 18:07 ` [PATCH 3/8] drm/i915: implement WaForceL3Serialization " Jesse Barnes
` (5 subsequent siblings)
6 siblings, 0 replies; 14+ messages in thread
From: Jesse Barnes @ 2012-10-18 18:07 UTC (permalink / raw)
To: intel-gfx
v2: use correct register
v3: remove extra hunks, pull in register definitions & offset check directly
v4: add GT1 vs GT2 distinction for IVB portion (Ben)
References: https://bugs.freedesktop.org/show_bug.cgi?id=50233
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
drivers/gpu/drm/i915/i915_drv.c | 7 +++++++
drivers/gpu/drm/i915/i915_drv.h | 3 +++
drivers/gpu/drm/i915/i915_reg.h | 4 ++++
drivers/gpu/drm/i915/intel_pm.c | 13 ++++++++++++-
4 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 9e7e647..39282a4 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1129,6 +1129,13 @@ static bool IS_DISPLAYREG(u32 reg)
if (reg == GEN6_GDRST)
return false;
+ switch (reg) {
+ case GEN7_ROW_CHICKEN2:
+ return false;
+ default:
+ break;
+ }
+
return true;
}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4728d30..beef67f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1128,6 +1128,9 @@ struct drm_i915_file_private {
#define IS_IRONLAKE_D(dev) ((dev)->pci_device == 0x0042)
#define IS_IRONLAKE_M(dev) ((dev)->pci_device == 0x0046)
#define IS_IVYBRIDGE(dev) (INTEL_INFO(dev)->is_ivybridge)
+#define IS_IVB_GT1(dev) ((dev)->pci_device == 0x0156 || \
+ (dev)->pci_device == 0x0152 || \
+ (dev)->pci_device == 0x015a)
#define IS_VALLEYVIEW(dev) (INTEL_INFO(dev)->is_valleyview)
#define IS_HASWELL(dev) (INTEL_INFO(dev)->is_haswell)
#define IS_MOBILE(dev) (INTEL_INFO(dev)->is_mobile)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 876ef96..b07d309 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -4266,6 +4266,10 @@
#define GEN7_L3LOG_BASE 0xB070
#define GEN7_L3LOG_SIZE 0x80
+#define GEN7_ROW_CHICKEN2 0xe4f4
+#define GEN7_ROW_CHICKEN2_GT2 0xf4f4
+#define DOP_CLOCK_GATING_DISABLE (1<<0)
+
#define G4X_AUD_VID_DID 0x62020
#define INTEL_AUDIO_DEVCL 0x808629FB
#define INTEL_AUDIO_DEVBLC 0x80862801
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 049c7e2..15328a3 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3535,7 +3535,14 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
I915_WRITE(GEN7_L3CNTLREG1,
GEN7_WA_FOR_GEN7_L3_CONTROL);
I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
- GEN7_WA_L3_CHICKEN_MODE);
+ GEN7_WA_L3_CHICKEN_MODE);
+ if (IS_IVB_GT1(dev))
+ I915_WRITE(GEN7_ROW_CHICKEN2,
+ _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+ else
+ I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
+ _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+
/* WaForceL3Serialization */
I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
@@ -3620,6 +3627,10 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
~L3SQ_URB_READ_CAM_MATCH_DISABLE);
+ /* WaDisableDopClockGating */
+ I915_WRITE(GEN7_ROW_CHICKEN2,
+ _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+
/* This is required by WaCatErrorRejectionIssue */
I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
--
1.7.9.5
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 3/8] drm/i915: implement WaForceL3Serialization on VLV and IVB
2012-10-18 18:07 [PATCH 1/8] drm/i915: implement WaDisableL3CacheAging on VLV Jesse Barnes
2012-10-18 18:07 ` [PATCH 2/8] drm/i915: implement WaDisableDopClockGatingisable on VLV and IVB Jesse Barnes
@ 2012-10-18 18:07 ` Jesse Barnes
2012-10-18 18:07 ` [PATCH 4/8] drm/i915: implement WaDisableVLVClockGating_VBIIssue on VLV Jesse Barnes
` (4 subsequent siblings)
6 siblings, 0 replies; 14+ messages in thread
From: Jesse Barnes @ 2012-10-18 18:07 UTC (permalink / raw)
To: intel-gfx
References: https://bugs.freedesktop.org/show_bug.cgi?id=50250
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
drivers/gpu/drm/i915/intel_pm.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 15328a3..969687f 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3631,6 +3631,10 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
I915_WRITE(GEN7_ROW_CHICKEN2,
_MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+ /* WaForceL3Serialization */
+ I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
+ ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
+
/* This is required by WaCatErrorRejectionIssue */
I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
--
1.7.9.5
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 4/8] drm/i915: implement WaDisableVLVClockGating_VBIIssue on VLV
2012-10-18 18:07 [PATCH 1/8] drm/i915: implement WaDisableL3CacheAging on VLV Jesse Barnes
2012-10-18 18:07 ` [PATCH 2/8] drm/i915: implement WaDisableDopClockGatingisable on VLV and IVB Jesse Barnes
2012-10-18 18:07 ` [PATCH 3/8] drm/i915: implement WaForceL3Serialization " Jesse Barnes
@ 2012-10-18 18:07 ` Jesse Barnes
2012-10-18 18:07 ` [PATCH 5/8] drm/i915: implement WaDisablePSDDualDispatchEnable on IVB & VLV Jesse Barnes
` (3 subsequent siblings)
6 siblings, 0 replies; 14+ messages in thread
From: Jesse Barnes @ 2012-10-18 18:07 UTC (permalink / raw)
To: intel-gfx
This allows us to get the right vblank interrupt frequency.
v2: pull in register definition
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
drivers/gpu/drm/i915/i915_reg.h | 2 ++
drivers/gpu/drm/i915/intel_pm.c | 7 +++++++
2 files changed, 9 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b07d309..ecb28be 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -552,6 +552,8 @@
#define IIR 0x020a4
#define IMR 0x020a8
#define ISR 0x020ac
+#define VLV_GUNIT_CLOCK_GATE 0x182060
+#define GCFG_DIS (1<<8)
#define VLV_IIR_RW 0x182084
#define VLV_IER 0x1820a0
#define VLV_IIR 0x1820a4
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 969687f..4c86549 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3691,6 +3691,13 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
PIPEA_HLINE_INT_EN | PIPEA_VBLANK_INT_EN |
SPRITEB_FLIPDONE_INT_EN | SPRITEA_FLIPDONE_INT_EN |
PLANEA_FLIPDONE_INT_EN);
+
+ /*
+ * WaDisableVLVClockGating_VBIIssue
+ * Disable clock gating on the GCFG unit to prevent a delay
+ * in the reporting of vblank events.
+ */
+ I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
}
static void g4x_init_clock_gating(struct drm_device *dev)
--
1.7.9.5
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 5/8] drm/i915: implement WaDisablePSDDualDispatchEnable on IVB & VLV
2012-10-18 18:07 [PATCH 1/8] drm/i915: implement WaDisableL3CacheAging on VLV Jesse Barnes
` (2 preceding siblings ...)
2012-10-18 18:07 ` [PATCH 4/8] drm/i915: implement WaDisableVLVClockGating_VBIIssue on VLV Jesse Barnes
@ 2012-10-18 18:07 ` Jesse Barnes
2012-10-18 18:07 ` [PATCH 6/8] drm/i915: TLB invalidation with MI_FLUSH_SW requires a post-sync op Jesse Barnes
` (2 subsequent siblings)
6 siblings, 0 replies; 14+ messages in thread
From: Jesse Barnes @ 2012-10-18 18:07 UTC (permalink / raw)
To: intel-gfx
Workaround for dual port PS dispatch on GT1.
v2: pull in register definition & offset handling
v3: use IVB GT1 macro to get the right regs (Ben)
v4: add for VLV too (Ben)
v5: don't read the reg, it's masked so we'll only enable the one extra bit (Chris)
v6: use a _GT2 suffix for the second reg (Chris)
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
drivers/gpu/drm/i915/i915_drv.c | 1 +
drivers/gpu/drm/i915/i915_reg.h | 5 +++++
drivers/gpu/drm/i915/intel_pm.c | 11 +++++++++++
3 files changed, 17 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 39282a4..6f03b26 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1131,6 +1131,7 @@ static bool IS_DISPLAYREG(u32 reg)
switch (reg) {
case GEN7_ROW_CHICKEN2:
+ case GEN7_HALF_SLICE_CHICKEN1:
return false;
default:
break;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index ecb28be..34067b5 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -4268,6 +4268,11 @@
#define GEN7_L3LOG_BASE 0xB070
#define GEN7_L3LOG_SIZE 0x80
+#define GEN7_HALF_SLICE_CHICKEN1 0xe100 /* IVB GT1 + VLV */
+#define GEN7_HALF_SLICE_CHICKEN1_GT2 0xf100
+#define GEN7_MAX_PS_THREAD_DEP (8<<12)
+#define GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE (1<<3)
+
#define GEN7_ROW_CHICKEN2 0xe4f4
#define GEN7_ROW_CHICKEN2_GT2 0xf4f4
#define DOP_CLOCK_GATING_DISABLE (1<<0)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 4c86549..0c1b270 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3527,6 +3527,14 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
CHICKEN3_DGMG_DONE_FIX_DISABLE);
+ /* WaDisablePSDDualDispatchEnable */
+ if (IS_IVB_GT1(dev))
+ I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
+ _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
+ else
+ I915_WRITE(GEN7_HALF_SLICE_CHICKEN1_GT2,
+ _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
+
/* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */
I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
@@ -3615,6 +3623,9 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
CHICKEN3_DGMG_DONE_FIX_DISABLE);
+ I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
+ _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
+
/* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */
I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
--
1.7.9.5
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 6/8] drm/i915: TLB invalidation with MI_FLUSH_SW requires a post-sync op
2012-10-18 18:07 [PATCH 1/8] drm/i915: implement WaDisableL3CacheAging on VLV Jesse Barnes
` (3 preceding siblings ...)
2012-10-18 18:07 ` [PATCH 5/8] drm/i915: implement WaDisablePSDDualDispatchEnable on IVB & VLV Jesse Barnes
@ 2012-10-18 18:07 ` Jesse Barnes
2012-10-23 11:22 ` Chris Wilson
2012-10-18 18:07 ` [PATCH 7/8] drm/i915: PIPE_CONTROL TLB invalidate requires CS stall Jesse Barnes
2012-10-18 18:07 ` [PATCH 8/8] drm/i915: add clock gating regs to VLV offset check function Jesse Barnes
6 siblings, 1 reply; 14+ messages in thread
From: Jesse Barnes @ 2012-10-18 18:07 UTC (permalink / raw)
To: intel-gfx
So store into the scratch space of the HWS to make sure the invalidate
occurs.
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
drivers/gpu/drm/i915/i915_reg.h | 6 ++++--
drivers/gpu/drm/i915/intel_ringbuffer.c | 22 ++++++++++++++++++----
drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
3 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 34067b5..c6f63a4 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -241,8 +241,10 @@
*/
#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*x-1)
#define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */
-#define MI_INVALIDATE_TLB (1<<18)
-#define MI_INVALIDATE_BSD (1<<7)
+#define MI_FLUSH_DW_STORE_INDEX (1<<21)
+#define MI_INVALIDATE_TLB (1<<18)
+#define MI_FLUSH_DW_OP_STOREDW (1<<14)
+#define MI_INVALIDATE_BSD (1<<7)
#define MI_BATCH_BUFFER MI_INSTR(0x30, 1)
#define MI_BATCH_NON_SECURE (1)
/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 6c6f95a..e7daa90 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1396,10 +1396,17 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring,
return ret;
cmd = MI_FLUSH_DW;
+ /*
+ * Bspec vol 1c.5 - video engine command streamer:
+ * "If ENABLED, all TLBs will be invalidated once the flush
+ * operation is complete. This bit is only valid when the
+ * Post-Sync Operation field is a value of 1h or 3h."
+ */
if (invalidate & I915_GEM_GPU_DOMAINS)
- cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
+ cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
+ MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
intel_ring_emit(ring, cmd);
- intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_INDEX << 3);
intel_ring_emit(ring, 0);
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
@@ -1461,10 +1468,17 @@ static int blt_ring_flush(struct intel_ring_buffer *ring,
return ret;
cmd = MI_FLUSH_DW;
+ /*
+ * Bspec vol 1c.3 - blitter engine command streamer:
+ * "If ENABLED, all TLBs will be invalidated once the flush
+ * operation is complete. This bit is only valid when the
+ * Post-Sync Operation field is a value of 1h or 3h."
+ */
if (invalidate & I915_GEM_DOMAIN_RENDER)
- cmd |= MI_INVALIDATE_TLB;
+ cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
+ MI_FLUSH_DW_OP_STOREDW;
intel_ring_emit(ring, cmd);
- intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_INDEX << 3);
intel_ring_emit(ring, 0);
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 3745d1d..d089520 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -183,6 +183,7 @@ intel_read_status_page(struct intel_ring_buffer *ring,
* The area from dword 0x20 to 0x3ff is available for driver usage.
*/
#define I915_GEM_HWS_INDEX 0x20
+#define I915_GEM_HWS_SCRATCH_INDEX 0x28
void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring);
--
1.7.9.5
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 7/8] drm/i915: PIPE_CONTROL TLB invalidate requires CS stall
2012-10-18 18:07 [PATCH 1/8] drm/i915: implement WaDisableL3CacheAging on VLV Jesse Barnes
` (4 preceding siblings ...)
2012-10-18 18:07 ` [PATCH 6/8] drm/i915: TLB invalidation with MI_FLUSH_SW requires a post-sync op Jesse Barnes
@ 2012-10-18 18:07 ` Jesse Barnes
2012-10-23 11:42 ` Chris Wilson
2012-10-18 18:07 ` [PATCH 8/8] drm/i915: add clock gating regs to VLV offset check function Jesse Barnes
6 siblings, 1 reply; 14+ messages in thread
From: Jesse Barnes @ 2012-10-18 18:07 UTC (permalink / raw)
To: intel-gfx
"If ENABLED, PIPE_CONTROL command will flush the in flight data written
out by render engine to Global Observation point on flush done. Also
Requires stall bit ([20] of DW1) set."
So set the stall bit to ensure proper invalidation.
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e7daa90..1e09c62 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -246,7 +246,7 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
/*
* TLB invalidate requires a post-sync write.
*/
- flags |= PIPE_CONTROL_QW_WRITE;
+ flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
}
ret = intel_ring_begin(ring, 4);
--
1.7.9.5
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 8/8] drm/i915: add clock gating regs to VLV offset check function
2012-10-18 18:07 [PATCH 1/8] drm/i915: implement WaDisableL3CacheAging on VLV Jesse Barnes
` (5 preceding siblings ...)
2012-10-18 18:07 ` [PATCH 7/8] drm/i915: PIPE_CONTROL TLB invalidate requires CS stall Jesse Barnes
@ 2012-10-18 18:07 ` Jesse Barnes
6 siblings, 0 replies; 14+ messages in thread
From: Jesse Barnes @ 2012-10-18 18:07 UTC (permalink / raw)
To: intel-gfx
So we can write them properly.
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
drivers/gpu/drm/i915/i915_drv.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 6f03b26..39c53ad 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1130,8 +1130,17 @@ static bool IS_DISPLAYREG(u32 reg)
return false;
switch (reg) {
+ case _3D_CHICKEN3:
+ case IVB_CHICKEN3:
+ case GEN7_COMMON_SLICE_CHICKEN1:
+ case GEN7_L3CNTLREG1:
+ case GEN7_L3_CHICKEN_MODE_REGISTER:
case GEN7_ROW_CHICKEN2:
+ case GEN7_L3SQCREG4:
+ case GEN7_SQ_CHICKEN_MBCUNIT_CONFIG:
case GEN7_HALF_SLICE_CHICKEN1:
+ case GEN6_MBCTL:
+ case GEN6_UCGCTL2:
return false;
default:
break;
--
1.7.9.5
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 6/8] drm/i915: TLB invalidation with MI_FLUSH_SW requires a post-sync op
2012-10-18 18:07 ` [PATCH 6/8] drm/i915: TLB invalidation with MI_FLUSH_SW requires a post-sync op Jesse Barnes
@ 2012-10-23 11:22 ` Chris Wilson
2012-10-23 14:28 ` Jesse Barnes
0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2012-10-23 11:22 UTC (permalink / raw)
To: Jesse Barnes, intel-gfx
On Thu, 18 Oct 2012 13:07:17 -0500, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> So store into the scratch space of the HWS to make sure the invalidate
> occurs.
Whoops, instant hang. Probably doesn't agree with being called FLUSH_SW
and not FLUSH_DW! ;-)
> + /*
> + * Bspec vol 1c.5 - video engine command streamer:
> + * "If ENABLED, all TLBs will be invalidated once the flush
> + * operation is complete. This bit is only valid when the
> + * Post-Sync Operation field is a value of 1h or 3h."
> + */
> if (invalidate & I915_GEM_GPU_DOMAINS)
> - cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
> + cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
> + MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
> intel_ring_emit(ring, cmd);
> - intel_ring_emit(ring, 0);
> + intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_INDEX << 3);
And here is where the error lies. Perhaps this would be clearer if you
do:
#define MI_FLUSH_DW_USE_PPGTT 0
#define MI_FLUSH_DW_USE_GTT (1<<2)
#define I915_GEM_HWS_SCRATCH_INDEX 0x30
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
Then:
intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
Hangs begone!
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 7/8] drm/i915: PIPE_CONTROL TLB invalidate requires CS stall
2012-10-18 18:07 ` [PATCH 7/8] drm/i915: PIPE_CONTROL TLB invalidate requires CS stall Jesse Barnes
@ 2012-10-23 11:42 ` Chris Wilson
2012-10-25 18:28 ` Jesse Barnes
0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2012-10-23 11:42 UTC (permalink / raw)
To: Jesse Barnes, intel-gfx
On Thu, 18 Oct 2012 13:07:18 -0500, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> "If ENABLED, PIPE_CONTROL command will flush the in flight data written
> out by render engine to Global Observation point on flush done. Also
> Requires stall bit ([20] of DW1) set."
That quotation doesn't make sense in the context of TLB invalidation,
and the programming guide here very carefully avoids the mention of
requiring any stall bit set for the post-sync op of TLB invalidation.
Maybe quote chapter and verse as well?
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 6/8] drm/i915: TLB invalidation with MI_FLUSH_SW requires a post-sync op
2012-10-23 11:22 ` Chris Wilson
@ 2012-10-23 14:28 ` Jesse Barnes
0 siblings, 0 replies; 14+ messages in thread
From: Jesse Barnes @ 2012-10-23 14:28 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Tue, 23 Oct 2012 12:22:16 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Thu, 18 Oct 2012 13:07:17 -0500, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> > So store into the scratch space of the HWS to make sure the invalidate
> > occurs.
>
> Whoops, instant hang. Probably doesn't agree with being called FLUSH_SW
> and not FLUSH_DW! ;-)
>
> > + /*
> > + * Bspec vol 1c.5 - video engine command streamer:
> > + * "If ENABLED, all TLBs will be invalidated once the flush
> > + * operation is complete. This bit is only valid when the
> > + * Post-Sync Operation field is a value of 1h or 3h."
> > + */
> > if (invalidate & I915_GEM_GPU_DOMAINS)
> > - cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
> > + cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
> > + MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
> > intel_ring_emit(ring, cmd);
> > - intel_ring_emit(ring, 0);
> > + intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_INDEX << 3);
> And here is where the error lies. Perhaps this would be clearer if you
> do:
>
> #define MI_FLUSH_DW_USE_PPGTT 0
> #define MI_FLUSH_DW_USE_GTT (1<<2)
>
> #define I915_GEM_HWS_SCRATCH_INDEX 0x30
> #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
>
> Then:
> intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
>
> Hangs begone!
Ah cool, was hoping it was something simple. Damn PPGTT vs GTT always
gets us.
I'll respin with the change.
--
Jesse Barnes, Intel Open Source Technology Center
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 7/8] drm/i915: PIPE_CONTROL TLB invalidate requires CS stall
2012-10-23 11:42 ` Chris Wilson
@ 2012-10-25 18:28 ` Jesse Barnes
0 siblings, 0 replies; 14+ messages in thread
From: Jesse Barnes @ 2012-10-25 18:28 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Tue, 23 Oct 2012 12:42:07 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Thu, 18 Oct 2012 13:07:18 -0500, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> > "If ENABLED, PIPE_CONTROL command will flush the in flight data written
> > out by render engine to Global Observation point on flush done. Also
> > Requires stall bit ([20] of DW1) set."
>
> That quotation doesn't make sense in the context of TLB invalidation,
> and the programming guide here very carefully avoids the mention of
> requiring any stall bit set for the post-sync op of TLB invalidation.
>
> Maybe quote chapter and verse as well?
I thought the "Also Requires stall bit ([20] of DW1) set." was pretty
clear?
--
Jesse Barnes, Intel Open Source Technology Center
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 1/8] drm/i915: implement WaDisableL3CacheAging on VLV
@ 2012-10-25 19:15 Jesse Barnes
0 siblings, 0 replies; 14+ messages in thread
From: Jesse Barnes @ 2012-10-25 19:15 UTC (permalink / raw)
To: intel-gfx
Needs to be set on every context restore as well, so set it as part of
the initial state so we can save/restore it. Note this removes the IVB
workaround value from VLV and uses the default value, just adding in the
L3 cache aging disable bit, since the IVB value is wrong for VLV.
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
drivers/gpu/drm/i915/i915_reg.h | 1 +
drivers/gpu/drm/i915/intel_pm.c | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index be22aeb..5da227b 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3449,6 +3449,7 @@
#define GEN7_L3CNTLREG1 0xB01C
#define GEN7_WA_FOR_GEN7_L3_CONTROL 0x3C4FFF8C
+#define GEN7_L3AGDIS (1<<19)
#define GEN7_L3_CHICKEN_MODE_REGISTER 0xB030
#define GEN7_WA_L3_CHICKEN_MODE 0x20000000
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 59068be..d4ddcf2 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3635,7 +3635,7 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
/* WaApplyL3ControlAndL3ChickenMode requires those two on Ivy Bridge */
- I915_WRITE(GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
+ I915_WRITE(GEN7_L3CNTLREG1, I915_READ(GEN7_L3CNTLREG1) | GEN7_L3AGDIS);
I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
/* WaForceL3Serialization */
--
1.7.9.5
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 1/8] drm/i915: implement WaDisableL3CacheAging on VLV
@ 2012-10-18 15:43 Jesse Barnes
0 siblings, 0 replies; 14+ messages in thread
From: Jesse Barnes @ 2012-10-18 15:43 UTC (permalink / raw)
To: intel-gfx
Needs to be set on every context restore as well, so set it as part of
the initial state so we can save/restore it.
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
drivers/gpu/drm/i915/i915_reg.h | 1 +
drivers/gpu/drm/i915/intel_pm.c | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c31ee5b..876ef96 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3449,6 +3449,7 @@
#define GEN7_L3CNTLREG1 0xB01C
#define GEN7_WA_FOR_GEN7_L3_CONTROL 0x3C4FFF8C
+#define GEN7_L3AGDIS (1<<19)
#define GEN7_L3_CHICKEN_MODE_REGISTER 0xB030
#define GEN7_WA_L3_CHICKEN_MODE 0x20000000
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 2b3cddf..049c7e2 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3613,7 +3613,7 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
/* WaApplyL3ControlAndL3ChickenMode requires those two on Ivy Bridge */
- I915_WRITE(GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
+ I915_WRITE(GEN7_L3CNTLREG1, I915_READ(GEN7_L3CNTLREG1) | GEN7_L3AGDIS);
I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
/* WaForceL3Serialization */
--
1.7.9.5
^ permalink raw reply related [flat|nested] 14+ messages in thread
end of thread, other threads:[~2012-10-25 19:15 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-10-18 18:07 [PATCH 1/8] drm/i915: implement WaDisableL3CacheAging on VLV Jesse Barnes
2012-10-18 18:07 ` [PATCH 2/8] drm/i915: implement WaDisableDopClockGatingisable on VLV and IVB Jesse Barnes
2012-10-18 18:07 ` [PATCH 3/8] drm/i915: implement WaForceL3Serialization " Jesse Barnes
2012-10-18 18:07 ` [PATCH 4/8] drm/i915: implement WaDisableVLVClockGating_VBIIssue on VLV Jesse Barnes
2012-10-18 18:07 ` [PATCH 5/8] drm/i915: implement WaDisablePSDDualDispatchEnable on IVB & VLV Jesse Barnes
2012-10-18 18:07 ` [PATCH 6/8] drm/i915: TLB invalidation with MI_FLUSH_SW requires a post-sync op Jesse Barnes
2012-10-23 11:22 ` Chris Wilson
2012-10-23 14:28 ` Jesse Barnes
2012-10-18 18:07 ` [PATCH 7/8] drm/i915: PIPE_CONTROL TLB invalidate requires CS stall Jesse Barnes
2012-10-23 11:42 ` Chris Wilson
2012-10-25 18:28 ` Jesse Barnes
2012-10-18 18:07 ` [PATCH 8/8] drm/i915: add clock gating regs to VLV offset check function Jesse Barnes
-- strict thread matches above, loose matches on Subject: below --
2012-10-25 19:15 [PATCH 1/8] drm/i915: implement WaDisableL3CacheAging on VLV Jesse Barnes
2012-10-18 15:43 Jesse Barnes
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).