All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/i915: swizzling support for snb/ivb
@ 2012-01-31 15:47 Daniel Vetter
  2012-01-31 15:47 ` [PATCH 2/3] drm/i915: consolidate swizzling control bit frobbing Daniel Vetter
                   ` (2 more replies)
  0 siblings, 3 replies; 22+ messages in thread
From: Daniel Vetter @ 2012-01-31 15:47 UTC (permalink / raw)
  To: Intel Graphics Development; +Cc: Daniel Vetter, Ben Widawsky

We have to do this manually. Somebody had a Great Idea.

I've measured speed-ups just a few percent above the noise level
(below 5% for the best case), but no slowdows. Chris Wilson measured
quite a bit more (10-20% above the usual snb variance) on a more
recent and better tuned version of sna, but also recorded a few
slow-downs on benchmarks know for uglier amounts of snb-induced
variance.

v2: Incorporate Ben Widawsky's preliminary review comments and
elaborate a bit about the performance impact in the changelog.

Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_dma.c        |    2 +-
 drivers/gpu/drm/i915/i915_drv.c        |    4 ++-
 drivers/gpu/drm/i915/i915_drv.h        |    3 +-
 drivers/gpu/drm/i915/i915_gem.c        |   23 +++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_tiling.c |   16 +++++++++++++-
 drivers/gpu/drm/i915/i915_reg.h        |   34 ++++++++++++++++++++++++++++++++
 6 files changed, 75 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 3f27173..dfef956 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1208,7 +1208,7 @@ static int i915_load_gem_init(struct drm_device *dev)
 	i915_gem_do_init(dev, 0, mappable_size, gtt_size - PAGE_SIZE);
 
 	mutex_lock(&dev->struct_mutex);
-	ret = i915_gem_init_ringbuffer(dev);
+	ret = i915_gem_init_hw(dev);
 	mutex_unlock(&dev->struct_mutex);
 	if (ret)
 		return ret;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 1658cfd..12ddf47 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -495,7 +495,7 @@ static int i915_drm_thaw(struct drm_device *dev)
 		mutex_lock(&dev->struct_mutex);
 		dev_priv->mm.suspended = 0;
 
-		error = i915_gem_init_ringbuffer(dev);
+		error = i915_gem_init_hw(dev);
 		mutex_unlock(&dev->struct_mutex);
 
 		if (HAS_PCH_SPLIT(dev))
@@ -686,6 +686,8 @@ int i915_reset(struct drm_device *dev, u8 flags)
 			!dev_priv->mm.suspended) {
 		dev_priv->mm.suspended = 0;
 
+		i915_gem_init_swizzling(dev);
+
 		dev_priv->ring[RCS].init(&dev_priv->ring[RCS]);
 		if (HAS_BSD(dev))
 		    dev_priv->ring[VCS].init(&dev_priv->ring[VCS]);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 865de80..0845419 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1187,7 +1187,8 @@ int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj,
 					    uint32_t read_domains,
 					    uint32_t write_domain);
 int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
-int __must_check i915_gem_init_ringbuffer(struct drm_device *dev);
+int __must_check i915_gem_init_hw(struct drm_device *dev);
+void i915_gem_init_swizzling(struct drm_device *dev);
 void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
 void i915_gem_do_init(struct drm_device *dev,
 		      unsigned long start,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 51a2b0c..86fffd2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3681,12 +3681,31 @@ i915_gem_idle(struct drm_device *dev)
 	return 0;
 }
 
+void i915_gem_init_swizzling(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+
+	if (INTEL_INFO(dev)->gen < 6 ||
+	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
+		return;
+
+	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
+				 DISP_TILE_SURFACE_SWIZZLING);
+
+	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
+	if (IS_GEN6(dev))
+		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB));
+	else
+		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_IVB));
+}
 int
-i915_gem_init_ringbuffer(struct drm_device *dev)
+i915_gem_init_hw(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	int ret;
 
+	i915_gem_init_swizzling(dev);
+
 	ret = intel_init_render_ring_buffer(dev);
 	if (ret)
 		return ret;
@@ -3742,7 +3761,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 	mutex_lock(&dev->struct_mutex);
 	dev_priv->mm.suspended = 0;
 
-	ret = i915_gem_init_ringbuffer(dev);
+	ret = i915_gem_init_hw(dev);
 	if (ret != 0) {
 		mutex_unlock(&dev->struct_mutex);
 		return ret;
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 861223b..acf89fe 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -93,8 +93,20 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
 	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
 
 	if (INTEL_INFO(dev)->gen >= 6) {
-		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+		uint32_t dimm_c0, dimm_c1;
+		dimm_c0 = I915_READ(MAD_DIMM_C0);
+		dimm_c1 = I915_READ(MAD_DIMM_C1);
+		dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
+		dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
+		/* Enable swizzling when the channels are populated with
+		 * identically sized dimms. */
+		if (dimm_c0 == dimm_c1) {
+			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+			swizzle_y = I915_BIT_6_SWIZZLE_9;
+		} else {
+			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+		}
 	} else if (IS_GEN5(dev)) {
 		/* On Ironlake whatever DRAM config, GPU always do
 		 * same swizzling setup.
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f960738..539ef90 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -295,6 +295,12 @@
 #define FENCE_REG_SANDYBRIDGE_0		0x100000
 #define   SANDYBRIDGE_FENCE_PITCH_SHIFT	32
 
+/* control register for cpu gtt access */
+#define TILECTL				0x101000
+#define   TILECTL_SWZCTL			(1 << 0)
+#define   TILECTL_TLB_PREFETCH_DIS	(1 << 2)
+#define   TILECTL_BACKSNOOP_DIS		(1 << 3)
+
 /*
  * Instruction and interrupt control regs
  */
@@ -318,6 +324,11 @@
 #define RING_MAX_IDLE(base)	((base)+0x54)
 #define RING_HWS_PGA(base)	((base)+0x80)
 #define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
+#define ARB_MODE		0x04030
+#define   ARB_MODE_SWIZZLE_SNB	(1<<4)
+#define   ARB_MODE_SWIZZLE_IVB	(1<<5)
+#define   ARB_MODE_ENABLE(x)	GFX_MODE_ENABLE(x)
+#define   ARB_MODE_DISABLE(x)	GFX_MODE_DISABLE(x)
 #define RENDER_HWS_PGA_GEN7	(0x04080)
 #define RING_FAULT_REG(ring)	(0x4094 + 0x100*(ring)->id)
 #define DONE_REG		0x40b0
@@ -1037,6 +1048,29 @@
 #define C0DRB3			0x10206
 #define C1DRB3			0x10606
 
+/** snb MCH registers for reading the DRAM channel configuration */
+#define MAD_DIMM_C0			(MCHBAR_MIRROR_BASE_SNB + 0x5004)
+#define   MAD_DIMM_C1			(MCHBAR_MIRROR_BASE_SNB + 0x5008)
+#define   MAD_DIMM_C2			(MCHBAR_MIRROR_BASE_SNB + 0x500C)
+#define   MAD_DIMM_ECC_MASK		(0x3 << 24)
+#define   MAD_DIMM_ECC_OFF		(0x0 << 24)
+#define   MAD_DIMM_ECC_IO_ON_LOGIC_OFF	(0x1 << 24)
+#define   MAD_DIMM_ECC_IO_OFF_LOGIC_ON	(0x2 << 24)
+#define   MAD_DIMM_ECC_ON		(0x3 << 24)
+#define   MAD_DIMM_ENH_INTERLEAVE	(0x1 << 22)
+#define   MAD_DIMM_RANK_INTERLEAVE	(0x1 << 21)
+#define   MAD_DIMM_B_WIDTH_X16		(0x1 << 20) /* X8 chips if unset */
+#define   MAD_DIMM_A_WIDTH_X16		(0x1 << 19) /* X8 chips if unset */
+#define   MAD_DIMM_B_DUAL_RANK		(0x1 << 18)
+#define   MAD_DIMM_A_DUAL_RANK		(0x1 << 17)
+#define   MAD_DIMM_A_SELECT		(0x1 << 16)
+/* DIMM sizes are in multiples of 256mb. */
+#define   MAD_DIMM_B_SIZE_SHIFT		8
+#define   MAD_DIMM_B_SIZE_MASK		(0xff << MAD_DIMM_B_SIZE_SHIFT)
+#define   MAD_DIMM_A_SIZE_SHIFT		0
+#define   MAD_DIMM_A_SIZE_MASK		(0xff << MAD_DIMM_A_SIZE_SHIFT)
+
+
 /* Clocking configuration register */
 #define CLKCFG			0x10c00
 #define CLKCFG_FSB_400					(5 << 0)	/* hrawclk 100 */
-- 
1.7.7.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 2/3] drm/i915: consolidate swizzling control bit frobbing
  2012-01-31 15:47 [PATCH 1/3] drm/i915: swizzling support for snb/ivb Daniel Vetter
@ 2012-01-31 15:47 ` Daniel Vetter
  2012-02-01 21:37   ` Ben Widawsky
  2012-02-08 22:17   ` Ben Widawsky
  2012-01-31 15:47 ` [PATCH 3/3] drm/i915: add gen6+ registers to i915_swizzle_info Daniel Vetter
  2012-02-01 21:35 ` [PATCH 1/3] drm/i915: swizzling support for snb/ivb Ben Widawsky
  2 siblings, 2 replies; 22+ messages in thread
From: Daniel Vetter @ 2012-01-31 15:47 UTC (permalink / raw)
  To: Intel Graphics Development; +Cc: Daniel Vetter, Ben Widawsky

On gen5 we also need to correctly set up swizzling in the display
scanout engine, but only there. Consolidate this into the same
function.

This has a small effect on ums setups - the kernel now also sets this
bit in addition to userspace setting it. Given that this code only
runs when userspace either can't (resume, gpu reset) or explicitly
won't(gem_init) touch the hw this shouldn't have an adverse effect.

Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c      |    5 ++++-
 drivers/gpu/drm/i915/intel_display.c |    6 ------
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 86fffd2..27fe07a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3685,13 +3685,16 @@ void i915_gem_init_swizzling(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 
-	if (INTEL_INFO(dev)->gen < 6 ||
+	if (INTEL_INFO(dev)->gen < 5 ||
 	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
 		return;
 
 	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
 				 DISP_TILE_SURFACE_SWIZZLING);
 
+	if (IS_GEN5(dev))
+		return;
+
 	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
 	if (IS_GEN6(dev))
 		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB));
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index fc9bc19..5ab967c 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -6029,12 +6029,6 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 
 	intel_wait_for_vblank(dev, pipe);
 
-	if (IS_GEN5(dev)) {
-		/* enable address swizzle for tiling buffer */
-		temp = I915_READ(DISP_ARB_CTL);
-		I915_WRITE(DISP_ARB_CTL, temp | DISP_TILE_SURFACE_SWIZZLING);
-	}
-
 	I915_WRITE(DSPCNTR(plane), dspcntr);
 	POSTING_READ(DSPCNTR(plane));
 
-- 
1.7.7.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 3/3] drm/i915: add gen6+ registers to i915_swizzle_info
  2012-01-31 15:47 [PATCH 1/3] drm/i915: swizzling support for snb/ivb Daniel Vetter
  2012-01-31 15:47 ` [PATCH 2/3] drm/i915: consolidate swizzling control bit frobbing Daniel Vetter
@ 2012-01-31 15:47 ` Daniel Vetter
  2012-02-01 21:39   ` Ben Widawsky
  2012-02-01 21:35 ` [PATCH 1/3] drm/i915: swizzling support for snb/ivb Ben Widawsky
  2 siblings, 1 reply; 22+ messages in thread
From: Daniel Vetter @ 2012-01-31 15:47 UTC (permalink / raw)
  To: Intel Graphics Development; +Cc: Daniel Vetter, Ben Widawsky

Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_debugfs.c |   13 +++++++++++++
 1 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 681cbe4..4ebca6d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1422,6 +1422,19 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
 			   I915_READ16(C0DRB3));
 		seq_printf(m, "C1DRB3 = 0x%04x\n",
 			   I915_READ16(C1DRB3));
+	} else if (IS_GEN6(dev) || IS_GEN7(dev)) {
+		seq_printf(m, "MAD_DIMM_C0 = 0x%08x\n",
+			   I915_READ(MAD_DIMM_C0));
+		seq_printf(m, "MAD_DIMM_C1 = 0x%08x\n",
+			   I915_READ(MAD_DIMM_C1));
+		seq_printf(m, "MAD_DIMM_C2 = 0x%08x\n",
+			   I915_READ(MAD_DIMM_C2));
+		seq_printf(m, "TILECTL = 0x%08x\n",
+			   I915_READ(TILECTL));
+		seq_printf(m, "ARB_MODE = 0x%08x\n",
+			   I915_READ(ARB_MODE));
+		seq_printf(m, "DISP_ARB_CTL = 0x%08x\n",
+			   I915_READ(DISP_ARB_CTL));
 	}
 	mutex_unlock(&dev->struct_mutex);
 
-- 
1.7.7.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/3] drm/i915: swizzling support for snb/ivb
  2012-01-31 15:47 [PATCH 1/3] drm/i915: swizzling support for snb/ivb Daniel Vetter
  2012-01-31 15:47 ` [PATCH 2/3] drm/i915: consolidate swizzling control bit frobbing Daniel Vetter
  2012-01-31 15:47 ` [PATCH 3/3] drm/i915: add gen6+ registers to i915_swizzle_info Daniel Vetter
@ 2012-02-01 21:35 ` Ben Widawsky
  2012-02-01 22:16   ` Daniel Vetter
  2 siblings, 1 reply; 22+ messages in thread
From: Ben Widawsky @ 2012-02-01 21:35 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Intel Graphics Development

On 01/31/2012 07:47 AM, Daniel Vetter wrote:
> We have to do this manually. Somebody had a Great Idea.
> 
> I've measured speed-ups just a few percent above the noise level
> (below 5% for the best case), but no slowdows. Chris Wilson measured
> quite a bit more (10-20% above the usual snb variance) on a more
> recent and better tuned version of sna, but also recorded a few
> slow-downs on benchmarks know for uglier amounts of snb-induced
> variance.
> 
> v2: Incorporate Ben Widawsky's preliminary review comments and
> elaborate a bit about the performance impact in the changelog.
> 
> Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>

You didn't address one questions I really cared about, how is it safe to
ignore channel 3 size? While I'm at it, I wonder what is in these
registers if you have less than 256MB. If the answer is zero, then your
check isn't safe enough below.

As an aside, this will potentially break our simulation environment, but
that's environment fail.

> ---
>  drivers/gpu/drm/i915/i915_dma.c        |    2 +-
>  drivers/gpu/drm/i915/i915_drv.c        |    4 ++-
>  drivers/gpu/drm/i915/i915_drv.h        |    3 +-
>  drivers/gpu/drm/i915/i915_gem.c        |   23 +++++++++++++++++++-
>  drivers/gpu/drm/i915/i915_gem_tiling.c |   16 +++++++++++++-
>  drivers/gpu/drm/i915/i915_reg.h        |   34 ++++++++++++++++++++++++++++++++
>  6 files changed, 75 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 3f27173..dfef956 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1208,7 +1208,7 @@ static int i915_load_gem_init(struct drm_device *dev)
>  	i915_gem_do_init(dev, 0, mappable_size, gtt_size - PAGE_SIZE);
>  
>  	mutex_lock(&dev->struct_mutex);
> -	ret = i915_gem_init_ringbuffer(dev);
> +	ret = i915_gem_init_hw(dev);
>  	mutex_unlock(&dev->struct_mutex);
>  	if (ret)
>  		return ret;
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 1658cfd..12ddf47 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -495,7 +495,7 @@ static int i915_drm_thaw(struct drm_device *dev)
>  		mutex_lock(&dev->struct_mutex);
>  		dev_priv->mm.suspended = 0;
>  
> -		error = i915_gem_init_ringbuffer(dev);
> +		error = i915_gem_init_hw(dev);
>  		mutex_unlock(&dev->struct_mutex);
>  
>  		if (HAS_PCH_SPLIT(dev))
> @@ -686,6 +686,8 @@ int i915_reset(struct drm_device *dev, u8 flags)
>  			!dev_priv->mm.suspended) {
>  		dev_priv->mm.suspended = 0;
>  
> +		i915_gem_init_swizzling(dev);
> +
>  		dev_priv->ring[RCS].init(&dev_priv->ring[RCS]);
>  		if (HAS_BSD(dev))
>  		    dev_priv->ring[VCS].init(&dev_priv->ring[VCS]);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 865de80..0845419 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1187,7 +1187,8 @@ int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj,
>  					    uint32_t read_domains,
>  					    uint32_t write_domain);
>  int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
> -int __must_check i915_gem_init_ringbuffer(struct drm_device *dev);
> +int __must_check i915_gem_init_hw(struct drm_device *dev);
> +void i915_gem_init_swizzling(struct drm_device *dev);
>  void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
>  void i915_gem_do_init(struct drm_device *dev,
>  		      unsigned long start,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 51a2b0c..86fffd2 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3681,12 +3681,31 @@ i915_gem_idle(struct drm_device *dev)
>  	return 0;
>  }
>  
> +void i915_gem_init_swizzling(struct drm_device *dev)
> +{
> +	drm_i915_private_t *dev_priv = dev->dev_private;
> +
> +	if (INTEL_INFO(dev)->gen < 6 ||
> +	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
> +		return;
> +
> +	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
> +				 DISP_TILE_SURFACE_SWIZZLING);
> +
> +	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
> +	if (IS_GEN6(dev))
> +		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB));
> +	else
> +		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_IVB));
> +}
>  int
> -i915_gem_init_ringbuffer(struct drm_device *dev)
> +i915_gem_init_hw(struct drm_device *dev)
>  {
>  	drm_i915_private_t *dev_priv = dev->dev_private;
>  	int ret;
>  
> +	i915_gem_init_swizzling(dev);
> +
>  	ret = intel_init_render_ring_buffer(dev);
>  	if (ret)
>  		return ret;
> @@ -3742,7 +3761,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
>  	mutex_lock(&dev->struct_mutex);
>  	dev_priv->mm.suspended = 0;
>  
> -	ret = i915_gem_init_ringbuffer(dev);
> +	ret = i915_gem_init_hw(dev);
>  	if (ret != 0) {
>  		mutex_unlock(&dev->struct_mutex);
>  		return ret;
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index 861223b..acf89fe 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -93,8 +93,20 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
>  	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
>  
>  	if (INTEL_INFO(dev)->gen >= 6) {
> -		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> -		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> +		uint32_t dimm_c0, dimm_c1;
> +		dimm_c0 = I915_READ(MAD_DIMM_C0);
> +		dimm_c1 = I915_READ(MAD_DIMM_C1);
> +		dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
> +		dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
> +		/* Enable swizzling when the channels are populated with
> +		 * identically sized dimms. */
> +		if (dimm_c0 == dimm_c1) {
> +			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
> +			swizzle_y = I915_BIT_6_SWIZZLE_9;
> +		} else {
> +			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> +			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> +		}
>  	} else if (IS_GEN5(dev)) {
>  		/* On Ironlake whatever DRAM config, GPU always do
>  		 * same swizzling setup.
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index f960738..539ef90 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -295,6 +295,12 @@
>  #define FENCE_REG_SANDYBRIDGE_0		0x100000
>  #define   SANDYBRIDGE_FENCE_PITCH_SHIFT	32
>  
> +/* control register for cpu gtt access */
> +#define TILECTL				0x101000
> +#define   TILECTL_SWZCTL			(1 << 0)
> +#define   TILECTL_TLB_PREFETCH_DIS	(1 << 2)
> +#define   TILECTL_BACKSNOOP_DIS		(1 << 3)
> +
>  /*
>   * Instruction and interrupt control regs
>   */
> @@ -318,6 +324,11 @@
>  #define RING_MAX_IDLE(base)	((base)+0x54)
>  #define RING_HWS_PGA(base)	((base)+0x80)
>  #define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
> +#define ARB_MODE		0x04030
> +#define   ARB_MODE_SWIZZLE_SNB	(1<<4)
> +#define   ARB_MODE_SWIZZLE_IVB	(1<<5)
> +#define   ARB_MODE_ENABLE(x)	GFX_MODE_ENABLE(x)
> +#define   ARB_MODE_DISABLE(x)	GFX_MODE_DISABLE(x)
>  #define RENDER_HWS_PGA_GEN7	(0x04080)
>  #define RING_FAULT_REG(ring)	(0x4094 + 0x100*(ring)->id)
>  #define DONE_REG		0x40b0
> @@ -1037,6 +1048,29 @@
>  #define C0DRB3			0x10206
>  #define C1DRB3			0x10606
>  
> +/** snb MCH registers for reading the DRAM channel configuration */
> +#define MAD_DIMM_C0			(MCHBAR_MIRROR_BASE_SNB + 0x5004)
> +#define   MAD_DIMM_C1			(MCHBAR_MIRROR_BASE_SNB + 0x5008)
> +#define   MAD_DIMM_C2			(MCHBAR_MIRROR_BASE_SNB + 0x500C)
> +#define   MAD_DIMM_ECC_MASK		(0x3 << 24)
> +#define   MAD_DIMM_ECC_OFF		(0x0 << 24)
> +#define   MAD_DIMM_ECC_IO_ON_LOGIC_OFF	(0x1 << 24)
> +#define   MAD_DIMM_ECC_IO_OFF_LOGIC_ON	(0x2 << 24)
> +#define   MAD_DIMM_ECC_ON		(0x3 << 24)
> +#define   MAD_DIMM_ENH_INTERLEAVE	(0x1 << 22)
> +#define   MAD_DIMM_RANK_INTERLEAVE	(0x1 << 21)
> +#define   MAD_DIMM_B_WIDTH_X16		(0x1 << 20) /* X8 chips if unset */
> +#define   MAD_DIMM_A_WIDTH_X16		(0x1 << 19) /* X8 chips if unset */
> +#define   MAD_DIMM_B_DUAL_RANK		(0x1 << 18)
> +#define   MAD_DIMM_A_DUAL_RANK		(0x1 << 17)
> +#define   MAD_DIMM_A_SELECT		(0x1 << 16)
> +/* DIMM sizes are in multiples of 256mb. */
> +#define   MAD_DIMM_B_SIZE_SHIFT		8
> +#define   MAD_DIMM_B_SIZE_MASK		(0xff << MAD_DIMM_B_SIZE_SHIFT)
> +#define   MAD_DIMM_A_SIZE_SHIFT		0
> +#define   MAD_DIMM_A_SIZE_MASK		(0xff << MAD_DIMM_A_SIZE_SHIFT)
> +
> +

Is that a whitespace error for MAD_DIMM_C1 and MAD_DIMM_C2?

>  /* Clocking configuration register */
>  #define CLKCFG			0x10c00
>  #define CLKCFG_FSB_400					(5 << 0)	/* hrawclk 100 */

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/3] drm/i915: consolidate swizzling control bit frobbing
  2012-01-31 15:47 ` [PATCH 2/3] drm/i915: consolidate swizzling control bit frobbing Daniel Vetter
@ 2012-02-01 21:37   ` Ben Widawsky
  2012-02-01 22:23     ` Daniel Vetter
  2012-02-08 22:17   ` Ben Widawsky
  1 sibling, 1 reply; 22+ messages in thread
From: Ben Widawsky @ 2012-02-01 21:37 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Intel Graphics Development

On Tue, Jan 31, 2012 at 04:47:55PM +0100, Daniel Vetter wrote:
> On gen5 we also need to correctly set up swizzling in the display
> scanout engine, but only there. Consolidate this into the same
> function.
> 
> This has a small effect on ums setups - the kernel now also sets this
> bit in addition to userspace setting it. Given that this code only
> runs when userspace either can't (resume, gpu reset) or explicitly
> won't(gem_init) touch the hw this shouldn't have an adverse effect.
> 
> Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> ---

Have you tested this well?
From bspec:
Note: For ILK Display Engine can(?) implement the A[6] swizzling
structure however it will not be used – corresponding bits will be set
to “00”
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3] drm/i915: add gen6+ registers to i915_swizzle_info
  2012-01-31 15:47 ` [PATCH 3/3] drm/i915: add gen6+ registers to i915_swizzle_info Daniel Vetter
@ 2012-02-01 21:39   ` Ben Widawsky
  2012-02-08 22:20     ` Daniel Vetter
  0 siblings, 1 reply; 22+ messages in thread
From: Ben Widawsky @ 2012-02-01 21:39 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Intel Graphics Development

On Tue, Jan 31, 2012 at 04:47:56PM +0100, Daniel Vetter wrote:
> Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c |   13 +++++++++++++
>  1 files changed, 13 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 681cbe4..4ebca6d 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1422,6 +1422,19 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
>  			   I915_READ16(C0DRB3));
>  		seq_printf(m, "C1DRB3 = 0x%04x\n",
>  			   I915_READ16(C1DRB3));
> +	} else if (IS_GEN6(dev) || IS_GEN7(dev)) {
> +		seq_printf(m, "MAD_DIMM_C0 = 0x%08x\n",
> +			   I915_READ(MAD_DIMM_C0));
> +		seq_printf(m, "MAD_DIMM_C1 = 0x%08x\n",
> +			   I915_READ(MAD_DIMM_C1));
> +		seq_printf(m, "MAD_DIMM_C2 = 0x%08x\n",
> +			   I915_READ(MAD_DIMM_C2));
> +		seq_printf(m, "TILECTL = 0x%08x\n",
> +			   I915_READ(TILECTL));
> +		seq_printf(m, "ARB_MODE = 0x%08x\n",
> +			   I915_READ(ARB_MODE));
> +		seq_printf(m, "DISP_ARB_CTL = 0x%08x\n",
> +			   I915_READ(DISP_ARB_CTL));
>  	}
>  	mutex_unlock(&dev->struct_mutex);
>  
> -- 
> 1.7.7.5
> 

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/3] drm/i915: swizzling support for snb/ivb
  2012-02-01 21:35 ` [PATCH 1/3] drm/i915: swizzling support for snb/ivb Ben Widawsky
@ 2012-02-01 22:16   ` Daniel Vetter
  2012-02-01 22:26     ` Chris Wilson
  0 siblings, 1 reply; 22+ messages in thread
From: Daniel Vetter @ 2012-02-01 22:16 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Daniel Vetter, Intel Graphics Development

On Wed, Feb 01, 2012 at 01:35:14PM -0800, Ben Widawsky wrote:
> On 01/31/2012 07:47 AM, Daniel Vetter wrote:
> > We have to do this manually. Somebody had a Great Idea.
> > 
> > I've measured speed-ups just a few percent above the noise level
> > (below 5% for the best case), but no slowdows. Chris Wilson measured
> > quite a bit more (10-20% above the usual snb variance) on a more
> > recent and better tuned version of sna, but also recorded a few
> > slow-downs on benchmarks know for uglier amounts of snb-induced
> > variance.
> > 
> > v2: Incorporate Ben Widawsky's preliminary review comments and
> > elaborate a bit about the performance impact in the changelog.
> > 
> > Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> 
> You didn't address one questions I really cared about, how is it safe to
> ignore channel 3 size? While I'm at it, I wonder what is in these
> registers if you have less than 256MB. If the answer is zero, then your
> check isn't safe enough below.

Hm, I've thought I've answered that in the mail to your review: 3 channel
ddr configurations only exists on i7 chips without a gpu attached.
Furthermore swizzling is only sensible when we have 2 channels anyway.

For the other issue, I suspect 256mb is simply the smallest dimm you can
buy for ddr3 - I don't have the spec for that though, but the smallest
dimm my local supplier sells is 512mb, anyway ;-)

> As an aside, this will potentially break our simulation environment, but
> that's environment fail.

I think we can quirk that by detecting has or something like that ...

Cheers, Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/3] drm/i915: consolidate swizzling control bit frobbing
  2012-02-01 21:37   ` Ben Widawsky
@ 2012-02-01 22:23     ` Daniel Vetter
  0 siblings, 0 replies; 22+ messages in thread
From: Daniel Vetter @ 2012-02-01 22:23 UTC (permalink / raw)
  To: Daniel Vetter, Intel Graphics Development

On Wed, Feb 01, 2012 at 01:37:29PM -0800, Ben Widawsky wrote:
> On Tue, Jan 31, 2012 at 04:47:55PM +0100, Daniel Vetter wrote:
> > On gen5 we also need to correctly set up swizzling in the display
> > scanout engine, but only there. Consolidate this into the same
> > function.
> > 
> > This has a small effect on ums setups - the kernel now also sets this
> > bit in addition to userspace setting it. Given that this code only
> > runs when userspace either can't (resume, gpu reset) or explicitly
> > won't(gem_init) touch the hw this shouldn't have an adverse effect.
> > 
> > Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > ---
> 
> Have you tested this well?
> From bspec:
> Note: For ILK Display Engine can(?) implement the A[6] swizzling
> structure however it will not be used – corresponding bits will be set
> to “00”

I've simply moved the check around and my and checked whether I haven't
broken things. But resetting that bit seriously creates swizzling havoc on
my screen, so I think it's required ;-)

Cheers, Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/3] drm/i915: swizzling support for snb/ivb
  2012-02-01 22:16   ` Daniel Vetter
@ 2012-02-01 22:26     ` Chris Wilson
  2012-02-01 23:15       ` [PATCH] " Daniel Vetter
  0 siblings, 1 reply; 22+ messages in thread
From: Chris Wilson @ 2012-02-01 22:26 UTC (permalink / raw)
  To: Daniel Vetter, Ben Widawsky; +Cc: Daniel Vetter, Intel Graphics Development

On Wed, 1 Feb 2012 23:16:19 +0100, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Wed, Feb 01, 2012 at 01:35:14PM -0800, Ben Widawsky wrote:
> > You didn't address one questions I really cared about, how is it safe to
> > ignore channel 3 size? While I'm at it, I wonder what is in these
> > registers if you have less than 256MB. If the answer is zero, then your
> > check isn't safe enough below.
> 
> Hm, I've thought I've answered that in the mail to your review: 3 channel
> ddr configurations only exists on i7 chips without a gpu attached.
> Furthermore swizzling is only sensible when we have 2 channels anyway.

It almost always sensible to leave a comment behind in the code to
address review questions. What may not appear immediately obvious to
another person is unlikely to occur to anyone perusing the code 18+
months later. Didn't future Daniel warn you about that when he travelled
back from December 2012? :)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH] drm/i915: swizzling support for snb/ivb
  2012-02-01 22:26     ` Chris Wilson
@ 2012-02-01 23:15       ` Daniel Vetter
  2012-02-02  5:30         ` Ben Widawsky
  0 siblings, 1 reply; 22+ messages in thread
From: Daniel Vetter @ 2012-02-01 23:15 UTC (permalink / raw)
  To: Intel Graphics Development; +Cc: Daniel Vetter, Ben Widawsky

We have to do this manually. Somebody had a Great Idea.

I've measured speed-ups just a few percent above the noise level
(below 5% for the best case), but no slowdows. Chris Wilson measured
quite a bit more (10-20% above the usual snb variance) on a more
recent and better tuned version of sna, but also recorded a few
slow-downs on benchmarks know for uglier amounts of snb-induced
variance.

v2: Incorporate Ben Widawsky's preliminary review comments and
elaborate a bit about the performance impact in the changelog.

v3: Add a comment as to why we don't need to check the 3rd memory
channel.

Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_dma.c        |    2 +-
 drivers/gpu/drm/i915/i915_drv.c        |    4 ++-
 drivers/gpu/drm/i915/i915_drv.h        |    3 +-
 drivers/gpu/drm/i915/i915_gem.c        |   23 +++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_tiling.c |   19 ++++++++++++++++-
 drivers/gpu/drm/i915/i915_reg.h        |   34 ++++++++++++++++++++++++++++++++
 6 files changed, 78 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 3f27173..dfef956 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1208,7 +1208,7 @@ static int i915_load_gem_init(struct drm_device *dev)
 	i915_gem_do_init(dev, 0, mappable_size, gtt_size - PAGE_SIZE);
 
 	mutex_lock(&dev->struct_mutex);
-	ret = i915_gem_init_ringbuffer(dev);
+	ret = i915_gem_init_hw(dev);
 	mutex_unlock(&dev->struct_mutex);
 	if (ret)
 		return ret;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 1658cfd..12ddf47 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -495,7 +495,7 @@ static int i915_drm_thaw(struct drm_device *dev)
 		mutex_lock(&dev->struct_mutex);
 		dev_priv->mm.suspended = 0;
 
-		error = i915_gem_init_ringbuffer(dev);
+		error = i915_gem_init_hw(dev);
 		mutex_unlock(&dev->struct_mutex);
 
 		if (HAS_PCH_SPLIT(dev))
@@ -686,6 +686,8 @@ int i915_reset(struct drm_device *dev, u8 flags)
 			!dev_priv->mm.suspended) {
 		dev_priv->mm.suspended = 0;
 
+		i915_gem_init_swizzling(dev);
+
 		dev_priv->ring[RCS].init(&dev_priv->ring[RCS]);
 		if (HAS_BSD(dev))
 		    dev_priv->ring[VCS].init(&dev_priv->ring[VCS]);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 865de80..0845419 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1187,7 +1187,8 @@ int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj,
 					    uint32_t read_domains,
 					    uint32_t write_domain);
 int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
-int __must_check i915_gem_init_ringbuffer(struct drm_device *dev);
+int __must_check i915_gem_init_hw(struct drm_device *dev);
+void i915_gem_init_swizzling(struct drm_device *dev);
 void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
 void i915_gem_do_init(struct drm_device *dev,
 		      unsigned long start,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 51a2b0c..86fffd2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3681,12 +3681,31 @@ i915_gem_idle(struct drm_device *dev)
 	return 0;
 }
 
+void i915_gem_init_swizzling(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+
+	if (INTEL_INFO(dev)->gen < 6 ||
+	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
+		return;
+
+	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
+				 DISP_TILE_SURFACE_SWIZZLING);
+
+	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
+	if (IS_GEN6(dev))
+		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB));
+	else
+		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_IVB));
+}
 int
-i915_gem_init_ringbuffer(struct drm_device *dev)
+i915_gem_init_hw(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	int ret;
 
+	i915_gem_init_swizzling(dev);
+
 	ret = intel_init_render_ring_buffer(dev);
 	if (ret)
 		return ret;
@@ -3742,7 +3761,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 	mutex_lock(&dev->struct_mutex);
 	dev_priv->mm.suspended = 0;
 
-	ret = i915_gem_init_ringbuffer(dev);
+	ret = i915_gem_init_hw(dev);
 	if (ret != 0) {
 		mutex_unlock(&dev->struct_mutex);
 		return ret;
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 861223b..1a93066 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -93,8 +93,23 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
 	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
 
 	if (INTEL_INFO(dev)->gen >= 6) {
-		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+		uint32_t dimm_c0, dimm_c1;
+		dimm_c0 = I915_READ(MAD_DIMM_C0);
+		dimm_c1 = I915_READ(MAD_DIMM_C1);
+		dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
+		dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
+		/* Enable swizzling when the channels are populated with
+		 * identically sized dimms. We don't need to check the 3rd
+		 * channel because no cpu with gpu attached ships in that
+		 * configuration. Also, swizzling only makes sense for 2
+		 * channels anyway. */
+		if (dimm_c0 == dimm_c1) {
+			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+			swizzle_y = I915_BIT_6_SWIZZLE_9;
+		} else {
+			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+		}
 	} else if (IS_GEN5(dev)) {
 		/* On Ironlake whatever DRAM config, GPU always do
 		 * same swizzling setup.
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f960738..539ef90 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -295,6 +295,12 @@
 #define FENCE_REG_SANDYBRIDGE_0		0x100000
 #define   SANDYBRIDGE_FENCE_PITCH_SHIFT	32
 
+/* control register for cpu gtt access */
+#define TILECTL				0x101000
+#define   TILECTL_SWZCTL			(1 << 0)
+#define   TILECTL_TLB_PREFETCH_DIS	(1 << 2)
+#define   TILECTL_BACKSNOOP_DIS		(1 << 3)
+
 /*
  * Instruction and interrupt control regs
  */
@@ -318,6 +324,11 @@
 #define RING_MAX_IDLE(base)	((base)+0x54)
 #define RING_HWS_PGA(base)	((base)+0x80)
 #define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
+#define ARB_MODE		0x04030
+#define   ARB_MODE_SWIZZLE_SNB	(1<<4)
+#define   ARB_MODE_SWIZZLE_IVB	(1<<5)
+#define   ARB_MODE_ENABLE(x)	GFX_MODE_ENABLE(x)
+#define   ARB_MODE_DISABLE(x)	GFX_MODE_DISABLE(x)
 #define RENDER_HWS_PGA_GEN7	(0x04080)
 #define RING_FAULT_REG(ring)	(0x4094 + 0x100*(ring)->id)
 #define DONE_REG		0x40b0
@@ -1037,6 +1048,29 @@
 #define C0DRB3			0x10206
 #define C1DRB3			0x10606
 
+/** snb MCH registers for reading the DRAM channel configuration */
+#define MAD_DIMM_C0			(MCHBAR_MIRROR_BASE_SNB + 0x5004)
+#define   MAD_DIMM_C1			(MCHBAR_MIRROR_BASE_SNB + 0x5008)
+#define   MAD_DIMM_C2			(MCHBAR_MIRROR_BASE_SNB + 0x500C)
+#define   MAD_DIMM_ECC_MASK		(0x3 << 24)
+#define   MAD_DIMM_ECC_OFF		(0x0 << 24)
+#define   MAD_DIMM_ECC_IO_ON_LOGIC_OFF	(0x1 << 24)
+#define   MAD_DIMM_ECC_IO_OFF_LOGIC_ON	(0x2 << 24)
+#define   MAD_DIMM_ECC_ON		(0x3 << 24)
+#define   MAD_DIMM_ENH_INTERLEAVE	(0x1 << 22)
+#define   MAD_DIMM_RANK_INTERLEAVE	(0x1 << 21)
+#define   MAD_DIMM_B_WIDTH_X16		(0x1 << 20) /* X8 chips if unset */
+#define   MAD_DIMM_A_WIDTH_X16		(0x1 << 19) /* X8 chips if unset */
+#define   MAD_DIMM_B_DUAL_RANK		(0x1 << 18)
+#define   MAD_DIMM_A_DUAL_RANK		(0x1 << 17)
+#define   MAD_DIMM_A_SELECT		(0x1 << 16)
+/* DIMM sizes are in multiples of 256mb. */
+#define   MAD_DIMM_B_SIZE_SHIFT		8
+#define   MAD_DIMM_B_SIZE_MASK		(0xff << MAD_DIMM_B_SIZE_SHIFT)
+#define   MAD_DIMM_A_SIZE_SHIFT		0
+#define   MAD_DIMM_A_SIZE_MASK		(0xff << MAD_DIMM_A_SIZE_SHIFT)
+
+
 /* Clocking configuration register */
 #define CLKCFG			0x10c00
 #define CLKCFG_FSB_400					(5 << 0)	/* hrawclk 100 */
-- 
1.7.7.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: swizzling support for snb/ivb
  2012-02-01 23:15       ` [PATCH] " Daniel Vetter
@ 2012-02-02  5:30         ` Ben Widawsky
  2012-02-02  8:40           ` Daniel Vetter
  2012-02-02  8:58           ` Daniel Vetter
  0 siblings, 2 replies; 22+ messages in thread
From: Ben Widawsky @ 2012-02-02  5:30 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Intel Graphics Development

On Thu, Feb 02, 2012 at 12:15:38AM +0100, Daniel Vetter wrote:
> We have to do this manually. Somebody had a Great Idea.
> 
> I've measured speed-ups just a few percent above the noise level
> (below 5% for the best case), but no slowdows. Chris Wilson measured
> quite a bit more (10-20% above the usual snb variance) on a more
> recent and better tuned version of sna, but also recorded a few
> slow-downs on benchmarks know for uglier amounts of snb-induced
> variance.
> 
> v2: Incorporate Ben Widawsky's preliminary review comments and
> elaborate a bit about the performance impact in the changelog.
> 
> v3: Add a comment as to why we don't need to check the 3rd memory
> channel.
> 
> Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> ---
>  drivers/gpu/drm/i915/i915_dma.c        |    2 +-
>  drivers/gpu/drm/i915/i915_drv.c        |    4 ++-
>  drivers/gpu/drm/i915/i915_drv.h        |    3 +-
>  drivers/gpu/drm/i915/i915_gem.c        |   23 +++++++++++++++++++-
>  drivers/gpu/drm/i915/i915_gem_tiling.c |   19 ++++++++++++++++-
>  drivers/gpu/drm/i915/i915_reg.h        |   34 ++++++++++++++++++++++++++++++++
>  6 files changed, 78 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 3f27173..dfef956 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1208,7 +1208,7 @@ static int i915_load_gem_init(struct drm_device *dev)
>  	i915_gem_do_init(dev, 0, mappable_size, gtt_size - PAGE_SIZE);
>  
>  	mutex_lock(&dev->struct_mutex);
> -	ret = i915_gem_init_ringbuffer(dev);
> +	ret = i915_gem_init_hw(dev);
>  	mutex_unlock(&dev->struct_mutex);
>  	if (ret)
>  		return ret;
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 1658cfd..12ddf47 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -495,7 +495,7 @@ static int i915_drm_thaw(struct drm_device *dev)
>  		mutex_lock(&dev->struct_mutex);
>  		dev_priv->mm.suspended = 0;
>  
> -		error = i915_gem_init_ringbuffer(dev);
> +		error = i915_gem_init_hw(dev);
>  		mutex_unlock(&dev->struct_mutex);
>  
>  		if (HAS_PCH_SPLIT(dev))
> @@ -686,6 +686,8 @@ int i915_reset(struct drm_device *dev, u8 flags)
>  			!dev_priv->mm.suspended) {
>  		dev_priv->mm.suspended = 0;
>  
> +		i915_gem_init_swizzling(dev);
> +
>  		dev_priv->ring[RCS].init(&dev_priv->ring[RCS]);
>  		if (HAS_BSD(dev))
>  		    dev_priv->ring[VCS].init(&dev_priv->ring[VCS]);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 865de80..0845419 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1187,7 +1187,8 @@ int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj,
>  					    uint32_t read_domains,
>  					    uint32_t write_domain);
>  int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
> -int __must_check i915_gem_init_ringbuffer(struct drm_device *dev);
> +int __must_check i915_gem_init_hw(struct drm_device *dev);
> +void i915_gem_init_swizzling(struct drm_device *dev);
>  void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
>  void i915_gem_do_init(struct drm_device *dev,
>  		      unsigned long start,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 51a2b0c..86fffd2 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3681,12 +3681,31 @@ i915_gem_idle(struct drm_device *dev)
>  	return 0;
>  }
>  
> +void i915_gem_init_swizzling(struct drm_device *dev)
> +{
> +	drm_i915_private_t *dev_priv = dev->dev_private;
> +
> +	if (INTEL_INFO(dev)->gen < 6 ||
> +	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
> +		return;
> +
> +	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
> +				 DISP_TILE_SURFACE_SWIZZLING);
> +
> +	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
> +	if (IS_GEN6(dev))
> +		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB));
> +	else
> +		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_IVB));
> +}
>  int
> -i915_gem_init_ringbuffer(struct drm_device *dev)
> +i915_gem_init_hw(struct drm_device *dev)
>  {
>  	drm_i915_private_t *dev_priv = dev->dev_private;
>  	int ret;
>  
> +	i915_gem_init_swizzling(dev);
> +
>  	ret = intel_init_render_ring_buffer(dev);
>  	if (ret)
>  		return ret;
> @@ -3742,7 +3761,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
>  	mutex_lock(&dev->struct_mutex);
>  	dev_priv->mm.suspended = 0;
>  
> -	ret = i915_gem_init_ringbuffer(dev);
> +	ret = i915_gem_init_hw(dev);
>  	if (ret != 0) {
>  		mutex_unlock(&dev->struct_mutex);
>  		return ret;
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index 861223b..1a93066 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -93,8 +93,23 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
>  	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
>  
>  	if (INTEL_INFO(dev)->gen >= 6) {
> -		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> -		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> +		uint32_t dimm_c0, dimm_c1;
> +		dimm_c0 = I915_READ(MAD_DIMM_C0);
> +		dimm_c1 = I915_READ(MAD_DIMM_C1);
> +		dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
> +		dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
> +		/* Enable swizzling when the channels are populated with
> +		 * identically sized dimms. We don't need to check the 3rd
> +		 * channel because no cpu with gpu attached ships in that
> +		 * configuration. Also, swizzling only makes sense for 2
> +		 * channels anyway. */
> +		if (dimm_c0 == dimm_c1) {
> +			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
> +			swizzle_y = I915_BIT_6_SWIZZLE_9;
> +		} else {
> +			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> +			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> +		}
>  	} else if (IS_GEN5(dev)) {
>  		/* On Ironlake whatever DRAM config, GPU always do
>  		 * same swizzling setup.

Hmm, for my curiosity, why doesn't swizzling make sense with 3 channels?
I did some searching and it appears that you're right about no product
shipping with the configuration, but I suspect the comment will help in
case any product ever does.

I'd also say it's not a bad idea to elaborate the assumption that we
never have less than 256MB of memory WARN_ON(dimm_c0 + dimm_c1 == 0).

> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index f960738..539ef90 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -295,6 +295,12 @@
>  #define FENCE_REG_SANDYBRIDGE_0		0x100000
>  #define   SANDYBRIDGE_FENCE_PITCH_SHIFT	32
>  
> +/* control register for cpu gtt access */
> +#define TILECTL				0x101000
> +#define   TILECTL_SWZCTL			(1 << 0)
> +#define   TILECTL_TLB_PREFETCH_DIS	(1 << 2)
> +#define   TILECTL_BACKSNOOP_DIS		(1 << 3)
> +
>  /*
>   * Instruction and interrupt control regs
>   */
> @@ -318,6 +324,11 @@
>  #define RING_MAX_IDLE(base)	((base)+0x54)
>  #define RING_HWS_PGA(base)	((base)+0x80)
>  #define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
> +#define ARB_MODE		0x04030
> +#define   ARB_MODE_SWIZZLE_SNB	(1<<4)
> +#define   ARB_MODE_SWIZZLE_IVB	(1<<5)
> +#define   ARB_MODE_ENABLE(x)	GFX_MODE_ENABLE(x)
> +#define   ARB_MODE_DISABLE(x)	GFX_MODE_DISABLE(x)
>  #define RENDER_HWS_PGA_GEN7	(0x04080)
>  #define RING_FAULT_REG(ring)	(0x4094 + 0x100*(ring)->id)
>  #define DONE_REG		0x40b0
> @@ -1037,6 +1048,29 @@
>  #define C0DRB3			0x10206
>  #define C1DRB3			0x10606
>  
> +/** snb MCH registers for reading the DRAM channel configuration */
> +#define MAD_DIMM_C0			(MCHBAR_MIRROR_BASE_SNB + 0x5004)
> +#define   MAD_DIMM_C1			(MCHBAR_MIRROR_BASE_SNB + 0x5008)
> +#define   MAD_DIMM_C2			(MCHBAR_MIRROR_BASE_SNB + 0x500C)
> +#define   MAD_DIMM_ECC_MASK		(0x3 << 24)
> +#define   MAD_DIMM_ECC_OFF		(0x0 << 24)
> +#define   MAD_DIMM_ECC_IO_ON_LOGIC_OFF	(0x1 << 24)
> +#define   MAD_DIMM_ECC_IO_OFF_LOGIC_ON	(0x2 << 24)
> +#define   MAD_DIMM_ECC_ON		(0x3 << 24)
> +#define   MAD_DIMM_ENH_INTERLEAVE	(0x1 << 22)
> +#define   MAD_DIMM_RANK_INTERLEAVE	(0x1 << 21)
> +#define   MAD_DIMM_B_WIDTH_X16		(0x1 << 20) /* X8 chips if unset */
> +#define   MAD_DIMM_A_WIDTH_X16		(0x1 << 19) /* X8 chips if unset */
> +#define   MAD_DIMM_B_DUAL_RANK		(0x1 << 18)
> +#define   MAD_DIMM_A_DUAL_RANK		(0x1 << 17)
> +#define   MAD_DIMM_A_SELECT		(0x1 << 16)
> +/* DIMM sizes are in multiples of 256mb. */
> +#define   MAD_DIMM_B_SIZE_SHIFT		8
> +#define   MAD_DIMM_B_SIZE_MASK		(0xff << MAD_DIMM_B_SIZE_SHIFT)
> +#define   MAD_DIMM_A_SIZE_SHIFT		0
> +#define   MAD_DIMM_A_SIZE_MASK		(0xff << MAD_DIMM_A_SIZE_SHIFT)
> +
> +

White space still seems wrong to me, but I don't need to see another
version to verify it's correct. I would have expected:

** snb MCH registers for reading the DRAM channel configuration */
define MAD_DIMM_C0			(MCHBAR_MIRROR_BASE_SNB + 0x5004)
define MAD_DIMM_C1			(MCHBAR_MIRROR_BASE_SNB + 0x5008)
define MAD_DIMM_C2			(MCHBAR_MIRROR_BASE_SNB + 0x500C)
#define   MAD_DIMM_ECC_MASK		(0x3 << 24)

>  /* Clocking configuration register */
>  #define CLKCFG			0x10c00
>  #define CLKCFG_FSB_400					(5 << 0)	/* hrawclk 100 */

I don't really need to see an updated version of the patch for either
suggestion I submitted.

Reviewed-by: Ben Widawsky <ben@bwidawsk.net>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: swizzling support for snb/ivb
  2012-02-02  5:30         ` Ben Widawsky
@ 2012-02-02  8:40           ` Daniel Vetter
  2012-02-05  3:13             ` Ben Widawsky
  2012-02-02  8:58           ` Daniel Vetter
  1 sibling, 1 reply; 22+ messages in thread
From: Daniel Vetter @ 2012-02-02  8:40 UTC (permalink / raw)
  To: Daniel Vetter, Intel Graphics Development, Chris Wilson

On Wed, Feb 01, 2012 at 09:30:57PM -0800, Ben Widawsky wrote:
> Hmm, for my curiosity, why doesn't swizzling make sense with 3 channels?
> I did some searching and it appears that you're right about no product
> shipping with the configuration, but I suspect the comment will help in
> case any product ever does.

The thing is that gpus are really fond of power-of-two sized dimension.
Actually just a even multiple of 64 stride is good enough: This way if you
walk a texture in y direction you'll always hit the same memory channel
(we switch channels every 64 bytes). So that's why we need swizzling on
dual channel (if you draw the pictures for all the swizzle patterns you'll
notice that odd-even line pair is on two different channels). Iirc the
sampler is also optimize to fetch aligned 2x2 blocks and hence maximizes
bandwidth by using both ddr channels.

Now 3 never divides a power-of-two, so having 3 channels will naturally
result in a channel interleave pattern when walking in y direction.

> I'd also say it's not a bad idea to elaborate the assumption that we
> never have less than 256MB of memory WARN_ON(dimm_c0 + dimm_c1 == 0).

I'm pretty sure that we can't boot with 0mb of ram ;-) The other thing is
that swizzling is a pure preformance optimization - if we enable it on
setup where it's not required we'll only annoy the memory controller with
less efficient access patterns.

> > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> > index f960738..539ef90 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -295,6 +295,12 @@
> >  #define FENCE_REG_SANDYBRIDGE_0		0x100000
> >  #define   SANDYBRIDGE_FENCE_PITCH_SHIFT	32
> >  
> > +/* control register for cpu gtt access */
> > +#define TILECTL				0x101000
> > +#define   TILECTL_SWZCTL			(1 << 0)
> > +#define   TILECTL_TLB_PREFETCH_DIS	(1 << 2)
> > +#define   TILECTL_BACKSNOOP_DIS		(1 << 3)
> > +
> >  /*
> >   * Instruction and interrupt control regs
> >   */
> > @@ -318,6 +324,11 @@
> >  #define RING_MAX_IDLE(base)	((base)+0x54)
> >  #define RING_HWS_PGA(base)	((base)+0x80)
> >  #define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
> > +#define ARB_MODE		0x04030
> > +#define   ARB_MODE_SWIZZLE_SNB	(1<<4)
> > +#define   ARB_MODE_SWIZZLE_IVB	(1<<5)
> > +#define   ARB_MODE_ENABLE(x)	GFX_MODE_ENABLE(x)
> > +#define   ARB_MODE_DISABLE(x)	GFX_MODE_DISABLE(x)
> >  #define RENDER_HWS_PGA_GEN7	(0x04080)
> >  #define RING_FAULT_REG(ring)	(0x4094 + 0x100*(ring)->id)
> >  #define DONE_REG		0x40b0
> > @@ -1037,6 +1048,29 @@
> >  #define C0DRB3			0x10206
> >  #define C1DRB3			0x10606
> >  
> > +/** snb MCH registers for reading the DRAM channel configuration */
> > +#define MAD_DIMM_C0			(MCHBAR_MIRROR_BASE_SNB + 0x5004)
> > +#define   MAD_DIMM_C1			(MCHBAR_MIRROR_BASE_SNB + 0x5008)
> > +#define   MAD_DIMM_C2			(MCHBAR_MIRROR_BASE_SNB + 0x500C)
> > +#define   MAD_DIMM_ECC_MASK		(0x3 << 24)
> > +#define   MAD_DIMM_ECC_OFF		(0x0 << 24)
> > +#define   MAD_DIMM_ECC_IO_ON_LOGIC_OFF	(0x1 << 24)
> > +#define   MAD_DIMM_ECC_IO_OFF_LOGIC_ON	(0x2 << 24)
> > +#define   MAD_DIMM_ECC_ON		(0x3 << 24)
> > +#define   MAD_DIMM_ENH_INTERLEAVE	(0x1 << 22)
> > +#define   MAD_DIMM_RANK_INTERLEAVE	(0x1 << 21)
> > +#define   MAD_DIMM_B_WIDTH_X16		(0x1 << 20) /* X8 chips if unset */
> > +#define   MAD_DIMM_A_WIDTH_X16		(0x1 << 19) /* X8 chips if unset */
> > +#define   MAD_DIMM_B_DUAL_RANK		(0x1 << 18)
> > +#define   MAD_DIMM_A_DUAL_RANK		(0x1 << 17)
> > +#define   MAD_DIMM_A_SELECT		(0x1 << 16)
> > +/* DIMM sizes are in multiples of 256mb. */
> > +#define   MAD_DIMM_B_SIZE_SHIFT		8
> > +#define   MAD_DIMM_B_SIZE_MASK		(0xff << MAD_DIMM_B_SIZE_SHIFT)
> > +#define   MAD_DIMM_A_SIZE_SHIFT		0
> > +#define   MAD_DIMM_A_SIZE_MASK		(0xff << MAD_DIMM_A_SIZE_SHIFT)
> > +
> > +
> 
> White space still seems wrong to me, but I don't need to see another
> version to verify it's correct. I would have expected:
> 
> ** snb MCH registers for reading the DRAM channel configuration */
> define MAD_DIMM_C0			(MCHBAR_MIRROR_BASE_SNB + 0x5004)
> define MAD_DIMM_C1			(MCHBAR_MIRROR_BASE_SNB + 0x5008)
> define MAD_DIMM_C2			(MCHBAR_MIRROR_BASE_SNB + 0x500C)
> #define   MAD_DIMM_ECC_MASK		(0x3 << 24)

Fixed as you've suggested.

> 
> >  /* Clocking configuration register */
> >  #define CLKCFG			0x10c00
> >  #define CLKCFG_FSB_400					(5 << 0)	/* hrawclk 100 */
> 
> I don't really need to see an updated version of the patch for either
> suggestion I submitted.
> 
> Reviewed-by: Ben Widawsky <ben@bwidawsk.net>

Thanks for your review.

Cheers, Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH] drm/i915: swizzling support for snb/ivb
  2012-02-02  5:30         ` Ben Widawsky
  2012-02-02  8:40           ` Daniel Vetter
@ 2012-02-02  8:58           ` Daniel Vetter
  2012-02-04 20:59             ` Eric Anholt
  1 sibling, 1 reply; 22+ messages in thread
From: Daniel Vetter @ 2012-02-02  8:58 UTC (permalink / raw)
  To: Intel Graphics Development; +Cc: Daniel Vetter, Ben Widawsky

We have to do this manually. Somebody had a Great Idea.

I've measured speed-ups just a few percent above the noise level
(below 5% for the best case), but no slowdows. Chris Wilson measured
quite a bit more (10-20% above the usual snb variance) on a more
recent and better tuned version of sna, but also recorded a few
slow-downs on benchmarks know for uglier amounts of snb-induced
variance.

v2: Incorporate Ben Widawsky's preliminary review comments and
elaborate a bit about the performance impact in the changelog.

v3: Add a comment as to why we don't need to check the 3rd memory
channel.

v4: Fixup whitespace.

Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_dma.c        |    2 +-
 drivers/gpu/drm/i915/i915_drv.c        |    4 ++-
 drivers/gpu/drm/i915/i915_drv.h        |    3 +-
 drivers/gpu/drm/i915/i915_gem.c        |   23 +++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_tiling.c |   19 ++++++++++++++++-
 drivers/gpu/drm/i915/i915_reg.h        |   34 ++++++++++++++++++++++++++++++++
 6 files changed, 78 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 3f27173..dfef956 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1208,7 +1208,7 @@ static int i915_load_gem_init(struct drm_device *dev)
 	i915_gem_do_init(dev, 0, mappable_size, gtt_size - PAGE_SIZE);
 
 	mutex_lock(&dev->struct_mutex);
-	ret = i915_gem_init_ringbuffer(dev);
+	ret = i915_gem_init_hw(dev);
 	mutex_unlock(&dev->struct_mutex);
 	if (ret)
 		return ret;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 1658cfd..12ddf47 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -495,7 +495,7 @@ static int i915_drm_thaw(struct drm_device *dev)
 		mutex_lock(&dev->struct_mutex);
 		dev_priv->mm.suspended = 0;
 
-		error = i915_gem_init_ringbuffer(dev);
+		error = i915_gem_init_hw(dev);
 		mutex_unlock(&dev->struct_mutex);
 
 		if (HAS_PCH_SPLIT(dev))
@@ -686,6 +686,8 @@ int i915_reset(struct drm_device *dev, u8 flags)
 			!dev_priv->mm.suspended) {
 		dev_priv->mm.suspended = 0;
 
+		i915_gem_init_swizzling(dev);
+
 		dev_priv->ring[RCS].init(&dev_priv->ring[RCS]);
 		if (HAS_BSD(dev))
 		    dev_priv->ring[VCS].init(&dev_priv->ring[VCS]);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 865de80..0845419 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1187,7 +1187,8 @@ int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj,
 					    uint32_t read_domains,
 					    uint32_t write_domain);
 int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
-int __must_check i915_gem_init_ringbuffer(struct drm_device *dev);
+int __must_check i915_gem_init_hw(struct drm_device *dev);
+void i915_gem_init_swizzling(struct drm_device *dev);
 void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
 void i915_gem_do_init(struct drm_device *dev,
 		      unsigned long start,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 51a2b0c..86fffd2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3681,12 +3681,31 @@ i915_gem_idle(struct drm_device *dev)
 	return 0;
 }
 
+void i915_gem_init_swizzling(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+
+	if (INTEL_INFO(dev)->gen < 6 ||
+	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
+		return;
+
+	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
+				 DISP_TILE_SURFACE_SWIZZLING);
+
+	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
+	if (IS_GEN6(dev))
+		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB));
+	else
+		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_IVB));
+}
 int
-i915_gem_init_ringbuffer(struct drm_device *dev)
+i915_gem_init_hw(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	int ret;
 
+	i915_gem_init_swizzling(dev);
+
 	ret = intel_init_render_ring_buffer(dev);
 	if (ret)
 		return ret;
@@ -3742,7 +3761,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 	mutex_lock(&dev->struct_mutex);
 	dev_priv->mm.suspended = 0;
 
-	ret = i915_gem_init_ringbuffer(dev);
+	ret = i915_gem_init_hw(dev);
 	if (ret != 0) {
 		mutex_unlock(&dev->struct_mutex);
 		return ret;
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 861223b..1a93066 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -93,8 +93,23 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
 	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
 
 	if (INTEL_INFO(dev)->gen >= 6) {
-		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+		uint32_t dimm_c0, dimm_c1;
+		dimm_c0 = I915_READ(MAD_DIMM_C0);
+		dimm_c1 = I915_READ(MAD_DIMM_C1);
+		dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
+		dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
+		/* Enable swizzling when the channels are populated with
+		 * identically sized dimms. We don't need to check the 3rd
+		 * channel because no cpu with gpu attached ships in that
+		 * configuration. Also, swizzling only makes sense for 2
+		 * channels anyway. */
+		if (dimm_c0 == dimm_c1) {
+			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+			swizzle_y = I915_BIT_6_SWIZZLE_9;
+		} else {
+			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+		}
 	} else if (IS_GEN5(dev)) {
 		/* On Ironlake whatever DRAM config, GPU always do
 		 * same swizzling setup.
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f960738..89816fe 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -295,6 +295,12 @@
 #define FENCE_REG_SANDYBRIDGE_0		0x100000
 #define   SANDYBRIDGE_FENCE_PITCH_SHIFT	32
 
+/* control register for cpu gtt access */
+#define TILECTL				0x101000
+#define   TILECTL_SWZCTL			(1 << 0)
+#define   TILECTL_TLB_PREFETCH_DIS	(1 << 2)
+#define   TILECTL_BACKSNOOP_DIS		(1 << 3)
+
 /*
  * Instruction and interrupt control regs
  */
@@ -318,6 +324,11 @@
 #define RING_MAX_IDLE(base)	((base)+0x54)
 #define RING_HWS_PGA(base)	((base)+0x80)
 #define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
+#define ARB_MODE		0x04030
+#define   ARB_MODE_SWIZZLE_SNB	(1<<4)
+#define   ARB_MODE_SWIZZLE_IVB	(1<<5)
+#define   ARB_MODE_ENABLE(x)	GFX_MODE_ENABLE(x)
+#define   ARB_MODE_DISABLE(x)	GFX_MODE_DISABLE(x)
 #define RENDER_HWS_PGA_GEN7	(0x04080)
 #define RING_FAULT_REG(ring)	(0x4094 + 0x100*(ring)->id)
 #define DONE_REG		0x40b0
@@ -1037,6 +1048,29 @@
 #define C0DRB3			0x10206
 #define C1DRB3			0x10606
 
+/** snb MCH registers for reading the DRAM channel configuration */
+#define MAD_DIMM_C0			(MCHBAR_MIRROR_BASE_SNB + 0x5004)
+#define MAD_DIMM_C1			(MCHBAR_MIRROR_BASE_SNB + 0x5008)
+#define MAD_DIMM_C2			(MCHBAR_MIRROR_BASE_SNB + 0x500C)
+#define   MAD_DIMM_ECC_MASK		(0x3 << 24)
+#define   MAD_DIMM_ECC_OFF		(0x0 << 24)
+#define   MAD_DIMM_ECC_IO_ON_LOGIC_OFF	(0x1 << 24)
+#define   MAD_DIMM_ECC_IO_OFF_LOGIC_ON	(0x2 << 24)
+#define   MAD_DIMM_ECC_ON		(0x3 << 24)
+#define   MAD_DIMM_ENH_INTERLEAVE	(0x1 << 22)
+#define   MAD_DIMM_RANK_INTERLEAVE	(0x1 << 21)
+#define   MAD_DIMM_B_WIDTH_X16		(0x1 << 20) /* X8 chips if unset */
+#define   MAD_DIMM_A_WIDTH_X16		(0x1 << 19) /* X8 chips if unset */
+#define   MAD_DIMM_B_DUAL_RANK		(0x1 << 18)
+#define   MAD_DIMM_A_DUAL_RANK		(0x1 << 17)
+#define   MAD_DIMM_A_SELECT		(0x1 << 16)
+/* DIMM sizes are in multiples of 256mb. */
+#define   MAD_DIMM_B_SIZE_SHIFT		8
+#define   MAD_DIMM_B_SIZE_MASK		(0xff << MAD_DIMM_B_SIZE_SHIFT)
+#define   MAD_DIMM_A_SIZE_SHIFT		0
+#define   MAD_DIMM_A_SIZE_MASK		(0xff << MAD_DIMM_A_SIZE_SHIFT)
+
+
 /* Clocking configuration register */
 #define CLKCFG			0x10c00
 #define CLKCFG_FSB_400					(5 << 0)	/* hrawclk 100 */
-- 
1.7.7.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: swizzling support for snb/ivb
  2012-02-02  8:58           ` Daniel Vetter
@ 2012-02-04 20:59             ` Eric Anholt
  2012-02-06 15:45               ` Daniel Vetter
  0 siblings, 1 reply; 22+ messages in thread
From: Eric Anholt @ 2012-02-04 20:59 UTC (permalink / raw)
  To: Intel Graphics Development; +Cc: Daniel Vetter, Ben Widawsky


[-- Attachment #1.1: Type: text/plain, Size: 3268 bytes --]

On Thu,  2 Feb 2012 09:58:12 +0100, Daniel Vetter <daniel.vetter@ffwll.ch> wrote:
> We have to do this manually. Somebody had a Great Idea.
> 
> I've measured speed-ups just a few percent above the noise level
> (below 5% for the best case), but no slowdows. Chris Wilson measured
> quite a bit more (10-20% above the usual snb variance) on a more
> recent and better tuned version of sna, but also recorded a few
> slow-downs on benchmarks know for uglier amounts of snb-induced
> variance.


> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 51a2b0c..86fffd2 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3681,12 +3681,31 @@ i915_gem_idle(struct drm_device *dev)
>  	return 0;
>  }
>  
> +void i915_gem_init_swizzling(struct drm_device *dev)
> +{
> +	drm_i915_private_t *dev_priv = dev->dev_private;
> +
> +	if (INTEL_INFO(dev)->gen < 6 ||
> +	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
> +		return;
> +
> +	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
> +				 DISP_TILE_SURFACE_SWIZZLING);
> +
> +	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
> +	if (IS_GEN6(dev))
> +		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB));
> +	else
> +		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_IVB));
> +}

> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index 861223b..1a93066 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -93,8 +93,23 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
>  	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
>  
>  	if (INTEL_INFO(dev)->gen >= 6) {
> -		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> -		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> +		uint32_t dimm_c0, dimm_c1;
> +		dimm_c0 = I915_READ(MAD_DIMM_C0);
> +		dimm_c1 = I915_READ(MAD_DIMM_C1);
> +		dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
> +		dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
> +		/* Enable swizzling when the channels are populated with
> +		 * identically sized dimms. We don't need to check the 3rd
> +		 * channel because no cpu with gpu attached ships in that
> +		 * configuration. Also, swizzling only makes sense for 2
> +		 * channels anyway. */
> +		if (dimm_c0 == dimm_c1) {
> +			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
> +			swizzle_y = I915_BIT_6_SWIZZLE_9;
> +		} else {
> +			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> +			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> +		}

It looks to me like you're making the HW always swizzle bit 6 according
to 9 or 9/10 in i915_gem_init_swizzling, but you're not performing the
software side of swizzling in the !dual channel case.  My guess would be
that when you take your other DIMM the swizzling for pread/pwrite/swap
goes wrong, and that the answer would be to just not look at dimm sizes.

Other than that, looks reasonable to me.  Might make it clear what's
going on in the commit message -- "swizzling support for snb/ivb" made
me think you were fixing up the software side of bit 6 swizzling, when
it looks like you're actually enabling bit 6 swizzling for the first
time.

[-- Attachment #1.2: Type: application/pgp-signature, Size: 197 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: swizzling support for snb/ivb
  2012-02-02  8:40           ` Daniel Vetter
@ 2012-02-05  3:13             ` Ben Widawsky
  2012-02-06 16:06               ` Daniel Vetter
  0 siblings, 1 reply; 22+ messages in thread
From: Ben Widawsky @ 2012-02-05  3:13 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Daniel Vetter, Intel Graphics Development

On 02/02/12 00:40, Daniel Vetter wrote:
> On Wed, Feb 01, 2012 at 09:30:57PM -0800, Ben Widawsky wrote:
> [...]
>> I'd also say it's not a bad idea to elaborate the assumption that we
>> never have less than 256MB of memory WARN_ON(dimm_c0 + dimm_c1 == 0).
>
> I'm pretty sure that we can't boot with 0mb of ram ;-) The other thing is
> that swizzling is a pure preformance optimization - if we enable it on
> setup where it's not required we'll only annoy the memory controller with
> less efficient access patterns.

What I meant was, this value is still 0 in say, a system with 2
128MB DIMMs.

 > [...]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: swizzling support for snb/ivb
  2012-02-04 20:59             ` Eric Anholt
@ 2012-02-06 15:45               ` Daniel Vetter
  2012-02-07 19:56                 ` Eric Anholt
  0 siblings, 1 reply; 22+ messages in thread
From: Daniel Vetter @ 2012-02-06 15:45 UTC (permalink / raw)
  To: Eric Anholt; +Cc: Daniel Vetter, Intel Graphics Development, Ben Widawsky

On Sat, Feb 04, 2012 at 09:59:57PM +0100, Eric Anholt wrote:
> On Thu,  2 Feb 2012 09:58:12 +0100, Daniel Vetter <daniel.vetter@ffwll.ch> wrote:
> > We have to do this manually. Somebody had a Great Idea.
> > 
> > I've measured speed-ups just a few percent above the noise level
> > (below 5% for the best case), but no slowdows. Chris Wilson measured
> > quite a bit more (10-20% above the usual snb variance) on a more
> > recent and better tuned version of sna, but also recorded a few
> > slow-downs on benchmarks know for uglier amounts of snb-induced
> > variance.
> 
> 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 51a2b0c..86fffd2 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -3681,12 +3681,31 @@ i915_gem_idle(struct drm_device *dev)
> >  	return 0;
> >  }
> >  
> > +void i915_gem_init_swizzling(struct drm_device *dev)
> > +{
> > +	drm_i915_private_t *dev_priv = dev->dev_private;
> > +
> > +	if (INTEL_INFO(dev)->gen < 6 ||
> > +	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
> > +		return;
> > +
> > +	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
> > +				 DISP_TILE_SURFACE_SWIZZLING);
> > +
> > +	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
> > +	if (IS_GEN6(dev))
> > +		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB));
> > +	else
> > +		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_IVB));
> > +}
> 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> > index 861223b..1a93066 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> > @@ -93,8 +93,23 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
> >  	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
> >  
> >  	if (INTEL_INFO(dev)->gen >= 6) {
> > -		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> > -		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> > +		uint32_t dimm_c0, dimm_c1;
> > +		dimm_c0 = I915_READ(MAD_DIMM_C0);
> > +		dimm_c1 = I915_READ(MAD_DIMM_C1);
> > +		dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
> > +		dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
> > +		/* Enable swizzling when the channels are populated with
> > +		 * identically sized dimms. We don't need to check the 3rd
> > +		 * channel because no cpu with gpu attached ships in that
> > +		 * configuration. Also, swizzling only makes sense for 2
> > +		 * channels anyway. */
> > +		if (dimm_c0 == dimm_c1) {
> > +			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
> > +			swizzle_y = I915_BIT_6_SWIZZLE_9;
> > +		} else {
> > +			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> > +			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> > +		}
> 
> It looks to me like you're making the HW always swizzle bit 6 according
> to 9 or 9/10 in i915_gem_init_swizzling, but you're not performing the
> software side of swizzling in the !dual channel case.  My guess would be
> that when you take your other DIMM the swizzling for pread/pwrite/swap
> goes wrong, and that the answer would be to just not look at dimm sizes.

Hm, I'm a bit confused here ... So let me explain how swizzling on gen6+
works with this patch:

In the reset/bootup state the gpu doesn't do any additional swizzling for
tiled surfaces. You have to explicitly frob the 3 registers for gt, cpu
gtt window and scanout. I presume the gpu is pretty oblivious to what the
memory controller does and swizzling could also be enabled on
single-channel or L-shaped layouts (iirc I haven't tried that though).

Now the patch just detects whether we have symmetrically populated
channels and sets the swizzle stuff accordingly. i915_gem_init_swizzling
gets called at init, gpu reset and resume time and the programms the hw
bits accordingly to the swizzling we want. So
i915_gem_detect_bit_6_swizzle is a bit a mistdenomer on gen6+, but I've
thought it fits in better this way with the existing code-flow.

It's been quite a while since I've stitched this together but iirc I've
tested all the different possibilities by yanking dimms (safe for the 2
dimms per channel case, my laptop doesn't have enough slots for that). And
we have a set of testcase in i-g-t that should test pread/pwrite and
swapping, so I'm positive that we should catch any issues with this
easily.

> Other than that, looks reasonable to me.  Might make it clear what's
> going on in the commit message -- "swizzling support for snb/ivb" made
> me think you were fixing up the software side of bit 6 swizzling, when
> it looks like you're actually enabling bit 6 swizzling for the first
> time.

Yeah, I'll improve the commit message to be a bit less terse and explain
what's actually going on.

Thanks, Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: swizzling support for snb/ivb
  2012-02-05  3:13             ` Ben Widawsky
@ 2012-02-06 16:06               ` Daniel Vetter
  0 siblings, 0 replies; 22+ messages in thread
From: Daniel Vetter @ 2012-02-06 16:06 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Daniel Vetter, Intel Graphics Development

On Sat, Feb 04, 2012 at 07:13:41PM -0800, Ben Widawsky wrote:
> On 02/02/12 00:40, Daniel Vetter wrote:
> >On Wed, Feb 01, 2012 at 09:30:57PM -0800, Ben Widawsky wrote:
> >[...]
> >>I'd also say it's not a bad idea to elaborate the assumption that we
> >>never have less than 256MB of memory WARN_ON(dimm_c0 + dimm_c1 == 0).
> >
> >I'm pretty sure that we can't boot with 0mb of ram ;-) The other thing is
> >that swizzling is a pure preformance optimization - if we enable it on
> >setup where it's not required we'll only annoy the memory controller with
> >less efficient access patterns.
> 
> What I meant was, this value is still 0 in say, a system with 2
> 128MB DIMMs.

Well, I have a feeling that 256mb is the smallest size for a ddr3 dimm.
But I'm a bit too lazy to check that (and frankly wouldn't know where).
Furthermore swizzling on gen6+ is afaics just a performance thing and
should never affect correctness, so even if 128mb dimms exist I think we
can just not care - performance isn't gonna be stellar anyway ;-)
-Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: swizzling support for snb/ivb
  2012-02-06 15:45               ` Daniel Vetter
@ 2012-02-07 19:56                 ` Eric Anholt
  2012-02-08 22:17                   ` Daniel Vetter
  0 siblings, 1 reply; 22+ messages in thread
From: Eric Anholt @ 2012-02-07 19:56 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Daniel Vetter, Intel Graphics Development, Ben Widawsky


[-- Attachment #1.1: Type: text/plain, Size: 3827 bytes --]

On Mon, 6 Feb 2012 16:45:00 +0100, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Sat, Feb 04, 2012 at 09:59:57PM +0100, Eric Anholt wrote:
> > On Thu,  2 Feb 2012 09:58:12 +0100, Daniel Vetter <daniel.vetter@ffwll.ch> wrote:
> > > We have to do this manually. Somebody had a Great Idea.
> > > 
> > > I've measured speed-ups just a few percent above the noise level
> > > (below 5% for the best case), but no slowdows. Chris Wilson measured
> > > quite a bit more (10-20% above the usual snb variance) on a more
> > > recent and better tuned version of sna, but also recorded a few
> > > slow-downs on benchmarks know for uglier amounts of snb-induced
> > > variance.
> > 
> > 
> > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > > index 51a2b0c..86fffd2 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > > @@ -3681,12 +3681,31 @@ i915_gem_idle(struct drm_device *dev)
> > >  	return 0;
> > >  }
> > >  
> > > +void i915_gem_init_swizzling(struct drm_device *dev)
> > > +{
> > > +	drm_i915_private_t *dev_priv = dev->dev_private;
> > > +
> > > +	if (INTEL_INFO(dev)->gen < 6 ||
> > > +	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
> > > +		return;
> > > +
> > > +	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
> > > +				 DISP_TILE_SURFACE_SWIZZLING);
> > > +
> > > +	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
> > > +	if (IS_GEN6(dev))
> > > +		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB));
> > > +	else
> > > +		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_IVB));
> > > +}
> > 
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> > > index 861223b..1a93066 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> > > @@ -93,8 +93,23 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
> > >  	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
> > >  
> > >  	if (INTEL_INFO(dev)->gen >= 6) {
> > > -		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> > > -		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> > > +		uint32_t dimm_c0, dimm_c1;
> > > +		dimm_c0 = I915_READ(MAD_DIMM_C0);
> > > +		dimm_c1 = I915_READ(MAD_DIMM_C1);
> > > +		dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
> > > +		dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
> > > +		/* Enable swizzling when the channels are populated with
> > > +		 * identically sized dimms. We don't need to check the 3rd
> > > +		 * channel because no cpu with gpu attached ships in that
> > > +		 * configuration. Also, swizzling only makes sense for 2
> > > +		 * channels anyway. */
> > > +		if (dimm_c0 == dimm_c1) {
> > > +			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
> > > +			swizzle_y = I915_BIT_6_SWIZZLE_9;
> > > +		} else {
> > > +			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> > > +			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> > > +		}
> > 
> > It looks to me like you're making the HW always swizzle bit 6 according
> > to 9 or 9/10 in i915_gem_init_swizzling, but you're not performing the
> > software side of swizzling in the !dual channel case.  My guess would be
> > that when you take your other DIMM the swizzling for pread/pwrite/swap
> > goes wrong, and that the answer would be to just not look at dimm sizes.
> 
> Hm, I'm a bit confused here ... So let me explain how swizzling on gen6+
> works with this patch:

Nah, I was the one confused.  I missed the early return in init.  Makes
sense to me now, and I agree that it should be safe to turn on
regardless, so the only issue might be that we should set swizzles on
more of the time than we actually manage to, which you already noted.

Reviewed-by: Eric Anholt <eric@anholt.net>

[-- Attachment #1.2: Type: application/pgp-signature, Size: 197 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/3] drm/i915: consolidate swizzling control bit frobbing
  2012-01-31 15:47 ` [PATCH 2/3] drm/i915: consolidate swizzling control bit frobbing Daniel Vetter
  2012-02-01 21:37   ` Ben Widawsky
@ 2012-02-08 22:17   ` Ben Widawsky
  2012-02-08 22:19     ` Daniel Vetter
  1 sibling, 1 reply; 22+ messages in thread
From: Ben Widawsky @ 2012-02-08 22:17 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Intel Graphics Development

On 01/31/2012 04:47 PM, Daniel Vetter wrote:
> On gen5 we also need to correctly set up swizzling in the display
> scanout engine, but only there. Consolidate this into the same
> function.
> 
> This has a small effect on ums setups - the kernel now also sets this
> bit in addition to userspace setting it. Given that this code only
> runs when userspace either can't (resume, gpu reset) or explicitly
> won't(gem_init) touch the hw this shouldn't have an adverse effect.
> 
> Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>

> ---
>  drivers/gpu/drm/i915/i915_gem.c      |    5 ++++-
>  drivers/gpu/drm/i915/intel_display.c |    6 ------
>  2 files changed, 4 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 86fffd2..27fe07a 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3685,13 +3685,16 @@ void i915_gem_init_swizzling(struct drm_device *dev)
>  {
>  	drm_i915_private_t *dev_priv = dev->dev_private;
>  
> -	if (INTEL_INFO(dev)->gen < 6 ||
> +	if (INTEL_INFO(dev)->gen < 5 ||
>  	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
>  		return;
>  
>  	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
>  				 DISP_TILE_SURFACE_SWIZZLING);
>  
> +	if (IS_GEN5(dev))
> +		return;
> +
>  	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
>  	if (IS_GEN6(dev))
>  		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB));
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index fc9bc19..5ab967c 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -6029,12 +6029,6 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
>  
>  	intel_wait_for_vblank(dev, pipe);
>  
> -	if (IS_GEN5(dev)) {
> -		/* enable address swizzle for tiling buffer */
> -		temp = I915_READ(DISP_ARB_CTL);
> -		I915_WRITE(DISP_ARB_CTL, temp | DISP_TILE_SURFACE_SWIZZLING);
> -	}
> -
>  	I915_WRITE(DSPCNTR(plane), dspcntr);
>  	POSTING_READ(DSPCNTR(plane));
>  

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: swizzling support for snb/ivb
  2012-02-07 19:56                 ` Eric Anholt
@ 2012-02-08 22:17                   ` Daniel Vetter
  0 siblings, 0 replies; 22+ messages in thread
From: Daniel Vetter @ 2012-02-08 22:17 UTC (permalink / raw)
  To: Eric Anholt; +Cc: Daniel Vetter, Intel Graphics Development, Ben Widawsky

On Tue, Feb 07, 2012 at 11:56:36AM -0800, Eric Anholt wrote:
> On Mon, 6 Feb 2012 16:45:00 +0100, Daniel Vetter <daniel@ffwll.ch> wrote:
> > On Sat, Feb 04, 2012 at 09:59:57PM +0100, Eric Anholt wrote:
> > > On Thu,  2 Feb 2012 09:58:12 +0100, Daniel Vetter <daniel.vetter@ffwll.ch> wrote:
> > > It looks to me like you're making the HW always swizzle bit 6 according
> > > to 9 or 9/10 in i915_gem_init_swizzling, but you're not performing the
> > > software side of swizzling in the !dual channel case.  My guess would be
> > > that when you take your other DIMM the swizzling for pread/pwrite/swap
> > > goes wrong, and that the answer would be to just not look at dimm sizes.
> > 
> > Hm, I'm a bit confused here ... So let me explain how swizzling on gen6+
> > works with this patch:
> 
> Nah, I was the one confused.  I missed the early return in init.  Makes
> sense to me now, and I agree that it should be safe to turn on
> regardless, so the only issue might be that we should set swizzles on
> more of the time than we actually manage to, which you already noted.
> 
> Reviewed-by: Eric Anholt <eric@anholt.net>
Queued for -next, thanks for the review.
-Daniel



-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/3] drm/i915: consolidate swizzling control bit frobbing
  2012-02-08 22:17   ` Ben Widawsky
@ 2012-02-08 22:19     ` Daniel Vetter
  0 siblings, 0 replies; 22+ messages in thread
From: Daniel Vetter @ 2012-02-08 22:19 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Daniel Vetter, Intel Graphics Development

On Wed, Feb 08, 2012 at 11:17:08PM +0100, Ben Widawsky wrote:
> On 01/31/2012 04:47 PM, Daniel Vetter wrote:
> > On gen5 we also need to correctly set up swizzling in the display
> > scanout engine, but only there. Consolidate this into the same
> > function.
> > 
> > This has a small effect on ums setups - the kernel now also sets this
> > bit in addition to userspace setting it. Given that this code only
> > runs when userspace either can't (resume, gpu reset) or explicitly
> > won't(gem_init) touch the hw this shouldn't have an adverse effect.
> > 
> > Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Queued for -next, thanks for the review.
-Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3] drm/i915: add gen6+ registers to i915_swizzle_info
  2012-02-01 21:39   ` Ben Widawsky
@ 2012-02-08 22:20     ` Daniel Vetter
  0 siblings, 0 replies; 22+ messages in thread
From: Daniel Vetter @ 2012-02-08 22:20 UTC (permalink / raw)
  To: Daniel Vetter, Intel Graphics Development

On Wed, Feb 01, 2012 at 01:39:03PM -0800, Ben Widawsky wrote:
> On Tue, Jan 31, 2012 at 04:47:56PM +0100, Daniel Vetter wrote:
> > Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Queued for -next, thanks for the review.
-Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2012-02-08 22:20 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-01-31 15:47 [PATCH 1/3] drm/i915: swizzling support for snb/ivb Daniel Vetter
2012-01-31 15:47 ` [PATCH 2/3] drm/i915: consolidate swizzling control bit frobbing Daniel Vetter
2012-02-01 21:37   ` Ben Widawsky
2012-02-01 22:23     ` Daniel Vetter
2012-02-08 22:17   ` Ben Widawsky
2012-02-08 22:19     ` Daniel Vetter
2012-01-31 15:47 ` [PATCH 3/3] drm/i915: add gen6+ registers to i915_swizzle_info Daniel Vetter
2012-02-01 21:39   ` Ben Widawsky
2012-02-08 22:20     ` Daniel Vetter
2012-02-01 21:35 ` [PATCH 1/3] drm/i915: swizzling support for snb/ivb Ben Widawsky
2012-02-01 22:16   ` Daniel Vetter
2012-02-01 22:26     ` Chris Wilson
2012-02-01 23:15       ` [PATCH] " Daniel Vetter
2012-02-02  5:30         ` Ben Widawsky
2012-02-02  8:40           ` Daniel Vetter
2012-02-05  3:13             ` Ben Widawsky
2012-02-06 16:06               ` Daniel Vetter
2012-02-02  8:58           ` Daniel Vetter
2012-02-04 20:59             ` Eric Anholt
2012-02-06 15:45               ` Daniel Vetter
2012-02-07 19:56                 ` Eric Anholt
2012-02-08 22:17                   ` Daniel Vetter

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.