All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] drm/i915: use PIPE_CONTROL for flushing on gen6+
@ 2011-08-12 18:18 Jesse Barnes
  2011-08-12 23:55 ` Eric Anholt
  2011-08-30 23:11 ` Ben Widawsky
  0 siblings, 2 replies; 4+ messages in thread
From: Jesse Barnes @ 2011-08-12 18:18 UTC (permalink / raw)
  To: intel-gfx

I'm trying to figure out why this doesn't work.  Anyone have ideas?

On gen6+ (well probably since Cantiga actually) we're supposed to use
PIPE_CONTROL rather than MI_FLUSH for flushing the pipeline and
caches.  This patch doesn't cause hangs or crashes in my testing, but
does prevent glxgears from displaying anything.

The other worrying thing about this is that gen6+ has a command length
field of 3, but we're using 2 in the DRI driver, even on gen6.
Changing it doesn't seem to have any effect, but it's still of concern.

-- 
Jesse Barnes, Intel Open Source Technology Center

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 5baaef4..4a456a6 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -236,6 +236,8 @@
 #define   DISPLAY_PLANE_A           (0<<20)
 #define   DISPLAY_PLANE_B           (1<<20)
 #define GFX_OP_PIPE_CONTROL	((0x3<<29)|(0x3<<27)|(0x2<<24)|2)
+#define GFX_OP_PIPE_CONTROL_GEN6 ((0x3<<29)|(0x3<<27)|(0x2<<24)|3)
+#define   PIPE_CONTROL_CS_STALL (1<<20)
 #define   PIPE_CONTROL_QW_WRITE	(1<<14)
 #define   PIPE_CONTROL_DEPTH_STALL (1<<13)
 #define   PIPE_CONTROL_WC_FLUSH	(1<<12)
@@ -244,8 +246,8 @@
 #define   PIPE_CONTROL_ISP_DIS	(1<<9)
 #define   PIPE_CONTROL_NOTIFY	(1<<8)
 #define   PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
-#define   PIPE_CONTROL_STALL_EN	(1<<1) /* in addr word, Ironlake+ only */
-
+#define   PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1) /* in addr word, Ironlake+ only */
+#define   PIPE_CONTROL_DEPTH_FLUSH (1<<0)
 
 /*
  * Reset registers
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 47b9b27..cecc1e1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -34,6 +34,16 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
+/*
+ * 965+ support PIPE_CONTROL commands, which provide finer grained control
+ * over cache flushing.
+ */
+struct pipe_control {
+	struct drm_i915_gem_object *obj;
+	volatile u32 *cpu_page;
+	u32 gtt_offset;
+};
+
 static inline int ring_space(struct intel_ring_buffer *ring)
 {
 	int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
@@ -123,6 +133,112 @@ render_ring_flush(struct intel_ring_buffer *ring,
 	return 0;
 }
 
+/**
+ * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
+ * implementing two workarounds on gen6.  From section 1.4.7.1
+ * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
+ *
+ * [DevSNB-C+{W/A}] Before any depth stall flush (including those
+ * produced by non-pipelined state commands), software needs to first
+ * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
+ * 0.
+ *
+ * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
+ * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
+ *
+ * And the workaround for these two requires this workaround first:
+ *
+ * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
+ * BEFORE the pipe-control with a post-sync op and no write-cache
+ * flushes.
+ *
+ * And this last workaround is tricky because of the requirements on
+ * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
+ * volume 2 part 1:
+ *
+ *     "1 of the following must also be set:
+ *      - Render Target Cache Flush Enable ([12] of DW1)
+ *      - Depth Cache Flush Enable ([0] of DW1)
+ *      - Stall at Pixel Scoreboard ([1] of DW1)
+ *      - Depth Stall ([13] of DW1)
+ *      - Post-Sync Operation ([13] of DW1)
+ *      - Notify Enable ([8] of DW1)"
+ *
+ * The cache flushes require the workaround flush that triggered this
+ * one, so we can't use it.  Depth stall would trigger the same.
+ * Post-sync nonzero is what triggered this second workaround, so we
+ * can't use that one either.  Notify enable is IRQs, which aren't
+ * really our business.  That leaves only stall at scoreboard.
+ */
+static int
+intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
+{
+	struct pipe_control *pc = ring->private;
+	u32 scratch_addr = pc->gtt_offset + 128;
+	int ret;
+
+
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL_GEN6);
+	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
+			PIPE_CONTROL_STALL_AT_SCOREBOARD);
+	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
+	intel_ring_emit(ring, 0); /* low dword */
+	intel_ring_emit(ring, 0); /* high dword */
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL_GEN6);
+	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
+	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
+static int
+gen6_render_ring_flush(struct intel_ring_buffer *ring,
+                         u32 invalidate_domains, u32 flush_domains)
+{
+	u32 flags = 0;
+	struct pipe_control *pc = ring->private;
+	u32 scratch_addr = pc->gtt_offset + 128;
+	int ret;
+
+	/* Force SNB workarounds for PIPE_CONTROL flushes */
+	intel_emit_post_sync_nonzero_flush(ring);
+
+	/* Just flush everything for now */
+	flags |= PIPE_CONTROL_WC_FLUSH;
+	flags |= PIPE_CONTROL_IS_FLUSH;
+	flags |= PIPE_CONTROL_TC_FLUSH;
+	flags |= PIPE_CONTROL_DEPTH_FLUSH;
+
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL_GEN6);
+	intel_ring_emit(ring, flags);
+	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
+	intel_ring_emit(ring, 0); /* lower dword */
+	intel_ring_emit(ring, 0); /* uppwer dword */
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
 static void ring_write_tail(struct intel_ring_buffer *ring,
 			    u32 value)
 {
@@ -206,16 +322,6 @@ static int init_ring_common(struct intel_ring_buffer *ring)
 	return 0;
 }
 
-/*
- * 965+ support PIPE_CONTROL commands, which provide finer grained control
- * over cache flushing.
- */
-struct pipe_control {
-	struct drm_i915_gem_object *obj;
-	volatile u32 *cpu_page;
-	u32 gtt_offset;
-};
-
 static int
 init_pipe_control(struct intel_ring_buffer *ring)
 {
@@ -292,8 +398,7 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 		I915_WRITE(MI_MODE, mode);
 	}
 
-	if (INTEL_INFO(dev)->gen >= 6) {
-	} else if (IS_GEN5(dev)) {
+	if (INTEL_INFO(dev)->gen >= 5) {
 		ret = init_pipe_control(ring);
 		if (ret)
 			return ret;
@@ -1291,6 +1396,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 	*ring = render_ring;
 	if (INTEL_INFO(dev)->gen >= 6) {
 		ring->add_request = gen6_add_request;
+		ring->flush = gen6_render_ring_flush;
 		ring->irq_get = gen6_render_ring_get_irq;
 		ring->irq_put = gen6_render_ring_put_irq;
 	} else if (IS_GEN5(dev)) {

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [RFC] drm/i915: use PIPE_CONTROL for flushing on gen6+
  2011-08-12 18:18 [RFC] drm/i915: use PIPE_CONTROL for flushing on gen6+ Jesse Barnes
@ 2011-08-12 23:55 ` Eric Anholt
  2011-08-19 17:37   ` Jesse Barnes
  2011-08-30 23:11 ` Ben Widawsky
  1 sibling, 1 reply; 4+ messages in thread
From: Eric Anholt @ 2011-08-12 23:55 UTC (permalink / raw)
  To: Jesse Barnes, intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 1081 bytes --]

On Fri, 12 Aug 2011 11:18:45 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> I'm trying to figure out why this doesn't work.  Anyone have ideas?
> 
> On gen6+ (well probably since Cantiga actually) we're supposed to use
> PIPE_CONTROL rather than MI_FLUSH for flushing the pipeline and
> caches.  This patch doesn't cause hangs or crashes in my testing, but
> does prevent glxgears from displaying anything.
> 
> The other worrying thing about this is that gen6+ has a command length
> field of 3, but we're using 2 in the DRI driver, even on gen6.
> Changing it doesn't seem to have any effect, but it's still of concern.

From what I've read, the hardware tends to process shortened packets
using some garbage in whatever fields were left out.

I don't see why we'd need a separate object for the pipe control write
destination.  We could just drop it in some unused bit of the status
page, right?

I'd drop the scratch_addr from packets that don't do a post-sync write,
for clarity.

I don't see what's actually going wrong in this patch, though.

[-- Attachment #1.2: Type: application/pgp-signature, Size: 197 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC] drm/i915: use PIPE_CONTROL for flushing on gen6+
  2011-08-12 23:55 ` Eric Anholt
@ 2011-08-19 17:37   ` Jesse Barnes
  0 siblings, 0 replies; 4+ messages in thread
From: Jesse Barnes @ 2011-08-19 17:37 UTC (permalink / raw)
  To: Eric Anholt; +Cc: intel-gfx

On Fri, 12 Aug 2011 16:55:47 -0700
Eric Anholt <eric@anholt.net> wrote:

> On Fri, 12 Aug 2011 11:18:45 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> > I'm trying to figure out why this doesn't work.  Anyone have ideas?
> > 
> > On gen6+ (well probably since Cantiga actually) we're supposed to use
> > PIPE_CONTROL rather than MI_FLUSH for flushing the pipeline and
> > caches.  This patch doesn't cause hangs or crashes in my testing, but
> > does prevent glxgears from displaying anything.
> > 
> > The other worrying thing about this is that gen6+ has a command length
> > field of 3, but we're using 2 in the DRI driver, even on gen6.
> > Changing it doesn't seem to have any effect, but it's still of concern.
> 
> From what I've read, the hardware tends to process shortened packets
> using some garbage in whatever fields were left out.
> 
> I don't see why we'd need a separate object for the pipe control write
> destination.  We could just drop it in some unused bit of the status
> page, right?

Yeah, that would be fine.  I just re-used the scratch page from ILK
since we already had code for it, but on SNB it should be safe to just
use the HWS.

> I'd drop the scratch_addr from packets that don't do a post-sync write,
> for clarity.
> 
> I don't see what's actually going wrong in this patch, though.

I tried adding the TLB flush, which was missing, but I still see big
missing regions on the login screen.  I guess even blits somehow aren't
landing or getting sequenced correctly... back to the docs to look for
more clues.

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC] drm/i915: use PIPE_CONTROL for flushing on gen6+
  2011-08-12 18:18 [RFC] drm/i915: use PIPE_CONTROL for flushing on gen6+ Jesse Barnes
  2011-08-12 23:55 ` Eric Anholt
@ 2011-08-30 23:11 ` Ben Widawsky
  1 sibling, 0 replies; 4+ messages in thread
From: Ben Widawsky @ 2011-08-30 23:11 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: intel-gfx

On Fri, Aug 12, 2011 at 11:18:45AM -0700, Jesse Barnes wrote:
>  	int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
> @@ -123,6 +133,112 @@ render_ring_flush(struct intel_ring_buffer *ring,
>  	return 0;
>  }
>  
> +/**
> + * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
> + * implementing two workarounds on gen6.  From section 1.4.7.1
> + * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
> + *
> + * [DevSNB-C+{W/A}] Before any depth stall flush (including those
> + * produced by non-pipelined state commands), software needs to first
> + * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
> + * 0.
> + *
> + * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
> + * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
> + *
> + * And the workaround for these two requires this workaround first:
> + *
> + * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
> + * BEFORE the pipe-control with a post-sync op and no write-cache
> + * flushes.
> + *
> + * And this last workaround is tricky because of the requirements on
> + * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
> + * volume 2 part 1:
> + *
> + *     "1 of the following must also be set:
> + *      - Render Target Cache Flush Enable ([12] of DW1)
> + *      - Depth Cache Flush Enable ([0] of DW1)
> + *      - Stall at Pixel Scoreboard ([1] of DW1)
> + *      - Depth Stall ([13] of DW1)
> + *      - Post-Sync Operation ([13] of DW1)
> + *      - Notify Enable ([8] of DW1)"
> + *
> + * The cache flushes require the workaround flush that triggered this
> + * one, so we can't use it.  Depth stall would trigger the same.
> + * Post-sync nonzero is what triggered this second workaround, so we
> + * can't use that one either.  Notify enable is IRQs, which aren't
> + * really our business.  That leaves only stall at scoreboard.
> + */
> +static int
> +intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
> +{
> +	struct pipe_control *pc = ring->private;
> +	u32 scratch_addr = pc->gtt_offset + 128;
> +	int ret;
> +
> +
> +	ret = intel_ring_begin(ring, 6);
> +	if (ret)
> +		return ret;
> +
> +	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL_GEN6);
> +	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
> +			PIPE_CONTROL_STALL_AT_SCOREBOARD);
> +	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
> +	intel_ring_emit(ring, 0); /* low dword */
> +	intel_ring_emit(ring, 0); /* high dword */
> +	intel_ring_emit(ring, MI_NOOP);
> +	intel_ring_advance(ring);
> +
> +	ret = intel_ring_begin(ring, 6);
> +	if (ret)
> +		return ret;
> +
> +	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL_GEN6);
> +	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
> +	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
> +	intel_ring_emit(ring, 0);
> +	intel_ring_emit(ring, 0);
> +	intel_ring_emit(ring, MI_NOOP);
> +	intel_ring_advance(ring);
> +
> +	return 0;
> +}
> +
> +static int
> +gen6_render_ring_flush(struct intel_ring_buffer *ring,
> +                         u32 invalidate_domains, u32 flush_domains)
> +{
> +	u32 flags = 0;
> +	struct pipe_control *pc = ring->private;
> +	u32 scratch_addr = pc->gtt_offset + 128;
> +	int ret;
> +
> +	/* Force SNB workarounds for PIPE_CONTROL flushes */
> +	intel_emit_post_sync_nonzero_flush(ring);
> +
> +	/* Just flush everything for now */
> +	flags |= PIPE_CONTROL_WC_FLUSH;
> +	flags |= PIPE_CONTROL_IS_FLUSH;
> +	flags |= PIPE_CONTROL_TC_FLUSH;
> +	flags |= PIPE_CONTROL_DEPTH_FLUSH;
> +
> +	ret = intel_ring_begin(ring, 6);
> +	if (ret)
> +		return ret;
> +
> +	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL_GEN6);
> +	intel_ring_emit(ring, flags);
> +	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
> +	intel_ring_emit(ring, 0); /* lower dword */
> +	intel_ring_emit(ring, 0); /* uppwer dword */
> +	intel_ring_emit(ring, MI_NOOP);
> +	intel_ring_advance(ring);
> +
> +	return 0;
> +}
> +
>  static void ring_write_tail(struct intel_ring_buffer *ring,
>  			    u32 value)
>  {

While I'm not convinced this is broken, I think you either want to
specify a qword write (3 << 14) or use n=2 for the pipe control, ie:

intel_ring_emit(ring, GFX_OP_PIPE_CONTROL_GEN);
intel_ring_emit(ring, flags);
intel_ring_emit(ring, 0); /* ignored if no write */
intel_ring_emit(ring, 0); /* ignored if no write*/
intel_ring_advance(ring);

Ben

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2011-08-30 23:12 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-08-12 18:18 [RFC] drm/i915: use PIPE_CONTROL for flushing on gen6+ Jesse Barnes
2011-08-12 23:55 ` Eric Anholt
2011-08-19 17:37   ` Jesse Barnes
2011-08-30 23:11 ` Ben Widawsky

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.