All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Only apply the SNB pipe control w/a to gen6
@ 2012-07-20 17:02 Chris Wilson
  2012-07-20 21:00 ` Ben Widawsky
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Chris Wilson @ 2012-07-20 17:02 UTC (permalink / raw)
  To: intel-gfx

The requirements for the sync flush to be emitted prior to the render
cache flush is only true for SandyBridge. On IvyBridge and friends we
can just emit the flushes with an inline CS stall.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c |   33 +++++++++++++++++++------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b35a89a..42ad7ad 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -214,15 +214,8 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
                          u32 invalidate_domains, u32 flush_domains)
 {
 	u32 flags = 0;
-	struct pipe_control *pc = ring->private;
-	u32 scratch_addr = pc->gtt_offset + 128;
 	int ret;
 
-	/* Force SNB workarounds for PIPE_CONTROL flushes */
-	ret = intel_emit_post_sync_nonzero_flush(ring);
-	if (ret)
-		return ret;
-
 	/* Just flush everything.  Experiments have shown that reducing the
 	 * number of bits based on the write domains has little performance
 	 * impact.
@@ -242,21 +235,33 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
 	if (flush_domains)
 		flags |= PIPE_CONTROL_CS_STALL;
 
-	ret = intel_ring_begin(ring, 6);
+	ret = intel_ring_begin(ring, 4);
 	if (ret)
 		return ret;
 
-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
 	intel_ring_emit(ring, flags);
-	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, 0); /* lower dword */
-	intel_ring_emit(ring, 0); /* uppwer dword */
-	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, 0);
 	intel_ring_advance(ring);
 
 	return 0;
 }
 
+static int
+gen6_render_ring_flush__wa(struct intel_ring_buffer *ring,
+			   u32 invalidate_domains, u32 flush_domains)
+{
+	int ret;
+
+	/* Force SNB workarounds for PIPE_CONTROL flushes */
+	ret = intel_emit_post_sync_nonzero_flush(ring);
+	if (ret)
+		return ret;
+
+	return gen6_render_ring_flush(ring, invalidate_domains, flush_domains);
+}
+
 static void ring_write_tail(struct intel_ring_buffer *ring,
 			    u32 value)
 {
@@ -1374,6 +1379,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 	if (INTEL_INFO(dev)->gen >= 6) {
 		ring->add_request = gen6_add_request;
 		ring->flush = gen6_render_ring_flush;
+		if (INTEL_INFO(dev)->gen == 6)
+			ring->flush = gen6_render_ring_flush__wa;
 		ring->irq_get = gen6_ring_get_irq;
 		ring->irq_put = gen6_ring_put_irq;
 		ring->irq_enable_mask = GT_USER_INTERRUPT;
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] drm/i915: Only apply the SNB pipe control w/a to gen6
  2012-07-20 17:02 [PATCH] drm/i915: Only apply the SNB pipe control w/a to gen6 Chris Wilson
@ 2012-07-20 21:00 ` Ben Widawsky
  2012-07-25 12:51 ` [PATCH 1/2] drm/i915: flush DC writes cached in l3$ on gen7 Daniel Vetter
  2012-08-08  7:35 ` [PATCH] drm/i915: Only apply the SNB pipe control w/a to gen6 Daniel Vetter
  2 siblings, 0 replies; 8+ messages in thread
From: Ben Widawsky @ 2012-07-20 21:00 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, 20 Jul 2012 18:02:28 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> The requirements for the sync flush to be emitted prior to the render
> cache flush is only true for SandyBridge. On IvyBridge and friends we
> can just emit the flushes with an inline CS stall.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Tested-by: Ben Widawsky <ben@bwidawsk.net> [tested on IVB]
> ---
>  drivers/gpu/drm/i915/intel_ringbuffer.c |   33
> +++++++++++++++++++------------ 1 file changed, 20 insertions(+), 13
> deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c
> b/drivers/gpu/drm/i915/intel_ringbuffer.c index b35a89a..42ad7ad
> 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -214,15 +214,8 @@ gen6_render_ring_flush(struct intel_ring_buffer
> *ring, u32 invalidate_domains, u32 flush_domains)
>  {
>  	u32 flags = 0;
> -	struct pipe_control *pc = ring->private;
> -	u32 scratch_addr = pc->gtt_offset + 128;
>  	int ret;
>  
> -	/* Force SNB workarounds for PIPE_CONTROL flushes */
> -	ret = intel_emit_post_sync_nonzero_flush(ring);
> -	if (ret)
> -		return ret;
> -
>  	/* Just flush everything.  Experiments have shown that
> reducing the
>  	 * number of bits based on the write domains has little
> performance
>  	 * impact.
> @@ -242,21 +235,33 @@ gen6_render_ring_flush(struct intel_ring_buffer
> *ring, if (flush_domains)
>  		flags |= PIPE_CONTROL_CS_STALL;
>  
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(ring, 4);
>  	if (ret)
>  		return ret;
>  
> -	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
> +	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
>  	intel_ring_emit(ring, flags);
> -	intel_ring_emit(ring, scratch_addr |
> PIPE_CONTROL_GLOBAL_GTT);
> -	intel_ring_emit(ring, 0); /* lower dword */
> -	intel_ring_emit(ring, 0); /* uppwer dword */
> -	intel_ring_emit(ring, MI_NOOP);
> +	intel_ring_emit(ring, 0);
> +	intel_ring_emit(ring, 0);
>  	intel_ring_advance(ring);
>  
>  	return 0;
>  }
>  
> +static int
> +gen6_render_ring_flush__wa(struct intel_ring_buffer *ring,
> +			   u32 invalidate_domains, u32 flush_domains)
> +{
> +	int ret;
> +
> +	/* Force SNB workarounds for PIPE_CONTROL flushes */
> +	ret = intel_emit_post_sync_nonzero_flush(ring);
> +	if (ret)
> +		return ret;
> +
> +	return gen6_render_ring_flush(ring, invalidate_domains,
> flush_domains); +}
> +
>  static void ring_write_tail(struct intel_ring_buffer *ring,
>  			    u32 value)
>  {
> @@ -1374,6 +1379,8 @@ int intel_init_render_ring_buffer(struct
> drm_device *dev) if (INTEL_INFO(dev)->gen >= 6) {
>  		ring->add_request = gen6_add_request;
>  		ring->flush = gen6_render_ring_flush;
> +		if (INTEL_INFO(dev)->gen == 6)
> +			ring->flush = gen6_render_ring_flush__wa;
>  		ring->irq_get = gen6_ring_get_irq;
>  		ring->irq_put = gen6_ring_put_irq;
>  		ring->irq_enable_mask = GT_USER_INTERRUPT;



-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/2] drm/i915: flush DC writes cached in l3$ on gen7
  2012-07-20 17:02 [PATCH] drm/i915: Only apply the SNB pipe control w/a to gen6 Chris Wilson
  2012-07-20 21:00 ` Ben Widawsky
@ 2012-07-25 12:51 ` Daniel Vetter
  2012-07-25 12:51   ` [PATCH 2/2] drm/i915: disable indirect state pointers in render flush Daniel Vetter
  2012-07-26 17:25   ` [PATCH 1/2] drm/i915: flush DC writes cached in l3$ on gen7 Eric Anholt
  2012-08-08  7:35 ` [PATCH] drm/i915: Only apply the SNB pipe control w/a to gen6 Daniel Vetter
  2 siblings, 2 replies; 8+ messages in thread
From: Daniel Vetter @ 2012-07-25 12:51 UTC (permalink / raw)
  To: Intel Graphics Development; +Cc: Daniel Vetter

We don't yet use this, but now that we start to look into putting that
l3$ we better set the associated flush bit, too.

Also add the only other missing PIPE_CONTROL bit #define.

Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_reg.h         |    2 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c |    2 ++
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 81a3de6..721a981 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -311,6 +311,8 @@
 #define   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE		(1<<10) /* GM45+ only */
 #define   PIPE_CONTROL_INDIRECT_STATE_DISABLE		(1<<9)
 #define   PIPE_CONTROL_NOTIFY				(1<<8)
+#define   PIPE_CONTROL_FLUSH_ENABLE			(1<<7) /* gen7+ */
+#define   PIPE_CONTROL_DC_CACHE_FLUSH			(1<<5) /* gen7+ */
 #define   PIPE_CONTROL_VF_CACHE_INVALIDATE		(1<<4)
 #define   PIPE_CONTROL_CONST_CACHE_INVALIDATE		(1<<3)
 #define   PIPE_CONTROL_STATE_CACHE_INVALIDATE		(1<<2)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 58e6b0e..f52778f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -228,6 +228,8 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
 	flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
 	flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
 	flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+	if (IS_GEN7(ring->dev))
+		flags |= PIPE_CONTROL_DC_CACHE_FLUSH;
 	/*
 	 * Ensure that any following seqno writes only happen when the render
 	 * cache is indeed flushed (but only if the caller actually wants that).
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/2] drm/i915: disable indirect state pointers in render flush
  2012-07-25 12:51 ` [PATCH 1/2] drm/i915: flush DC writes cached in l3$ on gen7 Daniel Vetter
@ 2012-07-25 12:51   ` Daniel Vetter
  2012-07-26 17:33     ` Eric Anholt
  2012-07-26 17:25   ` [PATCH 1/2] drm/i915: flush DC writes cached in l3$ on gen7 Eric Anholt
  1 sibling, 1 reply; 8+ messages in thread
From: Daniel Vetter @ 2012-07-25 12:51 UTC (permalink / raw)
  To: Intel Graphics Development; +Cc: Daniel Vetter

Since we don't guarantee that objects stay at the same gtt offset,
userspace needs to reload all indirect state anyway, even with hw
contexts. The hw provides a little pipe_control flag to disable at
least some these indirect state pointers and hence avoid to
save/restore them at context switch time.

Seems to improve hw context switch throughput as measured by running
glxgears by about 0.5%, barely above the noise on my ivb gt2 here.

Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c |    1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index f52778f..bc95142 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -228,6 +228,7 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
 	flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
 	flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
 	flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+	flags |= PIPE_CONTROL_INDIRECT_STATE_DISABLE;
 	if (IS_GEN7(ring->dev))
 		flags |= PIPE_CONTROL_DC_CACHE_FLUSH;
 	/*
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] drm/i915: flush DC writes cached in l3$ on gen7
  2012-07-25 12:51 ` [PATCH 1/2] drm/i915: flush DC writes cached in l3$ on gen7 Daniel Vetter
  2012-07-25 12:51   ` [PATCH 2/2] drm/i915: disable indirect state pointers in render flush Daniel Vetter
@ 2012-07-26 17:25   ` Eric Anholt
  1 sibling, 0 replies; 8+ messages in thread
From: Eric Anholt @ 2012-07-26 17:25 UTC (permalink / raw)
  To: Intel Graphics Development; +Cc: Daniel Vetter


[-- Attachment #1.1: Type: text/plain, Size: 278 bytes --]

Daniel Vetter <daniel.vetter@ffwll.ch> writes:

> We don't yet use this, but now that we start to look into putting that
> l3$ we better set the associated flush bit, too.
>
> Also add the only other missing PIPE_CONTROL bit #define.

Reviewed-by: Eric Anholt <eric@anholt.net>

[-- Attachment #1.2: Type: application/pgp-signature, Size: 197 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] drm/i915: disable indirect state pointers in render flush
  2012-07-25 12:51   ` [PATCH 2/2] drm/i915: disable indirect state pointers in render flush Daniel Vetter
@ 2012-07-26 17:33     ` Eric Anholt
  2012-07-26 17:47       ` Daniel Vetter
  0 siblings, 1 reply; 8+ messages in thread
From: Eric Anholt @ 2012-07-26 17:33 UTC (permalink / raw)
  To: Intel Graphics Development; +Cc: Daniel Vetter


[-- Attachment #1.1: Type: text/plain, Size: 755 bytes --]

Daniel Vetter <daniel.vetter@ffwll.ch> writes:

> Since we don't guarantee that objects stay at the same gtt offset,
> userspace needs to reload all indirect state anyway, even with hw
> contexts. The hw provides a little pipe_control flag to disable at
> least some these indirect state pointers and hence avoid to
> save/restore them at context switch time.
>
> Seems to improve hw context switch throughput as measured by running
> glxgears by about 0.5%, barely above the noise on my ivb gt2 here.

I'd like to see some better testing than picking an fps number out of
glxgears.  I'm dubious of there being any measurable effect here.  But
then, I haven't even been able to figure out from the specs what exactly
is considered to be "indirect state".

[-- Attachment #1.2: Type: application/pgp-signature, Size: 197 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] drm/i915: disable indirect state pointers in render flush
  2012-07-26 17:33     ` Eric Anholt
@ 2012-07-26 17:47       ` Daniel Vetter
  0 siblings, 0 replies; 8+ messages in thread
From: Daniel Vetter @ 2012-07-26 17:47 UTC (permalink / raw)
  To: Eric Anholt; +Cc: Intel Graphics Development

On Thu, Jul 26, 2012 at 7:33 PM, Eric Anholt <eric@anholt.net> wrote:
> Daniel Vetter <daniel.vetter@ffwll.ch> writes:
>
>> Since we don't guarantee that objects stay at the same gtt offset,
>> userspace needs to reload all indirect state anyway, even with hw
>> contexts. The hw provides a little pipe_control flag to disable at
>> least some these indirect state pointers and hence avoid to
>> save/restore them at context switch time.
>>
>> Seems to improve hw context switch throughput as measured by running
>> glxgears by about 0.5%, barely above the noise on my ivb gt2 here.
>
> I'd like to see some better testing than picking an fps number out of
> glxgears.  I'm dubious of there being any measurable effect here.  But
> then, I haven't even been able to figure out from the specs what exactly
> is considered to be "indirect state".

I admit that the the glxgears number is rather pointless - it's meant
more as a quick sanity test that things don't go slower. The
motivation why I've done this is more that this could help in catching
mesa bugs, where for some odd reason some indirect state doesn't get
re-emitted (now with hw contexts that's harder to detect than just
running 2 drm clients in parallel). If the hw gives us a bit to clean
(at least parts) of that state out, I think we should use it to
improve testing coverage.
-Daniel
-- 
Daniel Vetter
daniel.vetter@ffwll.ch - +41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] drm/i915: Only apply the SNB pipe control w/a to gen6
  2012-07-20 17:02 [PATCH] drm/i915: Only apply the SNB pipe control w/a to gen6 Chris Wilson
  2012-07-20 21:00 ` Ben Widawsky
  2012-07-25 12:51 ` [PATCH 1/2] drm/i915: flush DC writes cached in l3$ on gen7 Daniel Vetter
@ 2012-08-08  7:35 ` Daniel Vetter
  2 siblings, 0 replies; 8+ messages in thread
From: Daniel Vetter @ 2012-08-08  7:35 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, Jul 20, 2012 at 06:02:28PM +0100, Chris Wilson wrote:
> The requirements for the sync flush to be emitted prior to the render
> cache flush is only true for SandyBridge. On IvyBridge and friends we
> can just emit the flushes with an inline CS stall.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Since I've seen Ken ditch these w/a for ivb+ in mesa, I've figured that
this is ok. Some bspec reading seems to agree. Merged to dinq, thanks for
the patch.
-Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2012-08-08  7:35 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-07-20 17:02 [PATCH] drm/i915: Only apply the SNB pipe control w/a to gen6 Chris Wilson
2012-07-20 21:00 ` Ben Widawsky
2012-07-25 12:51 ` [PATCH 1/2] drm/i915: flush DC writes cached in l3$ on gen7 Daniel Vetter
2012-07-25 12:51   ` [PATCH 2/2] drm/i915: disable indirect state pointers in render flush Daniel Vetter
2012-07-26 17:33     ` Eric Anholt
2012-07-26 17:47       ` Daniel Vetter
2012-07-26 17:25   ` [PATCH 1/2] drm/i915: flush DC writes cached in l3$ on gen7 Eric Anholt
2012-08-08  7:35 ` [PATCH] drm/i915: Only apply the SNB pipe control w/a to gen6 Daniel Vetter

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.