* [PATCH 1/4] drm/i915: Only insert the mb() before updating the fence parameter [not found] <6c3329lntgg@orsmga002.jf.intel.com> @ 2012-10-09 18:24 ` Chris Wilson 2012-10-09 18:24 ` [PATCH 2/4] drm/i915: Only apply the mb() when flushing the GTT domain during a finish Chris Wilson ` (3 more replies) 0 siblings, 4 replies; 14+ messages in thread From: Chris Wilson @ 2012-10-09 18:24 UTC (permalink / raw) To: intel-gfx With a fence, we only need to insert a memory barrier around the actual fence alteration for CPU accesses through the GTT. Performing the barrier in flush-fence was inserting unnecessary and expensive barriers for never-fenced objects. Note that removing the barriers from flush-fence, which was effectively a barrier before every direct access through the GTT, revealed that we were missing a barrier before the first access through the GTT. Lack of that barrier was sufficient to cause GPU hangs. v2: Add a couple more comments to explain the new barriers Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> --- drivers/gpu/drm/i915/i915_gem.c | 40 +++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 05ff790..3c4577b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2771,9 +2771,22 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg, POSTING_READ(FENCE_REG_830_0 + reg * 4); } +inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) +{ + return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; +} + static void i915_gem_write_fence(struct drm_device *dev, int reg, struct drm_i915_gem_object *obj) { + struct drm_i915_private *dev_priv = dev->dev_private; + + /* Ensure that all CPU reads are completed before installing a fence + * and all writes before removing the fence.
+ */ + if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) + mb(); + switch (INTEL_INFO(dev)->gen) { case 7: case 6: sandybridge_write_fence_reg(dev, reg, obj); break; @@ -2783,6 +2796,12 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg, case 2: i830_write_fence_reg(dev, reg, obj); break; default: break; } + + /* And similarly be paranoid that no direct access to this region + * is reordered to before the fence is installed. + */ + if (i915_gem_object_needs_mb(obj)) + mb(); } static inline int fence_number(struct drm_i915_private *dev_priv, @@ -2812,7 +2831,7 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, } static int -i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) +i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) { if (obj->last_fenced_seqno) { int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); @@ -2822,12 +2841,6 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) obj->last_fenced_seqno = 0; } - /* Ensure that all CPU reads are completed before installing a fence - * and all writes before removing the fence. - */ - if (obj->base.read_domains & I915_GEM_DOMAIN_GTT) - mb(); - obj->fenced_gpu_access = false; return 0; } @@ -2838,7 +2851,7 @@ i915_gem_object_put_fence(struct drm_i915_gem_object *obj) struct drm_i915_private *dev_priv = obj->base.dev->dev_private; int ret; - ret = i915_gem_object_flush_fence(obj); + ret = i915_gem_object_wait_fence(obj); if (ret) return ret; @@ -2912,7 +2925,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) * will need to serialise the write to the associated fence register? 
*/ if (obj->fence_dirty) { - ret = i915_gem_object_flush_fence(obj); + ret = i915_gem_object_wait_fence(obj); if (ret) return ret; } @@ -2933,7 +2946,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) if (reg->obj) { struct drm_i915_gem_object *old = reg->obj; - ret = i915_gem_object_flush_fence(old); + ret = i915_gem_object_wait_fence(old); if (ret) return ret; @@ -3244,6 +3257,13 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) i915_gem_object_flush_cpu_write_domain(obj); + /* Serialise direct access to this object with the barriers for + * coherent writes from the GPU, by effectively invalidating the + * GTT domain upon first access. + */ + if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) + mb(); + old_write_domain = obj->base.write_domain; old_read_domains = obj->base.read_domains; -- 1.7.10.4 ^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 2/4] drm/i915: Only apply the mb() when flushing the GTT domain during a finish 2012-10-09 18:24 ` [PATCH 1/4] drm/i915: Only insert the mb() before updating the fence parameter Chris Wilson @ 2012-10-09 18:24 ` Chris Wilson 2012-10-11 19:43 ` Jesse Barnes 2012-10-09 18:24 ` [PATCH 3/4] drm/i915: Insert a full mb() before reading the seqno from the status page Chris Wilson ` (2 subsequent siblings) 3 siblings, 1 reply; 14+ messages in thread From: Chris Wilson @ 2012-10-09 18:24 UTC (permalink / raw) To: intel-gfx Now that we seem to have brought order to the GTT barriers, the last one to review is the terminal barrier before we unbind the buffer from the GTT. This needs to only be performed if the buffer still resides in the GTT domain, and so we can skip some needless barriers otherwise. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_gem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3c4577b..ed8d21a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2526,15 +2526,15 @@ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) { u32 old_write_domain, old_read_domains; - /* Act a barrier for all accesses through the GTT */ - mb(); - /* Force a pagefault for domain tracking on next user access */ i915_gem_release_mmap(obj); if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) return; + /* Wait for any direct GTT access to complete */ + mb(); + old_read_domains = obj->base.read_domains; old_write_domain = obj->base.write_domain; -- 1.7.10.4 ^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 2/4] drm/i915: Only apply the mb() when flushing the GTT domain during a finish 2012-10-09 18:24 ` [PATCH 2/4] drm/i915: Only apply the mb() when flushing the GTT domain during a finish Chris Wilson @ 2012-10-11 19:43 ` Jesse Barnes 2013-01-19 13:40 ` Daniel Vetter 0 siblings, 1 reply; 14+ messages in thread From: Jesse Barnes @ 2012-10-11 19:43 UTC (permalink / raw) To: Chris Wilson; +Cc: intel-gfx On Tue, 9 Oct 2012 19:24:38 +0100 Chris Wilson <chris@chris-wilson.co.uk> wrote: > Now that we seem to have brought order to the GTT barriers, the last one > to review is the terminal barrier before we unbind the buffer from the > GTT. This needs to only be performed if the buffer still resides in the > GTT domain, and so we can skip some needless barriers otherwise. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_gem.c | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 3c4577b..ed8d21a 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -2526,15 +2526,15 @@ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) > { > u32 old_write_domain, old_read_domains; > > - /* Act a barrier for all accesses through the GTT */ > - mb(); > - > /* Force a pagefault for domain tracking on next user access */ > i915_gem_release_mmap(obj); > > if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) > return; > > + /* Wait for any direct GTT access to complete */ > + mb(); > + > old_read_domains = obj->base.read_domains; > old_write_domain = obj->base.write_domain; > Yeah this looks like a better place to put it. You're trying to serialize this against a subsequent fence or map operation? Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org> -- Jesse Barnes, Intel Open Source Technology Center ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/4] drm/i915: Only apply the mb() when flushing the GTT domain during a finish 2012-10-11 19:43 ` Jesse Barnes @ 2013-01-19 13:40 ` Daniel Vetter 0 siblings, 0 replies; 14+ messages in thread From: Daniel Vetter @ 2013-01-19 13:40 UTC (permalink / raw) To: Jesse Barnes; +Cc: intel-gfx On Thu, Oct 11, 2012 at 12:43:42PM -0700, Jesse Barnes wrote: > On Tue, 9 Oct 2012 19:24:38 +0100 > Chris Wilson <chris@chris-wilson.co.uk> wrote: > > > Now that we seem to have brought order to the GTT barriers, the last one > > to review is the terminal barrier before we unbind the buffer from the > > GTT. This needs to only be performed if the buffer still resides in the > > GTT domain, and so we can skip some needless barriers otherwise. > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > --- > > drivers/gpu/drm/i915/i915_gem.c | 6 +++--- > > 1 file changed, 3 insertions(+), 3 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > > index 3c4577b..ed8d21a 100644 > > --- a/drivers/gpu/drm/i915/i915_gem.c > > +++ b/drivers/gpu/drm/i915/i915_gem.c > > @@ -2526,15 +2526,15 @@ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) > > { > > u32 old_write_domain, old_read_domains; > > > > - /* Act a barrier for all accesses through the GTT */ > > - mb(); > > - > > /* Force a pagefault for domain tracking on next user access */ > > i915_gem_release_mmap(obj); > > > > if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) > > return; > > > > + /* Wait for any direct GTT access to complete */ > > + mb(); > > + > > old_read_domains = obj->base.read_domains; > > old_write_domain = obj->base.write_domain; > > > > Yeah this looks like a better place to put it. You're trying to > serialize this against a subsequent fence or map operation? > > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org> Patches 1-2 queued for next, thanks. 
-Daniel -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 3/4] drm/i915: Insert a full mb() before reading the seqno from the status page 2012-10-09 18:24 ` [PATCH 1/4] drm/i915: Only insert the mb() before updating the fence parameter Chris Wilson 2012-10-09 18:24 ` [PATCH 2/4] drm/i915: Only apply the mb() when flushing the GTT domain during a finish Chris Wilson @ 2012-10-09 18:24 ` Chris Wilson 2012-10-11 19:46 ` Jesse Barnes 2012-10-09 18:24 ` [PATCH 4/4] drm/i915: Review the memory barriers around CPU access to buffers Chris Wilson 2012-10-11 19:41 ` [PATCH 1/4] drm/i915: Only insert the mb() before updating the fence parameter Jesse Barnes 3 siblings, 1 reply; 14+ messages in thread From: Chris Wilson @ 2012-10-09 18:24 UTC (permalink / raw) To: intel-gfx Hopefully this will reduce a few of the missed IRQ warnings. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/intel_ringbuffer.c | 8 +++++++- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 -- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e069e69..133beb6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -704,14 +704,18 @@ gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) /* Workaround to force correct ordering between irq and seqno writes on * ivb (and maybe also on snb) by reading from a CS register (like * ACTHD) before reading the status page. 
*/ - if (!lazy_coherency) + if (!lazy_coherency) { intel_ring_get_active_head(ring); + mb(); + } return intel_read_status_page(ring, I915_GEM_HWS_INDEX); } static u32 ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) { + if (!lazy_coherency) + mb(); return intel_read_status_page(ring, I915_GEM_HWS_INDEX); } @@ -719,6 +723,8 @@ static u32 pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) { struct pipe_control *pc = ring->private; + if (!lazy_coherency) + mb(); return pc->cpu_page[0]; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 2ea7a31..40b252e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -160,8 +160,6 @@ static inline u32 intel_read_status_page(struct intel_ring_buffer *ring, int reg) { - /* Ensure that the compiler doesn't optimize away the load. */ - barrier(); return ring->status_page.page_addr[reg]; } -- 1.7.10.4 ^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 3/4] drm/i915: Insert a full mb() before reading the seqno from the status page 2012-10-09 18:24 ` [PATCH 3/4] drm/i915: Insert a full mb() before reading the seqno from the status page Chris Wilson @ 2012-10-11 19:46 ` Jesse Barnes 2012-10-19 20:40 ` Chris Wilson 0 siblings, 1 reply; 14+ messages in thread From: Jesse Barnes @ 2012-10-11 19:46 UTC (permalink / raw) To: Chris Wilson; +Cc: intel-gfx On Tue, 9 Oct 2012 19:24:39 +0100 Chris Wilson <chris@chris-wilson.co.uk> wrote: > Hopefully this will reduce a few of the missed IRQ warnings. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/intel_ringbuffer.c | 8 +++++++- > drivers/gpu/drm/i915/intel_ringbuffer.h | 2 -- > 2 files changed, 7 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index e069e69..133beb6 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -704,14 +704,18 @@ gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) > /* Workaround to force correct ordering between irq and seqno writes on > * ivb (and maybe also on snb) by reading from a CS register (like > * ACTHD) before reading the status page. 
*/ > - if (!lazy_coherency) > + if (!lazy_coherency) { > intel_ring_get_active_head(ring); > + mb(); > + } > return intel_read_status_page(ring, I915_GEM_HWS_INDEX); > } > > static u32 > ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) > { > + if (!lazy_coherency) > + mb(); > return intel_read_status_page(ring, I915_GEM_HWS_INDEX); > } > > @@ -719,6 +723,8 @@ static u32 > pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) > { > struct pipe_control *pc = ring->private; > + if (!lazy_coherency) > + mb(); > return pc->cpu_page[0]; > } > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 2ea7a31..40b252e 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -160,8 +160,6 @@ static inline u32 > intel_read_status_page(struct intel_ring_buffer *ring, > int reg) > { > - /* Ensure that the compiler doesn't optimize away the load. */ > - barrier(); > return ring->status_page.page_addr[reg]; > } > This looks a bit more voodoo-y. Theoretically an mb() on the CPU side should have nothing to do with what the GPU just wrote to the status page. It'll slow down the read a bit but shouldn't affect coherence at all... An MMIO read from the GPU otoh should flush any stubborn DMA buffers. -- Jesse Barnes, Intel Open Source Technology Center ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 3/4] drm/i915: Insert a full mb() before reading the seqno from the status page 2012-10-11 19:46 ` Jesse Barnes @ 2012-10-19 20:40 ` Chris Wilson 2012-10-19 20:52 ` Jesse Barnes 0 siblings, 1 reply; 14+ messages in thread From: Chris Wilson @ 2012-10-19 20:40 UTC (permalink / raw) To: Jesse Barnes; +Cc: intel-gfx On Thu, 11 Oct 2012 12:46:00 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote: > On Tue, 9 Oct 2012 19:24:39 +0100 > Chris Wilson <chris@chris-wilson.co.uk> wrote: > > > Hopefully this will reduce a few of the missed IRQ warnings. > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > --- > > drivers/gpu/drm/i915/intel_ringbuffer.c | 8 +++++++- > > drivers/gpu/drm/i915/intel_ringbuffer.h | 2 -- > > 2 files changed, 7 insertions(+), 3 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > > index e069e69..133beb6 100644 > > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > > @@ -704,14 +704,18 @@ gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) > > /* Workaround to force correct ordering between irq and seqno writes on > > * ivb (and maybe also on snb) by reading from a CS register (like > > * ACTHD) before reading the status page. 
*/ > > - if (!lazy_coherency) > > + if (!lazy_coherency) { > > intel_ring_get_active_head(ring); > > + mb(); > > + } > > return intel_read_status_page(ring, I915_GEM_HWS_INDEX); > > } > > > > static u32 > > ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) > > { > > + if (!lazy_coherency) > > + mb(); > > return intel_read_status_page(ring, I915_GEM_HWS_INDEX); > > } > > > > @@ -719,6 +723,8 @@ static u32 > > pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) > > { > > struct pipe_control *pc = ring->private; > > + if (!lazy_coherency) > > + mb(); > > return pc->cpu_page[0]; > > } > > > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > > index 2ea7a31..40b252e 100644 > > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > > @@ -160,8 +160,6 @@ static inline u32 > > intel_read_status_page(struct intel_ring_buffer *ring, > > int reg) > > { > > - /* Ensure that the compiler doesn't optimize away the load. */ > > - barrier(); > > return ring->status_page.page_addr[reg]; > > } > > > > This looks a bit more voodoo-y. Theoretically an mb() on the CPU side > should have nothing to do with what the GPU just wrote to the status > page. It'll slow down the read a bit but shouldn't affect coherence at > all... An MMIO read from the GPU otoh should flush any stubborn DMA > buffers. Absolutely convinced? Aren't we here more worried about the view of the shared cache from any particular core and so need to treat this as an SMP programming problem, in which case we do need to worry about memory barriers around dependent reads and writes between processors? But it is definitely more voodoo... -Chris -- Chris Wilson, Intel Open Source Technology Centre ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 3/4] drm/i915: Insert a full mb() before reading the seqno from the status page 2012-10-19 20:40 ` Chris Wilson @ 2012-10-19 20:52 ` Jesse Barnes 2013-01-19 12:02 ` Chris Wilson 0 siblings, 1 reply; 14+ messages in thread From: Jesse Barnes @ 2012-10-19 20:52 UTC (permalink / raw) To: Chris Wilson; +Cc: intel-gfx On Fri, 19 Oct 2012 21:40:17 +0100 Chris Wilson <chris@chris-wilson.co.uk> wrote: > On Thu, 11 Oct 2012 12:46:00 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote: > > On Tue, 9 Oct 2012 19:24:39 +0100 > > Chris Wilson <chris@chris-wilson.co.uk> wrote: > > > > > Hopefully this will reduce a few of the missed IRQ warnings. > > > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > > --- > > > drivers/gpu/drm/i915/intel_ringbuffer.c | 8 +++++++- > > > drivers/gpu/drm/i915/intel_ringbuffer.h | 2 -- > > > 2 files changed, 7 insertions(+), 3 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > > > index e069e69..133beb6 100644 > > > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > > > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > > > @@ -704,14 +704,18 @@ gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) > > > /* Workaround to force correct ordering between irq and seqno writes on > > > * ivb (and maybe also on snb) by reading from a CS register (like > > > * ACTHD) before reading the status page. 
*/ > > > - if (!lazy_coherency) > > > + if (!lazy_coherency) { > > > intel_ring_get_active_head(ring); > > > + mb(); > > > + } > > > return intel_read_status_page(ring, I915_GEM_HWS_INDEX); > > > } > > > > > > static u32 > > > ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) > > > { > > > + if (!lazy_coherency) > > > + mb(); > > > return intel_read_status_page(ring, I915_GEM_HWS_INDEX); > > > } > > > > > > @@ -719,6 +723,8 @@ static u32 > > > pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) > > > { > > > struct pipe_control *pc = ring->private; > > > + if (!lazy_coherency) > > > + mb(); > > > return pc->cpu_page[0]; > > > } > > > > > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > > > index 2ea7a31..40b252e 100644 > > > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > > > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > > > @@ -160,8 +160,6 @@ static inline u32 > > > intel_read_status_page(struct intel_ring_buffer *ring, > > > int reg) > > > { > > > - /* Ensure that the compiler doesn't optimize away the load. */ > > > - barrier(); > > > return ring->status_page.page_addr[reg]; > > > } > > > > > > > This looks a bit more voodoo-y. Theoretically an mb() on the CPU side > > should have nothing to do with what the GPU just wrote to the status > > page. It'll slow down the read a bit but shouldn't affect coherence at > > all... An MMIO read from the GPU otoh should flush any stubborn DMA > > buffers. > > Absolutely convinced? Aren't we here more worried about the view of the > shared cache from any particular core and so need to treat this as an > SMP programming problem, in which case we do need to worry about memory > barriers around dependent reads and writes between processors? > > But it is definitely more voodoo... If it's an SMP issue, barriers won't help, we need actual synchronization in the form of locks or something. 
My current theory is that while cached and uncached memory accesses are strongly ordered (i.e. appear in program order from a given CPU), they don't necessarily synchronize that way against each other, especially when WC mappings are in play. So in those cases, fences will be safer to use before any subsequent access that depends on a previous access of a different type. -- Jesse Barnes, Intel Open Source Technology Center ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 3/4] drm/i915: Insert a full mb() before reading the seqno from the status page 2012-10-19 20:52 ` Jesse Barnes @ 2013-01-19 12:02 ` Chris Wilson 0 siblings, 0 replies; 14+ messages in thread From: Chris Wilson @ 2013-01-19 12:02 UTC (permalink / raw) To: Jesse Barnes; +Cc: intel-gfx On Fri, 19 Oct 2012 13:52:49 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote: > On Fri, 19 Oct 2012 21:40:17 +0100 > Chris Wilson <chris@chris-wilson.co.uk> wrote: > > > On Thu, 11 Oct 2012 12:46:00 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote: > > > On Tue, 9 Oct 2012 19:24:39 +0100 > > > Chris Wilson <chris@chris-wilson.co.uk> wrote: > > > > > > > Hopefully this will reduce a few of the missed IRQ warnings. > > > > > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > > > --- > > > > drivers/gpu/drm/i915/intel_ringbuffer.c | 8 +++++++- > > > > drivers/gpu/drm/i915/intel_ringbuffer.h | 2 -- > > > > 2 files changed, 7 insertions(+), 3 deletions(-) > > > > > > > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > > > > index e069e69..133beb6 100644 > > > > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > > > > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > > > > @@ -704,14 +704,18 @@ gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) > > > > /* Workaround to force correct ordering between irq and seqno writes on > > > > * ivb (and maybe also on snb) by reading from a CS register (like > > > > * ACTHD) before reading the status page. 
*/ > > > > - if (!lazy_coherency) > > > > + if (!lazy_coherency) { > > > > intel_ring_get_active_head(ring); > > > > + mb(); > > > > + } > > > > return intel_read_status_page(ring, I915_GEM_HWS_INDEX); > > > > } > > > > > > > > static u32 > > > > ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) > > > > { > > > > + if (!lazy_coherency) > > > > + mb(); > > > > return intel_read_status_page(ring, I915_GEM_HWS_INDEX); > > > > } > > > > > > > > @@ -719,6 +723,8 @@ static u32 > > > > pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) > > > > { > > > > struct pipe_control *pc = ring->private; > > > > + if (!lazy_coherency) > > > > + mb(); > > > > return pc->cpu_page[0]; > > > > } > > > > > > > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > > > > index 2ea7a31..40b252e 100644 > > > > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > > > > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > > > > @@ -160,8 +160,6 @@ static inline u32 > > > > intel_read_status_page(struct intel_ring_buffer *ring, > > > > int reg) > > > > { > > > > - /* Ensure that the compiler doesn't optimize away the load. */ > > > > - barrier(); > > > > return ring->status_page.page_addr[reg]; > > > > } > > > > > > > > > > This looks a bit more voodoo-y. Theoretically an mb() on the CPU side > > > should have nothing to do with what the GPU just wrote to the status > > > page. It'll slow down the read a bit but shouldn't affect coherence at > > > all... An MMIO read from the GPU otoh should flush any stubborn DMA > > > buffers. > > > > Absolutely convinced? Aren't we here more worried about the view of the > > shared cache from any particular core and so need to treat this as an > > SMP programming problem, in which case we do need to worry about memory > > barriers around dependent reads and writes between processors? > > > > But it is definitely more voodoo... 
> > If it's an SMP issue, barriers won't help, we need actual > synchronization in the form of locks or something. Glad you agree. How are locks implemented? :) Irrespective of the contentious patches Daniel thought would be a good idea, we need the first 2 to fix the mb() around fences. Poke. -Chris -- Chris Wilson, Intel Open Source Technology Centre ^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 4/4] drm/i915: Review the memory barriers around CPU access to buffers 2012-10-09 18:24 ` [PATCH 1/4] drm/i915: Only insert the mb() before updating the fence parameter Chris Wilson 2012-10-09 18:24 ` [PATCH 2/4] drm/i915: Only apply the mb() when flushing the GTT domain during a finish Chris Wilson 2012-10-09 18:24 ` [PATCH 3/4] drm/i915: Insert a full mb() before reading the seqno from the status page Chris Wilson @ 2012-10-09 18:24 ` Chris Wilson 2012-10-11 19:52 ` Jesse Barnes 2012-10-11 20:46 ` Daniel Vetter 2012-10-11 19:41 ` [PATCH 1/4] drm/i915: Only insert the mb() before updating the fence parameter Jesse Barnes 3 siblings, 2 replies; 14+ messages in thread From: Chris Wilson @ 2012-10-09 18:24 UTC (permalink / raw) To: intel-gfx We need to treat the GPU core as a distinct processor and so apply the same SMP memory barriers. In this case, in addition to flushing the chipset cache, which is a no-op on LLC platforms, apply a write barrier beforehand. And then when we invalidate the CPU cache, make sure the memory is coherent (again this was a no-op on LLC platforms). Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/char/agp/intel-gtt.c | 1 + drivers/gpu/drm/i915/i915_gem.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 8b0f6d19..1223128 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -1706,6 +1706,7 @@ EXPORT_SYMBOL(intel_gtt_get); void intel_gtt_chipset_flush(void) { + wmb(); if (intel_private.driver->chipset_flush) intel_private.driver->chipset_flush(); } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ed8d21a..b1ebb88 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3528,6 +3528,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) /* Flush the CPU cache if it's still invalid. 
*/ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { i915_gem_clflush_object(obj); + mb(); /* in case the clflush above is optimised away */ obj->base.read_domains |= I915_GEM_DOMAIN_CPU; } -- 1.7.10.4 ^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 4/4] drm/i915: Review the memory barriers around CPU access to buffers 2012-10-09 18:24 ` [PATCH 4/4] drm/i915: Review the memory barriers around CPU access to buffers Chris Wilson @ 2012-10-11 19:52 ` Jesse Barnes 2012-10-19 20:48 ` Chris Wilson 2012-10-11 20:46 ` Daniel Vetter 1 sibling, 1 reply; 14+ messages in thread From: Jesse Barnes @ 2012-10-11 19:52 UTC (permalink / raw) To: Chris Wilson; +Cc: intel-gfx On Tue, 9 Oct 2012 19:24:40 +0100 Chris Wilson <chris@chris-wilson.co.uk> wrote: > We need to treat the GPU core as a distinct processor and so apply the > same SMP memory barriers. In this case, in addition to flushing the > chipset cache, which is a no-op on LLC platforms, apply a write barrier > beforehand. And then when we invalidate the CPU cache, make sure the > memory is coherent (again this was a no-op on LLC platforms). > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/char/agp/intel-gtt.c | 1 + > drivers/gpu/drm/i915/i915_gem.c | 1 + > 2 files changed, 2 insertions(+) > > diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c > index 8b0f6d19..1223128 100644 > --- a/drivers/char/agp/intel-gtt.c > +++ b/drivers/char/agp/intel-gtt.c > @@ -1706,6 +1706,7 @@ EXPORT_SYMBOL(intel_gtt_get); > > void intel_gtt_chipset_flush(void) > { > + wmb(); > if (intel_private.driver->chipset_flush) > intel_private.driver->chipset_flush(); > } > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index ed8d21a..b1ebb88 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -3528,6 +3528,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) > /* Flush the CPU cache if it's still invalid. 
*/ > if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { > i915_gem_clflush_object(obj); > + mb(); /* in case the clflush above is optimised away */ > > obj->base.read_domains |= I915_GEM_DOMAIN_CPU; > } These need more comments too. I think the first is to make sure any previous loads have completed before we start using the new object? If so, don't we want reads to complete first too? The second one looks unnecessary. If the object isn't in the CPU domain, there should be no loads/stores against it right? -- Jesse Barnes, Intel Open Source Technology Center ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 4/4] drm/i915: Review the memory barriers around CPU access to buffers
  2012-10-11 19:52   ` Jesse Barnes
@ 2012-10-19 20:48     ` Chris Wilson
  0 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2012-10-19 20:48 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: intel-gfx

On Thu, 11 Oct 2012 12:52:15 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> On Tue, 9 Oct 2012 19:24:40 +0100
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
> 
> > We need to treat the GPU core as a distinct processor and so apply the
> > same SMP memory barriers. In this case, in addition to flushing the
> > chipset cache, which is a no-op on LLC platforms, apply a write barrier
> > beforehand. And then when we invalidate the CPU cache, make sure the
> > memory is coherent (again this was a no-op on LLC platforms).
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/char/agp/intel-gtt.c    | 1 +
> >  drivers/gpu/drm/i915/i915_gem.c | 1 +
> >  2 files changed, 2 insertions(+)
> >
> > diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
> > index 8b0f6d19..1223128 100644
> > --- a/drivers/char/agp/intel-gtt.c
> > +++ b/drivers/char/agp/intel-gtt.c
> > @@ -1706,6 +1706,7 @@ EXPORT_SYMBOL(intel_gtt_get);
> >
> >  void intel_gtt_chipset_flush(void)
> >  {
> > +	wmb();
> >  	if (intel_private.driver->chipset_flush)
> >  		intel_private.driver->chipset_flush();
> >  }
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index ed8d21a..b1ebb88 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -3528,6 +3528,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
> >  	/* Flush the CPU cache if it's still invalid. */
> >  	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
> >  		i915_gem_clflush_object(obj);
> > +		mb(); /* in case the clflush above is optimised away */
> >
> >  		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
> >  	}
> 
> These need more comments too.
> 
> I think the first is to make sure any previous loads have completed
> before we start using the new object?  If so, don't we want reads to
> complete first too?

The flush is only used to make sure the writes written from the CPU hit
the cache and/or chipset buffers before we flush them from the chipset
buffer. Userspace is welcome to race read/writes between cores and the
GPU, and there is nothing we can do to prevent that without adopting a
strict coherency model.

Also note that in the past I have proposed this wmb() to fix some
observed incoherency in the cursor sprite: #21442.

> The second one looks unnecessary.  If the object isn't in the CPU
> domain, there should be no loads/stores against it right?

Just depends on the programming model between CPU/GPU. The barrier is
there to make sure all the writes into the shared cache from another
core (the GPU in this case) are complete before we begin our reads.
Assuming that the GPU behaves as another core...
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 14+ messages in thread
* Re: [PATCH 4/4] drm/i915: Review the memory barriers around CPU access to buffers
  2012-10-09 18:24 ` [PATCH 4/4] drm/i915: Review the memory barriers around CPU access to buffers Chris Wilson
  2012-10-11 19:52   ` Jesse Barnes
@ 2012-10-11 20:46   ` Daniel Vetter
  1 sibling, 0 replies; 14+ messages in thread
From: Daniel Vetter @ 2012-10-11 20:46 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue, Oct 09, 2012 at 07:24:40PM +0100, Chris Wilson wrote:
> We need to treat the GPU core as a distinct processor and so apply the
> same SMP memory barriers. In this case, in addition to flushing the
> chipset cache, which is a no-op on LLC platforms, apply a write barrier
> beforehand. And then when we invalidate the CPU cache, make sure the
> memory is coherent (again this was a no-op on LLC platforms).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

I think this one here deserves some love still:

- the fancy new pwrite/pread code does some crazy coherency tricks
  behind the domain tracking code. This patch misses those.
- like Jesse said: comments.
- I'd still wish we'd have some i-g-t tests for this stuff ...

And now my crazy new theory: We've already had some bug reports that
suggested that we're not fully coherent around unbind/rebind and
papered over it with:

commit c501ae7f332cdaf42e31af30b72b4b66cbbb1604
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Dec 14 13:57:23 2011 +0100

    drm/i915: Only clear the GPU domains upon a successful finish

And now we have the cpu_reloc regression from Dave Airlie which could be
explained with similar rebinding penalties (if we're creative). I hope
somewhat that we could explain these with the lack of proper memory
barriers ...

So if you can gather tested-by's with the above duct-tape reverted and
these patches applied, I'd be almost as happy as with some i-g-t tests
for these patches.

Cheers, Daniel

> ---
>  drivers/char/agp/intel-gtt.c    | 1 +
>  drivers/gpu/drm/i915/i915_gem.c | 1 +
>  2 files changed, 2 insertions(+)
> 
> diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
> index 8b0f6d19..1223128 100644
> --- a/drivers/char/agp/intel-gtt.c
> +++ b/drivers/char/agp/intel-gtt.c
> @@ -1706,6 +1706,7 @@ EXPORT_SYMBOL(intel_gtt_get);
> 
>  void intel_gtt_chipset_flush(void)
>  {
> +	wmb();
>  	if (intel_private.driver->chipset_flush)
>  		intel_private.driver->chipset_flush();
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index ed8d21a..b1ebb88 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3528,6 +3528,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
>  	/* Flush the CPU cache if it's still invalid. */
>  	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
>  		i915_gem_clflush_object(obj);
> +		mb(); /* in case the clflush above is optimised away */
> 
>  		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
>  	}
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 14+ messages in thread
* Re: [PATCH 1/4] drm/i915: Only insert the mb() before updating the fence parameter
  2012-10-09 18:24 ` [PATCH 1/4] drm/i915: Only insert the mb() before updating the fence parameter Chris Wilson
                     ` (2 preceding siblings ...)
  2012-10-09 18:24 ` [PATCH 4/4] drm/i915: Review the memory barriers around CPU access to buffers Chris Wilson
@ 2012-10-11 19:41 ` Jesse Barnes
  3 siblings, 0 replies; 14+ messages in thread
From: Jesse Barnes @ 2012-10-11 19:41 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue, 9 Oct 2012 19:24:37 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> With a fence, we only need to insert a memory barrier around the actual
> fence alteration for CPU accesses through the GTT. Performing the
> barrier in flush-fence was inserting unnecessary and expensive barriers
> for never fenced objects.
> 
> Note removing the barriers from flush-fence, which was effectively a
> barrier before every direct access through the GTT, revealed that we
> were missing a barrier before the first access through the GTT. Lack of
> that barrier was sufficient to cause GPU hangs.
> 
> v2: Add a couple more comments to explain the new barriers

The docs are slippery on MMIO vs cached accesses (less so on actual I/O
port ops), but this does look correct.

You might improve the comments a little and quote the IA32 manuals a
bit, saying that you're trying to order previous cached accesses with
subsequent MMIO accesses that will affect what the CPU reads or writes.

Other than that:
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 14+ messages in thread
end of thread, other threads: [~2013-01-19 13:38 UTC | newest]

Thread overview: 14+ messages
     [not found] <6c3329lntgg@orsmga002.jf.intel.com>
2012-10-09 18:24 ` [PATCH 1/4] drm/i915: Only insert the mb() before updating the fence parameter Chris Wilson
2012-10-09 18:24   ` [PATCH 2/4] drm/i915: Only apply the mb() when flushing the GTT domain during a finish Chris Wilson
2012-10-11 19:43     ` Jesse Barnes
2013-01-19 13:40     ` Daniel Vetter
2012-10-09 18:24   ` [PATCH 3/4] drm/i915: Insert a full mb() before reading the seqno from the status page Chris Wilson
2012-10-11 19:46     ` Jesse Barnes
2012-10-19 20:40       ` Chris Wilson
2012-10-19 20:52         ` Jesse Barnes
2013-01-19 12:02           ` Chris Wilson
2012-10-09 18:24   ` [PATCH 4/4] drm/i915: Review the memory barriers around CPU access to buffers Chris Wilson
2012-10-11 19:52     ` Jesse Barnes
2012-10-19 20:48       ` Chris Wilson
2012-10-11 20:46     ` Daniel Vetter
2012-10-11 19:41   ` [PATCH 1/4] drm/i915: Only insert the mb() before updating the fence parameter Jesse Barnes