All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/i915/execlists: HWS is uncached on !llc platforms
@ 2015-10-23 17:43 Chris Wilson
  2015-10-23 17:43 ` [PATCH 2/2] drm/i915: Serialise updates to GGTT with access through GGTT on Braswell Chris Wilson
  2015-10-23 17:50 ` [PATCH 1/2] drm/i915/execlists: HWS is uncached on !llc platforms Ville Syrjälä
  0 siblings, 2 replies; 17+ messages in thread
From: Chris Wilson @ 2015-10-23 17:43 UTC (permalink / raw)
  To: intel-gfx

As the HWS is mapped into the GPU as uncached, writes into that page do
not automatically snoop the CPU cache and therefore if we want to see
coherent values we need to clear the stale cacheline first. Note, we
already had a workaround for BXT-A for an identical issue, but since we
never enabled snooping it applies to all non-llc platforms.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 70 ++++++++++++++++++++--------------------
 1 file changed, 35 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3308337bb6c2..7393dd00d26d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1751,41 +1751,36 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
 	return 0;
 }
 
-static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
+static u32 llc_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
 {
 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 }
 
-static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
+static void llc_set_seqno(struct intel_engine_cs *ring, u32 seqno)
 {
 	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
 }
 
-static u32 bxt_a_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
+static u32 wc_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
 {
-
 	/*
 	 * On BXT A steppings there is a HW coherency issue whereby the
 	 * MI_STORE_DATA_IMM storing the completed request's seqno
-	 * occasionally doesn't invalidate the CPU cache. Work around this by
-	 * clflushing the corresponding cacheline whenever the caller wants
-	 * the coherency to be guaranteed. Note that this cacheline is known
-	 * to be clean at this point, since we only write it in
-	 * bxt_a_set_seqno(), where we also do a clflush after the write. So
-	 * this clflush in practice becomes an invalidate operation.
+	 * occasionally doesn't invalidate the CPU cache. However, as
+	 * we do not mark the context for snoopable access, we have to flush
+	 * the stale cacheline before seeing new values anyway.
 	 */
-
 	if (!lazy_coherency)
 		intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
 
 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 }
 
-static void bxt_a_set_seqno(struct intel_engine_cs *ring, u32 seqno)
+static void wc_set_seqno(struct intel_engine_cs *ring, u32 seqno)
 {
 	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
 
-	/* See bxt_a_get_seqno() explaining the reason for the clflush. */
+	/* See wc_get_seqno() explaining the reason for the clflush. */
 	intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
 }
 
@@ -1973,12 +1968,12 @@ static int logical_render_ring_init(struct drm_device *dev)
 		ring->init_hw = gen8_init_render_ring;
 	ring->init_context = gen8_init_rcs_context;
 	ring->cleanup = intel_fini_pipe_control;
-	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
-		ring->get_seqno = bxt_a_get_seqno;
-		ring->set_seqno = bxt_a_set_seqno;
+	if (!HAS_LLC(dev_priv)) {
+		ring->get_seqno = wc_get_seqno;
+		ring->set_seqno = wc_set_seqno;
 	} else {
-		ring->get_seqno = gen8_get_seqno;
-		ring->set_seqno = gen8_set_seqno;
+		ring->get_seqno = llc_get_seqno;
+		ring->set_seqno = llc_set_seqno;
 	}
 	ring->emit_request = gen8_emit_request;
 	ring->emit_flush = gen8_emit_flush_render;
@@ -2025,12 +2020,12 @@ static int logical_bsd_ring_init(struct drm_device *dev)
 		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
 
 	ring->init_hw = gen8_init_common_ring;
-	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
-		ring->get_seqno = bxt_a_get_seqno;
-		ring->set_seqno = bxt_a_set_seqno;
+	if (!HAS_LLC(dev_priv)) {
+		ring->get_seqno = wc_get_seqno;
+		ring->set_seqno = wc_set_seqno;
 	} else {
-		ring->get_seqno = gen8_get_seqno;
-		ring->set_seqno = gen8_set_seqno;
+		ring->get_seqno = llc_get_seqno;
+		ring->set_seqno = llc_set_seqno;
 	}
 	ring->emit_request = gen8_emit_request;
 	ring->emit_flush = gen8_emit_flush;
@@ -2055,8 +2050,13 @@ static int logical_bsd2_ring_init(struct drm_device *dev)
 		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
 
 	ring->init_hw = gen8_init_common_ring;
-	ring->get_seqno = gen8_get_seqno;
-	ring->set_seqno = gen8_set_seqno;
+	if (!HAS_LLC(dev_priv)) {
+		ring->get_seqno = wc_get_seqno;
+		ring->set_seqno = wc_set_seqno;
+	} else {
+		ring->get_seqno = llc_get_seqno;
+		ring->set_seqno = llc_set_seqno;
+	}
 	ring->emit_request = gen8_emit_request;
 	ring->emit_flush = gen8_emit_flush;
 	ring->irq_get = gen8_logical_ring_get_irq;
@@ -2080,12 +2080,12 @@ static int logical_blt_ring_init(struct drm_device *dev)
 		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
 
 	ring->init_hw = gen8_init_common_ring;
-	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
-		ring->get_seqno = bxt_a_get_seqno;
-		ring->set_seqno = bxt_a_set_seqno;
+	if (!HAS_LLC(dev_priv)) {
+		ring->get_seqno = wc_get_seqno;
+		ring->set_seqno = wc_set_seqno;
 	} else {
-		ring->get_seqno = gen8_get_seqno;
-		ring->set_seqno = gen8_set_seqno;
+		ring->get_seqno = llc_get_seqno;
+		ring->set_seqno = llc_set_seqno;
 	}
 	ring->emit_request = gen8_emit_request;
 	ring->emit_flush = gen8_emit_flush;
@@ -2110,12 +2110,12 @@ static int logical_vebox_ring_init(struct drm_device *dev)
 		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
 
 	ring->init_hw = gen8_init_common_ring;
-	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
-		ring->get_seqno = bxt_a_get_seqno;
-		ring->set_seqno = bxt_a_set_seqno;
+	if (!HAS_LLC(dev_priv)) {
+		ring->get_seqno = wc_get_seqno;
+		ring->set_seqno = wc_set_seqno;
 	} else {
-		ring->get_seqno = gen8_get_seqno;
-		ring->set_seqno = gen8_set_seqno;
+		ring->get_seqno = llc_get_seqno;
+		ring->set_seqno = llc_set_seqno;
 	}
 	ring->emit_request = gen8_emit_request;
 	ring->emit_flush = gen8_emit_flush;
-- 
2.6.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 2/2] drm/i915: Serialise updates to GGTT with access through GGTT on Braswell
  2015-10-23 17:43 [PATCH 1/2] drm/i915/execlists: HWS is uncached on !llc platforms Chris Wilson
@ 2015-10-23 17:43 ` Chris Wilson
  2015-10-23 19:45   ` Chris Wilson
  2015-10-30 16:14   ` Daniel Vetter
  2015-10-23 17:50 ` [PATCH 1/2] drm/i915/execlists: HWS is uncached on !llc platforms Ville Syrjälä
  1 sibling, 2 replies; 17+ messages in thread
From: Chris Wilson @ 2015-10-23 17:43 UTC (permalink / raw)
  To: intel-gfx

When accessing through the GTT from one CPU whilst concurrently updating
the GGTT PTEs in another thread, the hardware likes to return random
data. As we have strong serialisation prevent us from modifying the PTE
of an active GTT mmapping, we have to conclude that it whilst modifying
other PTE's that error occurs. (I have not looked for any pattern such
as modifying PTE within the same page or cacheline as active PTE -
though checking whether revoking neighbouring objects should be enough
to test that theory.) The corruption also seems restricted to Braswell
and disappears with maxcpus=0. This patch stops all access through the
GTT by other CPUs when we update any PTE by stopping the machine around
the GGTT update.

Testcase: igt/gem_concurrent_blit
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89079
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Kconfig        |  1 +
 drivers/gpu/drm/i915/i915_gem_gtt.c | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 051eab33e4c7..fcd77b27514d 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -10,6 +10,7 @@ config DRM_I915
 	# the shmem_readpage() which depends upon tmpfs
 	select SHMEM
 	select TMPFS
+	select STOP_MACHINE
 	select DRM_KMS_HELPER
 	select DRM_PANEL
 	select DRM_MIPI_DSI
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 016739eefd45..4d357e18e5d1 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -24,6 +24,7 @@
  */
 
 #include <linux/seq_file.h>
+#include <linux/stop_machine.h>
 #include <drm/drmP.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
@@ -2533,6 +2534,26 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 	return 0;
 }
 
+struct ggtt_bind_vma__cb {
+	struct i915_vma *vma;
+	enum i915_cache_level cache_level;
+	u32 flags;
+};
+
+static int ggtt_bind_vma__cb(void *_arg)
+{
+	struct ggtt_bind_vma__cb *arg = _arg;
+	return ggtt_bind_vma(arg->vma, arg->cache_level, arg->flags);
+}
+
+static int ggtt_bind_vma__BKL(struct i915_vma *vma,
+			      enum i915_cache_level cache_level,
+			      u32 flags)
+{
+	struct ggtt_bind_vma__cb arg = { vma, cache_level, flags };
+	return stop_machine(ggtt_bind_vma__cb, &arg, NULL);
+}
+
 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 				 enum i915_cache_level cache_level,
 				 u32 flags)
@@ -3000,6 +3021,9 @@ static int gen8_gmch_probe(struct drm_device *dev,
 	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
 	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
 
+	if (IS_CHERRYVIEW(dev))
+		dev_priv->gtt.base.bind_vma = ggtt_bind_vma__BKL;
+
 	return ret;
 }
 
-- 
2.6.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/2] drm/i915/execlists: HWS is uncached on !llc platforms
  2015-10-23 17:43 [PATCH 1/2] drm/i915/execlists: HWS is uncached on !llc platforms Chris Wilson
  2015-10-23 17:43 ` [PATCH 2/2] drm/i915: Serialise updates to GGTT with access through GGTT on Braswell Chris Wilson
@ 2015-10-23 17:50 ` Ville Syrjälä
  2015-10-23 17:56   ` Chris Wilson
  1 sibling, 1 reply; 17+ messages in thread
From: Ville Syrjälä @ 2015-10-23 17:50 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, Oct 23, 2015 at 06:43:31PM +0100, Chris Wilson wrote:
> As the HWS is mapped into the GPU as uncached,

Since when?

> writes into that page do
> not automatically snoop the CPU cache and therefore if we want to see
> coherent values we need to clear the stale cacheline first. Note, we
> already had a workaround for BXT-A for an identical issue, but since we
> never enabled snooping it applies to all non-llc platforms.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 70 ++++++++++++++++++++--------------------
>  1 file changed, 35 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 3308337bb6c2..7393dd00d26d 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1751,41 +1751,36 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
>  	return 0;
>  }
>  
> -static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
> +static u32 llc_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
>  {
>  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
>  }
>  
> -static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
> +static void llc_set_seqno(struct intel_engine_cs *ring, u32 seqno)
>  {
>  	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
>  }
>  
> -static u32 bxt_a_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
> +static u32 wc_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
>  {
> -
>  	/*
>  	 * On BXT A steppings there is a HW coherency issue whereby the
>  	 * MI_STORE_DATA_IMM storing the completed request's seqno
> -	 * occasionally doesn't invalidate the CPU cache. Work around this by
> -	 * clflushing the corresponding cacheline whenever the caller wants
> -	 * the coherency to be guaranteed. Note that this cacheline is known
> -	 * to be clean at this point, since we only write it in
> -	 * bxt_a_set_seqno(), where we also do a clflush after the write. So
> -	 * this clflush in practice becomes an invalidate operation.
> +	 * occasionally doesn't invalidate the CPU cache. However, as
> +	 * we do not mark the context for snoopable access, we have to flush
> +	 * the stale cacheline before seeing new values anyway.
>  	 */
> -
>  	if (!lazy_coherency)
>  		intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
>  
>  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
>  }
>  
> -static void bxt_a_set_seqno(struct intel_engine_cs *ring, u32 seqno)
> +static void wc_set_seqno(struct intel_engine_cs *ring, u32 seqno)
>  {
>  	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
>  
> -	/* See bxt_a_get_seqno() explaining the reason for the clflush. */
> +	/* See wc_get_seqno() explaining the reason for the clflush. */
>  	intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
>  }
>  
> @@ -1973,12 +1968,12 @@ static int logical_render_ring_init(struct drm_device *dev)
>  		ring->init_hw = gen8_init_render_ring;
>  	ring->init_context = gen8_init_rcs_context;
>  	ring->cleanup = intel_fini_pipe_control;
> -	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
> -		ring->get_seqno = bxt_a_get_seqno;
> -		ring->set_seqno = bxt_a_set_seqno;
> +	if (!HAS_LLC(dev_priv)) {
> +		ring->get_seqno = wc_get_seqno;
> +		ring->set_seqno = wc_set_seqno;
>  	} else {
> -		ring->get_seqno = gen8_get_seqno;
> -		ring->set_seqno = gen8_set_seqno;
> +		ring->get_seqno = llc_get_seqno;
> +		ring->set_seqno = llc_set_seqno;
>  	}
>  	ring->emit_request = gen8_emit_request;
>  	ring->emit_flush = gen8_emit_flush_render;
> @@ -2025,12 +2020,12 @@ static int logical_bsd_ring_init(struct drm_device *dev)
>  		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
>  
>  	ring->init_hw = gen8_init_common_ring;
> -	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
> -		ring->get_seqno = bxt_a_get_seqno;
> -		ring->set_seqno = bxt_a_set_seqno;
> +	if (!HAS_LLC(dev_priv)) {
> +		ring->get_seqno = wc_get_seqno;
> +		ring->set_seqno = wc_set_seqno;
>  	} else {
> -		ring->get_seqno = gen8_get_seqno;
> -		ring->set_seqno = gen8_set_seqno;
> +		ring->get_seqno = llc_get_seqno;
> +		ring->set_seqno = llc_set_seqno;
>  	}
>  	ring->emit_request = gen8_emit_request;
>  	ring->emit_flush = gen8_emit_flush;
> @@ -2055,8 +2050,13 @@ static int logical_bsd2_ring_init(struct drm_device *dev)
>  		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
>  
>  	ring->init_hw = gen8_init_common_ring;
> -	ring->get_seqno = gen8_get_seqno;
> -	ring->set_seqno = gen8_set_seqno;
> +	if (!HAS_LLC(dev_priv)) {
> +		ring->get_seqno = wc_get_seqno;
> +		ring->set_seqno = wc_set_seqno;
> +	} else {
> +		ring->get_seqno = llc_get_seqno;
> +		ring->set_seqno = llc_set_seqno;
> +	}
>  	ring->emit_request = gen8_emit_request;
>  	ring->emit_flush = gen8_emit_flush;
>  	ring->irq_get = gen8_logical_ring_get_irq;
> @@ -2080,12 +2080,12 @@ static int logical_blt_ring_init(struct drm_device *dev)
>  		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
>  
>  	ring->init_hw = gen8_init_common_ring;
> -	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
> -		ring->get_seqno = bxt_a_get_seqno;
> -		ring->set_seqno = bxt_a_set_seqno;
> +	if (!HAS_LLC(dev_priv)) {
> +		ring->get_seqno = wc_get_seqno;
> +		ring->set_seqno = wc_set_seqno;
>  	} else {
> -		ring->get_seqno = gen8_get_seqno;
> -		ring->set_seqno = gen8_set_seqno;
> +		ring->get_seqno = llc_get_seqno;
> +		ring->set_seqno = llc_set_seqno;
>  	}
>  	ring->emit_request = gen8_emit_request;
>  	ring->emit_flush = gen8_emit_flush;
> @@ -2110,12 +2110,12 @@ static int logical_vebox_ring_init(struct drm_device *dev)
>  		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
>  
>  	ring->init_hw = gen8_init_common_ring;
> -	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
> -		ring->get_seqno = bxt_a_get_seqno;
> -		ring->set_seqno = bxt_a_set_seqno;
> +	if (!HAS_LLC(dev_priv)) {
> +		ring->get_seqno = wc_get_seqno;
> +		ring->set_seqno = wc_set_seqno;
>  	} else {
> -		ring->get_seqno = gen8_get_seqno;
> -		ring->set_seqno = gen8_set_seqno;
> +		ring->get_seqno = llc_get_seqno;
> +		ring->set_seqno = llc_set_seqno;
>  	}
>  	ring->emit_request = gen8_emit_request;
>  	ring->emit_flush = gen8_emit_flush;
> -- 
> 2.6.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/2] drm/i915/execlists: HWS is uncached on !llc platforms
  2015-10-23 17:50 ` [PATCH 1/2] drm/i915/execlists: HWS is uncached on !llc platforms Ville Syrjälä
@ 2015-10-23 17:56   ` Chris Wilson
  2015-10-23 18:22     ` Ville Syrjälä
  0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2015-10-23 17:56 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx

On Fri, Oct 23, 2015 at 08:50:42PM +0300, Ville Syrjälä wrote:
> On Fri, Oct 23, 2015 at 06:43:31PM +0100, Chris Wilson wrote:
> > As the HWS is mapped into the GPU as uncached,
> 
> Since when?

Since it is embedded into execlists' default context which is allocated
using the system default cache level, i.e. uncached on !llc. See
intel_lr_context_deferred_alloc()
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/2] drm/i915/execlists: HWS is uncached on !llc platforms
  2015-10-23 17:56   ` Chris Wilson
@ 2015-10-23 18:22     ` Ville Syrjälä
  2015-10-23 18:29       ` Chris Wilson
  0 siblings, 1 reply; 17+ messages in thread
From: Ville Syrjälä @ 2015-10-23 18:22 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On Fri, Oct 23, 2015 at 06:56:41PM +0100, Chris Wilson wrote:
> On Fri, Oct 23, 2015 at 08:50:42PM +0300, Ville Syrjälä wrote:
> > On Fri, Oct 23, 2015 at 06:43:31PM +0100, Chris Wilson wrote:
> > > As the HWS is mapped into the GPU as uncached,
> > 
> > Since when?
> 
> Since it is embedded into execlists' default context which is allocated
> using the system default cache level, i.e. uncached on !llc. See
> intel_lr_context_deferred_alloc()

Oh right. That doesn't actually matter since it's mapped through ggtt
which means it always goes through PAT 0.

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/2] drm/i915/execlists: HWS is uncached on !llc platforms
  2015-10-23 18:22     ` Ville Syrjälä
@ 2015-10-23 18:29       ` Chris Wilson
  2015-10-23 18:41         ` Ville Syrjälä
  0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2015-10-23 18:29 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx

On Fri, Oct 23, 2015 at 09:22:38PM +0300, Ville Syrjälä wrote:
> On Fri, Oct 23, 2015 at 06:56:41PM +0100, Chris Wilson wrote:
> > On Fri, Oct 23, 2015 at 08:50:42PM +0300, Ville Syrjälä wrote:
> > > On Fri, Oct 23, 2015 at 06:43:31PM +0100, Chris Wilson wrote:
> > > > As the HWS is mapped into the GPU as uncached,
> > > 
> > > Since when?
> > 
> > Since it is embedded into execlists' default context which is allocated
> > using the system default cache level, i.e. uncached on !llc. See
> > intel_lr_context_deferred_alloc()
> 
> Oh right. That doesn't actually matter since it's mapped through ggtt
> which means it always goes through PAT 0.

Oh, that again. Doesn't that mean we broke i915.enable_ppgtt=0?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/2] drm/i915/execlists: HWS is uncached on !llc platforms
  2015-10-23 18:29       ` Chris Wilson
@ 2015-10-23 18:41         ` Ville Syrjälä
  2015-10-23 19:08           ` Chris Wilson
  0 siblings, 1 reply; 17+ messages in thread
From: Ville Syrjälä @ 2015-10-23 18:41 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On Fri, Oct 23, 2015 at 07:29:08PM +0100, Chris Wilson wrote:
> On Fri, Oct 23, 2015 at 09:22:38PM +0300, Ville Syrjälä wrote:
> > On Fri, Oct 23, 2015 at 06:56:41PM +0100, Chris Wilson wrote:
> > > On Fri, Oct 23, 2015 at 08:50:42PM +0300, Ville Syrjälä wrote:
> > > > On Fri, Oct 23, 2015 at 06:43:31PM +0100, Chris Wilson wrote:
> > > > > As the HWS is mapped into the GPU as uncached,
> > > > 
> > > > Since when?
> > > 
> > > Since it is embedded into execlists' default context which is allocated
> > > using the system default cache level, i.e. uncached on !llc. See
> > > intel_lr_context_deferred_alloc()
> > 
> > Oh right. That doesn't actually matter since it's mapped through ggtt
> > which means it always goes through PAT 0.
> 
> Oh, that again. Doesn't that mean we broke i915.enable_ppgtt=0?

Just means everything is snooped in that case. As long as the hardware
doesn't get too upset about all the snooping it should work. Can't
really recall if I actually tried it though. I think I did.

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/2] drm/i915/execlists: HWS is uncached on !llc platforms
  2015-10-23 18:41         ` Ville Syrjälä
@ 2015-10-23 19:08           ` Chris Wilson
  2015-10-23 19:36             ` Ville Syrjälä
  0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2015-10-23 19:08 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx

On Fri, Oct 23, 2015 at 09:41:29PM +0300, Ville Syrjälä wrote:
> On Fri, Oct 23, 2015 at 07:29:08PM +0100, Chris Wilson wrote:
> > On Fri, Oct 23, 2015 at 09:22:38PM +0300, Ville Syrjälä wrote:
> > > On Fri, Oct 23, 2015 at 06:56:41PM +0100, Chris Wilson wrote:
> > > > On Fri, Oct 23, 2015 at 08:50:42PM +0300, Ville Syrjälä wrote:
> > > > > On Fri, Oct 23, 2015 at 06:43:31PM +0100, Chris Wilson wrote:
> > > > > > As the HWS is mapped into the GPU as uncached,
> > > > > 
> > > > > Since when?
> > > > 
> > > > Since it is embedded into execlists' default context which is allocated
> > > > using the system default cache level, i.e. uncached on !llc. See
> > > > intel_lr_context_deferred_alloc()
> > > 
> > > Oh right. That doesn't actually matter since it's mapped through ggtt
> > > which means it always goes through PAT 0.
> > 
> > Oh, that again. Doesn't that mean we broke i915.enable_ppgtt=0?
> 
> Just means everything is snooped in that case. As long as the hardware
> doesn't get too upset about all the snooping it should work. Can't
> really recall if I actually tried it though. I think I did.

Just wondering if it means we start getting cacheline dirt on the
scanout. Though since snooping only occurs on flushes, I guess it
actually means it hits the backing storage and then is pushed into the
cpu cache. The other worry is whether we are then generating fsb snoop
traffic on every context switch. Just idle thoughts as I realise I don't
know as much about snooping as I'd like.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/2] drm/i915/execlists: HWS is uncached on !llc platforms
  2015-10-23 19:08           ` Chris Wilson
@ 2015-10-23 19:36             ` Ville Syrjälä
  0 siblings, 0 replies; 17+ messages in thread
From: Ville Syrjälä @ 2015-10-23 19:36 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On Fri, Oct 23, 2015 at 08:08:34PM +0100, Chris Wilson wrote:
> On Fri, Oct 23, 2015 at 09:41:29PM +0300, Ville Syrjälä wrote:
> > On Fri, Oct 23, 2015 at 07:29:08PM +0100, Chris Wilson wrote:
> > > On Fri, Oct 23, 2015 at 09:22:38PM +0300, Ville Syrjälä wrote:
> > > > On Fri, Oct 23, 2015 at 06:56:41PM +0100, Chris Wilson wrote:
> > > > > On Fri, Oct 23, 2015 at 08:50:42PM +0300, Ville Syrjälä wrote:
> > > > > > On Fri, Oct 23, 2015 at 06:43:31PM +0100, Chris Wilson wrote:
> > > > > > > As the HWS is mapped into the GPU as uncached,
> > > > > > 
> > > > > > Since when?
> > > > > 
> > > > > Since it is embedded into execlists' default context which is allocated
> > > > > using the system default cache level, i.e. uncached on !llc. See
> > > > > intel_lr_context_deferred_alloc()
> > > > 
> > > > Oh right. That doesn't actually matter since it's mapped through ggtt
> > > > which means it always goes through PAT 0.
> > > 
> > > Oh, that again. Doesn't that mean we broke i915.enable_ppgtt=0?
> > 
> > Just means everything is snooped in that case. As long as the hardware
> > doesn't get too upset about all the snooping it should work. Can't
> > really recall if I actually tried it though. I think I did.
> 
> Just wondering if it means we start getting cacheline dirt on the
> scanout. Though since snooping only occurs on flushes, I guess it
> actually means it hits the backing storage and then is pushed into the
> cpu cache. The other worry is whether we are then generating fsb snoop
> traffic on every context switch. Just idle thoughts as I realise I don't
> know as much about snooping as I'd like.

TBH I never gave !ppgtt too much thought.

The snoops for the context switches have crossed my mind. I was
thinking that maybe we could map the status page through ppgtt and
the rest of the context through ggtt, and then we could make the
PAT 0 non-snooped. But looks like the per-process status page still
needs to be mapped through the ggtt.

But maybe we could also map it through the ppgtt and use the ppgtt
mapping for seqno writes? That's assuming we don't need to look at
whatever else gets stored in the status page through the ggtt mapping.

Or I suppose we could take you approach and just make ggtt non-snooped
and take the clflush hit for seqno reads. No idea which is worse. Would
need to gather some numbers I suppose.

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/2] drm/i915: Serialise updates to GGTT with access through GGTT on Braswell
  2015-10-23 17:43 ` [PATCH 2/2] drm/i915: Serialise updates to GGTT with access through GGTT on Braswell Chris Wilson
@ 2015-10-23 19:45   ` Chris Wilson
  2015-10-30 16:14   ` Daniel Vetter
  1 sibling, 0 replies; 17+ messages in thread
From: Chris Wilson @ 2015-10-23 19:45 UTC (permalink / raw)
  To: intel-gfx

On Fri, Oct 23, 2015 at 06:43:32PM +0100, Chris Wilson wrote:
> (I have not looked for any pattern such
> as modifying PTE within the same page or cacheline as active PTE -
> though checking whether revoking neighbouring objects should be enough
> to test that theory.)

So, fwiw, I revoked the CPU PTE for the neighbouring objects (each object
is 1MiB in size and so should occupy 2KiB in the GGTT) but the
corruption persisted. If I didn't make a mistake that should squash the
theory that it is access through PTE neighbouring the update that are
trampled upon. I had also earlier ioremaped the GGTT (dev_priv->gtt.gsm)
as uncached, and thrown countless mb() around to rule out incomplete
writes to the GGTT prior to CPU access. I haven't tried idling the GPU,
as other tests within gem_concurrent_blit demonstrate that is not a GPU
flushing issue.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/2] drm/i915: Serialise updates to GGTT with access through GGTT on Braswell
  2015-10-23 17:43 ` [PATCH 2/2] drm/i915: Serialise updates to GGTT with access through GGTT on Braswell Chris Wilson
  2015-10-23 19:45   ` Chris Wilson
@ 2015-10-30 16:14   ` Daniel Vetter
  2015-10-30 16:58     ` Chris Wilson
  1 sibling, 1 reply; 17+ messages in thread
From: Daniel Vetter @ 2015-10-30 16:14 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, Oct 23, 2015 at 06:43:32PM +0100, Chris Wilson wrote:
> When accessing through the GTT from one CPU whilst concurrently updating
> the GGTT PTEs in another thread, the hardware likes to return random
> data. As we have strong serialisation prevent us from modifying the PTE
> of an active GTT mmapping, we have to conclude that it whilst modifying
> other PTE's that error occurs. (I have not looked for any pattern such
> as modifying PTE within the same page or cacheline as active PTE -
> though checking whether revoking neighbouring objects should be enough
> to test that theory.) The corruption also seems restricted to Braswell
> and disappears with maxcpus=0. This patch stops all access through the
> GTT by other CPUs when we update any PTE by stopping the machine around
> the GGTT update.
> 
> Testcase: igt/gem_concurrent_blit
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89079
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Wild guess, since it wouldn't be the first time hw engineers screwed this
up.

Cheers, Daniel

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d1c5cf89fe77..de983c8e6e54 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2337,12 +2337,8 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
 
 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 {
-#ifdef writeq
-	writeq(pte, addr);
-#else
 	iowrite32((u32)pte, addr);
 	iowrite32(pte >> 32, addr + 4);
-#endif
 }
 
 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/2] drm/i915: Serialise updates to GGTT with access through GGTT on Braswell
  2015-10-30 16:14   ` Daniel Vetter
@ 2015-10-30 16:58     ` Chris Wilson
  2015-11-17 16:37       ` Daniel Vetter
  0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2015-10-30 16:58 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Fri, Oct 30, 2015 at 05:14:21PM +0100, Daniel Vetter wrote:
> On Fri, Oct 23, 2015 at 06:43:32PM +0100, Chris Wilson wrote:
> > When accessing through the GTT from one CPU whilst concurrently updating
> > the GGTT PTEs in another thread, the hardware likes to return random
> > data. As we have strong serialisation prevent us from modifying the PTE
> > of an active GTT mmapping, we have to conclude that it whilst modifying
> > other PTE's that error occurs. (I have not looked for any pattern such
> > as modifying PTE within the same page or cacheline as active PTE -
> > though checking whether revoking neighbouring objects should be enough
> > to test that theory.) The corruption also seems restricted to Braswell
> > and disappears with maxcpus=0. This patch stops all access through the
> > GTT by other CPUs when we update any PTE by stopping the machine around
> > the GGTT update.
> > 
> > Testcase: igt/gem_concurrent_blit
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89079
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Wild guess, since it wouldn't be the first time hw engineers screwed this
> up.
> 
> Cheers, Daniel
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index d1c5cf89fe77..de983c8e6e54 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2337,12 +2337,8 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
>  
>  static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
>  {
> -#ifdef writeq
> -	writeq(pte, addr);
> -#else
>  	iowrite32((u32)pte, addr);
>  	iowrite32(pte >> 32, addr + 4);
> -#endif

Tried:
 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
  {
  -#ifdef writeq
  -       writeq(pte, addr);
  -#else
  -       iowrite32((u32)pte, addr);
  -       iowrite32(pte >> 32, addr + 4);
  -#endif
  +       iowrite32(0, addr);
  +       wmb();
  +       iowrite32(upper_32_bits(pte), addr + 4);
  +       iowrite32(lower_32_bits(pte), addr);
  +       wmb();
   }
    
and just the plain iowrite(lower), iowrite(upper), neither helps.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/2] drm/i915: Serialise updates to GGTT with access through GGTT on Braswell
  2015-10-30 16:58     ` Chris Wilson
@ 2015-11-17 16:37       ` Daniel Vetter
  2015-11-18 23:08         ` Jesse Barnes
  0 siblings, 1 reply; 17+ messages in thread
From: Daniel Vetter @ 2015-11-17 16:37 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, intel-gfx

On Fri, Oct 30, 2015 at 04:58:41PM +0000, Chris Wilson wrote:
> On Fri, Oct 30, 2015 at 05:14:21PM +0100, Daniel Vetter wrote:
> > On Fri, Oct 23, 2015 at 06:43:32PM +0100, Chris Wilson wrote:
> > > When accessing through the GTT from one CPU whilst concurrently updating
> > > the GGTT PTEs in another thread, the hardware likes to return random
> > > data. As we have strong serialisation prevent us from modifying the PTE
> > > of an active GTT mmapping, we have to conclude that it whilst modifying
> > > other PTE's that error occurs. (I have not looked for any pattern such
> > > as modifying PTE within the same page or cacheline as active PTE -
> > > though checking whether revoking neighbouring objects should be enough
> > > to test that theory.) The corruption also seems restricted to Braswell
> > > and disappears with maxcpus=0. This patch stops all access through the
> > > GTT by other CPUs when we update any PTE by stopping the machine around
> > > the GGTT update.
> > > 
> > > Testcase: igt/gem_concurrent_blit
> > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89079
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > 
> > Wild guess, since it wouldn't be the first time hw engineers screwed this
> > up.
> > 
> > Cheers, Daniel
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index d1c5cf89fe77..de983c8e6e54 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -2337,12 +2337,8 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
> >  
> >  static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
> >  {
> > -#ifdef writeq
> > -	writeq(pte, addr);
> > -#else
> >  	iowrite32((u32)pte, addr);
> >  	iowrite32(pte >> 32, addr + 4);
> > -#endif
> 
> Tried:
>  static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
>   {
>   -#ifdef writeq
>   -       writeq(pte, addr);
>   -#else
>   -       iowrite32((u32)pte, addr);
>   -       iowrite32(pte >> 32, addr + 4);
>   -#endif
>   +       iowrite32(0, addr);
>   +       wmb();
>   +       iowrite32(upper_32_bits(pte), addr + 4);
>   +       iowrite32(lower_32_bits(pte), addr);
>   +       wmb();
>    }
>     
> and just the plain iowrite(lower), iowrite(upper), neither helps.

Added a note about this and applied to dinq. Yay for awesome hw.

Thanks, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/2] drm/i915: Serialise updates to GGTT with access through GGTT on Braswell
  2015-11-17 16:37       ` Daniel Vetter
@ 2015-11-18 23:08         ` Jesse Barnes
  2015-11-19  9:14           ` Daniel Vetter
  0 siblings, 1 reply; 17+ messages in thread
From: Jesse Barnes @ 2015-11-18 23:08 UTC (permalink / raw)
  To: Daniel Vetter, Chris Wilson, intel-gfx

On 11/17/2015 08:37 AM, Daniel Vetter wrote:
> On Fri, Oct 30, 2015 at 04:58:41PM +0000, Chris Wilson wrote:
>> On Fri, Oct 30, 2015 at 05:14:21PM +0100, Daniel Vetter wrote:
>>> On Fri, Oct 23, 2015 at 06:43:32PM +0100, Chris Wilson wrote:
>>>> When accessing through the GTT from one CPU whilst concurrently updating
>>>> the GGTT PTEs in another thread, the hardware likes to return random
>>>> data. As we have strong serialisation prevent us from modifying the PTE
>>>> of an active GTT mmapping, we have to conclude that it whilst modifying
>>>> other PTE's that error occurs. (I have not looked for any pattern such
>>>> as modifying PTE within the same page or cacheline as active PTE -
>>>> though checking whether revoking neighbouring objects should be enough
>>>> to test that theory.) The corruption also seems restricted to Braswell
>>>> and disappears with maxcpus=0. This patch stops all access through the
>>>> GTT by other CPUs when we update any PTE by stopping the machine around
>>>> the GGTT update.
>>>>
>>>> Testcase: igt/gem_concurrent_blit
>>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89079
>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>
>>> Wild guess, since it wouldn't be the first time hw engineers screwed this
>>> up.
>>>
>>> Cheers, Daniel
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
>>> index d1c5cf89fe77..de983c8e6e54 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
>>> @@ -2337,12 +2337,8 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
>>>  
>>>  static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
>>>  {
>>> -#ifdef writeq
>>> -	writeq(pte, addr);
>>> -#else
>>>  	iowrite32((u32)pte, addr);
>>>  	iowrite32(pte >> 32, addr + 4);
>>> -#endif
>>
>> Tried:
>>  static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
>>   {
>>   -#ifdef writeq
>>   -       writeq(pte, addr);
>>   -#else
>>   -       iowrite32((u32)pte, addr);
>>   -       iowrite32(pte >> 32, addr + 4);
>>   -#endif
>>   +       iowrite32(0, addr);
>>   +       wmb();
>>   +       iowrite32(upper_32_bits(pte), addr + 4);
>>   +       iowrite32(lower_32_bits(pte), addr);
>>   +       wmb();
>>    }
>>     
>> and just the plain iowrite(lower), iowrite(upper), neither helps.
> 
> Added a note about this and applied to dinq. Yay for awesome hw.

I thought Ville explained how this wasn't necessary?

Jesse
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/2] drm/i915: Serialise updates to GGTT with access through GGTT on Braswell
  2015-11-18 23:08         ` Jesse Barnes
@ 2015-11-19  9:14           ` Daniel Vetter
  2015-11-19  9:35             ` Chris Wilson
  0 siblings, 1 reply; 17+ messages in thread
From: Daniel Vetter @ 2015-11-19  9:14 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: intel-gfx

On Wed, Nov 18, 2015 at 03:08:47PM -0800, Jesse Barnes wrote:
> On 11/17/2015 08:37 AM, Daniel Vetter wrote:
> > On Fri, Oct 30, 2015 at 04:58:41PM +0000, Chris Wilson wrote:
> >> On Fri, Oct 30, 2015 at 05:14:21PM +0100, Daniel Vetter wrote:
> >>> On Fri, Oct 23, 2015 at 06:43:32PM +0100, Chris Wilson wrote:
> >>>> When accessing through the GTT from one CPU whilst concurrently updating
> >>>> the GGTT PTEs in another thread, the hardware likes to return random
> >>>> data. As we have strong serialisation prevent us from modifying the PTE
> >>>> of an active GTT mmapping, we have to conclude that it whilst modifying
> >>>> other PTE's that error occurs. (I have not looked for any pattern such
> >>>> as modifying PTE within the same page or cacheline as active PTE -
> >>>> though checking whether revoking neighbouring objects should be enough
> >>>> to test that theory.) The corruption also seems restricted to Braswell
> >>>> and disappears with maxcpus=0. This patch stops all access through the
> >>>> GTT by other CPUs when we update any PTE by stopping the machine around
> >>>> the GGTT update.
> >>>>
> >>>> Testcase: igt/gem_concurrent_blit
> >>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89079
> >>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>>
> >>> Wild guess, since it wouldn't be the first time hw engineers screwed this
> >>> up.
> >>>
> >>> Cheers, Daniel
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> >>> index d1c5cf89fe77..de983c8e6e54 100644
> >>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> >>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> >>> @@ -2337,12 +2337,8 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
> >>>  
> >>>  static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
> >>>  {
> >>> -#ifdef writeq
> >>> -	writeq(pte, addr);
> >>> -#else
> >>>  	iowrite32((u32)pte, addr);
> >>>  	iowrite32(pte >> 32, addr + 4);
> >>> -#endif
> >>
> >> Tried:
> >>  static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
> >>   {
> >>   -#ifdef writeq
> >>   -       writeq(pte, addr);
> >>   -#else
> >>   -       iowrite32((u32)pte, addr);
> >>   -       iowrite32(pte >> 32, addr + 4);
> >>   -#endif
> >>   +       iowrite32(0, addr);
> >>   +       wmb();
> >>   +       iowrite32(upper_32_bits(pte), addr + 4);
> >>   +       iowrite32(lower_32_bits(pte), addr);
> >>   +       wmb();
> >>    }
> >>     
> >> and just the plain iowrite(lower), iowrite(upper), neither helps.
> > 
> > Added a note about this and applied to dinq. Yay for awesome hw.
> 
> I thought Ville explained how this wasn't necessary?

Ville can't repro, Chris claims it fixes something, I don't have a
system. We probably should dig into this more, but since I didn't see
anything going on I figured I can just pull it in for now.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/2] drm/i915: Serialise updates to GGTT with access through GGTT on Braswell
  2015-11-19  9:14           ` Daniel Vetter
@ 2015-11-19  9:35             ` Chris Wilson
  2015-11-19 16:35               ` Jesse Barnes
  0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2015-11-19  9:35 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Thu, Nov 19, 2015 at 10:14:08AM +0100, Daniel Vetter wrote:
> On Wed, Nov 18, 2015 at 03:08:47PM -0800, Jesse Barnes wrote:
> > On 11/17/2015 08:37 AM, Daniel Vetter wrote:
> > > On Fri, Oct 30, 2015 at 04:58:41PM +0000, Chris Wilson wrote:
> > >> On Fri, Oct 30, 2015 at 05:14:21PM +0100, Daniel Vetter wrote:
> > >>> On Fri, Oct 23, 2015 at 06:43:32PM +0100, Chris Wilson wrote:
> > >>>> When accessing through the GTT from one CPU whilst concurrently updating
> > >>>> the GGTT PTEs in another thread, the hardware likes to return random
> > >>>> data. As we have strong serialisation prevent us from modifying the PTE
> > >>>> of an active GTT mmapping, we have to conclude that it whilst modifying
> > >>>> other PTE's that error occurs. (I have not looked for any pattern such
> > >>>> as modifying PTE within the same page or cacheline as active PTE -
> > >>>> though checking whether revoking neighbouring objects should be enough
> > >>>> to test that theory.) The corruption also seems restricted to Braswell
> > >>>> and disappears with maxcpus=0. This patch stops all access through the
> > >>>> GTT by other CPUs when we update any PTE by stopping the machine around
> > >>>> the GGTT update.
> > >>>>
> > >>>> Testcase: igt/gem_concurrent_blit
> > >>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89079
> > >>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > >>>
> > >>> Wild guess, since it wouldn't be the first time hw engineers screwed this
> > >>> up.
> > >>>
> > >>> Cheers, Daniel
> > >>>
> > >>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > >>> index d1c5cf89fe77..de983c8e6e54 100644
> > >>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > >>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > >>> @@ -2337,12 +2337,8 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
> > >>>  
> > >>>  static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
> > >>>  {
> > >>> -#ifdef writeq
> > >>> -	writeq(pte, addr);
> > >>> -#else
> > >>>  	iowrite32((u32)pte, addr);
> > >>>  	iowrite32(pte >> 32, addr + 4);
> > >>> -#endif
> > >>
> > >> Tried:
> > >>  static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
> > >>   {
> > >>   -#ifdef writeq
> > >>   -       writeq(pte, addr);
> > >>   -#else
> > >>   -       iowrite32((u32)pte, addr);
> > >>   -       iowrite32(pte >> 32, addr + 4);
> > >>   -#endif
> > >>   +       iowrite32(0, addr);
> > >>   +       wmb();
> > >>   +       iowrite32(upper_32_bits(pte), addr + 4);
> > >>   +       iowrite32(lower_32_bits(pte), addr);
> > >>   +       wmb();
> > >>    }
> > >>     
> > >> and just the plain iowrite(lower), iowrite(upper), neither helps.
> > > 
> > > Added a note about this and applied to dinq. Yay for awesome hw.
> > 
> > I thought Ville explained how this wasn't necessary?
> 
> Ville can't repro, Chris claims it fixes something, I don't have a
> system. We probably should dig into this more, but since I didn't see
> anything going on I figured I can just pull it in for now.

Both myself, old QA (when they finally got around to running some of the
coherency tests), new QA and VPG have reported coherency issues with
access through the GGTT.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/2] drm/i915: Serialise updates to GGTT with access through GGTT on Braswell
  2015-11-19  9:35             ` Chris Wilson
@ 2015-11-19 16:35               ` Jesse Barnes
  0 siblings, 0 replies; 17+ messages in thread
From: Jesse Barnes @ 2015-11-19 16:35 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, intel-gfx

On 11/19/2015 01:35 AM, Chris Wilson wrote:
> On Thu, Nov 19, 2015 at 10:14:08AM +0100, Daniel Vetter wrote:
>> On Wed, Nov 18, 2015 at 03:08:47PM -0800, Jesse Barnes wrote:
>>> On 11/17/2015 08:37 AM, Daniel Vetter wrote:
>>>> On Fri, Oct 30, 2015 at 04:58:41PM +0000, Chris Wilson wrote:
>>>>> On Fri, Oct 30, 2015 at 05:14:21PM +0100, Daniel Vetter wrote:
>>>>>> On Fri, Oct 23, 2015 at 06:43:32PM +0100, Chris Wilson wrote:
>>>>>>> When accessing through the GTT from one CPU whilst concurrently updating
>>>>>>> the GGTT PTEs in another thread, the hardware likes to return random
>>>>>>> data. As we have strong serialisation prevent us from modifying the PTE
>>>>>>> of an active GTT mmapping, we have to conclude that it whilst modifying
>>>>>>> other PTE's that error occurs. (I have not looked for any pattern such
>>>>>>> as modifying PTE within the same page or cacheline as active PTE -
>>>>>>> though checking whether revoking neighbouring objects should be enough
>>>>>>> to test that theory.) The corruption also seems restricted to Braswell
>>>>>>> and disappears with maxcpus=0. This patch stops all access through the
>>>>>>> GTT by other CPUs when we update any PTE by stopping the machine around
>>>>>>> the GGTT update.
>>>>>>>
>>>>>>> Testcase: igt/gem_concurrent_blit
>>>>>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89079
>>>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>>>
>>>>>> Wild guess, since it wouldn't be the first time hw engineers screwed this
>>>>>> up.
>>>>>>
>>>>>> Cheers, Daniel
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
>>>>>> index d1c5cf89fe77..de983c8e6e54 100644
>>>>>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
>>>>>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
>>>>>> @@ -2337,12 +2337,8 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
>>>>>>  
>>>>>>  static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
>>>>>>  {
>>>>>> -#ifdef writeq
>>>>>> -	writeq(pte, addr);
>>>>>> -#else
>>>>>>  	iowrite32((u32)pte, addr);
>>>>>>  	iowrite32(pte >> 32, addr + 4);
>>>>>> -#endif
>>>>>
>>>>> Tried:
>>>>>  static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
>>>>>   {
>>>>>   -#ifdef writeq
>>>>>   -       writeq(pte, addr);
>>>>>   -#else
>>>>>   -       iowrite32((u32)pte, addr);
>>>>>   -       iowrite32(pte >> 32, addr + 4);
>>>>>   -#endif
>>>>>   +       iowrite32(0, addr);
>>>>>   +       wmb();
>>>>>   +       iowrite32(upper_32_bits(pte), addr + 4);
>>>>>   +       iowrite32(lower_32_bits(pte), addr);
>>>>>   +       wmb();
>>>>>    }
>>>>>     
>>>>> and just the plain iowrite(lower), iowrite(upper), neither helps.
>>>>
>>>> Added a note about this and applied to dinq. Yay for awesome hw.
>>>
>>> I thought Ville explained how this wasn't necessary?
>>
>> Ville can't repro, Chris claims it fixes something, I don't have a
>> system. We probably should dig into this more, but since I didn't see
>> anything going on I figured I can just pull it in for now.
> 
> Both myself, old QA (when they finally got around to running some of the
> coherency tests), new QA and VPG have reported coherency issues with
> access through the GGTT.

I can believe it; it would be good to find the root cause the hw issue
though.  Obviously we're not understanding something fully...

Jesse

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2015-11-19 16:35 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-23 17:43 [PATCH 1/2] drm/i915/execlists: HWS is uncached on !llc platforms Chris Wilson
2015-10-23 17:43 ` [PATCH 2/2] drm/i915: Serialise updates to GGTT with access through GGTT on Braswell Chris Wilson
2015-10-23 19:45   ` Chris Wilson
2015-10-30 16:14   ` Daniel Vetter
2015-10-30 16:58     ` Chris Wilson
2015-11-17 16:37       ` Daniel Vetter
2015-11-18 23:08         ` Jesse Barnes
2015-11-19  9:14           ` Daniel Vetter
2015-11-19  9:35             ` Chris Wilson
2015-11-19 16:35               ` Jesse Barnes
2015-10-23 17:50 ` [PATCH 1/2] drm/i915/execlists: HWS is uncached on !llc platforms Ville Syrjälä
2015-10-23 17:56   ` Chris Wilson
2015-10-23 18:22     ` Ville Syrjälä
2015-10-23 18:29       ` Chris Wilson
2015-10-23 18:41         ` Ville Syrjälä
2015-10-23 19:08           ` Chris Wilson
2015-10-23 19:36             ` Ville Syrjälä

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.