All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Treat WC a separate cache domain
@ 2017-04-05 21:07 Chris Wilson
  2017-04-05 21:25 ` ✓ Fi.CI.BAT: success for " Patchwork
  2017-04-07  7:16 ` [PATCH] " Joonas Lahtinen
  0 siblings, 2 replies; 4+ messages in thread
From: Chris Wilson @ 2017-04-05 21:07 UTC (permalink / raw)
  To: intel-gfx

When discussing a new WC mmap, we based the interface upon the
assumption that GTT was fully coherent. How naive! Commits 3b5724d702ef
("drm/i915: Wait for writes through the GTT to land before reading
back") and ed4596ea992d ("drm/i915/guc: WA to address the Ringbuffer
coherency issue") demonstrate that writes through the GTT are indeed
delayed and may be overtaken by direct WC access. To be safe, if
userspace is mixing WC mmaps with other potential GTT access (pwrite,
GTT mmaps) it should use set_domain(WC).

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96563
Testcase: igt/gem_pwrite/small-gtt*
Testcase: igt/drv_selftest/coherency
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h                    |  5 +-
 drivers/gpu/drm/i915/i915_gem.c                    | 77 ++++++++++++++++++++--
 drivers/gpu/drm/i915/intel_guc_log.c               |  6 +-
 .../gpu/drm/i915/selftests/i915_gem_coherency.c    | 10 +--
 drivers/gpu/drm/i915/selftests/i915_gem_request.c  |  2 +-
 include/uapi/drm/i915_drm.h                        |  2 +
 6 files changed, 86 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c9b0949f6c1a..37d2dc7308e7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3447,8 +3447,9 @@ int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 #define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX
 
 int __must_check
-i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
-				  bool write);
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
+int __must_check
+i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
 int __must_check
 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
 struct i915_vma * __must_check
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4ca88f2539c0..497c669d0096 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1591,10 +1591,12 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	if (err)
 		goto out_unpin;
 
-	if (read_domains & I915_GEM_DOMAIN_GTT)
-		err = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
+	if (read_domains & I915_GEM_DOMAIN_WC)
+		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
+	else if (read_domains & I915_GEM_DOMAIN_GTT)
+		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
 	else
-		err = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
+		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
 
 	/* And bump the LRU for this access */
 	i915_gem_object_bump_inactive_ggtt(obj);
@@ -1737,6 +1739,9 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
  *     into userspace. (This view is aligned and sized appropriately for
  *     fenced access.)
  *
+ * 2 - Recognise WC as a separate cache domain so that we can flush the
+ *     delayed writes via GTT before performing direct access via WC.
+ *
  * Restrictions:
  *
  *  * snoopable objects cannot be accessed via the GTT. It can cause machine
@@ -1764,7 +1769,7 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
  */
 int i915_gem_mmap_gtt_version(void)
 {
-	return 1;
+	return 2;
 }
 
 static inline struct i915_ggtt_view
@@ -3390,6 +3395,70 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
 }
 
 /**
+ * Moves a single object to the WC read, and possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
+{
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT,
+				   NULL);
+	if (ret)
+		return ret;
+
+	if (obj->base.write_domain == I915_GEM_DOMAIN_WC)
+		return 0;
+
+	/* Flush and acquire obj->pages so that we are coherent through
+	 * direct access in memory with previous cached writes through
+	 * shmemfs and that our cache domain tracking remains valid.
+	 * For example, if the obj->filp was moved to swap without us
+	 * being notified and releasing the pages, we would mistakenly
+	 * continue to assume that the obj remained out of the CPU cached
+	 * domain.
+	 */
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		return ret;
+
+	i915_gem_object_flush_cpu_write_domain(obj);
+	i915_gem_object_flush_gtt_write_domain(obj);
+
+	/* Serialise direct access to this object with the barriers for
+	 * coherent writes from the GPU, by effectively invalidating the
+	 * WC domain upon first access.
+	 */
+	if ((obj->base.read_domains & I915_GEM_DOMAIN_WC) == 0)
+		mb();
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_WC) != 0);
+	obj->base.read_domains |= I915_GEM_DOMAIN_WC;
+	if (write) {
+		obj->base.read_domains = I915_GEM_DOMAIN_WC;
+		obj->base.write_domain = I915_GEM_DOMAIN_WC;
+		obj->mm.dirty = true;
+	}
+
+	i915_gem_object_unpin_pages(obj);
+	return 0;
+}
+
+/**
  * Moves a single object to the GTT read, and possibly write domain.
  * @obj: object to act on
  * @write: ask for write access or read only
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index 6fb63a3c65b0..16d3b8719cab 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -359,12 +359,16 @@ static int guc_log_runtime_create(struct intel_guc *guc)
 	void *vaddr;
 	struct rchan *guc_log_relay_chan;
 	size_t n_subbufs, subbuf_size;
-	int ret = 0;
+	int ret;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
 	GEM_BUG_ON(guc_log_has_runtime(guc));
 
+	ret = i915_gem_object_set_to_wc_domain(guc->log.vma->obj, true);
+	if (ret)
+		return ret;
+
 	/* Create a WC (Uncached for read) vmalloc mapping of log
 	 * buffer pages, so that we can directly get the data
 	 * (up-to-date) from memory.
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
index f08d0179b3df..95d4aebc0181 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
@@ -138,10 +138,7 @@ static int wc_set(struct drm_i915_gem_object *obj,
 	typeof(v) *map;
 	int err;
 
-	/* XXX GTT write followed by WC write go missing */
-	i915_gem_object_flush_gtt_write_domain(obj);
-
-	err = i915_gem_object_set_to_gtt_domain(obj, true);
+	err = i915_gem_object_set_to_wc_domain(obj, true);
 	if (err)
 		return err;
 
@@ -162,10 +159,7 @@ static int wc_get(struct drm_i915_gem_object *obj,
 	typeof(v) map;
 	int err;
 
-	/* XXX WC write followed by GTT write go missing */
-	i915_gem_object_flush_gtt_write_domain(obj);
-
-	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	err = i915_gem_object_set_to_wc_domain(obj, false);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
index 98b7aac41eec..6664cb2eb0b8 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
@@ -580,7 +580,7 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
 	if (err)
 		goto err;
 
-	err = i915_gem_object_set_to_gtt_domain(obj, true);
+	err = i915_gem_object_set_to_wc_domain(obj, true);
 	if (err)
 		goto err;
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 3554495bef13..9ee06ec8a2d6 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -666,6 +666,8 @@ struct drm_i915_gem_relocation_entry {
 #define I915_GEM_DOMAIN_VERTEX		0x00000020
 /** GTT domain - aperture and scanout */
 #define I915_GEM_DOMAIN_GTT		0x00000040
+/** WC domain - uncached access */
+#define I915_GEM_DOMAIN_WC		0x00000080
 /** @} */
 
 struct drm_i915_gem_exec_object {
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915: Treat WC a separate cache domain
  2017-04-05 21:07 [PATCH] drm/i915: Treat WC a separate cache domain Chris Wilson
@ 2017-04-05 21:25 ` Patchwork
  2017-04-07  7:16 ` [PATCH] " Joonas Lahtinen
  1 sibling, 0 replies; 4+ messages in thread
From: Patchwork @ 2017-04-05 21:25 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Treat WC a separate cache domain
URL   : https://patchwork.freedesktop.org/series/22549/
State : success

== Summary ==

Series 22549v1 drm/i915: Treat WC a separate cache domain
https://patchwork.freedesktop.org/api/1.0/series/22549/revisions/1/mbox/

fi-bdw-5557u     total:278  pass:267  dwarn:0   dfail:0   fail:0   skip:11  time: 432s
fi-bdw-gvtdvm    total:278  pass:256  dwarn:8   dfail:0   fail:0   skip:14  time: 426s
fi-bsw-n3050     total:278  pass:242  dwarn:0   dfail:0   fail:0   skip:36  time: 581s
fi-bxt-j4205     total:278  pass:259  dwarn:0   dfail:0   fail:0   skip:19  time: 508s
fi-bxt-t5700     total:278  pass:258  dwarn:0   dfail:0   fail:0   skip:20  time: 534s
fi-byt-j1900     total:278  pass:254  dwarn:0   dfail:0   fail:0   skip:24  time: 491s
fi-byt-n2820     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28  time: 485s
fi-hsw-4770      total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time: 413s
fi-hsw-4770r     total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time: 415s
fi-ilk-650       total:278  pass:228  dwarn:0   dfail:0   fail:0   skip:50  time: 425s
fi-ivb-3520m     total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time: 490s
fi-ivb-3770      total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time: 474s
fi-kbl-7500u     total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time: 459s
fi-kbl-7560u     total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time: 565s
fi-skl-6260u     total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time: 446s
fi-skl-6700hq    total:278  pass:261  dwarn:0   dfail:0   fail:0   skip:17  time: 566s
fi-skl-6700k     total:278  pass:256  dwarn:4   dfail:0   fail:0   skip:18  time: 457s
fi-skl-6770hq    total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time: 497s
fi-skl-gvtdvm    total:278  pass:265  dwarn:0   dfail:0   fail:0   skip:13  time: 435s
fi-snb-2520m     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28  time: 529s
fi-snb-2600      total:278  pass:249  dwarn:0   dfail:0   fail:0   skip:29  time: 404s

39a0f48c8bc7c528cc705016dafa08a9dedfd36b drm-tip: 2017y-04m-05d-13h-22m-47s UTC integration manifest
72b1aff drm/i915: Treat WC a separate cache domain

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_4415/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] drm/i915: Treat WC a separate cache domain
  2017-04-05 21:07 [PATCH] drm/i915: Treat WC a separate cache domain Chris Wilson
  2017-04-05 21:25 ` ✓ Fi.CI.BAT: success for " Patchwork
@ 2017-04-07  7:16 ` Joonas Lahtinen
  2017-04-07  7:44   ` Chris Wilson
  1 sibling, 1 reply; 4+ messages in thread
From: Joonas Lahtinen @ 2017-04-07  7:16 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ke, 2017-04-05 at 22:07 +0100, Chris Wilson wrote:
> When discussing a new WC mmap, we based the interface upon the
> assumption that GTT was fully coherent. How naive! Commits 3b5724d702ef
> ("drm/i915: Wait for writes through the GTT to land before reading
> back") and ed4596ea992d ("drm/i915/guc: WA to address the Ringbuffer
> coherency issue") demonstrate that writes through the GTT are indeed
> delayed and may be overtaken by direct WC access. To be safe, if
> userspace is mixing WC mmaps with other potential GTT access (pwrite,
> GTT mmaps) it should use set_domain(WC).
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96563
> Testcase: igt/gem_pwrite/small-gtt*
> Testcase: igt/drv_selftest/coherency
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

<SNIP>

> +int
> +i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
> +{
> +	int ret;
> +
> +	lockdep_assert_held(&obj->base.dev->struct_mutex);
> +
> > +	ret = i915_gem_object_wait(obj,
> > +				   I915_WAIT_INTERRUPTIBLE |
> > +				   I915_WAIT_LOCKED |
> +				   (write ? I915_WAIT_ALL : 0),

Could construct into flags variable.

> +	/* Flush and acquire obj->pages so that we are coherent through
> +	 * direct access in memory with previous cached writes through
> +	 * shmemfs and that our cache domain tracking remains valid.
> +	 * For example, if the obj->filp was moved to swap without us
> +	 * being notified and releasing the pages, we would mistakenly
> +	 * continue to assume that the obj remained out of the CPU cached
> +	 * domain.
> +	 */
> +	ret = i915_gem_object_pin_pages(obj);
> +	if (ret)
> +		return ret;
> +
> +	i915_gem_object_flush_cpu_write_domain(obj);
> +	i915_gem_object_flush_gtt_write_domain(obj);

I was thinking if WC should be a "substate" of CPU domain, because we
don't have i915_gem_object_flush_wc_write_domain.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] drm/i915: Treat WC a separate cache domain
  2017-04-07  7:16 ` [PATCH] " Joonas Lahtinen
@ 2017-04-07  7:44   ` Chris Wilson
  0 siblings, 0 replies; 4+ messages in thread
From: Chris Wilson @ 2017-04-07  7:44 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Fri, Apr 07, 2017 at 10:16:52AM +0300, Joonas Lahtinen wrote:
> On ke, 2017-04-05 at 22:07 +0100, Chris Wilson wrote:
> > When discussing a new WC mmap, we based the interface upon the
> > assumption that GTT was fully coherent. How naive! Commits 3b5724d702ef
> > ("drm/i915: Wait for writes through the GTT to land before reading
> > back") and ed4596ea992d ("drm/i915/guc: WA to address the Ringbuffer
> > coherency issue") demonstrate that writes through the GTT are indeed
> > delayed and may be overtaken by direct WC access. To be safe, if
> > userspace is mixing WC mmaps with other potential GTT access (pwrite,
> > GTT mmaps) it should use set_domain(WC).
> > 
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96563
> > Testcase: igt/gem_pwrite/small-gtt*
> > Testcase: igt/drv_selftest/coherency
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> <SNIP>
> 
> > +int
> > +i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
> > +{
> > +	int ret;
> > +
> > +	lockdep_assert_held(&obj->base.dev->struct_mutex);
> > +
> > > +	ret = i915_gem_object_wait(obj,
> > > +				   I915_WAIT_INTERRUPTIBLE |
> > > +				   I915_WAIT_LOCKED |
> > +				   (write ? I915_WAIT_ALL : 0),
> 
> Could construct into flags variable.
> 
> > +	/* Flush and acquire obj->pages so that we are coherent through
> > +	 * direct access in memory with previous cached writes through
> > +	 * shmemfs and that our cache domain tracking remains valid.
> > +	 * For example, if the obj->filp was moved to swap without us
> > +	 * being notified and releasing the pages, we would mistakenly
> > +	 * continue to assume that the obj remained out of the CPU cached
> > +	 * domain.
> > +	 */
> > +	ret = i915_gem_object_pin_pages(obj);
> > +	if (ret)
> > +		return ret;
> > +
> > +	i915_gem_object_flush_cpu_write_domain(obj);
> > +	i915_gem_object_flush_gtt_write_domain(obj);
> 
> I was thinking if WC should be a "substate" of CPU domain, because we
> don't have i915_gem_object_flush_wc_write_domain.

There isn't any to do in i915_gem_object_flush_wc_write_domain()...
Other than a wmb. I guess for completeness that should be added.
Probably via a flush_other_write_domain().
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2017-04-07  7:44 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-05 21:07 [PATCH] drm/i915: Treat WC a separate cache domain Chris Wilson
2017-04-05 21:25 ` ✓ Fi.CI.BAT: success for " Patchwork
2017-04-07  7:16 ` [PATCH] " Joonas Lahtinen
2017-04-07  7:44   ` Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.