* [PATCH v2 1/2] drm/i915: Skip clflushes for all non-page backed objects
@ 2017-02-20 20:59 Chris Wilson
  2017-02-20 20:59 ` [PATCH v2 2/2] drm/i915: Perform object clflushing asynchronously Chris Wilson
  2017-02-20 21:22 ` ✗ Fi.CI.BAT: failure for series starting with [v2,1/2] drm/i915: Skip clflushes for all non-page backed objects Patchwork
  0 siblings, 2 replies; 4+ messages in thread
From: Chris Wilson @ 2017-02-20 20:59 UTC (permalink / raw)
  To: intel-gfx

Generalise the skip for physical and stolen objects by skipping anything
for which we do not have a valid page address inside the sg.
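
(For reference, the helper relied on here amounts to a capability flag on the
object ops; roughly the following sketch, which is not part of this patch:

	static inline bool
	i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
	{
		/* set only by object types whose sg entries map real struct pages */
		return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE;
	}

Physical and stolen objects never set that flag, so both are caught by the one
test without enumerating each special case.)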

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d93032875f28..9d382f110b5c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3155,14 +3155,19 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 	 * to GPU, and we can ignore the cache flush because it'll happen
 	 * again at bind time.
 	 */
-	if (!obj->mm.pages)
+	if (!obj->mm.pages) {
+		GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
 		return;
+	}
 
 	/*
 	 * Stolen memory is always coherent with the GPU as it is explicitly
 	 * marked as wc by the system, or the system is cache-coherent.
+	 * Similarly, we only access struct pages through the CPU cache, so
+	 * anything not backed by physical memory we consider to be always
+	 * coherent and not to need clflushing.
 	 */
-	if (obj->stolen || obj->phys_handle)
+	if (!i915_gem_object_has_struct_page(obj))
 		return;
 
 	/* If the GPU is snooping the contents of the CPU cache,
-- 
2.11.0


* [PATCH v2 2/2] drm/i915: Perform object clflushing asynchronously
  2017-02-20 20:59 [PATCH v2 1/2] drm/i915: Skip clflushes for all non-page backed objects Chris Wilson
@ 2017-02-20 20:59 ` Chris Wilson
  2017-02-21 14:49   ` Joonas Lahtinen
  2017-02-20 21:22 ` ✗ Fi.CI.BAT: failure for series starting with [v2,1/2] drm/i915: Skip clflushes for all non-page backed objects Patchwork
  1 sibling, 1 reply; 4+ messages in thread
From: Chris Wilson @ 2017-02-20 20:59 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

Flushing the cachelines for an object is slow; it can take as much as
100ms for a large framebuffer. We currently do this under the struct_mutex BKL
on execution or on pageflip. But now with the ability to add fences to
obj->resv for both flips and execbuf (and we naturally wait on the fence
before CPU access), we can move the clflush operation to a workqueue and
signal a fence for completion, thereby doing the work asynchronously and
not blocking the driver or its clients.
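
In outline (names abbreviated and error paths omitted; see the patch below for
the real implementation), the flow is:

	clflush = kmalloc(sizeof(*clflush), GFP_KERNEL);
	dma_fence_init(&clflush->dma, &i915_clflush_ops, &clflush_lock, ...);
	i915_sw_fence_init(&clflush->wait, i915_clflush_notify);

	/* order the flush after whatever the GPU is still doing to the object */
	i915_sw_fence_await_reservation(&clflush->wait, obj->resv, ...);
	/* and make later users of the object wait upon the flush in turn */
	reservation_object_add_excl_fence(obj->resv, &clflush->dma);

	/* later, from a workqueue, once the await has completed */
	drm_clflush_sg(obj->mm.pages);
	dma_fence_signal(&clflush->dma);

Callers that still require the old synchronous behaviour pass I915_CLFLUSH_SYNC.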

Suggested-by: Akash Goel <akash.goel@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/Makefile              |   1 +
 drivers/gpu/drm/i915/i915_drv.h            |   8 +-
 drivers/gpu/drm/i915/i915_gem.c            |  66 ++--------
 drivers/gpu/drm/i915/i915_gem_clflush.c    | 192 +++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   8 +-
 drivers/gpu/drm/i915/intel_display.c       |  49 ++++----
 6 files changed, 238 insertions(+), 86 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_clflush.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 1490d8622234..b1b580337c7a 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -29,6 +29,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
 # GEM code
 i915-y += i915_cmd_parser.o \
 	  i915_gem_batch_pool.o \
+	  i915_gem_clflush.o \
 	  i915_gem_context.o \
 	  i915_gem_dmabuf.o \
 	  i915_gem_evict.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4e7046df1b5e..de00768667b0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3389,7 +3389,13 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
 void i915_gem_reset(struct drm_i915_private *dev_priv);
 void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
-void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
+
+void i915_gem_clflush_init(struct drm_i915_private *i915);
+void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+			     unsigned int flags);
+#define I915_CLFLUSH_FORCE BIT(0)
+#define I915_CLFLUSH_SYNC BIT(1)
+
 void i915_gem_init_mmio(struct drm_i915_private *i915);
 int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
 int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9d382f110b5c..c8525d30b025 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1621,23 +1621,17 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_sw_finish *args = data;
 	struct drm_i915_gem_object *obj;
-	int err = 0;
 
 	obj = i915_gem_object_lookup(file, args->handle);
 	if (!obj)
 		return -ENOENT;
 
 	/* Pinned buffers may be scanout, so flush the cache */
-	if (READ_ONCE(obj->pin_display)) {
-		err = i915_mutex_lock_interruptible(dev);
-		if (!err) {
-			i915_gem_object_flush_cpu_write_domain(obj);
-			mutex_unlock(&dev->struct_mutex);
-		}
-	}
+	if (READ_ONCE(obj->pin_display))
+		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
 
 	i915_gem_object_put(obj);
-	return err;
+	return 0;
 }
 
 /**
@@ -3148,46 +3142,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
 	return 0;
 }
 
-void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
-			     bool force)
-{
-	/* If we don't have a page list set up, then we're not pinned
-	 * to GPU, and we can ignore the cache flush because it'll happen
-	 * again at bind time.
-	 */
-	if (!obj->mm.pages) {
-		GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
-		return;
-	}
-
-	/*
-	 * Stolen memory is always coherent with the GPU as it is explicitly
-	 * marked as wc by the system, or the system is cache-coherent.
-	 * Similarly, we only access struct pages through the CPU cache, so
-	 * anything not backed by physical memory we consider to be always
-	 * coherent and not to need clflushing.
-	 */
-	if (!i915_gem_object_has_struct_page(obj))
-		return;
-
-	/* If the GPU is snooping the contents of the CPU cache,
-	 * we do not need to manually clear the CPU cache lines.  However,
-	 * the caches are only snooped when the render cache is
-	 * flushed/invalidated.  As we always have to emit invalidations
-	 * and flushes when moving into and out of the RENDER domain, correct
-	 * snooping behaviour occurs naturally as the result of our domain
-	 * tracking.
-	 */
-	if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
-		obj->cache_dirty = true;
-		return;
-	}
-
-	trace_i915_gem_object_clflush(obj);
-	drm_clflush_sg(obj->mm.pages);
-	obj->cache_dirty = false;
-}
-
 /** Flushes the GTT write domain for the object if it's dirty. */
 static void
 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
@@ -3231,8 +3185,7 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
 		return;
 
-	i915_gem_clflush_object(obj, obj->pin_display);
-	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+	i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
 
 	obj->base.write_domain = 0;
 	trace_i915_gem_object_change_domain(obj,
@@ -3603,10 +3556,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
 
 	/* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
-	if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
-		i915_gem_clflush_object(obj, true);
-		intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
-	}
+	if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
 
 	old_write_domain = obj->base.write_domain;
 	old_read_domains = obj->base.read_domains;
@@ -3680,8 +3631,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 
 	/* Flush the CPU cache if it's still invalid. */
 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
-		i915_gem_clflush_object(obj, false);
-
+		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
 		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
 	}
 
@@ -4553,6 +4503,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
 
+	i915_gem_clflush_init(dev_priv);
+
 	if (!i915.enable_execlists) {
 		dev_priv->gt.resume = intel_legacy_submission_resume;
 		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c
new file mode 100644
index 000000000000..83293705cf2b
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "i915_drv.h"
+#include "intel_frontbuffer.h"
+
+static DEFINE_SPINLOCK(clflush_lock);
+static u64 clflush_context;
+
+struct clflushf {
+	struct dma_fence dma;
+	struct i915_sw_fence wait;
+	struct work_struct work;
+	struct drm_i915_gem_object *obj;
+};
+
+static const char *i915_clflush_get_driver_name(struct dma_fence *fence)
+{
+	return "i915";
+}
+
+static const char *i915_clflush_get_timeline_name(struct dma_fence *fence)
+{
+	return "clflush";
+}
+
+static bool i915_clflush_enable_signaling(struct dma_fence *fence)
+{
+	return true;
+}
+
+static void i915_clflush_release(struct dma_fence *fence)
+{
+	struct clflushf *f = container_of(fence, typeof(*f), dma);
+
+	i915_sw_fence_fini(&f->wait);
+
+	BUILD_BUG_ON(offsetof(typeof(*f), dma));
+	dma_fence_free(&f->dma);
+}
+
+static const struct dma_fence_ops i915_clflush_ops = {
+	.get_driver_name = i915_clflush_get_driver_name,
+	.get_timeline_name = i915_clflush_get_timeline_name,
+	.enable_signaling = i915_clflush_enable_signaling,
+	.wait = dma_fence_default_wait,
+	.release = i915_clflush_release,
+};
+
+static bool cpu_cache_is_coherent(struct drm_device *dev,
+				  enum i915_cache_level level)
+{
+	return HAS_LLC(to_i915(dev)) || level != I915_CACHE_NONE;
+}
+
+static void __i915_do_clflush(struct drm_i915_gem_object *obj)
+{
+	drm_clflush_sg(obj->mm.pages);
+	obj->cache_dirty = false;
+
+	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+}
+
+static void i915_clflush_work(struct work_struct *work)
+{
+	struct clflushf *f = container_of(work, typeof(*f), work);
+	struct drm_i915_gem_object *obj = f->obj;
+
+	if (i915_gem_object_pin_pages(obj)) {
+		obj->cache_dirty = true;
+		goto out;
+	}
+
+	__i915_do_clflush(obj);
+
+	i915_gem_object_unpin_pages(obj);
+
+out:
+	i915_gem_object_put(obj);
+
+	dma_fence_signal(&f->dma);
+	dma_fence_put(&f->dma);
+}
+
+static int __i915_sw_fence_call
+i915_clflush_notify(struct i915_sw_fence *fence,
+		    enum i915_sw_fence_notify state)
+{
+	struct clflushf *f = container_of(fence, typeof(*f), wait);
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		schedule_work(&f->work);
+		break;
+
+	case FENCE_FREE:
+		dma_fence_put(&f->dma);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+			     unsigned int flags)
+{
+	struct clflushf *f;
+
+	/*
+	 * Stolen memory is always coherent with the GPU as it is explicitly
+	 * marked as wc by the system, or the system is cache-coherent.
+	 * Similarly, we only access struct pages through the CPU cache, so
+	 * anything not backed by physical memory we consider to be always
+	 * coherent and not to need clflushing.
+	 */
+	if (!i915_gem_object_has_struct_page(obj))
+		return;
+
+	/* If the GPU is snooping the contents of the CPU cache,
+	 * we do not need to manually clear the CPU cache lines.  However,
+	 * the caches are only snooped when the render cache is
+	 * flushed/invalidated.  As we always have to emit invalidations
+	 * and flushes when moving into and out of the RENDER domain, correct
+	 * snooping behaviour occurs naturally as the result of our domain
+	 * tracking.
+	 */
+	if (!(flags & I915_CLFLUSH_FORCE) &&
+	    cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
+		obj->cache_dirty = true;
+		return;
+	}
+
+	trace_i915_gem_object_clflush(obj);
+
+	f = NULL;
+	if (!(flags & I915_CLFLUSH_SYNC))
+		f = kmalloc(sizeof(*f), GFP_KERNEL);
+	if (f) {
+		dma_fence_init(&f->dma,
+			       &i915_clflush_ops,
+			       &clflush_lock,
+			       clflush_context,
+			       0);
+		i915_sw_fence_init(&f->wait, i915_clflush_notify);
+
+		f->obj = i915_gem_object_get(obj);
+		INIT_WORK(&f->work, i915_clflush_work);
+
+		dma_fence_get(&f->dma); /* 1 for fence, +1 for scheduled work */
+
+		i915_sw_fence_await_reservation(&f->wait,
+						obj->resv, NULL,
+						false, I915_FENCE_TIMEOUT,
+						GFP_KERNEL);
+
+		reservation_object_lock(obj->resv, NULL);
+		reservation_object_add_excl_fence(obj->resv, &f->dma);
+		reservation_object_unlock(obj->resv);
+
+		i915_sw_fence_commit(&f->wait);
+	} else if (obj->mm.pages) {
+		__i915_do_clflush(obj);
+	} else {
+		obj->cache_dirty = true;
+	}
+}
+
+void i915_gem_clflush_init(struct drm_i915_private *i915)
+{
+	clflush_context = dma_fence_context_alloc(1);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index da0846fe2ad6..341bbeb010da 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1114,13 +1114,15 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 		if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
 			continue;
 
+		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) {
+			i915_gem_clflush_object(obj, 0);
+			obj->base.write_domain = 0;
+		}
+
 		ret = i915_gem_request_await_object
 			(req, obj, obj->base.pending_write_domain);
 		if (ret)
 			return ret;
-
-		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
-			i915_gem_clflush_object(obj, false);
 	}
 
 	/* Unconditionally flush any chipset caches (for streaming writes). */
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index f6feb93d4bb1..f10feee58ffd 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -13189,6 +13189,29 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 	struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb);
 	int ret;
 
+	if (obj) {
+		if (plane->type == DRM_PLANE_TYPE_CURSOR &&
+		    INTEL_INFO(dev_priv)->cursor_needs_physical) {
+			const int align = IS_I830(dev_priv) ? 16 * 1024 : 256;
+
+			ret = i915_gem_object_attach_phys(obj, align);
+			if (ret) {
+				DRM_DEBUG_KMS("failed to attach phys object\n");
+				return ret;
+			}
+		} else {
+			struct i915_vma *vma;
+
+			vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
+			if (IS_ERR(vma)) {
+				DRM_DEBUG_KMS("failed to pin object\n");
+				return PTR_ERR(vma);
+			}
+
+			to_intel_plane_state(new_state)->vma = vma;
+		}
+	}
+
 	if (!obj && !old_obj)
 		return 0;
 
@@ -13241,26 +13264,6 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 		i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY);
 	}
 
-	if (plane->type == DRM_PLANE_TYPE_CURSOR &&
-	    INTEL_INFO(dev_priv)->cursor_needs_physical) {
-		int align = IS_I830(dev_priv) ? 16 * 1024 : 256;
-		ret = i915_gem_object_attach_phys(obj, align);
-		if (ret) {
-			DRM_DEBUG_KMS("failed to attach phys object\n");
-			return ret;
-		}
-	} else {
-		struct i915_vma *vma;
-
-		vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
-		if (IS_ERR(vma)) {
-			DRM_DEBUG_KMS("failed to pin object\n");
-			return PTR_ERR(vma);
-		}
-
-		to_intel_plane_state(new_state)->vma = vma;
-	}
-
 	return 0;
 }
 
@@ -14279,15 +14282,11 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb,
 					struct drm_clip_rect *clips,
 					unsigned num_clips)
 {
-	struct drm_device *dev = fb->dev;
 	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
 	struct drm_i915_gem_object *obj = intel_fb->obj;
 
-	mutex_lock(&dev->struct_mutex);
 	if (obj->pin_display && obj->cache_dirty)
-		i915_gem_clflush_object(obj, true);
-	intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
-	mutex_unlock(&dev->struct_mutex);
+		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
 
 	return 0;
 }
-- 
2.11.0


* ✗ Fi.CI.BAT: failure for series starting with [v2,1/2] drm/i915: Skip clflushes for all non-page backed objects
  2017-02-20 20:59 [PATCH v2 1/2] drm/i915: Skip clflushes for all non-page backed objects Chris Wilson
  2017-02-20 20:59 ` [PATCH v2 2/2] drm/i915: Perform object clflushing asynchronously Chris Wilson
@ 2017-02-20 21:22 ` Patchwork
  1 sibling, 0 replies; 4+ messages in thread
From: Patchwork @ 2017-02-20 21:22 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2,1/2] drm/i915: Skip clflushes for all non-page backed objects
URL   : https://patchwork.freedesktop.org/series/19976/
State : failure

== Summary ==

Series 19976v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/19976/revisions/1/mbox/

Test kms_frontbuffer_tracking:
        Subgroup basic:
                pass       -> FAIL       (fi-bxt-j4205)

fi-bdw-5557u     total:253  pass:242  dwarn:0   dfail:0   fail:0   skip:11 
fi-bsw-n3050     total:253  pass:214  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:253  pass:233  dwarn:0   dfail:0   fail:1   skip:19 
fi-bxt-t5700     total:83   pass:70   dwarn:0   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:253  pass:226  dwarn:0   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:253  pass:222  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:253  pass:237  dwarn:0   dfail:0   fail:0   skip:16 
fi-hsw-4770r     total:253  pass:237  dwarn:0   dfail:0   fail:0   skip:16 
fi-ilk-650       total:253  pass:203  dwarn:0   dfail:0   fail:0   skip:50 
fi-ivb-3520m     total:253  pass:235  dwarn:0   dfail:0   fail:0   skip:18 
fi-ivb-3770      total:253  pass:235  dwarn:0   dfail:0   fail:0   skip:18 
fi-kbl-7500u     total:253  pass:235  dwarn:0   dfail:0   fail:0   skip:18 
fi-skl-6260u     total:253  pass:243  dwarn:0   dfail:0   fail:0   skip:10 
fi-skl-6700hq    total:253  pass:236  dwarn:0   dfail:0   fail:0   skip:17 
fi-skl-6700k     total:253  pass:231  dwarn:4   dfail:0   fail:0   skip:18 
fi-skl-6770hq    total:253  pass:243  dwarn:0   dfail:0   fail:0   skip:10 
fi-snb-2520m     total:253  pass:225  dwarn:0   dfail:0   fail:0   skip:28 
fi-snb-2600      total:253  pass:224  dwarn:0   dfail:0   fail:0   skip:29 

0a03ea9496b49746b4964d05cc1f483385d1df8a drm-tip: 2017y-02m-20d-17h-19m-56s UTC integration manifest
d30258f drm/i915: Perform object clflushing asynchronously
dfb7ac8 drm/i915: Skip clflushes for all non-page backed objects

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3905/

* Re: [PATCH v2 2/2] drm/i915: Perform object clflushing asynchronously
  2017-02-20 20:59 ` [PATCH v2 2/2] drm/i915: Perform object clflushing asynchronously Chris Wilson
@ 2017-02-21 14:49   ` Joonas Lahtinen
  0 siblings, 0 replies; 4+ messages in thread
From: Joonas Lahtinen @ 2017-02-21 14:49 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Matthew Auld

On Mon, 2017-02-20 at 20:59 +0000, Chris Wilson wrote:
> Flushing the cachelines for an object is slow; it can take as much as
> 100ms for a large framebuffer. We currently do this under the struct_mutex BKL
> on execution or on pageflip. But now with the ability to add fences to
> obj->resv for both flips and execbuf (and we naturally wait on the fence
> before CPU access), we can move the clflush operation to a workqueue and
> signal a fence for completion, thereby doing the work asynchronously and
> not blocking the driver or its clients.
> 
> Suggested-by: Akash Goel <akash.goel@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Matthew Auld <matthew.auld@intel.com>

<SNIP>

> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3389,7 +3389,13 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
>  void i915_gem_reset(struct drm_i915_private *dev_priv);
>  void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
>  void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
> -void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);

i915_gem_clflush.h
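
i.e. give these a small header of their own, along these lines (assembled from
the declarations in this patch, purely illustrative):

	#ifndef __I915_GEM_CLFLUSH_H__
	#define __I915_GEM_CLFLUSH_H__

	struct drm_i915_private;
	struct drm_i915_gem_object;

	void i915_gem_clflush_init(struct drm_i915_private *i915);
	void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
				     unsigned int flags);

	#define I915_CLFLUSH_FORCE BIT(0)
	#define I915_CLFLUSH_SYNC BIT(1)

	#endif /* __I915_GEM_CLFLUSH_H__ */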

> +
> +void i915_gem_clflush_init(struct drm_i915_private *i915);
> +void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
> +			     unsigned int flags);
> +#define I915_CLFLUSH_FORCE BIT(0)
> +#define I915_CLFLUSH_SYNC BIT(1)
> +
>  void i915_gem_init_mmio(struct drm_i915_private *i915);
>  int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
>  int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv);

<SNIP>

> +static const char *i915_clflush_get_driver_name(struct dma_fence *fence)
> +{
> +	return "i915";

Why not DRIVER_NAME?

> +static void i915_clflush_release(struct dma_fence *fence)
> +{
> +	struct clflushf *f = container_of(fence, typeof(*f), dma);

Better variable and struct name needed.

> +
> +	i915_sw_fence_fini(&f->wait);
> +
> +	BUILD_BUG_ON(offsetof(typeof(*f), dma));

Maybe make a comment at the struct definition, too?
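
Something along these lines, just as a sketch:

	struct clflushf {
		/* Must stay first: dma_fence_free(&f->dma) frees the whole struct */
		struct dma_fence dma;
		struct i915_sw_fence wait;
		struct work_struct work;
		struct drm_i915_gem_object *obj;
	};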

> +static bool cpu_cache_is_coherent(struct drm_device *dev,
> +				  enum i915_cache_level level)
> +{
> +	return HAS_LLC(to_i915(dev)) || level != I915_CACHE_NONE;
> +}

We're not using this elsewhere?

> +
> +static void __i915_do_clflush(struct drm_i915_gem_object *obj)
> +{
> +	drm_clflush_sg(obj->mm.pages);
> +	obj->cache_dirty = false;
> +
> +	intel_fb_obj_flush(obj, false, ORIGIN_CPU);

This being hardcoded, use of ORIGIN_DIRTYFB disappears. Nobody is going
to miss it?

> +}
> +
> +static void i915_clflush_work(struct work_struct *work)
> +{
> +	struct clflushf *f = container_of(work, typeof(*f), work);
> +	struct drm_i915_gem_object *obj = f->obj;
> +
> +	if (i915_gem_object_pin_pages(obj)) {
> +		obj->cache_dirty = true;
> +		goto out;
> +	}

For the time being, I'd just WARN here.
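
i.e. roughly:

	if (WARN_ON(i915_gem_object_pin_pages(obj))) {
		obj->cache_dirty = true;
		goto out;
	}

so that the unexpected failure is at least visible.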

> +
> +	__i915_do_clflush(obj);
> +
> +	i915_gem_object_unpin_pages(obj);
> +
> +out:
> +	i915_gem_object_put(obj);
> +
> +	dma_fence_signal(&f->dma);
> +	dma_fence_put(&f->dma);
> +}
> +

<SNIP>

> @@ -14279,15 +14282,11 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb,
>  					struct drm_clip_rect *clips,
>  					unsigned num_clips)
>  {
> -	struct drm_device *dev = fb->dev;
>  	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
>  	struct drm_i915_gem_object *obj = intel_fb->obj;
>  
> -	mutex_lock(&dev->struct_mutex);
>  	if (obj->pin_display && obj->cache_dirty)
> -		i915_gem_clflush_object(obj, true);
> -	intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
> -	mutex_unlock(&dev->struct_mutex);
> +		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);

You removed mutex_lock, add READ_ONCE for code coherency.
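
i.e. roughly:

	if (READ_ONCE(obj->pin_display) && READ_ONCE(obj->cache_dirty))
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);

matching the READ_ONCE(obj->pin_display) already used in
i915_gem_sw_finish_ioctl().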

With above addressed;

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas

>  
>  	return 0;
>  }
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
