All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 1/6] drm/i915: Remove change_domain tracepoint
@ 2017-02-22  9:41 Chris Wilson
  2017-02-22  9:41 ` [PATCH v3 2/6] drm/i915: Move cpu_cache_is_coherent() to header Chris Wilson
                   ` (6 more replies)
  0 siblings, 7 replies; 13+ messages in thread
From: Chris Wilson @ 2017-02-22  9:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

The change_domain tracepoint has been inaccurate for a few years - it
doesn't fully capture the domains, especially with userspace bypassing
them. It is defunct, misleading and time to be removed.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c            | 30 ------------------------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  3 ---
 drivers/gpu/drm/i915/i915_trace.h          | 24 ------------------------
 3 files changed, 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d93032875f28..9c5f091c771f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3214,9 +3214,6 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
 	intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT));
 
 	obj->base.write_domain = 0;
-	trace_i915_gem_object_change_domain(obj,
-					    obj->base.read_domains,
-					    I915_GEM_DOMAIN_GTT);
 }
 
 /** Flushes the CPU write domain for the object if it's dirty. */
@@ -3230,9 +3227,6 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
 
 	obj->base.write_domain = 0;
-	trace_i915_gem_object_change_domain(obj,
-					    obj->base.read_domains,
-					    I915_GEM_DOMAIN_CPU);
 }
 
 /**
@@ -3246,7 +3240,6 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
 int
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 {
-	uint32_t old_write_domain, old_read_domains;
 	int ret;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
@@ -3284,9 +3277,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
 		mb();
 
-	old_write_domain = obj->base.write_domain;
-	old_read_domains = obj->base.read_domains;
-
 	/* It should now be out of any other write domains, and we can update
 	 * the domain values for our changes.
 	 */
@@ -3298,10 +3288,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 		obj->mm.dirty = true;
 	}
 
-	trace_i915_gem_object_change_domain(obj,
-					    old_read_domains,
-					    old_write_domain);
-
 	i915_gem_object_unpin_pages(obj);
 	return 0;
 }
@@ -3538,7 +3524,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     const struct i915_ggtt_view *view)
 {
 	struct i915_vma *vma;
-	u32 old_read_domains, old_write_domain;
 	int ret;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
@@ -3603,19 +3588,12 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 		intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
 	}
 
-	old_write_domain = obj->base.write_domain;
-	old_read_domains = obj->base.read_domains;
-
 	/* It should now be out of any other write domains, and we can update
 	 * the domain values for our changes.
 	 */
 	obj->base.write_domain = 0;
 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
 
-	trace_i915_gem_object_change_domain(obj,
-					    old_read_domains,
-					    old_write_domain);
-
 	return vma;
 
 err_unpin_display:
@@ -3651,7 +3629,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
 int
 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 {
-	uint32_t old_write_domain, old_read_domains;
 	int ret;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
@@ -3670,9 +3647,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 
 	i915_gem_object_flush_gtt_write_domain(obj);
 
-	old_write_domain = obj->base.write_domain;
-	old_read_domains = obj->base.read_domains;
-
 	/* Flush the CPU cache if it's still invalid. */
 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
 		i915_gem_clflush_object(obj, false);
@@ -3693,10 +3667,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	}
 
-	trace_i915_gem_object_change_domain(obj,
-					    old_read_domains,
-					    old_write_domain);
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index dfed503301a6..6fb29832bc63 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1317,8 +1317,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
-		u32 old_read = obj->base.read_domains;
-		u32 old_write = obj->base.write_domain;
 
 		obj->base.write_domain = obj->base.pending_write_domain;
 		if (obj->base.write_domain)
@@ -1329,7 +1327,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 
 		i915_vma_move_to_active(vma, req, vma->exec_entry->flags);
 		eb_export_fence(obj, req, vma->exec_entry->flags);
-		trace_i915_gem_object_change_domain(obj, old_read, old_write);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 0c2ed1289bfc..2dbef3147a60 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -175,30 +175,6 @@ TRACE_EVENT(i915_vma_unbind,
 		      __entry->obj, __entry->offset, __entry->size, __entry->vm)
 );
 
-TRACE_EVENT(i915_gem_object_change_domain,
-	    TP_PROTO(struct drm_i915_gem_object *obj, u32 old_read, u32 old_write),
-	    TP_ARGS(obj, old_read, old_write),
-
-	    TP_STRUCT__entry(
-			     __field(struct drm_i915_gem_object *, obj)
-			     __field(u32, read_domains)
-			     __field(u32, write_domain)
-			     ),
-
-	    TP_fast_assign(
-			   __entry->obj = obj;
-			   __entry->read_domains = obj->base.read_domains | (old_read << 16);
-			   __entry->write_domain = obj->base.write_domain | (old_write << 16);
-			   ),
-
-	    TP_printk("obj=%p, read=%02x=>%02x, write=%02x=>%02x",
-		      __entry->obj,
-		      __entry->read_domains >> 16,
-		      __entry->read_domains & 0xffff,
-		      __entry->write_domain >> 16,
-		      __entry->write_domain & 0xffff)
-);
-
 TRACE_EVENT(i915_gem_object_pwrite,
 	    TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len),
 	    TP_ARGS(obj, offset, len),
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v3 2/6] drm/i915: Move cpu_cache_is_coherent() to header
  2017-02-22  9:41 [PATCH v3 1/6] drm/i915: Remove change_domain tracepoint Chris Wilson
@ 2017-02-22  9:41 ` Chris Wilson
  2017-02-22 11:02   ` Joonas Lahtinen
  2017-02-22  9:41 ` [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects Chris Wilson
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 13+ messages in thread
From: Chris Wilson @ 2017-02-22  9:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

For use in the next patch, take the current is-coherent helper and add
it to i915_gem_object.h

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h |  6 ++++++
 drivers/gpu/drm/i915/i915_gem.c | 20 ++++++--------------
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4e7046df1b5e..b0e451efb519 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -4111,4 +4111,10 @@ int remap_io_mapping(struct vm_area_struct *vma,
 		     unsigned long addr, unsigned long pfn, unsigned long size,
 		     struct io_mapping *iomap);
 
+static inline bool i915_gem_object_is_coherent(struct drm_i915_gem_object *obj)
+{
+	return (obj->cache_level != I915_CACHE_NONE ||
+		HAS_LLC(to_i915(obj->base.dev)));
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9c5f091c771f..312154b3d04c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -48,18 +48,12 @@ static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
 
-static bool cpu_cache_is_coherent(struct drm_device *dev,
-				  enum i915_cache_level level)
-{
-	return HAS_LLC(to_i915(dev)) || level != I915_CACHE_NONE;
-}
-
 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 {
 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
 		return false;
 
-	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+	if (!i915_gem_object_is_coherent(obj))
 		return true;
 
 	return obj->pin_display;
@@ -255,7 +249,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 
 	if (needs_clflush &&
 	    (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
-	    !cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+	    !i915_gem_object_is_coherent(obj))
 		drm_clflush_sg(pages);
 
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
@@ -796,8 +790,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 	 * anyway again before the next pread happens.
 	 */
 	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
-		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
-							obj->cache_level);
+		*needs_clflush = !i915_gem_object_is_coherent(obj);
 
 	if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		ret = i915_gem_object_set_to_cpu_domain(obj, false);
@@ -853,8 +846,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 	 * before writing.
 	 */
 	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
-		*needs_clflush |= !cpu_cache_is_coherent(obj->base.dev,
-							 obj->cache_level);
+		*needs_clflush |= !i915_gem_object_is_coherent(obj);
 
 	if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		ret = i915_gem_object_set_to_cpu_domain(obj, true);
@@ -3173,7 +3165,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 	 * snooping behaviour occurs naturally as the result of our domain
 	 * tracking.
 	 */
-	if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
+	if (!force && i915_gem_object_is_coherent(obj)) {
 		obj->cache_dirty = true;
 		return;
 	}
@@ -3412,7 +3404,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 	}
 
 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU &&
-	    cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+	    i915_gem_object_is_coherent(obj))
 		obj->cache_dirty = true;
 
 	list_for_each_entry(vma, &obj->vma_list, obj_link)
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects
  2017-02-22  9:41 [PATCH v3 1/6] drm/i915: Remove change_domain tracepoint Chris Wilson
  2017-02-22  9:41 ` [PATCH v3 2/6] drm/i915: Move cpu_cache_is_coherent() to header Chris Wilson
@ 2017-02-22  9:41 ` Chris Wilson
  2017-02-22  9:54   ` Chris Wilson
  2017-02-22 10:57   ` Joonas Lahtinen
  2017-02-22  9:41 ` [PATCH v3 4/6] drm/i915: Skip clflushes for all non-page backed objects Chris Wilson
                   ` (4 subsequent siblings)
  6 siblings, 2 replies; 13+ messages in thread
From: Chris Wilson @ 2017-02-22  9:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c        | 41 +++++++++++++++++++++-------------
 drivers/gpu/drm/i915/i915_gem_object.h |  2 ++
 drivers/gpu/drm/i915/intel_display.c   |  9 ++------
 3 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 312154b3d04c..79ea585e82ac 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1613,23 +1613,16 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_sw_finish *args = data;
 	struct drm_i915_gem_object *obj;
-	int err = 0;
 
 	obj = i915_gem_object_lookup(file, args->handle);
 	if (!obj)
 		return -ENOENT;
 
 	/* Pinned buffers may be scanout, so flush the cache */
-	if (READ_ONCE(obj->pin_display)) {
-		err = i915_mutex_lock_interruptible(dev);
-		if (!err) {
-			i915_gem_object_flush_cpu_write_domain(obj);
-			mutex_unlock(&dev->struct_mutex);
-		}
-	}
-
+	i915_gem_object_flush_to_display(obj);
 	i915_gem_object_put(obj);
-	return err;
+
+	return 0;
 }
 
 /**
@@ -3221,6 +3214,27 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
 	obj->base.write_domain = 0;
 }
 
+static void __i915_gem_object_flush_to_display(struct drm_i915_gem_object *obj)
+{
+	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty)
+		return;
+
+	i915_gem_clflush_object(obj, true);
+	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+
+	obj->base.write_domain = 0;
+}
+
+void i915_gem_object_flush_to_display(struct drm_i915_gem_object *obj)
+{
+	if (!READ_ONCE(obj->pin_display))
+		return;
+
+	mutex_lock(&obj->base.dev->struct_mutex);
+	__i915_gem_object_flush_to_display(obj);
+	mutex_unlock(&obj->base.dev->struct_mutex);
+}
+
 /**
  * Moves a single object to the GTT read, and possibly write domain.
  * @obj: object to act on
@@ -3575,15 +3589,12 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
 
 	/* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
-	if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
-		i915_gem_clflush_object(obj, true);
-		intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
-	}
+	__i915_gem_object_flush_to_display(obj);
+	intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
 
 	/* It should now be out of any other write domains, and we can update
 	 * the domain values for our changes.
 	 */
-	obj->base.write_domain = 0;
 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
 
 	return vma;
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index ef4893a4f08c..2e4d80fb5204 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -364,5 +364,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
 	return engine;
 }
 
+void i915_gem_object_flush_to_display(struct drm_i915_gem_object *obj);
+
 #endif
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index f6feb93d4bb1..c43cc80d993b 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -14279,15 +14279,10 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb,
 					struct drm_clip_rect *clips,
 					unsigned num_clips)
 {
-	struct drm_device *dev = fb->dev;
-	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
-	struct drm_i915_gem_object *obj = intel_fb->obj;
+	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 
-	mutex_lock(&dev->struct_mutex);
-	if (obj->pin_display && obj->cache_dirty)
-		i915_gem_clflush_object(obj, true);
+	i915_gem_object_flush_to_display(obj);
 	intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
-	mutex_unlock(&dev->struct_mutex);
 
 	return 0;
 }
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v3 4/6] drm/i915: Skip clflushes for all non-page backed objects
  2017-02-22  9:41 [PATCH v3 1/6] drm/i915: Remove change_domain tracepoint Chris Wilson
  2017-02-22  9:41 ` [PATCH v3 2/6] drm/i915: Move cpu_cache_is_coherent() to header Chris Wilson
  2017-02-22  9:41 ` [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects Chris Wilson
@ 2017-02-22  9:41 ` Chris Wilson
  2017-02-22  9:41 ` [PATCH v3 5/6] drm/i915: Perform object clflushing asynchronously Chris Wilson
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2017-02-22  9:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Generalise the skip for physical and stolen objects by skipping anything
we do not have a valid address for inside the sg.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 79ea585e82ac..2cf49b5c7594 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3140,14 +3140,19 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 	 * to GPU, and we can ignore the cache flush because it'll happen
 	 * again at bind time.
 	 */
-	if (!obj->mm.pages)
+	if (!obj->mm.pages) {
+		GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
 		return;
+	}
 
 	/*
 	 * Stolen memory is always coherent with the GPU as it is explicitly
 	 * marked as wc by the system, or the system is cache-coherent.
+	 * Similarly, we only access struct pages through the CPU cache, so
+	 * anything not backed by physical memory we consider to be always
+	 * coherent and not need clflushing.
 	 */
-	if (obj->stolen || obj->phys_handle)
+	if (!i915_gem_object_has_struct_page(obj))
 		return;
 
 	/* If the GPU is snooping the contents of the CPU cache,
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v3 5/6] drm/i915: Perform object clflushing asynchronously
  2017-02-22  9:41 [PATCH v3 1/6] drm/i915: Remove change_domain tracepoint Chris Wilson
                   ` (2 preceding siblings ...)
  2017-02-22  9:41 ` [PATCH v3 4/6] drm/i915: Skip clflushes for all non-page backed objects Chris Wilson
@ 2017-02-22  9:41 ` Chris Wilson
  2017-02-22 11:01   ` Joonas Lahtinen
  2017-02-22  9:41 ` [PATCH v3 6/6] drm/i915: Remove 'retire' parameter from intel_fb_obj_flush Chris Wilson
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 13+ messages in thread
From: Chris Wilson @ 2017-02-22  9:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Flushing the cachelines for an object is slow, can be as much as 100ms
for a large framebuffer. We currently do this under the struct_mutex BKL
on execution or on pageflip. But now with the ability to add fences to
obj->resv for both flips and execbuf (and we naturally wait on the fence
before CPU access), we can move the clflush operation to a workqueue and
signal a fence for completion, thereby doing the work asynchronously and
not blocking the driver or its clients.

v2: Introduce i915_gem_clflush.h and use a new name, split out some
extras into separate patches.

Suggested-by: Akash Goel <akash.goel@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/Makefile              |   1 +
 drivers/gpu/drm/i915/i915_drv.h            |   2 +-
 drivers/gpu/drm/i915/i915_gem.c            |  54 +--------
 drivers/gpu/drm/i915/i915_gem_clflush.c    | 189 +++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_clflush.h    |  37 ++++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   9 +-
 drivers/gpu/drm/i915/intel_display.c       |  44 ++++---
 7 files changed, 264 insertions(+), 72 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_clflush.c
 create mode 100644 drivers/gpu/drm/i915/i915_gem_clflush.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 1490d8622234..b1b580337c7a 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -29,6 +29,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
 # GEM code
 i915-y += i915_cmd_parser.o \
 	  i915_gem_batch_pool.o \
+	  i915_gem_clflush.o \
 	  i915_gem_context.o \
 	  i915_gem_dmabuf.o \
 	  i915_gem_evict.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b0e451efb519..efdf16143c46 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3389,7 +3389,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
 void i915_gem_reset(struct drm_i915_private *dev_priv);
 void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
-void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
+
 void i915_gem_init_mmio(struct drm_i915_private *i915);
 int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
 int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2cf49b5c7594..53c16bb4663e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -29,6 +29,7 @@
 #include <drm/drm_vma_manager.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
+#include "i915_gem_clflush.h"
 #include "i915_vgpu.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
@@ -3133,46 +3134,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
 	return 0;
 }
 
-void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
-			     bool force)
-{
-	/* If we don't have a page list set up, then we're not pinned
-	 * to GPU, and we can ignore the cache flush because it'll happen
-	 * again at bind time.
-	 */
-	if (!obj->mm.pages) {
-		GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
-		return;
-	}
-
-	/*
-	 * Stolen memory is always coherent with the GPU as it is explicitly
-	 * marked as wc by the system, or the system is cache-coherent.
-	 * Similarly, we only access struct pages through the CPU cache, so
-	 * anything not backed by physical memory we consider to be always
-	 * coherent and not need clflushing.
-	 */
-	if (!i915_gem_object_has_struct_page(obj))
-		return;
-
-	/* If the GPU is snooping the contents of the CPU cache,
-	 * we do not need to manually clear the CPU cache lines.  However,
-	 * the caches are only snooped when the render cache is
-	 * flushed/invalidated.  As we always have to emit invalidations
-	 * and flushes when moving into and out of the RENDER domain, correct
-	 * snooping behaviour occurs naturally as the result of our domain
-	 * tracking.
-	 */
-	if (!force && i915_gem_object_is_coherent(obj)) {
-		obj->cache_dirty = true;
-		return;
-	}
-
-	trace_i915_gem_object_clflush(obj);
-	drm_clflush_sg(obj->mm.pages);
-	obj->cache_dirty = false;
-}
-
 /** Flushes the GTT write domain for the object if it's dirty. */
 static void
 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
@@ -3213,9 +3174,7 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
 		return;
 
-	i915_gem_clflush_object(obj, obj->pin_display);
-	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
-
+	i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
 	obj->base.write_domain = 0;
 }
 
@@ -3224,9 +3183,7 @@ static void __i915_gem_object_flush_to_display(struct drm_i915_gem_object *obj)
 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty)
 		return;
 
-	i915_gem_clflush_object(obj, true);
-	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
-
+	i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
 	obj->base.write_domain = 0;
 }
 
@@ -3657,8 +3614,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 
 	/* Flush the CPU cache if it's still invalid. */
 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
-		i915_gem_clflush_object(obj, false);
-
+		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
 		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
 	}
 
@@ -4526,6 +4482,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
 
+	i915_gem_clflush_init(dev_priv);
+
 	if (!i915.enable_execlists) {
 		dev_priv->gt.resume = intel_legacy_submission_resume;
 		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c
new file mode 100644
index 000000000000..c5fee02f3173
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "i915_drv.h"
+#include "intel_frontbuffer.h"
+#include "i915_gem_clflush.h"
+
+static DEFINE_SPINLOCK(clflush_lock);
+static u64 clflush_context;
+
+struct clflush {
+	struct dma_fence dma; /* Must be first for dma_fence_free() */
+	struct i915_sw_fence wait;
+	struct work_struct work;
+	struct drm_i915_gem_object *obj;
+};
+
+static const char *i915_clflush_get_driver_name(struct dma_fence *fence)
+{
+	return DRIVER_NAME;
+}
+
+static const char *i915_clflush_get_timeline_name(struct dma_fence *fence)
+{
+	return "clflush";
+}
+
+static bool i915_clflush_enable_signaling(struct dma_fence *fence)
+{
+	return true;
+}
+
+static void i915_clflush_release(struct dma_fence *fence)
+{
+	struct clflush *clflush = container_of(fence, typeof(*clflush), dma);
+
+	i915_sw_fence_fini(&clflush->wait);
+
+	BUILD_BUG_ON(offsetof(typeof(*clflush), dma));
+	dma_fence_free(&clflush->dma);
+}
+
+static const struct dma_fence_ops i915_clflush_ops = {
+	.get_driver_name = i915_clflush_get_driver_name,
+	.get_timeline_name = i915_clflush_get_timeline_name,
+	.enable_signaling = i915_clflush_enable_signaling,
+	.wait = dma_fence_default_wait,
+	.release = i915_clflush_release,
+};
+
+static void __i915_do_clflush(struct drm_i915_gem_object *obj)
+{
+	drm_clflush_sg(obj->mm.pages);
+	obj->cache_dirty = false;
+
+	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+}
+
+static void i915_clflush_work(struct work_struct *work)
+{
+	struct clflush *clflush = container_of(work, typeof(*clflush), work);
+	struct drm_i915_gem_object *obj = clflush->obj;
+
+	if (!obj->cache_dirty)
+		goto out;
+
+	if (i915_gem_object_pin_pages(obj)) {
+		DRM_ERROR("Failed to acquire obj->pages for clflushing\n");
+		goto out;
+	}
+
+	__i915_do_clflush(obj);
+
+	i915_gem_object_unpin_pages(obj);
+
+out:
+	i915_gem_object_put(obj);
+
+	dma_fence_signal(&clflush->dma);
+	dma_fence_put(&clflush->dma);
+}
+
+static int __i915_sw_fence_call
+i915_clflush_notify(struct i915_sw_fence *fence,
+		    enum i915_sw_fence_notify state)
+{
+	struct clflush *clflush = container_of(fence, typeof(*clflush), wait);
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		schedule_work(&clflush->work);
+		break;
+
+	case FENCE_FREE:
+		dma_fence_put(&clflush->dma);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+			     unsigned int flags)
+{
+	struct clflush *clflush;
+
+	/*
+	 * Stolen memory is always coherent with the GPU as it is explicitly
+	 * marked as wc by the system, or the system is cache-coherent.
+	 * Similarly, we only access struct pages through the CPU cache, so
+	 * anything not backed by physical memory we consider to be always
+	 * coherent and not need clflushing.
+	 */
+	if (!i915_gem_object_has_struct_page(obj))
+		return;
+
+	obj->cache_dirty = true;
+
+	/* If the GPU is snooping the contents of the CPU cache,
+	 * we do not need to manually clear the CPU cache lines.  However,
+	 * the caches are only snooped when the render cache is
+	 * flushed/invalidated.  As we always have to emit invalidations
+	 * and flushes when moving into and out of the RENDER domain, correct
+	 * snooping behaviour occurs naturally as the result of our domain
+	 * tracking.
+	 */
+	if (!(flags & I915_CLFLUSH_FORCE) && i915_gem_object_is_coherent(obj))
+		return;
+
+	trace_i915_gem_object_clflush(obj);
+
+	clflush = NULL;
+	if (!(flags & I915_CLFLUSH_SYNC))
+		clflush = kmalloc(sizeof(*clflush), GFP_KERNEL);
+	if (clflush) {
+		dma_fence_init(&clflush->dma,
+			       &i915_clflush_ops,
+			       &clflush_lock,
+			       clflush_context,
+			       0);
+		i915_sw_fence_init(&clflush->wait, i915_clflush_notify);
+
+		clflush->obj = i915_gem_object_get(obj);
+		INIT_WORK(&clflush->work, i915_clflush_work);
+
+		dma_fence_get(&clflush->dma);
+
+		i915_sw_fence_await_reservation(&clflush->wait,
+						obj->resv, NULL,
+						false, I915_FENCE_TIMEOUT,
+						GFP_KERNEL);
+
+		reservation_object_lock(obj->resv, NULL);
+		reservation_object_add_excl_fence(obj->resv, &clflush->dma);
+		reservation_object_unlock(obj->resv);
+
+		i915_sw_fence_commit(&clflush->wait);
+	} else if (obj->mm.pages) {
+		__i915_do_clflush(obj);
+	} else {
+		GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
+	}
+}
+
+void i915_gem_clflush_init(struct drm_i915_private *i915)
+{
+	clflush_context = dma_fence_context_alloc(1);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.h b/drivers/gpu/drm/i915/i915_gem_clflush.h
new file mode 100644
index 000000000000..b62d61a2d15f
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __I915_GEM_CLFLUSH_H__
+#define __I915_GEM_CLFLUSH_H__
+
+struct drm_i915_private;
+struct drm_i915_gem_object;
+
+void i915_gem_clflush_init(struct drm_i915_private *i915);
+void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+			     unsigned int flags);
+#define I915_CLFLUSH_FORCE BIT(0)
+#define I915_CLFLUSH_SYNC BIT(1)
+
+#endif /* __I915_GEM_CLFLUSH_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 6fb29832bc63..35d2cb979452 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -35,6 +35,7 @@
 #include <drm/i915_drm.h>
 
 #include "i915_drv.h"
+#include "i915_gem_clflush.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
@@ -1114,13 +1115,15 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 		if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
 			continue;
 
+		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) {
+			i915_gem_clflush_object(obj, 0);
+			obj->base.write_domain = 0;
+		}
+
 		ret = i915_gem_request_await_object
 			(req, obj, obj->base.pending_write_domain);
 		if (ret)
 			return ret;
-
-		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
-			i915_gem_clflush_object(obj, false);
 	}
 
 	/* Unconditionally flush any chipset caches (for streaming writes). */
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index c43cc80d993b..2e6ec488d5a6 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -37,6 +37,7 @@
 #include "intel_frontbuffer.h"
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
+#include "i915_gem_clflush.h"
 #include "intel_dsi.h"
 #include "i915_trace.h"
 #include <drm/drm_atomic.h>
@@ -13189,6 +13190,29 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 	struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb);
 	int ret;
 
+	if (obj) {
+		if (plane->type == DRM_PLANE_TYPE_CURSOR &&
+		    INTEL_INFO(dev_priv)->cursor_needs_physical) {
+			const int align = IS_I830(dev_priv) ? 16 * 1024 : 256;
+
+			ret = i915_gem_object_attach_phys(obj, align);
+			if (ret) {
+				DRM_DEBUG_KMS("failed to attach phys object\n");
+				return ret;
+			}
+		} else {
+			struct i915_vma *vma;
+
+			vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
+			if (IS_ERR(vma)) {
+				DRM_DEBUG_KMS("failed to pin object\n");
+				return PTR_ERR(vma);
+			}
+
+			to_intel_plane_state(new_state)->vma = vma;
+		}
+	}
+
 	if (!obj && !old_obj)
 		return 0;
 
@@ -13241,26 +13265,6 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 		i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY);
 	}
 
-	if (plane->type == DRM_PLANE_TYPE_CURSOR &&
-	    INTEL_INFO(dev_priv)->cursor_needs_physical) {
-		int align = IS_I830(dev_priv) ? 16 * 1024 : 256;
-		ret = i915_gem_object_attach_phys(obj, align);
-		if (ret) {
-			DRM_DEBUG_KMS("failed to attach phys object\n");
-			return ret;
-		}
-	} else {
-		struct i915_vma *vma;
-
-		vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
-		if (IS_ERR(vma)) {
-			DRM_DEBUG_KMS("failed to pin object\n");
-			return PTR_ERR(vma);
-		}
-
-		to_intel_plane_state(new_state)->vma = vma;
-	}
-
 	return 0;
 }
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v3 6/6] drm/i915: Remove 'retire' parameter from intel_fb_obj_flush
  2017-02-22  9:41 [PATCH v3 1/6] drm/i915: Remove change_domain tracepoint Chris Wilson
                   ` (3 preceding siblings ...)
  2017-02-22  9:41 ` [PATCH v3 5/6] drm/i915: Perform object clflushing asynchronously Chris Wilson
@ 2017-02-22  9:41 ` Chris Wilson
  2017-02-22 11:04   ` Joonas Lahtinen
  2017-02-22 10:22 ` ✓ Fi.CI.BAT: success for series starting with [v3,1/6] drm/i915: Remove change_domain tracepoint Patchwork
  2017-02-22 10:30 ` [PATCH v3 1/6] " Joonas Lahtinen
  6 siblings, 1 reply; 13+ messages in thread
From: Chris Wilson @ 2017-02-22  9:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Setting retire=true is identical to using origin=ORIGIN_CS, so make the
same simplification for intel_fb_obj_flush() as already employed for
intel_fb_obj_invalidate().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c          | 12 ++++++------
 drivers/gpu/drm/i915/i915_gem_clflush.c  |  2 +-
 drivers/gpu/drm/i915/intel_display.c     |  2 +-
 drivers/gpu/drm/i915/intel_frontbuffer.c |  3 +--
 drivers/gpu/drm/i915/intel_frontbuffer.h |  8 ++------
 5 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 53c16bb4663e..9beac37736b5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -628,7 +628,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 	drm_clflush_virt_range(vaddr, args->size);
 	i915_gem_chipset_flush(to_i915(obj->base.dev));
 
-	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+	intel_fb_obj_flush(obj, ORIGIN_CPU);
 	return 0;
 }
 
@@ -1275,7 +1275,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		user_data += page_length;
 		offset += page_length;
 	}
-	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+	intel_fb_obj_flush(obj, ORIGIN_CPU);
 
 	mutex_lock(&i915->drm.struct_mutex);
 out_unpin:
@@ -1411,7 +1411,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 		offset = 0;
 	}
 
-	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+	intel_fb_obj_flush(obj, ORIGIN_CPU);
 	i915_gem_obj_finish_shmem_access(obj);
 	return ret;
 }
@@ -3162,7 +3162,7 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
 	if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv))
 		POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
 
-	intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT));
+	intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT));
 
 	obj->base.write_domain = 0;
 }
@@ -3552,7 +3552,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 
 	/* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
 	__i915_gem_object_flush_to_display(obj);
-	intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
+	intel_fb_obj_flush(obj, ORIGIN_DIRTYFB);
 
 	/* It should now be out of any other write domains, and we can update
 	 * the domain values for our changes.
@@ -3953,7 +3953,7 @@ frontbuffer_retire(struct i915_gem_active *active,
 	struct drm_i915_gem_object *obj =
 		container_of(active, typeof(*obj), frontbuffer_write);
 
-	intel_fb_obj_flush(obj, true, ORIGIN_CS);
+	intel_fb_obj_flush(obj, ORIGIN_CS);
 }
 
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c
index c5fee02f3173..d925fb582ba7 100644
--- a/drivers/gpu/drm/i915/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
@@ -74,7 +74,7 @@ static void __i915_do_clflush(struct drm_i915_gem_object *obj)
 	drm_clflush_sg(obj->mm.pages);
 	obj->cache_dirty = false;
 
-	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+	intel_fb_obj_flush(obj, ORIGIN_CPU);
 }
 
 static void i915_clflush_work(struct work_struct *work)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 2e6ec488d5a6..fb6ddaecd216 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -14286,7 +14286,7 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb,
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 
 	i915_gem_object_flush_to_display(obj);
-	intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
+	intel_fb_obj_flush(obj, ORIGIN_DIRTYFB);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
index 966de4c7c7a2..fcfc217e754e 100644
--- a/drivers/gpu/drm/i915/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
@@ -114,13 +114,12 @@ static void intel_frontbuffer_flush(struct drm_i915_private *dev_priv,
 }
 
 void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
-			  bool retire,
 			  enum fb_op_origin origin,
 			  unsigned int frontbuffer_bits)
 {
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 
-	if (retire) {
+	if (origin == ORIGIN_CS) {
 		spin_lock(&dev_priv->fb_tracking.lock);
 		/* Filter out new bits since rendering started. */
 		frontbuffer_bits &= dev_priv->fb_tracking.busy_bits;
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.h b/drivers/gpu/drm/i915/intel_frontbuffer.h
index 7bab41218cf7..63cd9a753a72 100644
--- a/drivers/gpu/drm/i915/intel_frontbuffer.h
+++ b/drivers/gpu/drm/i915/intel_frontbuffer.h
@@ -38,7 +38,6 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
 			       enum fb_op_origin origin,
 			       unsigned int frontbuffer_bits);
 void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
-			  bool retire,
 			  enum fb_op_origin origin,
 			  unsigned int frontbuffer_bits);
 
@@ -69,15 +68,12 @@ static inline bool intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
 /**
  * intel_fb_obj_flush - flush frontbuffer object
  * @obj: GEM object to flush
- * @retire: set when retiring asynchronous rendering
  * @origin: which operation caused the flush
  *
  * This function gets called every time rendering on the given object has
- * completed and frontbuffer caching can be started again. If @retire is true
- * then any delayed flushes will be unblocked.
+ * completed and frontbuffer caching can be started again.
  */
 static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
-				      bool retire,
 				      enum fb_op_origin origin)
 {
 	unsigned int frontbuffer_bits;
@@ -86,7 +82,7 @@ static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
 	if (!frontbuffer_bits)
 		return;
 
-	__intel_fb_obj_flush(obj, retire, origin, frontbuffer_bits);
+	__intel_fb_obj_flush(obj, origin, frontbuffer_bits);
 }
 
 #endif /* __INTEL_FRONTBUFFER_H__ */
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects
  2017-02-22  9:41 ` [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects Chris Wilson
@ 2017-02-22  9:54   ` Chris Wilson
  2017-02-22 10:57   ` Joonas Lahtinen
  1 sibling, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2017-02-22  9:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

On Wed, Feb 22, 2017 at 09:41:18AM +0000, Chris Wilson wrote:

Reason goes here.

We have three different paths by which userspace wants to flush the
display plane (i.e. objects with obj->pin_display). Use a common helper
to identify those paths and to simplify a later change.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [v3,1/6] drm/i915: Remove change_domain tracepoint
  2017-02-22  9:41 [PATCH v3 1/6] drm/i915: Remove change_domain tracepoint Chris Wilson
                   ` (4 preceding siblings ...)
  2017-02-22  9:41 ` [PATCH v3 6/6] drm/i915: Remove 'retire' parameter from intel_fb_obj_flush Chris Wilson
@ 2017-02-22 10:22 ` Patchwork
  2017-02-22 10:30 ` [PATCH v3 1/6] " Joonas Lahtinen
  6 siblings, 0 replies; 13+ messages in thread
From: Patchwork @ 2017-02-22 10:22 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v3,1/6] drm/i915: Remove change_domain tracepoint
URL   : https://patchwork.freedesktop.org/series/20047/
State : success

== Summary ==

Series 20047v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/20047/revisions/1/mbox/

fi-bdw-5557u     total:253  pass:242  dwarn:0   dfail:0   fail:0   skip:11 
fi-bsw-n3050     total:253  pass:214  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:253  pass:234  dwarn:0   dfail:0   fail:0   skip:19 
fi-bxt-t5700     total:83   pass:70   dwarn:0   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:253  pass:226  dwarn:0   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:253  pass:222  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:253  pass:237  dwarn:0   dfail:0   fail:0   skip:16 
fi-hsw-4770r     total:253  pass:237  dwarn:0   dfail:0   fail:0   skip:16 
fi-ilk-650       total:253  pass:203  dwarn:0   dfail:0   fail:0   skip:50 
fi-ivb-3520m     total:253  pass:235  dwarn:0   dfail:0   fail:0   skip:18 
fi-ivb-3770      total:253  pass:235  dwarn:0   dfail:0   fail:0   skip:18 
fi-kbl-7500u     total:253  pass:235  dwarn:0   dfail:0   fail:0   skip:18 
fi-skl-6260u     total:253  pass:243  dwarn:0   dfail:0   fail:0   skip:10 
fi-skl-6700hq    total:253  pass:236  dwarn:0   dfail:0   fail:0   skip:17 
fi-skl-6700k     total:253  pass:231  dwarn:4   dfail:0   fail:0   skip:18 
fi-skl-6770hq    total:253  pass:243  dwarn:0   dfail:0   fail:0   skip:10 
fi-snb-2520m     total:253  pass:225  dwarn:0   dfail:0   fail:0   skip:28 
fi-snb-2600      total:253  pass:224  dwarn:0   dfail:0   fail:0   skip:29 

e922d8924920be31fc76f5813bc1fde5d6cbf950 drm-tip: 2017y-02m-21d-16h-18m-14s UTC integration manifest
17c552e drm/i915: Remove 'retire' parameter from intel_fb_obj_flush
ca22004 drm/i915: Perform object clflushing asynchronously
c9d53f1 drm/i915: Skip clflushes for all non-page backed objects
13774c5 drm/i915: Amalgamate flushing of display objects
6d01261 drm/i915: Move cpu_cache_is_coherent() to header
2909fce drm/i915: Remove change_domain tracepoint

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3923/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3 1/6] drm/i915: Remove change_domain tracepoint
  2017-02-22  9:41 [PATCH v3 1/6] drm/i915: Remove change_domain tracepoint Chris Wilson
                   ` (5 preceding siblings ...)
  2017-02-22 10:22 ` ✓ Fi.CI.BAT: success for series starting with [v3,1/6] drm/i915: Remove change_domain tracepoint Patchwork
@ 2017-02-22 10:30 ` Joonas Lahtinen
  6 siblings, 0 replies; 13+ messages in thread
From: Joonas Lahtinen @ 2017-02-22 10:30 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: matthew.auld

On ke, 2017-02-22 at 09:41 +0000, Chris Wilson wrote:
> The change_domain tracepoint has been inaccurate for a few years - it
> doesn't fully capture the domains, especially with userspace bypassing
> them. It is defunct, misleading and time to be removed.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects
  2017-02-22  9:41 ` [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects Chris Wilson
  2017-02-22  9:54   ` Chris Wilson
@ 2017-02-22 10:57   ` Joonas Lahtinen
  1 sibling, 0 replies; 13+ messages in thread
From: Joonas Lahtinen @ 2017-02-22 10:57 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: matthew.auld

Add the message from your reply.

On ke, 2017-02-22 at 09:41 +0000, Chris Wilson wrote:
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

<SNIP>

> @@ -1613,23 +1613,16 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
>  {
>  	struct drm_i915_gem_sw_finish *args = data;
>  	struct drm_i915_gem_object *obj;
> -	int err = 0;
>  
>  	obj = i915_gem_object_lookup(file, args->handle);
>  	if (!obj)
>  		return -ENOENT;
>  
>  	/* Pinned buffers may be scanout, so flush the cache */
> -	if (READ_ONCE(obj->pin_display)) {
> -		err = i915_mutex_lock_interruptible(dev);
> -		if (!err) {
> -			i915_gem_object_flush_cpu_write_domain(obj);
> -			mutex_unlock(&dev->struct_mutex);
> -		}
> -	}
> -
> +	i915_gem_object_flush_to_display(obj);

.._flush_if_display() and this is

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3 5/6] drm/i915: Perform object clflushing asynchronously
  2017-02-22  9:41 ` [PATCH v3 5/6] drm/i915: Perform object clflushing asynchronously Chris Wilson
@ 2017-02-22 11:01   ` Joonas Lahtinen
  0 siblings, 0 replies; 13+ messages in thread
From: Joonas Lahtinen @ 2017-02-22 11:01 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: matthew.auld

On ke, 2017-02-22 at 09:41 +0000, Chris Wilson wrote:
> Flushing the cachelines for an object is slow, can be as much as 100ms
> for a large framebuffer. We currently do this under the struct_mutex BKL
> on execution or on pageflip. But now with the ability to add fences to
> obj->resv for both flips and execbuf (and we naturally wait on the fence
> before CPU access), we can move the clflush operation to a workqueue and
> signal a fence for completion, thereby doing the work asynchronously and
> not blocking the driver or its clients.
> 
> v2: Introduce i915_gem_clflush.h and use a new name, split out some
> extras into separate patches.
> 
> Suggested-by: Akash Goel <akash.goel@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Matthew Auld <matthew.auld@intel.com>

Reviewed-by: Joonas Lahtien <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3 2/6] drm/i915: Move cpu_cache_is_coherent() to header
  2017-02-22  9:41 ` [PATCH v3 2/6] drm/i915: Move cpu_cache_is_coherent() to header Chris Wilson
@ 2017-02-22 11:02   ` Joonas Lahtinen
  0 siblings, 0 replies; 13+ messages in thread
From: Joonas Lahtinen @ 2017-02-22 11:02 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: matthew.auld

On ke, 2017-02-22 at 09:41 +0000, Chris Wilson wrote:
> For use in the next patch, take the current is-coherent helper and add
> it to i915_gem_object.h
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3 6/6] drm/i915: Remove 'retire' parameter from intel_fb_obj_flush
  2017-02-22  9:41 ` [PATCH v3 6/6] drm/i915: Remove 'retire' parameter from intel_fb_obj_flush Chris Wilson
@ 2017-02-22 11:04   ` Joonas Lahtinen
  0 siblings, 0 replies; 13+ messages in thread
From: Joonas Lahtinen @ 2017-02-22 11:04 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: matthew.auld

On ke, 2017-02-22 at 09:41 +0000, Chris Wilson wrote:
> Setting retire=true is identical to using origin=ORIGIN_CS, so make the
> same simplification for intel_fb_obj_flush() as already employed for
> intel_fb_obj_invalidate().
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2017-02-22 11:04 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-22  9:41 [PATCH v3 1/6] drm/i915: Remove change_domain tracepoint Chris Wilson
2017-02-22  9:41 ` [PATCH v3 2/6] drm/i915: Move cpu_cache_is_coherent() to header Chris Wilson
2017-02-22 11:02   ` Joonas Lahtinen
2017-02-22  9:41 ` [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects Chris Wilson
2017-02-22  9:54   ` Chris Wilson
2017-02-22 10:57   ` Joonas Lahtinen
2017-02-22  9:41 ` [PATCH v3 4/6] drm/i915: Skip clflushes for all non-page backed objects Chris Wilson
2017-02-22  9:41 ` [PATCH v3 5/6] drm/i915: Perform object clflushing asynchronously Chris Wilson
2017-02-22 11:01   ` Joonas Lahtinen
2017-02-22  9:41 ` [PATCH v3 6/6] drm/i915: Remove 'retire' parameter from intel_fb_obj_flush Chris Wilson
2017-02-22 11:04   ` Joonas Lahtinen
2017-02-22 10:22 ` ✓ Fi.CI.BAT: success for series starting with [v3,1/6] drm/i915: Remove change_domain tracepoint Patchwork
2017-02-22 10:30 ` [PATCH v3 1/6] " Joonas Lahtinen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.