Remove defunct flushing list (v2)

All of lore.kernel.org
 help / color / mirror / Atom feed

* Remove defunct flushing list (v2)
@ 2012-07-13 13:14 Chris Wilson
  2012-07-13 13:14 ` [PATCH 01/13] drm/i915: Flush the context object from the CPU caches upon creation Chris Wilson
                   ` (12 more replies)
  0 siblings, 13 replies; 29+ messages in thread
From: Chris Wilson @ 2012-07-13 13:14 UTC (permalink / raw)
  To: intel-gfx

This is turning into a dinq BUG_ON special. Have fun!
-chris

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 01/13] drm/i915: Flush the context object from the CPU caches upon creation
  2012-07-13 13:14 Remove defunct flushing list (v2) Chris Wilson
@ 2012-07-13 13:14 ` Chris Wilson
  2012-07-13 15:28   ` Ben Widawsky
                     ` (2 more replies)
  2012-07-13 13:14 ` [PATCH 02/13] drm/i915: fix invalid reference handling of the default ctx obj Chris Wilson
                   ` (11 subsequent siblings)
  12 siblings, 3 replies; 29+ messages in thread
From: Chris Wilson @ 2012-07-13 13:14 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ben Widawsky

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_gem_context.c |   12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 9ae3f2c..90857f8 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -225,6 +225,13 @@ static int create_default_context(struct drm_i915_private *dev_priv)
 		return ret;
 	}
 
+	ret = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
+	if (ret) {
+		i915_gem_object_unpin(ctx->obj);
+		do_destroy(ctx);
+		return ret;
+	}
+
 	ret = do_switch(NULL, ctx, 0);
 	if (ret) {
 		i915_gem_object_unpin(ctx->obj);
@@ -396,8 +403,6 @@ static int do_switch(struct drm_i915_gem_object *from_obj,
 	 * MI_SET_CONTEXT instead of when the next seqno has completed.
 	 */
 	if (from_obj != NULL) {
-		from_obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-		i915_gem_object_move_to_active(from_obj, ring, seqno);
 		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
 		 * whole damn pipeline, we don't need to explicitly mark the
 		 * object dirty. The only exception is that the context must be
@@ -405,6 +410,9 @@ static int do_switch(struct drm_i915_gem_object *from_obj,
 		 * able to defer doing this until we know the object would be
 		 * swapped, but there is no way to do that yet.
 		 */
+		from_obj->base.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+		from_obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+		i915_gem_object_move_to_active(from_obj, ring, seqno);
 		from_obj->dirty = 1;
 		BUG_ON(from_obj->ring != to->ring);
 		i915_gem_object_unpin(from_obj);
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 02/13] drm/i915: fix invalid reference handling of the default ctx obj
  2012-07-13 13:14 Remove defunct flushing list (v2) Chris Wilson
  2012-07-13 13:14 ` [PATCH 01/13] drm/i915: Flush the context object from the CPU caches upon creation Chris Wilson
@ 2012-07-13 13:14 ` Chris Wilson
  2012-07-13 15:25   ` Ben Widawsky
  2012-07-13 15:37   ` Daniel Vetter
  2012-07-13 13:14 ` [PATCH 03/13] drm/i915: Allow late allocation of request for i915_add_request() Chris Wilson
                   ` (10 subsequent siblings)
  12 siblings, 2 replies; 29+ messages in thread
From: Chris Wilson @ 2012-07-13 13:14 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ben Widawsky

Otherwise we end up trying to unpin a freed object and BUG.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_gem_context.c |   82 +++++++++++--------------------
 1 file changed, 30 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 90857f8..b0a855f 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -97,8 +97,7 @@
 
 static struct i915_hw_context *
 i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id);
-static int do_switch(struct drm_i915_gem_object *from_obj,
-		     struct i915_hw_context *to, u32 seqno);
+static int do_switch(struct i915_hw_context *to);
 
 static int get_context_size(struct drm_device *dev)
 {
@@ -220,26 +219,24 @@ static int create_default_context(struct drm_i915_private *dev_priv)
 	 */
 	dev_priv->ring[RCS].default_context = ctx;
 	ret = i915_gem_object_pin(ctx->obj, CONTEXT_ALIGN, false);
-	if (ret) {
-		do_destroy(ctx);
-		return ret;
-	}
+	if (ret)
+		goto err_destroy;
 
 	ret = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
-	if (ret) {
-		i915_gem_object_unpin(ctx->obj);
-		do_destroy(ctx);
-		return ret;
-	}
+	if (ret)
+		goto err_unpin;
 
-	ret = do_switch(NULL, ctx, 0);
-	if (ret) {
-		i915_gem_object_unpin(ctx->obj);
-		do_destroy(ctx);
-	} else {
-		DRM_DEBUG_DRIVER("Default HW context loaded\n");
-	}
+	ret = do_switch(ctx);
+	if (ret)
+		goto err_unpin;
 
+	DRM_DEBUG_DRIVER("Default HW context loaded\n");
+	return 0;
+
+err_unpin:
+	i915_gem_object_unpin(ctx->obj);
+err_destroy:
+	do_destroy(ctx);
 	return ret;
 }
 
@@ -366,16 +363,14 @@ mi_set_context(struct intel_ring_buffer *ring,
 	return ret;
 }
 
-static int do_switch(struct drm_i915_gem_object *from_obj,
-		     struct i915_hw_context *to,
-		     u32 seqno)
+static int do_switch(struct i915_hw_context *to)
 {
-	struct intel_ring_buffer *ring = NULL;
+	struct drm_i915_gem_object *from_obj = to->ring->last_context_obj;
 	u32 hw_flags = 0;
 	int ret;
 
-	BUG_ON(to == NULL);
-	BUG_ON(from_obj != NULL && from_obj->pin_count == 0);
+	if (from_obj == to->obj)
+		return 0;
 
 	ret = i915_gem_object_pin(to->obj, CONTEXT_ALIGN, false);
 	if (ret)
@@ -389,8 +384,7 @@ static int do_switch(struct drm_i915_gem_object *from_obj,
 	else if (WARN_ON_ONCE(from_obj == to->obj)) /* not yet expected */
 		hw_flags |= MI_FORCE_RESTORE;
 
-	ring = to->ring;
-	ret = mi_set_context(ring, to, hw_flags);
+	ret = mi_set_context(to->ring, to, hw_flags);
 	if (ret) {
 		i915_gem_object_unpin(to->obj);
 		return ret;
@@ -403,6 +397,7 @@ static int do_switch(struct drm_i915_gem_object *from_obj,
 	 * MI_SET_CONTEXT instead of when the next seqno has completed.
 	 */
 	if (from_obj != NULL) {
+		u32 seqno = i915_gem_next_request_seqno(to->ring);
 		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
 		 * whole damn pipeline, we don't need to explicitly mark the
 		 * object dirty. The only exception is that the context must be
@@ -412,13 +407,15 @@ static int do_switch(struct drm_i915_gem_object *from_obj,
 		 */
 		from_obj->base.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
 		from_obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-		i915_gem_object_move_to_active(from_obj, ring, seqno);
+		i915_gem_object_move_to_active(from_obj, to->ring, seqno);
 		from_obj->dirty = 1;
 		BUG_ON(from_obj->ring != to->ring);
 		i915_gem_object_unpin(from_obj);
+		drm_gem_object_unreference(&from_obj->base);
 	}
 
-	ring->last_context_obj = to->obj;
+	drm_gem_object_reference(&to->obj->base);
+	to->ring->last_context_obj = to->obj;
 	to->is_initialized = true;
 
 	return 0;
@@ -442,10 +439,7 @@ int i915_switch_context(struct intel_ring_buffer *ring,
 			int to_id)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	struct drm_i915_file_private *file_priv = NULL;
 	struct i915_hw_context *to;
-	struct drm_i915_gem_object *from_obj = ring->last_context_obj;
-	int ret;
 
 	if (dev_priv->hw_contexts_disabled)
 		return 0;
@@ -453,34 +447,18 @@ int i915_switch_context(struct intel_ring_buffer *ring,
 	if (ring != &dev_priv->ring[RCS])
 		return 0;
 
-	if (file)
-		file_priv = file->driver_priv;
-
 	if (to_id == DEFAULT_CONTEXT_ID) {
 		to = ring->default_context;
 	} else {
-		to = i915_gem_context_get(file_priv, to_id);
+		if (file == NULL)
+			return -EINVAL;
+
+		to = i915_gem_context_get(file->driver_priv, to_id);
 		if (to == NULL)
 			return -ENOENT;
 	}
 
-	if (from_obj == to->obj)
-		return 0;
-
-	ret = do_switch(from_obj, to, i915_gem_next_request_seqno(to->ring));
-	if (ret)
-		return ret;
-
-	/* Just to make the code a little cleaner we take the object reference
-	 * after the switch was successful. It would be more intuitive to ref
-	 * the 'to' object before the switch but we know the refcount must be >0
-	 * if context_get() succeeded, and we hold struct mutex. So it's safe to
-	 * do this here/now
-	 */
-	drm_gem_object_reference(&to->obj->base);
-	if (from_obj != NULL)
-		drm_gem_object_unreference(&from_obj->base);
-	return ret;
+	return do_switch(to);
 }
 
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 03/13] drm/i915: Allow late allocation of request for i915_add_request()
  2012-07-13 13:14 Remove defunct flushing list (v2) Chris Wilson
  2012-07-13 13:14 ` [PATCH 01/13] drm/i915: Flush the context object from the CPU caches upon creation Chris Wilson
  2012-07-13 13:14 ` [PATCH 02/13] drm/i915: fix invalid reference handling of the default ctx obj Chris Wilson
@ 2012-07-13 13:14 ` Chris Wilson
  2012-07-13 13:14 ` [PATCH 04/13] drm/i915: Replace the pending_gpu_write flag with an explicit seqno Chris Wilson
                   ` (9 subsequent siblings)
  12 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2012-07-13 13:14 UTC (permalink / raw)
  To: intel-gfx

Request preallocation was added to i915_add_request() in order to
support the overlay. However, not all users care and can quite happily
ignore the failure to allocate the request as they will simply repeat
the request in the future.

By pushing the allocation down into i915_add_request(), we can then
remove some rather ugly error handling in the callers.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            |    6 ++--
 drivers/gpu/drm/i915/i915_gem.c            |   43 +++++++++++-----------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |    7 +----
 3 files changed, 20 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 627fe35..51e234e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1358,9 +1358,9 @@ void i915_gem_init_ppgtt(struct drm_device *dev);
 void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
 int __must_check i915_gpu_idle(struct drm_device *dev);
 int __must_check i915_gem_idle(struct drm_device *dev);
-int __must_check i915_add_request(struct intel_ring_buffer *ring,
-				  struct drm_file *file,
-				  struct drm_i915_gem_request *request);
+int i915_add_request(struct intel_ring_buffer *ring,
+		     struct drm_file *file,
+		     struct drm_i915_gem_request *request);
 int __must_check i915_wait_seqno(struct intel_ring_buffer *ring,
 				 uint32_t seqno);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 53c3946..1782369 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1597,7 +1597,12 @@ i915_add_request(struct intel_ring_buffer *ring,
 		ring->gpu_caches_dirty = false;
 	}
 
-	BUG_ON(request == NULL);
+	if (request == NULL) {
+		request = kmalloc(sizeof(*request), GFP_KERNEL);
+		if (request == NULL)
+			return -ENOMEM;
+	}
+
 	seqno = i915_gem_next_request_seqno(ring);
 
 	/* Record the position of the start of the request so that
@@ -1608,8 +1613,10 @@ i915_add_request(struct intel_ring_buffer *ring,
 	request_ring_position = intel_ring_get_tail(ring);
 
 	ret = ring->add_request(ring, &seqno);
-	if (ret)
-	    return ret;
+	if (ret) {
+		kfree(request);
+		return ret;
+	}
 
 	trace_i915_gem_request_add(ring, seqno);
 
@@ -1859,14 +1866,8 @@ i915_gem_retire_work_handler(struct work_struct *work)
 	 */
 	idle = true;
 	for_each_ring(ring, dev_priv, i) {
-		if (ring->gpu_caches_dirty) {
-			struct drm_i915_gem_request *request;
-
-			request = kzalloc(sizeof(*request), GFP_KERNEL);
-			if (request == NULL ||
-			    i915_add_request(ring, NULL, request))
-			    kfree(request);
-		}
+		if (ring->gpu_caches_dirty)
+			i915_add_request(ring, NULL, NULL);
 
 		idle &= list_empty(&ring->request_list);
 	}
@@ -1913,25 +1914,13 @@ i915_gem_check_wedge(struct drm_i915_private *dev_priv,
 static int
 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
 {
-	int ret = 0;
+	int ret;
 
 	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
 
-	if (seqno == ring->outstanding_lazy_request) {
-		struct drm_i915_gem_request *request;
-
-		request = kzalloc(sizeof(*request), GFP_KERNEL);
-		if (request == NULL)
-			return -ENOMEM;
-
-		ret = i915_add_request(ring, NULL, request);
-		if (ret) {
-			kfree(request);
-			return ret;
-		}
-
-		BUG_ON(seqno != request->seqno);
-	}
+	ret = 0;
+	if (seqno == ring->outstanding_lazy_request)
+		ret = i915_add_request(ring, NULL, NULL);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 88e2e11..0c26692 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -967,16 +967,11 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
 				    struct drm_file *file,
 				    struct intel_ring_buffer *ring)
 {
-	struct drm_i915_gem_request *request;
-
 	/* Unconditionally force add_request to emit a full flush. */
 	ring->gpu_caches_dirty = true;
 
 	/* Add a breadcrumb for the completion of the batch buffer */
-	request = kzalloc(sizeof(*request), GFP_KERNEL);
-	if (request == NULL || i915_add_request(ring, file, request)) {
-		kfree(request);
-	}
+	(void)i915_add_request(ring, file, NULL);
 }
 
 static int
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 04/13] drm/i915: Replace the pending_gpu_write flag with an explicit seqno
  2012-07-13 13:14 Remove defunct flushing list (v2) Chris Wilson
                   ` (2 preceding siblings ...)
  2012-07-13 13:14 ` [PATCH 03/13] drm/i915: Allow late allocation of request for i915_add_request() Chris Wilson
@ 2012-07-13 13:14 ` Chris Wilson
  2012-07-13 15:41   ` Daniel Vetter
  2012-07-13 13:14 ` [PATCH 05/13] drm/i915: Insert a flush between batches if the breadcrumb was dropped Chris Wilson
                   ` (8 subsequent siblings)
  12 siblings, 1 reply; 29+ messages in thread
From: Chris Wilson @ 2012-07-13 13:14 UTC (permalink / raw)
  To: intel-gfx

As we always flush the GPU cache prior to emitting the breadcrumb, we no
longer have to worry about the deferred flush causing the
pending_gpu_write to be delayed. So we can instead utilize the known
last_write_seqno to hopefully minimise the wait times.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |    9 ++--
 drivers/gpu/drm/i915/i915_drv.h            |   12 ++---
 drivers/gpu/drm/i915/i915_gem.c            |   66 ++++++++++++++++------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |    2 +-
 drivers/gpu/drm/i915/i915_irq.c            |    5 ++-
 5 files changed, 51 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 359f6e8..a8b7db6 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -121,14 +121,15 @@ static const char *cache_level_str(int type)
 static void
 describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 {
-	seq_printf(m, "%p: %s%s %8zdKiB %04x %04x %d %d%s%s%s",
+	seq_printf(m, "%p: %s%s %8zdKiB %04x %04x %d %d %d%s%s%s",
 		   &obj->base,
 		   get_pin_flag(obj),
 		   get_tiling_flag(obj),
 		   obj->base.size / 1024,
 		   obj->base.read_domains,
 		   obj->base.write_domain,
-		   obj->last_rendering_seqno,
+		   obj->last_read_seqno,
+		   obj->last_write_seqno,
 		   obj->last_fenced_seqno,
 		   cache_level_str(obj->cache_level),
 		   obj->dirty ? " dirty" : "",
@@ -630,12 +631,12 @@ static void print_error_buffers(struct seq_file *m,
 	seq_printf(m, "%s [%d]:\n", name, count);
 
 	while (count--) {
-		seq_printf(m, "  %08x %8u %04x %04x %08x%s%s%s%s%s%s%s",
+		seq_printf(m, "  %08x %8u %04x %04x %x %x%s%s%s%s%s%s%s",
 			   err->gtt_offset,
 			   err->size,
 			   err->read_domains,
 			   err->write_domain,
-			   err->seqno,
+			   err->rseqno, err->wseqno,
 			   pin_flag(err->pinned),
 			   tiling_flag(err->tiling),
 			   dirty_flag(err->dirty),
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 51e234e..b398b72 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -221,7 +221,7 @@ struct drm_i915_error_state {
 	struct drm_i915_error_buffer {
 		u32 size;
 		u32 name;
-		u32 seqno;
+		u32 rseqno, wseqno;
 		u32 gtt_offset;
 		u32 read_domains;
 		u32 write_domain;
@@ -895,12 +895,6 @@ struct drm_i915_gem_object {
 	unsigned int dirty:1;
 
 	/**
-	 * This is set if the object has been written to since the last
-	 * GPU flush.
-	 */
-	unsigned int pending_gpu_write:1;
-
-	/**
 	 * Fence register bits (if any) for this object.  Will be set
 	 * as needed when mapped into the GTT.
 	 * Protected by dev->struct_mutex.
@@ -992,7 +986,8 @@ struct drm_i915_gem_object {
 	struct intel_ring_buffer *ring;
 
 	/** Breadcrumb of last rendering to the buffer. */
-	uint32_t last_rendering_seqno;
+	uint32_t last_read_seqno;
+	uint32_t last_write_seqno;
 	/** Breadcrumb of last fenced GPU access to the buffer. */
 	uint32_t last_fenced_seqno;
 
@@ -1291,7 +1286,6 @@ void i915_gem_lastclose(struct drm_device *dev);
 int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
 				  gfp_t gfpmask);
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
-int __must_check i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
 			 struct intel_ring_buffer *to);
 void i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1782369..49b8864 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1441,7 +1441,7 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
 	list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
 	list_move_tail(&obj->ring_list, &ring->active_list);
 
-	obj->last_rendering_seqno = seqno;
+	obj->last_read_seqno = seqno;
 
 	if (obj->fenced_gpu_access) {
 		obj->last_fenced_seqno = seqno;
@@ -1461,7 +1461,8 @@ static void
 i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
 {
 	list_del_init(&obj->ring_list);
-	obj->last_rendering_seqno = 0;
+	obj->last_read_seqno = 0;
+	obj->last_write_seqno = 0;
 	obj->last_fenced_seqno = 0;
 }
 
@@ -1493,7 +1494,6 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 	obj->fenced_gpu_access = false;
 
 	obj->active = 0;
-	obj->pending_gpu_write = false;
 	drm_gem_object_unreference(&obj->base);
 
 	WARN_ON(i915_verify_lists(dev));
@@ -1811,7 +1811,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
 				      struct drm_i915_gem_object,
 				      ring_list);
 
-		if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))
+		if (!i915_seqno_passed(seqno, obj->last_read_seqno))
 			break;
 
 		if (obj->base.write_domain != 0)
@@ -2034,9 +2034,11 @@ i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
  * Ensures that all rendering to the object has completed and the object is
  * safe to unbind from the GTT or access from the CPU.
  */
-int
-i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
+static __must_check int
+i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
+			       bool readonly)
 {
+	u32 seqno;
 	int ret;
 
 	/* This function only exists to support waiting for existing rendering,
@@ -2047,13 +2049,27 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
 	/* If there is rendering queued on the buffer being evicted, wait for
 	 * it.
 	 */
-	if (obj->active) {
-		ret = i915_wait_seqno(obj->ring, obj->last_rendering_seqno);
-		if (ret)
-			return ret;
-		i915_gem_retire_requests_ring(obj->ring);
+	if (readonly)
+		seqno = obj->last_write_seqno;
+	else
+		seqno = obj->last_read_seqno;
+	if (seqno == 0)
+		return 0;
+
+	ret = i915_wait_seqno(obj->ring, seqno);
+	if (ret)
+		return ret;
+
+	/* Manually manage the write flush as we may have not yet retired
+	 * the buffer.
+	 */
+	if (obj->last_write_seqno &&
+	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
+		obj->last_write_seqno = 0;
+		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
 	}
 
+	i915_gem_retire_requests_ring(obj->ring);
 	return 0;
 }
 
@@ -2072,10 +2088,10 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
 		if (ret)
 			return ret;
 
-		ret = i915_gem_check_olr(obj->ring,
-					 obj->last_rendering_seqno);
+		ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
 		if (ret)
 			return ret;
+
 		i915_gem_retire_requests_ring(obj->ring);
 	}
 
@@ -2135,7 +2151,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		goto out;
 
 	if (obj->active) {
-		seqno = obj->last_rendering_seqno;
+		seqno = obj->last_read_seqno;
 		ring = obj->ring;
 	}
 
@@ -2190,11 +2206,11 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
 		return 0;
 
 	if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
-		return i915_gem_object_wait_rendering(obj);
+		return i915_gem_object_wait_rendering(obj, false);
 
 	idx = intel_ring_sync_index(from, to);
 
-	seqno = obj->last_rendering_seqno;
+	seqno = obj->last_read_seqno;
 	if (seqno <= from->sync_seqno[idx])
 		return 0;
 
@@ -2938,11 +2954,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	if (obj->pending_gpu_write || write) {
-		ret = i915_gem_object_wait_rendering(obj);
-		if (ret)
-			return ret;
-	}
+	ret = i915_gem_object_wait_rendering(obj, !write);
+	if (ret)
+		return ret;
 
 	i915_gem_object_flush_cpu_write_domain(obj);
 
@@ -3113,7 +3127,7 @@ i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
 			return ret;
 	}
 
-	ret = i915_gem_object_wait_rendering(obj);
+	ret = i915_gem_object_wait_rendering(obj, false);
 	if (ret)
 		return ret;
 
@@ -3141,11 +3155,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	if (write || obj->pending_gpu_write) {
-		ret = i915_gem_object_wait_rendering(obj);
-		if (ret)
-			return ret;
-	}
+	ret = i915_gem_object_wait_rendering(obj, !write);
+	if (ret)
+		return ret;
 
 	i915_gem_object_flush_gtt_write_domain(obj);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0c26692..50e83e5 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -949,7 +949,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
 		i915_gem_object_move_to_active(obj, ring, seqno);
 		if (obj->base.write_domain) {
 			obj->dirty = 1;
-			obj->pending_gpu_write = true;
+			obj->last_write_seqno = seqno;
 			list_move_tail(&obj->gpu_write_list,
 				       &ring->gpu_write_list);
 			if (obj->pin_count) /* check for potential scanout */
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 566f61b..41ed41d 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -950,7 +950,8 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 {
 	err->size = obj->base.size;
 	err->name = obj->base.name;
-	err->seqno = obj->last_rendering_seqno;
+	err->rseqno = obj->last_read_seqno;
+	err->wseqno = obj->last_write_seqno;
 	err->gtt_offset = obj->gtt_offset;
 	err->read_domains = obj->base.read_domains;
 	err->write_domain = obj->base.write_domain;
@@ -1045,7 +1046,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
 		if (obj->ring != ring)
 			continue;
 
-		if (i915_seqno_passed(seqno, obj->last_rendering_seqno))
+		if (i915_seqno_passed(seqno, obj->last_read_seqno))
 			continue;
 
 		if ((obj->base.read_domains & I915_GEM_DOMAIN_COMMAND) == 0)
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 05/13] drm/i915: Insert a flush between batches if the breadcrumb was dropped
  2012-07-13 13:14 Remove defunct flushing list (v2) Chris Wilson
                   ` (3 preceding siblings ...)
  2012-07-13 13:14 ` [PATCH 04/13] drm/i915: Replace the pending_gpu_write flag with an explicit seqno Chris Wilson
@ 2012-07-13 13:14 ` Chris Wilson
  2012-07-13 15:46   ` Daniel Vetter
  2012-07-14 13:39   ` Daniel Vetter
  2012-07-13 13:14 ` [PATCH 06/13] drm/i915: Remove the defunct flushing list Chris Wilson
                   ` (7 subsequent siblings)
  12 siblings, 2 replies; 29+ messages in thread
From: Chris Wilson @ 2012-07-13 13:14 UTC (permalink / raw)
  To: intel-gfx

If we drop the breadcrumb request after a batch due to a signal for
example we aim to fix it up at the next opportunity. In this case we
emit a second batchbuffer with no waits upon the first and so no
opportunity to insert the missing request, so we need to emit the
missing flush for coherency. (Note that that invalidating the render
cache is the same as flushing it, so there should have been no
observable corruption.)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |    9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 50e83e5..c08229f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -885,11 +885,16 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 			return ret;
 	}
 
-	/* Unconditionally invalidate gpu caches. */
-	ret = i915_gem_flush_ring(ring, I915_GEM_GPU_DOMAINS, 0);
+	/* Unconditionally invalidate gpu caches and ensure that we do flush
+	 * any residual writes from the previous batch.
+	 */
+	ret = i915_gem_flush_ring(ring,
+				  I915_GEM_GPU_DOMAINS,
+				  ring->gpu_caches_dirty ? I915_GEM_GPU_DOMAINS : 0);
 	if (ret)
 		return ret;
 
+	ring->gpu_caches_dirty = false;
 	return 0;
 }
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 06/13] drm/i915: Remove the defunct flushing list
  2012-07-13 13:14 Remove defunct flushing list (v2) Chris Wilson
                   ` (4 preceding siblings ...)
  2012-07-13 13:14 ` [PATCH 05/13] drm/i915: Insert a flush between batches if the breadcrumb was dropped Chris Wilson
@ 2012-07-13 13:14 ` Chris Wilson
  2012-07-13 13:14 ` [PATCH 07/13] drm/i915: Remove the per-ring write list Chris Wilson
                   ` (6 subsequent siblings)
  12 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2012-07-13 13:14 UTC (permalink / raw)
  To: intel-gfx

As we guarantee to emit a flush before emitting the breadcrumb or
the next batchbuffer, there is no further need for the flushing list.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c   |    7 ----
 drivers/gpu/drm/i915/i915_drv.h       |   19 +++--------
 drivers/gpu/drm/i915/i915_gem.c       |   58 +++++----------------------------
 drivers/gpu/drm/i915/i915_gem_evict.c |   20 ------------
 4 files changed, 13 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index a8b7db6..1312b79 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -44,7 +44,6 @@
 
 enum {
 	ACTIVE_LIST,
-	FLUSHING_LIST,
 	INACTIVE_LIST,
 	PINNED_LIST,
 };
@@ -178,10 +177,6 @@ static int i915_gem_object_list_info(struct seq_file *m, void *data)
 		seq_printf(m, "Inactive:\n");
 		head = &dev_priv->mm.inactive_list;
 		break;
-	case FLUSHING_LIST:
-		seq_printf(m, "Flushing:\n");
-		head = &dev_priv->mm.flushing_list;
-		break;
 	default:
 		mutex_unlock(&dev->struct_mutex);
 		return -EINVAL;
@@ -239,7 +234,6 @@ static int i915_gem_object_info(struct seq_file *m, void* data)
 
 	size = count = mappable_size = mappable_count = 0;
 	count_objects(&dev_priv->mm.active_list, mm_list);
-	count_objects(&dev_priv->mm.flushing_list, mm_list);
 	seq_printf(m, "  %u [%u] active objects, %zu [%zu] bytes\n",
 		   count, mappable_count, size, mappable_size);
 
@@ -2007,7 +2001,6 @@ static struct drm_info_list i915_debugfs_list[] = {
 	{"i915_gem_gtt", i915_gem_gtt_info, 0},
 	{"i915_gem_pinned", i915_gem_gtt_info, 0, (void *) PINNED_LIST},
 	{"i915_gem_active", i915_gem_object_list_info, 0, (void *) ACTIVE_LIST},
-	{"i915_gem_flushing", i915_gem_object_list_info, 0, (void *) FLUSHING_LIST},
 	{"i915_gem_inactive", i915_gem_object_list_info, 0, (void *) INACTIVE_LIST},
 	{"i915_gem_pageflip", i915_gem_pageflip_info, 0},
 	{"i915_gem_request", i915_gem_request_info, 0},
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b398b72..203562b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -696,17 +696,6 @@ typedef struct drm_i915_private {
 		struct list_head active_list;
 
 		/**
-		 * List of objects which are not in the ringbuffer but which
-		 * still have a write_domain which needs to be flushed before
-		 * unbinding.
-		 *
-		 * last_rendering_seqno is 0 while an object is in this list.
-		 *
-		 * A reference is held on the buffer while on this list.
-		 */
-		struct list_head flushing_list;
-
-		/**
 		 * LRU list of objects which are not in the ringbuffer and
 		 * are ready to unbind, but are still in the GTT.
 		 *
@@ -873,7 +862,7 @@ struct drm_i915_gem_object {
 	struct drm_mm_node *gtt_space;
 	struct list_head gtt_list;
 
-	/** This object's place on the active/flushing/inactive lists */
+	/** This object's place on the active/inactive lists */
 	struct list_head ring_list;
 	struct list_head mm_list;
 	/** This object's place on GPU write list */
@@ -882,9 +871,9 @@ struct drm_i915_gem_object {
 	struct list_head exec_list;
 
 	/**
-	 * This is set if the object is on the active or flushing lists
-	 * (has pending rendering), and is not set if it's on inactive (ready
-	 * to be unbound).
+	 * This is set if the object is on the active lists (has pending
+	 * rendering and so a non-zero seqno), and is not set if it i s on
+	 * inactive (ready to be unbound) list.
 	 */
 	unsigned int active:1;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 49b8864..ee20fdb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1458,27 +1458,6 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
 }
 
 static void
-i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
-{
-	list_del_init(&obj->ring_list);
-	obj->last_read_seqno = 0;
-	obj->last_write_seqno = 0;
-	obj->last_fenced_seqno = 0;
-}
-
-static void
-i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
-{
-	struct drm_device *dev = obj->base.dev;
-	drm_i915_private_t *dev_priv = dev->dev_private;
-
-	BUG_ON(!obj->active);
-	list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);
-
-	i915_gem_object_move_off_active(obj);
-}
-
-static void
 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 {
 	struct drm_device *dev = obj->base.dev;
@@ -1488,9 +1467,15 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 
 	BUG_ON(!list_empty(&obj->gpu_write_list));
 	BUG_ON(!obj->active);
+
+	list_del_init(&obj->ring_list);
 	obj->ring = NULL;
 
-	i915_gem_object_move_off_active(obj);
+	obj->last_read_seqno = 0;
+	obj->last_write_seqno = 0;
+	obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
+
+	obj->last_fenced_seqno = 0;
 	obj->fenced_gpu_access = false;
 
 	obj->active = 0;
@@ -1693,7 +1678,6 @@ static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
 				       struct drm_i915_gem_object,
 				       ring_list);
 
-		obj->base.write_domain = 0;
 		list_del_init(&obj->gpu_write_list);
 		i915_gem_object_move_to_inactive(obj);
 	}
@@ -1730,20 +1714,6 @@ void i915_gem_reset(struct drm_device *dev)
 	for_each_ring(ring, dev_priv, i)
 		i915_gem_reset_ring_lists(dev_priv, ring);
 
-	/* Remove anything from the flushing lists. The GPU cache is likely
-	 * to be lost on reset along with the data, so simply move the
-	 * lost bo to the inactive list.
-	 */
-	while (!list_empty(&dev_priv->mm.flushing_list)) {
-		obj = list_first_entry(&dev_priv->mm.flushing_list,
-				      struct drm_i915_gem_object,
-				      mm_list);
-
-		obj->base.write_domain = 0;
-		list_del_init(&obj->gpu_write_list);
-		i915_gem_object_move_to_inactive(obj);
-	}
-
 	/* Move everything out of the GPU domains to ensure we do any
 	 * necessary invalidation upon reuse.
 	 */
@@ -1814,10 +1784,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
 		if (!i915_seqno_passed(seqno, obj->last_read_seqno))
 			break;
 
-		if (obj->base.write_domain != 0)
-			i915_gem_object_move_to_flushing(obj);
-		else
-			i915_gem_object_move_to_inactive(obj);
+		i915_gem_object_move_to_inactive(obj);
 	}
 
 	if (unlikely(ring->trace_irq_seqno &&
@@ -3891,7 +3858,6 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 	}
 
 	BUG_ON(!list_empty(&dev_priv->mm.active_list));
-	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
 	BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
 	mutex_unlock(&dev->struct_mutex);
 
@@ -3949,7 +3915,6 @@ i915_gem_load(struct drm_device *dev)
 	drm_i915_private_t *dev_priv = dev->dev_private;
 
 	INIT_LIST_HEAD(&dev_priv->mm.active_list);
-	INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
 	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
 	INIT_LIST_HEAD(&dev_priv->mm.gtt_list);
@@ -4200,12 +4165,7 @@ static int
 i915_gpu_is_active(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	int lists_empty;
-
-	lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
-		      list_empty(&dev_priv->mm.active_list);
-
-	return !lists_empty;
+	return !list_empty(&dev_priv->mm.active_list);
 }
 
 static int
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index ae7c24e..f61af59 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -92,23 +92,6 @@ i915_gem_evict_something(struct drm_device *dev, int min_size,
 
 	/* Now merge in the soon-to-be-expired objects... */
 	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
-		/* Does the object require an outstanding flush? */
-		if (obj->base.write_domain)
-			continue;
-
-		if (mark_free(obj, &unwind_list))
-			goto found;
-	}
-
-	/* Finally add anything with a pending flush (in order of retirement) */
-	list_for_each_entry(obj, &dev_priv->mm.flushing_list, mm_list) {
-		if (mark_free(obj, &unwind_list))
-			goto found;
-	}
-	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
-		if (!obj->base.write_domain)
-			continue;
-
 		if (mark_free(obj, &unwind_list))
 			goto found;
 	}
@@ -171,7 +154,6 @@ i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only)
 	int ret;
 
 	lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
-		       list_empty(&dev_priv->mm.flushing_list) &&
 		       list_empty(&dev_priv->mm.active_list));
 	if (lists_empty)
 		return -ENOSPC;
@@ -188,8 +170,6 @@ i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only)
 
 	i915_gem_retire_requests(dev);
 
-	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
-
 	/* Having flushed everything, unbind() should never raise an error */
 	list_for_each_entry_safe(obj, next,
 				 &dev_priv->mm.inactive_list, mm_list) {
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 07/13] drm/i915: Remove the per-ring write list
  2012-07-13 13:14 Remove defunct flushing list (v2) Chris Wilson
                   ` (5 preceding siblings ...)
  2012-07-13 13:14 ` [PATCH 06/13] drm/i915: Remove the defunct flushing list Chris Wilson
@ 2012-07-13 13:14 ` Chris Wilson
  2012-07-13 13:14 ` [PATCH 08/13] drm/i915: Remove explicit flush from i915_gem_object_flush_fence() Chris Wilson
                   ` (5 subsequent siblings)
  12 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2012-07-13 13:14 UTC (permalink / raw)
  To: intel-gfx

This is now handled by a global flag to ensure we emit a flush before
the next serialisation point (if we failed to queue one previously).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            |    2 --
 drivers/gpu/drm/i915/i915_gem.c            |   53 +---------------------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |    7 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c    |    2 --
 drivers/gpu/drm/i915/intel_ringbuffer.h    |    9 -----
 5 files changed, 3 insertions(+), 70 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 203562b..9747a92 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -865,8 +865,6 @@ struct drm_i915_gem_object {
 	/** This object's place on the active/inactive lists */
 	struct list_head ring_list;
 	struct list_head mm_list;
-	/** This object's place on GPU write list */
-	struct list_head gpu_write_list;
 	/** This object's place in the batchbuffer or on the eviction list */
 	struct list_head exec_list;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ee20fdb..7d2765c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1465,7 +1465,6 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 
 	list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
 
-	BUG_ON(!list_empty(&obj->gpu_write_list));
 	BUG_ON(!obj->active);
 
 	list_del_init(&obj->ring_list);
@@ -1510,30 +1509,6 @@ i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
 	return obj->madv == I915_MADV_DONTNEED;
 }
 
-static void
-i915_gem_process_flushing_list(struct intel_ring_buffer *ring,
-			       uint32_t flush_domains)
-{
-	struct drm_i915_gem_object *obj, *next;
-
-	list_for_each_entry_safe(obj, next,
-				 &ring->gpu_write_list,
-				 gpu_write_list) {
-		if (obj->base.write_domain & flush_domains) {
-			uint32_t old_write_domain = obj->base.write_domain;
-
-			obj->base.write_domain = 0;
-			list_del_init(&obj->gpu_write_list);
-			i915_gem_object_move_to_active(obj, ring,
-						       i915_gem_next_request_seqno(ring));
-
-			trace_i915_gem_object_change_domain(obj,
-							    obj->base.read_domains,
-							    old_write_domain);
-		}
-	}
-}
-
 static u32
 i915_gem_get_seqno(struct drm_device *dev)
 {
@@ -1635,8 +1610,6 @@ i915_add_request(struct intel_ring_buffer *ring,
 					   &dev_priv->mm.retire_work, HZ);
 	}
 
-	WARN_ON(!list_empty(&ring->gpu_write_list));
-
 	return 0;
 }
 
@@ -1678,7 +1651,6 @@ static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
 				       struct drm_i915_gem_object,
 				       ring_list);
 
-		list_del_init(&obj->gpu_write_list);
 		i915_gem_object_move_to_inactive(obj);
 	}
 }
@@ -2008,11 +1980,6 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 	u32 seqno;
 	int ret;
 
-	/* This function only exists to support waiting for existing rendering,
-	 * not for emitting required flushes.
-	 */
-	BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0);
-
 	/* If there is rendering queued on the buffer being evicted, wait for
 	 * it.
 	 */
@@ -2305,26 +2272,14 @@ i915_gem_flush_ring(struct intel_ring_buffer *ring,
 	if (ret)
 		return ret;
 
-	if (flush_domains & I915_GEM_GPU_DOMAINS)
-		i915_gem_process_flushing_list(ring, flush_domains);
-
 	return 0;
 }
 
 static int i915_ring_idle(struct intel_ring_buffer *ring)
 {
-	int ret;
-
-	if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list))
+	if (list_empty(&ring->active_list))
 		return 0;
 
-	if (!list_empty(&ring->gpu_write_list)) {
-		ret = i915_gem_flush_ring(ring,
-				    I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
-		if (ret)
-			return ret;
-	}
-
 	return i915_wait_seqno(ring, i915_gem_next_request_seqno(ring));
 }
 
@@ -2340,10 +2295,6 @@ int i915_gpu_idle(struct drm_device *dev)
 		if (ret)
 			return ret;
 
-		/* Is the device fubar? */
-		if (WARN_ON(!list_empty(&ring->gpu_write_list)))
-			return -EBUSY;
-
 		ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID);
 		if (ret)
 			return ret;
@@ -3484,7 +3435,6 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 	INIT_LIST_HEAD(&obj->gtt_list);
 	INIT_LIST_HEAD(&obj->ring_list);
 	INIT_LIST_HEAD(&obj->exec_list);
-	INIT_LIST_HEAD(&obj->gpu_write_list);
 	obj->madv = I915_MADV_WILLNEED;
 	/* Avoid an unnecessary call to unbind on the first bind. */
 	obj->map_and_fenceable = true;
@@ -3905,7 +3855,6 @@ init_ring_lists(struct intel_ring_buffer *ring)
 {
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
-	INIT_LIST_HEAD(&ring->gpu_write_list);
 }
 
 void
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c08229f..86aead4 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -943,9 +943,8 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
 	struct drm_i915_gem_object *obj;
 
 	list_for_each_entry(obj, objects, exec_list) {
-		  u32 old_read = obj->base.read_domains;
-		  u32 old_write = obj->base.write_domain;
-
+		u32 old_read = obj->base.read_domains;
+		u32 old_write = obj->base.write_domain;
 
 		obj->base.read_domains = obj->base.pending_read_domains;
 		obj->base.write_domain = obj->base.pending_write_domain;
@@ -955,8 +954,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
 		if (obj->base.write_domain) {
 			obj->dirty = 1;
 			obj->last_write_seqno = seqno;
-			list_move_tail(&obj->gpu_write_list,
-				       &ring->gpu_write_list);
 			if (obj->pin_count) /* check for potential scanout */
 				intel_mark_busy(ring->dev, obj);
 		}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index ddc4859..7c166ff 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1001,7 +1001,6 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 	ring->dev = dev;
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
-	INIT_LIST_HEAD(&ring->gpu_write_list);
 	ring->size = 32 * PAGE_SIZE;
 
 	init_waitqueue_head(&ring->irq_queue);
@@ -1472,7 +1471,6 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
 	ring->dev = dev;
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
-	INIT_LIST_HEAD(&ring->gpu_write_list);
 
 	ring->size = size;
 	ring->effective_size = ring->size;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 1d3c81f..7986f30 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -101,15 +101,6 @@ struct  intel_ring_buffer {
 	struct list_head request_list;
 
 	/**
-	 * List of objects currently pending a GPU write flush.
-	 *
-	 * All elements on this list will belong to either the
-	 * active_list or flushing_list, last_rendering_seqno can
-	 * be used to differentiate between the two elements.
-	 */
-	struct list_head gpu_write_list;
-
-	/**
 	 * Do we have some not yet emitted requests outstanding?
 	 */
 	u32 outstanding_lazy_request;
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 08/13] drm/i915: Remove explicit flush from i915_gem_object_flush_fence()
  2012-07-13 13:14 Remove defunct flushing list (v2) Chris Wilson
                   ` (6 preceding siblings ...)
  2012-07-13 13:14 ` [PATCH 07/13] drm/i915: Remove the per-ring write list Chris Wilson
@ 2012-07-13 13:14 ` Chris Wilson
  2012-07-13 13:14 ` [PATCH 09/13] drm/i915: Remove the explicit flush of the GPU write domain Chris Wilson
                   ` (4 subsequent siblings)
  12 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2012-07-13 13:14 UTC (permalink / raw)
  To: intel-gfx

As the flush is either performed explictly immediately after the
execbuffer dispatch, or before the serialisation of last_fenced_seqno we
can forgo the explict i915_gem_flush_ring().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |   16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7d2765c..24f6c2d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2470,21 +2470,8 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
 static int
 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
 {
-	int ret;
-
-	if (obj->fenced_gpu_access) {
-		if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
-			ret = i915_gem_flush_ring(obj->ring,
-						  0, obj->base.write_domain);
-			if (ret)
-				return ret;
-		}
-
-		obj->fenced_gpu_access = false;
-	}
-
 	if (obj->last_fenced_seqno) {
-		ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
+		int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
 		if (ret)
 			return ret;
 
@@ -2497,6 +2484,7 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
 	if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
 		mb();
 
+	obj->fenced_gpu_access = false;
 	return 0;
 }
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 09/13] drm/i915: Remove the explicit flush of the GPU write domain
  2012-07-13 13:14 Remove defunct flushing list (v2) Chris Wilson
                   ` (7 preceding siblings ...)
  2012-07-13 13:14 ` [PATCH 08/13] drm/i915: Remove explicit flush from i915_gem_object_flush_fence() Chris Wilson
@ 2012-07-13 13:14 ` Chris Wilson
  2012-07-13 13:14 ` [PATCH 10/13] drm/i915: Replace the complex flushing logic with simple invalidate/flush all Chris Wilson
                   ` (3 subsequent siblings)
  12 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2012-07-13 13:14 UTC (permalink / raw)
  To: intel-gfx

Rely instead on the insertion of the implicit flush before the seqno
breadcrumb.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |   34 ----------------------------------
 1 file changed, 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 24f6c2d..50ccc06 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -37,7 +37,6 @@
 #include <linux/pci.h>
 #include <linux/dma-buf.h>
 
-static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj);
 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
 static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
@@ -2018,10 +2017,6 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
 	int ret;
 
 	if (obj->active) {
-		ret = i915_gem_object_flush_gpu_write_domain(obj);
-		if (ret)
-			return ret;
-
 		ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
 		if (ret)
 			return ret;
@@ -2779,17 +2774,6 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
 	drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
 }
 
-/** Flushes any GPU write domain for the object if it's dirty. */
-static int
-i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj)
-{
-	if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0)
-		return 0;
-
-	/* Queue the GPU write cache flushing we need. */
-	return i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain);
-}
-
 /** Flushes the GTT write domain for the object if it's dirty. */
 static void
 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
@@ -2856,10 +2840,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
 		return 0;
 
-	ret = i915_gem_object_flush_gpu_write_domain(obj);
-	if (ret)
-		return ret;
-
 	ret = i915_gem_object_wait_rendering(obj, !write);
 	if (ret)
 		return ret;
@@ -2970,10 +2950,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	u32 old_read_domains, old_write_domain;
 	int ret;
 
-	ret = i915_gem_object_flush_gpu_write_domain(obj);
-	if (ret)
-		return ret;
-
 	if (pipelined != obj->ring) {
 		ret = i915_gem_object_sync(obj, pipelined);
 		if (ret)
@@ -3027,12 +3003,6 @@ i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
 	if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
 		return 0;
 
-	if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
-		ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain);
-		if (ret)
-			return ret;
-	}
-
 	ret = i915_gem_object_wait_rendering(obj, false);
 	if (ret)
 		return ret;
@@ -3057,10 +3027,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
 		return 0;
 
-	ret = i915_gem_object_flush_gpu_write_domain(obj);
-	if (ret)
-		return ret;
-
 	ret = i915_gem_object_wait_rendering(obj, !write);
 	if (ret)
 		return ret;
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 10/13] drm/i915: Replace the complex flushing logic with simple invalidate/flush all
  2012-07-13 13:14 Remove defunct flushing list (v2) Chris Wilson
                   ` (8 preceding siblings ...)
  2012-07-13 13:14 ` [PATCH 09/13] drm/i915: Remove the explicit flush of the GPU write domain Chris Wilson
@ 2012-07-13 13:14 ` Chris Wilson
  2012-07-13 13:14 ` [PATCH 11/13] drm/i915: Clear the pending_gpu_fenced_access flag at the start of execbuffer Chris Wilson
                   ` (2 subsequent siblings)
  12 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2012-07-13 13:14 UTC (permalink / raw)
  To: intel-gfx

Now that we unconditionally flush and invalidate between every batch
buffer, we no longer need the complex logic to decide which domains
require flushing. Remove it and rejoice.

One side-effect is that this also removes the broken wait-on-pending-flip
logic.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  248 ++--------------------------
 1 file changed, 12 insertions(+), 236 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 86aead4..b8d4dc3 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -34,180 +34,6 @@
 #include "intel_drv.h"
 #include <linux/dma_remapping.h>
 
-struct change_domains {
-	uint32_t invalidate_domains;
-	uint32_t flush_domains;
-	uint32_t flush_rings;
-	uint32_t flips;
-};
-
-/*
- * Set the next domain for the specified object. This
- * may not actually perform the necessary flushing/invaliding though,
- * as that may want to be batched with other set_domain operations
- *
- * This is (we hope) the only really tricky part of gem. The goal
- * is fairly simple -- track which caches hold bits of the object
- * and make sure they remain coherent. A few concrete examples may
- * help to explain how it works. For shorthand, we use the notation
- * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the
- * a pair of read and write domain masks.
- *
- * Case 1: the batch buffer
- *
- *	1. Allocated
- *	2. Written by CPU
- *	3. Mapped to GTT
- *	4. Read by GPU
- *	5. Unmapped from GTT
- *	6. Freed
- *
- *	Let's take these a step at a time
- *
- *	1. Allocated
- *		Pages allocated from the kernel may still have
- *		cache contents, so we set them to (CPU, CPU) always.
- *	2. Written by CPU (using pwrite)
- *		The pwrite function calls set_domain (CPU, CPU) and
- *		this function does nothing (as nothing changes)
- *	3. Mapped by GTT
- *		This function asserts that the object is not
- *		currently in any GPU-based read or write domains
- *	4. Read by GPU
- *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
- *		As write_domain is zero, this function adds in the
- *		current read domains (CPU+COMMAND, 0).
- *		flush_domains is set to CPU.
- *		invalidate_domains is set to COMMAND
- *		clflush is run to get data out of the CPU caches
- *		then i915_dev_set_domain calls i915_gem_flush to
- *		emit an MI_FLUSH and drm_agp_chipset_flush
- *	5. Unmapped from GTT
- *		i915_gem_object_unbind calls set_domain (CPU, CPU)
- *		flush_domains and invalidate_domains end up both zero
- *		so no flushing/invalidating happens
- *	6. Freed
- *		yay, done
- *
- * Case 2: The shared render buffer
- *
- *	1. Allocated
- *	2. Mapped to GTT
- *	3. Read/written by GPU
- *	4. set_domain to (CPU,CPU)
- *	5. Read/written by CPU
- *	6. Read/written by GPU
- *
- *	1. Allocated
- *		Same as last example, (CPU, CPU)
- *	2. Mapped to GTT
- *		Nothing changes (assertions find that it is not in the GPU)
- *	3. Read/written by GPU
- *		execbuffer calls set_domain (RENDER, RENDER)
- *		flush_domains gets CPU
- *		invalidate_domains gets GPU
- *		clflush (obj)
- *		MI_FLUSH and drm_agp_chipset_flush
- *	4. set_domain (CPU, CPU)
- *		flush_domains gets GPU
- *		invalidate_domains gets CPU
- *		wait_rendering (obj) to make sure all drawing is complete.
- *		This will include an MI_FLUSH to get the data from GPU
- *		to memory
- *		clflush (obj) to invalidate the CPU cache
- *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
- *	5. Read/written by CPU
- *		cache lines are loaded and dirtied
- *	6. Read written by GPU
- *		Same as last GPU access
- *
- * Case 3: The constant buffer
- *
- *	1. Allocated
- *	2. Written by CPU
- *	3. Read by GPU
- *	4. Updated (written) by CPU again
- *	5. Read by GPU
- *
- *	1. Allocated
- *		(CPU, CPU)
- *	2. Written by CPU
- *		(CPU, CPU)
- *	3. Read by GPU
- *		(CPU+RENDER, 0)
- *		flush_domains = CPU
- *		invalidate_domains = RENDER
- *		clflush (obj)
- *		MI_FLUSH
- *		drm_agp_chipset_flush
- *	4. Updated (written) by CPU again
- *		(CPU, CPU)
- *		flush_domains = 0 (no previous write domain)
- *		invalidate_domains = 0 (no new read domains)
- *	5. Read by GPU
- *		(CPU+RENDER, 0)
- *		flush_domains = CPU
- *		invalidate_domains = RENDER
- *		clflush (obj)
- *		MI_FLUSH
- *		drm_agp_chipset_flush
- */
-static void
-i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
-				  struct intel_ring_buffer *ring,
-				  struct change_domains *cd)
-{
-	uint32_t invalidate_domains = 0, flush_domains = 0;
-
-	/*
-	 * If the object isn't moving to a new write domain,
-	 * let the object stay in multiple read domains
-	 */
-	if (obj->base.pending_write_domain == 0)
-		obj->base.pending_read_domains |= obj->base.read_domains;
-
-	/*
-	 * Flush the current write domain if
-	 * the new read domains don't match. Invalidate
-	 * any read domains which differ from the old
-	 * write domain
-	 */
-	if (obj->base.write_domain &&
-	    (((obj->base.write_domain != obj->base.pending_read_domains ||
-	       obj->ring != ring)) ||
-	     (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
-		flush_domains |= obj->base.write_domain;
-		invalidate_domains |=
-			obj->base.pending_read_domains & ~obj->base.write_domain;
-	}
-	/*
-	 * Invalidate any read caches which may have
-	 * stale data. That is, any new read domains.
-	 */
-	invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
-	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
-		i915_gem_clflush_object(obj);
-
-	if (obj->base.pending_write_domain)
-		cd->flips |= atomic_read(&obj->pending_flip);
-
-	/* The actual obj->write_domain will be updated with
-	 * pending_write_domain after we emit the accumulated flush for all
-	 * of our domain changes in execbuffers (which clears objects'
-	 * write_domains).  So if we have a current write domain that we
-	 * aren't changing, set pending_write_domain to that.
-	 */
-	if (flush_domains == 0 && obj->base.pending_write_domain == 0)
-		obj->base.pending_write_domain = obj->base.write_domain;
-
-	cd->invalidate_domains |= invalidate_domains;
-	cd->flush_domains |= flush_domains;
-	if (flush_domains & I915_GEM_GPU_DOMAINS)
-		cd->flush_rings |= intel_ring_flag(obj->ring);
-	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
-		cd->flush_rings |= intel_ring_flag(ring);
-}
-
 struct eb_objects {
 	int and;
 	struct hlist_head buckets[0];
@@ -810,81 +636,31 @@ err:
 	return ret;
 }
 
-static void
-i915_gem_execbuffer_flush(struct drm_device *dev,
-			  uint32_t invalidate_domains,
-			  uint32_t flush_domains)
-{
-	if (flush_domains & I915_GEM_DOMAIN_CPU)
-		intel_gtt_chipset_flush();
-
-	if (flush_domains & I915_GEM_DOMAIN_GTT)
-		wmb();
-}
-
-static int
-i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
-{
-	u32 plane, flip_mask;
-	int ret;
-
-	/* Check for any pending flips. As we only maintain a flip queue depth
-	 * of 1, we can simply insert a WAIT for the next display flip prior
-	 * to executing the batch and avoid stalling the CPU.
-	 */
-
-	for (plane = 0; flips >> plane; plane++) {
-		if (((flips >> plane) & 1) == 0)
-			continue;
-
-		if (plane)
-			flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
-		else
-			flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
-
-		ret = intel_ring_begin(ring, 2);
-		if (ret)
-			return ret;
-
-		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
-		intel_ring_emit(ring, MI_NOOP);
-		intel_ring_advance(ring);
-	}
-
-	return 0;
-}
-
-
 static int
 i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 				struct list_head *objects)
 {
 	struct drm_i915_gem_object *obj;
-	struct change_domains cd;
+	uint32_t flush_domains = 0;
 	int ret;
 
-	memset(&cd, 0, sizeof(cd));
-	list_for_each_entry(obj, objects, exec_list)
-		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
-
-	if (cd.invalidate_domains | cd.flush_domains) {
-		i915_gem_execbuffer_flush(ring->dev,
-					  cd.invalidate_domains,
-					  cd.flush_domains);
-	}
-
-	if (cd.flips) {
-		ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
-		if (ret)
-			return ret;
-	}
-
 	list_for_each_entry(obj, objects, exec_list) {
 		ret = i915_gem_object_sync(obj, ring);
 		if (ret)
 			return ret;
+
+		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
+			i915_gem_clflush_object(obj);
+
+		flush_domains |= obj->base.write_domain;
 	}
 
+	if (flush_domains & I915_GEM_DOMAIN_CPU)
+		intel_gtt_chipset_flush();
+
+	if (flush_domains & I915_GEM_DOMAIN_GTT)
+		wmb();
+
 	/* Unconditionally invalidate gpu caches and ensure that we do flush
 	 * any residual writes from the previous batch.
 	 */
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 11/13] drm/i915: Clear the pending_gpu_fenced_access flag at the start of execbuffer
  2012-07-13 13:14 Remove defunct flushing list (v2) Chris Wilson
                   ` (9 preceding siblings ...)
  2012-07-13 13:14 ` [PATCH 10/13] drm/i915: Replace the complex flushing logic with simple invalidate/flush all Chris Wilson
@ 2012-07-13 13:14 ` Chris Wilson
  2012-07-13 13:14 ` [PATCH 12/13] drm/i915: Split i915_gem_flush_ring() into seperate invalidate/flush funcs Chris Wilson
  2012-07-13 13:14 ` [PATCH 13/13] drm/i915: Move the write seqno handling to move_to_active Chris Wilson
  12 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2012-07-13 13:14 UTC (permalink / raw)
  To: intel-gfx

Otherwise once we use the buffer with a BLT command on gen2/3, we will
always regard future command submissions as continuing the fenced
access. However, now that we flush/invalidate between every batch we can
drop this pessimism.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |    1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index b8d4dc3..0324965 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -413,6 +413,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 
 		obj->base.pending_read_domains = 0;
 		obj->base.pending_write_domain = 0;
+		obj->pending_fenced_gpu_access = false;
 	}
 	list_splice(&ordered_objects, objects);
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 12/13] drm/i915: Split i915_gem_flush_ring() into seperate invalidate/flush funcs
  2012-07-13 13:14 Remove defunct flushing list (v2) Chris Wilson
                   ` (10 preceding siblings ...)
  2012-07-13 13:14 ` [PATCH 11/13] drm/i915: Clear the pending_gpu_fenced_access flag at the start of execbuffer Chris Wilson
@ 2012-07-13 13:14 ` Chris Wilson
  2012-07-13 13:14 ` [PATCH 13/13] drm/i915: Move the write seqno handling to move_to_active Chris Wilson
  12 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2012-07-13 13:14 UTC (permalink / raw)
  To: intel-gfx

By moving the function to intel_ringbuffer and currying the appropriate
parameter, hopefully we make the callsites easier to read and
understand.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            |    3 ---
 drivers/gpu/drm/i915/i915_gem.c            |   29 +++------------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |    9 +------
 drivers/gpu/drm/i915/intel_ringbuffer.c    |   38 ++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.h    |    2 ++
 5 files changed, 44 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9747a92..7ae333b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1256,9 +1256,6 @@ int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 void i915_gem_load(struct drm_device *dev);
 int i915_gem_init_object(struct drm_gem_object *obj);
-int __must_check i915_gem_flush_ring(struct intel_ring_buffer *ring,
-				     uint32_t invalidate_domains,
-				     uint32_t flush_domains);
 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 						  size_t size);
 void i915_gem_free_object(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 50ccc06..f281c7b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1548,13 +1548,9 @@ i915_add_request(struct intel_ring_buffer *ring,
 	 * is that the flush _must_ happen before the next request, no matter
 	 * what.
 	 */
-	if (ring->gpu_caches_dirty) {
-		ret = i915_gem_flush_ring(ring, 0, I915_GEM_GPU_DOMAINS);
-		if (ret)
-			return ret;
-
-		ring->gpu_caches_dirty = false;
-	}
+	ret = intel_ring_flush_all_caches(ring);
+	if (ret)
+		return ret;
 
 	if (request == NULL) {
 		request = kmalloc(sizeof(*request), GFP_KERNEL);
@@ -2251,25 +2247,6 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	return ret;
 }
 
-int
-i915_gem_flush_ring(struct intel_ring_buffer *ring,
-		    uint32_t invalidate_domains,
-		    uint32_t flush_domains)
-{
-	int ret;
-
-	if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0)
-		return 0;
-
-	trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains);
-
-	ret = ring->flush(ring, invalidate_domains, flush_domains);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
 static int i915_ring_idle(struct intel_ring_buffer *ring)
 {
 	if (list_empty(&ring->active_list))
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0324965..d48fe33 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -665,14 +665,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 	/* Unconditionally invalidate gpu caches and ensure that we do flush
 	 * any residual writes from the previous batch.
 	 */
-	ret = i915_gem_flush_ring(ring,
-				  I915_GEM_GPU_DOMAINS,
-				  ring->gpu_caches_dirty ? I915_GEM_GPU_DOMAINS : 0);
-	if (ret)
-		return ret;
-
-	ring->gpu_caches_dirty = false;
-	return 0;
+	return intel_ring_invalidate_all_caches(ring);
 }
 
 static bool
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7c166ff..81698cf 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1563,3 +1563,41 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
 
 	return intel_init_ring_buffer(dev, ring);
 }
+
+int
+intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
+{
+	int ret;
+
+	if (!ring->gpu_caches_dirty)
+		return 0;
+
+	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
+	if (ret)
+		return ret;
+
+	trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS);
+
+	ring->gpu_caches_dirty = false;
+	return 0;
+}
+
+int
+intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
+{
+	uint32_t flush_domains;
+	int ret;
+
+	flush_domains = 0;
+	if (ring->gpu_caches_dirty)
+		flush_domains = I915_GEM_GPU_DOMAINS;
+
+	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
+	if (ret)
+		return ret;
+
+	trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
+
+	ring->gpu_caches_dirty = false;
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 7986f30..8b2b92e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -195,6 +195,8 @@ static inline void intel_ring_emit(struct intel_ring_buffer *ring,
 void intel_ring_advance(struct intel_ring_buffer *ring);
 
 u32 intel_ring_get_seqno(struct intel_ring_buffer *ring);
+int intel_ring_flush_all_caches(struct intel_ring_buffer *ring);
+int intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring);
 
 int intel_init_render_ring_buffer(struct drm_device *dev);
 int intel_init_bsd_ring_buffer(struct drm_device *dev);
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 13/13] drm/i915: Move the write seqno handling to move_to_active
  2012-07-13 13:14 Remove defunct flushing list (v2) Chris Wilson
                   ` (11 preceding siblings ...)
  2012-07-13 13:14 ` [PATCH 12/13] drm/i915: Split i915_gem_flush_ring() into seperate invalidate/flush funcs Chris Wilson
@ 2012-07-13 13:14 ` Chris Wilson
  12 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2012-07-13 13:14 UTC (permalink / raw)
  To: intel-gfx

This moves the open-coded handling  of the write seqno from the
execbuffer dispatch into the core logic alongside the read seqno
handling.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c            |    9 +++++++++
 drivers/gpu/drm/i915/i915_gem_context.c    |    1 -
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |    6 ------
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f281c7b..c4cedb6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1427,7 +1427,9 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
+	BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
 	BUG_ON(ring == NULL);
+
 	obj->ring = ring;
 
 	/* Add a reference if we're newly entering the active list. */
@@ -1442,6 +1444,13 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
 
 	obj->last_read_seqno = seqno;
 
+	if (obj->base.write_domain) {
+		obj->dirty = 1;
+		obj->last_write_seqno = seqno;
+		if (obj->pin_count) /* check for potential scanout */
+			intel_mark_busy(dev, obj);
+	}
+
 	if (obj->fenced_gpu_access) {
 		obj->last_fenced_seqno = seqno;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index b0a855f..c79191b 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -408,7 +408,6 @@ static int do_switch(struct i915_hw_context *to)
 		from_obj->base.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
 		from_obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
 		i915_gem_object_move_to_active(from_obj, to->ring, seqno);
-		from_obj->dirty = 1;
 		BUG_ON(from_obj->ring != to->ring);
 		i915_gem_object_unpin(from_obj);
 		drm_gem_object_unreference(&from_obj->base);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d48fe33..44f5b22 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -721,12 +721,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
 		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
 
 		i915_gem_object_move_to_active(obj, ring, seqno);
-		if (obj->base.write_domain) {
-			obj->dirty = 1;
-			obj->last_write_seqno = seqno;
-			if (obj->pin_count) /* check for potential scanout */
-				intel_mark_busy(ring->dev, obj);
-		}
 
 		trace_i915_gem_object_change_domain(obj, old_read, old_write);
 	}
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH 02/13] drm/i915: fix invalid reference handling of the default ctx obj
  2012-07-13 13:14 ` [PATCH 02/13] drm/i915: fix invalid reference handling of the default ctx obj Chris Wilson
@ 2012-07-13 15:25   ` Ben Widawsky
  2012-07-13 15:37   ` Daniel Vetter
  1 sibling, 0 replies; 29+ messages in thread
From: Ben Widawsky @ 2012-07-13 15:25 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, 13 Jul 2012 14:14:05 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> Otherwise we end up trying to unpin a freed object and BUG.
> 

Just a quick excuse: originally do_switch was just a very low level
helper, and everything called i915_switch_context (including default
setup), so the bug likely didn't exist in older revisions of the series.

Since you're moving around so much code, maybe we could just move
do_switch() a bit higher in the file so we don't have to do the forward
declaration crap anymore?

I am a bit confused why I didn't hit this in my testing, but thanks for
tracking it down. I must say though, really like this patch, and the
cleanups it does in addition to fixing the bug is way understated in the
commit message. Still, a couple of comments below.

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>

> ---
>  drivers/gpu/drm/i915/i915_gem_context.c |   82 +++++++++++--------------------
>  1 file changed, 30 insertions(+), 52 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 90857f8..b0a855f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -97,8 +97,7 @@
>  
>  static struct i915_hw_context *
>  i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id);
> -static int do_switch(struct drm_i915_gem_object *from_obj,
> -		     struct i915_hw_context *to, u32 seqno);
> +static int do_switch(struct i915_hw_context *to);
>  
>  static int get_context_size(struct drm_device *dev)
>  {
> @@ -220,26 +219,24 @@ static int create_default_context(struct drm_i915_private *dev_priv)
>  	 */
>  	dev_priv->ring[RCS].default_context = ctx;
>  	ret = i915_gem_object_pin(ctx->obj, CONTEXT_ALIGN, false);
> -	if (ret) {
> -		do_destroy(ctx);
> -		return ret;
> -	}
> +	if (ret)
> +		goto err_destroy;
>  
>  	ret = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
> -	if (ret) {
> -		i915_gem_object_unpin(ctx->obj);
> -		do_destroy(ctx);
> -		return ret;
> -	}
> +	if (ret)
> +		goto err_unpin;
>  
> -	ret = do_switch(NULL, ctx, 0);
> -	if (ret) {
> -		i915_gem_object_unpin(ctx->obj);
> -		do_destroy(ctx);
> -	} else {
> -		DRM_DEBUG_DRIVER("Default HW context loaded\n");
> -	}
> +	ret = do_switch(ctx);
> +	if (ret)
> +		goto err_unpin;
>  
> +	DRM_DEBUG_DRIVER("Default HW context loaded\n");
> +	return 0;
> +
> +err_unpin:
> +	i915_gem_object_unpin(ctx->obj);
> +err_destroy:
> +	do_destroy(ctx);
>  	return ret;
>  }
>  
> @@ -366,16 +363,14 @@ mi_set_context(struct intel_ring_buffer *ring,
>  	return ret;
>  }
>  
> -static int do_switch(struct drm_i915_gem_object *from_obj,
> -		     struct i915_hw_context *to,
> -		     u32 seqno)
> +static int do_switch(struct i915_hw_context *to)
>  {
> -	struct intel_ring_buffer *ring = NULL;
> +	struct drm_i915_gem_object *from_obj = to->ring->last_context_obj;
>  	u32 hw_flags = 0;
>  	int ret;
>  
> -	BUG_ON(to == NULL);
> -	BUG_ON(from_obj != NULL && from_obj->pin_count == 0);
> +	if (from_obj == to->obj)
> +		return 0;

The second BUG_ON I think is still fairly useful (though it would be
nicer to have a context_id in the object so we could check at the time
of unpin).

>  
>  	ret = i915_gem_object_pin(to->obj, CONTEXT_ALIGN, false);
>  	if (ret)
> @@ -389,8 +384,7 @@ static int do_switch(struct drm_i915_gem_object *from_obj,
>  	else if (WARN_ON_ONCE(from_obj == to->obj)) /* not yet expected */
>  		hw_flags |= MI_FORCE_RESTORE;
>  
> -	ring = to->ring;
> -	ret = mi_set_context(ring, to, hw_flags);
> +	ret = mi_set_context(to->ring, to, hw_flags);
>  	if (ret) {
>  		i915_gem_object_unpin(to->obj);
>  		return ret;
> @@ -403,6 +397,7 @@ static int do_switch(struct drm_i915_gem_object *from_obj,
>  	 * MI_SET_CONTEXT instead of when the next seqno has completed.
>  	 */
>  	if (from_obj != NULL) {
> +		u32 seqno = i915_gem_next_request_seqno(to->ring);

I haven't really looked hard, but we do end up here quite early in init,
reset, unfreeze. Hopefully this is always safe to do.

>  		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
>  		 * whole damn pipeline, we don't need to explicitly mark the
>  		 * object dirty. The only exception is that the context must be
> @@ -412,13 +407,15 @@ static int do_switch(struct drm_i915_gem_object *from_obj,
>  		 */
>  		from_obj->base.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
>  		from_obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> -		i915_gem_object_move_to_active(from_obj, ring, seqno);
> +		i915_gem_object_move_to_active(from_obj, to->ring, seqno);
>  		from_obj->dirty = 1;
>  		BUG_ON(from_obj->ring != to->ring);
>  		i915_gem_object_unpin(from_obj);
> +		drm_gem_object_unreference(&from_obj->base);
>  	}
>  
> -	ring->last_context_obj = to->obj;
> +	drm_gem_object_reference(&to->obj->base);
> +	to->ring->last_context_obj = to->obj;
>  	to->is_initialized = true;
>  
>  	return 0;
> @@ -442,10 +439,7 @@ int i915_switch_context(struct intel_ring_buffer *ring,
>  			int to_id)
>  {
>  	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> -	struct drm_i915_file_private *file_priv = NULL;
>  	struct i915_hw_context *to;
> -	struct drm_i915_gem_object *from_obj = ring->last_context_obj;
> -	int ret;
>  
>  	if (dev_priv->hw_contexts_disabled)
>  		return 0;
> @@ -453,34 +447,18 @@ int i915_switch_context(struct intel_ring_buffer *ring,
>  	if (ring != &dev_priv->ring[RCS])
>  		return 0;
>  
> -	if (file)
> -		file_priv = file->driver_priv;
> -
>  	if (to_id == DEFAULT_CONTEXT_ID) {
>  		to = ring->default_context;
>  	} else {
> -		to = i915_gem_context_get(file_priv, to_id);
> +		if (file == NULL)
> +			return -EINVAL;
> +
> +		to = i915_gem_context_get(file->driver_priv, to_id);
>  		if (to == NULL)
>  			return -ENOENT;
>  	}
>  
> -	if (from_obj == to->obj)
> -		return 0;
> -
> -	ret = do_switch(from_obj, to, i915_gem_next_request_seqno(to->ring));
> -	if (ret)
> -		return ret;
> -
> -	/* Just to make the code a little cleaner we take the object reference
> -	 * after the switch was successful. It would be more intuitive to ref
> -	 * the 'to' object before the switch but we know the refcount must be >0
> -	 * if context_get() succeeded, and we hold struct mutex. So it's safe to
> -	 * do this here/now
> -	 */
> -	drm_gem_object_reference(&to->obj->base);
> -	if (from_obj != NULL)
> -		drm_gem_object_unreference(&from_obj->base);
> -	return ret;
> +	return do_switch(to);
>  }
>  
>  int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,



-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 01/13] drm/i915: Flush the context object from the CPU caches upon creation
  2012-07-13 13:14 ` [PATCH 01/13] drm/i915: Flush the context object from the CPU caches upon creation Chris Wilson
@ 2012-07-13 15:28   ` Ben Widawsky
  2012-07-13 15:54   ` Daniel Vetter
  2012-07-14 11:58   ` Daniel Vetter
  2 siblings, 0 replies; 29+ messages in thread
From: Ben Widawsky @ 2012-07-13 15:28 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, 13 Jul 2012 14:14:04 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ben Widawsky <ben@bwidawsk.net>

I guess setting the read_domains is now superfluous.
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>

> ---
>  drivers/gpu/drm/i915/i915_gem_context.c |   12 ++++++++++--
>  1 file changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 9ae3f2c..90857f8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -225,6 +225,13 @@ static int create_default_context(struct drm_i915_private *dev_priv)
>  		return ret;
>  	}
>  
> +	ret = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
> +	if (ret) {
> +		i915_gem_object_unpin(ctx->obj);
> +		do_destroy(ctx);
> +		return ret;
> +	}
> +
>  	ret = do_switch(NULL, ctx, 0);
>  	if (ret) {
>  		i915_gem_object_unpin(ctx->obj);
> @@ -396,8 +403,6 @@ static int do_switch(struct drm_i915_gem_object *from_obj,
>  	 * MI_SET_CONTEXT instead of when the next seqno has completed.
>  	 */
>  	if (from_obj != NULL) {
> -		from_obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> -		i915_gem_object_move_to_active(from_obj, ring, seqno);
>  		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
>  		 * whole damn pipeline, we don't need to explicitly mark the
>  		 * object dirty. The only exception is that the context must be
> @@ -405,6 +410,9 @@ static int do_switch(struct drm_i915_gem_object *from_obj,
>  		 * able to defer doing this until we know the object would be
>  		 * swapped, but there is no way to do that yet.
>  		 */
> +		from_obj->base.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
> +		from_obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> +		i915_gem_object_move_to_active(from_obj, ring, seqno);
>  		from_obj->dirty = 1;
>  		BUG_ON(from_obj->ring != to->ring);
>  		i915_gem_object_unpin(from_obj);



-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 02/13] drm/i915: fix invalid reference handling of the default ctx obj
  2012-07-13 13:14 ` [PATCH 02/13] drm/i915: fix invalid reference handling of the default ctx obj Chris Wilson
  2012-07-13 15:25   ` Ben Widawsky
@ 2012-07-13 15:37   ` Daniel Vetter
  2012-07-14  9:55     ` Chris Wilson
  1 sibling, 1 reply; 29+ messages in thread
From: Daniel Vetter @ 2012-07-13 15:37 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, Ben Widawsky

On Fri, Jul 13, 2012 at 02:14:05PM +0100, Chris Wilson wrote:
> Otherwise we end up trying to unpin a freed object and BUG.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ben Widawsky <ben@bwidawsk.net>

Afact this patch contains quite some code refactoring that does not
relate directly to the fix (or if it does, I fail to see the direct
relevance). So I think this either needs an explanation in the commit
message or be put into a separate patch (I agree though for actual code
cleanups).

For the fix itself I seem to be a bit dense again - the only thing I see
is that you move the refcount handling into do_switch. Afacs we do the
ref-handling in both cases only when do_switch is successful, and also
right at the end of do_switch (or right afterwards). So can you please
enlighten your clueless maintainer a bit an explain how things blow up?

Yours, Daniel

> ---
>  drivers/gpu/drm/i915/i915_gem_context.c |   82 +++++++++++--------------------
>  1 file changed, 30 insertions(+), 52 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 90857f8..b0a855f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -97,8 +97,7 @@
>  
>  static struct i915_hw_context *
>  i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id);
> -static int do_switch(struct drm_i915_gem_object *from_obj,
> -		     struct i915_hw_context *to, u32 seqno);
> +static int do_switch(struct i915_hw_context *to);
>  
>  static int get_context_size(struct drm_device *dev)
>  {
> @@ -220,26 +219,24 @@ static int create_default_context(struct drm_i915_private *dev_priv)
>  	 */
>  	dev_priv->ring[RCS].default_context = ctx;
>  	ret = i915_gem_object_pin(ctx->obj, CONTEXT_ALIGN, false);
> -	if (ret) {
> -		do_destroy(ctx);
> -		return ret;
> -	}
> +	if (ret)
> +		goto err_destroy;
>  
>  	ret = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
> -	if (ret) {
> -		i915_gem_object_unpin(ctx->obj);
> -		do_destroy(ctx);
> -		return ret;
> -	}
> +	if (ret)
> +		goto err_unpin;
>  
> -	ret = do_switch(NULL, ctx, 0);
> -	if (ret) {
> -		i915_gem_object_unpin(ctx->obj);
> -		do_destroy(ctx);
> -	} else {
> -		DRM_DEBUG_DRIVER("Default HW context loaded\n");
> -	}
> +	ret = do_switch(ctx);
> +	if (ret)
> +		goto err_unpin;
>  
> +	DRM_DEBUG_DRIVER("Default HW context loaded\n");
> +	return 0;
> +
> +err_unpin:
> +	i915_gem_object_unpin(ctx->obj);
> +err_destroy:
> +	do_destroy(ctx);
>  	return ret;
>  }
>  
> @@ -366,16 +363,14 @@ mi_set_context(struct intel_ring_buffer *ring,
>  	return ret;
>  }
>  
> -static int do_switch(struct drm_i915_gem_object *from_obj,
> -		     struct i915_hw_context *to,
> -		     u32 seqno)
> +static int do_switch(struct i915_hw_context *to)
>  {
> -	struct intel_ring_buffer *ring = NULL;
> +	struct drm_i915_gem_object *from_obj = to->ring->last_context_obj;
>  	u32 hw_flags = 0;
>  	int ret;
>  
> -	BUG_ON(to == NULL);
> -	BUG_ON(from_obj != NULL && from_obj->pin_count == 0);
> +	if (from_obj == to->obj)
> +		return 0;
>  
>  	ret = i915_gem_object_pin(to->obj, CONTEXT_ALIGN, false);
>  	if (ret)
> @@ -389,8 +384,7 @@ static int do_switch(struct drm_i915_gem_object *from_obj,
>  	else if (WARN_ON_ONCE(from_obj == to->obj)) /* not yet expected */
>  		hw_flags |= MI_FORCE_RESTORE;
>  
> -	ring = to->ring;
> -	ret = mi_set_context(ring, to, hw_flags);
> +	ret = mi_set_context(to->ring, to, hw_flags);
>  	if (ret) {
>  		i915_gem_object_unpin(to->obj);
>  		return ret;
> @@ -403,6 +397,7 @@ static int do_switch(struct drm_i915_gem_object *from_obj,
>  	 * MI_SET_CONTEXT instead of when the next seqno has completed.
>  	 */
>  	if (from_obj != NULL) {
> +		u32 seqno = i915_gem_next_request_seqno(to->ring);
>  		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
>  		 * whole damn pipeline, we don't need to explicitly mark the
>  		 * object dirty. The only exception is that the context must be
> @@ -412,13 +407,15 @@ static int do_switch(struct drm_i915_gem_object *from_obj,
>  		 */
>  		from_obj->base.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
>  		from_obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> -		i915_gem_object_move_to_active(from_obj, ring, seqno);
> +		i915_gem_object_move_to_active(from_obj, to->ring, seqno);
>  		from_obj->dirty = 1;
>  		BUG_ON(from_obj->ring != to->ring);
>  		i915_gem_object_unpin(from_obj);
> +		drm_gem_object_unreference(&from_obj->base);
>  	}
>  
> -	ring->last_context_obj = to->obj;
> +	drm_gem_object_reference(&to->obj->base);
> +	to->ring->last_context_obj = to->obj;
>  	to->is_initialized = true;
>  
>  	return 0;
> @@ -442,10 +439,7 @@ int i915_switch_context(struct intel_ring_buffer *ring,
>  			int to_id)
>  {
>  	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> -	struct drm_i915_file_private *file_priv = NULL;
>  	struct i915_hw_context *to;
> -	struct drm_i915_gem_object *from_obj = ring->last_context_obj;
> -	int ret;
>  
>  	if (dev_priv->hw_contexts_disabled)
>  		return 0;
> @@ -453,34 +447,18 @@ int i915_switch_context(struct intel_ring_buffer *ring,
>  	if (ring != &dev_priv->ring[RCS])
>  		return 0;
>  
> -	if (file)
> -		file_priv = file->driver_priv;
> -
>  	if (to_id == DEFAULT_CONTEXT_ID) {
>  		to = ring->default_context;
>  	} else {
> -		to = i915_gem_context_get(file_priv, to_id);
> +		if (file == NULL)
> +			return -EINVAL;
> +
> +		to = i915_gem_context_get(file->driver_priv, to_id);
>  		if (to == NULL)
>  			return -ENOENT;
>  	}
>  
> -	if (from_obj == to->obj)
> -		return 0;
> -
> -	ret = do_switch(from_obj, to, i915_gem_next_request_seqno(to->ring));
> -	if (ret)
> -		return ret;
> -
> -	/* Just to make the code a little cleaner we take the object reference
> -	 * after the switch was successful. It would be more intuitive to ref
> -	 * the 'to' object before the switch but we know the refcount must be >0
> -	 * if context_get() succeeded, and we hold struct mutex. So it's safe to
> -	 * do this here/now
> -	 */
> -	drm_gem_object_reference(&to->obj->base);
> -	if (from_obj != NULL)
> -		drm_gem_object_unreference(&from_obj->base);
> -	return ret;
> +	return do_switch(to);
>  }
>  
>  int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 04/13] drm/i915: Replace the pending_gpu_write flag with an explicit seqno
  2012-07-13 13:14 ` [PATCH 04/13] drm/i915: Replace the pending_gpu_write flag with an explicit seqno Chris Wilson
@ 2012-07-13 15:41   ` Daniel Vetter
  2012-07-14  9:53     ` Chris Wilson
  0 siblings, 1 reply; 29+ messages in thread
From: Daniel Vetter @ 2012-07-13 15:41 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, Jul 13, 2012 at 02:14:07PM +0100, Chris Wilson wrote:
> As we always flush the GPU cache prior to emitting the breadcrumb, we no
> longer have to worry about the deferred flush causing the
> pending_gpu_write to be delayed. So we can instead utilize the known
> last_write_seqno to hopefully minimise the wait times.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

I like this, but I'd prefer if this would be at the end of the series as a
neat improvement - I'd really prefer if we get together a somewhat minimal
fix to take care of the flushing list and intel_begin_ring interruptable
patch.

The reason is that the merge window approaches fast, and if we're unlucky
the current -next cycle won't make it into 3.6, so I'd have to send Dave a
smaller pile of fixes. Which this doesn't really look like.

Or do I again miss something? It's not really my brightest day ;-)

Cheers, Daniel

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c        |    9 ++--
>  drivers/gpu/drm/i915/i915_drv.h            |   12 ++---
>  drivers/gpu/drm/i915/i915_gem.c            |   66 ++++++++++++++++------------
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |    2 +-
>  drivers/gpu/drm/i915/i915_irq.c            |    5 ++-
>  5 files changed, 51 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 359f6e8..a8b7db6 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -121,14 +121,15 @@ static const char *cache_level_str(int type)
>  static void
>  describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>  {
> -	seq_printf(m, "%p: %s%s %8zdKiB %04x %04x %d %d%s%s%s",
> +	seq_printf(m, "%p: %s%s %8zdKiB %04x %04x %d %d %d%s%s%s",
>  		   &obj->base,
>  		   get_pin_flag(obj),
>  		   get_tiling_flag(obj),
>  		   obj->base.size / 1024,
>  		   obj->base.read_domains,
>  		   obj->base.write_domain,
> -		   obj->last_rendering_seqno,
> +		   obj->last_read_seqno,
> +		   obj->last_write_seqno,
>  		   obj->last_fenced_seqno,
>  		   cache_level_str(obj->cache_level),
>  		   obj->dirty ? " dirty" : "",
> @@ -630,12 +631,12 @@ static void print_error_buffers(struct seq_file *m,
>  	seq_printf(m, "%s [%d]:\n", name, count);
>  
>  	while (count--) {
> -		seq_printf(m, "  %08x %8u %04x %04x %08x%s%s%s%s%s%s%s",
> +		seq_printf(m, "  %08x %8u %04x %04x %x %x%s%s%s%s%s%s%s",
>  			   err->gtt_offset,
>  			   err->size,
>  			   err->read_domains,
>  			   err->write_domain,
> -			   err->seqno,
> +			   err->rseqno, err->wseqno,
>  			   pin_flag(err->pinned),
>  			   tiling_flag(err->tiling),
>  			   dirty_flag(err->dirty),
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 51e234e..b398b72 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -221,7 +221,7 @@ struct drm_i915_error_state {
>  	struct drm_i915_error_buffer {
>  		u32 size;
>  		u32 name;
> -		u32 seqno;
> +		u32 rseqno, wseqno;
>  		u32 gtt_offset;
>  		u32 read_domains;
>  		u32 write_domain;
> @@ -895,12 +895,6 @@ struct drm_i915_gem_object {
>  	unsigned int dirty:1;
>  
>  	/**
> -	 * This is set if the object has been written to since the last
> -	 * GPU flush.
> -	 */
> -	unsigned int pending_gpu_write:1;
> -
> -	/**
>  	 * Fence register bits (if any) for this object.  Will be set
>  	 * as needed when mapped into the GTT.
>  	 * Protected by dev->struct_mutex.
> @@ -992,7 +986,8 @@ struct drm_i915_gem_object {
>  	struct intel_ring_buffer *ring;
>  
>  	/** Breadcrumb of last rendering to the buffer. */
> -	uint32_t last_rendering_seqno;
> +	uint32_t last_read_seqno;
> +	uint32_t last_write_seqno;
>  	/** Breadcrumb of last fenced GPU access to the buffer. */
>  	uint32_t last_fenced_seqno;
>  
> @@ -1291,7 +1286,6 @@ void i915_gem_lastclose(struct drm_device *dev);
>  int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
>  				  gfp_t gfpmask);
>  int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
> -int __must_check i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj);
>  int i915_gem_object_sync(struct drm_i915_gem_object *obj,
>  			 struct intel_ring_buffer *to);
>  void i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 1782369..49b8864 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1441,7 +1441,7 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
>  	list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
>  	list_move_tail(&obj->ring_list, &ring->active_list);
>  
> -	obj->last_rendering_seqno = seqno;
> +	obj->last_read_seqno = seqno;
>  
>  	if (obj->fenced_gpu_access) {
>  		obj->last_fenced_seqno = seqno;
> @@ -1461,7 +1461,8 @@ static void
>  i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
>  {
>  	list_del_init(&obj->ring_list);
> -	obj->last_rendering_seqno = 0;
> +	obj->last_read_seqno = 0;
> +	obj->last_write_seqno = 0;
>  	obj->last_fenced_seqno = 0;
>  }
>  
> @@ -1493,7 +1494,6 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
>  	obj->fenced_gpu_access = false;
>  
>  	obj->active = 0;
> -	obj->pending_gpu_write = false;
>  	drm_gem_object_unreference(&obj->base);
>  
>  	WARN_ON(i915_verify_lists(dev));
> @@ -1811,7 +1811,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
>  				      struct drm_i915_gem_object,
>  				      ring_list);
>  
> -		if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))
> +		if (!i915_seqno_passed(seqno, obj->last_read_seqno))
>  			break;
>  
>  		if (obj->base.write_domain != 0)
> @@ -2034,9 +2034,11 @@ i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
>   * Ensures that all rendering to the object has completed and the object is
>   * safe to unbind from the GTT or access from the CPU.
>   */
> -int
> -i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
> +static __must_check int
> +i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
> +			       bool readonly)
>  {
> +	u32 seqno;
>  	int ret;
>  
>  	/* This function only exists to support waiting for existing rendering,
> @@ -2047,13 +2049,27 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
>  	/* If there is rendering queued on the buffer being evicted, wait for
>  	 * it.
>  	 */
> -	if (obj->active) {
> -		ret = i915_wait_seqno(obj->ring, obj->last_rendering_seqno);
> -		if (ret)
> -			return ret;
> -		i915_gem_retire_requests_ring(obj->ring);
> +	if (readonly)
> +		seqno = obj->last_write_seqno;
> +	else
> +		seqno = obj->last_read_seqno;
> +	if (seqno == 0)
> +		return 0;
> +
> +	ret = i915_wait_seqno(obj->ring, seqno);
> +	if (ret)
> +		return ret;
> +
> +	/* Manually manage the write flush as we may have not yet retired
> +	 * the buffer.
> +	 */
> +	if (obj->last_write_seqno &&
> +	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
> +		obj->last_write_seqno = 0;
> +		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
>  	}
>  
> +	i915_gem_retire_requests_ring(obj->ring);
>  	return 0;
>  }
>  
> @@ -2072,10 +2088,10 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
>  		if (ret)
>  			return ret;
>  
> -		ret = i915_gem_check_olr(obj->ring,
> -					 obj->last_rendering_seqno);
> +		ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
>  		if (ret)
>  			return ret;
> +
>  		i915_gem_retire_requests_ring(obj->ring);
>  	}
>  
> @@ -2135,7 +2151,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  		goto out;
>  
>  	if (obj->active) {
> -		seqno = obj->last_rendering_seqno;
> +		seqno = obj->last_read_seqno;
>  		ring = obj->ring;
>  	}
>  
> @@ -2190,11 +2206,11 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
>  		return 0;
>  
>  	if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
> -		return i915_gem_object_wait_rendering(obj);
> +		return i915_gem_object_wait_rendering(obj, false);
>  
>  	idx = intel_ring_sync_index(from, to);
>  
> -	seqno = obj->last_rendering_seqno;
> +	seqno = obj->last_read_seqno;
>  	if (seqno <= from->sync_seqno[idx])
>  		return 0;
>  
> @@ -2938,11 +2954,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>  	if (ret)
>  		return ret;
>  
> -	if (obj->pending_gpu_write || write) {
> -		ret = i915_gem_object_wait_rendering(obj);
> -		if (ret)
> -			return ret;
> -	}
> +	ret = i915_gem_object_wait_rendering(obj, !write);
> +	if (ret)
> +		return ret;
>  
>  	i915_gem_object_flush_cpu_write_domain(obj);
>  
> @@ -3113,7 +3127,7 @@ i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
>  			return ret;
>  	}
>  
> -	ret = i915_gem_object_wait_rendering(obj);
> +	ret = i915_gem_object_wait_rendering(obj, false);
>  	if (ret)
>  		return ret;
>  
> @@ -3141,11 +3155,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
>  	if (ret)
>  		return ret;
>  
> -	if (write || obj->pending_gpu_write) {
> -		ret = i915_gem_object_wait_rendering(obj);
> -		if (ret)
> -			return ret;
> -	}
> +	ret = i915_gem_object_wait_rendering(obj, !write);
> +	if (ret)
> +		return ret;
>  
>  	i915_gem_object_flush_gtt_write_domain(obj);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 0c26692..50e83e5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -949,7 +949,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
>  		i915_gem_object_move_to_active(obj, ring, seqno);
>  		if (obj->base.write_domain) {
>  			obj->dirty = 1;
> -			obj->pending_gpu_write = true;
> +			obj->last_write_seqno = seqno;
>  			list_move_tail(&obj->gpu_write_list,
>  				       &ring->gpu_write_list);
>  			if (obj->pin_count) /* check for potential scanout */
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 566f61b..41ed41d 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -950,7 +950,8 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>  {
>  	err->size = obj->base.size;
>  	err->name = obj->base.name;
> -	err->seqno = obj->last_rendering_seqno;
> +	err->rseqno = obj->last_read_seqno;
> +	err->wseqno = obj->last_write_seqno;
>  	err->gtt_offset = obj->gtt_offset;
>  	err->read_domains = obj->base.read_domains;
>  	err->write_domain = obj->base.write_domain;
> @@ -1045,7 +1046,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
>  		if (obj->ring != ring)
>  			continue;
>  
> -		if (i915_seqno_passed(seqno, obj->last_rendering_seqno))
> +		if (i915_seqno_passed(seqno, obj->last_read_seqno))
>  			continue;
>  
>  		if ((obj->base.read_domains & I915_GEM_DOMAIN_COMMAND) == 0)
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 05/13] drm/i915: Insert a flush between batches if the breadcrumb was dropped
  2012-07-13 13:14 ` [PATCH 05/13] drm/i915: Insert a flush between batches if the breadcrumb was dropped Chris Wilson
@ 2012-07-13 15:46   ` Daniel Vetter
  2012-07-14 10:24     ` Chris Wilson
  2012-07-14 13:39   ` Daniel Vetter
  1 sibling, 1 reply; 29+ messages in thread
From: Daniel Vetter @ 2012-07-13 15:46 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, Jul 13, 2012 at 02:14:08PM +0100, Chris Wilson wrote:
> If we drop the breadcrumb request after a batch due to a signal for
> example we aim to fix it up at the next opportunity. In this case we
> emit a second batchbuffer with no waits upon the first and so no
> opportunity to insert the missing request, so we need to emit the
> missing flush for coherency. (Note that that invalidating the render
> cache is the same as flushing it, so there should have been no
> observable corruption.)
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Imo still too meager commit message ;-) As I've said in the previous mail,
I'd like some mention of the two commits that made this disaster possible
(put the blame on me where it is due). And I think some more in-detail
walk-thru of how things blow up would be great. And the Bugzilla link for
the QA bugreport.

Also, I still don't understand why this patch here isn't enough to fix up
the fallout. So if you can enlighten me where/why stuff blows up even with
this I'd highly appreciate. Not just because not understanding bugs makes
me queasy, but also to have a clear picture of what I'd need to send to
Dave it this -next cycle misses 3.6.

Meanwhile I'll try to hit this with some i-g-t tests, maybe that gives me
better understanding of what's going on.

Thanks, Daniel

> ---
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |    9 +++++++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 50e83e5..c08229f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -885,11 +885,16 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
>  			return ret;
>  	}
>  
> -	/* Unconditionally invalidate gpu caches. */
> -	ret = i915_gem_flush_ring(ring, I915_GEM_GPU_DOMAINS, 0);
> +	/* Unconditionally invalidate gpu caches and ensure that we do flush
> +	 * any residual writes from the previous batch.
> +	 */
> +	ret = i915_gem_flush_ring(ring,
> +				  I915_GEM_GPU_DOMAINS,
> +				  ring->gpu_caches_dirty ? I915_GEM_GPU_DOMAINS : 0);
>  	if (ret)
>  		return ret;
>  
> +	ring->gpu_caches_dirty = false;
>  	return 0;
>  }
>  
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 01/13] drm/i915: Flush the context object from the CPU caches upon creation
  2012-07-13 13:14 ` [PATCH 01/13] drm/i915: Flush the context object from the CPU caches upon creation Chris Wilson
  2012-07-13 15:28   ` Ben Widawsky
@ 2012-07-13 15:54   ` Daniel Vetter
  2012-07-14  9:38     ` Chris Wilson
  2012-07-14 11:58   ` Daniel Vetter
  2 siblings, 1 reply; 29+ messages in thread
From: Daniel Vetter @ 2012-07-13 15:54 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, Ben Widawsky

On Fri, Jul 13, 2012 at 02:14:04PM +0100, Chris Wilson wrote:
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ben Widawsky <ben@bwidawsk.net>
> ---
>  drivers/gpu/drm/i915/i915_gem_context.c |   12 ++++++++++--
>  1 file changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 9ae3f2c..90857f8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -225,6 +225,13 @@ static int create_default_context(struct drm_i915_private *dev_priv)
>  		return ret;
>  	}
>  
> +	ret = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
> +	if (ret) {
> +		i915_gem_object_unpin(ctx->obj);
> +		do_destroy(ctx);
> +		return ret;
> +	}
> +
>  	ret = do_switch(NULL, ctx, 0);
>  	if (ret) {
>  		i915_gem_object_unpin(ctx->obj);
> @@ -396,8 +403,6 @@ static int do_switch(struct drm_i915_gem_object *from_obj,
>  	 * MI_SET_CONTEXT instead of when the next seqno has completed.
>  	 */
>  	if (from_obj != NULL) {
> -		from_obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> -		i915_gem_object_move_to_active(from_obj, ring, seqno);
>  		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
>  		 * whole damn pipeline, we don't need to explicitly mark the
>  		 * object dirty. The only exception is that the context must be
> @@ -405,6 +410,9 @@ static int do_switch(struct drm_i915_gem_object *from_obj,
>  		 * able to defer doing this until we know the object would be
>  		 * swapped, but there is no way to do that yet.
>  		 */
> +		from_obj->base.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
> +		from_obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> +		i915_gem_object_move_to_active(from_obj, ring, seqno);
>  		from_obj->dirty = 1;
>  		BUG_ON(from_obj->ring != to->ring);
>  		i915_gem_object_unpin(from_obj);

I think only the first hunk should be part of this patch - the later two
hunks make more sense squashed together with the last patch. At least that
would avoid me going a bit wtf here and then again on the last patch where
the from_obj->dirty=1 gets removed and smashed into move_to_active. Until
I've realized what's going on here ;-)
-Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 01/13] drm/i915: Flush the context object from the CPU caches upon creation
  2012-07-13 15:54   ` Daniel Vetter
@ 2012-07-14  9:38     ` Chris Wilson
  0 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2012-07-14  9:38 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx, Ben Widawsky

On Fri, 13 Jul 2012 17:54:54 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> I think only the first hunk should be part of this patch - the later two
> hunks make more sense squashed together with the last patch. At least that
> would avoid me going a bit wtf here and then again on the last patch where
> the from_obj->dirty=1 gets removed and smashed into move_to_active. Until
> I've realized what's going on here ;-)

When I've rewritten this patch to be more correct (handle the swap-in
case as well), it will make more sense the way it is in this patch.
Honestly. :)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 04/13] drm/i915: Replace the pending_gpu_write flag with an explicit seqno
  2012-07-13 15:41   ` Daniel Vetter
@ 2012-07-14  9:53     ` Chris Wilson
  0 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2012-07-14  9:53 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Fri, 13 Jul 2012 17:41:01 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Fri, Jul 13, 2012 at 02:14:07PM +0100, Chris Wilson wrote:
> > As we always flush the GPU cache prior to emitting the breadcrumb, we no
> > longer have to worry about the deferred flush causing the
> > pending_gpu_write to be delayed. So we can instead utilize the known
> > last_write_seqno to hopefully minimise the wait times.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> I like this, but I'd prefer if this would be at the end of the series as a
> neat improvement - I'd really prefer if we get together a somewhat minimal
> fix to take care of the flushing list and intel_begin_ring interruptable
> patch.
> 
> The reason is that the merge window approaches fast, and if we're unlucky
> the current -next cycle won't make it into 3.6, so I'd have to send Dave a
> smaller pile of fixes. Which this doesn't really look like.
> 
> Or do I again miss something? It's not really my brightest day ;-)

This is just to enable removing the pending_gpu_write bit, and really if
the flushing list removal wasn't to enable this patch, what was the
point? :-p
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 02/13] drm/i915: fix invalid reference handling of the default ctx obj
  2012-07-13 15:37   ` Daniel Vetter
@ 2012-07-14  9:55     ` Chris Wilson
  2012-07-14 11:53       ` Daniel Vetter
  0 siblings, 1 reply; 29+ messages in thread
From: Chris Wilson @ 2012-07-14  9:55 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx, Ben Widawsky

On Fri, 13 Jul 2012 17:37:14 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Fri, Jul 13, 2012 at 02:14:05PM +0100, Chris Wilson wrote:
> > Otherwise we end up trying to unpin a freed object and BUG.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Ben Widawsky <ben@bwidawsk.net>
> 
> Afact this patch contains quite some code refactoring that does not
> relate directly to the fix (or if it does, I fail to see the direct
> relevance). So I think this either needs an explanation in the commit
> message or be put into a separate patch (I agree though for actual code
> cleanups).
> 
> For the fix itself I seem to be a bit dense again - the only thing I see
> is that you move the refcount handling into do_switch. Afacs we do the
> ref-handling in both cases only when do_switch is successful, and also
> right at the end of do_switch (or right afterwards). So can you please
> enlighten your clueless maintainer a bit an explain how things blow up?

The fix is that the reference handling was only done on one path, not
both. Hence the default_ctx ends up being used-after-free.

The rest of it was just unwinding the code to get to finding the bug...
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 05/13] drm/i915: Insert a flush between batches if the breadcrumb was dropped
  2012-07-13 15:46   ` Daniel Vetter
@ 2012-07-14 10:24     ` Chris Wilson
  0 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2012-07-14 10:24 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Fri, 13 Jul 2012 17:46:20 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Fri, Jul 13, 2012 at 02:14:08PM +0100, Chris Wilson wrote:
> > If we drop the breadcrumb request after a batch due to a signal for
> > example we aim to fix it up at the next opportunity. In this case we
> > emit a second batchbuffer with no waits upon the first and so no
> > opportunity to insert the missing request, so we need to emit the
> > missing flush for coherency. (Note that that invalidating the render
> > cache is the same as flushing it, so there should have been no
> > observable corruption.)
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Imo still too meager commit message ;-) As I've said in the previous mail,
> I'd like some mention of the two commits that made this disaster possible
> (put the blame on me where it is due). And I think some more in-detail
> walk-thru of how things blow up would be great. And the Bugzilla link for
> the QA bugreport.

Sure, in the patch I thought I was sending I had an extra paragraph:

    As a side effect this will also paper over issues such as
      https://bugs.freedesktop.org/show_bug.cgi?id=52040
    whereby we clear the write_domain on objects on the defunct
    gpu_write_list.
    
    References: https://bugs.freedesktop.org/show_bug.cgi?id=52040

> Also, I still don't understand why this patch here isn't enough to fix up
> the fallout. So if you can enlighten me where/why stuff blows up even with
> this I'd highly appreciate. Not just because not understanding bugs makes
> me queasy, but also to have a clear picture of what I'd need to send to
> Dave it this -next cycle misses 3.6.

The remaining fallout is that we still end up using the flushing-list,
as revealed by *adding* a WARN.

To end up in that situation we must retire an object with a write-domain
still set. But how can this be possible if we always clear the
write_list prior to the request/retirment?

I thought I had it, being sneaky with the use of INSTRUCTION write
domain for pipe-control. However, looks like I'm going to have to
reproduce with some more debugging.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 02/13] drm/i915: fix invalid reference handling of the default ctx obj
  2012-07-14  9:55     ` Chris Wilson
@ 2012-07-14 11:53       ` Daniel Vetter
  0 siblings, 0 replies; 29+ messages in thread
From: Daniel Vetter @ 2012-07-14 11:53 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, Ben Widawsky

On Sat, Jul 14, 2012 at 10:55:19AM +0100, Chris Wilson wrote:
> On Fri, 13 Jul 2012 17:37:14 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> > On Fri, Jul 13, 2012 at 02:14:05PM +0100, Chris Wilson wrote:
> > > Otherwise we end up trying to unpin a freed object and BUG.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: Ben Widawsky <ben@bwidawsk.net>
> > 
> > Afact this patch contains quite some code refactoring that does not
> > relate directly to the fix (or if it does, I fail to see the direct
> > relevance). So I think this either needs an explanation in the commit
> > message or be put into a separate patch (I agree though for actual code
> > cleanups).
> > 
> > For the fix itself I seem to be a bit dense again - the only thing I see
> > is that you move the refcount handling into do_switch. Afacs we do the
> > ref-handling in both cases only when do_switch is successful, and also
> > right at the end of do_switch (or right afterwards). So can you please
> > enlighten your clueless maintainer a bit an explain how things blow up?
> 
> The fix is that the reference handling was only done on one path, not
> both. Hence the default_ctx ends up being used-after-free.
> 
> The rest of it was just unwinding the code to get to finding the bug...

Yeah, I've figured this out somewhat later yesterday. Still, a bugfix
shouldn't resemble a riddle more than a simple patch ... Afacs we only
need to move the reference count handling from i915_switch_context to
do_switch (and mention in the commit message that it's been missing when
creating the default context).
-Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 01/13] drm/i915: Flush the context object from the CPU caches upon creation
  2012-07-13 13:14 ` [PATCH 01/13] drm/i915: Flush the context object from the CPU caches upon creation Chris Wilson
  2012-07-13 15:28   ` Ben Widawsky
  2012-07-13 15:54   ` Daniel Vetter
@ 2012-07-14 11:58   ` Daniel Vetter
  2012-07-14 12:48     ` Chris Wilson
  2 siblings, 1 reply; 29+ messages in thread
From: Daniel Vetter @ 2012-07-14 11:58 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, Ben Widawsky

On Fri, Jul 13, 2012 at 02:14:04PM +0100, Chris Wilson wrote:
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ben Widawsky <ben@bwidawsk.net>
> ---
>  drivers/gpu/drm/i915/i915_gem_context.c |   12 ++++++++++--
>  1 file changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 9ae3f2c..90857f8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -225,6 +225,13 @@ static int create_default_context(struct drm_i915_private *dev_priv)
>  		return ret;
>  	}
>  
> +	ret = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
> +	if (ret) {
> +		i915_gem_object_unpin(ctx->obj);
> +		do_destroy(ctx);
> +		return ret;
> +	}
> +
>  	ret = do_switch(NULL, ctx, 0);
>  	if (ret) {
>  		i915_gem_object_unpin(ctx->obj);
> @@ -396,8 +403,6 @@ static int do_switch(struct drm_i915_gem_object *from_obj,
>  	 * MI_SET_CONTEXT instead of when the next seqno has completed.
>  	 */
>  	if (from_obj != NULL) {
> -		from_obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> -		i915_gem_object_move_to_active(from_obj, ring, seqno);
>  		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
>  		 * whole damn pipeline, we don't need to explicitly mark the
>  		 * object dirty. The only exception is that the context must be
> @@ -405,6 +410,9 @@ static int do_switch(struct drm_i915_gem_object *from_obj,
>  		 * able to defer doing this until we know the object would be
>  		 * swapped, but there is no way to do that yet.
>  		 */
> +		from_obj->base.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
> +		from_obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;

Presuming I understand things correctly, setting write_domain to non-zero
will result in the ctx object landing on the flushing list when we retire
it from the active list. But it isn't being added to the ring's
gpu_write_list, so it won't ever get off that flushing list and eventually
result in the BUG_ON(seqno == 0) when we try to wait for it after a flush.

So afact this first patch here seems to add another instance of the very
bug this patch series tries squash ... Additionally I'm still hunting for
that other failure case, which can't be fixed by adding the flush in
execbuffer if ring->gpu_caches_dirty is set.

/me is still lost

-Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 01/13] drm/i915: Flush the context object from the CPU caches upon creation
  2012-07-14 11:58   ` Daniel Vetter
@ 2012-07-14 12:48     ` Chris Wilson
  2012-07-14 12:59       ` Daniel Vetter
  0 siblings, 1 reply; 29+ messages in thread
From: Chris Wilson @ 2012-07-14 12:48 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx, Ben Widawsky

On Sat, 14 Jul 2012 13:58:58 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> So afact this first patch here seems to add another instance of the very
> bug this patch series tries squash ... Additionally I'm still hunting for
> that other failure case, which can't be fixed by adding the flush in
> execbuffer if ring->gpu_caches_dirty is set.
> 
> /me is still lost

Now all that you are missing is that we only flush GPU_DOMAINS when on
the gpu_write_list.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 01/13] drm/i915: Flush the context object from the CPU caches upon creation
  2012-07-14 12:48     ` Chris Wilson
@ 2012-07-14 12:59       ` Daniel Vetter
  0 siblings, 0 replies; 29+ messages in thread
From: Daniel Vetter @ 2012-07-14 12:59 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, Ben Widawsky

On Sat, Jul 14, 2012 at 2:48 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Sat, 14 Jul 2012 13:58:58 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
>> So afact this first patch here seems to add another instance of the very
>> bug this patch series tries squash ... Additionally I'm still hunting for
>> that other failure case, which can't be fixed by adding the flush in
>> execbuffer if ring->gpu_caches_dirty is set.
>>
>> /me is still lost
>
> Now all that you are missing is that we only flush GPU_DOMAINS when on
> the gpu_write_list.

Hm, I guess we've seen that one before ;-) But that would require that
we add an object with a non-gpu write domain to the active list, but
both with or without this patch I don't see how that should happen ...

I guess a WARN in move_to_active won't hurt (and we should have done
that when fixing the GTT_DOMAIN relocation hole in execbuf), but
besides that I'm still as dense as ever, it seems.
-Daniel
-- 
Daniel Vetter
daniel.vetter@ffwll.ch - +41 (0) 79 364 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 05/13] drm/i915: Insert a flush between batches if the breadcrumb was dropped
  2012-07-13 13:14 ` [PATCH 05/13] drm/i915: Insert a flush between batches if the breadcrumb was dropped Chris Wilson
  2012-07-13 15:46   ` Daniel Vetter
@ 2012-07-14 13:39   ` Daniel Vetter
  1 sibling, 0 replies; 29+ messages in thread
From: Daniel Vetter @ 2012-07-14 13:39 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, Jul 13, 2012 at 02:14:08PM +0100, Chris Wilson wrote:
> If we drop the breadcrumb request after a batch due to a signal for
> example we aim to fix it up at the next opportunity. In this case we
> emit a second batchbuffer with no waits upon the first and so no
> opportunity to insert the missing request, so we need to emit the
> missing flush for coherency. (Note that that invalidating the render
> cache is the same as flushing it, so there should have been no
> observable corruption.)
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Ok, now that I've got some more clue about how this all blows up, I've
merged this patch here (with a rather decently pimped commit message).
Thanks for digging into this & feeding me the lacking clue.

Cheers, Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2012-07-14 13:40 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-07-13 13:14 Remove defunct flushing list (v2) Chris Wilson
2012-07-13 13:14 ` [PATCH 01/13] drm/i915: Flush the context object from the CPU caches upon creation Chris Wilson
2012-07-13 15:28   ` Ben Widawsky
2012-07-13 15:54   ` Daniel Vetter
2012-07-14  9:38     ` Chris Wilson
2012-07-14 11:58   ` Daniel Vetter
2012-07-14 12:48     ` Chris Wilson
2012-07-14 12:59       ` Daniel Vetter
2012-07-13 13:14 ` [PATCH 02/13] drm/i915: fix invalid reference handling of the default ctx obj Chris Wilson
2012-07-13 15:25   ` Ben Widawsky
2012-07-13 15:37   ` Daniel Vetter
2012-07-14  9:55     ` Chris Wilson
2012-07-14 11:53       ` Daniel Vetter
2012-07-13 13:14 ` [PATCH 03/13] drm/i915: Allow late allocation of request for i915_add_request() Chris Wilson
2012-07-13 13:14 ` [PATCH 04/13] drm/i915: Replace the pending_gpu_write flag with an explicit seqno Chris Wilson
2012-07-13 15:41   ` Daniel Vetter
2012-07-14  9:53     ` Chris Wilson
2012-07-13 13:14 ` [PATCH 05/13] drm/i915: Insert a flush between batches if the breadcrumb was dropped Chris Wilson
2012-07-13 15:46   ` Daniel Vetter
2012-07-14 10:24     ` Chris Wilson
2012-07-14 13:39   ` Daniel Vetter
2012-07-13 13:14 ` [PATCH 06/13] drm/i915: Remove the defunct flushing list Chris Wilson
2012-07-13 13:14 ` [PATCH 07/13] drm/i915: Remove the per-ring write list Chris Wilson
2012-07-13 13:14 ` [PATCH 08/13] drm/i915: Remove explicit flush from i915_gem_object_flush_fence() Chris Wilson
2012-07-13 13:14 ` [PATCH 09/13] drm/i915: Remove the explicit flush of the GPU write domain Chris Wilson
2012-07-13 13:14 ` [PATCH 10/13] drm/i915: Replace the complex flushing logic with simple invalidate/flush all Chris Wilson
2012-07-13 13:14 ` [PATCH 11/13] drm/i915: Clear the pending_gpu_fenced_access flag at the start of execbuffer Chris Wilson
2012-07-13 13:14 ` [PATCH 12/13] drm/i915: Split i915_gem_flush_ring() into seperate invalidate/flush funcs Chris Wilson
2012-07-13 13:14 ` [PATCH 13/13] drm/i915: Move the write seqno handling to move_to_active Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.