All of lore.kernel.org
 help / color / mirror / Atom feed
* Kill the flushing list
@ 2012-07-12 15:13 Chris Wilson
  2012-07-12 15:13 ` [PATCH 1/6] drm/i915: Allow late allocation of request for i915_add_request() Chris Wilson
                   ` (5 more replies)
  0 siblings, 6 replies; 17+ messages in thread
From: Chris Wilson @ 2012-07-12 15:13 UTC (permalink / raw)
  To: intel-gfx

So it appears I alone have been hitting the WARNs left in place to flag
breakage from the initial prep work for the flushing list. Thanks to
this dubious honour, I've reviewed that work and decided to finish the
removal in order to kill the WARNs.

Please review, review, test, review.
-Chris

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 1/6] drm/i915: Allow late allocation of request for i915_add_request()
  2012-07-12 15:13 Kill the flushing list Chris Wilson
@ 2012-07-12 15:13 ` Chris Wilson
  2012-07-12 17:43   ` Daniel Vetter
  2012-07-12 18:02   ` [PATCH] " Chris Wilson
  2012-07-12 15:13 ` [PATCH 2/6] drm/i915: Remove the defunct flushing list Chris Wilson
                   ` (4 subsequent siblings)
  5 siblings, 2 replies; 17+ messages in thread
From: Chris Wilson @ 2012-07-12 15:13 UTC (permalink / raw)
  To: intel-gfx

Request preallocation was added to i915_add_request() in order to
support the overlay. However, not all uses care and can quite happily
ignore the failure to allocate the request as they will simply repeat
the request in the future.

By pushing the allocation down into i915_add_request(), we can then
remove some rather ugly error handling in the callers.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            |    6 ++---
 drivers/gpu/drm/i915/i915_gem.c            |   39 ++++++++++------------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |    7 +----
 3 files changed, 17 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 627fe35..51e234e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1358,9 +1358,9 @@ void i915_gem_init_ppgtt(struct drm_device *dev);
 void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
 int __must_check i915_gpu_idle(struct drm_device *dev);
 int __must_check i915_gem_idle(struct drm_device *dev);
-int __must_check i915_add_request(struct intel_ring_buffer *ring,
-				  struct drm_file *file,
-				  struct drm_i915_gem_request *request);
+int i915_add_request(struct intel_ring_buffer *ring,
+		     struct drm_file *file,
+		     struct drm_i915_gem_request *request);
 int __must_check i915_wait_seqno(struct intel_ring_buffer *ring,
 				 uint32_t seqno);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 53c3946..875b745 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1597,7 +1597,6 @@ i915_add_request(struct intel_ring_buffer *ring,
 		ring->gpu_caches_dirty = false;
 	}
 
-	BUG_ON(request == NULL);
 	seqno = i915_gem_next_request_seqno(ring);
 
 	/* Record the position of the start of the request so that
@@ -1609,7 +1608,13 @@ i915_add_request(struct intel_ring_buffer *ring,
 
 	ret = ring->add_request(ring, &seqno);
 	if (ret)
-	    return ret;
+		return ret;
+
+	if (request == NULL) {
+		request = kmalloc(sizeof(*request), GFP_KERNEL);
+		if (request == NULL)
+			return -ENOMEM;
+	}
 
 	trace_i915_gem_request_add(ring, seqno);
 
@@ -1859,14 +1864,8 @@ i915_gem_retire_work_handler(struct work_struct *work)
 	 */
 	idle = true;
 	for_each_ring(ring, dev_priv, i) {
-		if (ring->gpu_caches_dirty) {
-			struct drm_i915_gem_request *request;
-
-			request = kzalloc(sizeof(*request), GFP_KERNEL);
-			if (request == NULL ||
-			    i915_add_request(ring, NULL, request))
-			    kfree(request);
-		}
+		if (ring->gpu_caches_dirty)
+			i915_add_request(ring, NULL, NULL);
 
 		idle &= list_empty(&ring->request_list);
 	}
@@ -1913,25 +1912,13 @@ i915_gem_check_wedge(struct drm_i915_private *dev_priv,
 static int
 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
 {
-	int ret = 0;
+	int ret;
 
 	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
 
-	if (seqno == ring->outstanding_lazy_request) {
-		struct drm_i915_gem_request *request;
-
-		request = kzalloc(sizeof(*request), GFP_KERNEL);
-		if (request == NULL)
-			return -ENOMEM;
-
-		ret = i915_add_request(ring, NULL, request);
-		if (ret) {
-			kfree(request);
-			return ret;
-		}
-
-		BUG_ON(seqno != request->seqno);
-	}
+	ret = 0;
+	if (seqno == ring->outstanding_lazy_request)
+		ret = i915_add_request(ring, NULL, NULL);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 88e2e11..0c26692 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -967,16 +967,11 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
 				    struct drm_file *file,
 				    struct intel_ring_buffer *ring)
 {
-	struct drm_i915_gem_request *request;
-
 	/* Unconditionally force add_request to emit a full flush. */
 	ring->gpu_caches_dirty = true;
 
 	/* Add a breadcrumb for the completion of the batch buffer */
-	request = kzalloc(sizeof(*request), GFP_KERNEL);
-	if (request == NULL || i915_add_request(ring, file, request)) {
-		kfree(request);
-	}
+	(void)i915_add_request(ring, file, NULL);
 }
 
 static int
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 2/6] drm/i915: Remove the defunct flushing list
  2012-07-12 15:13 Kill the flushing list Chris Wilson
  2012-07-12 15:13 ` [PATCH 1/6] drm/i915: Allow late allocation of request for i915_add_request() Chris Wilson
@ 2012-07-12 15:13 ` Chris Wilson
  2012-07-13  9:12   ` Daniel Vetter
  2012-07-12 15:13 ` [PATCH 3/6] drm/i915: Remove the per-ring write list Chris Wilson
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2012-07-12 15:13 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c   |    7 ----
 drivers/gpu/drm/i915/i915_drv.h       |   19 +++--------
 drivers/gpu/drm/i915/i915_gem.c       |   57 ++++++---------------------------
 drivers/gpu/drm/i915/i915_gem_evict.c |   20 ------------
 4 files changed, 13 insertions(+), 90 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 359f6e8..d149890 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -44,7 +44,6 @@
 
 enum {
 	ACTIVE_LIST,
-	FLUSHING_LIST,
 	INACTIVE_LIST,
 	PINNED_LIST,
 };
@@ -177,10 +176,6 @@ static int i915_gem_object_list_info(struct seq_file *m, void *data)
 		seq_printf(m, "Inactive:\n");
 		head = &dev_priv->mm.inactive_list;
 		break;
-	case FLUSHING_LIST:
-		seq_printf(m, "Flushing:\n");
-		head = &dev_priv->mm.flushing_list;
-		break;
 	default:
 		mutex_unlock(&dev->struct_mutex);
 		return -EINVAL;
@@ -238,7 +233,6 @@ static int i915_gem_object_info(struct seq_file *m, void* data)
 
 	size = count = mappable_size = mappable_count = 0;
 	count_objects(&dev_priv->mm.active_list, mm_list);
-	count_objects(&dev_priv->mm.flushing_list, mm_list);
 	seq_printf(m, "  %u [%u] active objects, %zu [%zu] bytes\n",
 		   count, mappable_count, size, mappable_size);
 
@@ -2006,7 +2000,6 @@ static struct drm_info_list i915_debugfs_list[] = {
 	{"i915_gem_gtt", i915_gem_gtt_info, 0},
 	{"i915_gem_pinned", i915_gem_gtt_info, 0, (void *) PINNED_LIST},
 	{"i915_gem_active", i915_gem_object_list_info, 0, (void *) ACTIVE_LIST},
-	{"i915_gem_flushing", i915_gem_object_list_info, 0, (void *) FLUSHING_LIST},
 	{"i915_gem_inactive", i915_gem_object_list_info, 0, (void *) INACTIVE_LIST},
 	{"i915_gem_pageflip", i915_gem_pageflip_info, 0},
 	{"i915_gem_request", i915_gem_request_info, 0},
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 51e234e..38b95be 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -696,17 +696,6 @@ typedef struct drm_i915_private {
 		struct list_head active_list;
 
 		/**
-		 * List of objects which are not in the ringbuffer but which
-		 * still have a write_domain which needs to be flushed before
-		 * unbinding.
-		 *
-		 * last_rendering_seqno is 0 while an object is in this list.
-		 *
-		 * A reference is held on the buffer while on this list.
-		 */
-		struct list_head flushing_list;
-
-		/**
 		 * LRU list of objects which are not in the ringbuffer and
 		 * are ready to unbind, but are still in the GTT.
 		 *
@@ -873,7 +862,7 @@ struct drm_i915_gem_object {
 	struct drm_mm_node *gtt_space;
 	struct list_head gtt_list;
 
-	/** This object's place on the active/flushing/inactive lists */
+	/** This object's place on the active/inactive lists */
 	struct list_head ring_list;
 	struct list_head mm_list;
 	/** This object's place on GPU write list */
@@ -882,9 +871,9 @@ struct drm_i915_gem_object {
 	struct list_head exec_list;
 
 	/**
-	 * This is set if the object is on the active or flushing lists
-	 * (has pending rendering), and is not set if it's on inactive (ready
-	 * to be unbound).
+	 * This is set if the object is on the active lists (has pending
+	 * rendering and so a non-zero seqno), and is not set if it is on
+	 * inactive (ready to be unbound) list.
 	 */
 	unsigned int active:1;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 875b745..f69d897 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1458,26 +1458,6 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
 }
 
 static void
-i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
-{
-	list_del_init(&obj->ring_list);
-	obj->last_rendering_seqno = 0;
-	obj->last_fenced_seqno = 0;
-}
-
-static void
-i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
-{
-	struct drm_device *dev = obj->base.dev;
-	drm_i915_private_t *dev_priv = dev->dev_private;
-
-	BUG_ON(!obj->active);
-	list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);
-
-	i915_gem_object_move_off_active(obj);
-}
-
-static void
 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 {
 	struct drm_device *dev = obj->base.dev;
@@ -1487,13 +1467,18 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 
 	BUG_ON(!list_empty(&obj->gpu_write_list));
 	BUG_ON(!obj->active);
+
+	list_del_init(&obj->ring_list);
 	obj->ring = NULL;
 
-	i915_gem_object_move_off_active(obj);
+	obj->last_rendering_seqno = 0;
+	obj->last_fenced_seqno = 0;
 	obj->fenced_gpu_access = false;
 
-	obj->active = 0;
+	obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
 	obj->pending_gpu_write = false;
+
+	obj->active = 0;
 	drm_gem_object_unreference(&obj->base);
 
 	WARN_ON(i915_verify_lists(dev));
@@ -1728,20 +1713,6 @@ void i915_gem_reset(struct drm_device *dev)
 	for_each_ring(ring, dev_priv, i)
 		i915_gem_reset_ring_lists(dev_priv, ring);
 
-	/* Remove anything from the flushing lists. The GPU cache is likely
-	 * to be lost on reset along with the data, so simply move the
-	 * lost bo to the inactive list.
-	 */
-	while (!list_empty(&dev_priv->mm.flushing_list)) {
-		obj = list_first_entry(&dev_priv->mm.flushing_list,
-				      struct drm_i915_gem_object,
-				      mm_list);
-
-		obj->base.write_domain = 0;
-		list_del_init(&obj->gpu_write_list);
-		i915_gem_object_move_to_inactive(obj);
-	}
-
 	/* Move everything out of the GPU domains to ensure we do any
 	 * necessary invalidation upon reuse.
 	 */
@@ -1812,10 +1783,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
 		if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))
 			break;
 
-		if (obj->base.write_domain != 0)
-			i915_gem_object_move_to_flushing(obj);
-		else
-			i915_gem_object_move_to_inactive(obj);
+		i915_gem_object_move_to_inactive(obj);
 	}
 
 	if (unlikely(ring->trace_irq_seqno &&
@@ -3877,7 +3845,6 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 	}
 
 	BUG_ON(!list_empty(&dev_priv->mm.active_list));
-	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
 	BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
 	mutex_unlock(&dev->struct_mutex);
 
@@ -3935,7 +3902,6 @@ i915_gem_load(struct drm_device *dev)
 	drm_i915_private_t *dev_priv = dev->dev_private;
 
 	INIT_LIST_HEAD(&dev_priv->mm.active_list);
-	INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
 	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
 	INIT_LIST_HEAD(&dev_priv->mm.gtt_list);
@@ -4186,12 +4152,7 @@ static int
 i915_gpu_is_active(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	int lists_empty;
-
-	lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
-		      list_empty(&dev_priv->mm.active_list);
-
-	return !lists_empty;
+	return !list_empty(&dev_priv->mm.active_list);
 }
 
 static int
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index ae7c24e..f61af59 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -92,23 +92,6 @@ i915_gem_evict_something(struct drm_device *dev, int min_size,
 
 	/* Now merge in the soon-to-be-expired objects... */
 	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
-		/* Does the object require an outstanding flush? */
-		if (obj->base.write_domain)
-			continue;
-
-		if (mark_free(obj, &unwind_list))
-			goto found;
-	}
-
-	/* Finally add anything with a pending flush (in order of retirement) */
-	list_for_each_entry(obj, &dev_priv->mm.flushing_list, mm_list) {
-		if (mark_free(obj, &unwind_list))
-			goto found;
-	}
-	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
-		if (!obj->base.write_domain)
-			continue;
-
 		if (mark_free(obj, &unwind_list))
 			goto found;
 	}
@@ -171,7 +154,6 @@ i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only)
 	int ret;
 
 	lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
-		       list_empty(&dev_priv->mm.flushing_list) &&
 		       list_empty(&dev_priv->mm.active_list));
 	if (lists_empty)
 		return -ENOSPC;
@@ -188,8 +170,6 @@ i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only)
 
 	i915_gem_retire_requests(dev);
 
-	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
-
 	/* Having flushed everything, unbind() should never raise an error */
 	list_for_each_entry_safe(obj, next,
 				 &dev_priv->mm.inactive_list, mm_list) {
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 3/6] drm/i915: Remove the per-ring write list
  2012-07-12 15:13 Kill the flushing list Chris Wilson
  2012-07-12 15:13 ` [PATCH 1/6] drm/i915: Allow late allocation of request for i915_add_request() Chris Wilson
  2012-07-12 15:13 ` [PATCH 2/6] drm/i915: Remove the defunct flushing list Chris Wilson
@ 2012-07-12 15:13 ` Chris Wilson
  2012-07-12 19:37   ` Daniel Vetter
  2012-07-12 15:13 ` [PATCH 4/6] drm/i915: Remove explicit flush from i915_gem_object_flush_fence() Chris Wilson
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2012-07-12 15:13 UTC (permalink / raw)
  To: intel-gfx

This is now handled by a global flag to ensure we emit a flush before
the next serialisation point (if we failed to queue one previously).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            |    2 -
 drivers/gpu/drm/i915/i915_gem.c            |   55 ++--------------------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   10 ++---
 drivers/gpu/drm/i915/intel_ringbuffer.c    |    2 -
 drivers/gpu/drm/i915/intel_ringbuffer.h    |    9 -----
 5 files changed, 7 insertions(+), 71 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 38b95be..7871760 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -865,8 +865,6 @@ struct drm_i915_gem_object {
 	/** This object's place on the active/inactive lists */
 	struct list_head ring_list;
 	struct list_head mm_list;
-	/** This object's place on GPU write list */
-	struct list_head gpu_write_list;
 	/** This object's place in the batchbuffer or on the eviction list */
 	struct list_head exec_list;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f69d897..535dd61 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1465,7 +1465,6 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 
 	list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
 
-	BUG_ON(!list_empty(&obj->gpu_write_list));
 	BUG_ON(!obj->active);
 
 	list_del_init(&obj->ring_list);
@@ -1510,30 +1509,6 @@ i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
 	return obj->madv == I915_MADV_DONTNEED;
 }
 
-static void
-i915_gem_process_flushing_list(struct intel_ring_buffer *ring,
-			       uint32_t flush_domains)
-{
-	struct drm_i915_gem_object *obj, *next;
-
-	list_for_each_entry_safe(obj, next,
-				 &ring->gpu_write_list,
-				 gpu_write_list) {
-		if (obj->base.write_domain & flush_domains) {
-			uint32_t old_write_domain = obj->base.write_domain;
-
-			obj->base.write_domain = 0;
-			list_del_init(&obj->gpu_write_list);
-			i915_gem_object_move_to_active(obj, ring,
-						       i915_gem_next_request_seqno(ring));
-
-			trace_i915_gem_object_change_domain(obj,
-							    obj->base.read_domains,
-							    old_write_domain);
-		}
-	}
-}
-
 static u32
 i915_gem_get_seqno(struct drm_device *dev)
 {
@@ -1633,8 +1608,6 @@ i915_add_request(struct intel_ring_buffer *ring,
 					   &dev_priv->mm.retire_work, HZ);
 	}
 
-	WARN_ON(!list_empty(&ring->gpu_write_list));
-
 	return 0;
 }
 
@@ -1677,7 +1650,7 @@ static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
 				       ring_list);
 
 		obj->base.write_domain = 0;
-		list_del_init(&obj->gpu_write_list);
+		obj->pending_gpu_write = false;
 		i915_gem_object_move_to_inactive(obj);
 	}
 }
@@ -2005,11 +1978,6 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
 {
 	int ret;
 
-	/* This function only exists to support waiting for existing rendering,
-	 * not for emitting required flushes.
-	 */
-	BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0);
-
 	/* If there is rendering queued on the buffer being evicted, wait for
 	 * it.
 	 */
@@ -2017,6 +1985,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
 		ret = i915_wait_seqno(obj->ring, obj->last_rendering_seqno);
 		if (ret)
 			return ret;
+
 		i915_gem_retire_requests_ring(obj->ring);
 	}
 
@@ -2288,26 +2257,14 @@ i915_gem_flush_ring(struct intel_ring_buffer *ring,
 	if (ret)
 		return ret;
 
-	if (flush_domains & I915_GEM_GPU_DOMAINS)
-		i915_gem_process_flushing_list(ring, flush_domains);
-
 	return 0;
 }
 
 static int i915_ring_idle(struct intel_ring_buffer *ring)
 {
-	int ret;
-
-	if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list))
+	if (list_empty(&ring->active_list))
 		return 0;
 
-	if (!list_empty(&ring->gpu_write_list)) {
-		ret = i915_gem_flush_ring(ring,
-				    I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
-		if (ret)
-			return ret;
-	}
-
 	return i915_wait_seqno(ring, i915_gem_next_request_seqno(ring));
 }
 
@@ -2323,10 +2280,6 @@ int i915_gpu_idle(struct drm_device *dev)
 		if (ret)
 			return ret;
 
-		/* Is the device fubar? */
-		if (WARN_ON(!list_empty(&ring->gpu_write_list)))
-			return -EBUSY;
-
 		ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID);
 		if (ret)
 			return ret;
@@ -3471,7 +3424,6 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 	INIT_LIST_HEAD(&obj->gtt_list);
 	INIT_LIST_HEAD(&obj->ring_list);
 	INIT_LIST_HEAD(&obj->exec_list);
-	INIT_LIST_HEAD(&obj->gpu_write_list);
 	obj->madv = I915_MADV_WILLNEED;
 	/* Avoid an unnecessary call to unbind on the first bind. */
 	obj->map_and_fenceable = true;
@@ -3892,7 +3844,6 @@ init_ring_lists(struct intel_ring_buffer *ring)
 {
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
-	INIT_LIST_HEAD(&ring->gpu_write_list);
 }
 
 void
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0c26692..ca48978 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -938,9 +938,8 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
 	struct drm_i915_gem_object *obj;
 
 	list_for_each_entry(obj, objects, exec_list) {
-		  u32 old_read = obj->base.read_domains;
-		  u32 old_write = obj->base.write_domain;
-
+		u32 old_read = obj->base.read_domains;
+		u32 old_write = obj->base.write_domain;
 
 		obj->base.read_domains = obj->base.pending_read_domains;
 		obj->base.write_domain = obj->base.pending_write_domain;
@@ -950,11 +949,10 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
 		if (obj->base.write_domain) {
 			obj->dirty = 1;
 			obj->pending_gpu_write = true;
-			list_move_tail(&obj->gpu_write_list,
-				       &ring->gpu_write_list);
 			if (obj->pin_count) /* check for potential scanout */
 				intel_mark_busy(ring->dev, obj);
-		}
+		} else
+			obj->pending_gpu_write = false;
 
 		trace_i915_gem_object_change_domain(obj, old_read, old_write);
 	}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index ddc4859..7c166ff 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1001,7 +1001,6 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 	ring->dev = dev;
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
-	INIT_LIST_HEAD(&ring->gpu_write_list);
 	ring->size = 32 * PAGE_SIZE;
 
 	init_waitqueue_head(&ring->irq_queue);
@@ -1472,7 +1471,6 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
 	ring->dev = dev;
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
-	INIT_LIST_HEAD(&ring->gpu_write_list);
 
 	ring->size = size;
 	ring->effective_size = ring->size;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 1d3c81f..7986f30 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -101,15 +101,6 @@ struct  intel_ring_buffer {
 	struct list_head request_list;
 
 	/**
-	 * List of objects currently pending a GPU write flush.
-	 *
-	 * All elements on this list will belong to either the
-	 * active_list or flushing_list, last_rendering_seqno can
-	 * be used to differentiate between the two elements.
-	 */
-	struct list_head gpu_write_list;
-
-	/**
 	 * Do we have some not yet emitted requests outstanding?
 	 */
 	u32 outstanding_lazy_request;
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 4/6] drm/i915: Remove explicit flush from i915_gem_object_flush_fence()
  2012-07-12 15:13 Kill the flushing list Chris Wilson
                   ` (2 preceding siblings ...)
  2012-07-12 15:13 ` [PATCH 3/6] drm/i915: Remove the per-ring write list Chris Wilson
@ 2012-07-12 15:13 ` Chris Wilson
  2012-07-12 15:13 ` [PATCH 5/6] drm/i915: Remove the explicit flush of the GPU write domain Chris Wilson
  2012-07-12 15:13 ` [PATCH 6/6] drm/i915: Split i915_gem_flush_ring() into separate invalidate/flush funcs Chris Wilson
  5 siblings, 0 replies; 17+ messages in thread
From: Chris Wilson @ 2012-07-12 15:13 UTC (permalink / raw)
  To: intel-gfx

As the flush is either performed explicitly immediately after the
execbuffer dispatch, or before the serialisation of last_fenced_seqno we
can forgo the explicit i915_gem_flush_ring().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |   16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 535dd61..e574835 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2455,21 +2455,8 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
 static int
 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
 {
-	int ret;
-
-	if (obj->fenced_gpu_access) {
-		if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
-			ret = i915_gem_flush_ring(obj->ring,
-						  0, obj->base.write_domain);
-			if (ret)
-				return ret;
-		}
-
-		obj->fenced_gpu_access = false;
-	}
-
 	if (obj->last_fenced_seqno) {
-		ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
+		int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
 		if (ret)
 			return ret;
 
@@ -2482,6 +2469,7 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
 	if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
 		mb();
 
+	obj->fenced_gpu_access = false;
 	return 0;
 }
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 5/6] drm/i915: Remove the explicit flush of the GPU write domain
  2012-07-12 15:13 Kill the flushing list Chris Wilson
                   ` (3 preceding siblings ...)
  2012-07-12 15:13 ` [PATCH 4/6] drm/i915: Remove explicit flush from i915_gem_object_flush_fence() Chris Wilson
@ 2012-07-12 15:13 ` Chris Wilson
  2012-07-12 15:13 ` [PATCH 6/6] drm/i915: Split i915_gem_flush_ring() into seperate invalidate/flush funcs Chris Wilson
  5 siblings, 0 replies; 17+ messages in thread
From: Chris Wilson @ 2012-07-12 15:13 UTC (permalink / raw)
  To: intel-gfx

Rely instead on the insertion of the implicit flush before the seqno
breadcrumb.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |   34 ----------------------------------
 1 file changed, 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e574835..b22bfc8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -37,7 +37,6 @@
 #include <linux/pci.h>
 #include <linux/dma-buf.h>
 
-static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj);
 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
 static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
@@ -2003,10 +2002,6 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
 	int ret;
 
 	if (obj->active) {
-		ret = i915_gem_object_flush_gpu_write_domain(obj);
-		if (ret)
-			return ret;
-
 		ret = i915_gem_check_olr(obj->ring,
 					 obj->last_rendering_seqno);
 		if (ret)
@@ -2764,17 +2759,6 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
 	drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
 }
 
-/** Flushes any GPU write domain for the object if it's dirty. */
-static int
-i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj)
-{
-	if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0)
-		return 0;
-
-	/* Queue the GPU write cache flushing we need. */
-	return i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain);
-}
-
 /** Flushes the GTT write domain for the object if it's dirty. */
 static void
 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
@@ -2841,10 +2825,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
 		return 0;
 
-	ret = i915_gem_object_flush_gpu_write_domain(obj);
-	if (ret)
-		return ret;
-
 	if (obj->pending_gpu_write || write) {
 		ret = i915_gem_object_wait_rendering(obj);
 		if (ret)
@@ -2957,10 +2937,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	u32 old_read_domains, old_write_domain;
 	int ret;
 
-	ret = i915_gem_object_flush_gpu_write_domain(obj);
-	if (ret)
-		return ret;
-
 	if (pipelined != obj->ring) {
 		ret = i915_gem_object_sync(obj, pipelined);
 		if (ret)
@@ -3014,12 +2990,6 @@ i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
 	if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
 		return 0;
 
-	if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
-		ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain);
-		if (ret)
-			return ret;
-	}
-
 	ret = i915_gem_object_wait_rendering(obj);
 	if (ret)
 		return ret;
@@ -3044,10 +3014,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
 		return 0;
 
-	ret = i915_gem_object_flush_gpu_write_domain(obj);
-	if (ret)
-		return ret;
-
 	if (write || obj->pending_gpu_write) {
 		ret = i915_gem_object_wait_rendering(obj);
 		if (ret)
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 6/6] drm/i915: Split i915_gem_flush_ring() into separate invalidate/flush funcs
  2012-07-12 15:13 Kill the flushing list Chris Wilson
                   ` (4 preceding siblings ...)
  2012-07-12 15:13 ` [PATCH 5/6] drm/i915: Remove the explicit flush of the GPU write domain Chris Wilson
@ 2012-07-12 15:13 ` Chris Wilson
  5 siblings, 0 replies; 17+ messages in thread
From: Chris Wilson @ 2012-07-12 15:13 UTC (permalink / raw)
  To: intel-gfx

By moving the function to intel_ringbuffer and currying the appropriate
parameter, hopefully we make the callsites easier to read and
understand.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            |    3 ---
 drivers/gpu/drm/i915/i915_gem.c            |   21 +--------------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |    2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c    |   14 ++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.h    |    2 ++
 5 files changed, 18 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7871760..ee037e2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1261,9 +1261,6 @@ int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 void i915_gem_load(struct drm_device *dev);
 int i915_gem_init_object(struct drm_gem_object *obj);
-int __must_check i915_gem_flush_ring(struct intel_ring_buffer *ring,
-				     uint32_t invalidate_domains,
-				     uint32_t flush_domains);
 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 						  size_t size);
 void i915_gem_free_object(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b22bfc8..05635ed 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1549,7 +1549,7 @@ i915_add_request(struct intel_ring_buffer *ring,
 	 * what.
 	 */
 	if (ring->gpu_caches_dirty) {
-		ret = i915_gem_flush_ring(ring, 0, I915_GEM_GPU_DOMAINS);
+		ret = intel_ring_flush_all_caches(ring);
 		if (ret)
 			return ret;
 
@@ -2236,25 +2236,6 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	return ret;
 }
 
-int
-i915_gem_flush_ring(struct intel_ring_buffer *ring,
-		    uint32_t invalidate_domains,
-		    uint32_t flush_domains)
-{
-	int ret;
-
-	if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0)
-		return 0;
-
-	trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains);
-
-	ret = ring->flush(ring, invalidate_domains, flush_domains);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
 static int i915_ring_idle(struct intel_ring_buffer *ring)
 {
 	if (list_empty(&ring->active_list))
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index ca48978..101abc7 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -886,7 +886,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 	}
 
 	/* Unconditionally invalidate gpu caches. */
-	ret = i915_gem_flush_ring(ring, I915_GEM_GPU_DOMAINS, 0);
+	ret = intel_ring_invalidate_all_caches(ring);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7c166ff..ec57ee2 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1563,3 +1563,17 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
 
 	return intel_init_ring_buffer(dev, ring);
 }
+
+int
+intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
+{
+	trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS);
+	return ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
+}
+
+int
+intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
+{
+	trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, 0);
+	return ring->flush(ring, I915_GEM_GPU_DOMAINS, 0);
+}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 7986f30..8b2b92e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -195,6 +195,8 @@ static inline void intel_ring_emit(struct intel_ring_buffer *ring,
 void intel_ring_advance(struct intel_ring_buffer *ring);
 
 u32 intel_ring_get_seqno(struct intel_ring_buffer *ring);
+int intel_ring_flush_all_caches(struct intel_ring_buffer *ring);
+int intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring);
 
 int intel_init_render_ring_buffer(struct drm_device *dev);
 int intel_init_bsd_ring_buffer(struct drm_device *dev);
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/6] drm/i915: Allow late allocation of request for i915_add_request()
  2012-07-12 15:13 ` [PATCH 1/6] drm/i915: Allow late allocation of request for i915_add_request() Chris Wilson
@ 2012-07-12 17:43   ` Daniel Vetter
  2012-07-12 17:56     ` Chris Wilson
  2012-07-12 18:02   ` [PATCH] " Chris Wilson
  1 sibling, 1 reply; 17+ messages in thread
From: Daniel Vetter @ 2012-07-12 17:43 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Thu, Jul 12, 2012 at 04:13:34PM +0100, Chris Wilson wrote:
> Request preallocation was added to i915_add_request() in order to
> support the overlay. However, not all users care and can quite happily
> ignore the failure to allocate the request as they will simply repeat
> the request in the future.
> 
> By pushing the allocation down into i915_add_request(), we can then
> remove some rather ugly error handling in the callers.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h            |    6 ++---
>  drivers/gpu/drm/i915/i915_gem.c            |   39 ++++++++++------------------
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |    7 +----
>  3 files changed, 17 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 627fe35..51e234e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1358,9 +1358,9 @@ void i915_gem_init_ppgtt(struct drm_device *dev);
>  void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
>  int __must_check i915_gpu_idle(struct drm_device *dev);
>  int __must_check i915_gem_idle(struct drm_device *dev);
> -int __must_check i915_add_request(struct intel_ring_buffer *ring,
> -				  struct drm_file *file,
> -				  struct drm_i915_gem_request *request);
> +int i915_add_request(struct intel_ring_buffer *ring,
> +		     struct drm_file *file,
> +		     struct drm_i915_gem_request *request);
>  int __must_check i915_wait_seqno(struct intel_ring_buffer *ring,
>  				 uint32_t seqno);
>  int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 53c3946..875b745 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1597,7 +1597,6 @@ i915_add_request(struct intel_ring_buffer *ring,
>  		ring->gpu_caches_dirty = false;
>  	}
>  
> -	BUG_ON(request == NULL);
>  	seqno = i915_gem_next_request_seqno(ring);
>  
>  	/* Record the position of the start of the request so that
> @@ -1609,7 +1608,13 @@ i915_add_request(struct intel_ring_buffer *ring,
>  
>  	ret = ring->add_request(ring, &seqno);
>  	if (ret)
> -	    return ret;
> +		return ret;
> +
> +	if (request == NULL) {
> +		request = kmalloc(sizeof(*request), GFP_KERNEL);
> +		if (request == NULL)
> +			return -ENOMEM;
> +	}

Hm, shouldn't we try to allocate the request struct before we emit the
request? Otherwise looks good.
-Daniel
>  
>  	trace_i915_gem_request_add(ring, seqno);
>  
> @@ -1859,14 +1864,8 @@ i915_gem_retire_work_handler(struct work_struct *work)
>  	 */
>  	idle = true;
>  	for_each_ring(ring, dev_priv, i) {
> -		if (ring->gpu_caches_dirty) {
> -			struct drm_i915_gem_request *request;
> -
> -			request = kzalloc(sizeof(*request), GFP_KERNEL);
> -			if (request == NULL ||
> -			    i915_add_request(ring, NULL, request))
> -			    kfree(request);
> -		}
> +		if (ring->gpu_caches_dirty)
> +			i915_add_request(ring, NULL, NULL);
>  
>  		idle &= list_empty(&ring->request_list);
>  	}
> @@ -1913,25 +1912,13 @@ i915_gem_check_wedge(struct drm_i915_private *dev_priv,
>  static int
>  i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
>  {
> -	int ret = 0;
> +	int ret;
>  
>  	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
>  
> -	if (seqno == ring->outstanding_lazy_request) {
> -		struct drm_i915_gem_request *request;
> -
> -		request = kzalloc(sizeof(*request), GFP_KERNEL);
> -		if (request == NULL)
> -			return -ENOMEM;
> -
> -		ret = i915_add_request(ring, NULL, request);
> -		if (ret) {
> -			kfree(request);
> -			return ret;
> -		}
> -
> -		BUG_ON(seqno != request->seqno);
> -	}
> +	ret = 0;
> +	if (seqno == ring->outstanding_lazy_request)
> +		ret = i915_add_request(ring, NULL, NULL);
>  
>  	return ret;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 88e2e11..0c26692 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -967,16 +967,11 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
>  				    struct drm_file *file,
>  				    struct intel_ring_buffer *ring)
>  {
> -	struct drm_i915_gem_request *request;
> -
>  	/* Unconditionally force add_request to emit a full flush. */
>  	ring->gpu_caches_dirty = true;
>  
>  	/* Add a breadcrumb for the completion of the batch buffer */
> -	request = kzalloc(sizeof(*request), GFP_KERNEL);
> -	if (request == NULL || i915_add_request(ring, file, request)) {
> -		kfree(request);
> -	}
> +	(void)i915_add_request(ring, file, NULL);
>  }
>  
>  static int
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/6] drm/i915: Allow late allocation of request for i915_add_request()
  2012-07-12 17:43   ` Daniel Vetter
@ 2012-07-12 17:56     ` Chris Wilson
  0 siblings, 0 replies; 17+ messages in thread
From: Chris Wilson @ 2012-07-12 17:56 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Thu, 12 Jul 2012 19:43:47 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Thu, Jul 12, 2012 at 04:13:34PM +0100, Chris Wilson wrote:
> >  	ret = ring->add_request(ring, &seqno);
> >  	if (ret)
> > -	    return ret;
> > +		return ret;
> > +
> > +	if (request == NULL) {
> > +		request = kmalloc(sizeof(*request), GFP_KERNEL);
> > +		if (request == NULL)
> > +			return -ENOMEM;
> > +	}
> 
> Hm, shouldn't we try to allocate the request struct before we emit the
> request? Otherwise looks good.

I considered the ordering immaterial so went with the cleanest error
path.

If the allocation fails after we add the breadcrumb, we will then reuse
the olr for further batches.... Oops.

I was wrong, it matters.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH] drm/i915: Allow late allocation of request for i915_add_request()
  2012-07-12 15:13 ` [PATCH 1/6] drm/i915: Allow late allocation of request for i915_add_request() Chris Wilson
  2012-07-12 17:43   ` Daniel Vetter
@ 2012-07-12 18:02   ` Chris Wilson
  1 sibling, 0 replies; 17+ messages in thread
From: Chris Wilson @ 2012-07-12 18:02 UTC (permalink / raw)
  To: intel-gfx

Request preallocation was added to i915_add_request() in order to
support the overlay. However, not all users care and can quite happily
ignore the failure to allocate the request as they will simply repeat
the request in the future.

By pushing the allocation down into i915_add_request(), we can then
remove some rather ugly error handling in the callers.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            |    6 ++--
 drivers/gpu/drm/i915/i915_gem.c            |   43 +++++++++++-----------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |    7 +----
 3 files changed, 20 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 627fe35..51e234e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1358,9 +1358,9 @@ void i915_gem_init_ppgtt(struct drm_device *dev);
 void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
 int __must_check i915_gpu_idle(struct drm_device *dev);
 int __must_check i915_gem_idle(struct drm_device *dev);
-int __must_check i915_add_request(struct intel_ring_buffer *ring,
-				  struct drm_file *file,
-				  struct drm_i915_gem_request *request);
+int i915_add_request(struct intel_ring_buffer *ring,
+		     struct drm_file *file,
+		     struct drm_i915_gem_request *request);
 int __must_check i915_wait_seqno(struct intel_ring_buffer *ring,
 				 uint32_t seqno);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 53c3946..1782369 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1597,7 +1597,12 @@ i915_add_request(struct intel_ring_buffer *ring,
 		ring->gpu_caches_dirty = false;
 	}
 
-	BUG_ON(request == NULL);
+	if (request == NULL) {
+		request = kmalloc(sizeof(*request), GFP_KERNEL);
+		if (request == NULL)
+			return -ENOMEM;
+	}
+
 	seqno = i915_gem_next_request_seqno(ring);
 
 	/* Record the position of the start of the request so that
@@ -1608,8 +1613,10 @@ i915_add_request(struct intel_ring_buffer *ring,
 	request_ring_position = intel_ring_get_tail(ring);
 
 	ret = ring->add_request(ring, &seqno);
-	if (ret)
-	    return ret;
+	if (ret) {
+		kfree(request);
+		return ret;
+	}
 
 	trace_i915_gem_request_add(ring, seqno);
 
@@ -1859,14 +1866,8 @@ i915_gem_retire_work_handler(struct work_struct *work)
 	 */
 	idle = true;
 	for_each_ring(ring, dev_priv, i) {
-		if (ring->gpu_caches_dirty) {
-			struct drm_i915_gem_request *request;
-
-			request = kzalloc(sizeof(*request), GFP_KERNEL);
-			if (request == NULL ||
-			    i915_add_request(ring, NULL, request))
-			    kfree(request);
-		}
+		if (ring->gpu_caches_dirty)
+			i915_add_request(ring, NULL, NULL);
 
 		idle &= list_empty(&ring->request_list);
 	}
@@ -1913,25 +1914,13 @@ i915_gem_check_wedge(struct drm_i915_private *dev_priv,
 static int
 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
 {
-	int ret = 0;
+	int ret;
 
 	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
 
-	if (seqno == ring->outstanding_lazy_request) {
-		struct drm_i915_gem_request *request;
-
-		request = kzalloc(sizeof(*request), GFP_KERNEL);
-		if (request == NULL)
-			return -ENOMEM;
-
-		ret = i915_add_request(ring, NULL, request);
-		if (ret) {
-			kfree(request);
-			return ret;
-		}
-
-		BUG_ON(seqno != request->seqno);
-	}
+	ret = 0;
+	if (seqno == ring->outstanding_lazy_request)
+		ret = i915_add_request(ring, NULL, NULL);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 88e2e11..0c26692 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -967,16 +967,11 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
 				    struct drm_file *file,
 				    struct intel_ring_buffer *ring)
 {
-	struct drm_i915_gem_request *request;
-
 	/* Unconditionally force add_request to emit a full flush. */
 	ring->gpu_caches_dirty = true;
 
 	/* Add a breadcrumb for the completion of the batch buffer */
-	request = kzalloc(sizeof(*request), GFP_KERNEL);
-	if (request == NULL || i915_add_request(ring, file, request)) {
-		kfree(request);
-	}
+	(void)i915_add_request(ring, file, NULL);
 }
 
 static int
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH 3/6] drm/i915: Remove the per-ring write list
  2012-07-12 15:13 ` [PATCH 3/6] drm/i915: Remove the per-ring write list Chris Wilson
@ 2012-07-12 19:37   ` Daniel Vetter
  2012-07-12 20:07     ` Chris Wilson
  0 siblings, 1 reply; 17+ messages in thread
From: Daniel Vetter @ 2012-07-12 19:37 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Thu, Jul 12, 2012 at 04:13:36PM +0100, Chris Wilson wrote:
> This is now handled by a global flag to ensure we emit a flush before
> the next serialisation point (if we failed to queue one previously).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h            |    2 -
>  drivers/gpu/drm/i915/i915_gem.c            |   55 ++--------------------------
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |   10 ++---
>  drivers/gpu/drm/i915/intel_ringbuffer.c    |    2 -
>  drivers/gpu/drm/i915/intel_ringbuffer.h    |    9 -----
>  5 files changed, 7 insertions(+), 71 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 38b95be..7871760 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -865,8 +865,6 @@ struct drm_i915_gem_object {
>  	/** This object's place on the active/inactive lists */
>  	struct list_head ring_list;
>  	struct list_head mm_list;
> -	/** This object's place on GPU write list */
> -	struct list_head gpu_write_list;
>  	/** This object's place in the batchbuffer or on the eviction list */
>  	struct list_head exec_list;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index f69d897..535dd61 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1465,7 +1465,6 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
>  
>  	list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
>  
> -	BUG_ON(!list_empty(&obj->gpu_write_list));
>  	BUG_ON(!obj->active);
>  
>  	list_del_init(&obj->ring_list);
> @@ -1510,30 +1509,6 @@ i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
>  	return obj->madv == I915_MADV_DONTNEED;
>  }
>  
> -static void
> -i915_gem_process_flushing_list(struct intel_ring_buffer *ring,
> -			       uint32_t flush_domains)
> -{
> -	struct drm_i915_gem_object *obj, *next;
> -
> -	list_for_each_entry_safe(obj, next,
> -				 &ring->gpu_write_list,
> -				 gpu_write_list) {
> -		if (obj->base.write_domain & flush_domains) {
> -			uint32_t old_write_domain = obj->base.write_domain;
> -
> -			obj->base.write_domain = 0;
> -			list_del_init(&obj->gpu_write_list);
> -			i915_gem_object_move_to_active(obj, ring,
> -						       i915_gem_next_request_seqno(ring));
> -
> -			trace_i915_gem_object_change_domain(obj,
> -							    obj->base.read_domains,
> -							    old_write_domain);
> -		}
> -	}
> -}
> -
>  static u32
>  i915_gem_get_seqno(struct drm_device *dev)
>  {
> @@ -1633,8 +1608,6 @@ i915_add_request(struct intel_ring_buffer *ring,
>  					   &dev_priv->mm.retire_work, HZ);
>  	}
>  
> -	WARN_ON(!list_empty(&ring->gpu_write_list));
> -
>  	return 0;
>  }
>  
> @@ -1677,7 +1650,7 @@ static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
>  				       ring_list);
>  
>  		obj->base.write_domain = 0;
> -		list_del_init(&obj->gpu_write_list);
> +		obj->pending_gpu_write = false;
>  		i915_gem_object_move_to_inactive(obj);

Hm, this hunk makes me wonder whether we don't leak some bogus state
across a gpu reset. Can't we just reuse the move_to_inactive helper here,
ensuring that we consistently clear up any active state?

Otherwise I couldn't poke any other holes into this patch series, but let
me sleep over it a bit first ...
-Daniel

>  	}
>  }
> @@ -2005,11 +1978,6 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
>  {
>  	int ret;
>  
> -	/* This function only exists to support waiting for existing rendering,
> -	 * not for emitting required flushes.
> -	 */
> -	BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0);
> -
>  	/* If there is rendering queued on the buffer being evicted, wait for
>  	 * it.
>  	 */
> @@ -2017,6 +1985,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
>  		ret = i915_wait_seqno(obj->ring, obj->last_rendering_seqno);
>  		if (ret)
>  			return ret;
> +
>  		i915_gem_retire_requests_ring(obj->ring);
>  	}
>  
> @@ -2288,26 +2257,14 @@ i915_gem_flush_ring(struct intel_ring_buffer *ring,
>  	if (ret)
>  		return ret;
>  
> -	if (flush_domains & I915_GEM_GPU_DOMAINS)
> -		i915_gem_process_flushing_list(ring, flush_domains);
> -
>  	return 0;
>  }
>  
>  static int i915_ring_idle(struct intel_ring_buffer *ring)
>  {
> -	int ret;
> -
> -	if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list))
> +	if (list_empty(&ring->active_list))
>  		return 0;
>  
> -	if (!list_empty(&ring->gpu_write_list)) {
> -		ret = i915_gem_flush_ring(ring,
> -				    I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
> -		if (ret)
> -			return ret;
> -	}
> -
>  	return i915_wait_seqno(ring, i915_gem_next_request_seqno(ring));
>  }
>  
> @@ -2323,10 +2280,6 @@ int i915_gpu_idle(struct drm_device *dev)
>  		if (ret)
>  			return ret;
>  
> -		/* Is the device fubar? */
> -		if (WARN_ON(!list_empty(&ring->gpu_write_list)))
> -			return -EBUSY;
> -
>  		ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID);
>  		if (ret)
>  			return ret;
> @@ -3471,7 +3424,6 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>  	INIT_LIST_HEAD(&obj->gtt_list);
>  	INIT_LIST_HEAD(&obj->ring_list);
>  	INIT_LIST_HEAD(&obj->exec_list);
> -	INIT_LIST_HEAD(&obj->gpu_write_list);
>  	obj->madv = I915_MADV_WILLNEED;
>  	/* Avoid an unnecessary call to unbind on the first bind. */
>  	obj->map_and_fenceable = true;
> @@ -3892,7 +3844,6 @@ init_ring_lists(struct intel_ring_buffer *ring)
>  {
>  	INIT_LIST_HEAD(&ring->active_list);
>  	INIT_LIST_HEAD(&ring->request_list);
> -	INIT_LIST_HEAD(&ring->gpu_write_list);
>  }
>  
>  void
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 0c26692..ca48978 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -938,9 +938,8 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
>  	struct drm_i915_gem_object *obj;
>  
>  	list_for_each_entry(obj, objects, exec_list) {
> -		  u32 old_read = obj->base.read_domains;
> -		  u32 old_write = obj->base.write_domain;
> -
> +		u32 old_read = obj->base.read_domains;
> +		u32 old_write = obj->base.write_domain;
>  
>  		obj->base.read_domains = obj->base.pending_read_domains;
>  		obj->base.write_domain = obj->base.pending_write_domain;
> @@ -950,11 +949,10 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
>  		if (obj->base.write_domain) {
>  			obj->dirty = 1;
>  			obj->pending_gpu_write = true;
> -			list_move_tail(&obj->gpu_write_list,
> -				       &ring->gpu_write_list);
>  			if (obj->pin_count) /* check for potential scanout */
>  				intel_mark_busy(ring->dev, obj);
> -		}
> +		} else
> +			obj->pending_gpu_write = false;
>  
>  		trace_i915_gem_object_change_domain(obj, old_read, old_write);
>  	}
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index ddc4859..7c166ff 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1001,7 +1001,6 @@ static int intel_init_ring_buffer(struct drm_device *dev,
>  	ring->dev = dev;
>  	INIT_LIST_HEAD(&ring->active_list);
>  	INIT_LIST_HEAD(&ring->request_list);
> -	INIT_LIST_HEAD(&ring->gpu_write_list);
>  	ring->size = 32 * PAGE_SIZE;
>  
>  	init_waitqueue_head(&ring->irq_queue);
> @@ -1472,7 +1471,6 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
>  	ring->dev = dev;
>  	INIT_LIST_HEAD(&ring->active_list);
>  	INIT_LIST_HEAD(&ring->request_list);
> -	INIT_LIST_HEAD(&ring->gpu_write_list);
>  
>  	ring->size = size;
>  	ring->effective_size = ring->size;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 1d3c81f..7986f30 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -101,15 +101,6 @@ struct  intel_ring_buffer {
>  	struct list_head request_list;
>  
>  	/**
> -	 * List of objects currently pending a GPU write flush.
> -	 *
> -	 * All elements on this list will belong to either the
> -	 * active_list or flushing_list, last_rendering_seqno can
> -	 * be used to differentiate between the two elements.
> -	 */
> -	struct list_head gpu_write_list;
> -
> -	/**
>  	 * Do we have some not yet emitted requests outstanding?
>  	 */
>  	u32 outstanding_lazy_request;
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 3/6] drm/i915: Remove the per-ring write list
  2012-07-12 19:37   ` Daniel Vetter
@ 2012-07-12 20:07     ` Chris Wilson
  2012-07-13  8:34       ` Daniel Vetter
  0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2012-07-12 20:07 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Thu, 12 Jul 2012 21:37:16 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Thu, Jul 12, 2012 at 04:13:36PM +0100, Chris Wilson wrote:
> >  		obj->base.write_domain = 0;
> > -		list_del_init(&obj->gpu_write_list);
> > +		obj->pending_gpu_write = false;
> >  		i915_gem_object_move_to_inactive(obj);
> 
> Hm, this hunk makes me wonder whether we don't leak some bogus state
> accross a gpu reset. Can't we just reuse the move_to_inactive helper here,
> ensuring that we consistenly clear up any active state?

Yes. I had planned to finish off with another patch to remove that pair
of then redundant lines, but apparently forgot. I think it is better
expressed as a second patch, at least from the point of view of how I
was applying the transformations.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 3/6] drm/i915: Remove the per-ring write list
  2012-07-12 20:07     ` Chris Wilson
@ 2012-07-13  8:34       ` Daniel Vetter
  2012-07-13  8:49         ` Chris Wilson
  0 siblings, 1 reply; 17+ messages in thread
From: Daniel Vetter @ 2012-07-13  8:34 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Thu, Jul 12, 2012 at 09:07:52PM +0100, Chris Wilson wrote:
> On Thu, 12 Jul 2012 21:37:16 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> > On Thu, Jul 12, 2012 at 04:13:36PM +0100, Chris Wilson wrote:
> > >  		obj->base.write_domain = 0;
> > > -		list_del_init(&obj->gpu_write_list);
> > > +		obj->pending_gpu_write = false;
> > >  		i915_gem_object_move_to_inactive(obj);
> > 
> > Hm, this hunk makes me wonder whether we don't leak some bogus state
> > accross a gpu reset. Can't we just reuse the move_to_inactive helper here,
> > ensuring that we consistenly clear up any active state?
> 
> Yes. I had planned to finish off with another patch to remove that pair
> of then redundant lines, but apparently forgot. I think it is better
> expressed as a second patch, at least from the point of view of how I
> was applying the transformations.

Actually I've been confused yesterday, we do call move_to_inactive in the
next line ;-)

I've looked again at your patch, and the changes to how we handle
obj->pending_gpu_write look buggy. The above hunk is superfluous, because
move_to_inactive will do that.

The hunk in i915_gem_execbuffer is buggy, we can't clear
pending_gpu_write there - we use that to decide whether we need to stall
for the gpu when the cpu does a read-only access. We then stall
completely because we don't keep track of the last write seqno separately.
-Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 3/6] drm/i915: Remove the per-ring write list
  2012-07-13  8:34       ` Daniel Vetter
@ 2012-07-13  8:49         ` Chris Wilson
  2012-07-13  8:53           ` Chris Wilson
  0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2012-07-13  8:49 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Fri, 13 Jul 2012 10:34:43 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Thu, Jul 12, 2012 at 09:07:52PM +0100, Chris Wilson wrote:
> > On Thu, 12 Jul 2012 21:37:16 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> > > On Thu, Jul 12, 2012 at 04:13:36PM +0100, Chris Wilson wrote:
> > > >  		obj->base.write_domain = 0;
> > > > -		list_del_init(&obj->gpu_write_list);
> > > > +		obj->pending_gpu_write = false;
> > > >  		i915_gem_object_move_to_inactive(obj);
> > > 
> > > Hm, this hunk makes me wonder whether we don't leak some bogus state
> > > accross a gpu reset. Can't we just reuse the move_to_inactive helper here,
> > > ensuring that we consistenly clear up any active state?
> > 
> > Yes. I had planned to finish off with another patch to remove that pair
> > of then redundant lines, but apparently forgot. I think it is better
> > expressed as a second patch, at least from the point of view of how I
> > was applying the transformations.
> 
> Actually I've been confused yesterday, we do call move_to_inactive in the
> next line ;-)
> 
> I've looked again at your patch, and the changes to how we handle
> obj->pending_pug_write look buggy. The above hunk is superflous, because
> move_to_inactive will do that.

Right, but the transformation is a two stage process. It was more
obvious to do the replacement of gpu_write_list with pending_gpu_write
and then do the removal of duplicate lines later.

> The hunk int i915_gem_execbuffer's is buggy, we can't clear
> pending_gpu_write there - we use that to decide whether we need to stall
> for the gpu when the cpu does a read-only accesss. We then stall
> completely because we don't keep track of the last write seqno separately.

No. Because by the time the previous breadcrumb has been seen we are
guaranteed to have flushed the gpu caches. So any wait in the future we
have flushed the caches before returning.

The next patch will be to remove the obsolete pending_gpu_write.
Hopefully that will make you feel calmer.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 3/6] drm/i915: Remove the per-ring write list
  2012-07-13  8:49         ` Chris Wilson
@ 2012-07-13  8:53           ` Chris Wilson
  2012-07-13  9:05             ` Daniel Vetter
  0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2012-07-13  8:53 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Fri, 13 Jul 2012 09:49:48 +0100, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> No. Because by the time the previous breadcrumb has been seen we are
> guarranteed to have flushed the gpu caches. So any wait in the future we
> have flushed the caches before returning.

Egg on face time.

The issue is on the waiter side, since we don't wait unless
pending_gpu_write is set. Tracking the last write seqno seems safest
when removing the gpu_write_list.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 3/6] drm/i915: Remove the per-ring write list
  2012-07-13  8:53           ` Chris Wilson
@ 2012-07-13  9:05             ` Daniel Vetter
  0 siblings, 0 replies; 17+ messages in thread
From: Daniel Vetter @ 2012-07-13  9:05 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, Jul 13, 2012 at 09:53:33AM +0100, Chris Wilson wrote:
> On Fri, 13 Jul 2012 09:49:48 +0100, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > No. Because by the time the previous breadcrumb has been seen we are
> > guarranteed to have flushed the gpu caches. So any wait in the future we
> > have flushed the caches before returning.
> 
> Egg on face time.
> 
> The issue is on the waiter side, since we don't wait unless
> pending_gpu_write is set. Tracking the last write seqno seems safest
> when removing the gpu_write_list.

Imo tracking the last write seqno would be an extension (i.e. separate
patch), the current code that just tracks pending_gpu_write should work
(and if I haven't botched up the i-g-t tests, it should be able to catch
that kind of stuff).
-Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/6] drm/i915: Remove the defunct flushing list
  2012-07-12 15:13 ` [PATCH 2/6] drm/i915: Remove the defunct flushing list Chris Wilson
@ 2012-07-13  9:12   ` Daniel Vetter
  0 siblings, 0 replies; 17+ messages in thread
From: Daniel Vetter @ 2012-07-13  9:12 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Thu, Jul 12, 2012 at 04:13:35PM +0100, Chris Wilson wrote:
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

I'd like to keep the analysis of why things blow up on the BUG_ON(seqno ==
0) here in the commit message, i.e. the two patches that together caused
the regression (I think we need both the flushing list prep patch and
-ERESTART from intel_ring_begin). Plus the scenario of what exactly needs
to be submitted and interrupted that you've laid out on irc.

Although I have to admit that I still fail to understand how the
unconditional flush can't plug all holes, i.e. if you come up with a
recipe to blow things up with only that broken patch applied, I'd much
appreciate it ;-)

/me feels way too dense about all the complexity we have here

One thing popped out a bit while reading this patch again, comment below.

Thanks, Daniel
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c   |    7 ----
>  drivers/gpu/drm/i915/i915_drv.h       |   19 +++--------
>  drivers/gpu/drm/i915/i915_gem.c       |   57 ++++++---------------------------
>  drivers/gpu/drm/i915/i915_gem_evict.c |   20 ------------
>  4 files changed, 13 insertions(+), 90 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 359f6e8..d149890 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -44,7 +44,6 @@
>  
>  enum {
>  	ACTIVE_LIST,
> -	FLUSHING_LIST,
>  	INACTIVE_LIST,
>  	PINNED_LIST,
>  };
> @@ -177,10 +176,6 @@ static int i915_gem_object_list_info(struct seq_file *m, void *data)
>  		seq_printf(m, "Inactive:\n");
>  		head = &dev_priv->mm.inactive_list;
>  		break;
> -	case FLUSHING_LIST:
> -		seq_printf(m, "Flushing:\n");
> -		head = &dev_priv->mm.flushing_list;
> -		break;
>  	default:
>  		mutex_unlock(&dev->struct_mutex);
>  		return -EINVAL;
> @@ -238,7 +233,6 @@ static int i915_gem_object_info(struct seq_file *m, void* data)
>  
>  	size = count = mappable_size = mappable_count = 0;
>  	count_objects(&dev_priv->mm.active_list, mm_list);
> -	count_objects(&dev_priv->mm.flushing_list, mm_list);
>  	seq_printf(m, "  %u [%u] active objects, %zu [%zu] bytes\n",
>  		   count, mappable_count, size, mappable_size);
>  
> @@ -2006,7 +2000,6 @@ static struct drm_info_list i915_debugfs_list[] = {
>  	{"i915_gem_gtt", i915_gem_gtt_info, 0},
>  	{"i915_gem_pinned", i915_gem_gtt_info, 0, (void *) PINNED_LIST},
>  	{"i915_gem_active", i915_gem_object_list_info, 0, (void *) ACTIVE_LIST},
> -	{"i915_gem_flushing", i915_gem_object_list_info, 0, (void *) FLUSHING_LIST},
>  	{"i915_gem_inactive", i915_gem_object_list_info, 0, (void *) INACTIVE_LIST},
>  	{"i915_gem_pageflip", i915_gem_pageflip_info, 0},
>  	{"i915_gem_request", i915_gem_request_info, 0},
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 51e234e..38b95be 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -696,17 +696,6 @@ typedef struct drm_i915_private {
>  		struct list_head active_list;
>  
>  		/**
> -		 * List of objects which are not in the ringbuffer but which
> -		 * still have a write_domain which needs to be flushed before
> -		 * unbinding.
> -		 *
> -		 * last_rendering_seqno is 0 while an object is in this list.
> -		 *
> -		 * A reference is held on the buffer while on this list.
> -		 */
> -		struct list_head flushing_list;
> -
> -		/**
>  		 * LRU list of objects which are not in the ringbuffer and
>  		 * are ready to unbind, but are still in the GTT.
>  		 *
> @@ -873,7 +862,7 @@ struct drm_i915_gem_object {
>  	struct drm_mm_node *gtt_space;
>  	struct list_head gtt_list;
>  
> -	/** This object's place on the active/flushing/inactive lists */
> +	/** This object's place on the active/inactive lists */
>  	struct list_head ring_list;
>  	struct list_head mm_list;
>  	/** This object's place on GPU write list */
> @@ -882,9 +871,9 @@ struct drm_i915_gem_object {
>  	struct list_head exec_list;
>  
>  	/**
> -	 * This is set if the object is on the active or flushing lists
> -	 * (has pending rendering), and is not set if it's on inactive (ready
> -	 * to be unbound).
> +	 * This is set if the object is on the active lists (has pending
> +	 * rendering and so a non-zero seqno), and is not set if it i s on
> +	 * inactive (ready to be unbound) list.
>  	 */
>  	unsigned int active:1;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 875b745..f69d897 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1458,26 +1458,6 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
>  }
>  
>  static void
> -i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
> -{
> -	list_del_init(&obj->ring_list);
> -	obj->last_rendering_seqno = 0;
> -	obj->last_fenced_seqno = 0;
> -}
> -
> -static void
> -i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
> -{
> -	struct drm_device *dev = obj->base.dev;
> -	drm_i915_private_t *dev_priv = dev->dev_private;
> -
> -	BUG_ON(!obj->active);
> -	list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);
> -
> -	i915_gem_object_move_off_active(obj);
> -}
> -
> -static void
>  i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
>  {
>  	struct drm_device *dev = obj->base.dev;
> @@ -1487,13 +1467,18 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
>  
>  	BUG_ON(!list_empty(&obj->gpu_write_list));
>  	BUG_ON(!obj->active);
> +
> +	list_del_init(&obj->ring_list);
>  	obj->ring = NULL;
>  
> -	i915_gem_object_move_off_active(obj);
> +	obj->last_rendering_seqno = 0;
> +	obj->last_fenced_seqno = 0;
>  	obj->fenced_gpu_access = false;
>  
> -	obj->active = 0;
> +	obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;

Clearing the write_domain here makes me rather uneasy ... I see that we
should plug gpu write domains leaking out of obj->active (at least until
we've ripped out all that gpu domain tracking complexity for good). But I
also fear that this papers over some issues - at least in the old code we
should never be able to see this. Maybe just add a comment to explain
what's going on?

>  	obj->pending_gpu_write = false;
> +
> +	obj->active = 0;
>  	drm_gem_object_unreference(&obj->base);
>  
>  	WARN_ON(i915_verify_lists(dev));
> @@ -1728,20 +1713,6 @@ void i915_gem_reset(struct drm_device *dev)
>  	for_each_ring(ring, dev_priv, i)
>  		i915_gem_reset_ring_lists(dev_priv, ring);
>  
> -	/* Remove anything from the flushing lists. The GPU cache is likely
> -	 * to be lost on reset along with the data, so simply move the
> -	 * lost bo to the inactive list.
> -	 */
> -	while (!list_empty(&dev_priv->mm.flushing_list)) {
> -		obj = list_first_entry(&dev_priv->mm.flushing_list,
> -				      struct drm_i915_gem_object,
> -				      mm_list);
> -
> -		obj->base.write_domain = 0;
> -		list_del_init(&obj->gpu_write_list);
> -		i915_gem_object_move_to_inactive(obj);
> -	}
> -
>  	/* Move everything out of the GPU domains to ensure we do any
>  	 * necessary invalidation upon reuse.
>  	 */
> @@ -1812,10 +1783,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
>  		if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))
>  			break;
>  
> -		if (obj->base.write_domain != 0)
> -			i915_gem_object_move_to_flushing(obj);
> -		else
> -			i915_gem_object_move_to_inactive(obj);
> +		i915_gem_object_move_to_inactive(obj);
>  	}
>  
>  	if (unlikely(ring->trace_irq_seqno &&
> @@ -3877,7 +3845,6 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
>  	}
>  
>  	BUG_ON(!list_empty(&dev_priv->mm.active_list));
> -	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
>  	BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
>  	mutex_unlock(&dev->struct_mutex);
>  
> @@ -3935,7 +3902,6 @@ i915_gem_load(struct drm_device *dev)
>  	drm_i915_private_t *dev_priv = dev->dev_private;
>  
>  	INIT_LIST_HEAD(&dev_priv->mm.active_list);
> -	INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
>  	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
>  	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
>  	INIT_LIST_HEAD(&dev_priv->mm.gtt_list);
> @@ -4186,12 +4152,7 @@ static int
>  i915_gpu_is_active(struct drm_device *dev)
>  {
>  	drm_i915_private_t *dev_priv = dev->dev_private;
> -	int lists_empty;
> -
> -	lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
> -		      list_empty(&dev_priv->mm.active_list);
> -
> -	return !lists_empty;
> +	return !list_empty(&dev_priv->mm.active_list);
>  }
>  
>  static int
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index ae7c24e..f61af59 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -92,23 +92,6 @@ i915_gem_evict_something(struct drm_device *dev, int min_size,
>  
>  	/* Now merge in the soon-to-be-expired objects... */
>  	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
> -		/* Does the object require an outstanding flush? */
> -		if (obj->base.write_domain)
> -			continue;
> -
> -		if (mark_free(obj, &unwind_list))
> -			goto found;
> -	}
> -
> -	/* Finally add anything with a pending flush (in order of retirement) */
> -	list_for_each_entry(obj, &dev_priv->mm.flushing_list, mm_list) {
> -		if (mark_free(obj, &unwind_list))
> -			goto found;
> -	}
> -	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
> -		if (!obj->base.write_domain)
> -			continue;
> -
>  		if (mark_free(obj, &unwind_list))
>  			goto found;
>  	}
> @@ -171,7 +154,6 @@ i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only)
>  	int ret;
>  
>  	lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
> -		       list_empty(&dev_priv->mm.flushing_list) &&
>  		       list_empty(&dev_priv->mm.active_list));
>  	if (lists_empty)
>  		return -ENOSPC;
> @@ -188,8 +170,6 @@ i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only)
>  
>  	i915_gem_retire_requests(dev);
>  
> -	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
> -
>  	/* Having flushed everything, unbind() should never raise an error */
>  	list_for_each_entry_safe(obj, next,
>  				 &dev_priv->mm.inactive_list, mm_list) {
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2012-07-13  9:12 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-07-12 15:13 Kill the flushing list Chris Wilson
2012-07-12 15:13 ` [PATCH 1/6] drm/i915: Allow late allocation of request for i915_add_request() Chris Wilson
2012-07-12 17:43   ` Daniel Vetter
2012-07-12 17:56     ` Chris Wilson
2012-07-12 18:02   ` [PATCH] " Chris Wilson
2012-07-12 15:13 ` [PATCH 2/6] drm/i915: Remove the defunct flushing list Chris Wilson
2012-07-13  9:12   ` Daniel Vetter
2012-07-12 15:13 ` [PATCH 3/6] drm/i915: Remove the per-ring write list Chris Wilson
2012-07-12 19:37   ` Daniel Vetter
2012-07-12 20:07     ` Chris Wilson
2012-07-13  8:34       ` Daniel Vetter
2012-07-13  8:49         ` Chris Wilson
2012-07-13  8:53           ` Chris Wilson
2012-07-13  9:05             ` Daniel Vetter
2012-07-12 15:13 ` [PATCH 4/6] drm/i915: Remove explicit flush from i915_gem_object_flush_fence() Chris Wilson
2012-07-12 15:13 ` [PATCH 5/6] drm/i915: Remove the explicit flush of the GPU write domain Chris Wilson
2012-07-12 15:13 ` [PATCH 6/6] drm/i915: Split i915_gem_flush_ring() into seperate invalidate/flush funcs Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.