* [PATCH 1/3] drm/i915: Use a non-blocking wait for set-to-domain ioctl
@ 2012-08-23 12:12 Chris Wilson
  2012-08-23 12:12 ` [PATCH 2/3] drm/i915: Use cpu relocations if the object is in the GTT but not mappable Chris Wilson
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Chris Wilson @ 2012-08-23 12:12 UTC (permalink / raw)
  To: intel-gfx

The principal use for set-to-domain is for userspace to serialise
operations with a particular buffer, for example to maintain coherency
with a CPU map or to ratelimit its rendering by waiting on all previous
operations before continuing. As such we tend to hold the struct_mutex
for long periods during the synchronisation and so cause contention
issues with other users of the graphics device, even for independent
operations such as memory management. An example is the contention
between compiz and X, which causes jitter in the display and a drop in
peak throughput.

The ultimate solution would be a set of fine-grained locks and lockless
operations, but an intermediate step is to first attempt the
synchronisation for set-to-domain without holding the mutex. This
introduces a number of race conditions, so we limit its use to the ioctl
periphery where we have no dependent state and can safely complete with
a locked synchronisation afterwards.
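
In outline, the new wait (condensed from the patch below; the wedged and
outstanding-lazy-request checks are elided) picks its seqno by access
mode and drops the mutex only around the actual sleep:

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;	/* no rendering outstanding */

	if (nonblocking)
		mutex_unlock(&dev->struct_mutex);
	ret = __wait_seqno(ring, seqno, interruptible, NULL);
	if (nonblocking)
		mutex_lock(&dev->struct_mutex);

A read-only user need only wait for the last GPU write to the buffer,
whereas a writer must also wait for all outstanding reads to complete.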

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h         |    3 +-
 drivers/gpu/drm/i915/i915_gem.c         |  417 ++++++++++++++++---------------
 drivers/gpu/drm/i915/intel_overlay.c    |    4 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |    2 +-
 4 files changed, 223 insertions(+), 203 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cbd3cd0..7246373 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1385,7 +1385,8 @@ int i915_add_request(struct intel_ring_buffer *ring,
 		     struct drm_file *file,
 		     struct drm_i915_gem_request *request);
 int __must_check i915_wait_seqno(struct intel_ring_buffer *ring,
-				 uint32_t seqno);
+				 uint32_t seqno,
+				 bool nonblocking);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 int __must_check
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e1ec587..083b2ab 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -946,6 +946,205 @@ unlock:
 	return ret;
 }
 
+int
+i915_gem_check_wedge(struct drm_i915_private *dev_priv,
+		     bool interruptible)
+{
+	if (atomic_read(&dev_priv->mm.wedged)) {
+		struct completion *x = &dev_priv->error_completion;
+		bool recovery_complete;
+		unsigned long flags;
+
+		/* Give the error handler a chance to run. */
+		spin_lock_irqsave(&x->wait.lock, flags);
+		recovery_complete = x->done > 0;
+		spin_unlock_irqrestore(&x->wait.lock, flags);
+
+		/* Non-interruptible callers can't handle -EAGAIN, hence return
+		 * -EIO unconditionally for these. */
+		if (!interruptible)
+			return -EIO;
+
+		/* Recovery complete, but still wedged means reset failure. */
+		if (recovery_complete)
+			return -EIO;
+
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+/*
+ * Compare seqno against outstanding lazy request. Emit a request if they are
+ * equal.
+ */
+static int
+i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
+{
+	int ret;
+
+	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
+
+	ret = 0;
+	if (seqno == ring->outstanding_lazy_request)
+		ret = i915_add_request(ring, NULL, NULL);
+
+	return ret;
+}
+
+/**
+ * __wait_seqno - wait until execution of seqno has finished
+ * @ring: the ring expected to report seqno
+ * @seqno: duh!
+ * @interruptible: do an interruptible wait (normally yes)
+ * @timeout: in - how long to wait (NULL forever); out - how much time remaining
+ *
+ * Returns 0 if the seqno was found within the alloted time. Else returns the
+ * errno with remaining time filled in timeout argument.
+ */
+static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
+			bool interruptible, struct timespec *timeout)
+{
+	drm_i915_private_t *dev_priv = ring->dev->dev_private;
+	struct timespec before, now, wait_time={1,0};
+	unsigned long timeout_jiffies;
+	long end;
+	bool wait_forever = true;
+	int ret;
+
+	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
+		return 0;
+
+	trace_i915_gem_request_wait_begin(ring, seqno);
+
+	if (timeout != NULL) {
+		wait_time = *timeout;
+		wait_forever = false;
+	}
+
+	timeout_jiffies = timespec_to_jiffies(&wait_time);
+
+	if (WARN_ON(!ring->irq_get(ring)))
+		return -ENODEV;
+
+	/* Record current time in case interrupted by signal, or wedged * */
+	getrawmonotonic(&before);
+
+#define EXIT_COND \
+	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
+	atomic_read(&dev_priv->mm.wedged))
+	do {
+		if (interruptible)
+			end = wait_event_interruptible_timeout(ring->irq_queue,
+							       EXIT_COND,
+							       timeout_jiffies);
+		else
+			end = wait_event_timeout(ring->irq_queue, EXIT_COND,
+						 timeout_jiffies);
+
+		ret = i915_gem_check_wedge(dev_priv, interruptible);
+		if (ret)
+			end = ret;
+	} while (end == 0 && wait_forever);
+
+	getrawmonotonic(&now);
+
+	ring->irq_put(ring);
+	trace_i915_gem_request_wait_end(ring, seqno);
+#undef EXIT_COND
+
+	if (timeout) {
+		struct timespec sleep_time = timespec_sub(now, before);
+		*timeout = timespec_sub(*timeout, sleep_time);
+	}
+
+	switch (end) {
+	case -EIO:
+	case -EAGAIN: /* Wedged */
+	case -ERESTARTSYS: /* Signal */
+		return (int)end;
+	case 0: /* Timeout */
+		if (timeout)
+			set_normalized_timespec(timeout, 0, 0);
+		return -ETIME;
+	default: /* Completed */
+		WARN_ON(end < 0); /* We're not aware of other errors */
+		return 0;
+	}
+}
+
+/**
+ * Waits for a sequence number to be signaled, and cleans up the
+ * request and object lists appropriately for that event.
+ */
+int
+i915_wait_seqno(struct intel_ring_buffer *ring,
+		uint32_t seqno,
+		bool nonblocking)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	bool interruptible = dev_priv->mm.interruptible;
+	int ret;
+
+	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
+	BUG_ON(seqno == 0);
+
+	ret = i915_gem_check_wedge(dev_priv, interruptible);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_check_olr(ring, seqno);
+	if (ret)
+		return ret;
+
+	if (nonblocking)
+		mutex_unlock(&dev->struct_mutex);
+
+	ret = __wait_seqno(ring, seqno, interruptible, NULL);
+
+	if (nonblocking)
+		mutex_lock(&dev->struct_mutex);
+
+	return ret;
+}
+
+/**
+ * Ensures that all rendering to the object has completed and the object is
+ * safe to unbind from the GTT or access from the CPU.
+ */
+static __must_check int
+i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
+			       bool readonly,
+			       bool nonblocking)
+{
+	struct intel_ring_buffer *ring = obj->ring;
+	u32 seqno;
+	int ret;
+
+	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
+	if (seqno == 0)
+		return 0;
+
+	ret = i915_wait_seqno(ring, seqno, nonblocking);
+	if (ret)
+		return ret;
+
+	i915_gem_retire_requests_ring(ring);
+
+	/* Manually manage the write flush as we may have not yet
+	 * retired the buffer.
+	 */
+	if (obj->last_write_seqno &&
+	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
+		obj->last_write_seqno = 0;
+		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
+	}
+
+	return 0;
+}
+
 /**
  * Called when user space prepares to use an object with the CPU, either
  * through the mmap ioctl's mapping or a GTT mapping.
@@ -983,6 +1182,16 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 		goto unlock;
 	}
 
+	/* Try to flush the object off the GPU without holding the lock.
+	 * We will repeat the flush holding the lock in the normal manner
+	 * to catch cases where we are gazumped.
+	 */
+	ret = i915_gem_object_wait_rendering(obj,
+					     write_domain == 0,
+					     true);
+	if (ret)
+		goto unref;
+
 	if (read_domains & I915_GEM_DOMAIN_GTT) {
 		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
 
@@ -996,6 +1205,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
 	}
 
+unref:
 	drm_gem_object_unreference(&obj->base);
 unlock:
 	mutex_unlock(&dev->struct_mutex);
@@ -1950,197 +2160,6 @@ i915_gem_retire_work_handler(struct work_struct *work)
 	mutex_unlock(&dev->struct_mutex);
 }
 
-int
-i915_gem_check_wedge(struct drm_i915_private *dev_priv,
-		     bool interruptible)
-{
-	if (atomic_read(&dev_priv->mm.wedged)) {
-		struct completion *x = &dev_priv->error_completion;
-		bool recovery_complete;
-		unsigned long flags;
-
-		/* Give the error handler a chance to run. */
-		spin_lock_irqsave(&x->wait.lock, flags);
-		recovery_complete = x->done > 0;
-		spin_unlock_irqrestore(&x->wait.lock, flags);
-
-		/* Non-interruptible callers can't handle -EAGAIN, hence return
-		 * -EIO unconditionally for these. */
-		if (!interruptible)
-			return -EIO;
-
-		/* Recovery complete, but still wedged means reset failure. */
-		if (recovery_complete)
-			return -EIO;
-
-		return -EAGAIN;
-	}
-
-	return 0;
-}
-
-/*
- * Compare seqno against outstanding lazy request. Emit a request if they are
- * equal.
- */
-static int
-i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
-{
-	int ret;
-
-	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
-
-	ret = 0;
-	if (seqno == ring->outstanding_lazy_request)
-		ret = i915_add_request(ring, NULL, NULL);
-
-	return ret;
-}
-
-/**
- * __wait_seqno - wait until execution of seqno has finished
- * @ring: the ring expected to report seqno
- * @seqno: duh!
- * @interruptible: do an interruptible wait (normally yes)
- * @timeout: in - how long to wait (NULL forever); out - how much time remaining
- *
- * Returns 0 if the seqno was found within the alloted time. Else returns the
- * errno with remaining time filled in timeout argument.
- */
-static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
-			bool interruptible, struct timespec *timeout)
-{
-	drm_i915_private_t *dev_priv = ring->dev->dev_private;
-	struct timespec before, now, wait_time={1,0};
-	unsigned long timeout_jiffies;
-	long end;
-	bool wait_forever = true;
-	int ret;
-
-	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
-		return 0;
-
-	trace_i915_gem_request_wait_begin(ring, seqno);
-
-	if (timeout != NULL) {
-		wait_time = *timeout;
-		wait_forever = false;
-	}
-
-	timeout_jiffies = timespec_to_jiffies(&wait_time);
-
-	if (WARN_ON(!ring->irq_get(ring)))
-		return -ENODEV;
-
-	/* Record current time in case interrupted by signal, or wedged * */
-	getrawmonotonic(&before);
-
-#define EXIT_COND \
-	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
-	atomic_read(&dev_priv->mm.wedged))
-	do {
-		if (interruptible)
-			end = wait_event_interruptible_timeout(ring->irq_queue,
-							       EXIT_COND,
-							       timeout_jiffies);
-		else
-			end = wait_event_timeout(ring->irq_queue, EXIT_COND,
-						 timeout_jiffies);
-
-		ret = i915_gem_check_wedge(dev_priv, interruptible);
-		if (ret)
-			end = ret;
-	} while (end == 0 && wait_forever);
-
-	getrawmonotonic(&now);
-
-	ring->irq_put(ring);
-	trace_i915_gem_request_wait_end(ring, seqno);
-#undef EXIT_COND
-
-	if (timeout) {
-		struct timespec sleep_time = timespec_sub(now, before);
-		*timeout = timespec_sub(*timeout, sleep_time);
-	}
-
-	switch (end) {
-	case -EIO:
-	case -EAGAIN: /* Wedged */
-	case -ERESTARTSYS: /* Signal */
-		return (int)end;
-	case 0: /* Timeout */
-		if (timeout)
-			set_normalized_timespec(timeout, 0, 0);
-		return -ETIME;
-	default: /* Completed */
-		WARN_ON(end < 0); /* We're not aware of other errors */
-		return 0;
-	}
-}
-
-/**
- * Waits for a sequence number to be signaled, and cleans up the
- * request and object lists appropriately for that event.
- */
-int
-i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
-{
-	drm_i915_private_t *dev_priv = ring->dev->dev_private;
-	int ret = 0;
-
-	BUG_ON(seqno == 0);
-
-	ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
-	if (ret)
-		return ret;
-
-	ret = i915_gem_check_olr(ring, seqno);
-	if (ret)
-		return ret;
-
-	ret = __wait_seqno(ring, seqno, dev_priv->mm.interruptible, NULL);
-
-	return ret;
-}
-
-/**
- * Ensures that all rendering to the object has completed and the object is
- * safe to unbind from the GTT or access from the CPU.
- */
-static __must_check int
-i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
-			       bool readonly)
-{
-	u32 seqno;
-	int ret;
-
-	/* If there is rendering queued on the buffer being evicted, wait for
-	 * it.
-	 */
-	if (readonly)
-		seqno = obj->last_write_seqno;
-	else
-		seqno = obj->last_read_seqno;
-	if (seqno == 0)
-		return 0;
-
-	ret = i915_wait_seqno(obj->ring, seqno);
-	if (ret)
-		return ret;
-
-	/* Manually manage the write flush as we may have not yet retired
-	 * the buffer.
-	 */
-	if (obj->last_write_seqno &&
-	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
-		obj->last_write_seqno = 0;
-		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
-	}
-
-	i915_gem_retire_requests_ring(obj->ring);
-	return 0;
-}
-
 /**
  * Ensures that an object will eventually get non-busy by flushing any required
  * write domains, emitting any outstanding lazy request and retiring and
@@ -2270,7 +2289,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
 		return 0;
 
 	if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
-		return i915_gem_object_wait_rendering(obj, false);
+		return i915_gem_object_wait_rendering(obj, false, false);
 
 	idx = intel_ring_sync_index(from, to);
 
@@ -2372,7 +2391,7 @@ static int i915_ring_idle(struct intel_ring_buffer *ring)
 	if (list_empty(&ring->active_list))
 		return 0;
 
-	return i915_wait_seqno(ring, i915_gem_next_request_seqno(ring));
+	return i915_wait_seqno(ring, i915_gem_next_request_seqno(ring), false);
 }
 
 int i915_gpu_idle(struct drm_device *dev)
@@ -2563,7 +2582,7 @@ static int
 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
 {
 	if (obj->last_fenced_seqno) {
-		int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
+		int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno, false);
 		if (ret)
 			return ret;
 
@@ -2984,8 +3003,8 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
 		return 0;
 
-	ret = i915_gem_object_wait_rendering(obj, !write);
-	if (ret)
+	ret = i915_gem_object_wait_rendering(obj, !write, false);
+	if (ret && ret != -EIO)
 		return ret;
 
 	i915_gem_object_flush_cpu_write_domain(obj);
@@ -3218,7 +3237,7 @@ i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
 	if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
 		return 0;
 
-	ret = i915_gem_object_wait_rendering(obj, false);
+	ret = i915_gem_object_wait_rendering(obj, false, false);
 	if (ret)
 		return ret;
 
@@ -3242,8 +3261,8 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
 		return 0;
 
-	ret = i915_gem_object_wait_rendering(obj, !write);
-	if (ret)
+	ret = i915_gem_object_wait_rendering(obj, !write, false);
+	if (ret && ret != -EIO)
 		return ret;
 
 	i915_gem_object_flush_gtt_write_domain(obj);
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index c0f4858..cf3f6f1 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -226,7 +226,7 @@ static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
 	}
 	overlay->last_flip_req = request->seqno;
 	overlay->flip_tail = tail;
-	ret = i915_wait_seqno(ring, overlay->last_flip_req);
+	ret = i915_wait_seqno(ring, overlay->last_flip_req, false);
 	if (ret)
 		return ret;
 	i915_gem_retire_requests(dev);
@@ -396,7 +396,7 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
 	if (overlay->last_flip_req == 0)
 		return 0;
 
-	ret = i915_wait_seqno(ring, overlay->last_flip_req);
+	ret = i915_wait_seqno(ring, overlay->last_flip_req, false);
 	if (ret)
 		return ret;
 	i915_gem_retire_requests(dev);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index c828169..68a874a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1134,7 +1134,7 @@ static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
 {
 	int ret;
 
-	ret = i915_wait_seqno(ring, seqno);
+	ret = i915_wait_seqno(ring, seqno, false);
 	if (!ret)
 		i915_gem_retire_requests_ring(ring);
 
-- 
1.7.10.4


* [PATCH 2/3] drm/i915: Use cpu relocations if the object is in the GTT but not mappable
  2012-08-23 12:12 [PATCH 1/3] drm/i915: Use a non-blocking wait for set-to-domain ioctl Chris Wilson
@ 2012-08-23 12:12 ` Chris Wilson
  2012-08-24  0:29   ` Daniel Vetter
  2012-08-23 12:12 ` [PATCH 3/3] drm/i915: Avoid unbinding due to an interrupted pin_and_fence during execbuffer Chris Wilson
  2012-08-24  0:28 ` [PATCH 1/3] drm/i915: Use a non-blocking wait for set-to-domain ioctl Daniel Vetter
  2 siblings, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2012-08-23 12:12 UTC (permalink / raw)
  To: intel-gfx

This prevents the case of unbinding the object in order to process the
relocations through the GTT and then rebinding it, only to proceed to
use cpu relocations as the object is now in the CPU write domain. By
choosing to use cpu relocations up front, we can therefore avoid the
rebind penalty.
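
For reference, the predicate after this patch (the one-line change is in
the diff below), with one possible reading of each condition:

	static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
	{
		return (/* already in the CPU write domain, so a GTT write
			 * would force a domain transition anyway */
			obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
			/* bound outside the mappable aperture, so a GTT
			 * relocation would force an unbind/rebind cycle */
			!obj->map_and_fenceable ||
			/* cacheable, so CPU access is cheap regardless */
			obj->cache_level != I915_CACHE_NONE);
	}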

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |    1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 834a636..23aa324 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -95,6 +95,7 @@ eb_destroy(struct eb_objects *eb)
 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
 {
 	return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
+		!obj->map_and_fenceable ||
 		obj->cache_level != I915_CACHE_NONE);
 }
 
-- 
1.7.10.4


* [PATCH 3/3] drm/i915: Avoid unbinding due to an interrupted pin_and_fence during execbuffer
  2012-08-23 12:12 [PATCH 1/3] drm/i915: Use a non-blocking wait for set-to-domain ioctl Chris Wilson
  2012-08-23 12:12 ` [PATCH 2/3] drm/i915: Use cpu relocations if the object is in the GTT but not mappable Chris Wilson
@ 2012-08-23 12:12 ` Chris Wilson
  2012-08-24 14:46   ` Daniel Vetter
  2012-08-24  0:28 ` [PATCH 1/3] drm/i915: Use a non-blocking wait for set-to-domain ioctl Daniel Vetter
  2 siblings, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2012-08-23 12:12 UTC (permalink / raw)
  To: intel-gfx

If we need to stall in order to complete the pin_and_fence operation
during execbuffer reservation, there is a high likelihood that the
operation will be interrupted by a signal (thanks X!). In order to
simplify the cleanup along that error path, the object was
unconditionally unbound and the error propagated. However, being
interrupted here is far more common than I would like and so we can
strive to avoid the extra work by eliminating the forced unbind.
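
The mechanism, condensed from the diff below: each exec entry records
what the reservation actually acquired in per-object flags, so the
unwind releases exactly that much instead of unbinding:

	entry->flags |= __EXEC_OBJECT_HAS_PIN;	/* after a successful pin */
	...
	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
		i915_gem_object_unpin_fence(obj);
	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
		i915_gem_object_unpin(obj);
	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);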

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   93 ++++++++++++----------------
 1 file changed, 38 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 23aa324..0c5a433 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -331,7 +331,8 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
 	return ret;
 }
 
-#define  __EXEC_OBJECT_HAS_FENCE (1<<31)
+#define  __EXEC_OBJECT_HAS_PIN (1<<31)
+#define  __EXEC_OBJECT_HAS_FENCE (1<<30)
 
 static int
 need_reloc_mappable(struct drm_i915_gem_object *obj)
@@ -344,6 +345,7 @@ static int
 pin_and_fence_object(struct drm_i915_gem_object *obj,
 		     struct intel_ring_buffer *ring)
 {
+	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
 	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 	bool need_fence, need_mappable;
@@ -359,11 +361,13 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
+	entry->flags |= __EXEC_OBJECT_HAS_PIN;
+
 	if (has_fenced_gpu_access) {
 		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
 			ret = i915_gem_object_get_fence(obj);
 			if (ret)
-				goto err_unpin;
+				return ret;
 
 			if (i915_gem_object_pin_fence(obj))
 				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
@@ -372,12 +376,35 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
 		}
 	}
 
+	/* ... and ensure ppgtt mapping exist if needed. */
+	if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
+		i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
+				       obj, obj->cache_level);
+
+		obj->has_aliasing_ppgtt_mapping = 1;
+	}
+
 	entry->offset = obj->gtt_offset;
 	return 0;
+}
 
-err_unpin:
-	i915_gem_object_unpin(obj);
-	return ret;
+static void
+unpin_and_fence_object(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_gem_exec_object2 *entry;
+
+	if (!obj->gtt_space)
+		return;
+
+	entry = obj->exec_entry;
+
+	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
+		i915_gem_object_unpin_fence(obj);
+
+	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
+		i915_gem_object_unpin(obj);
+
+	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
 }
 
 static int
@@ -385,7 +412,6 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    struct drm_file *file,
 			    struct list_head *objects)
 {
-	drm_i915_private_t *dev_priv = ring->dev->dev_private;
 	struct drm_i915_gem_object *obj;
 	int ret, retry;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
@@ -463,45 +489,13 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 				continue;
 
 			ret = pin_and_fence_object(obj, ring);
-			if (ret) {
-				int ret_ignore;
-
-				/* This can potentially raise a harmless
-				 * -EINVAL if we failed to bind in the above
-				 * call. It cannot raise -EINTR since we know
-				 * that the bo is freshly bound and so will
-				 * not need to be flushed or waited upon.
-				 */
-				ret_ignore = i915_gem_object_unbind(obj);
-				(void)ret_ignore;
-				WARN_ON(obj->gtt_space);
+			if (ret)
 				break;
-			}
 		}
 
 		/* Decrement pin count for bound objects */
-		list_for_each_entry(obj, objects, exec_list) {
-			struct drm_i915_gem_exec_object2 *entry;
-
-			if (!obj->gtt_space)
-				continue;
-
-			entry = obj->exec_entry;
-			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
-				i915_gem_object_unpin_fence(obj);
-				entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
-			}
-
-			i915_gem_object_unpin(obj);
-
-			/* ... and ensure ppgtt mapping exist if needed. */
-			if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
-				i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
-						       obj, obj->cache_level);
-
-				obj->has_aliasing_ppgtt_mapping = 1;
-			}
-		}
+		list_for_each_entry(obj, objects, exec_list)
+			unpin_and_fence_object(obj);
 
 		if (ret != -ENOSPC || retry++)
 			return ret;
@@ -512,20 +506,9 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 	} while (1);
 
 err:
-	list_for_each_entry_continue_reverse(obj, objects, exec_list) {
-		struct drm_i915_gem_exec_object2 *entry;
-
-		if (!obj->gtt_space)
-			continue;
-
-		entry = obj->exec_entry;
-		if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
-			i915_gem_object_unpin_fence(obj);
-			entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
-		}
-
-		i915_gem_object_unpin(obj);
-	}
+	do {
+		unpin_and_fence_object(obj);
+	} while (&(obj = list_entry(obj->exec_list.prev, typeof(*obj), exec_list))->exec_list != objects);
 
 	return ret;
 }
-- 
1.7.10.4


* Re: [PATCH 1/3] drm/i915: Use a non-blocking wait for set-to-domain ioctl
  2012-08-23 12:12 [PATCH 1/3] drm/i915: Use a non-blocking wait for set-to-domain ioctl Chris Wilson
  2012-08-23 12:12 ` [PATCH 2/3] drm/i915: Use cpu relocations if the object is in the GTT but not mappable Chris Wilson
  2012-08-23 12:12 ` [PATCH 3/3] drm/i915: Avoid unbinding due to an interrupted pin_and_fence during execbuffer Chris Wilson
@ 2012-08-24  0:28 ` Daniel Vetter
  2012-08-24  8:35   ` [PATCH 1/2] drm/i915: Juggle code order to ease flow of the next patch Chris Wilson
  2 siblings, 1 reply; 11+ messages in thread
From: Daniel Vetter @ 2012-08-24  0:28 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Thu, Aug 23, 2012 at 01:12:51PM +0100, Chris Wilson wrote:
> The principal use for set-to-domain is for userspace to serialise
> operations with a particular buffer, for example to maintain coherency
> with a CPU map or to ratelimit its rendering by waiting on all previous
> operations before continuing. As such we tend to hold the struct_mutex
> for long periods during the synchronisation and so cause contention
> issues with other users of the graphics device, even for independent
> operations such as memory management. An example is the contention between
> compiz and X, which causes jitter in the display and a drop in peak
> throughput.
> 
> The ultimate solution would be a set of fine-grained locks and lockless
> operations, but an intermediate step is to first attempt the
> synchronisation for set-to-domain without holding the mutex. This
> introduces a number of race conditions, so we limit its use to the ioctl
> periphery where we have no dependent state and can safely complete with
> a locked synchronisation afterwards.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Can I have the "move functions around" patch split out, please?

Also, I think it doesn't make sense to splatter nonblocking bools all over
the code:
- We won't need such a dance anywhere else, since we require userspace to
  call set_domain before any cpu access anyway.
- I think hiding the mutex dropping deep down in the callchain is evil.

I.e. I'd prefer if we do the lock dropping in set_domain_ioctl itself and
just copy&pasta the required code to flush olr and whatnotelse. And since
we drop the mutex in between I actually think that the write domain
frobbery that wait_rendering does is positively harmful. But we don't need
it, so better cut it out (same applies to the retire_requests in there).
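
Roughly, i.e. something like this in set_domain_ioctl (a sketch reusing
the patch's helpers; compare the __nonblocking variant in the follow-up
patch further down the thread):

	u32 seqno = write_domain ? obj->last_read_seqno
				 : obj->last_write_seqno;
	int ret = 0;

	if (seqno) {
		ret = i915_gem_check_wedge(dev_priv, true);
		if (ret == 0)
			ret = i915_gem_check_olr(obj->ring, seqno);
		if (ret == 0) {
			mutex_unlock(&dev->struct_mutex);
			ret = __wait_seqno(obj->ring, seqno, true, NULL);
			mutex_lock(&dev->struct_mutex);
		}
	}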
-Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48


* Re: [PATCH 2/3] drm/i915: Use cpu relocations if the object is in the GTT but not mappable
  2012-08-23 12:12 ` [PATCH 2/3] drm/i915: Use cpu relocations if the object is in the GTT but not mappable Chris Wilson
@ 2012-08-24  0:29   ` Daniel Vetter
  0 siblings, 0 replies; 11+ messages in thread
From: Daniel Vetter @ 2012-08-24  0:29 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Thu, Aug 23, 2012 at 01:12:52PM +0100, Chris Wilson wrote:
> This prevents the case of unbinding the object in order to process the
> relocations through the GTT and then rebinding it, only to proceed to
> use cpu relocations as the object is now in the CPU write domain. By
> choosing to use cpu relocations up front, we can therefore avoid the
> rebind penalty.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
lgtm, applied to dinq.
-Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48


* [PATCH 1/2] drm/i915: Juggle code order to ease flow of the next patch
  2012-08-24  0:28 ` [PATCH 1/3] drm/i915: Use a non-blocking wait for set-to-domain ioctl Daniel Vetter
@ 2012-08-24  8:35   ` Chris Wilson
  2012-08-24  8:35     ` [PATCH 2/2] drm/i915: Use a non-blocking wait for set-to-domain ioctl Chris Wilson
  0 siblings, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2012-08-24  8:35 UTC (permalink / raw)
  To: intel-gfx

Move the wait-for-rendering logic around in the file so that we can
group it together with the subsequent variations. The general goal is to
have the lower-level routines clustered together, with the higher-level
logic building upon the low-level routines that come before it.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |  379 +++++++++++++++++++--------------------
 1 file changed, 188 insertions(+), 191 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5531758..0b236ea 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -945,6 +945,194 @@ unlock:
 	return ret;
 }
 
+int
+i915_gem_check_wedge(struct drm_i915_private *dev_priv,
+		     bool interruptible)
+{
+	if (atomic_read(&dev_priv->mm.wedged)) {
+		struct completion *x = &dev_priv->error_completion;
+		bool recovery_complete;
+		unsigned long flags;
+
+		/* Give the error handler a chance to run. */
+		spin_lock_irqsave(&x->wait.lock, flags);
+		recovery_complete = x->done > 0;
+		spin_unlock_irqrestore(&x->wait.lock, flags);
+
+		/* Non-interruptible callers can't handle -EAGAIN, hence return
+		 * -EIO unconditionally for these. */
+		if (!interruptible)
+			return -EIO;
+
+		/* Recovery complete, but still wedged means reset failure. */
+		if (recovery_complete)
+			return -EIO;
+
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+/*
+ * Compare seqno against outstanding lazy request. Emit a request if they are
+ * equal.
+ */
+static int
+i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
+{
+	int ret;
+
+	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
+
+	ret = 0;
+	if (seqno == ring->outstanding_lazy_request)
+		ret = i915_add_request(ring, NULL, NULL);
+
+	return ret;
+}
+
+/**
+ * __wait_seqno - wait until execution of seqno has finished
+ * @ring: the ring expected to report seqno
+ * @seqno: duh!
+ * @interruptible: do an interruptible wait (normally yes)
+ * @timeout: in - how long to wait (NULL forever); out - how much time remaining
+ *
+ * Returns 0 if the seqno was found within the alloted time. Else returns the
+ * errno with remaining time filled in timeout argument.
+ */
+static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
+			bool interruptible, struct timespec *timeout)
+{
+	drm_i915_private_t *dev_priv = ring->dev->dev_private;
+	struct timespec before, now, wait_time={1,0};
+	unsigned long timeout_jiffies;
+	long end;
+	bool wait_forever = true;
+	int ret;
+
+	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
+		return 0;
+
+	trace_i915_gem_request_wait_begin(ring, seqno);
+
+	if (timeout != NULL) {
+		wait_time = *timeout;
+		wait_forever = false;
+	}
+
+	timeout_jiffies = timespec_to_jiffies(&wait_time);
+
+	if (WARN_ON(!ring->irq_get(ring)))
+		return -ENODEV;
+
+	/* Record current time in case interrupted by signal, or wedged * */
+	getrawmonotonic(&before);
+
+#define EXIT_COND \
+	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
+	atomic_read(&dev_priv->mm.wedged))
+	do {
+		if (interruptible)
+			end = wait_event_interruptible_timeout(ring->irq_queue,
+							       EXIT_COND,
+							       timeout_jiffies);
+		else
+			end = wait_event_timeout(ring->irq_queue, EXIT_COND,
+						 timeout_jiffies);
+
+		ret = i915_gem_check_wedge(dev_priv, interruptible);
+		if (ret)
+			end = ret;
+	} while (end == 0 && wait_forever);
+
+	getrawmonotonic(&now);
+
+	ring->irq_put(ring);
+	trace_i915_gem_request_wait_end(ring, seqno);
+#undef EXIT_COND
+
+	if (timeout) {
+		struct timespec sleep_time = timespec_sub(now, before);
+		*timeout = timespec_sub(*timeout, sleep_time);
+	}
+
+	switch (end) {
+	case -EIO:
+	case -EAGAIN: /* Wedged */
+	case -ERESTARTSYS: /* Signal */
+		return (int)end;
+	case 0: /* Timeout */
+		if (timeout)
+			set_normalized_timespec(timeout, 0, 0);
+		return -ETIME;
+	default: /* Completed */
+		WARN_ON(end < 0); /* We're not aware of other errors */
+		return 0;
+	}
+}
+
+/**
+ * Waits for a sequence number to be signaled, and cleans up the
+ * request and object lists appropriately for that event.
+ */
+int
+i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	bool interruptible = dev_priv->mm.interruptible;
+	int ret;
+
+	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
+	BUG_ON(seqno == 0);
+
+	ret = i915_gem_check_wedge(dev_priv, interruptible);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_check_olr(ring, seqno);
+	if (ret)
+		return ret;
+
+	return __wait_seqno(ring, seqno, interruptible, NULL);
+}
+
+/**
+ * Ensures that all rendering to the object has completed and the object is
+ * safe to unbind from the GTT or access from the CPU.
+ */
+static __must_check int
+i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
+			       bool readonly)
+{
+	struct intel_ring_buffer *ring = obj->ring;
+	u32 seqno;
+	int ret;
+
+	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
+	if (seqno == 0)
+		return 0;
+
+	ret = i915_wait_seqno(ring, seqno);
+	if (ret)
+		return ret;
+
+	i915_gem_retire_requests_ring(ring);
+
+	/* Manually manage the write flush as we may have not yet
+	 * retired the buffer.
+	 */
+	if (obj->last_write_seqno &&
+	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
+		obj->last_write_seqno = 0;
+		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
+	}
+
+	return 0;
+}
+
 /**
  * Called when user space prepares to use an object with the CPU, either
  * through the mmap ioctl's mapping or a GTT mapping.
@@ -1952,197 +2140,6 @@ i915_gem_retire_work_handler(struct work_struct *work)
 	mutex_unlock(&dev->struct_mutex);
 }
 
-int
-i915_gem_check_wedge(struct drm_i915_private *dev_priv,
-		     bool interruptible)
-{
-	if (atomic_read(&dev_priv->mm.wedged)) {
-		struct completion *x = &dev_priv->error_completion;
-		bool recovery_complete;
-		unsigned long flags;
-
-		/* Give the error handler a chance to run. */
-		spin_lock_irqsave(&x->wait.lock, flags);
-		recovery_complete = x->done > 0;
-		spin_unlock_irqrestore(&x->wait.lock, flags);
-
-		/* Non-interruptible callers can't handle -EAGAIN, hence return
-		 * -EIO unconditionally for these. */
-		if (!interruptible)
-			return -EIO;
-
-		/* Recovery complete, but still wedged means reset failure. */
-		if (recovery_complete)
-			return -EIO;
-
-		return -EAGAIN;
-	}
-
-	return 0;
-}
-
-/*
- * Compare seqno against outstanding lazy request. Emit a request if they are
- * equal.
- */
-static int
-i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
-{
-	int ret;
-
-	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
-
-	ret = 0;
-	if (seqno == ring->outstanding_lazy_request)
-		ret = i915_add_request(ring, NULL, NULL);
-
-	return ret;
-}
-
-/**
- * __wait_seqno - wait until execution of seqno has finished
- * @ring: the ring expected to report seqno
- * @seqno: duh!
- * @interruptible: do an interruptible wait (normally yes)
- * @timeout: in - how long to wait (NULL forever); out - how much time remaining
- *
- * Returns 0 if the seqno was found within the alloted time. Else returns the
- * errno with remaining time filled in timeout argument.
- */
-static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
-			bool interruptible, struct timespec *timeout)
-{
-	drm_i915_private_t *dev_priv = ring->dev->dev_private;
-	struct timespec before, now, wait_time={1,0};
-	unsigned long timeout_jiffies;
-	long end;
-	bool wait_forever = true;
-	int ret;
-
-	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
-		return 0;
-
-	trace_i915_gem_request_wait_begin(ring, seqno);
-
-	if (timeout != NULL) {
-		wait_time = *timeout;
-		wait_forever = false;
-	}
-
-	timeout_jiffies = timespec_to_jiffies(&wait_time);
-
-	if (WARN_ON(!ring->irq_get(ring)))
-		return -ENODEV;
-
-	/* Record current time in case interrupted by signal, or wedged * */
-	getrawmonotonic(&before);
-
-#define EXIT_COND \
-	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
-	atomic_read(&dev_priv->mm.wedged))
-	do {
-		if (interruptible)
-			end = wait_event_interruptible_timeout(ring->irq_queue,
-							       EXIT_COND,
-							       timeout_jiffies);
-		else
-			end = wait_event_timeout(ring->irq_queue, EXIT_COND,
-						 timeout_jiffies);
-
-		ret = i915_gem_check_wedge(dev_priv, interruptible);
-		if (ret)
-			end = ret;
-	} while (end == 0 && wait_forever);
-
-	getrawmonotonic(&now);
-
-	ring->irq_put(ring);
-	trace_i915_gem_request_wait_end(ring, seqno);
-#undef EXIT_COND
-
-	if (timeout) {
-		struct timespec sleep_time = timespec_sub(now, before);
-		*timeout = timespec_sub(*timeout, sleep_time);
-	}
-
-	switch (end) {
-	case -EIO:
-	case -EAGAIN: /* Wedged */
-	case -ERESTARTSYS: /* Signal */
-		return (int)end;
-	case 0: /* Timeout */
-		if (timeout)
-			set_normalized_timespec(timeout, 0, 0);
-		return -ETIME;
-	default: /* Completed */
-		WARN_ON(end < 0); /* We're not aware of other errors */
-		return 0;
-	}
-}
-
-/**
- * Waits for a sequence number to be signaled, and cleans up the
- * request and object lists appropriately for that event.
- */
-int
-i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
-{
-	drm_i915_private_t *dev_priv = ring->dev->dev_private;
-	int ret = 0;
-
-	BUG_ON(seqno == 0);
-
-	ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
-	if (ret)
-		return ret;
-
-	ret = i915_gem_check_olr(ring, seqno);
-	if (ret)
-		return ret;
-
-	ret = __wait_seqno(ring, seqno, dev_priv->mm.interruptible, NULL);
-
-	return ret;
-}
-
-/**
- * Ensures that all rendering to the object has completed and the object is
- * safe to unbind from the GTT or access from the CPU.
- */
-static __must_check int
-i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
-			       bool readonly)
-{
-	u32 seqno;
-	int ret;
-
-	/* If there is rendering queued on the buffer being evicted, wait for
-	 * it.
-	 */
-	if (readonly)
-		seqno = obj->last_write_seqno;
-	else
-		seqno = obj->last_read_seqno;
-	if (seqno == 0)
-		return 0;
-
-	ret = i915_wait_seqno(obj->ring, seqno);
-	if (ret)
-		return ret;
-
-	/* Manually manage the write flush as we may have not yet retired
-	 * the buffer.
-	 */
-	if (obj->last_write_seqno &&
-	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
-		obj->last_write_seqno = 0;
-		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
-	}
-
-	i915_gem_retire_requests_ring(obj->ring);
-	return 0;
-}
-
 /**
  * Ensures that an object will eventually get non-busy by flushing any required
  * write domains, emitting any outstanding lazy request and retiring and
-- 
1.7.10.4


* [PATCH 2/2] drm/i915: Use a non-blocking wait for set-to-domain ioctl
  2012-08-24  8:35   ` [PATCH 1/2] drm/i915: Juggle code order to ease flow of the next patch Chris Wilson
@ 2012-08-24  8:35     ` Chris Wilson
  0 siblings, 0 replies; 11+ messages in thread
From: Chris Wilson @ 2012-08-24  8:35 UTC (permalink / raw)
  To: intel-gfx

The principal use for set-to-domain is for userspace to serialise
operations with a particular buffer, for example to maintain coherency
with a CPU map or to ratelimit its rendering by waiting on all previous
operations before continuing. As such we tend to hold the struct_mutex
for long periods during the synchronisation and so cause contention
issues with other users of the graphics device, even for independent
operations such as memory management. An example is the contention
between compiz and X, which causes jitter in the display and a drop in
peak throughput.

The ultimate solution would be a set of fine-grained locks and lockless
operations, but an intermediate step is to first attempt the
synchronisation for set-to-domain without holding the mutex. This
introduces a number of race conditions, so we limit its use to the ioctl
periphery where we have no dependent state and can safely complete with
a locked synchronisation afterwards.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |   55 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0b236ea..719a933 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1133,6 +1133,52 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 	return 0;
 }
 
+/* A nonblocking variant of the above wait. This is a highly dangerous routine
+ * as the object state may change during this call.
+ */
+static __must_check int
+i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
+					    bool readonly)
+{
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_ring_buffer *ring = obj->ring;
+	u32 seqno;
+	int ret;
+
+	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
+	BUG_ON(!dev_priv->mm.interruptible);
+
+	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
+	if (seqno == 0)
+		return 0;
+
+	ret = i915_gem_check_wedge(dev_priv, true);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_check_olr(ring, seqno);
+	if (ret)
+		return ret;
+
+	mutex_unlock(&dev->struct_mutex);
+	ret = __wait_seqno(ring, seqno, true, NULL);
+	mutex_lock(&dev->struct_mutex);
+
+	i915_gem_retire_requests_ring(ring);
+
+	/* Manually manage the write flush as we may have not yet
+	 * retired the buffer.
+	 */
+	if (obj->last_write_seqno &&
+	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
+		obj->last_write_seqno = 0;
+		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
+	}
+
+	return ret;
+}
+
 /**
  * Called when user space prepares to use an object with the CPU, either
  * through the mmap ioctl's mapping or a GTT mapping.
@@ -1170,6 +1216,14 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 		goto unlock;
 	}
 
+	/* Try to flush the object off the GPU without holding the lock.
+	 * We will repeat the flush holding the lock in the normal manner
+	 * to catch cases where we are gazumped.
+	 */
+	ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain);
+	if (ret)
+		goto unref;
+
 	if (read_domains & I915_GEM_DOMAIN_GTT) {
 		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
 
@@ -1183,6 +1237,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
 	}
 
+unref:
 	drm_gem_object_unreference(&obj->base);
 unlock:
 	mutex_unlock(&dev->struct_mutex);
-- 
1.7.10.4


* Re: [PATCH 3/3] drm/i915: Avoid unbinding due to an interrupted pin_and_fence during execbuffer
  2012-08-23 12:12 ` [PATCH 3/3] drm/i915: Avoid unbinding due to an interrupted pin_and_fence during execbuffer Chris Wilson
@ 2012-08-24 14:46   ` Daniel Vetter
  2012-08-24 16:35     ` Chris Wilson
  0 siblings, 1 reply; 11+ messages in thread
From: Daniel Vetter @ 2012-08-24 14:46 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Thu, Aug 23, 2012 at 01:12:53PM +0100, Chris Wilson wrote:
> If we need to stall in order to complete the pin_and_fence operation
> during execbuffer reservation, there is a high likelihood that the
> operation will be interrupted by a signal (thanks X!). In order to
> simplify the cleanup along that error path, the object was
> unconditionally unbound and the error propagated. However, being
> interrupted here is far more common than I would like and so we can
> strive to avoid the extra work by eliminating the forced unbind.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

I've merged the resend of patch 1/3, thanks a lot. But for this one here
I've found a few tiny things to bitch about. Comments inline below.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |   93 ++++++++++++----------------
>  1 file changed, 38 insertions(+), 55 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 23aa324..0c5a433 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -331,7 +331,8 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
>  	return ret;
>  }
>  
> -#define  __EXEC_OBJECT_HAS_FENCE (1<<31)
> +#define  __EXEC_OBJECT_HAS_PIN (1<<31)
> +#define  __EXEC_OBJECT_HAS_FENCE (1<<30)
>  
>  static int
>  need_reloc_mappable(struct drm_i915_gem_object *obj)
> @@ -344,6 +345,7 @@ static int
>  pin_and_fence_object(struct drm_i915_gem_object *obj,
>  		     struct intel_ring_buffer *ring)
>  {
> +	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
>  	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
>  	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
>  	bool need_fence, need_mappable;
> @@ -359,11 +361,13 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
>  	if (ret)
>  		return ret;
>  
> +	entry->flags |= __EXEC_OBJECT_HAS_PIN;
> +
>  	if (has_fenced_gpu_access) {
>  		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
>  			ret = i915_gem_object_get_fence(obj);
>  			if (ret)
> -				goto err_unpin;
> +				return ret;
>  
>  			if (i915_gem_object_pin_fence(obj))
>  				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
> @@ -372,12 +376,35 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
>  		}
>  	}
>  
> +	/* ... and ensure ppgtt mapping exist if needed. */

Nitpick: the "... and" in this moved comment looks a bit stale with the
previously preceding comment no longer in the same scope ;-)

> +	if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
> +		i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
> +				       obj, obj->cache_level);
> +
> +		obj->has_aliasing_ppgtt_mapping = 1;
> +	}
> +
>  	entry->offset = obj->gtt_offset;
>  	return 0;
> +}
>  
> -err_unpin:
> -	i915_gem_object_unpin(obj);
> -	return ret;
> +static void
> +unpin_and_fence_object(struct drm_i915_gem_object *obj)
> +{
> +	struct drm_i915_gem_exec_object2 *entry;
> +
> +	if (!obj->gtt_space)
> +		return;
> +
> +	entry = obj->exec_entry;
> +
> +	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
> +		i915_gem_object_unpin_fence(obj);
> +
> +	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
> +		i915_gem_object_unpin(obj);
> +
> +	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
>  }
>  
>  static int
> @@ -385,7 +412,6 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
>  			    struct drm_file *file,
>  			    struct list_head *objects)
>  {
> -	drm_i915_private_t *dev_priv = ring->dev->dev_private;
>  	struct drm_i915_gem_object *obj;
>  	int ret, retry;
>  	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
> @@ -463,45 +489,13 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
>  				continue;
>  
>  			ret = pin_and_fence_object(obj, ring);
> -			if (ret) {
> -				int ret_ignore;
> -
> -				/* This can potentially raise a harmless
> -				 * -EINVAL if we failed to bind in the above
> -				 * call. It cannot raise -EINTR since we know
> -				 * that the bo is freshly bound and so will
> -				 * not need to be flushed or waited upon.
> -				 */
> -				ret_ignore = i915_gem_object_unbind(obj);
> -				(void)ret_ignore;
> -				WARN_ON(obj->gtt_space);
> +			if (ret)
>  				break;
> -			}
>  		}
>  
>  		/* Decrement pin count for bound objects */
> -		list_for_each_entry(obj, objects, exec_list) {
> -			struct drm_i915_gem_exec_object2 *entry;
> -
> -			if (!obj->gtt_space)
> -				continue;
> -
> -			entry = obj->exec_entry;
> -			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
> -				i915_gem_object_unpin_fence(obj);
> -				entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
> -			}
> -
> -			i915_gem_object_unpin(obj);
> -
> -			/* ... and ensure ppgtt mapping exist if needed. */
> -			if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
> -				i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
> -						       obj, obj->cache_level);
> -
> -				obj->has_aliasing_ppgtt_mapping = 1;
> -			}
> -		}
> +		list_for_each_entry(obj, objects, exec_list)
> +			unpin_and_fence_object(obj);
>  
>  		if (ret != -ENOSPC || retry++)
>  			return ret;
> @@ -512,20 +506,9 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
>  	} while (1);
>  
>  err:
> -	list_for_each_entry_continue_reverse(obj, objects, exec_list) {
> -		struct drm_i915_gem_exec_object2 *entry;
> -
> -		if (!obj->gtt_space)
> -			continue;
> -
> -		entry = obj->exec_entry;
> -		if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
> -			i915_gem_object_unpin_fence(obj);
> -			entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
> -		}
> -
> -		i915_gem_object_unpin(obj);
> -	}
> +	do {
> +		unpin_and_fence_object(obj);
> +	} while (&(obj = list_entry(obj->exec_list.prev, typeof(*obj), exec_list))->exec_list != objects);

What's the reason here for no longer using the continue_reverse macro? On
a quick glance the new thing seems to do the same ...

>  
>  	return ret;
>  }
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48


* Re: [PATCH 3/3] drm/i915: Avoid unbinding due to an interrupted pin_and_fence during execbuffer
  2012-08-24 14:46   ` Daniel Vetter
@ 2012-08-24 16:35     ` Chris Wilson
  2012-08-24 18:18       ` [PATCH] " Chris Wilson
  0 siblings, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2012-08-24 16:35 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Fri, 24 Aug 2012 16:46:19 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Thu, Aug 23, 2012 at 01:12:53PM +0100, Chris Wilson wrote:
> > If we need to stall in order to complete the pin_and_fence operation
> > during execbuffer reservation, there is a high likelihood that the
> > operation will be interrupted by a signal (thanks X!). In order to
> > simplify the cleanup along that error path, the object was
> > unconditionally unbound and the error propagated. However, being
> > interrupted here is far more common than I would like and so we can
> > strive to avoid the extra work by eliminating the forced unbind.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> I've merged the resend of patch 1/3, thanks a lot. But for this one here
> I've found a few tiny things to bitch about. Comments inline below.
> -Daniel
> 
> > ---
> >  drivers/gpu/drm/i915/i915_gem_execbuffer.c |   93 ++++++++++++----------------
> >  1 file changed, 38 insertions(+), 55 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > index 23aa324..0c5a433 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > @@ -331,7 +331,8 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
> >  	return ret;
> >  }
> >  
> > -#define  __EXEC_OBJECT_HAS_FENCE (1<<31)
> > +#define  __EXEC_OBJECT_HAS_PIN (1<<31)
> > +#define  __EXEC_OBJECT_HAS_FENCE (1<<30)
> >  
> >  static int
> >  need_reloc_mappable(struct drm_i915_gem_object *obj)
> > @@ -344,6 +345,7 @@ static int
> >  pin_and_fence_object(struct drm_i915_gem_object *obj,
> >  		     struct intel_ring_buffer *ring)
> >  {
> > +	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> >  	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
> >  	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
> >  	bool need_fence, need_mappable;
> > @@ -359,11 +361,13 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
> >  	if (ret)
> >  		return ret;
> >  
> > +	entry->flags |= __EXEC_OBJECT_HAS_PIN;
> > +
> >  	if (has_fenced_gpu_access) {
> >  		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
> >  			ret = i915_gem_object_get_fence(obj);
> >  			if (ret)
> > -				goto err_unpin;
> > +				return ret;
> >  
> >  			if (i915_gem_object_pin_fence(obj))
> >  				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
> > @@ -372,12 +376,35 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
> >  		}
> >  	}
> >  
> > +	/* ... and ensure ppgtt mapping exist if needed. */
> 
> Nitpick: the "... and" in this moved comment looks a bit stale with the
> previously preceding comment no longer in the same scope ;-)

Hey, I thought you were just adding a bit of drama!

> > -	list_for_each_entry_continue_reverse(obj, objects, exec_list) {
> > +	do {
> > +		unpin_and_fence_object(obj);
> > +	} while (&(obj = list_entry(obj->exec_list.prev, typeof(*obj), exec_list))->exec_list != objects);
> 
> What's the reason here for no longer using the continue_reverse macro? On
> a quick glance the new thing seems to do the same ...

Because this is actually list_for_each_entry_from_reverse(), which doesn't
yet exist. We used to unbind the current obj on error so that we could
walk backwards along the list, but now we need to start the reverse walk
from the error object.
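
For comparison, such a macro would presumably be the existing
list_for_each_entry_continue_reverse() minus its initial step back,
e.g. (a sketch, not in the kernel at this point):

	#define list_for_each_entry_from_reverse(pos, head, member)	\
		for (; &pos->member != (head);				\
		     pos = list_entry(pos->member.prev,			\
				      typeof(*pos), member))

which behaves like the open-coded do/while when the starting element is
known to be a real entry rather than the list head.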
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


* [PATCH] drm/i915: Avoid unbinding due to an interrupted pin_and_fence during execbuffer
  2012-08-24 16:35     ` Chris Wilson
@ 2012-08-24 18:18       ` Chris Wilson
  2012-08-24 19:03         ` Daniel Vetter
  0 siblings, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2012-08-24 18:18 UTC (permalink / raw)
  To: intel-gfx

If we need to stall in order to complete the pin_and_fence operation
during execbuffer reservation, there is a high likelihood that the
operation will be interrupted by a signal (thanks X!). In order to
simplify the cleanup along that error path, the object was
unconditionally unbound and the error propagated. However, being
interrupted here is far more common than I would like and so we can
strive to avoid the extra work by eliminating the forced unbind.

v2: In discussion over the indecent colour of the new functions and
unwind path, we realised that we can use the new unreserve function to
clean up the code even further.
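
Concretely, the err: label now lands on the unreserve pass inside the
retry loop, so the success and failure paths share one unwind (condensed
from the diff below):

	do {
		int ret = 0;

		list_for_each_entry(obj, objects, exec_list) {
			...
			ret = i915_gem_execbuffer_reserve_object(obj, ring);
			if (ret)
				goto err;
		}

err:		/* Decrement pin count for bound objects */
		list_for_each_entry(obj, objects, exec_list)
			i915_gem_execbuffer_unreserve_object(obj);

		if (ret != -ENOSPC || retry++)
			return ret;
		...
	} while (1);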

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  114 +++++++++++-----------------
 1 file changed, 45 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index dc87563..e6b2205 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -331,7 +331,8 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
 	return ret;
 }
 
-#define  __EXEC_OBJECT_HAS_FENCE (1<<31)
+#define  __EXEC_OBJECT_HAS_PIN (1<<31)
+#define  __EXEC_OBJECT_HAS_FENCE (1<<30)
 
 static int
 need_reloc_mappable(struct drm_i915_gem_object *obj)
@@ -341,9 +342,10 @@ need_reloc_mappable(struct drm_i915_gem_object *obj)
 }
 
 static int
-pin_and_fence_object(struct drm_i915_gem_object *obj,
-		     struct intel_ring_buffer *ring)
+i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
+				   struct intel_ring_buffer *ring)
 {
+	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
 	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 	bool need_fence, need_mappable;
@@ -359,11 +361,13 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
+	entry->flags |= __EXEC_OBJECT_HAS_PIN;
+
 	if (has_fenced_gpu_access) {
 		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
 			ret = i915_gem_object_get_fence(obj);
 			if (ret)
-				goto err_unpin;
+				return ret;
 
 			if (i915_gem_object_pin_fence(obj))
 				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
@@ -372,12 +376,35 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
 		}
 	}
 
+	/* Ensure ppgtt mapping exists if needed */
+	if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
+		i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
+				       obj, obj->cache_level);
+
+		obj->has_aliasing_ppgtt_mapping = 1;
+	}
+
 	entry->offset = obj->gtt_offset;
 	return 0;
+}
 
-err_unpin:
-	i915_gem_object_unpin(obj);
-	return ret;
+static void
+i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_gem_exec_object2 *entry;
+
+	if (!obj->gtt_space)
+		return;
+
+	entry = obj->exec_entry;
+
+	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
+		i915_gem_object_unpin_fence(obj);
+
+	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
+		i915_gem_object_unpin(obj);
+
+	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
 }
 
 static int
@@ -385,11 +412,10 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    struct drm_file *file,
 			    struct list_head *objects)
 {
-	drm_i915_private_t *dev_priv = ring->dev->dev_private;
 	struct drm_i915_gem_object *obj;
-	int ret, retry;
-	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 	struct list_head ordered_objects;
+	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
+	int retry;
 
 	INIT_LIST_HEAD(&ordered_objects);
 	while (!list_empty(objects)) {
@@ -427,12 +453,12 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 	 * 2.  Bind new objects.
 	 * 3.  Decrement pin count.
 	 *
-	 * This avoid unnecessary unbinding of later objects in order to makr
+	 * This avoids unnecessary unbinding of later objects in order to make
 	 * room for the earlier objects *unless* we need to defragment.
 	 */
 	retry = 0;
 	do {
-		ret = 0;
+		int ret = 0;
 
 		/* Unbind any ill-fitting objects or pin. */
 		list_for_each_entry(obj, objects, exec_list) {
@@ -452,7 +478,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    (need_mappable && !obj->map_and_fenceable))
 				ret = i915_gem_object_unbind(obj);
 			else
-				ret = pin_and_fence_object(obj, ring);
+				ret = i915_gem_execbuffer_reserve_object(obj, ring);
 			if (ret)
 				goto err;
 		}
@@ -462,46 +488,14 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			if (obj->gtt_space)
 				continue;
 
-			ret = pin_and_fence_object(obj, ring);
-			if (ret) {
-				int ret_ignore;
-
-				/* This can potentially raise a harmless
-				 * -EINVAL if we failed to bind in the above
-				 * call. It cannot raise -EINTR since we know
-				 * that the bo is freshly bound and so will
-				 * not need to be flushed or waited upon.
-				 */
-				ret_ignore = i915_gem_object_unbind(obj);
-				(void)ret_ignore;
-				WARN_ON(obj->gtt_space);
-				break;
-			}
+			ret = i915_gem_execbuffer_reserve_object(obj, ring);
+			if (ret)
+				goto err;
 		}
 
-		/* Decrement pin count for bound objects */
-		list_for_each_entry(obj, objects, exec_list) {
-			struct drm_i915_gem_exec_object2 *entry;
-
-			if (!obj->gtt_space)
-				continue;
-
-			entry = obj->exec_entry;
-			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
-				i915_gem_object_unpin_fence(obj);
-				entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
-			}
-
-			i915_gem_object_unpin(obj);
-
-			/* ... and ensure ppgtt mapping exist if needed. */
-			if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
-				i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
-						       obj, obj->cache_level);
-
-				obj->has_aliasing_ppgtt_mapping = 1;
-			}
-		}
+err:		/* Decrement pin count for bound objects */
+		list_for_each_entry(obj, objects, exec_list)
+			i915_gem_execbuffer_unreserve_object(obj);
 
 		if (ret != -ENOSPC || retry++)
 			return ret;
@@ -510,24 +504,6 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 		if (ret)
 			return ret;
 	} while (1);
-
-err:
-	list_for_each_entry_continue_reverse(obj, objects, exec_list) {
-		struct drm_i915_gem_exec_object2 *entry;
-
-		if (!obj->gtt_space)
-			continue;
-
-		entry = obj->exec_entry;
-		if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
-			i915_gem_object_unpin_fence(obj);
-			entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
-		}
-
-		i915_gem_object_unpin(obj);
-	}
-
-	return ret;
 }
 
 static int
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/i915: Avoid unbinding due to an interrupted pin_and_fence during execbuffer
  2012-08-24 18:18       ` [PATCH] " Chris Wilson
@ 2012-08-24 19:03         ` Daniel Vetter
  0 siblings, 0 replies; 11+ messages in thread
From: Daniel Vetter @ 2012-08-24 19:03 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, Aug 24, 2012 at 07:18:18PM +0100, Chris Wilson wrote:
> If we need to stall in order to complete the pin_and_fence operation
> during execbuffer reservation, there is a high likelihood that the
> operation will be interrupted by a signal (thanks X!). In order to
> simplify the cleanup along that error path, the object was
> unconditionally unbound and the error propagated. However, being
> interrupted here is far more common than I would like and so we can
> strive to avoid the extra work by eliminating the forced unbind.
> 
> v2: In discussion over the indecent colour of the new functions and
> unwind path, we realised that we can use the new unreserve function to
> clean up the code even further.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Nice colours, merged to dinq.
-Daniel

> [diff snipped; identical to the patch above]

-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2012-08-24 19:03 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-08-23 12:12 [PATCH 1/3] drm/i915: Use a non-blocking wait for set-to-domain ioctl Chris Wilson
2012-08-23 12:12 ` [PATCH 2/3] drm/i915: Use cpu relocations if the object is in the GTT but not mappable Chris Wilson
2012-08-24  0:29   ` Daniel Vetter
2012-08-23 12:12 ` [PATCH 3/3] drm/i915: Avoid unbinding due to an interrupted pin_and_fence during execbuffer Chris Wilson
2012-08-24 14:46   ` Daniel Vetter
2012-08-24 16:35     ` Chris Wilson
2012-08-24 18:18       ` [PATCH] " Chris Wilson
2012-08-24 19:03         ` Daniel Vetter
2012-08-24  0:28 ` [PATCH 1/3] drm/i915: Use a non-blocking wait for set-to-domain ioctl Daniel Vetter
2012-08-24  8:35   ` [PATCH 1/2] drm/i915: Juggle code order to ease flow of the next patch Chris Wilson
2012-08-24  8:35     ` [PATCH 2/2] drm/i915: Use a non-blocking wait for set-to-domain ioctl Chris Wilson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).