intel-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Seperate fence pin counting from normal bind pin counting
@ 2011-07-09  8:25 Chris Wilson
  2011-07-09 10:23 ` Paul Menzel
  2011-11-23 10:49 ` Daniel Vetter
  0 siblings, 2 replies; 13+ messages in thread
From: Chris Wilson @ 2011-07-09  8:25 UTC (permalink / raw)
  To: intel-gfx

In order to correctly account for reserving space in the GTT and fences
for a batch buffer, we need to independently track whether the fence is
pinned due to a fenced GPU access in the batch from from whether the
buffer is pinned in the aperture. Currently we count the fenced as
pinned if the buffer has already been seen in the execbuffer. This leads
to a false accounting of available fence registers, causing frequent
mass evictions. Worse, if coupled with the change to make
i915_gem_object_get_fence() report EDADLK upon fence starvation, the
batchbuffer can fail with only one fence required...

Fixes intel-gpu-tools/tests/gem_fenced_exec_thrash

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38735
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Tested-by: Paul Neumann <paul104x@yahoo.de>
---
 drivers/gpu/drm/i915/i915_drv.h            |   18 ++++
 drivers/gpu/drm/i915/i915_gem.c            |    7 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  139 ++++++++++++++++++----------
 drivers/gpu/drm/i915/intel_display.c       |   21 ++++-
 4 files changed, 131 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 00dc59a..2798d27 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -798,6 +798,9 @@ struct drm_i915_gem_object {
 	unsigned int pin_count : 4;
 #define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
 
+	unsigned int fence_pin_count : 3;
+#define DRM_I915_GEM_OBJECT_MAX_FENCE_PIN_COUNT 0x7
+
 	/**
 	 * Is the object at the current location in the gtt mappable and
 	 * fenceable? Used to avoid costly recalculations.
@@ -1160,7 +1163,22 @@ int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
 					   struct intel_ring_buffer *pipelined);
 int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
 
+static inline void
+i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
+{
+	if (obj->fence_reg != I915_FENCE_REG_NONE)
+		obj->fence_pin_count++;
+}
+
+static inline void
+i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
+{
+	if (obj->fence_reg != I915_FENCE_REG_NONE)
+		obj->fence_pin_count--;
+}
+
 void i915_gem_retire_requests(struct drm_device *dev);
+
 void i915_gem_reset(struct drm_device *dev);
 void i915_gem_clflush_object(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e9d1d5c..e357c73 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1843,6 +1843,7 @@ static void i915_gem_reset_fences(struct drm_device *dev)
 			i915_gem_release_mmap(obj);
 
 		reg->obj->fence_reg = I915_FENCE_REG_NONE;
+		reg->obj->fence_pin_count = 0;
 		reg->obj->fenced_gpu_access = false;
 		reg->obj->last_fenced_seqno = 0;
 		reg->obj->last_fenced_ring = NULL;
@@ -2516,6 +2517,8 @@ i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
 {
 	int ret;
 
+	WARN_ON(obj->fence_pin_count);
+
 	if (obj->tiling_mode)
 		i915_gem_release_mmap(obj);
 
@@ -2549,7 +2552,7 @@ i915_find_fence_reg(struct drm_device *dev,
 		if (!reg->obj)
 			return reg;
 
-		if (!reg->obj->pin_count)
+		if (!reg->obj->fence_pin_count)
 			avail = reg;
 	}
 
@@ -2559,7 +2562,7 @@ i915_find_fence_reg(struct drm_device *dev,
 	/* None available, try to steal one or wait for a user to finish */
 	avail = first = NULL;
 	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
-		if (reg->obj->pin_count)
+		if (reg->obj->fence_pin_count)
 			continue;
 
 		if (first == NULL)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 4934cf8..1fea1cd 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -460,6 +460,54 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
 	return ret;
 }
 
+#define  __EXEC_OBJECT_HAS_FENCE (1<<31)
+
+static int
+pin_and_fence_object(struct drm_i915_gem_object *obj,
+		     struct intel_ring_buffer *ring)
+{
+	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
+	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
+	bool need_fence, need_mappable;
+	int ret;
+
+	need_fence =
+		has_fenced_gpu_access &&
+		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
+		obj->tiling_mode != I915_TILING_NONE;
+	need_mappable =
+		entry->relocation_count ? true : need_fence;
+
+	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
+	if (ret)
+		return ret;
+
+	if (has_fenced_gpu_access) {
+		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+			if (obj->tiling_mode) {
+				ret = i915_gem_object_get_fence(obj, ring);
+				if (ret)
+					goto err_unpin;
+
+				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+				i915_gem_object_pin_fence(obj);
+			} else {
+				ret = i915_gem_object_put_fence(obj);
+				if (ret)
+					goto err_unpin;
+			}
+		}
+		obj->pending_fenced_gpu_access = need_fence;
+	}
+
+	entry->offset = obj->gtt_offset;
+	return 0;
+
+err_unpin:
+	i915_gem_object_unpin(obj);
+	return ret;
+}
+
 static int
 i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    struct drm_file *file,
@@ -517,6 +565,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 		list_for_each_entry(obj, objects, exec_list) {
 			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 			bool need_fence, need_mappable;
+
 			if (!obj->gtt_space)
 				continue;
 
@@ -531,58 +580,47 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    (need_mappable && !obj->map_and_fenceable))
 				ret = i915_gem_object_unbind(obj);
 			else
-				ret = i915_gem_object_pin(obj,
-							  entry->alignment,
-							  need_mappable);
+				ret = pin_and_fence_object(obj, ring);
 			if (ret)
 				goto err;
-
-			entry++;
 		}
 
 		/* Bind fresh objects */
 		list_for_each_entry(obj, objects, exec_list) {
-			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
-			bool need_fence;
-
-			need_fence =
-				has_fenced_gpu_access &&
-				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-				obj->tiling_mode != I915_TILING_NONE;
-
-			if (!obj->gtt_space) {
-				bool need_mappable =
-					entry->relocation_count ? true : need_fence;
-
-				ret = i915_gem_object_pin(obj,
-							  entry->alignment,
-							  need_mappable);
-				if (ret)
-					break;
-			}
+			if (obj->gtt_space)
+				continue;
 
-			if (has_fenced_gpu_access) {
-				if (need_fence) {
-					ret = i915_gem_object_get_fence(obj, ring);
-					if (ret)
-						break;
-				} else if (entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-					   obj->tiling_mode == I915_TILING_NONE) {
-					/* XXX pipelined! */
-					ret = i915_gem_object_put_fence(obj);
-					if (ret)
-						break;
-				}
-				obj->pending_fenced_gpu_access = need_fence;
+			ret = pin_and_fence_object(obj, ring);
+			if (ret) {
+				int ret_ignore;
+
+				/* This can potentially raise a harmless
+				 * -EINVAL if we failed to bind in the above
+				 * call. It cannot raise -EINTR since we know
+				 * that the bo is freshly bound and so will
+				 * not need to be flushed or waited upon.
+				 */
+				ret_ignore = i915_gem_object_unbind(obj);
+				(void)ret_ignore;
+				WARN_ON(obj->gtt_space);
+				break;
 			}
-
-			entry->offset = obj->gtt_offset;
 		}
 
 		/* Decrement pin count for bound objects */
 		list_for_each_entry(obj, objects, exec_list) {
-			if (obj->gtt_space)
-				i915_gem_object_unpin(obj);
+			struct drm_i915_gem_exec_object2 *entry;
+
+			if (!obj->gtt_space)
+				continue;
+
+			entry = obj->exec_entry;
+			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
+				i915_gem_object_unpin_fence(obj);
+				entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
+			}
+
+			i915_gem_object_unpin(obj);
 		}
 
 		if (ret != -ENOSPC || retry > 1)
@@ -599,16 +637,19 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 	} while (1);
 
 err:
-	obj = list_entry(obj->exec_list.prev,
-			 struct drm_i915_gem_object,
-			 exec_list);
-	while (objects != &obj->exec_list) {
-		if (obj->gtt_space)
-			i915_gem_object_unpin(obj);
+	list_for_each_entry_continue_reverse(obj, objects, exec_list) {
+		struct drm_i915_gem_exec_object2 *entry;
+
+		if (!obj->gtt_space)
+			continue;
+
+		entry = obj->exec_entry;
+		if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
+			i915_gem_object_unpin_fence(obj);
+			entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
+		}
 
-		obj = list_entry(obj->exec_list.prev,
-				 struct drm_i915_gem_object,
-				 exec_list);
+		i915_gem_object_unpin(obj);
 	}
 
 	return ret;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index b5b15bd..60496c4 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1904,6 +1904,8 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev,
 		ret = i915_gem_object_get_fence(obj, pipelined);
 		if (ret)
 			goto err_unpin;
+
+		i915_gem_object_pin_fence(obj);
 	}
 
 	dev_priv->mm.interruptible = true;
@@ -2140,14 +2142,22 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
 	ret = intel_pipe_set_base_atomic(crtc, crtc->fb, x, y,
 					 LEAVE_ATOMIC_MODE_SET);
 	if (ret) {
-		i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj);
+		struct drm_i915_gem_object *obj =
+			to_intel_framebuffer(crtc->fb)->obj;
+
+		i915_gem_object_unpin_fence(obj);
+		i915_gem_object_unpin(obj);
 		mutex_unlock(&dev->struct_mutex);
 		return ret;
 	}
 
 	if (old_fb) {
+		struct drm_i915_gem_object *obj =
+			to_intel_framebuffer(old_fb)->obj;
+
 		intel_wait_for_vblank(dev, intel_crtc->pipe);
-		i915_gem_object_unpin(to_intel_framebuffer(old_fb)->obj);
+		i915_gem_object_unpin_fence(obj);
+		i915_gem_object_unpin(obj);
 	}
 
 	mutex_unlock(&dev->struct_mutex);
@@ -3141,8 +3151,12 @@ static void intel_crtc_disable(struct drm_crtc *crtc)
 	crtc_funcs->dpms(crtc, DRM_MODE_DPMS_OFF);
 
 	if (crtc->fb) {
+		struct drm_i915_gem_object *obj =
+			to_intel_framebuffer(crtc->fb)->obj;
+
 		mutex_lock(&dev->struct_mutex);
-		i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj);
+		i915_gem_object_unpin_fence(obj);
+		i915_gem_object_unpin(obj);
 		mutex_unlock(&dev->struct_mutex);
 	}
 }
@@ -6411,6 +6425,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
 		container_of(__work, struct intel_unpin_work, work);
 
 	mutex_lock(&work->dev->struct_mutex);
+	i915_gem_object_unpin_fence(work->old_fb_obj);
 	i915_gem_object_unpin(work->old_fb_obj);
 	drm_gem_object_unreference(&work->pending_flip_obj->base);
 	drm_gem_object_unreference(&work->old_fb_obj->base);
-- 
1.7.5.4

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915: Seperate fence pin counting from normal bind pin counting
  2011-07-09  8:25 [PATCH] drm/i915: Seperate fence pin counting from normal bind pin counting Chris Wilson
@ 2011-07-09 10:23 ` Paul Menzel
  2011-07-09 10:32   ` Chris Wilson
  2011-11-23 10:49 ` Daniel Vetter
  1 sibling, 1 reply; 13+ messages in thread
From: Paul Menzel @ 2011-07-09 10:23 UTC (permalink / raw)
  To: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 1222 bytes --]

Am Samstag, den 09.07.2011, 09:25 +0100 schrieb Chris Wilson:

Whoever pushes this, please correct

s/Seperate/Separate/

in the commit summary.

> In order to correctly account for reserving space in the GTT and fences
> for a batch buffer, we need to independently track whether the fence is
> pinned due to a fenced GPU access in the batch from from whether the
> buffer is pinned in the aperture. Currently we count the fenced as

»the fenced« sounds strange. Probably I need to read up the code to
grasp that. Or is the »d« at the end a typo?

> pinned if the buffer has already been seen in the execbuffer. This leads
> to a false accounting of available fence registers, causing frequent
> mass evictions. Worse, if coupled with the change to make
> i915_gem_object_get_fence() report EDADLK upon fence starvation, the
> batchbuffer can fail with only one fence required...
> 
> Fixes intel-gpu-tools/tests/gem_fenced_exec_thrash
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38735
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> Tested-by: Paul Neumann <paul104x@yahoo.de>

[…]


Thanks,

Paul

[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915: Seperate fence pin counting from normal bind pin counting
  2011-07-09 10:23 ` Paul Menzel
@ 2011-07-09 10:32   ` Chris Wilson
  0 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2011-07-09 10:32 UTC (permalink / raw)
  To: Paul Menzel, intel-gfx

[-- Attachment #1: Type: text/plain, Size: 882 bytes --]

On Sat, 09 Jul 2011 12:23:02 +0200, Paul Menzel <paulepanter@users.sourceforge.net> wrote:
> Am Samstag, den 09.07.2011, 09:25 +0100 schrieb Chris Wilson:
> 
> Whoever pushes this, please correct
> 
> s/Seperate/Separate/
> 
> in the commit summary.

Yes, I forgot to fix the typo again.

> 
> > In order to correctly account for reserving space in the GTT and fences
> > for a batch buffer, we need to independently track whether the fence is
> > pinned due to a fenced GPU access in the batch from from whether the
> > buffer is pinned in the aperture. Currently we count the fenced as
> 
> »the fenced« sounds strange. Probably I need to read up the code to
> grasp that. Or is the »d« at the end a typo?

I was probably thinking of a fenced bo at the time, but the sentence reads
correctly with s/fenced/fence/.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915: Seperate fence pin counting from normal bind pin counting
  2011-07-09  8:25 [PATCH] drm/i915: Seperate fence pin counting from normal bind pin counting Chris Wilson
  2011-07-09 10:23 ` Paul Menzel
@ 2011-11-23 10:49 ` Daniel Vetter
  1 sibling, 0 replies; 13+ messages in thread
From: Daniel Vetter @ 2011-11-23 10:49 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Sat, Jul 09, 2011 at 09:25:04AM +0100, Chris Wilson wrote:
> In order to correctly account for reserving space in the GTT and fences
> for a batch buffer, we need to independently track whether the fence is
> pinned due to a fenced GPU access in the batch from from whether the
> buffer is pinned in the aperture. Currently we count the fenced as
> pinned if the buffer has already been seen in the execbuffer. This leads
> to a false accounting of available fence registers, causing frequent
> mass evictions. Worse, if coupled with the change to make
> i915_gem_object_get_fence() report EDADLK upon fence starvation, the
> batchbuffer can fail with only one fence required...
> 
> Fixes intel-gpu-tools/tests/gem_fenced_exec_thrash
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38735
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> Tested-by: Paul Neumann <paul104x@yahoo.de>

I'm voting for an intel_unpin_fb_obj for symmetry (and so Jesse can't
botch the sprite code) and maybe a paranoid WARN on if we put a pinned
fence. But still

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915: Seperate fence pin counting from normal bind pin counting
  2011-11-13 11:00 ` Chris Wilson
  2011-11-13 11:21   ` Paul Menzel
@ 2011-11-23 12:38   ` Daniel Vetter
  1 sibling, 0 replies; 13+ messages in thread
From: Daniel Vetter @ 2011-11-23 12:38 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, stable

On Sun, Nov 13, 2011 at 11:00:22AM +0000, Chris Wilson wrote:
> In order to correctly account for reserving space in the GTT and fences
> for a batch buffer, we need to independently track whether the fence is
> pinned due to a fenced GPU access in the batch from from whether the
> buffer is pinned in the aperture. Currently we count the fenced as
> pinned if the buffer has already been seen in the execbuffer. This leads
> to a false accounting of available fence registers, causing frequent
> mass evictions. Worse, if coupled with the change to make
> i915_gem_object_get_fence() report EDADLK upon fence starvation, the
> batchbuffer can fail with only one fence required...
> 
> Fixes intel-gpu-tools/tests/gem_fenced_exec_thrash
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38735
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> Tested-by: Paul Neumann <paul104x@yahoo.de>
> Cc: stable@kernel.org

How embarassing, I've re-reviewed the wrong patch without noticing.
Anyway, still looks good and I like the pin_count moved to the fence quite
a bit more ...
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915: Seperate fence pin counting from normal bind pin counting
  2011-11-13 11:00 ` Chris Wilson
@ 2011-11-13 11:21   ` Paul Menzel
  2011-11-23 12:38   ` Daniel Vetter
  1 sibling, 0 replies; 13+ messages in thread
From: Paul Menzel @ 2011-11-13 11:21 UTC (permalink / raw)
  To: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 107 bytes --]

Am Sonntag, den 13.11.2011, 11:00 +0000 schrieb Chris Wilson:

sep*a*rate

[…]


Thanks,

Paul

[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH] drm/i915: Seperate fence pin counting from normal bind pin counting
  2011-06-04  8:55 Chris Wilson
  2011-06-04 17:38 ` Keith Packard
  2011-06-05 20:55 ` Daniel Vetter
@ 2011-11-13 11:00 ` Chris Wilson
  2011-11-13 11:21   ` Paul Menzel
  2011-11-23 12:38   ` Daniel Vetter
  2 siblings, 2 replies; 13+ messages in thread
From: Chris Wilson @ 2011-11-13 11:00 UTC (permalink / raw)
  To: intel-gfx; +Cc: stable

In order to correctly account for reserving space in the GTT and fences
for a batch buffer, we need to independently track whether the fence is
pinned due to a fenced GPU access in the batch from from whether the
buffer is pinned in the aperture. Currently we count the fenced as
pinned if the buffer has already been seen in the execbuffer. This leads
to a false accounting of available fence registers, causing frequent
mass evictions. Worse, if coupled with the change to make
i915_gem_object_get_fence() report EDADLK upon fence starvation, the
batchbuffer can fail with only one fence required...

Fixes intel-gpu-tools/tests/gem_fenced_exec_thrash

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38735
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Tested-by: Paul Neumann <paul104x@yahoo.de>
Cc: stable@kernel.org
---

v2: Even though Daniel accepted version 1 many months, he suggested that
the pin counting for the fences should be kept on the fence object rather
than stashed inside the bo.

---
 drivers/gpu/drm/i915/i915_drv.h            |   19 ++++
 drivers/gpu/drm/i915/i915_gem.c            |    5 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  139 ++++++++++++++++++----------
 drivers/gpu/drm/i915/intel_display.c       |   21 ++++-
 4 files changed, 130 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fe4b680..0eae584 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -134,6 +134,7 @@ struct drm_i915_fence_reg {
 	struct list_head lru_list;
 	struct drm_i915_gem_object *obj;
 	uint32_t setup_seqno;
+	int pin_count;
 };
 
 struct sdvo_device_mapping {
@@ -1158,6 +1159,24 @@ int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
 					   struct intel_ring_buffer *pipelined);
 int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
 
+static inline void
+i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
+{
+	if (obj->fence_reg != I915_FENCE_REG_NONE) {
+		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+		dev_priv->fence_regs[obj->fence_reg].pin_count++;
+	}
+}
+
+static inline void
+i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
+{
+	if (obj->fence_reg != I915_FENCE_REG_NONE) {
+		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+		dev_priv->fence_regs[obj->fence_reg].pin_count--;
+	}
+}
+
 void i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_reset(struct drm_device *dev);
 void i915_gem_clflush_object(struct drm_i915_gem_object *obj);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ebaa0f0..694d3e4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2207,7 +2207,7 @@ i915_find_fence_reg(struct drm_device *dev,
 		if (!reg->obj)
 			return reg;
 
-		if (!reg->obj->pin_count)
+		if (!reg->pin_count)
 			avail = reg;
 	}
 
@@ -2217,7 +2217,7 @@ i915_find_fence_reg(struct drm_device *dev,
 	/* None available, try to steal one or wait for a user to finish */
 	avail = first = NULL;
 	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
-		if (reg->obj->pin_count)
+		if (reg->pin_count)
 			continue;
 
 		if (first == NULL)
@@ -2411,6 +2411,7 @@ i915_gem_clear_fence_reg(struct drm_device *dev,
 	list_del_init(&reg->lru_list);
 	reg->obj = NULL;
 	reg->setup_seqno = 0;
+	reg->pin_count = 0;
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 926ed48..c918124 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -460,6 +460,54 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
 	return ret;
 }
 
+#define  __EXEC_OBJECT_HAS_FENCE (1<<31)
+
+static int
+pin_and_fence_object(struct drm_i915_gem_object *obj,
+		     struct intel_ring_buffer *ring)
+{
+	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
+	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
+	bool need_fence, need_mappable;
+	int ret;
+
+	need_fence =
+		has_fenced_gpu_access &&
+		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
+		obj->tiling_mode != I915_TILING_NONE;
+	need_mappable =
+		entry->relocation_count ? true : need_fence;
+
+	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
+	if (ret)
+		return ret;
+
+	if (has_fenced_gpu_access) {
+		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+			if (obj->tiling_mode) {
+				ret = i915_gem_object_get_fence(obj, ring);
+				if (ret)
+					goto err_unpin;
+
+				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+				i915_gem_object_pin_fence(obj);
+			} else {
+				ret = i915_gem_object_put_fence(obj);
+				if (ret)
+					goto err_unpin;
+			}
+		}
+		obj->pending_fenced_gpu_access = need_fence;
+	}
+
+	entry->offset = obj->gtt_offset;
+	return 0;
+
+err_unpin:
+	i915_gem_object_unpin(obj);
+	return ret;
+}
+
 static int
 i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    struct drm_file *file,
@@ -517,6 +565,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 		list_for_each_entry(obj, objects, exec_list) {
 			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 			bool need_fence, need_mappable;
+
 			if (!obj->gtt_space)
 				continue;
 
@@ -531,58 +580,47 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    (need_mappable && !obj->map_and_fenceable))
 				ret = i915_gem_object_unbind(obj);
 			else
-				ret = i915_gem_object_pin(obj,
-							  entry->alignment,
-							  need_mappable);
+				ret = pin_and_fence_object(obj, ring);
 			if (ret)
 				goto err;
-
-			entry++;
 		}
 
 		/* Bind fresh objects */
 		list_for_each_entry(obj, objects, exec_list) {
-			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
-			bool need_fence;
-
-			need_fence =
-				has_fenced_gpu_access &&
-				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-				obj->tiling_mode != I915_TILING_NONE;
-
-			if (!obj->gtt_space) {
-				bool need_mappable =
-					entry->relocation_count ? true : need_fence;
-
-				ret = i915_gem_object_pin(obj,
-							  entry->alignment,
-							  need_mappable);
-				if (ret)
-					break;
-			}
+			if (obj->gtt_space)
+				continue;
 
-			if (has_fenced_gpu_access) {
-				if (need_fence) {
-					ret = i915_gem_object_get_fence(obj, ring);
-					if (ret)
-						break;
-				} else if (entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-					   obj->tiling_mode == I915_TILING_NONE) {
-					/* XXX pipelined! */
-					ret = i915_gem_object_put_fence(obj);
-					if (ret)
-						break;
-				}
-				obj->pending_fenced_gpu_access = need_fence;
+			ret = pin_and_fence_object(obj, ring);
+			if (ret) {
+				int ret_ignore;
+
+				/* This can potentially raise a harmless
+				 * -EINVAL if we failed to bind in the above
+				 * call. It cannot raise -EINTR since we know
+				 * that the bo is freshly bound and so will
+				 * not need to be flushed or waited upon.
+				 */
+				ret_ignore = i915_gem_object_unbind(obj);
+				(void)ret_ignore;
+				WARN_ON(obj->gtt_space);
+				break;
 			}
-
-			entry->offset = obj->gtt_offset;
 		}
 
 		/* Decrement pin count for bound objects */
 		list_for_each_entry(obj, objects, exec_list) {
-			if (obj->gtt_space)
-				i915_gem_object_unpin(obj);
+			struct drm_i915_gem_exec_object2 *entry;
+
+			if (!obj->gtt_space)
+				continue;
+
+			entry = obj->exec_entry;
+			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
+				i915_gem_object_unpin_fence(obj);
+				entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
+			}
+
+			i915_gem_object_unpin(obj);
 		}
 
 		if (ret != -ENOSPC || retry > 1)
@@ -599,16 +637,19 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 	} while (1);
 
 err:
-	obj = list_entry(obj->exec_list.prev,
-			 struct drm_i915_gem_object,
-			 exec_list);
-	while (objects != &obj->exec_list) {
-		if (obj->gtt_space)
-			i915_gem_object_unpin(obj);
+	list_for_each_entry_continue_reverse(obj, objects, exec_list) {
+		struct drm_i915_gem_exec_object2 *entry;
+
+		if (!obj->gtt_space)
+			continue;
+
+		entry = obj->exec_entry;
+		if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
+			i915_gem_object_unpin_fence(obj);
+			entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
+		}
 
-		obj = list_entry(obj->exec_list.prev,
-				 struct drm_i915_gem_object,
-				 exec_list);
+		i915_gem_object_unpin(obj);
 	}
 
 	return ret;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 9fa342e..a79b8a1 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2004,6 +2004,8 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev,
 		ret = i915_gem_object_get_fence(obj, pipelined);
 		if (ret)
 			goto err_unpin;
+
+		i915_gem_object_pin_fence(obj);
 	}
 
 	dev_priv->mm.interruptible = true;
@@ -2247,15 +2249,23 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
 	ret = intel_pipe_set_base_atomic(crtc, crtc->fb, x, y,
 					 LEAVE_ATOMIC_MODE_SET);
 	if (ret) {
-		i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj);
+		struct drm_i915_gem_object *obj =
+			to_intel_framebuffer(crtc->fb)->obj;
+
+		i915_gem_object_unpin_fence(obj);
+		i915_gem_object_unpin(obj);
 		mutex_unlock(&dev->struct_mutex);
 		DRM_ERROR("failed to update base address\n");
 		return ret;
 	}
 
 	if (old_fb) {
+		struct drm_i915_gem_object *obj =
+			to_intel_framebuffer(old_fb)->obj;
+
 		intel_wait_for_vblank(dev, intel_crtc->pipe);
-		i915_gem_object_unpin(to_intel_framebuffer(old_fb)->obj);
+		i915_gem_object_unpin_fence(obj);
+		i915_gem_object_unpin(obj);
 	}
 
 	mutex_unlock(&dev->struct_mutex);
@@ -3314,8 +3324,12 @@ static void intel_crtc_disable(struct drm_crtc *crtc)
 	crtc_funcs->dpms(crtc, DRM_MODE_DPMS_OFF);
 
 	if (crtc->fb) {
+		struct drm_i915_gem_object *obj =
+			to_intel_framebuffer(crtc->fb)->obj;
+
 		mutex_lock(&dev->struct_mutex);
-		i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj);
+		i915_gem_object_unpin_fence(obj);
+		i915_gem_object_unpin(obj);
 		mutex_unlock(&dev->struct_mutex);
 	}
 }
@@ -6859,6 +6873,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
 		container_of(__work, struct intel_unpin_work, work);
 
 	mutex_lock(&work->dev->struct_mutex);
+	i915_gem_object_unpin_fence(work->old_fb_obj);
 	i915_gem_object_unpin(work->old_fb_obj);
 	drm_gem_object_unreference(&work->pending_flip_obj->base);
 	drm_gem_object_unreference(&work->old_fb_obj->base);
-- 
1.7.7.2

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915: Seperate fence pin counting from normal bind pin counting
  2011-06-04  8:55 Chris Wilson
  2011-06-04 17:38 ` Keith Packard
@ 2011-06-05 20:55 ` Daniel Vetter
  2011-11-13 11:00 ` Chris Wilson
  2 siblings, 0 replies; 13+ messages in thread
From: Daniel Vetter @ 2011-06-05 20:55 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Daniel Vetter, intel-gfx

On Sat, Jun 04, 2011 at 09:55:43AM +0100, Chris Wilson wrote:
> +			ret = pin_and_fence_object(obj, ring);
> +			if (ret) {
> +				int ret_ignore;
> +
> +				/* This can potentially raise a harmless
> +				 * -EINVAL if we failed to bind in the above
> +				 * call. It cannot raise -EINTR since we know
> +				 * that the bo is freshly bound and so will
> +				 * not need to be flushed or waited upon.
> +				 */
> +				ret_ignore = i915_gem_object_unbind(obj);
> +				(void)ret_ignore;
> +				WARN_ON(obj->gtt_space);
> +				break;
>  			}

Chris clarified my confusion about this piece of the patch on irc: It's
required to ensure the "valid gtt_space implies that execbuffer_reserve
holds a pin count ref on this object" invariant, which is used later on in
the unwind loop. I think this should be mentioned in the comment. [The
confusion mostly stemmed from the second (slightly different) unwind loop
which is used in an earlier error path.]

I couldn't poke any other holes into this (and I don't have clear ideas
for straightening out the code-flow in execbuffer_reserve) so:

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915: Seperate fence pin counting from normal bind pin counting
  2011-06-04 18:31   ` Chris Wilson
@ 2011-06-05  1:07     ` Keith Packard
  0 siblings, 0 replies; 13+ messages in thread
From: Keith Packard @ 2011-06-05  1:07 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Daniel Vetter


[-- Attachment #1.1: Type: text/plain, Size: 582 bytes --]

On Sat, 04 Jun 2011 19:31:27 +0100, Chris Wilson <chris@chris-wilson.co.uk> wrote:

> Yes, we can't apply the EDEADLK patch until we fix the accounting.

Ok, I'll rearrange drm-intel-next then so that the EDEADLK patch occurs
after this fix.

> Speedups on q35 (or equally because I finally noticed the regression of):
>  midori-zoomed            3576.78 -> 2059.74: 1.74x speedup
>  firefox-planet-gnome    14500.77 -> 9523.07: 1.52x speedup

Nice speedup!

I'll try to figure out how it works and review the code for
drm-intel-next.

-- 
keith.packard@intel.com

[-- Attachment #1.2: Type: application/pgp-signature, Size: 189 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915: Seperate fence pin counting from normal bind pin counting
  2011-06-04 17:38 ` Keith Packard
  2011-06-04 18:31   ` Chris Wilson
@ 2011-06-04 22:18   ` Chris Wilson
  1 sibling, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2011-06-04 22:18 UTC (permalink / raw)
  To: Keith Packard, intel-gfx; +Cc: Daniel Vetter

On Sat, 04 Jun 2011 10:38:07 -0700, Keith Packard <keithp@keithp.com> wrote:
> On Sat,  4 Jun 2011 09:55:43 +0100, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> I'm afraid you've completely lost me here. Can you provide a small
> example (libdrm?) program which exhibits the failure so I can follow
> what the problem is?

See intel-gpu-tools/tests/gem_fenced_exec_thrash
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915: Seperate fence pin counting from normal bind pin counting
  2011-06-04 17:38 ` Keith Packard
@ 2011-06-04 18:31   ` Chris Wilson
  2011-06-05  1:07     ` Keith Packard
  2011-06-04 22:18   ` Chris Wilson
  1 sibling, 1 reply; 13+ messages in thread
From: Chris Wilson @ 2011-06-04 18:31 UTC (permalink / raw)
  To: Keith Packard, intel-gfx; +Cc: Daniel Vetter


On Sat, 04 Jun 2011 10:38:07 -0700, Keith Packard <keithp@keithp.com> wrote:
> On Sat,  4 Jun 2011 09:55:43 +0100, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> I'm afraid you've completely lost me here. Can you provide a small
> example (libdrm?) program which exhibits the failure so I can follow
> what the problem is?

The outline of the test case is (tuned for num_avail_fences):

batch 1 with 16 buffers, all with fenced relocations.
batch 2 with all 16 buffers from batch, but with no fenced relocations,
and 16 fresh buffers with fenced relocations.

At the start of batch 2, we first pin the 16 reused and currently bound
buffers from batch 1, which we do to avoid eviction any of those active
buffers to make room for batch 2. This has the side-effect of also
pinning all 16 fence registers, of which none are going to be used in
batch 2. Then when we try to pin and fence a fresh buffer, we cannot
find an available fence, abort the reservation and evict everything. On
the second pass we are then able to pin and fence, as required, all 32
buffers.

> And, if I understand any of this at all, I should remove the patch to
> return -EDEADLK from i915_gem_object_get_fence as we may run out of
> fence registers even if the client is accounting for them correctly. If
> so, I'll remove that from my list of -fixes patches.

Yes, we can't apply the EDEADLK patch until we fix the accounting.

> As this is a performance optimization, I also expect to see convincing
> benchmark data before this patch could be considered for merging.

Since the problem is that we prematurely run out of fences, we frequently
trigger evict_everything(inactive) and so cause mass evictions and clflush,
and a large performance regression for heavy fenced BLT usage:

Speedups on q35 (or equally because I finally noticed the regression of):
 midori-zoomed            3576.78 -> 2059.74: 1.74x speedup
 firefox-planet-gnome    14500.77 -> 9523.07: 1.52x speedup
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915: Seperate fence pin counting from normal bind pin counting
  2011-06-04  8:55 Chris Wilson
@ 2011-06-04 17:38 ` Keith Packard
  2011-06-04 18:31   ` Chris Wilson
  2011-06-04 22:18   ` Chris Wilson
  2011-06-05 20:55 ` Daniel Vetter
  2011-11-13 11:00 ` Chris Wilson
  2 siblings, 2 replies; 13+ messages in thread
From: Keith Packard @ 2011-06-04 17:38 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Daniel Vetter


[-- Attachment #1.1: Type: text/plain, Size: 1319 bytes --]

On Sat,  4 Jun 2011 09:55:43 +0100, Chris Wilson <chris@chris-wilson.co.uk> wrote:

> In order to correctly account for reserving space in the GTT and fences
> for a batch buffer, we need to independently track whether the fence is
> pinned due to a fenced GPU access in the batch from from whether the
> buffer is pinned in the aperture. Currently we count the fenced as
> pinned if the buffer has already been seen in the execbuffer. This leads
> to a false accounting of available fence registers, causing frequent
> mass evictions. Worse, if coupled with the change to make
> i915_gem_object_get_fence() report EDADLK upon fence starvation, the
> batchbuffer can fail with only one fence required...

I'm afraid you've completely lost me here. Can you provide a small
example (libdrm?) program which exhibits the failure so I can follow
what the problem is?

And, if I understand any of this at all, I should remove the patch to
return -EDEADLK from i915_gem_object_get_fence as we may run out of
fence registers even if the client is accounting for them correctly. If
so, I'll remove that from my list of -fixes patches.

As this is a performance optimization, I also expect to see convincing
benchmark data before this patch could be considered for merging.

-- 
keith.packard@intel.com

[-- Attachment #1.2: Type: application/pgp-signature, Size: 189 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH] drm/i915: Seperate fence pin counting from normal bind pin counting
@ 2011-06-04  8:55 Chris Wilson
  2011-06-04 17:38 ` Keith Packard
                   ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Chris Wilson @ 2011-06-04  8:55 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

In order to correctly account for reserving space in the GTT and fences
for a batch buffer, we need to independently track whether the fence is
pinned due to a fenced GPU access in the batch from from whether the
buffer is pinned in the aperture. Currently we count the fenced as
pinned if the buffer has already been seen in the execbuffer. This leads
to a false accounting of available fence registers, causing frequent
mass evictions. Worse, if coupled with the change to make
i915_gem_object_get_fence() report EDADLK upon fence starvation, the
batchbuffer can fail with only one fence required...

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.h            |   21 ++++
 drivers/gpu/drm/i915/i915_gem.c            |    7 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  139 ++++++++++++++++++----------
 drivers/gpu/drm/i915/intel_display.c       |   21 ++++-
 4 files changed, 134 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 624b9cc..2776b30 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -766,6 +766,9 @@ struct drm_i915_gem_object {
 	unsigned int pin_count : 4;
 #define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
 
+	unsigned int fence_pin_count : 3;
+#define DRM_I915_GEM_OBJECT_MAX_FENCE_PIN_COUNT 0x7
+
 	/**
 	 * Is the object at the current location in the gtt mappable and
 	 * fenceable? Used to avoid costly recalculations.
@@ -1171,6 +1174,24 @@ int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
 					   struct intel_ring_buffer *pipelined);
 int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
 
+static inline void
+i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
+{
+	if (obj->fence_reg != I915_FENCE_REG_NONE) {
+		BUG_ON(obj->fence_pin_count == DRM_I915_GEM_OBJECT_MAX_FENCE_PIN_COUNT);
+		obj->fence_pin_count++;
+	}
+}
+
+static inline void
+i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
+{
+	if (obj->fence_reg != I915_FENCE_REG_NONE) {
+		BUG_ON(obj->fence_pin_count == 0);
+		obj->fence_pin_count--;
+	}
+}
+
 bool i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_reset(struct drm_device *dev);
 void i915_gem_clflush_object(struct drm_i915_gem_object *obj);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7e3b85f..470240b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2150,6 +2150,7 @@ static void i915_gem_reset_fences(struct drm_device *dev)
 			i915_gem_release_mmap(obj);
 
 		reg->obj->fence_reg = I915_FENCE_REG_NONE;
+		reg->obj->fence_pin_count = 0;
 		reg->obj->fenced_gpu_access = false;
 		reg->obj->last_fenced_seqno = 0;
 		i915_gem_clear_fence_reg(dev, reg);
@@ -2836,6 +2837,8 @@ i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
 {
 	int ret;
 
+	BUG_ON(obj->fence_pin_count);
+
 	if (obj->tiling_mode)
 		i915_gem_release_mmap(obj);
 
@@ -2869,7 +2872,7 @@ i915_find_fence_reg(struct drm_device *dev,
 		if (!reg->obj)
 			return reg;
 
-		if (!reg->obj->pin_count)
+		if (!reg->obj->fence_pin_count)
 			avail = reg;
 	}
 
@@ -2879,7 +2882,7 @@ i915_find_fence_reg(struct drm_device *dev,
 	/* None available, try to steal one or wait for a user to finish */
 	avail = first = NULL;
 	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
-		if (reg->obj->pin_count)
+		if (reg->obj->fence_pin_count)
 			continue;
 
 		if (first == NULL)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 741bf39..0ffe062 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -460,6 +460,54 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
 	return ret;
 }
 
+#define  __EXEC_OBJECT_HAS_FENCE (1<<31)
+
+static int
+pin_and_fence_object(struct drm_i915_gem_object *obj,
+		     struct intel_ring_buffer *ring)
+{
+	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
+	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
+	bool need_fence, need_mappable;
+	int ret;
+
+	need_fence =
+		has_fenced_gpu_access &&
+		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
+		obj->tiling_mode != I915_TILING_NONE;
+	need_mappable =
+		entry->relocation_count ? true : need_fence;
+
+	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
+	if (ret)
+		return ret;
+
+	if (has_fenced_gpu_access) {
+		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+			if (obj->tiling_mode) {
+				ret = i915_gem_object_get_fence(obj, ring);
+				if (ret)
+					goto err_unpin;
+
+				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+				i915_gem_object_pin_fence(obj);
+			} else {
+				ret = i915_gem_object_put_fence(obj);
+				if (ret)
+					goto err_unpin;
+			}
+		}
+		obj->pending_fenced_gpu_access = need_fence;
+	}
+
+	entry->offset = obj->gtt_offset;
+	return 0;
+
+err_unpin:
+	i915_gem_object_unpin(obj);
+	return ret;
+}
+
 static int
 i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    struct drm_file *file,
@@ -517,6 +565,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 		list_for_each_entry(obj, objects, exec_list) {
 			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 			bool need_fence, need_mappable;
+
 			if (!obj->gtt_space)
 				continue;
 
@@ -531,58 +580,47 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    (need_mappable && !obj->map_and_fenceable))
 				ret = i915_gem_object_unbind(obj);
 			else
-				ret = i915_gem_object_pin(obj,
-							  entry->alignment,
-							  need_mappable);
+				ret = pin_and_fence_object(obj, ring);
 			if (ret)
 				goto err;
-
-			entry++;
 		}
 
 		/* Bind fresh objects */
 		list_for_each_entry(obj, objects, exec_list) {
-			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
-			bool need_fence;
-
-			need_fence =
-				has_fenced_gpu_access &&
-				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-				obj->tiling_mode != I915_TILING_NONE;
-
-			if (!obj->gtt_space) {
-				bool need_mappable =
-					entry->relocation_count ? true : need_fence;
-
-				ret = i915_gem_object_pin(obj,
-							  entry->alignment,
-							  need_mappable);
-				if (ret)
-					break;
-			}
+			if (obj->gtt_space)
+				continue;
 
-			if (has_fenced_gpu_access) {
-				if (need_fence) {
-					ret = i915_gem_object_get_fence(obj, ring);
-					if (ret)
-						break;
-				} else if (entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-					   obj->tiling_mode == I915_TILING_NONE) {
-					/* XXX pipelined! */
-					ret = i915_gem_object_put_fence(obj);
-					if (ret)
-						break;
-				}
-				obj->pending_fenced_gpu_access = need_fence;
+			ret = pin_and_fence_object(obj, ring);
+			if (ret) {
+				int ret_ignore;
+
+				/* This can potentially raise a harmless
+				 * -EINVAL if we failed to bind in the above
+				 * call. It cannot raise -EINTR since we know
+				 * that the bo is freshly bound and so will
+				 * not need to be flushed or waited upon.
+				 */
+				ret_ignore = i915_gem_object_unbind(obj);
+				(void)ret_ignore;
+				WARN_ON(obj->gtt_space);
+				break;
 			}
-
-			entry->offset = obj->gtt_offset;
 		}
 
 		/* Decrement pin count for bound objects */
 		list_for_each_entry(obj, objects, exec_list) {
-			if (obj->gtt_space)
-				i915_gem_object_unpin(obj);
+			struct drm_i915_gem_exec_object2 *entry;
+
+			if (!obj->gtt_space)
+				continue;
+
+			entry = obj->exec_entry;
+			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
+				i915_gem_object_unpin_fence(obj);
+				entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
+			}
+
+			i915_gem_object_unpin(obj);
 		}
 
 		if (ret != -ENOSPC || retry > 1)
@@ -599,16 +637,19 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 	} while (1);
 
 err:
-	obj = list_entry(obj->exec_list.prev,
-			 struct drm_i915_gem_object,
-			 exec_list);
-	while (objects != &obj->exec_list) {
-		if (obj->gtt_space)
-			i915_gem_object_unpin(obj);
+	list_for_each_entry_continue_reverse(obj, objects, exec_list) {
+		struct drm_i915_gem_exec_object2 *entry;
+
+		if (!obj->gtt_space)
+			continue;
+
+		entry = obj->exec_entry;
+		if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
+			i915_gem_object_unpin_fence(obj);
+			entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
+		}
 
-		obj = list_entry(obj->exec_list.prev,
-				 struct drm_i915_gem_object,
-				 exec_list);
+		i915_gem_object_unpin(obj);
 	}
 
 	return ret;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index d4e79d3..79c3849 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1851,6 +1851,8 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev,
 		ret = i915_gem_object_get_fence(obj, pipelined);
 		if (ret)
 			goto err_unpin;
+
+		i915_gem_object_pin_fence(obj);
 	}
 
 	dev_priv->mm.interruptible = true;
@@ -2000,14 +2002,22 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
 	ret = intel_pipe_set_base_atomic(crtc, crtc->fb, x, y,
 					 LEAVE_ATOMIC_MODE_SET);
 	if (ret) {
-		i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj);
+		struct drm_i915_gem_object *obj =
+			to_intel_framebuffer(crtc->fb)->obj;
+
+		i915_gem_object_unpin_fence(obj);
+		i915_gem_object_unpin(obj);
 		mutex_unlock(&dev->struct_mutex);
 		return ret;
 	}
 
 	if (old_fb) {
+		struct drm_i915_gem_object *obj =
+			to_intel_framebuffer(old_fb)->obj;
+
 		intel_wait_for_vblank(dev, intel_crtc->pipe);
-		i915_gem_object_unpin(to_intel_framebuffer(old_fb)->obj);
+		i915_gem_object_unpin_fence(obj);
+		i915_gem_object_unpin(obj);
 	}
 
 	mutex_unlock(&dev->struct_mutex);
@@ -2895,8 +2905,12 @@ static void intel_crtc_disable(struct drm_crtc *crtc)
 	crtc_funcs->dpms(crtc, DRM_MODE_DPMS_OFF);
 
 	if (crtc->fb) {
+		struct drm_i915_gem_object *obj =
+			to_intel_framebuffer(crtc->fb)->obj;
+
 		mutex_lock(&dev->struct_mutex);
-		i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj);
+		i915_gem_object_unpin_fence(obj);
+		i915_gem_object_unpin(obj);
 		mutex_unlock(&dev->struct_mutex);
 	}
 }
@@ -6116,6 +6130,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
 		container_of(__work, struct intel_unpin_work, work);
 
 	mutex_lock(&work->dev->struct_mutex);
+	i915_gem_object_unpin_fence(work->old_fb_obj);
 	i915_gem_object_unpin(work->old_fb_obj);
 	drm_gem_object_unreference(&work->pending_flip_obj->base);
 	drm_gem_object_unreference(&work->old_fb_obj->base);
-- 
1.7.5.3

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2011-11-23 12:37 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-07-09  8:25 [PATCH] drm/i915: Seperate fence pin counting from normal bind pin counting Chris Wilson
2011-07-09 10:23 ` Paul Menzel
2011-07-09 10:32   ` Chris Wilson
2011-11-23 10:49 ` Daniel Vetter
  -- strict thread matches above, loose matches on Subject: below --
2011-06-04  8:55 Chris Wilson
2011-06-04 17:38 ` Keith Packard
2011-06-04 18:31   ` Chris Wilson
2011-06-05  1:07     ` Keith Packard
2011-06-04 22:18   ` Chris Wilson
2011-06-05 20:55 ` Daniel Vetter
2011-11-13 11:00 ` Chris Wilson
2011-11-13 11:21   ` Paul Menzel
2011-11-23 12:38   ` Daniel Vetter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).