* [PATCH 1/4] drm/i915: Pin fence for iomap
@ 2017-07-03 10:14 Chris Wilson
  2017-07-03 10:14 ` [PATCH 2/4] drm/i915: Consolidate get_fence with pin_fence Chris Wilson
                   ` (4 more replies)
  0 siblings, 5 replies; 12+ messages in thread
From: Chris Wilson @ 2017-07-03 10:14 UTC (permalink / raw)
  To: intel-gfx

We probably want the caller to specify whether the fence should be
pinned for their usage, but at the moment all callers want either the
associated fence or none, so take it on their behalf.
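
Purely for illustration (not part of this patch), the resulting caller
flow becomes a plain pin/unpin pair; "value" and "offset" below are
placeholders:

	io = i915_vma_pin_iomap(vma);
	if (IS_ERR(io))
		return PTR_ERR(io);

	iowrite32(value, io + offset);

	/* also drops the fence pin taken at map time */
	i915_vma_unpin_iomap(vma);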

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_vma.c                  | 19 +++++++++++++++++++
 drivers/gpu/drm/i915/i915_vma.h                  |  7 +------
 drivers/gpu/drm/i915/selftests/i915_gem_object.c |  8 --------
 3 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 958be0a95960..d7330d3eeab6 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -278,6 +278,7 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 {
 	void __iomem *ptr;
+	int ret;
 
 	/* Access through the GTT requires the device to be awake. */
 	assert_rpm_wakelock_held(vma->vm->i915);
@@ -301,9 +302,27 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 	}
 
 	__i915_vma_pin(vma);
+
+	ret = i915_vma_get_fence(vma);
+	if (ret) {
+		__i915_vma_unpin(vma);
+		return IO_ERR_PTR(ret);
+	}
+	i915_vma_pin_fence(vma);
+
 	return ptr;
 }
 
+void i915_vma_unpin_iomap(struct i915_vma *vma)
+{
+	lockdep_assert_held(&vma->obj->base.dev->struct_mutex);
+
+	GEM_BUG_ON(vma->iomap == NULL);
+
+	i915_vma_unpin_fence(vma);
+	i915_vma_unpin(vma);
+}
+
 void i915_vma_unpin_and_release(struct i915_vma **p_vma)
 {
 	struct i915_vma *vma;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 4a673fc1a432..15e86e50a825 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -319,12 +319,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
  * Callers must hold the struct_mutex. This function is only valid to be
  * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap().
  */
-static inline void i915_vma_unpin_iomap(struct i915_vma *vma)
-{
-	lockdep_assert_held(&vma->obj->base.dev->struct_mutex);
-	GEM_BUG_ON(vma->iomap == NULL);
-	i915_vma_unpin(vma);
-}
+void i915_vma_unpin_iomap(struct i915_vma *vma);
 
 static inline struct page *i915_vma_first_page(struct i915_vma *vma)
 {
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 8f011c447e41..1b8774a42e48 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -251,14 +251,6 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
 			return PTR_ERR(io);
 		}
 
-		err = i915_vma_get_fence(vma);
-		if (err) {
-			pr_err("Failed to get fence for partial view: offset=%lu\n",
-			       page);
-			i915_vma_unpin_iomap(vma);
-			return err;
-		}
-
 		iowrite32(page, io + n * PAGE_SIZE/sizeof(*io));
 		i915_vma_unpin_iomap(vma);
 
-- 
2.13.2


* [PATCH 2/4] drm/i915: Consolidate get_fence with pin_fence
  2017-07-03 10:14 [PATCH 1/4] drm/i915: Pin fence for iomap Chris Wilson
@ 2017-07-03 10:14 ` Chris Wilson
  2017-07-04 14:47   ` Tvrtko Ursulin
  2017-07-03 10:14 ` [PATCH 3/4] drm/i915: Emit pipelined fence changes Chris Wilson
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2017-07-03 10:14 UTC (permalink / raw)
  To: intel-gfx

Following the pattern now used for obj->mm.pages, use just pin_fence and
unpin_fence to control access to the fence registers. I.e. instead of
calling get_fence(); pin_fence(), we now just need to call pin_fence().
This will make it easier to reduce the locking requirements around
fence registers.
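
For illustration, the before/after calling pattern in the execbuffer
paths (error handling abbreviated):

	/* before */
	err = i915_vma_get_fence(vma);
	if (err)
		return err;
	if (i915_vma_pin_fence(vma))
		entry->flags |= __EXEC_OBJECT_HAS_FENCE;

	/* after */
	err = i915_vma_pin_fence(vma);
	if (err)
		return err;
	if (vma->fence)
		entry->flags |= __EXEC_OBJECT_HAS_FENCE;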

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            |  4 ----
 drivers/gpu/drm/i915/i915_gem.c            |  3 ++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10 +++++-----
 drivers/gpu/drm/i915/i915_gem_fence_reg.c  | 27 ++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_vma.c            |  3 +--
 drivers/gpu/drm/i915/i915_vma.h            | 20 ++++++++------------
 drivers/gpu/drm/i915/intel_display.c       |  3 +--
 7 files changed, 39 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0029bb949e90..b08b56a2e680 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3545,10 +3545,6 @@ i915_vm_to_ppgtt(struct i915_address_space *vm)
 	return container_of(vm, struct i915_hw_ppgtt, base);
 }
 
-/* i915_gem_fence_reg.c */
-int __must_check i915_vma_get_fence(struct i915_vma *vma);
-int __must_check i915_vma_put_fence(struct i915_vma *vma);
-
 void i915_gem_revoke_fences(struct drm_i915_private *dev_priv);
 void i915_gem_restore_fences(struct drm_i915_private *dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1b2dfa8bdeef..939a299260e9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1941,7 +1941,7 @@ int i915_gem_fault(struct vm_fault *vmf)
 	if (ret)
 		goto err_unpin;
 
-	ret = i915_vma_get_fence(vma);
+	ret = i915_vma_pin_fence(vma);
 	if (ret)
 		goto err_unpin;
 
@@ -1957,6 +1957,7 @@ int i915_gem_fault(struct vm_fault *vmf)
 			       min_t(u64, vma->size, area->vm_end - area->vm_start),
 			       &ggtt->mappable);
 
+	i915_vma_unpin_fence(vma);
 err_unpin:
 	__i915_vma_unpin(vma);
 err_unlock:
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 929f275e67aa..22a9f5358322 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -365,12 +365,12 @@ eb_pin_vma(struct i915_execbuffer *eb,
 		return;
 
 	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
-		if (unlikely(i915_vma_get_fence(vma))) {
+		if (unlikely(i915_vma_pin_fence(vma))) {
 			i915_vma_unpin(vma);
 			return;
 		}
 
-		if (i915_vma_pin_fence(vma))
+		if (vma->fence)
 			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
 
@@ -384,7 +384,7 @@ __eb_unreserve_vma(struct i915_vma *vma,
 	GEM_BUG_ON(!(entry->flags & __EXEC_OBJECT_HAS_PIN));
 
 	if (unlikely(entry->flags & __EXEC_OBJECT_HAS_FENCE))
-		i915_vma_unpin_fence(vma);
+		__i915_vma_unpin_fence(vma);
 
 	__i915_vma_unpin(vma);
 }
@@ -564,13 +564,13 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
 	GEM_BUG_ON(eb_vma_misplaced(entry, vma));
 
 	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
-		err = i915_vma_get_fence(vma);
+		err = i915_vma_pin_fence(vma);
 		if (unlikely(err)) {
 			i915_vma_unpin(vma);
 			return err;
 		}
 
-		if (i915_vma_pin_fence(vma))
+		if (vma->fence)
 			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 5fe2cd8c8f28..55ac7bc14fce 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -280,8 +280,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
  *
  * 0 on success, negative error code on failure.
  */
-int
-i915_vma_put_fence(struct i915_vma *vma)
+int i915_vma_put_fence(struct i915_vma *vma)
 {
 	struct drm_i915_fence_reg *fence = vma->fence;
 
@@ -299,6 +298,8 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
 	struct drm_i915_fence_reg *fence;
 
 	list_for_each_entry(fence, &dev_priv->mm.fence_list, link) {
+		GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
+
 		if (fence->pin_count)
 			continue;
 
@@ -313,7 +314,7 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
 }
 
 /**
- * i915_vma_get_fence - set up fencing for a vma
+ * i915_vma_pin_fence - set up fencing for a vma
  * @vma: vma to map through a fence reg
  *
  * When mapping objects through the GTT, userspace wants to be able to write
@@ -331,10 +332,11 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
  * 0 on success, negative error code on failure.
  */
 int
-i915_vma_get_fence(struct i915_vma *vma)
+i915_vma_pin_fence(struct i915_vma *vma)
 {
 	struct drm_i915_fence_reg *fence;
 	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
+	int err;
 
 	/* Note that we revoke fences on runtime suspend. Therefore the user
 	 * must keep the device awake whilst using the fence.
@@ -344,6 +346,8 @@ i915_vma_get_fence(struct i915_vma *vma)
 	/* Just update our place in the LRU if our fence is getting reused. */
 	if (vma->fence) {
 		fence = vma->fence;
+		GEM_BUG_ON(fence->vma != vma);
+		fence->pin_count++;
 		if (!fence->dirty) {
 			list_move_tail(&fence->link,
 				       &fence->i915->mm.fence_list);
@@ -353,10 +357,19 @@ i915_vma_get_fence(struct i915_vma *vma)
 		fence = fence_find(vma->vm->i915);
 		if (IS_ERR(fence))
 			return PTR_ERR(fence);
+		fence->pin_count++;
 	} else
 		return 0;
 
-	return fence_update(fence, set);
+	err = fence_update(fence, set);
+	if (err)
+		goto err_unpin;
+
+	return 0;
+
+err_unpin:
+	fence->pin_count--;
+	return err;
 }
 
 /**
@@ -378,6 +391,8 @@ void i915_gem_revoke_fences(struct drm_i915_private *dev_priv)
 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
 		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
 
+		GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
+
 		if (fence->vma)
 			i915_gem_release_mmap(fence->vma->obj);
 	}
@@ -399,6 +414,8 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
 		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
 		struct i915_vma *vma = reg->vma;
 
+		GEM_BUG_ON(vma && vma->fence != reg);
+
 		/*
 		 * Commit delayed tiling changes if we have an object still
 		 * attached to the fence, otherwise just clear the fence.
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index d7330d3eeab6..efbfee8eac99 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -303,12 +303,11 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 
 	__i915_vma_pin(vma);
 
-	ret = i915_vma_get_fence(vma);
+	ret = i915_vma_pin_fence(vma);
 	if (ret) {
 		__i915_vma_unpin(vma);
 		return IO_ERR_PTR(ret);
 	}
-	i915_vma_pin_fence(vma);
 
 	return ptr;
 }
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 15e86e50a825..19f58af4f1bf 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -342,15 +342,13 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma)
  *
  * True if the vma has a fence, false otherwise.
  */
-static inline bool
-i915_vma_pin_fence(struct i915_vma *vma)
+int i915_vma_pin_fence(struct i915_vma *vma);
+int __must_check i915_vma_put_fence(struct i915_vma *vma);
+
+static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
 {
-	lockdep_assert_held(&vma->obj->base.dev->struct_mutex);
-	if (vma->fence) {
-		vma->fence->pin_count++;
-		return true;
-	} else
-		return false;
+	GEM_BUG_ON(vma->fence->pin_count <= 0);
+	vma->fence->pin_count--;
 }
 
 /**
@@ -365,10 +363,8 @@ static inline void
 i915_vma_unpin_fence(struct i915_vma *vma)
 {
 	lockdep_assert_held(&vma->obj->base.dev->struct_mutex);
-	if (vma->fence) {
-		GEM_BUG_ON(vma->fence->pin_count <= 0);
-		vma->fence->pin_count--;
-	}
+	if (vma->fence)
+		__i915_vma_unpin_fence(vma);
 }
 
 #endif
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 4e03ca6c946f..432bbaf460b4 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2183,8 +2183,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
 		 * something and try to run the system in a "less than optimal"
 		 * mode that matches the user configuration.
 		 */
-		if (i915_vma_get_fence(vma) == 0)
-			i915_vma_pin_fence(vma);
+		i915_vma_pin_fence(vma);
 	}
 
 	i915_vma_get(vma);
-- 
2.13.2


* [PATCH 3/4] drm/i915: Emit pipelined fence changes
  2017-07-03 10:14 [PATCH 1/4] drm/i915: Pin fence for iomap Chris Wilson
  2017-07-03 10:14 ` [PATCH 2/4] drm/i915: Consolidate get_fence with pin_fence Chris Wilson
@ 2017-07-03 10:14 ` Chris Wilson
  2017-07-05 10:19   ` Tvrtko Ursulin
  2017-07-03 10:14 ` [PATCH 4/4] drm/i915: Make fence->pin_count atomic to allow unlocked unpinning Chris Wilson
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2017-07-03 10:14 UTC (permalink / raw)
  To: intel-gfx

Many years ago, long before requests, we tried doing this. We never
quite got it right, but now with requests we have the tracking to do the
job properly!

One of the stall points for gen2/gen3 is the use of fence registers for
GPU operations. There are only a few available, and currently if we
exhaust the available fence registers we must stall the GPU between
batches. By pipelining the fence register writes, we can avoid the stall
and continuously emit the batches. The challenge is remembering to wait
for those pipelined LRIs before accessing the fence with the CPU, and
that is what our request tracking makes easy.
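
As a sketch of the rule this imposes on the CPU side: before writing a
fence register via mmio, retire any pipelined LRI still outstanding
against it, which is what fence_update() below now does first:

	err = i915_gem_active_retire(&fence->pipelined,
				     &fence->i915->drm.struct_mutex);
	if (err)
		return err;
	/* no pipelined write pending; safe to write via mmio */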

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c            |   1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  15 ++-
 drivers/gpu/drm/i915/i915_gem_fence_reg.c  | 193 +++++++++++++++++++++++++----
 drivers/gpu/drm/i915/i915_gem_fence_reg.h  |   1 +
 drivers/gpu/drm/i915/i915_vma.c            |   1 +
 drivers/gpu/drm/i915/i915_vma.h            |   4 +
 6 files changed, 186 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 939a299260e9..5318e321ce50 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4880,6 +4880,7 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
 		fence->i915 = dev_priv;
 		fence->id = i;
 		list_add_tail(&fence->link, &dev_priv->mm.fence_list);
+		init_request_active(&fence->pipelined, NULL);
 	}
 	i915_gem_restore_fences(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 22a9f5358322..19347ad0e7ac 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -365,11 +365,12 @@ eb_pin_vma(struct i915_execbuffer *eb,
 		return;
 
 	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
-		if (unlikely(i915_vma_pin_fence(vma))) {
+		if (unlikely(i915_vma_reserve_fence(vma))) {
 			i915_vma_unpin(vma);
 			return;
 		}
 
+		entry->flags &= ~EXEC_OBJECT_ASYNC;
 		if (vma->fence)
 			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
@@ -564,12 +565,13 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
 	GEM_BUG_ON(eb_vma_misplaced(entry, vma));
 
 	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
-		err = i915_vma_pin_fence(vma);
+		err = i915_vma_reserve_fence(vma);
 		if (unlikely(err)) {
 			i915_vma_unpin(vma);
 			return err;
 		}
 
+		entry->flags &= ~EXEC_OBJECT_ASYNC;
 		if (vma->fence)
 			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
@@ -1848,6 +1850,12 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 		if (unlikely(obj->cache_dirty && !obj->cache_coherent))
 			i915_gem_clflush_object(obj, 0);
 
+		if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
+			err = i915_vma_emit_pipelined_fence(vma, eb->request);
+			if (err)
+				return err;
+		}
+
 		err = i915_gem_request_await_object
 			(eb->request, obj, entry->flags & EXEC_OBJECT_WRITE);
 		if (err)
@@ -1932,9 +1940,6 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 		obj->base.read_domains = 0;
 	}
 	obj->base.read_domains |= I915_GEM_GPU_DOMAINS;
-
-	if (flags & EXEC_OBJECT_NEEDS_FENCE)
-		i915_gem_active_set(&vma->last_fence, req);
 }
 
 static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 55ac7bc14fce..d0cd051c19fd 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -55,10 +55,9 @@
  * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
  */
 
-#define pipelined 0
-
-static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
-				 struct i915_vma *vma)
+static int i965_write_fence_reg(struct drm_i915_fence_reg *fence,
+				struct i915_vma *vma,
+				struct drm_i915_gem_request *pipelined)
 {
 	i915_reg_t fence_reg_lo, fence_reg_hi;
 	int fence_pitch_shift;
@@ -110,11 +109,30 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
 		I915_WRITE(fence_reg_hi, upper_32_bits(val));
 		I915_WRITE(fence_reg_lo, lower_32_bits(val));
 		POSTING_READ(fence_reg_lo);
+	} else {
+		u32 *cs;
+
+		cs = intel_ring_begin(pipelined, 8);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		*cs++ = MI_LOAD_REGISTER_IMM(3);
+		*cs++ = i915_mmio_reg_offset(fence_reg_lo);
+		*cs++ = 0;
+		*cs++ = i915_mmio_reg_offset(fence_reg_hi);
+		*cs++ = upper_32_bits(val);
+		*cs++ = i915_mmio_reg_offset(fence_reg_lo);
+		*cs++ = lower_32_bits(val);
+		*cs++ = MI_NOOP;
+		intel_ring_advance(pipelined, cs);
 	}
+
+	return 0;
 }
 
-static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
-				 struct i915_vma *vma)
+static int i915_write_fence_reg(struct drm_i915_fence_reg *fence,
+				struct i915_vma *vma,
+				struct drm_i915_gem_request *pipelined)
 {
 	u32 val;
 
@@ -150,11 +168,26 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
 
 		I915_WRITE(reg, val);
 		POSTING_READ(reg);
+	} else {
+		u32 *cs;
+
+		cs = intel_ring_begin(pipelined, 4);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		*cs++ = MI_LOAD_REGISTER_IMM(1);
+		*cs++ = i915_mmio_reg_offset(FENCE_REG(fence->id));
+		*cs++ = val;
+		*cs++ = MI_NOOP;
+		intel_ring_advance(pipelined, cs);
 	}
+
+	return 0;
 }
 
-static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
-				 struct i915_vma *vma)
+static int i830_write_fence_reg(struct drm_i915_fence_reg *fence,
+				struct i915_vma *vma,
+				struct drm_i915_gem_request *pipelined)
 {
 	u32 val;
 
@@ -182,29 +215,49 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
 
 		I915_WRITE(reg, val);
 		POSTING_READ(reg);
+	} else {
+		u32 *cs;
+
+		cs = intel_ring_begin(pipelined, 4);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		*cs++ = MI_LOAD_REGISTER_IMM(1);
+		*cs++ = i915_mmio_reg_offset(FENCE_REG(fence->id));
+		*cs++ = val;
+		*cs++ = MI_NOOP;
+		intel_ring_advance(pipelined, cs);
 	}
+
+	return 0;
 }
 
-static void fence_write(struct drm_i915_fence_reg *fence,
-			struct i915_vma *vma)
+static int fence_write(struct drm_i915_fence_reg *fence,
+		       struct i915_vma *vma,
+		       struct drm_i915_gem_request *rq)
 {
+	int err;
+
 	/* Previous access through the fence register is marshalled by
 	 * the mb() inside the fault handlers (i915_gem_release_mmaps)
 	 * and explicitly managed for internal users.
 	 */
 
 	if (IS_GEN2(fence->i915))
-		i830_write_fence_reg(fence, vma);
+		err = i830_write_fence_reg(fence, vma, rq);
 	else if (IS_GEN3(fence->i915))
-		i915_write_fence_reg(fence, vma);
+		err = i915_write_fence_reg(fence, vma, rq);
 	else
-		i965_write_fence_reg(fence, vma);
+		err = i965_write_fence_reg(fence, vma, rq);
+	if (err)
+		return err;
 
 	/* Access through the fenced region afterwards is
 	 * ordered by the posting reads whilst writing the registers.
 	 */
 
 	fence->dirty = false;
+	return 0;
 }
 
 static int fence_update(struct drm_i915_fence_reg *fence,
@@ -212,17 +265,15 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 {
 	int ret;
 
+	ret = i915_gem_active_retire(&fence->pipelined,
+				     &fence->i915->drm.struct_mutex);
+	if (ret)
+		return ret;
+
 	if (vma) {
 		if (!i915_vma_is_map_and_fenceable(vma))
 			return -EINVAL;
 
-		if (WARN(!i915_gem_object_get_stride(vma->obj) ||
-			 !i915_gem_object_get_tiling(vma->obj),
-			 "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
-			 i915_gem_object_get_stride(vma->obj),
-			 i915_gem_object_get_tiling(vma->obj)))
-			return -EINVAL;
-
 		ret = i915_gem_active_retire(&vma->last_fence,
 					     &vma->obj->base.dev->struct_mutex);
 		if (ret)
@@ -253,7 +304,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 	 * to the runtime resume, see i915_gem_restore_fences().
 	 */
 	if (intel_runtime_pm_get_if_in_use(fence->i915)) {
-		fence_write(fence, vma);
+		fence_write(fence, vma, NULL);
 		intel_runtime_pm_put(fence->i915);
 	}
 
@@ -287,6 +338,8 @@ int i915_vma_put_fence(struct i915_vma *vma)
 	if (!fence)
 		return 0;
 
+	GEM_BUG_ON(fence->vma != vma);
+
 	if (fence->pin_count)
 		return -EBUSY;
 
@@ -344,11 +397,16 @@ i915_vma_pin_fence(struct i915_vma *vma)
 	assert_rpm_wakelock_held(vma->vm->i915);
 
 	/* Just update our place in the LRU if our fence is getting reused. */
-	if (vma->fence) {
-		fence = vma->fence;
+	fence = vma->fence;
+	if (fence) {
 		GEM_BUG_ON(fence->vma != vma);
 		fence->pin_count++;
 		if (!fence->dirty) {
+			err = i915_gem_active_retire(&fence->pipelined,
+						     &fence->i915->drm.struct_mutex);
+			if (err)
+				goto err_unpin;
+
 			list_move_tail(&fence->link,
 				       &fence->i915->mm.fence_list);
 			return 0;
@@ -372,6 +430,93 @@ i915_vma_pin_fence(struct i915_vma *vma)
 	return err;
 }
 
+int i915_vma_reserve_fence(struct i915_vma *vma)
+{
+	struct drm_i915_fence_reg *fence;
+
+	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+	GEM_BUG_ON(!i915_vma_is_pinned(vma));
+
+	fence = vma->fence;
+	if (!fence) {
+		if (!i915_gem_object_is_tiled(vma->obj))
+			return 0;
+
+		if (!i915_vma_is_map_and_fenceable(vma))
+			return -EINVAL;
+
+		fence = fence_find(vma->vm->i915);
+		if (IS_ERR(fence))
+			return PTR_ERR(fence);
+
+		vma->fence = fence;
+
+		if (fence->vma) {
+			i915_gem_release_mmap(fence->vma->obj);
+			fence->vma->fence = NULL;
+		}
+		fence->vma = vma;
+		fence->dirty = true;
+	}
+	fence->pin_count++;
+	list_move_tail(&fence->link, &fence->i915->mm.fence_list);
+
+	GEM_BUG_ON(!i915_gem_object_is_tiled(vma->obj));
+	GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
+	GEM_BUG_ON(vma->node.size != vma->fence_size);
+	GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_alignment));
+
+	return 0;
+}
+
+int i915_vma_emit_pipelined_fence(struct i915_vma *vma,
+				  struct drm_i915_gem_request *rq)
+{
+	struct drm_i915_fence_reg *fence = vma->fence;
+	struct drm_i915_gem_request *prev;
+	int err;
+
+	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	GEM_BUG_ON(fence && !fence->pin_count);
+
+	if (!fence)
+		goto out;
+
+	prev = i915_gem_active_raw(&fence->pipelined,
+				   &fence->i915->drm.struct_mutex);
+	if (prev) {
+		err = i915_gem_request_await_dma_fence(rq, &prev->fence);
+		if (err)
+			return err;
+	}
+
+	if (!fence->dirty)
+		goto out;
+
+	GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
+
+	if (fence->vma) {
+		prev = i915_gem_active_raw(&fence->vma->last_fence,
+					   &fence->i915->drm.struct_mutex);
+		if (prev) {
+			err = i915_gem_request_await_dma_fence(rq,
+							       &prev->fence);
+			if (err)
+				return err;
+		}
+	}
+
+	err = fence_write(fence, vma, rq);
+	if (err)
+		return err;
+
+	i915_gem_active_set(&fence->pipelined, rq);
+out:
+	i915_gem_active_set(&vma->last_fence, rq);
+	return 0;
+}
+
 /**
  * i915_gem_revoke_fences - revoke fence state
  * @dev_priv: i915 device private
@@ -429,7 +574,7 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
 			vma = NULL;
 		}
 
-		fence_write(reg, vma);
+		fence_write(reg, vma, NULL);
 		reg->vma = vma;
 	}
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
index 99a31ded4dfd..ce45972fc5c6 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
@@ -47,6 +47,7 @@ struct drm_i915_fence_reg {
 	 * command (such as BLT on gen2/3), as a "fence".
 	 */
 	bool dirty;
+	struct i915_gem_active pipelined;
 };
 
 #endif
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index efbfee8eac99..0c489090d4ab 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -728,6 +728,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 		__i915_vma_iounmap(vma);
 		vma->flags &= ~I915_VMA_CAN_FENCE;
 	}
+	GEM_BUG_ON(vma->fence);
 
 	if (likely(!vma->vm->closed)) {
 		trace_i915_vma_unbind(vma);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 19f58af4f1bf..f0dc6eaebeab 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -367,5 +367,9 @@ i915_vma_unpin_fence(struct i915_vma *vma)
 		__i915_vma_unpin_fence(vma);
 }
 
+int __must_check i915_vma_reserve_fence(struct i915_vma *vma);
+int i915_vma_emit_pipelined_fence(struct i915_vma *vma,
+				  struct drm_i915_gem_request *rq);
+
 #endif
 
-- 
2.13.2


* [PATCH 4/4] drm/i915: Make fence->pin_count atomic to allow unlocked unpinning
  2017-07-03 10:14 [PATCH 1/4] drm/i915: Pin fence for iomap Chris Wilson
  2017-07-03 10:14 ` [PATCH 2/4] drm/i915: Consolidate get_fence with pin_fence Chris Wilson
  2017-07-03 10:14 ` [PATCH 3/4] drm/i915: Emit pipelined fence changes Chris Wilson
@ 2017-07-03 10:14 ` Chris Wilson
  2017-07-03 11:09 ` ✓ Fi.CI.BAT: success for series starting with [1/4] drm/i915: Pin fence for iomap Patchwork
  2017-07-04  9:53 ` [PATCH 1/4] " Tvrtko Ursulin
  4 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2017-07-03 10:14 UTC (permalink / raw)
  To: intel-gfx

When we unpin a fence, we do nothing more than mark it available. We
can do this outside of any locking by making the operation atomic.
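
As an illustrative sketch, unpinning then reduces to (compare
__i915_vma_unpin_fence() in the diff below):

	GEM_BUG_ON(atomic_read(&fence->pin_count) <= 0);
	atomic_dec(&fence->pin_count);	/* no struct_mutex required */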

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gvt/aperture_gm.c    |  2 +-
 drivers/gpu/drm/i915/i915_debugfs.c       |  2 +-
 drivers/gpu/drm/i915/i915_gem_fence_reg.c | 15 ++++++++-------
 drivers/gpu/drm/i915/i915_gem_fence_reg.h |  2 +-
 drivers/gpu/drm/i915/i915_vma.h           |  9 ++++++---
 5 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index 325618d969fe..c34cd56eb83a 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -196,7 +196,7 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu)
 	i = 0;
 	list_for_each_safe(pos, q, &dev_priv->mm.fence_list) {
 		reg = list_entry(pos, struct drm_i915_fence_reg, link);
-		if (reg->pin_count || reg->vma)
+		if (atomic_read(&reg->pin_count) || reg->vma)
 			continue;
 		list_del(pos);
 		vgpu->fence.regs[i] = reg;
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 9fdafed9a601..cee9d2e8832e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -965,7 +965,7 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
 		struct i915_vma *vma = dev_priv->fence_regs[i].vma;
 
 		seq_printf(m, "Fence %d, pin count = %d, object = ",
-			   i, dev_priv->fence_regs[i].pin_count);
+			   i, atomic_read(&dev_priv->fence_regs[i].pin_count));
 		if (!vma)
 			seq_puts(m, "unused");
 		else
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index d0cd051c19fd..02d487c44a77 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -340,7 +340,7 @@ int i915_vma_put_fence(struct i915_vma *vma)
 
 	GEM_BUG_ON(fence->vma != vma);
 
-	if (fence->pin_count)
+	if (atomic_read(&fence->pin_count))
 		return -EBUSY;
 
 	return fence_update(fence, NULL);
@@ -353,7 +353,7 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
 	list_for_each_entry(fence, &dev_priv->mm.fence_list, link) {
 		GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
 
-		if (fence->pin_count)
+		if (atomic_read(&fence->pin_count))
 			continue;
 
 		return fence;
@@ -400,7 +400,7 @@ i915_vma_pin_fence(struct i915_vma *vma)
 	fence = vma->fence;
 	if (fence) {
 		GEM_BUG_ON(fence->vma != vma);
-		fence->pin_count++;
+		atomic_inc(&fence->pin_count);
 		if (!fence->dirty) {
 			err = i915_gem_active_retire(&fence->pipelined,
 						     &fence->i915->drm.struct_mutex);
@@ -415,7 +415,8 @@ i915_vma_pin_fence(struct i915_vma *vma)
 		fence = fence_find(vma->vm->i915);
 		if (IS_ERR(fence))
 			return PTR_ERR(fence);
-		fence->pin_count++;
+
+		atomic_inc(&fence->pin_count);
 	} else
 		return 0;
 
@@ -426,7 +427,7 @@ i915_vma_pin_fence(struct i915_vma *vma)
 	return 0;
 
 err_unpin:
-	fence->pin_count--;
+	atomic_dec(&fence->pin_count);
 	return err;
 }
 
@@ -459,7 +460,7 @@ int i915_vma_reserve_fence(struct i915_vma *vma)
 		fence->vma = vma;
 		fence->dirty = true;
 	}
-	fence->pin_count++;
+	atomic_inc(&fence->pin_count);
 	list_move_tail(&fence->link, &fence->i915->mm.fence_list);
 
 	GEM_BUG_ON(!i915_gem_object_is_tiled(vma->obj));
@@ -478,7 +479,7 @@ int i915_vma_emit_pipelined_fence(struct i915_vma *vma,
 	int err;
 
 	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-	GEM_BUG_ON(fence && !fence->pin_count);
+	GEM_BUG_ON(fence && !atomic_read(&fence->pin_count));
 
 	if (!fence)
 		goto out;
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
index ce45972fc5c6..f69d894997fb 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
@@ -36,7 +36,7 @@ struct drm_i915_fence_reg {
 	struct list_head link;
 	struct drm_i915_private *i915;
 	struct i915_vma *vma;
-	int pin_count;
+	atomic_t pin_count;
 	int id;
 	/**
 	 * Whether the tiling parameters for the currently
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index f0dc6eaebeab..9d95fdae017c 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -347,8 +347,8 @@ int __must_check i915_vma_put_fence(struct i915_vma *vma);
 
 static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
 {
-	GEM_BUG_ON(vma->fence->pin_count <= 0);
-	vma->fence->pin_count--;
+	GEM_BUG_ON(atomic_read(&vma->fence->pin_count) <= 0);
+	atomic_dec(&vma->fence->pin_count);
 }
 
 /**
@@ -362,7 +362,10 @@ static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
 static inline void
 i915_vma_unpin_fence(struct i915_vma *vma)
 {
-	lockdep_assert_held(&vma->obj->base.dev->struct_mutex);
+	/* The assumption is if the caller has a fence, the caller owns a pin
+	 * on that fence, i.e. that vma->fence cannot become NULL prior to us
+	 * releasing our pin.
+	 */
 	if (vma->fence)
 		__i915_vma_unpin_fence(vma);
 }
-- 
2.13.2


* ✓ Fi.CI.BAT: success for series starting with [1/4] drm/i915: Pin fence for iomap
  2017-07-03 10:14 [PATCH 1/4] drm/i915: Pin fence for iomap Chris Wilson
                   ` (2 preceding siblings ...)
  2017-07-03 10:14 ` [PATCH 4/4] drm/i915: Make fence->pin_count atomic to allow unlocked unpinning Chris Wilson
@ 2017-07-03 11:09 ` Patchwork
  2017-07-04  9:53 ` [PATCH 1/4] " Tvrtko Ursulin
  4 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2017-07-03 11:09 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/4] drm/i915: Pin fence for iomap
URL   : https://patchwork.freedesktop.org/series/26734/
State : success

== Summary ==

Series 26734v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/26734/revisions/1/mbox/

Test kms_cursor_legacy:
        Subgroup basic-busy-flip-before-cursor-atomic:
                pass       -> FAIL       (fi-snb-2600) fdo#100215
Test kms_pipe_crc_basic:
        Subgroup hang-read-crc-pipe-b:
                pass       -> DMESG-WARN (fi-pnv-d510) fdo#101597

fdo#100215 https://bugs.freedesktop.org/show_bug.cgi?id=100215
fdo#101597 https://bugs.freedesktop.org/show_bug.cgi?id=101597

fi-bdw-5557u     total:279  pass:264  dwarn:0   dfail:0   fail:3   skip:11  time:436s
fi-bdw-gvtdvm    total:279  pass:257  dwarn:8   dfail:0   fail:0   skip:14  time:428s
fi-blb-e6850     total:279  pass:224  dwarn:1   dfail:0   fail:0   skip:54  time:357s
fi-bsw-n3050     total:279  pass:239  dwarn:0   dfail:0   fail:3   skip:36  time:511s
fi-bxt-j4205     total:279  pass:256  dwarn:0   dfail:0   fail:3   skip:19  time:506s
fi-byt-j1900     total:279  pass:250  dwarn:1   dfail:0   fail:3   skip:24  time:477s
fi-byt-n2820     total:279  pass:246  dwarn:1   dfail:0   fail:3   skip:28  time:469s
fi-glk-2a        total:279  pass:256  dwarn:0   dfail:0   fail:3   skip:19  time:589s
fi-hsw-4770      total:279  pass:259  dwarn:0   dfail:0   fail:3   skip:16  time:428s
fi-hsw-4770r     total:279  pass:259  dwarn:0   dfail:0   fail:3   skip:16  time:409s
fi-ilk-650       total:279  pass:225  dwarn:0   dfail:0   fail:3   skip:50  time:411s
fi-ivb-3520m     total:279  pass:257  dwarn:0   dfail:0   fail:3   skip:18  time:492s
fi-ivb-3770      total:279  pass:257  dwarn:0   dfail:0   fail:3   skip:18  time:470s
fi-kbl-7500u     total:279  pass:257  dwarn:0   dfail:0   fail:3   skip:18  time:455s
fi-kbl-7560u     total:279  pass:264  dwarn:1   dfail:0   fail:3   skip:10  time:557s
fi-kbl-r         total:279  pass:256  dwarn:1   dfail:0   fail:3   skip:18  time:565s
fi-pnv-d510      total:279  pass:222  dwarn:2   dfail:0   fail:0   skip:55  time:557s
fi-skl-6260u     total:279  pass:265  dwarn:0   dfail:0   fail:3   skip:10  time:449s
fi-skl-6700hq    total:279  pass:219  dwarn:1   dfail:0   fail:33  skip:24  time:305s
fi-skl-6700k     total:279  pass:257  dwarn:0   dfail:0   fail:3   skip:18  time:464s
fi-skl-6770hq    total:279  pass:265  dwarn:0   dfail:0   fail:3   skip:10  time:469s
fi-skl-gvtdvm    total:279  pass:266  dwarn:0   dfail:0   fail:0   skip:13  time:434s
fi-snb-2520m     total:279  pass:247  dwarn:0   dfail:0   fail:3   skip:28  time:534s
fi-snb-2600      total:279  pass:245  dwarn:0   dfail:0   fail:4   skip:29  time:411s

f74d8b31009401f5c871870ada49230a3ee12f66 drm-tip: 2017y-07m-03d-10h-03m-28s UTC integration manifest
8860b82 drm/i915: Make fence->pin_count atomic to allow unlocked unpinning
2cefec5 drm/i915: Emit pipelined fence changes
d1de12d drm/i915: Consolidate get_fence with pin_fence
93b2a8a drm/i915: Pin fence for iomap

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_5088/

* Re: [PATCH 1/4] drm/i915: Pin fence for iomap
  2017-07-03 10:14 [PATCH 1/4] drm/i915: Pin fence for iomap Chris Wilson
                   ` (3 preceding siblings ...)
  2017-07-03 11:09 ` ✓ Fi.CI.BAT: success for series starting with [1/4] drm/i915: Pin fence for iomap Patchwork
@ 2017-07-04  9:53 ` Tvrtko Ursulin
  2017-07-04 10:04   ` Chris Wilson
  4 siblings, 1 reply; 12+ messages in thread
From: Tvrtko Ursulin @ 2017-07-04  9:53 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 03/07/2017 11:14, Chris Wilson wrote:
> We probably want the caller to specify whether the fence should be
> pinned for their usage, but at the moment all callers want either the
> associated fence or none, so take it on their behalf.
Wouldn't this be wasting fences for ring buffers?

Regards,

Tvrtko

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_vma.c                  | 19 +++++++++++++++++++
>   drivers/gpu/drm/i915/i915_vma.h                  |  7 +------
>   drivers/gpu/drm/i915/selftests/i915_gem_object.c |  8 --------
>   3 files changed, 20 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 958be0a95960..d7330d3eeab6 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -278,6 +278,7 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
>   void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
>   {
>   	void __iomem *ptr;
> +	int ret;
>   
>   	/* Access through the GTT requires the device to be awake. */
>   	assert_rpm_wakelock_held(vma->vm->i915);
> @@ -301,9 +302,27 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
>   	}
>   
>   	__i915_vma_pin(vma);
> +
> +	ret = i915_vma_get_fence(vma);
> +	if (ret) {
> +		__i915_vma_unpin(vma);
> +		return IO_ERR_PTR(ret);
> +	}
> +	i915_vma_pin_fence(vma);
> +
>   	return ptr;
>   }
>   
> +void i915_vma_unpin_iomap(struct i915_vma *vma)
> +{
> +	lockdep_assert_held(&vma->obj->base.dev->struct_mutex);
> +
> +	GEM_BUG_ON(vma->iomap == NULL);
> +
> +	i915_vma_unpin_fence(vma);
> +	i915_vma_unpin(vma);
> +}
> +
>   void i915_vma_unpin_and_release(struct i915_vma **p_vma)
>   {
>   	struct i915_vma *vma;
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 4a673fc1a432..15e86e50a825 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -319,12 +319,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
>    * Callers must hold the struct_mutex. This function is only valid to be
>    * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap().
>    */
> -static inline void i915_vma_unpin_iomap(struct i915_vma *vma)
> -{
> -	lockdep_assert_held(&vma->obj->base.dev->struct_mutex);
> -	GEM_BUG_ON(vma->iomap == NULL);
> -	i915_vma_unpin(vma);
> -}
> +void i915_vma_unpin_iomap(struct i915_vma *vma);
>   
>   static inline struct page *i915_vma_first_page(struct i915_vma *vma)
>   {
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> index 8f011c447e41..1b8774a42e48 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> @@ -251,14 +251,6 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
>   			return PTR_ERR(io);
>   		}
>   
> -		err = i915_vma_get_fence(vma);
> -		if (err) {
> -			pr_err("Failed to get fence for partial view: offset=%lu\n",
> -			       page);
> -			i915_vma_unpin_iomap(vma);
> -			return err;
> -		}
> -
>   		iowrite32(page, io + n * PAGE_SIZE/sizeof(*io));
>   		i915_vma_unpin_iomap(vma);
>   
> 

* Re: [PATCH 1/4] drm/i915: Pin fence for iomap
  2017-07-04  9:53 ` [PATCH 1/4] " Tvrtko Ursulin
@ 2017-07-04 10:04   ` Chris Wilson
  2017-07-04 11:19     ` Tvrtko Ursulin
  0 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2017-07-04 10:04 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2017-07-04 10:53:53)
> 
> On 03/07/2017 11:14, Chris Wilson wrote:
> > We probably want the caller to specify whether the fence should be
> > pinned for their usage, but at the moment all callers want either the
> > associated fence or none, so take it on their behalf.
> Wouldn't this be wasting fences for ring buffers?

No. We don't claim a fence unless it is used (the "or none" clause above). If
the object is untiled, like ringbuffers, we don't assign a fence. The
problem I'm thinking about for the future is having an iomap cached for
fenced and unfenced access, i.e. we need to track the type of iomap
similarly to regular vmaps.
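
For reference, that is the

	struct i915_vma *set =
		i915_gem_object_is_tiled(vma->obj) ? vma : NULL;

check in i915_vma_get_fence(): an untiled object passes set == NULL and
never claims a fence.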
-Chris

* Re: [PATCH 1/4] drm/i915: Pin fence for iomap
  2017-07-04 10:04   ` Chris Wilson
@ 2017-07-04 11:19     ` Tvrtko Ursulin
  2017-07-04 11:25       ` Chris Wilson
  0 siblings, 1 reply; 12+ messages in thread
From: Tvrtko Ursulin @ 2017-07-04 11:19 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 04/07/2017 11:04, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2017-07-04 10:53:53)
>>
>> On 03/07/2017 11:14, Chris Wilson wrote:
>>> We probably want the caller to specify whether the fence should be
>>> pinned for their usage, but at the moment all callers want either the
>>> associated fence or none, so take it on their behalf.
>> Wouldn't this be wasting fences for ring buffers?
> 
> No. We don't claim a fence unless used (the or none clause above). If
> the object is untiled, like ringbuffers, we don't assign a fence. The
> problem I'm thinking about for the future is having an iomap cached for
> fenced and unfenced access, i.e. we need to track the type of iomap
> similarly to regular vmaps.

I missed that i915_vma_get_fence only sets one up for tiled objects. 
Code looks fine to me then, but the commit message reads like the main 
paragraph is missing. With that improved a bit:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

But for the series as a whole - what is the cover story? What does it do 
on the high level? Less waiting in eb path, and instead sets up awaits? 
Fences are still reserved in advance so when it is runnable it is 
guaranteed which one it is getting?

Regards,

Tvrtko


* Re: [PATCH 1/4] drm/i915: Pin fence for iomap
  2017-07-04 11:19     ` Tvrtko Ursulin
@ 2017-07-04 11:25       ` Chris Wilson
  0 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2017-07-04 11:25 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2017-07-04 12:19:17)
> 
> On 04/07/2017 11:04, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2017-07-04 10:53:53)
> >>
> >> On 03/07/2017 11:14, Chris Wilson wrote:
> >>> We probably want the caller to specify whether the fence should be
> >>> pinned for their usage, but at the moment all callers want either the
> >>> associated fence or none, so take it on their behalf.
> >> Wouldn't this be wasting fences for ring buffers?
> > 
> > No. We don't claim a fence unless used (the or none clause above). If
> > the object is untiled, like ringbuffers, we don't assign a fence. The
> > problem I'm thinking about for the future is having an iomap cached for
> > fenced and unfenced access, i.e. we need to track the type of iomap
> > similarly to regular vmaps.
> 
> I missed that i915_vma_get_fence only sets one up for tiled objects. 
> Code looks fine to me then, but the commit message reads like the main 
> paragraph is missing. With that improved a bit:
> 
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> But for the series as a whole - what is the cover story?

Just working towards de-struct_mutexing the display paths.

> What does it do 
> on the high level? Less waiting in eb path, and instead sets up awaits? 

Yes, that's the merit of pipelining the fence changes.

> Fences are still reserved in advance so when it is runnable it is 
> guaranteed which one it is getting?

Yes.
-Chris

* Re: [PATCH 2/4] drm/i915: Consolidate get_fence with pin_fence
  2017-07-03 10:14 ` [PATCH 2/4] drm/i915: Consolidate get_fence with pin_fence Chris Wilson
@ 2017-07-04 14:47   ` Tvrtko Ursulin
  0 siblings, 0 replies; 12+ messages in thread
From: Tvrtko Ursulin @ 2017-07-04 14:47 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 03/07/2017 11:14, Chris Wilson wrote:
> Following the pattern now used for obj->mm.pages, use just pin_fence and
> unpin_fence to control access to the fence registers. I.e. instead of
> calling get_fence(); pin_fence(), we now just need to call pin_fence().
> This will make it easier to reduce the locking requirements around
> fence registers.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.h            |  4 ----
>   drivers/gpu/drm/i915/i915_gem.c            |  3 ++-
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10 +++++-----
>   drivers/gpu/drm/i915/i915_gem_fence_reg.c  | 27 ++++++++++++++++++++++-----
>   drivers/gpu/drm/i915/i915_vma.c            |  3 +--
>   drivers/gpu/drm/i915/i915_vma.h            | 20 ++++++++------------
>   drivers/gpu/drm/i915/intel_display.c       |  3 +--
>   7 files changed, 39 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0029bb949e90..b08b56a2e680 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3545,10 +3545,6 @@ i915_vm_to_ppgtt(struct i915_address_space *vm)
>   	return container_of(vm, struct i915_hw_ppgtt, base);
>   }
>   
> -/* i915_gem_fence_reg.c */
> -int __must_check i915_vma_get_fence(struct i915_vma *vma);
> -int __must_check i915_vma_put_fence(struct i915_vma *vma);
> -
>   void i915_gem_revoke_fences(struct drm_i915_private *dev_priv);
>   void i915_gem_restore_fences(struct drm_i915_private *dev_priv);
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 1b2dfa8bdeef..939a299260e9 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1941,7 +1941,7 @@ int i915_gem_fault(struct vm_fault *vmf)
>   	if (ret)
>   		goto err_unpin;
>   
> -	ret = i915_vma_get_fence(vma);
> +	ret = i915_vma_pin_fence(vma);

Hm, I think fence stealing stops working with this change?

Regards,

Tvrtko

>   	if (ret)
>   		goto err_unpin;
>   
> @@ -1957,6 +1957,7 @@ int i915_gem_fault(struct vm_fault *vmf)
>   			       min_t(u64, vma->size, area->vm_end - area->vm_start),
>   			       &ggtt->mappable);
>   
> +	i915_vma_unpin_fence(vma);
>   err_unpin:
>   	__i915_vma_unpin(vma);
>   err_unlock:
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 929f275e67aa..22a9f5358322 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -365,12 +365,12 @@ eb_pin_vma(struct i915_execbuffer *eb,
>   		return;
>   
>   	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
> -		if (unlikely(i915_vma_get_fence(vma))) {
> +		if (unlikely(i915_vma_pin_fence(vma))) {
>   			i915_vma_unpin(vma);
>   			return;
>   		}
>   
> -		if (i915_vma_pin_fence(vma))
> +		if (vma->fence)
>   			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
>   	}
>   
> @@ -384,7 +384,7 @@ __eb_unreserve_vma(struct i915_vma *vma,
>   	GEM_BUG_ON(!(entry->flags & __EXEC_OBJECT_HAS_PIN));
>   
>   	if (unlikely(entry->flags & __EXEC_OBJECT_HAS_FENCE))
> -		i915_vma_unpin_fence(vma);
> +		__i915_vma_unpin_fence(vma);
>   
>   	__i915_vma_unpin(vma);
>   }
> @@ -564,13 +564,13 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
>   	GEM_BUG_ON(eb_vma_misplaced(entry, vma));
>   
>   	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
> -		err = i915_vma_get_fence(vma);
> +		err = i915_vma_pin_fence(vma);
>   		if (unlikely(err)) {
>   			i915_vma_unpin(vma);
>   			return err;
>   		}
>   
> -		if (i915_vma_pin_fence(vma))
> +		if (vma->fence)
>   			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
>   	}
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> index 5fe2cd8c8f28..55ac7bc14fce 100644
> --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> @@ -280,8 +280,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
>    *
>    * 0 on success, negative error code on failure.
>    */
> -int
> -i915_vma_put_fence(struct i915_vma *vma)
> +int i915_vma_put_fence(struct i915_vma *vma)
>   {
>   	struct drm_i915_fence_reg *fence = vma->fence;
>   
> @@ -299,6 +298,8 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
>   	struct drm_i915_fence_reg *fence;
>   
>   	list_for_each_entry(fence, &dev_priv->mm.fence_list, link) {
> +		GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
> +
>   		if (fence->pin_count)
>   			continue;
>   
> @@ -313,7 +314,7 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
>   }
>   
>   /**
> - * i915_vma_get_fence - set up fencing for a vma
> + * i915_vma_pin_fence - set up fencing for a vma
>    * @vma: vma to map through a fence reg
>    *
>    * When mapping objects through the GTT, userspace wants to be able to write
> @@ -331,10 +332,11 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
>    * 0 on success, negative error code on failure.
>    */
>   int
> -i915_vma_get_fence(struct i915_vma *vma)
> +i915_vma_pin_fence(struct i915_vma *vma)
>   {
>   	struct drm_i915_fence_reg *fence;
>   	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
> +	int err;
>   
>   	/* Note that we revoke fences on runtime suspend. Therefore the user
>   	 * must keep the device awake whilst using the fence.
> @@ -344,6 +346,8 @@ i915_vma_get_fence(struct i915_vma *vma)
>   	/* Just update our place in the LRU if our fence is getting reused. */
>   	if (vma->fence) {
>   		fence = vma->fence;
> +		GEM_BUG_ON(fence->vma != vma);
> +		fence->pin_count++;
>   		if (!fence->dirty) {
>   			list_move_tail(&fence->link,
>   				       &fence->i915->mm.fence_list);
> @@ -353,10 +357,19 @@ i915_vma_get_fence(struct i915_vma *vma)
>   		fence = fence_find(vma->vm->i915);
>   		if (IS_ERR(fence))
>   			return PTR_ERR(fence);
> +		fence->pin_count++;
>   	} else
>   		return 0;
>   
> -	return fence_update(fence, set);
> +	err = fence_update(fence, set);
> +	if (err)
> +		goto err_unpin;
> +
> +	return 0;
> +
> +err_unpin:
> +	fence->pin_count--;
> +	return err;
>   }
>   
>   /**
> @@ -378,6 +391,8 @@ void i915_gem_revoke_fences(struct drm_i915_private *dev_priv)
>   	for (i = 0; i < dev_priv->num_fence_regs; i++) {
>   		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
>   
> +		GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
> +
>   		if (fence->vma)
>   			i915_gem_release_mmap(fence->vma->obj);
>   	}
> @@ -399,6 +414,8 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
>   		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
>   		struct i915_vma *vma = reg->vma;
>   
> +		GEM_BUG_ON(vma && vma->fence != reg);
> +
>   		/*
>   		 * Commit delayed tiling changes if we have an object still
>   		 * attached to the fence, otherwise just clear the fence.
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index d7330d3eeab6..efbfee8eac99 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -303,12 +303,11 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
>   
>   	__i915_vma_pin(vma);
>   
> -	ret = i915_vma_get_fence(vma);
> +	ret = i915_vma_pin_fence(vma);
>   	if (ret) {
>   		__i915_vma_unpin(vma);
>   		return IO_ERR_PTR(ret);
>   	}
> -	i915_vma_pin_fence(vma);
>   
>   	return ptr;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 15e86e50a825..19f58af4f1bf 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -342,15 +342,13 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma)
>    *
>    * True if the vma has a fence, false otherwise.
>    */
> -static inline bool
> -i915_vma_pin_fence(struct i915_vma *vma)
> +int i915_vma_pin_fence(struct i915_vma *vma);
> +int __must_check i915_vma_put_fence(struct i915_vma *vma);
> +
> +static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
>   {
> -	lockdep_assert_held(&vma->obj->base.dev->struct_mutex);
> -	if (vma->fence) {
> -		vma->fence->pin_count++;
> -		return true;
> -	} else
> -		return false;
> +	GEM_BUG_ON(vma->fence->pin_count <= 0);
> +	vma->fence->pin_count--;
>   }
>   
>   /**
> @@ -365,10 +363,8 @@ static inline void
>   i915_vma_unpin_fence(struct i915_vma *vma)
>   {
>   	lockdep_assert_held(&vma->obj->base.dev->struct_mutex);
> -	if (vma->fence) {
> -		GEM_BUG_ON(vma->fence->pin_count <= 0);
> -		vma->fence->pin_count--;
> -	}
> +	if (vma->fence)
> +		__i915_vma_unpin_fence(vma);
>   }
>   
>   #endif
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 4e03ca6c946f..432bbaf460b4 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -2183,8 +2183,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
>   		 * something and try to run the system in a "less than optimal"
>   		 * mode that matches the user configuration.
>   		 */
> -		if (i915_vma_get_fence(vma) == 0)
> -			i915_vma_pin_fence(vma);
> +		i915_vma_pin_fence(vma);
>   	}
>   
>   	i915_vma_get(vma);
> 

* Re: [PATCH 3/4] drm/i915: Emit pipelined fence changes
  2017-07-03 10:14 ` [PATCH 3/4] drm/i915: Emit pipelined fence changes Chris Wilson
@ 2017-07-05 10:19   ` Tvrtko Ursulin
  2017-07-06 11:35     ` Chris Wilson
  0 siblings, 1 reply; 12+ messages in thread
From: Tvrtko Ursulin @ 2017-07-05 10:19 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 03/07/2017 11:14, Chris Wilson wrote:
> Many years ago, long before requests, we tried doing this. We never
> quite got it right, but now with requests we have the tracking to do the
> job properly!

Add a few words on the benefits in certain use cases/benchmarks.

> 
> One of the stall points for gen2/gen3 is the use of fence registers for
> GPU operations. There are only a few available, and currently if we
> exhaust the available fence registers we must stall the GPU between
> batches. By pipelining the fence register writes, we can avoid the stall
> and continuously emit the batches. The challenge is remembering to wait
> for those pipelined LRIs before accessing the fence with the CPU, and
> that is what our request tracking makes easy.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem.c            |   1 +
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c |  15 ++-
>   drivers/gpu/drm/i915/i915_gem_fence_reg.c  | 193 +++++++++++++++++++++++++----
>   drivers/gpu/drm/i915/i915_gem_fence_reg.h  |   1 +
>   drivers/gpu/drm/i915/i915_vma.c            |   1 +
>   drivers/gpu/drm/i915/i915_vma.h            |   4 +
>   6 files changed, 186 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 939a299260e9..5318e321ce50 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4880,6 +4880,7 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
>   		fence->i915 = dev_priv;
>   		fence->id = i;
>   		list_add_tail(&fence->link, &dev_priv->mm.fence_list);
> +		init_request_active(&fence->pipelined, NULL);
>   	}
>   	i915_gem_restore_fences(dev_priv);
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 22a9f5358322..19347ad0e7ac 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -365,11 +365,12 @@ eb_pin_vma(struct i915_execbuffer *eb,
>   		return;
>   
>   	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
> -		if (unlikely(i915_vma_pin_fence(vma))) {
> +		if (unlikely(i915_vma_reserve_fence(vma))) {
>   			i915_vma_unpin(vma);
>   			return;
>   		}
>   
> +		entry->flags &= ~EXEC_OBJECT_ASYNC;
>   		if (vma->fence)
>   			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
>   	}
> @@ -564,12 +565,13 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
>   	GEM_BUG_ON(eb_vma_misplaced(entry, vma));
>   
>   	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
> -		err = i915_vma_pin_fence(vma);
> +		err = i915_vma_reserve_fence(vma);
>   		if (unlikely(err)) {
>   			i915_vma_unpin(vma);
>   			return err;
>   		}
>   
> +		entry->flags &= ~EXEC_OBJECT_ASYNC;

A comment here would be good.

>   		if (vma->fence)
>   			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
>   	}
> @@ -1848,6 +1850,12 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
>   		if (unlikely(obj->cache_dirty && !obj->cache_coherent))
>   			i915_gem_clflush_object(obj, 0);
>   
> +		if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
> +			err = i915_vma_emit_pipelined_fence(vma, eb->request);
> +			if (err)
> +				return err;
> +		}
> +
>   		err = i915_gem_request_await_object
>   			(eb->request, obj, entry->flags & EXEC_OBJECT_WRITE);
>   		if (err)
> @@ -1932,9 +1940,6 @@ void i915_vma_move_to_active(struct i915_vma *vma,
>   		obj->base.read_domains = 0;
>   	}
>   	obj->base.read_domains |= I915_GEM_GPU_DOMAINS;
> -
> -	if (flags & EXEC_OBJECT_NEEDS_FENCE)
> -		i915_gem_active_set(&vma->last_fence, req);
>   }
>   
>   static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> index 55ac7bc14fce..d0cd051c19fd 100644
> --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> @@ -55,10 +55,9 @@
>    * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
>    */
>   
> -#define pipelined 0
> -
> -static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
> -				 struct i915_vma *vma)
> +static int i965_write_fence_reg(struct drm_i915_fence_reg *fence,
> +				struct i915_vma *vma,
> +				struct drm_i915_gem_request *pipelined)
>   {
>   	i915_reg_t fence_reg_lo, fence_reg_hi;
>   	int fence_pitch_shift;
> @@ -110,11 +109,30 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
>   		I915_WRITE(fence_reg_hi, upper_32_bits(val));
>   		I915_WRITE(fence_reg_lo, lower_32_bits(val));
>   		POSTING_READ(fence_reg_lo);
> +	} else {
> +		u32 *cs;
> +
> +		cs = intel_ring_begin(pipelined, 8);

Do we need to adjust the ring space reservation amount?

> +		if (IS_ERR(cs))
> +			return PTR_ERR(cs);
> +
> +		*cs++ = MI_LOAD_REGISTER_IMM(3);
> +		*cs++ = i915_mmio_reg_offset(fence_reg_lo);
> +		*cs++ = 0;
> +		*cs++ = i915_mmio_reg_offset(fence_reg_hi);
> +		*cs++ = upper_32_bits(val);
> +		*cs++ = i915_mmio_reg_offset(fence_reg_lo);
> +		*cs++ = lower_32_bits(val);
> +		*cs++ = MI_NOOP;
> +		intel_ring_advance(pipelined, cs);
>   	}
> +
> +	return 0;
>   }
>   
> -static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
> -				 struct i915_vma *vma)
> +static int i915_write_fence_reg(struct drm_i915_fence_reg *fence,
> +				struct i915_vma *vma,
> +				struct drm_i915_gem_request *pipelined)
>   {
>   	u32 val;
>   
> @@ -150,11 +168,26 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
>   
>   		I915_WRITE(reg, val);
>   		POSTING_READ(reg);
> +	} else {
> +		u32 *cs;
> +
> +		cs = intel_ring_begin(pipelined, 4);
> +		if (IS_ERR(cs))
> +			return PTR_ERR(cs);
> +
> +		*cs++ = MI_LOAD_REGISTER_IMM(1);
> +		*cs++ = i915_mmio_reg_offset(FENCE_REG(fence->id));
> +		*cs++ = val;
> +		*cs++ = MI_NOOP;
> +		intel_ring_advance(pipelined, cs);
>   	}
> +
> +	return 0;
>   }
>   
> -static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
> -				 struct i915_vma *vma)
> +static int i830_write_fence_reg(struct drm_i915_fence_reg *fence,
> +				struct i915_vma *vma,
> +				struct drm_i915_gem_request *pipelined)
>   {
>   	u32 val;
>   
> @@ -182,29 +215,49 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
>   
>   		I915_WRITE(reg, val);
>   		POSTING_READ(reg);
> +	} else {
> +		u32 *cs;
> +
> +		cs = intel_ring_begin(pipelined, 4);
> +		if (IS_ERR(cs))
> +			return PTR_ERR(cs);
> +
> +		*cs++ = MI_LOAD_REGISTER_IMM(1);
> +		*cs++ = i915_mmio_reg_offset(FENCE_REG(fence->id));
> +		*cs++ = val;
> +		*cs++ = MI_NOOP;
> +		intel_ring_advance(pipelined, cs);
>   	}
> +
> +	return 0;
>   }
>   
> -static void fence_write(struct drm_i915_fence_reg *fence,
> -			struct i915_vma *vma)
> +static int fence_write(struct drm_i915_fence_reg *fence,
> +		       struct i915_vma *vma,
> +		       struct drm_i915_gem_request *rq)
>   {
> +	int err;
> +
>   	/* Previous access through the fence register is marshalled by
>   	 * the mb() inside the fault handlers (i915_gem_release_mmaps)
>   	 * and explicitly managed for internal users.
>   	 */
>   
>   	if (IS_GEN2(fence->i915))
> -		i830_write_fence_reg(fence, vma);
> +		err = i830_write_fence_reg(fence, vma, rq);
>   	else if (IS_GEN3(fence->i915))
> -		i915_write_fence_reg(fence, vma);
> +		err = i915_write_fence_reg(fence, vma, rq);
>   	else
> -		i965_write_fence_reg(fence, vma);
> +		err = i965_write_fence_reg(fence, vma, rq);
> +	if (err)
> +		return err;
>   
>   	/* Access through the fenced region afterwards is
>   	 * ordered by the posting reads whilst writing the registers.
>   	 */
>   
>   	fence->dirty = false;
> +	return 0;
>   }
>   
>   static int fence_update(struct drm_i915_fence_reg *fence,
> @@ -212,17 +265,15 @@ static int fence_update(struct drm_i915_fence_reg *fence,
>   {
>   	int ret;
>   
> +	ret = i915_gem_active_retire(&fence->pipelined,
> +				     &fence->i915->drm.struct_mutex);
> +	if (ret)
> +		return ret;
> +
>   	if (vma) {
>   		if (!i915_vma_is_map_and_fenceable(vma))
>   			return -EINVAL;
>   
> -		if (WARN(!i915_gem_object_get_stride(vma->obj) ||
> -			 !i915_gem_object_get_tiling(vma->obj),
> -			 "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
> -			 i915_gem_object_get_stride(vma->obj),
> -			 i915_gem_object_get_tiling(vma->obj)))
> -			return -EINVAL;
> -
>   		ret = i915_gem_active_retire(&vma->last_fence,
>   					     &vma->obj->base.dev->struct_mutex);
>   		if (ret)
> @@ -253,7 +304,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
>   	 * to the runtime resume, see i915_gem_restore_fences().
>   	 */
>   	if (intel_runtime_pm_get_if_in_use(fence->i915)) {
> -		fence_write(fence, vma);
> +		fence_write(fence, vma, NULL);
>   		intel_runtime_pm_put(fence->i915);
>   	}
>   
> @@ -287,6 +338,8 @@ int i915_vma_put_fence(struct i915_vma *vma)
>   	if (!fence)
>   		return 0;
>   
> +	GEM_BUG_ON(fence->vma != vma);
> +
>   	if (fence->pin_count)
>   		return -EBUSY;
>   
> @@ -344,11 +397,16 @@ i915_vma_pin_fence(struct i915_vma *vma)
>   	assert_rpm_wakelock_held(vma->vm->i915);
>   
>   	/* Just update our place in the LRU if our fence is getting reused. */
> -	if (vma->fence) {
> -		fence = vma->fence;
> +	fence = vma->fence;
> +	if (fence) {
>   		GEM_BUG_ON(fence->vma != vma);
>   		fence->pin_count++;
>   		if (!fence->dirty) {
> +			err = i915_gem_active_retire(&fence->pipelined,
> +						     &fence->i915->drm.struct_mutex);
> +			if (err)
> +				goto err_unpin;

A comment explaining the need for this block would help. There is one CPU 
wait on the fence_update path already, and at least I couldn't easily 
figure out why another is needed. And why in the !dirty case specifically?

> +
>   			list_move_tail(&fence->link,
>   				       &fence->i915->mm.fence_list);
>   			return 0;
> @@ -372,6 +430,93 @@ i915_vma_pin_fence(struct i915_vma *vma)
>   	return err;
>   }
>   
> +int i915_vma_reserve_fence(struct i915_vma *vma)
> +{
> +	struct drm_i915_fence_reg *fence;
> +
> +	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
> +	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> +	GEM_BUG_ON(!i915_vma_is_pinned(vma));
> +
> +	fence = vma->fence;
> +	if (!fence) {
> +		if (!i915_gem_object_is_tiled(vma->obj))
> +			return 0;
> +
> +		if (!i915_vma_is_map_and_fenceable(vma))
> +			return -EINVAL;
> +
> +		fence = fence_find(vma->vm->i915);
> +		if (IS_ERR(fence))
> +			return PTR_ERR(fence);

If all fences are pinned via i915_gem_fault, which I thought can happen 
with the previous patch, then do we error out here instead of waiting?

> +
> +		vma->fence = fence;
> +
> +		if (fence->vma) {
> +			i915_gem_release_mmap(fence->vma->obj);
> +			fence->vma->fence = NULL;
> +		}
> +		fence->vma = vma;
> +		fence->dirty = true;
> +	}
> +	fence->pin_count++;
> +	list_move_tail(&fence->link, &fence->i915->mm.fence_list);
> +
> +	GEM_BUG_ON(!i915_gem_object_is_tiled(vma->obj));
> +	GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
> +	GEM_BUG_ON(vma->node.size != vma->fence_size);
> +	GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_alignment));
> +
> +	return 0;
> +}
> +
> +int i915_vma_emit_pipelined_fence(struct i915_vma *vma,
> +				  struct drm_i915_gem_request *rq)
> +{
> +	struct drm_i915_fence_reg *fence = vma->fence;
> +	struct drm_i915_gem_request *prev;
> +	int err;
> +
> +	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
> +	GEM_BUG_ON(fence && !fence->pin_count);
> +
> +	if (!fence)
> +		goto out;
> +
> +	prev = i915_gem_active_raw(&fence->pipelined,
> +				   &fence->i915->drm.struct_mutex);
> +	if (prev) {
> +		err = i915_gem_request_await_dma_fence(rq, &prev->fence);
> +		if (err)
> +			return err;
> +	}
> +
> +	if (!fence->dirty)
> +		goto out;

Hm, so unless "dirty" is set, no actual fence management will happen. 
What is the meaning of dirty? Has it changed? Does it need a write-up in 
the header file?

Looks like in any case I will need to apply this to follow the flow.

Regards,

Tvrtko

> +
> +	GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
> +
> +	if (fence->vma) {
> +		prev = i915_gem_active_raw(&fence->vma->last_fence,
> +					   &fence->i915->drm.struct_mutex);
> +		if (prev) {
> +			err = i915_gem_request_await_dma_fence(rq,
> +							       &prev->fence);
> +			if (err)
> +				return err;
> +		}
> +	}
> +
> +	err = fence_write(fence, vma, rq);
> +	if (err)
> +		return err;
> +
> +	i915_gem_active_set(&fence->pipelined, rq);
> +out:
> +	i915_gem_active_set(&vma->last_fence, rq);
> +	return 0;
> +}
> +
>   /**
>    * i915_gem_revoke_fences - revoke fence state
>    * @dev_priv: i915 device private
> @@ -429,7 +574,7 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
>   			vma = NULL;
>   		}
>   
> -		fence_write(reg, vma);
> +		fence_write(reg, vma, NULL);
>   		reg->vma = vma;
>   	}
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
> index 99a31ded4dfd..ce45972fc5c6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h
> +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
> @@ -47,6 +47,7 @@ struct drm_i915_fence_reg {
>   	 * command (such as BLT on gen2/3), as a "fence".
>   	 */
>   	bool dirty;
> +	struct i915_gem_active pipelined;
>   };
>   
>   #endif
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index efbfee8eac99..0c489090d4ab 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -728,6 +728,7 @@ int i915_vma_unbind(struct i915_vma *vma)
>   		__i915_vma_iounmap(vma);
>   		vma->flags &= ~I915_VMA_CAN_FENCE;
>   	}
> +	GEM_BUG_ON(vma->fence);
>   
>   	if (likely(!vma->vm->closed)) {
>   		trace_i915_vma_unbind(vma);
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 19f58af4f1bf..f0dc6eaebeab 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -367,5 +367,9 @@ i915_vma_unpin_fence(struct i915_vma *vma)
>   		__i915_vma_unpin_fence(vma);
>   }
>   
> +int __must_check i915_vma_reserve_fence(struct i915_vma *vma);
> +int i915_vma_emit_pipelined_fence(struct i915_vma *vma,
> +				  struct drm_i915_gem_request *rq);
> +
>   #endif
>   
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 3/4] drm/i915: Emit pipelined fence changes
  2017-07-05 10:19   ` Tvrtko Ursulin
@ 2017-07-06 11:35     ` Chris Wilson
  0 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2017-07-06 11:35 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2017-07-05 11:19:43)
> 
> On 03/07/2017 11:14, Chris Wilson wrote:
> > Many years ago, long before requests, we tried doing this. We never
> > quite got it right, but now with requests we have the tracking to do the
> > job properly!
> 
> Add a few words on the benefits in certain use cases/benchmarks.

A few percent in padman on Pineview. One presumes that it would have a
bigger impact on minimum framerates than on the average, but I didn't check.

> > One of the stall points for gen2/gen3 is the use of fence registers for
> > GPU operations. There are only a few available, and currently if we
> > exhaust the available fence registers we must stall the GPU between
> > batches. By pipelining the fence register writes, we can avoid the stall
> > and continuously emit the batches. The challenge is remembering to wait
> > for those pipelined LRI before accessing the fence with the CPU, and
> > that is what our request tracking makes easy.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_gem.c            |   1 +
> >   drivers/gpu/drm/i915/i915_gem_execbuffer.c |  15 ++-
> >   drivers/gpu/drm/i915/i915_gem_fence_reg.c  | 193 +++++++++++++++++++++++++----
> >   drivers/gpu/drm/i915/i915_gem_fence_reg.h  |   1 +
> >   drivers/gpu/drm/i915/i915_vma.c            |   1 +
> >   drivers/gpu/drm/i915/i915_vma.h            |   4 +
> >   6 files changed, 186 insertions(+), 29 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 939a299260e9..5318e321ce50 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -4880,6 +4880,7 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
> >               fence->i915 = dev_priv;
> >               fence->id = i;
> >               list_add_tail(&fence->link, &dev_priv->mm.fence_list);
> > +             init_request_active(&fence->pipelined, NULL);
> >       }
> >       i915_gem_restore_fences(dev_priv);
> >   
> > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > index 22a9f5358322..19347ad0e7ac 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > @@ -365,11 +365,12 @@ eb_pin_vma(struct i915_execbuffer *eb,
> >               return;
> >   
> >       if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
> > -             if (unlikely(i915_vma_pin_fence(vma))) {
> > +             if (unlikely(i915_vma_reserve_fence(vma))) {
> >                       i915_vma_unpin(vma);
> >                       return;
> >               }
> >   
> > +             entry->flags &= ~EXEC_OBJECT_ASYNC;
> >               if (vma->fence)
> >                       entry->flags |= __EXEC_OBJECT_HAS_FENCE;
> >       }
> > @@ -564,12 +565,13 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
> >       GEM_BUG_ON(eb_vma_misplaced(entry, vma));
> >   
> >       if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
> > -             err = i915_vma_pin_fence(vma);
> > +             err = i915_vma_reserve_fence(vma);
> >               if (unlikely(err)) {
> >                       i915_vma_unpin(vma);
> >                       return err;
> >               }
> >   
> > +             entry->flags &= ~EXEC_OBJECT_ASYNC;
> 
> A comment here would be good.

We rely on the serialisation between LRIs, which the user has no
knowledge of, so we have to ignore any request to skip the required LRI.
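
Something along these lines, perhaps (an untested sketch; the exact
comment wording is mine, not final):

	/*
	 * The fence register update is emitted as an LRI into the ring,
	 * and it is the ring that serialises that write against the
	 * batch. Userspace has no knowledge of that implicit
	 * serialisation, so never let it skip the wait by marking the
	 * vma with EXEC_OBJECT_ASYNC.
	 */
	entry->flags &= ~EXEC_OBJECT_ASYNC;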

> 
> >               if (vma->fence)
> >                       entry->flags |= __EXEC_OBJECT_HAS_FENCE;
> >       }
> > @@ -1848,6 +1850,12 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
> >               if (unlikely(obj->cache_dirty && !obj->cache_coherent))
> >                       i915_gem_clflush_object(obj, 0);
> >   
> > +             if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
> > +                     err = i915_vma_emit_pipelined_fence(vma, eb->request);
> > +                     if (err)
> > +                             return err;
> > +             }
> > +
> >               err = i915_gem_request_await_object
> >                       (eb->request, obj, entry->flags & EXEC_OBJECT_WRITE);
> >               if (err)
> > @@ -1932,9 +1940,6 @@ void i915_vma_move_to_active(struct i915_vma *vma,
> >               obj->base.read_domains = 0;
> >       }
> >       obj->base.read_domains |= I915_GEM_GPU_DOMAINS;
> > -
> > -     if (flags & EXEC_OBJECT_NEEDS_FENCE)
> > -             i915_gem_active_set(&vma->last_fence, req);
> >   }
> >   
> >   static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
> > diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> > index 55ac7bc14fce..d0cd051c19fd 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> > @@ -55,10 +55,9 @@
> >    * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
> >    */
> >   
> > -#define pipelined 0
> > -
> > -static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
> > -                              struct i915_vma *vma)
> > +static int i965_write_fence_reg(struct drm_i915_fence_reg *fence,
> > +                             struct i915_vma *vma,
> > +                             struct drm_i915_gem_request *pipelined)
> >   {
> >       i915_reg_t fence_reg_lo, fence_reg_hi;
> >       int fence_pitch_shift;
> > @@ -110,11 +109,30 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
> >               I915_WRITE(fence_reg_hi, upper_32_bits(val));
> >               I915_WRITE(fence_reg_lo, lower_32_bits(val));
> >               POSTING_READ(fence_reg_lo);
> > +     } else {
> > +             u32 *cs;
> > +
> > +             cs = intel_ring_begin(pipelined, 8);
> 
> Do we need to adjust the ring space reservation amount?

No. We don't reset the registers at the end of a request, but either
emit a new request or serialise with mmio from the CPU.

> > @@ -344,11 +397,16 @@ i915_vma_pin_fence(struct i915_vma *vma)
> >       assert_rpm_wakelock_held(vma->vm->i915);
> >   
> >       /* Just update our place in the LRU if our fence is getting reused. */
> > -     if (vma->fence) {
> > -             fence = vma->fence;
> > +     fence = vma->fence;
> > +     if (fence) {
> >               GEM_BUG_ON(fence->vma != vma);
> >               fence->pin_count++;
> >               if (!fence->dirty) {
> > +                     err = i915_gem_active_retire(&fence->pipelined,
> > +                                                  &fence->i915->drm.struct_mutex);
> > +                     if (err)
> > +                             goto err_unpin;
> 
> A comment explaining the need for this block would help. There is one CPU 
> wait on the fence_update path already, and at least I couldn't easily 
> figure out why another is needed. And why in the !dirty case specifically?

!dirty is a shortcut; here we only need to wait for the LRI that sets
the fence register itself, not for the subsequent usage of the fence.

It is strictly an earlier point than the fence->vma->last_fence request.
(Hmm, "strictly" is a bit of an exaggeration; it assumes the single
timeline of gen3.)
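
Concretely, I could annotate that block along these lines (a sketch
only; the comment text is mine):

	if (!fence->dirty) {
		/*
		 * The register write itself may still be in flight as a
		 * pipelined LRI; wait for that LRI (fence->pipelined)
		 * before the CPU relies on the register, but do not wait
		 * for the later GPU commands that merely use the fenced
		 * region (vma->last_fence).
		 */
		err = i915_gem_active_retire(&fence->pipelined,
					     &fence->i915->drm.struct_mutex);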

> > +int i915_vma_reserve_fence(struct i915_vma *vma)
> > +{
> > +     struct drm_i915_fence_reg *fence;
> > +
> > +     lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
> > +     GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> > +     GEM_BUG_ON(!i915_vma_is_pinned(vma));
> > +
> > +     fence = vma->fence;
> > +     if (!fence) {
> > +             if (!i915_gem_object_is_tiled(vma->obj))
> > +                     return 0;
> > +
> > +             if (!i915_vma_is_map_and_fenceable(vma))
> > +                     return -EINVAL;
> > +
> > +             fence = fence_find(vma->vm->i915);
> > +             if (IS_ERR(fence))
> > +                     return PTR_ERR(fence);
> 
> If all fences are pinned via i915_gem_fault, which I thought can happen 
> with the previous patch, then do we error out here instead of waiting?

i915_gem_fault doesn't pin a page except for the duration it is
inserting the CPU PTE. It is expected that we do hit an error here
(EDEADLK) on exhaustion, i.e. a single batch using more fences than are
available. Currently, atomic kms has a huge regression in that it doesn't
report the fences that it is only temporarily holding.

> > +
> > +     if (!fence->dirty)
> > +             goto out;
> 
> Hm, so unless "dirty" is set, no actual fence management will happen. 
> What is the meaning of dirty? Has it changed? Does it need a write-up in 
> the header file?

It means the register no longer matches expectation and we need to
update it before use.
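
Something like this could go into i915_gem_fence_reg.h (a sketch; the
wording is mine):

	/*
	 * dirty: the register contents no longer match the state tracked
	 * for the vma (address, stride, tiling mode), so the register
	 * must be rewritten, by mmio or by a pipelined LRI, before the
	 * next access through the fence.
	 */
	bool dirty;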
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2017-07-06 11:35 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-07-03 10:14 [PATCH 1/4] drm/i915: Pin fence for iomap Chris Wilson
2017-07-03 10:14 ` [PATCH 2/4] drm/i915: Consolidate get_fence with pin_fence Chris Wilson
2017-07-04 14:47   ` Tvrtko Ursulin
2017-07-03 10:14 ` [PATCH 3/4] drm/i915: Emit pipelined fence changes Chris Wilson
2017-07-05 10:19   ` Tvrtko Ursulin
2017-07-06 11:35     ` Chris Wilson
2017-07-03 10:14 ` [PATCH 4/4] drm/i915: Make fence->pin_count atomic to allow unlocked unpinning Chris Wilson
2017-07-03 11:09 ` ✓ Fi.CI.BAT: success for series starting with [1/4] drm/i915: Pin fence for iomap Patchwork
2017-07-04  9:53 ` [PATCH 1/4] " Tvrtko Ursulin
2017-07-04 10:04   ` Chris Wilson
2017-07-04 11:19     ` Tvrtko Ursulin
2017-07-04 11:25       ` Chris Wilson
