* [PATCH 1/2] drm/i915: Reorganise rules for get_fence/put_fence
@ 2012-03-22 15:10 Chris Wilson
  2012-03-22 15:10 ` [PATCH 2/2] drm/i915: Remove fence pipelining infrastructure Chris Wilson
From: Chris Wilson @ 2012-03-22 15:10 UTC (permalink / raw)
  To: intel-gfx

By simplifying the rules to calling get_fence when writing to the object
through the GTT in a tiled manner, and calling put_fence before writing
to the object through the GTT in a linear manner, the code becomes
clearer and there is less chance of making a mistake.
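
A minimal sketch of the resulting calling convention (taken from the
hunks below; the fault-handler path shows that the tiled and untiled
cases now collapse into a single call):

	/* Writing through the GTT: make the fence state consistent first.
	 * For an untiled object, get_fence now simply drops any stale
	 * fence itself. */
	ret = i915_gem_object_get_fence(obj, NULL);
	if (ret)
		goto unlock;

	/* Writing linearly to an object known to need no fence: drop it
	 * explicitly before touching the pages through the GTT. */
	ret = i915_gem_object_put_fence(obj);
	if (ret)
		return ret;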

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            |    6 ++++--
 drivers/gpu/drm/i915/i915_gem.c            |   14 +++++++-------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   15 +++++----------
 drivers/gpu/drm/i915/intel_display.c       |   10 ++++------
 4 files changed, 20 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0f91e22..f8f46d7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1233,13 +1233,15 @@ int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
 					   struct intel_ring_buffer *pipelined);
 int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
 
-static inline void
+static inline bool
 i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
 {
 	if (obj->fence_reg != I915_FENCE_REG_NONE) {
 		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
 		dev_priv->fence_regs[obj->fence_reg].pin_count++;
-	}
+		return true;
+	} else
+		return false;
 }
 
 static inline void
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0918f02..a8f758f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1185,10 +1185,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 			goto unlock;
 	}
 
-	if (obj->tiling_mode == I915_TILING_NONE)
-		ret = i915_gem_object_put_fence(obj);
-	else
-		ret = i915_gem_object_get_fence(obj, NULL);
+	ret = i915_gem_object_get_fence(obj, NULL);
 	if (ret)
 		goto unlock;
 
@@ -2591,19 +2588,19 @@ i915_find_fence_reg(struct drm_device *dev,
 }
 
 /**
- * i915_gem_object_get_fence - set up a fence reg for an object
+ * i915_gem_object_get_fence - set up fencing for an object
  * @obj: object to map through a fence reg
  * @pipelined: ring on which to queue the change, or NULL for CPU access
- * @interruptible: must we wait uninterruptibly for the register to retire?
  *
  * When mapping objects through the GTT, userspace wants to be able to write
  * to them without having to worry about swizzling if the object is tiled.
- *
  * This function walks the fence regs looking for a free one for @obj,
  * stealing one if it can't find any.
  *
  * It then sets up the reg based on the object's properties: address, pitch
  * and tiling format.
+ *
 + * For an untiled surface, this removes any existing fence.
  */
 int
 i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
@@ -2614,6 +2611,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
 	struct drm_i915_fence_reg *reg;
 	int ret;
 
+	if (obj->tiling_mode == I915_TILING_NONE)
+		return i915_gem_object_put_fence(obj);
+
 	/* XXX disable pipelining. There are bugs. Shocking. */
 	pipelined = NULL;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 6d604ed..d56ba9d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -486,18 +486,13 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
 
 	if (has_fenced_gpu_access) {
 		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
-			if (obj->tiling_mode) {
-				ret = i915_gem_object_get_fence(obj, ring);
-				if (ret)
-					goto err_unpin;
+			ret = i915_gem_object_get_fence(obj, ring);
+			if (ret)
+				goto err_unpin;
 
+			if (i915_gem_object_pin_fence(obj))
 				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
-				i915_gem_object_pin_fence(obj);
-			} else {
-				ret = i915_gem_object_put_fence(obj);
-				if (ret)
-					goto err_unpin;
-			}
+
 			obj->pending_fenced_gpu_access = true;
 		}
 	}
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 7ea5d4d..a67200a 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2051,13 +2051,11 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev,
 	 * framebuffer compression.  For simplicity, we always install
 	 * a fence as the cost is not that onerous.
 	 */
-	if (obj->tiling_mode != I915_TILING_NONE) {
-		ret = i915_gem_object_get_fence(obj, pipelined);
-		if (ret)
-			goto err_unpin;
+	ret = i915_gem_object_get_fence(obj, pipelined);
+	if (ret)
+		goto err_unpin;
 
-		i915_gem_object_pin_fence(obj);
-	}
+	i915_gem_object_pin_fence(obj);
 
 	dev_priv->mm.interruptible = true;
 	return 0;
-- 
1.7.9.1


* [PATCH 2/2] drm/i915: Remove fence pipelining infrastructure
  2012-03-22 15:10 [PATCH 1/2] drm/i915: Reorganise rules for get_fence/put_fence Chris Wilson
@ 2012-03-22 15:10 ` Chris Wilson
  2012-03-23 19:18   ` Daniel Vetter
  2012-03-23 19:08 ` [PATCH 1/2] drm/i915: Reorganise rules for get_fence/put_fence Daniel Vetter
  2012-04-10 10:02 ` Daniel Vetter
From: Chris Wilson @ 2012-03-22 15:10 UTC (permalink / raw)
  To: intel-gfx

I have failed to find a way to make this work without random GPU deaths,
so remove the ability to pipeline a fence update using the GPU. In the
process, we can refactor the code to improve the error handling and
avoid unnecessary modifications to our VMA if we do not need to update
the fence register.
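
The VMA remark refers to i915_gem_release_mmap(): after this patch the
helper below (quoted from the diff, with an explanatory gloss added) is
the only place in the fencing code that zaps a userspace mapping, and it
runs only when a fence register is genuinely taken away rather than
merely reused:

	/* Called only when the object loses its fence register.  Zapping
	 * the GTT mmap invalidates every userspace PTE for the object, so
	 * skipping it when a fence is simply reused avoids refaults. */
	static inline void
	i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
	{
		if (obj->tiling_mode)
			i915_gem_release_mmap(obj);
		else
			obj->tiling_changed = false;
	}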

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            |    8 +-
 drivers/gpu/drm/i915/i915_gem.c            |  516 ++++++++++------------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |    2 +-
 drivers/gpu/drm/i915/intel_display.c       |    2 +-
 4 files changed, 185 insertions(+), 343 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f8f46d7..1b9fdaa 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -135,7 +135,6 @@ struct drm_i915_master_private {
 struct drm_i915_fence_reg {
 	struct list_head lru_list;
 	struct drm_i915_gem_object *obj;
-	uint32_t setup_seqno;
 	int pin_count;
 };
 
@@ -908,13 +907,11 @@ struct drm_i915_gem_object {
 	 */
 	uint32_t gtt_offset;
 
+	struct intel_ring_buffer *ring;
 	/** Breadcrumb of last rendering to the buffer. */
 	uint32_t last_rendering_seqno;
-	struct intel_ring_buffer *ring;
-
 	/** Breadcrumb of last fenced GPU access to the buffer. */
 	uint32_t last_fenced_seqno;
-	struct intel_ring_buffer *last_fenced_ring;
 
 	/** Current tiling stride for the object, if it's tiled. */
 	uint32_t stride;
@@ -1229,8 +1226,7 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
 
 u32 i915_gem_next_request_seqno(struct intel_ring_buffer *ring);
 
-int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
-					   struct intel_ring_buffer *pipelined);
+int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
 
 static inline bool
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a8f758f..be01e1a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -49,13 +49,15 @@ static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *
 						    unsigned alignment,
 						    bool map_and_fenceable,
 						    bool nonblocking);
-static void i915_gem_clear_fence_reg(struct drm_device *dev,
-				     struct drm_i915_fence_reg *reg);
 static int i915_gem_phys_pwrite(struct drm_device *dev,
 				struct drm_i915_gem_object *obj,
 				struct drm_i915_gem_pwrite *args,
 				struct drm_file *file);
 
+static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
+					 struct drm_i915_fence_reg *fence,
+					 bool enable);
+
 static int i915_gem_inactive_shrink(struct shrinker *shrinker,
 				    struct shrink_control *sc);
 static long i915_gem_shrink_by(struct drm_i915_private *dev_priv,
@@ -70,6 +72,14 @@ first_unbound_bo(struct drm_i915_private *dev_priv)
 				mm_list);
 }
 
+static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
+{
+	if (obj->tiling_mode)
+		i915_gem_release_mmap(obj);
+	else
+		obj->tiling_changed = false;
+}
+
 /* some bookkeeping */
 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
 				  size_t size)
@@ -1185,7 +1195,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 			goto unlock;
 	}
 
-	ret = i915_gem_object_get_fence(obj, NULL);
+	ret = i915_gem_object_get_fence(obj);
 	if (ret)
 		goto unlock;
 
@@ -1656,7 +1666,6 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
 
 	if (obj->fenced_gpu_access) {
 		obj->last_fenced_seqno = seqno;
-		obj->last_fenced_ring = ring;
 
 		/* Bump MRU to take account of the delayed flush */
 		if (obj->fence_reg != I915_FENCE_REG_NONE) {
@@ -1702,6 +1711,7 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 
 	i915_gem_object_move_off_active(obj);
 	obj->fenced_gpu_access = false;
+	obj->last_fenced_seqno = 0;
 
 	obj->active = 0;
 	obj->pending_gpu_write = false;
@@ -1871,14 +1881,10 @@ static void i915_gem_reset_fences(struct drm_device *dev)
 		if (!obj)
 			continue;
 
-		if (obj->tiling_mode)
-			i915_gem_release_mmap(obj);
+		i915_gem_object_update_fence(obj, reg, false);
+		i915_gem_object_fence_lost(obj);
 
-		reg->obj->fence_reg = I915_FENCE_REG_NONE;
-		reg->obj->fenced_gpu_access = false;
-		reg->obj->last_fenced_seqno = 0;
-		reg->obj->last_fenced_ring = NULL;
-		i915_gem_clear_fence_reg(dev, reg);
+		reg->pin_count = 0;
 	}
 }
 
@@ -2303,189 +2309,172 @@ int i915_gpu_idle(struct drm_device *dev, bool do_retire)
 	return 0;
 }
 
-static int sandybridge_write_fence_reg(struct drm_i915_gem_object *obj,
-				       struct intel_ring_buffer *pipelined)
+static void sandybridge_write_fence_reg(struct drm_device *dev, int reg,
+					struct drm_i915_gem_object *obj)
 {
-	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	u32 size = obj->gtt_space->size;
-	int regnum = obj->fence_reg;
 	uint64_t val;
 
-	val = (uint64_t)((obj->gtt_offset + size - 4096) &
-			 0xfffff000) << 32;
-	val |= obj->gtt_offset & 0xfffff000;
-	val |= (uint64_t)((obj->stride / 128) - 1) <<
-		SANDYBRIDGE_FENCE_PITCH_SHIFT;
+	if (obj) {
+		u32 size = obj->gtt_space->size;
 
-	if (obj->tiling_mode == I915_TILING_Y)
-		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
-	val |= I965_FENCE_REG_VALID;
+		val = (uint64_t)((obj->gtt_offset + size - 4096) &
+				 0xfffff000) << 32;
+		val |= obj->gtt_offset & 0xfffff000;
+		val |= (uint64_t)((obj->stride / 128) - 1) <<
+			SANDYBRIDGE_FENCE_PITCH_SHIFT;
 
-	if (pipelined) {
-		int ret = intel_ring_begin(pipelined, 6);
-		if (ret)
-			return ret;
-
-		intel_ring_emit(pipelined, MI_NOOP);
-		intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
-		intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8);
-		intel_ring_emit(pipelined, (u32)val);
-		intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8 + 4);
-		intel_ring_emit(pipelined, (u32)(val >> 32));
-		intel_ring_advance(pipelined);
+		if (obj->tiling_mode == I915_TILING_Y)
+			val |= 1 << I965_FENCE_TILING_Y_SHIFT;
+		val |= I965_FENCE_REG_VALID;
 	} else
-		I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + regnum * 8, val);
+		val = 0;
 
-	return 0;
+	I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val);
+	POSTING_READ(FENCE_REG_SANDYBRIDGE_0 + reg * 8);
 }
 
-static int i965_write_fence_reg(struct drm_i915_gem_object *obj,
-				struct intel_ring_buffer *pipelined)
+static void i965_write_fence_reg(struct drm_device *dev, int reg,
+				 struct drm_i915_gem_object *obj)
 {
-	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	u32 size = obj->gtt_space->size;
-	int regnum = obj->fence_reg;
 	uint64_t val;
 
-	val = (uint64_t)((obj->gtt_offset + size - 4096) &
-		    0xfffff000) << 32;
-	val |= obj->gtt_offset & 0xfffff000;
-	val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
-	if (obj->tiling_mode == I915_TILING_Y)
-		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
-	val |= I965_FENCE_REG_VALID;
-
-	if (pipelined) {
-		int ret = intel_ring_begin(pipelined, 6);
-		if (ret)
-			return ret;
+	if (obj) {
+		u32 size = obj->gtt_space->size;
 
-		intel_ring_emit(pipelined, MI_NOOP);
-		intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
-		intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8);
-		intel_ring_emit(pipelined, (u32)val);
-		intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8 + 4);
-		intel_ring_emit(pipelined, (u32)(val >> 32));
-		intel_ring_advance(pipelined);
+		val = (uint64_t)((obj->gtt_offset + size - 4096) &
+				 0xfffff000) << 32;
+		val |= obj->gtt_offset & 0xfffff000;
+		val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
+		if (obj->tiling_mode == I915_TILING_Y)
+			val |= 1 << I965_FENCE_TILING_Y_SHIFT;
+		val |= I965_FENCE_REG_VALID;
 	} else
-		I915_WRITE64(FENCE_REG_965_0 + regnum * 8, val);
+		val = 0;
 
-	return 0;
+	I915_WRITE64(FENCE_REG_965_0 + reg * 8, val);
+	POSTING_READ(FENCE_REG_965_0 + reg * 8);
 }
 
-static int i915_write_fence_reg(struct drm_i915_gem_object *obj,
-				struct intel_ring_buffer *pipelined)
+static void i915_write_fence_reg(struct drm_device *dev, int reg,
+				 struct drm_i915_gem_object *obj)
 {
-	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	u32 size = obj->gtt_space->size;
-	u32 fence_reg, val, pitch_val;
-	int tile_width;
-
-	if (WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
-		 (size & -size) != size ||
-		 (obj->gtt_offset & (size - 1)),
-		 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
-		 obj->gtt_offset, obj->map_and_fenceable, size))
-		return -EINVAL;
+	u32 val;
 
-	if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
-		tile_width = 128;
-	else
-		tile_width = 512;
-
-	/* Note: pitch better be a power of two tile widths */
-	pitch_val = obj->stride / tile_width;
-	pitch_val = ffs(pitch_val) - 1;
-
-	val = obj->gtt_offset;
-	if (obj->tiling_mode == I915_TILING_Y)
-		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
-	val |= I915_FENCE_SIZE_BITS(size);
-	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
-	val |= I830_FENCE_REG_VALID;
-
-	fence_reg = obj->fence_reg;
-	if (fence_reg < 8)
-		fence_reg = FENCE_REG_830_0 + fence_reg * 4;
-	else
-		fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
+	if (obj) {
+		u32 size = obj->gtt_space->size;
+		int pitch_val;
+		int tile_width;
 
-	if (pipelined) {
-		int ret = intel_ring_begin(pipelined, 4);
-		if (ret)
-			return ret;
+		WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
+		     (size & -size) != size ||
+		     (obj->gtt_offset & (size - 1)),
+		     "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
+		     obj->gtt_offset, obj->map_and_fenceable, size);
 
-		intel_ring_emit(pipelined, MI_NOOP);
-		intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit(pipelined, fence_reg);
-		intel_ring_emit(pipelined, val);
-		intel_ring_advance(pipelined);
+		if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
+			tile_width = 128;
+		else
+			tile_width = 512;
+
+		/* Note: pitch better be a power of two tile widths */
+		pitch_val = obj->stride / tile_width;
+		pitch_val = ffs(pitch_val) - 1;
+
+		val = obj->gtt_offset;
+		if (obj->tiling_mode == I915_TILING_Y)
+			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
+		val |= I915_FENCE_SIZE_BITS(size);
+		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
+		val |= I830_FENCE_REG_VALID;
 	} else
-		I915_WRITE(fence_reg, val);
+		val = 0;
 
-	return 0;
+	if (reg < 8)
+		reg = FENCE_REG_830_0 + reg * 4;
+	else
+		reg = FENCE_REG_945_8 + (reg - 8) * 4;
+
+	I915_WRITE(reg, val);
+	POSTING_READ(reg);
 }
 
-static int i830_write_fence_reg(struct drm_i915_gem_object *obj,
-				struct intel_ring_buffer *pipelined)
+static void i830_write_fence_reg(struct drm_device *dev, int reg,
+				struct drm_i915_gem_object *obj)
 {
-	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	u32 size = obj->gtt_space->size;
-	int regnum = obj->fence_reg;
 	uint32_t val;
-	uint32_t pitch_val;
-
-	if (WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
-		 (size & -size) != size ||
-		 (obj->gtt_offset & (size - 1)),
-		 "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
-		 obj->gtt_offset, size))
-		return -EINVAL;
 
-	pitch_val = obj->stride / 128;
-	pitch_val = ffs(pitch_val) - 1;
-
-	val = obj->gtt_offset;
-	if (obj->tiling_mode == I915_TILING_Y)
-		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
-	val |= I830_FENCE_SIZE_BITS(size);
-	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
-	val |= I830_FENCE_REG_VALID;
-
-	if (pipelined) {
-		int ret = intel_ring_begin(pipelined, 4);
-		if (ret)
-			return ret;
-
-		intel_ring_emit(pipelined, MI_NOOP);
-		intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit(pipelined, FENCE_REG_830_0 + regnum*4);
-		intel_ring_emit(pipelined, val);
-		intel_ring_advance(pipelined);
+	if (obj) {
+		u32 size = obj->gtt_space->size;
+		uint32_t pitch_val;
+
+		WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
+		     (size & -size) != size ||
+		     (obj->gtt_offset & (size - 1)),
+		     "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
+		     obj->gtt_offset, size);
+
+		pitch_val = obj->stride / 128;
+		pitch_val = ffs(pitch_val) - 1;
+
+		val = obj->gtt_offset;
+		if (obj->tiling_mode == I915_TILING_Y)
+			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
+		val |= I830_FENCE_SIZE_BITS(size);
+		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
+		val |= I830_FENCE_REG_VALID;
 	} else
-		I915_WRITE(FENCE_REG_830_0 + regnum * 4, val);
+		val = 0;
 
-	return 0;
+	I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
+	POSTING_READ(FENCE_REG_830_0 + reg * 4);
 }
 
-static bool ring_passed_seqno(struct intel_ring_buffer *ring, u32 seqno)
+static void i915_gem_write_fence(struct drm_device *dev, int reg,
+				 struct drm_i915_gem_object *obj)
 {
-	return i915_seqno_passed(ring->get_seqno(ring), seqno);
+	switch (INTEL_INFO(dev)->gen) {
+	case 7:
+	case 6: sandybridge_write_fence_reg(dev, reg, obj); break;
+	case 5:
+	case 4: i965_write_fence_reg(dev, reg, obj); break;
+	case 3: i915_write_fence_reg(dev, reg, obj); break;
+	case 2: i830_write_fence_reg(dev, reg, obj); break;
+	default: break;
+	}
+}
+
+static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
+					 struct drm_i915_fence_reg *fence,
+					 bool enable)
+{
+	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+	int reg = fence - dev_priv->fence_regs;
+
+	i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
+
+	if (enable) {
+		obj->fence_reg = reg;
+		fence->obj = obj;
+		list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
+	} else {
+		obj->fence_reg = I915_FENCE_REG_NONE;
+		fence->obj = NULL;
+		list_del_init(&fence->lru_list);
+	}
 }
 
 static int
-i915_gem_object_flush_fence(struct drm_i915_gem_object *obj,
-			    struct intel_ring_buffer *pipelined)
+i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
 {
 	int ret;
 
 	if (obj->fenced_gpu_access) {
 		if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
-			ret = i915_gem_flush_ring(obj->last_fenced_ring,
+			ret = i915_gem_flush_ring(obj->ring,
 						  0, obj->base.write_domain);
 			if (ret)
 				return ret;
@@ -2494,18 +2483,14 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj,
 		obj->fenced_gpu_access = false;
 	}
 
-	if (obj->last_fenced_seqno && pipelined != obj->last_fenced_ring) {
-		if (!ring_passed_seqno(obj->last_fenced_ring,
-				       obj->last_fenced_seqno)) {
-			ret = i915_wait_request(obj->last_fenced_ring,
-						obj->last_fenced_seqno,
-						true);
-			if (ret)
-				return ret;
-		}
+	if (obj->last_fenced_seqno) {
+		ret = i915_wait_request(obj->ring,
+					obj->last_fenced_seqno,
+					true);
+		if (ret)
+			return ret;
 
 		obj->last_fenced_seqno = 0;
-		obj->last_fenced_ring = NULL;
 	}
 
 	/* Ensure that all CPU reads are completed before installing a fence
@@ -2520,34 +2505,29 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj,
 int
 i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
 {
+	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
 	int ret;
 
-	if (obj->tiling_mode)
-		i915_gem_release_mmap(obj);
-
-	ret = i915_gem_object_flush_fence(obj, NULL);
+	ret = i915_gem_object_flush_fence(obj);
 	if (ret)
 		return ret;
 
-	if (obj->fence_reg != I915_FENCE_REG_NONE) {
-		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-
-		WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count);
-		i915_gem_clear_fence_reg(obj->base.dev,
-					 &dev_priv->fence_regs[obj->fence_reg]);
+	if (obj->fence_reg == I915_FENCE_REG_NONE)
+		return 0;
 
-		obj->fence_reg = I915_FENCE_REG_NONE;
-	}
+	i915_gem_object_update_fence(obj,
+				     &dev_priv->fence_regs[obj->fence_reg],
+				     false);
+	i915_gem_object_fence_lost(obj);
 
 	return 0;
 }
 
 static struct drm_i915_fence_reg *
-i915_find_fence_reg(struct drm_device *dev,
-		    struct intel_ring_buffer *pipelined)
+i915_find_fence_reg(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_fence_reg *reg, *first, *avail;
+	struct drm_i915_fence_reg *reg, *avail;
 	int i;
 
 	/* First try to find a free reg */
@@ -2565,26 +2545,14 @@ i915_find_fence_reg(struct drm_device *dev,
 		return NULL;
 
 	/* None available, try to steal one or wait for a user to finish */
-	avail = first = NULL;
 	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
 		if (reg->pin_count)
 			continue;
 
-		if (first == NULL)
-			first = reg;
-
-		if (!pipelined ||
-		    !reg->obj->last_fenced_ring ||
-		    reg->obj->last_fenced_ring == pipelined) {
-			avail = reg;
-			break;
-		}
+		return reg;
 	}
 
-	if (avail == NULL)
-		avail = first;
-
-	return avail;
+	return NULL;
 }
 
 /**
@@ -2603,169 +2571,45 @@ i915_find_fence_reg(struct drm_device *dev,
  * For an untiled surface, this removes any existing fence.
  */
 int
-i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
-			  struct intel_ring_buffer *pipelined)
+i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_fence_reg *reg;
-	int ret;
-
-	if (obj->tiling_mode == I915_TILING_NONE)
-		return i915_gem_object_put_fence(obj);
-
-	/* XXX disable pipelining. There are bugs. Shocking. */
-	pipelined = NULL;
+	struct drm_i915_fence_reg *reg = NULL;
+	bool enable = obj->tiling_mode != I915_TILING_NONE;
+	int ret = 0;
 
-	/* Just update our place in the LRU if our fence is getting reused. */
 	if (obj->fence_reg != I915_FENCE_REG_NONE) {
 		reg = &dev_priv->fence_regs[obj->fence_reg];
-		list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
-
-		if (obj->tiling_changed) {
-			ret = i915_gem_object_flush_fence(obj, pipelined);
-			if (ret)
-				return ret;
-
-			if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
-				pipelined = NULL;
-
-			if (pipelined) {
-				reg->setup_seqno =
-					i915_gem_next_request_seqno(pipelined);
-				obj->last_fenced_seqno = reg->setup_seqno;
-				obj->last_fenced_ring = pipelined;
-			}
-
-			goto update;
+		if (!obj->tiling_changed) {
+			list_move_tail(&reg->lru_list,
+				       &dev_priv->mm.fence_list);
+			reg = NULL;
 		}
+	} else if (enable) {
+		reg = i915_find_fence_reg(dev);
+		if (reg == NULL)
+			return -EDEADLK;
 
-		if (!pipelined) {
-			if (reg->setup_seqno) {
-				if (!ring_passed_seqno(obj->last_fenced_ring,
-						       reg->setup_seqno)) {
-					ret = i915_wait_request(obj->last_fenced_ring,
-								reg->setup_seqno,
-								true);
-					if (ret)
-						return ret;
-				}
-
-				reg->setup_seqno = 0;
-			}
-		} else if (obj->last_fenced_ring &&
-			   obj->last_fenced_ring != pipelined) {
-			ret = i915_gem_object_flush_fence(obj, pipelined);
+		if (reg->obj) {
+			ret = i915_gem_object_flush_fence(reg->obj);
 			if (ret)
 				return ret;
 		}
 
-		return 0;
-	}
-
-	reg = i915_find_fence_reg(dev, pipelined);
-	if (reg == NULL)
-		return -EDEADLK;
-
-	ret = i915_gem_object_flush_fence(obj, pipelined);
-	if (ret)
-		return ret;
-
-	if (reg->obj) {
-		struct drm_i915_gem_object *old = reg->obj;
-
-		drm_gem_object_reference(&old->base);
+		if (reg->obj) {
+			struct drm_i915_gem_object *old = reg->obj;
 
-		if (old->tiling_mode)
-			i915_gem_release_mmap(old);
-
-		ret = i915_gem_object_flush_fence(old, pipelined);
-		if (ret) {
-			drm_gem_object_unreference(&old->base);
-			return ret;
+			old->fence_reg = I915_FENCE_REG_NONE;
+			i915_gem_object_fence_lost(old);
 		}
-
-		if (old->last_fenced_seqno == 0 && obj->last_fenced_seqno == 0)
-			pipelined = NULL;
-
-		old->fence_reg = I915_FENCE_REG_NONE;
-		old->last_fenced_ring = pipelined;
-		old->last_fenced_seqno =
-			pipelined ? i915_gem_next_request_seqno(pipelined) : 0;
-
-		drm_gem_object_unreference(&old->base);
-	} else if (obj->last_fenced_seqno == 0)
-		pipelined = NULL;
-
-	reg->obj = obj;
-	list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
-	obj->fence_reg = reg - dev_priv->fence_regs;
-	obj->last_fenced_ring = pipelined;
-
-	reg->setup_seqno =
-		pipelined ? i915_gem_next_request_seqno(pipelined) : 0;
-	obj->last_fenced_seqno = reg->setup_seqno;
-
-update:
-	obj->tiling_changed = false;
-	switch (INTEL_INFO(dev)->gen) {
-	case 7:
-	case 6:
-		ret = sandybridge_write_fence_reg(obj, pipelined);
-		break;
-	case 5:
-	case 4:
-		ret = i965_write_fence_reg(obj, pipelined);
-		break;
-	case 3:
-		ret = i915_write_fence_reg(obj, pipelined);
-		break;
-	case 2:
-		ret = i830_write_fence_reg(obj, pipelined);
-		break;
 	}
 
-	return ret;
-}
+	if (reg)
+		i915_gem_object_update_fence(obj, reg, enable);
 
-/**
- * i915_gem_clear_fence_reg - clear out fence register info
- * @obj: object to clear
- *
- * Zeroes out the fence register itself and clears out the associated
- * data structures in dev_priv and obj.
- */
-static void
-i915_gem_clear_fence_reg(struct drm_device *dev,
-			 struct drm_i915_fence_reg *reg)
-{
-	drm_i915_private_t *dev_priv = dev->dev_private;
-	uint32_t fence_reg = reg - dev_priv->fence_regs;
-
-	switch (INTEL_INFO(dev)->gen) {
-	case 7:
-	case 6:
-		I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + fence_reg*8, 0);
-		break;
-	case 5:
-	case 4:
-		I915_WRITE64(FENCE_REG_965_0 + fence_reg*8, 0);
-		break;
-	case 3:
-		if (fence_reg >= 8)
-			fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
-		else
-	case 2:
-			fence_reg = FENCE_REG_830_0 + fence_reg * 4;
-
-		I915_WRITE(fence_reg, 0);
-		break;
-	}
-
-	list_del_init(&reg->lru_list);
-	reg->obj = NULL;
-	reg->setup_seqno = 0;
-	reg->pin_count = 0;
+	obj->tiling_changed = false;
+	return 0;
 }
 
 static bool i915_gem_valid_gtt_space(struct drm_device *dev,
@@ -4205,7 +4049,9 @@ i915_gem_load(struct drm_device *dev)
 
 	/* Initialize fence registers to zero */
 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		i915_gem_clear_fence_reg(dev, &dev_priv->fence_regs[i]);
+		i915_gem_write_fence(dev, i, NULL);
+		dev_priv->fence_regs[i].obj = NULL;
+		list_del_init(&dev_priv->fence_regs[i].lru_list);
 	}
 
 	i915_gem_detect_bit_6_swizzle(dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d56ba9d..fc2b6d0 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -486,7 +486,7 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
 
 	if (has_fenced_gpu_access) {
 		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
-			ret = i915_gem_object_get_fence(obj, ring);
+			ret = i915_gem_object_get_fence(obj);
 			if (ret)
 				goto err_unpin;
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index a67200a..340a378 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2051,7 +2051,7 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev,
 	 * framebuffer compression.  For simplicity, we always install
 	 * a fence as the cost is not that onerous.
 	 */
-	ret = i915_gem_object_get_fence(obj, pipelined);
+	ret = i915_gem_object_get_fence(obj);
 	if (ret)
 		goto err_unpin;
 
-- 
1.7.9.1


* Re: [PATCH 1/2] drm/i915: Reorganise rules for get_fence/put_fence
  2012-03-22 15:10 [PATCH 1/2] drm/i915: Reorganise rules for get_fence/put_fence Chris Wilson
  2012-03-22 15:10 ` [PATCH 2/2] drm/i915: Remove fence pipelining infrastructure Chris Wilson
@ 2012-03-23 19:08 ` Daniel Vetter
  2012-04-10 10:02 ` Daniel Vetter
From: Daniel Vetter @ 2012-03-23 19:08 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Thu, Mar 22, 2012 at 03:10:00PM +0000, Chris Wilson wrote:
> By simplifying the rules to calling get_fence when writing to the object
> through the GTT in a tiled manner, and calling put_fence before writing
> to the object through the GTT in a linear manner, the code becomes
> clearer and there is less chance of making a mistake.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48


* Re: [PATCH 2/2] drm/i915: Remove fence pipelining infrastructure
  2012-03-22 15:10 ` [PATCH 2/2] drm/i915: Remove fence pipelining infrastructure Chris Wilson
@ 2012-03-23 19:18   ` Daniel Vetter
  2012-03-23 20:02     ` Chris Wilson
From: Daniel Vetter @ 2012-03-23 19:18 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Thu, Mar 22, 2012 at 03:10:01PM +0000, Chris Wilson wrote:
> I have failed to find a way to make this work without random GPU deaths,
> so remove the ability to pipeline a fence update using the GPU. In the
> process, we can refactor the code to improve the error handling and
> avoid unnecessary modifications to our VMA if we do not need to update
> the fence register.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Mea culpa, but I agree that it's better to reap this instead of letting it
languish even longer as some dead code. For the patch itself, this does
way too much. Imo the gem_write_fence rework and the reaping of the
pipelining logic should be separate patches (maybe even more) - I don't
quite see through this patch and follow where things move to.

Maybe also add some more comments about the lifetime rules wrt obj->ring
obj->last_rendering_seqno and obj->last_fenced_seqno.

Also, if you have any ideas for crazy i-g-t tests, I think we should use
this opportunity to fill some of the gaping holes wrt fencing we have in
our testsuite.

And to pardon dense me: What does VMA mean in this context?

Cheers, Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48


* Re: [PATCH 2/2] drm/i915: Remove fence pipelining infrastructure
  2012-03-23 19:18   ` Daniel Vetter
@ 2012-03-23 20:02     ` Chris Wilson
  2012-03-23 20:13       ` Chris Wilson
From: Chris Wilson @ 2012-03-23 20:02 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Fri, 23 Mar 2012 20:18:37 +0100, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Thu, Mar 22, 2012 at 03:10:01PM +0000, Chris Wilson wrote:
> > I have failed to find a way to make this work without random GPU deaths,
> > so remove the ability to pipeline a fence update using the GPU. In the
> > process, we can refactor the code to improve the error handling and
> > avoid unnecessary modifications to our VMA if we do not need to update
> > the fence register.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Mea culpa, but I agree that it's better to reap this instead of letting it
> languish even longer as some dead code. For the patch itself, this does
> way too much. Imo the gem_write_fence rework and the reaping of the
> pipelining logic should be separate patches (maybe even more) - I don't
> quite see through this patch and follow where things move to.

I got bored of rewriting the same 200 lines with the incremental churn.
Every patch only made sense as "preparing to rip out pipelining".

> Maybe also add some more comments about the lifetime rules wrt obj->ring
> obj->last_rendering_seqno and obj->last_fenced_seqno.
> 
> Also, if you have any ideas for crazy i-g-t tests, I think we should use
> this opportunity to fill some of the gaping holes wrt fencing we have in
> our testsuite.

The obvious failure conditions all seem to revolve around racing with
pending GPU writes; those should be hit by gem_stress. For the less
obvious ones, where the GPU dies and everything in the error state looks
consistent and coherent, I haven't a clue. I was up to a couple of hours
between failures and still do not know why it failed. With pipelining
disabled, that failure mode vanishes.

> And to pardon dense me: What does VMA mean in this context?

Exactly what you think, virtual memory area. It is a hint that with
certain access patterns, you can see a significant performance
improvement with this patch; can you spot why? ;-) (Though I don't think
mesa would see any benefit.)
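
One reading, assuming the win is the retained GTT mmap (the hunk
removed from i915_gem_object_put_fence is the clue):

	/* Before: a tiled object lost its GTT mmap on every put_fence,
	 * whether or not it actually held a fence register. */
	if (obj->tiling_mode)
		i915_gem_release_mmap(obj);

	/* After: the mmap is zapped only via i915_gem_object_fence_lost(),
	 * when a register is truly taken away -- so cycling fences over a
	 * long-lived GTT mmap no longer refaults on every access. */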
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


* Re: [PATCH 2/2] drm/i915: Remove fence pipelining infrastructure
  2012-03-23 20:02     ` Chris Wilson
@ 2012-03-23 20:13       ` Chris Wilson
From: Chris Wilson @ 2012-03-23 20:13 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Fri, 23 Mar 2012 20:02:05 +0000, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Fri, 23 Mar 2012 20:18:37 +0100, Daniel Vetter <daniel@ffwll.ch> wrote:
> > Mea culpa, but I agree that it's better to reap this instead of letting it
> > languish even longer as some dead code. For the patch itself, this does
> > way too much. Imo the gem_write_fence rework and the reaping of the
> > pipelining logic should be separate patches (maybe even more) - I don't
> > quite see through this patch and follow where things move to.
> 
> I got bored of rewriting the same 200 lines with the incremental churn.
> Every patch only made sense as "preparing to rip out pipelining".

Meant to add: but since it is you, I'll give it another shot.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


* Re: [PATCH 1/2] drm/i915: Reorganise rules for get_fence/put_fence
  2012-03-22 15:10 [PATCH 1/2] drm/i915: Reorganise rules for get_fence/put_fence Chris Wilson
  2012-03-22 15:10 ` [PATCH 2/2] drm/i915: Remove fence pipelining infrastructure Chris Wilson
  2012-03-23 19:08 ` [PATCH 1/2] drm/i915: Reorganise rules for get_fence/put_fence Daniel Vetter
@ 2012-04-10 10:02 ` Daniel Vetter
From: Daniel Vetter @ 2012-04-10 10:02 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Thu, Mar 22, 2012 at 03:10:00PM +0000, Chris Wilson wrote:
> By simplifying the rules to calling get_fence when writing to the object
> through the GTT in a tiled manner, and calling put_fence before writing
> to the object through the GTT in a linear manner, the code becomes
> clearer and there is less chance of making a mistake.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Queued for -next (with a conflict resolved and a little spelling fix
added), thanks for the patch.
-Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48
