* [PATCH] drm/i915: s/seqno/request/ tracking inside objects
@ 2014-07-25 12:27 Chris Wilson
  2014-07-28 16:24 ` Daniel Vetter
  2014-07-28 20:44 ` Jesse Barnes
  0 siblings, 2 replies; 8+ messages in thread
From: Chris Wilson @ 2014-07-25 12:27 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, Brad Volkin

At the heart of this change is that the seqno is too low-level an
abstraction to handle the growing complexities of command tracking,
both with the introduction of multiple command queues with execbuffer
and the potential for reordering with a scheduler. On top of the seqno
we have the request. Conceptually this is just a fence, but it also
carries substantial bookkeeping of its own in order to track, for
example, the context and batch in flight. It is the central structure
upon which we can extend with dependency tracking et al.
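
A minimal illustration (not part of the patch; the helper name is
hypothetical) of what this buys us: callers stop comparing raw seqnos
against a ring and instead ask the request itself, which is simply NULL
when nothing is outstanding:

static bool example_obj_write_done(struct drm_i915_gem_object *obj)
{
	struct i915_gem_request *rq = obj->last_write.request;

	/* No request tracked means the last write has already retired;
	 * otherwise i915_request_complete() checks (and caches) whether
	 * the ring's seqno has passed the request's breadcrumb.
	 */
	return rq == NULL || i915_request_complete(rq, true);
}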

As regards the objects, they were using the seqno as a simple fence,
upon which we check or even wait for command completion. This patch
exchanges that seqno/ring pair for the request itself. For the
majority of cases, the lifetime of the request is ordered by how we
retire objects and then requests. However, both the unlocked waits and
probing elsewhere do not tie into the normal request lifetimes, and so
we need to introduce a kref. Extending the objects to use the request
as the fence naturally extends to segregating read/write fence
tracking. This is significant as it reduces the number of semaphores
we need to emit, reducing the likelihood of #54226 and improving
performance overall.
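
For reference, a condensed sketch of the unlocked-wait pattern the kref
enables (it mirrors i915_gem_object_wait_rendering__nonblocking below;
the function name is hypothetical and it assumes it lives in
i915_gem.c, where __wait_request is defined, with struct_mutex held on
entry):

static int example_wait_last_write__unlocked(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_gem_request *rq;
	unsigned reset_counter;
	int ret;

	/* Pin the request so that retirement cannot free it once we
	 * drop the lock; i915_request_get/put tolerate NULL.
	 */
	rq = i915_request_get(obj->last_write.request);
	if (rq == NULL)
		return 0;

	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
	mutex_unlock(&dev->struct_mutex);

	ret = __wait_request(rq, reset_counter, true, NULL, NULL);

	mutex_lock(&dev->struct_mutex);
	i915_request_put(rq);	/* release our pin */
	return ret;
}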

NOTE: this is not against bare drm-intel-nightly and is likely to
conflict with execlists...

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Oscar Mateo <oscar.mateo@intel.com>
Cc: Brad Volkin <bradley.d.volkin@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c          |  37 +-
 drivers/gpu/drm/i915/i915_drv.h              | 108 ++--
 drivers/gpu/drm/i915/i915_gem.c              | 769 ++++++++++++++++-----------
 drivers/gpu/drm/i915/i915_gem_context.c      |  19 +-
 drivers/gpu/drm/i915/i915_gem_exec.c         |  10 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  37 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c |   5 +-
 drivers/gpu/drm/i915/i915_gem_tiling.c       |   2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c        |  35 +-
 drivers/gpu/drm/i915/i915_irq.c              |   6 +-
 drivers/gpu/drm/i915/i915_perf.c             |   6 +-
 drivers/gpu/drm/i915/i915_trace.h            |   2 +-
 drivers/gpu/drm/i915/intel_display.c         |  50 +-
 drivers/gpu/drm/i915/intel_drv.h             |   3 +-
 drivers/gpu/drm/i915/intel_overlay.c         | 118 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c      |  83 +--
 drivers/gpu/drm/i915/intel_ringbuffer.h      |  11 +-
 17 files changed, 745 insertions(+), 556 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 406e630..676d5f1 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -122,10 +122,11 @@ static inline const char *get_global_flag(struct drm_i915_gem_object *obj)
 static void
 describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 {
+	struct i915_gem_request *rq = i915_gem_object_last_read(obj);
 	struct i915_vma *vma;
 	int pin_count = 0;
 
-	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %u %u %u%s%s%s",
+	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
 		   &obj->base,
 		   get_pin_flag(obj),
 		   get_tiling_flag(obj),
@@ -133,9 +134,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		   obj->base.size / 1024,
 		   obj->base.read_domains,
 		   obj->base.write_domain,
-		   obj->last_read_seqno,
-		   obj->last_write_seqno,
-		   obj->last_fenced_seqno,
+		   i915_request_seqno(rq),
+		   i915_request_seqno(obj->last_write.request),
+		   i915_request_seqno(obj->last_fence.request),
 		   i915_cache_level_str(obj->cache_level),
 		   obj->dirty ? " dirty" : "",
 		   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
@@ -168,8 +169,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		*t = '\0';
 		seq_printf(m, " (%s mappable)", s);
 	}
-	if (obj->ring != NULL)
-		seq_printf(m, " (%s)", obj->ring->name);
+	if (rq)
+		seq_printf(m, " (%s)", rq->ring->name);
 	if (obj->frontbuffer_bits)
 		seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
 }
@@ -336,7 +337,7 @@ static int per_file_stats(int id, void *ptr, void *data)
 			if (ppgtt->ctx && ppgtt->ctx->file_priv != stats->file_priv)
 				continue;
 
-			if (obj->ring) /* XXX per-vma statistic */
+			if (obj->active) /* XXX per-vma statistic */
 				stats->active += obj->base.size;
 			else
 				stats->inactive += obj->base.size;
@@ -346,7 +347,7 @@ static int per_file_stats(int id, void *ptr, void *data)
 	} else {
 		if (i915_gem_obj_ggtt_bound(obj)) {
 			stats->global += obj->base.size;
-			if (obj->ring)
+			if (obj->active)
 				stats->active += obj->base.size;
 			else
 				stats->inactive += obj->base.size;
@@ -614,12 +615,12 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
 				seq_printf(m, "Flip pending (waiting for vsync) on pipe %c (plane %c)\n",
 					   pipe, plane);
 			}
-			if (work->ring)
+			if (work->flip_queued_request) {
+				struct i915_gem_request *rq = work->flip_queued_request;
 				seq_printf(m, "Flip queued on %s at seqno %u, now %u\n",
-						work->ring->name,
-						work->flip_queued_seqno,
-						work->ring->get_seqno(work->ring, true));
-			else
+						rq->ring->name, rq->seqno,
+						rq->ring->get_seqno(rq->ring, true));
+			} else
 				seq_printf(m, "Flip not associated with any ring\n");
 			seq_printf(m, "Flip queued on frame %d, (was ready on frame %d), now %d\n",
 				   work->flip_queued_vblank,
@@ -656,7 +657,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring;
-	struct drm_i915_gem_request *gem_request;
+	struct i915_gem_request *rq;
 	int ret, count, i;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
@@ -669,12 +670,10 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
 			continue;
 
 		seq_printf(m, "%s requests:\n", ring->name);
-		list_for_each_entry(gem_request,
-				    &ring->request_list,
-				    list) {
+		list_for_each_entry(rq, &ring->request_list, list) {
 			seq_printf(m, "    %d @ %d\n",
-				   gem_request->seqno,
-				   (int) (jiffies - gem_request->emitted_jiffies));
+				   rq->seqno,
+				   (int)(jiffies - rq->emitted_jiffies));
 		}
 		count++;
 	}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9837b0f..5794d096 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -187,6 +187,7 @@ enum hpd_pin {
 struct drm_i915_private;
 struct i915_mm_struct;
 struct i915_mmu_object;
+struct i915_gem_request;
 
 enum intel_dpll_id {
 	DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
@@ -1720,16 +1721,15 @@ struct drm_i915_gem_object {
 	struct drm_mm_node *stolen;
 	struct list_head global_list;
 
-	struct list_head ring_list;
 	/** Used in execbuf to temporarily hold a ref */
 	struct list_head obj_exec_link;
 
 	/**
 	 * This is set if the object is on the active lists (has pending
-	 * rendering and so a non-zero seqno), and is not set if it i s on
-	 * inactive (ready to be unbound) list.
+	 * rendering and so a submitted request), and is not set if it is on
+	 * inactive (ready to be unbound) list. We track activity per engine.
 	 */
-	unsigned int active:1;
+	unsigned int active:3;
 
 	/**
 	 * This is set if the object has been written to since last bound
@@ -1797,13 +1797,11 @@ struct drm_i915_gem_object {
 	void *dma_buf_vmapping;
 	int vmapping_count;
 
-	struct intel_engine_cs *ring;
-
-	/** Breadcrumb of last rendering to the buffer. */
-	uint32_t last_read_seqno;
-	uint32_t last_write_seqno;
-	/** Breadcrumb of last fenced GPU access to the buffer. */
-	uint32_t last_fenced_seqno;
+	/** Breadcrumbs of last rendering to the buffer. */
+	struct {
+		struct i915_gem_request *request;
+		struct list_head ring_list;
+	} last_write, last_read[I915_NUM_RINGS], last_fence;
 
 	/** Current tiling stride for the object, if it's tiled. */
 	uint32_t stride;
@@ -1836,6 +1834,8 @@ struct drm_i915_gem_object {
 };
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
+struct i915_gem_request *i915_gem_object_last_read(struct drm_i915_gem_object *obj);
+
 void i915_gem_track_fb(struct drm_i915_gem_object *old,
 		       struct drm_i915_gem_object *new,
 		       unsigned frontbuffer_bits);
@@ -1850,7 +1850,9 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
  * sequence-number comparisons on buffer last_rendering_seqnos, and associate
  * an emission time with seqnos for tracking how far ahead of the GPU we are.
  */
-struct drm_i915_gem_request {
+struct i915_gem_request {
+	struct kref kref;
+
 	/** On Which ring this request was generated */
 	struct intel_engine_cs *ring;
 
@@ -1878,8 +1880,60 @@ struct drm_i915_gem_request {
 	struct drm_i915_file_private *file_priv;
 	/** file_priv list entry for this request */
 	struct list_head client_list;
+
+	bool completed:1;
 };
 
+static inline struct intel_engine_cs *i915_request_ring(struct i915_gem_request *rq)
+{
+	return rq ? rq->ring : NULL;
+}
+
+static inline int i915_request_ring_id(struct i915_gem_request *rq)
+{
+	return rq ? rq->ring->id : -1;
+}
+
+static inline u32 i915_request_seqno(struct i915_gem_request *rq)
+{
+	return rq ? rq->seqno : 0;
+}
+
+/**
+ * Returns true if seq1 is later than seq2.
+ */
+static inline bool
+__i915_seqno_passed(uint32_t seq1, uint32_t seq2)
+{
+	return (int32_t)(seq1 - seq2) >= 0;
+}
+
+static inline bool
+i915_request_complete(struct i915_gem_request *rq, bool lazy)
+{
+	if (!rq->completed)
+		rq->completed = __i915_seqno_passed(rq->ring->get_seqno(rq->ring, lazy),
+						    rq->seqno);
+	return rq->completed;
+}
+
+static inline struct i915_gem_request *
+i915_request_get(struct i915_gem_request *rq)
+{
+	if (rq)
+		kref_get(&rq->kref);
+	return rq;
+}
+
+void __i915_request_free(struct kref *kref);
+
+static inline void
+i915_request_put(struct i915_gem_request *rq)
+{
+	if (rq)
+		kref_put(&rq->kref, __i915_request_free);
+}
+
 struct drm_i915_file_private {
 	struct drm_i915_private *dev_priv;
 	struct drm_file *file;
@@ -2335,22 +2389,18 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
-			 struct intel_engine_cs *to);
+			 struct intel_engine_cs *to,
+			 bool readonly);
 void i915_vma_move_to_active(struct i915_vma *vma,
-			     struct intel_engine_cs *ring);
+			     struct intel_engine_cs *ring,
+			     unsigned fenced);
+#define VMA_IS_FENCED 0x1
+#define VMA_HAS_FENCE 0x2
 int i915_gem_dumb_create(struct drm_file *file_priv,
 			 struct drm_device *dev,
 			 struct drm_mode_create_dumb *args);
 int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
 		      uint32_t handle, uint64_t *offset);
-/**
- * Returns true if seq1 is later than seq2.
- */
-static inline bool
-i915_seqno_passed(uint32_t seq1, uint32_t seq2)
-{
-	return (int32_t)(seq1 - seq2) >= 0;
-}
 
 int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
 int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
@@ -2360,14 +2410,14 @@ int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
 bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj);
 void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj);
 
-struct drm_i915_gem_request *
+struct i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *ring);
 
 bool i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_retire_requests_ring(struct intel_engine_cs *ring);
 int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
 				      bool interruptible);
-int __must_check i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno);
+int __must_check i915_gem_check_olr(struct i915_gem_request *rq);
 
 static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
 {
@@ -2411,12 +2461,10 @@ int __must_check i915_gpu_idle(struct drm_device *dev);
 int __must_check i915_gem_suspend(struct drm_device *dev);
 int __i915_add_request(struct intel_engine_cs *ring,
 		       struct drm_file *file,
-		       struct drm_i915_gem_object *batch_obj,
-		       u32 *seqno);
-#define i915_add_request(ring, seqno) \
-	__i915_add_request(ring, NULL, NULL, seqno)
-int __must_check i915_wait_seqno(struct intel_engine_cs *ring,
-				 uint32_t seqno);
+		       struct drm_i915_gem_object *batch_obj);
+#define i915_add_request(ring) \
+	__i915_add_request(ring, NULL, NULL)
+int __must_check i915_wait_request(struct i915_gem_request *rq);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 int __must_check
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f3ad6fb..d208658 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -48,8 +48,6 @@ static __must_check int
 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
 					    struct drm_i915_file_private *file_priv,
 					    bool readonly);
-static void
-i915_gem_object_retire(struct drm_i915_gem_object *obj);
 
 static void i915_gem_write_fence(struct drm_device *dev, int reg,
 				 struct drm_i915_gem_object *obj);
@@ -118,6 +116,73 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
 	spin_unlock(&dev_priv->mm.object_stat_lock);
 }
 
+static void
+i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
+{
+	intel_fb_obj_flush(obj, true);
+	obj->last_write.request = NULL;
+	list_del_init(&obj->last_write.ring_list);
+}
+
+static void
+i915_gem_object_retire__fence(struct drm_i915_gem_object *obj)
+{
+	obj->last_fence.request = NULL;
+	list_del_init(&obj->last_fence.ring_list);
+}
+
+static void
+i915_gem_object_retire__read(struct drm_i915_gem_object *obj,
+			     struct intel_engine_cs *ring)
+{
+	struct i915_vma *vma;
+
+	BUG_ON(obj->active == 0);
+	BUG_ON(obj->base.write_domain);
+
+	obj->last_read[ring->id].request = NULL;
+	list_del_init(&obj->last_read[ring->id].ring_list);
+
+	if (--obj->active)
+		return;
+
+	BUG_ON(obj->last_write.request);
+	BUG_ON(obj->last_fence.request);
+
+	list_for_each_entry(vma, &obj->vma_list, vma_link) {
+		if (!list_empty(&vma->mm_list))
+			list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
+	}
+
+	drm_gem_object_unreference(&obj->base);
+
+	WARN_ON(i915_verify_lists(dev));
+}
+
+static void
+i915_gem_object_retire(struct drm_i915_gem_object *obj)
+{
+	struct i915_gem_request *rq;
+	int i;
+
+	if (!obj->active)
+		return;
+
+	rq = obj->last_write.request;
+	if (rq && i915_request_complete(rq, true))
+		i915_gem_object_retire__write(obj);
+
+	rq = obj->last_fence.request;
+	if (rq && i915_request_complete(rq, true))
+		i915_gem_object_retire__fence(obj);
+
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		rq = obj->last_read[i].request;
+		if (rq && i915_request_complete(rq, true))
+			i915_gem_object_retire__read(obj, rq->ring);
+	}
+}
+
 static int
 i915_gem_wait_for_error(struct i915_gpu_error *error)
 {
@@ -1337,15 +1402,15 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
  * equal.
  */
 int
-i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno)
+i915_gem_check_olr(struct i915_gem_request *rq)
 {
 	int ret;
 
-	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
+	BUG_ON(!mutex_is_locked(&rq->ring->dev->struct_mutex));
 
 	ret = 0;
-	if (seqno == ring->outstanding_lazy_seqno)
-		ret = i915_add_request(ring, NULL);
+	if (rq == rq->ring->preallocated_request)
+		ret = i915_add_request(rq->ring);
 
 	return ret;
 }
@@ -1370,9 +1435,8 @@ static bool can_wait_boost(struct drm_i915_file_private *file_priv)
 }
 
 /**
- * __wait_seqno - wait until execution of seqno has finished
- * @ring: the ring expected to report seqno
- * @seqno: duh!
+ * __wait_request - wait until execution of request has finished
+ * @request: the request to wait upon
  * @reset_counter: reset sequence associated with the given seqno
  * @interruptible: do an interruptible wait (normally yes)
  * @timeout: in - how long to wait (NULL forever); out - how much time remaining
@@ -1387,24 +1451,26 @@ static bool can_wait_boost(struct drm_i915_file_private *file_priv)
  * Returns 0 if the seqno was found within the alloted time. Else returns the
  * errno with remaining time filled in timeout argument.
  */
-static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
-			unsigned reset_counter,
-			bool interruptible,
-			struct timespec *timeout,
-			struct drm_i915_file_private *file_priv)
+static int __wait_request(struct i915_gem_request *rq,
+			  unsigned reset_counter,
+			  bool interruptible,
+			  struct timespec *timeout,
+			  struct drm_i915_file_private *file_priv)
 {
+	struct intel_engine_cs *ring = rq->ring;
 	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = to_i915(dev);
 	const bool irq_test_in_progress =
 		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
 	struct timespec before, now;
 	DEFINE_WAIT(wait);
 	unsigned long timeout_expire;
+	u32 seqno = rq->seqno;
 	int ret;
 
 	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
 
-	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
+	if (i915_request_complete(rq, true))
 		return 0;
 
 	timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
@@ -1440,7 +1506,7 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
 			break;
 		}
 
-		if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) {
+		if (i915_request_complete(rq, false)) {
 			ret = 0;
 			break;
 		}
@@ -1494,46 +1560,30 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
  * request and object lists appropriately for that event.
  */
 int
-i915_wait_seqno(struct intel_engine_cs *ring, uint32_t seqno)
+i915_wait_request(struct i915_gem_request *rq)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	bool interruptible = dev_priv->mm.interruptible;
+	struct drm_device *dev = rq->ring->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
 	int ret;
 
-	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
-	BUG_ON(seqno == 0);
+	if (WARN_ON(!mutex_is_locked(&dev->struct_mutex)))
+		return -EINVAL;
+
+	if (i915_request_complete(rq, true))
+		return 0;
 
-	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
+	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
+				   dev_priv->mm.interruptible);
 	if (ret)
 		return ret;
 
-	ret = i915_gem_check_olr(ring, seqno);
+	ret = i915_gem_check_olr(rq);
 	if (ret)
 		return ret;
 
-	return __wait_seqno(ring, seqno,
-			    atomic_read(&dev_priv->gpu_error.reset_counter),
-			    interruptible, NULL, NULL);
-}
-
-static int
-i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
-				     struct intel_engine_cs *ring)
-{
-	if (!obj->active)
-		return 0;
-
-	/* Manually manage the write flush as we may have not yet
-	 * retired the buffer.
-	 *
-	 * Note that the last_write_seqno is always the earlier of
-	 * the two (read/write) seqno, so if we haved successfully waited,
-	 * we know we have passed the last write.
-	 */
-	obj->last_write_seqno = 0;
-
-	return 0;
+	return __wait_request(rq,
+			      atomic_read(&dev_priv->gpu_error.reset_counter),
+			      dev_priv->mm.interruptible, NULL, NULL);
 }
 
 /**
@@ -1544,19 +1594,37 @@ static __must_check int
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 			       bool readonly)
 {
-	struct intel_engine_cs *ring = obj->ring;
-	u32 seqno;
-	int ret;
+	int i, ret;
 
-	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
-	if (seqno == 0)
-		return 0;
+	if (readonly) {
+		if (obj->last_write.request == NULL)
+			return 0;
 
-	ret = i915_wait_seqno(ring, seqno);
-	if (ret)
-		return ret;
+		ret = i915_wait_request(obj->last_write.request);
+		if (ret)
+			return ret;
+	} else {
+		for (i = 0; i < I915_NUM_RINGS; i++) {
+			if (obj->last_read[i].request == NULL)
+				continue;
+
+			ret = i915_wait_request(obj->last_read[i].request);
+			if (ret)
+				return ret;
+		}
+	}
 
-	return i915_gem_object_wait_rendering__tail(obj, ring);
+	/* Manually manage the write flush as we may have not yet
+	 * retired the buffer.
+	 *
+	 * Note that the last_write_seqno is always the earlier of
+	 * the two (read/write) seqno, so if we have successfully waited,
+	 * we know we have passed the last write.
+	 */
+	if (obj->last_write.request)
+		i915_gem_object_retire__write(obj);
+
+	return 0;
 }
 
 /* A nonblocking variant of the above wait. This is a highly dangerous routine
@@ -1569,34 +1637,48 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring = obj->ring;
+	struct i915_gem_request *rq[I915_NUM_RINGS] = {};
 	unsigned reset_counter;
-	u32 seqno;
-	int ret;
+	int i, n, ret;
 
 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
 	BUG_ON(!dev_priv->mm.interruptible);
 
-	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
-	if (seqno == 0)
+	n = 0;
+	if (readonly) {
+		if (obj->last_write.request)
+			rq[n++] = i915_request_get(obj->last_write.request);
+	} else {
+		for (i = 0; i < I915_NUM_RINGS; i++)
+			if (obj->last_read[i].request)
+				rq[n++] = i915_request_get(obj->last_read[i].request);
+	}
+	if (n == 0)
 		return 0;
 
 	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
 	if (ret)
-		return ret;
+		goto out;
 
-	ret = i915_gem_check_olr(ring, seqno);
-	if (ret)
-		return ret;
+	for (i = 0; i < n; i++) {
+		ret = i915_gem_check_olr(rq[i]);
+		if (ret)
+			goto out;
+	}
 
 	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
 	mutex_unlock(&dev->struct_mutex);
-	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file_priv);
+
+	for (i = 0; ret == 0 && i < n; i++)
+		ret = __wait_request(rq[i], reset_counter, true, NULL, file_priv);
+
 	mutex_lock(&dev->struct_mutex);
-	if (ret)
-		return ret;
 
-	return i915_gem_object_wait_rendering__tail(obj, ring);
+out:
+	for (i = 0; i < n; i++)
+		i915_request_put(rq[i]);
+
+	return ret;
 }
 
 /**
@@ -2387,78 +2469,57 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
-static void
-i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
-			       struct intel_engine_cs *ring)
-{
-	u32 seqno = intel_ring_get_seqno(ring);
-
-	BUG_ON(ring == NULL);
-	if (obj->ring != ring && obj->last_write_seqno) {
-		/* Keep the seqno relative to the current ring */
-		obj->last_write_seqno = seqno;
-	}
-	obj->ring = ring;
-
-	/* Add a reference if we're newly entering the active list. */
-	if (!obj->active) {
-		drm_gem_object_reference(&obj->base);
-		obj->active = 1;
-	}
-
-	list_move_tail(&obj->ring_list, &ring->active_list);
-
-	obj->last_read_seqno = seqno;
-}
-
 void i915_vma_move_to_active(struct i915_vma *vma,
-			     struct intel_engine_cs *ring)
+			     struct intel_engine_cs *ring,
+			     unsigned fenced)
 {
-	list_move_tail(&vma->mm_list, &vma->vm->active_list);
-	return i915_gem_object_move_to_active(vma->obj, ring);
-}
+	struct drm_i915_gem_object *obj = vma->obj;
+	struct i915_gem_request *rq = intel_ring_get_request(ring);
+	u32 old_read = obj->base.read_domains;
+	u32 old_write = obj->base.write_domain;
 
-static void
-i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
-{
-	struct i915_vma *vma;
+	BUG_ON(rq == NULL);
 
-	BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
-	BUG_ON(!obj->active);
+	obj->base.write_domain = obj->base.pending_write_domain;
+	if (obj->base.write_domain == 0)
+		obj->base.pending_read_domains |= obj->base.read_domains;
+	obj->base.read_domains = obj->base.pending_read_domains;
 
-	list_for_each_entry(vma, &obj->vma_list, vma_link) {
-		if (!list_empty(&vma->mm_list))
-			list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
-	}
-
-	intel_fb_obj_flush(obj, true);
-
-	list_del_init(&obj->ring_list);
-	obj->ring = NULL;
+	obj->base.pending_read_domains = 0;
+	obj->base.pending_write_domain = 0;
 
-	obj->last_read_seqno = 0;
-	obj->last_write_seqno = 0;
-	obj->base.write_domain = 0;
+	trace_i915_gem_object_change_domain(obj, old_read, old_write);
+	if (obj->base.read_domains == 0)
+		return;
 
-	obj->last_fenced_seqno = 0;
+	/* Add a reference if we're newly entering the active list. */
+	if (obj->last_read[ring->id].request == NULL && obj->active++ == 0)
+		drm_gem_object_reference(&obj->base);
 
-	obj->active = 0;
-	drm_gem_object_unreference(&obj->base);
+	obj->last_read[ring->id].request = rq;
+	list_move_tail(&obj->last_read[ring->id].ring_list, &ring->read_list);
 
-	WARN_ON(i915_verify_lists(dev));
-}
+	if (obj->base.write_domain) {
+		obj->dirty = 1;
+		obj->last_write.request = rq;
+		list_move_tail(&obj->last_write.ring_list, &ring->write_list);
+		intel_fb_obj_invalidate(obj, ring);
 
-static void
-i915_gem_object_retire(struct drm_i915_gem_object *obj)
-{
-	struct intel_engine_cs *ring = obj->ring;
+		/* update for the implicit flush after a batch */
+		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
+	}
 
-	if (ring == NULL)
-		return;
+	if (fenced) {
+		obj->last_fence.request = rq;
+		list_move_tail(&obj->last_fence.ring_list, &ring->fence_list);
+		if (fenced & 2) {
+			struct drm_i915_private *dev_priv = to_i915(ring->dev);
+			list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
+					&dev_priv->mm.fence_list);
+		}
+	}
 
-	if (i915_seqno_passed(ring->get_seqno(ring, true),
-			      obj->last_read_seqno))
-		i915_gem_object_move_to_inactive(obj);
+	list_move_tail(&vma->mm_list, &vma->vm->active_list);
 }
 
 static int
@@ -2533,11 +2594,10 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
 
 int __i915_add_request(struct intel_engine_cs *ring,
 		       struct drm_file *file,
-		       struct drm_i915_gem_object *obj,
-		       u32 *out_seqno)
+		       struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	struct drm_i915_gem_request *request;
+	struct i915_gem_request *rq;
 	u32 request_ring_position, request_start;
 	int ret;
 
@@ -2553,8 +2613,8 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	if (ret)
 		return ret;
 
-	request = ring->preallocated_lazy_request;
-	if (WARN_ON(request == NULL))
+	rq = ring->preallocated_request;
+	if (WARN_ON(rq == NULL))
 		return -ENOMEM;
 
 	/* Record the position of the start of the request so that
@@ -2568,10 +2628,8 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	if (ret)
 		return ret;
 
-	request->seqno = intel_ring_get_seqno(ring);
-	request->ring = ring;
-	request->head = request_start;
-	request->tail = request_ring_position;
+	rq->head = request_start;
+	rq->tail = request_ring_position;
 
 	/* Whilst this request exists, batch_obj will be on the
 	 * active_list, and so will hold the active reference. Only when this
@@ -2579,32 +2637,31 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	 * inactive_list and lose its active reference. Hence we do not need
 	 * to explicitly hold another reference here.
 	 */
-	request->batch_obj = obj;
+	rq->batch_obj = obj;
 
 	/* Hold a reference to the current context so that we can inspect
 	 * it later in case a hangcheck error event fires.
 	 */
-	request->ctx = ring->last_context;
-	if (request->ctx)
-		i915_gem_context_reference(request->ctx);
+	rq->ctx = ring->last_context;
+	if (rq->ctx)
+		i915_gem_context_reference(rq->ctx);
 
-	request->emitted_jiffies = jiffies;
-	list_add_tail(&request->list, &ring->request_list);
-	request->file_priv = NULL;
+	rq->emitted_jiffies = jiffies;
+	list_add_tail(&rq->list, &ring->request_list);
+	rq->file_priv = NULL;
 
 	if (file) {
 		struct drm_i915_file_private *file_priv = file->driver_priv;
 
 		spin_lock(&file_priv->mm.lock);
-		request->file_priv = file_priv;
-		list_add_tail(&request->client_list,
+		rq->file_priv = file_priv;
+		list_add_tail(&rq->client_list,
 			      &file_priv->mm.request_list);
 		spin_unlock(&file_priv->mm.lock);
 	}
 
-	trace_i915_gem_request_add(ring, request->seqno);
-	ring->outstanding_lazy_seqno = 0;
-	ring->preallocated_lazy_request = NULL;
+	trace_i915_gem_request_add(ring, rq->seqno);
+	ring->preallocated_request = NULL;
 
 	if (!dev_priv->ums.mm_suspended) {
 		i915_queue_hangcheck(ring->dev);
@@ -2616,22 +2673,20 @@ int __i915_add_request(struct intel_engine_cs *ring,
 		intel_mark_busy(dev_priv->dev);
 	}
 
-	if (out_seqno)
-		*out_seqno = request->seqno;
 	return 0;
 }
 
 static inline void
-i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
+i915_gem_request_remove_from_client(struct i915_gem_request *rq)
 {
-	struct drm_i915_file_private *file_priv = request->file_priv;
+	struct drm_i915_file_private *file_priv = rq->file_priv;
 
 	if (!file_priv)
 		return;
 
 	spin_lock(&file_priv->mm.lock);
-	list_del(&request->client_list);
-	request->file_priv = NULL;
+	list_del(&rq->client_list);
+	rq->file_priv = NULL;
 	spin_unlock(&file_priv->mm.lock);
 }
 
@@ -2679,30 +2734,37 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
 	}
 }
 
-static void i915_gem_free_request(struct drm_i915_gem_request *request)
+void __i915_request_free(struct kref *kref)
+{
+	struct i915_gem_request *rq = container_of(kref, struct i915_gem_request, kref);
+	kfree(rq);
+}
+
+static void i915_request_retire(struct i915_gem_request *rq)
 {
-	list_del(&request->list);
-	i915_gem_request_remove_from_client(request);
+	rq->completed = true;
+
+	list_del(&rq->list);
+	i915_gem_request_remove_from_client(rq);
 
-	if (request->ctx)
-		i915_gem_context_unreference(request->ctx);
+	if (rq->ctx) {
+		i915_gem_context_unreference(rq->ctx);
+		rq->ctx = NULL;
+	}
 
-	kfree(request);
+	i915_request_put(rq);
 }
 
-struct drm_i915_gem_request *
+struct i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *ring)
 {
-	struct drm_i915_gem_request *request;
-	u32 completed_seqno;
+	struct i915_gem_request *rq;
 
-	completed_seqno = ring->get_seqno(ring, false);
-
-	list_for_each_entry(request, &ring->request_list, list) {
-		if (i915_seqno_passed(completed_seqno, request->seqno))
+	list_for_each_entry(rq, &ring->request_list, list) {
+		if (i915_request_complete(rq, false))
 			continue;
 
-		return request;
+		return rq;
 	}
 
 	return NULL;
@@ -2711,33 +2773,53 @@ i915_gem_find_active_request(struct intel_engine_cs *ring)
 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
 				       struct intel_engine_cs *ring)
 {
-	struct drm_i915_gem_request *request;
+	struct i915_gem_request *rq;
 	bool ring_hung;
 
-	request = i915_gem_find_active_request(ring);
+	rq = i915_gem_find_active_request(ring);
 
-	if (request == NULL)
+	if (rq == NULL)
 		return;
 
 	ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
 
-	i915_set_reset_status(dev_priv, request->ctx, ring_hung);
+	i915_set_reset_status(dev_priv, rq->ctx, ring_hung);
 
-	list_for_each_entry_continue(request, &ring->request_list, list)
-		i915_set_reset_status(dev_priv, request->ctx, false);
+	list_for_each_entry_continue(rq, &ring->request_list, list)
+		i915_set_reset_status(dev_priv, rq->ctx, false);
 }
 
 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 					struct intel_engine_cs *ring)
 {
-	while (!list_empty(&ring->active_list)) {
+	while (!list_empty(&ring->write_list)) {
 		struct drm_i915_gem_object *obj;
 
-		obj = list_first_entry(&ring->active_list,
+		obj = list_first_entry(&ring->write_list,
 				       struct drm_i915_gem_object,
-				       ring_list);
+				       last_write.ring_list);
 
-		i915_gem_object_move_to_inactive(obj);
+		i915_gem_object_retire__write(obj);
+	}
+
+	while (!list_empty(&ring->fence_list)) {
+		struct drm_i915_gem_object *obj;
+
+		obj = list_first_entry(&ring->fence_list,
+				       struct drm_i915_gem_object,
+				       last_fence.ring_list);
+
+		i915_gem_object_retire__fence(obj);
+	}
+
+	while (!list_empty(&ring->read_list)) {
+		struct drm_i915_gem_object *obj;
+
+		obj = list_first_entry(&ring->read_list,
+				       struct drm_i915_gem_object,
+				       last_read[ring->id].ring_list);
+
+		i915_gem_object_retire__read(obj, ring);
 	}
 
 	/*
@@ -2748,19 +2830,18 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 	 * the request.
 	 */
 	while (!list_empty(&ring->request_list)) {
-		struct drm_i915_gem_request *request;
+		struct i915_gem_request *rq;
 
-		request = list_first_entry(&ring->request_list,
-					   struct drm_i915_gem_request,
-					   list);
+		rq = list_first_entry(&ring->request_list,
+				      struct i915_gem_request,
+				      list);
 
-		i915_gem_free_request(request);
+		i915_request_retire(rq);
 	}
 
 	/* These may not have been flush before the reset, do so now */
-	kfree(ring->preallocated_lazy_request);
-	ring->preallocated_lazy_request = NULL;
-	ring->outstanding_lazy_seqno = 0;
+	kfree(ring->preallocated_request);
+	ring->preallocated_request = NULL;
 }
 
 void i915_gem_restore_fences(struct drm_device *dev)
@@ -2825,43 +2906,71 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 	 * by the ringbuffer to the flushing/inactive lists as appropriate,
 	 * before we free the context associated with the requests.
 	 */
-	while (!list_empty(&ring->active_list)) {
+	while (!list_empty(&ring->write_list)) {
+		struct drm_i915_gem_object *obj;
+
+		obj = list_first_entry(&ring->write_list,
+				       struct drm_i915_gem_object,
+				       last_write.ring_list);
+
+		if (!__i915_seqno_passed(seqno,
+					 obj->last_write.request->seqno))
+			break;
+
+		i915_gem_object_retire__write(obj);
+	}
+
+	while (!list_empty(&ring->fence_list)) {
 		struct drm_i915_gem_object *obj;
 
-		obj = list_first_entry(&ring->active_list,
-				      struct drm_i915_gem_object,
-				      ring_list);
+		obj = list_first_entry(&ring->fence_list,
+				       struct drm_i915_gem_object,
+				       last_fence.ring_list);
 
-		if (!i915_seqno_passed(seqno, obj->last_read_seqno))
+		if (!__i915_seqno_passed(seqno,
+					 obj->last_fence.request->seqno))
 			break;
 
-		i915_gem_object_move_to_inactive(obj);
+		i915_gem_object_retire__fence(obj);
 	}
 
+	while (!list_empty(&ring->read_list)) {
+		struct drm_i915_gem_object *obj;
+
+		obj = list_first_entry(&ring->read_list,
+				       struct drm_i915_gem_object,
+				       last_read[ring->id].ring_list);
+
+		if (!__i915_seqno_passed(seqno,
+					 obj->last_read[ring->id].request->seqno))
+			break;
+
+		i915_gem_object_retire__read(obj, ring);
+	}
 
 	while (!list_empty(&ring->request_list)) {
-		struct drm_i915_gem_request *request;
+		struct i915_gem_request *rq;
 
-		request = list_first_entry(&ring->request_list,
-					   struct drm_i915_gem_request,
-					   list);
+		rq = list_first_entry(&ring->request_list,
+				      struct i915_gem_request,
+				      list);
 
-		if (!i915_seqno_passed(seqno, request->seqno))
+		if (!__i915_seqno_passed(seqno, rq->seqno))
 			break;
 
-		trace_i915_gem_request_retire(ring, request->seqno);
+		trace_i915_gem_request_retire(ring, rq->seqno);
 		/* We know the GPU must have read the request to have
 		 * sent us the seqno + interrupt, so use the position
 		 * of tail of the request to update the last known position
 		 * of the GPU head.
 		 */
-		ring->buffer->last_retired_head = request->tail;
+		ring->buffer->last_retired_head = rq->tail;
 
-		i915_gem_free_request(request);
+		i915_request_retire(rq);
 	}
 
 	if (unlikely(ring->trace_irq_seqno &&
-		     i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
+		     __i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
 		ring->irq_put(ring);
 		ring->trace_irq_seqno = 0;
 	}
@@ -2926,14 +3035,23 @@ i915_gem_idle_work_handler(struct work_struct *work)
 static int
 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
 {
-	int ret;
+	int i;
 
-	if (obj->active) {
-		ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
+	if (!obj->active)
+		return 0;
+
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		struct i915_gem_request *rq = obj->last_read[i].request;
+		int ret;
+
+		if (rq == NULL)
+			continue;
+
+		ret = i915_gem_check_olr(rq);
 		if (ret)
 			return ret;
 
-		i915_gem_retire_requests_ring(obj->ring);
+		i915_gem_retire_requests_ring(rq->ring);
 	}
 
 	return 0;
@@ -2967,11 +3085,10 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_wait *args = data;
 	struct drm_i915_gem_object *obj;
-	struct intel_engine_cs *ring = NULL;
 	struct timespec timeout_stack, *timeout = NULL;
+	struct i915_gem_request *rq[I915_NUM_RINGS] = {};
 	unsigned reset_counter;
-	u32 seqno = 0;
-	int ret = 0;
+	int i, n, ret = 0;
 
 	if (args->timeout_ns >= 0) {
 		timeout_stack = ns_to_timespec(args->timeout_ns);
@@ -2993,13 +3110,8 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (ret)
 		goto out;
 
-	if (obj->active) {
-		seqno = obj->last_read_seqno;
-		ring = obj->ring;
-	}
-
-	if (seqno == 0)
-		 goto out;
+	if (!obj->active)
+		goto out;
 
 	/* Do this after OLR check to make sure we make forward progress polling
 	 * on this IOCTL with a 0 timeout (like busy ioctl)
@@ -3009,11 +3121,25 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		goto out;
 	}
 
+	for (i = n = 0; i < I915_NUM_RINGS; i++) {
+		if (obj->last_read[i].request == NULL)
+			continue;
+
+		rq[n++] = i915_request_get(obj->last_read[i].request);
+	}
+
 	drm_gem_object_unreference(&obj->base);
+
 	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
 	mutex_unlock(&dev->struct_mutex);
 
-	ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv);
+	for (i = 0; i < n; i++) {
+		if (ret == 0)
+			ret = __wait_request(rq[i], reset_counter, true, timeout, file->driver_priv);
+
+		i915_request_put(rq[i]);
+	}
+
 	if (timeout)
 		args->timeout_ns = timespec_to_ns(timeout);
 	return ret;
@@ -3024,6 +3150,45 @@ out:
 	return ret;
 }
 
+static int
+i915_request_sync(struct i915_gem_request *rq,
+		  struct intel_engine_cs *to,
+		  struct drm_i915_gem_object *obj)
+{
+	int ret, idx;
+
+	if (to == NULL)
+		return i915_wait_request(rq);
+
+	/* XXX this is broken by VEBOX+ */
+	idx = intel_ring_sync_index(rq->ring, to);
+
+	/* Optimization: Avoid semaphore sync when we are sure we already
+	 * waited for an object with higher seqno */
+	if (rq->seqno <= rq->ring->semaphore.sync_seqno[idx])
+		return 0;
+
+	ret = i915_gem_check_olr(rq);
+	if (ret)
+		return ret;
+
+	if (!i915_request_complete(rq, true)) {
+		trace_i915_gem_ring_sync_to(rq->ring, to, rq->seqno);
+		ret = to->semaphore.sync_to(to, rq->ring, rq->seqno);
+		if (ret)
+			return ret;
+	}
+
+	/* We must recheck the last_read request because sync_to()
+	 * might have just caused seqno wrap under
+	 * the radar.
+	 */
+	if (obj->last_read[rq->ring->id].request == rq)
+		rq->ring->semaphore.sync_seqno[idx] = rq->seqno;
+
+	return 0;
+}
+
 /**
  * i915_gem_object_sync - sync an object to a ring.
  *
@@ -3038,44 +3203,35 @@ out:
  */
 int
 i915_gem_object_sync(struct drm_i915_gem_object *obj,
-		     struct intel_engine_cs *to)
+		     struct intel_engine_cs *to,
+		     bool readonly)
 {
-	struct intel_engine_cs *from = obj->ring;
-	u32 seqno;
-	int ret, idx;
+	struct i915_gem_request *rq;
+	struct intel_engine_cs *semaphore;
+	int ret = 0, i;
 
-	if (from == NULL || to == from)
-		return 0;
+	semaphore = NULL;
+	if (i915_semaphore_is_enabled(obj->base.dev))
+		semaphore = to;
 
-	if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
-		return i915_gem_object_wait_rendering(obj, false);
-
-	/* XXX this is broken by VEBOX+ */
-	idx = intel_ring_sync_index(from, to);
-
-	seqno = obj->last_read_seqno;
-	/* Optimization: Avoid semaphore sync when we are sure we already
-	 * waited for an object with higher seqno */
-	if (seqno <= from->semaphore.sync_seqno[idx])
-		return 0;
-
-	ret = 0;
-	if (!i915_seqno_passed(from->get_seqno(from, true), seqno)) {
-		ret = i915_gem_check_olr(from, seqno);
-		if (ret)
-			return ret;
+	if (readonly) {
+		rq = obj->last_write.request;
+		if (rq != NULL && to != rq->ring)
+			ret = i915_request_sync(rq, semaphore, obj);
+	} else {
+		for (i = 0; i < I915_NUM_RINGS; i++) {
+			rq = obj->last_read[i].request;
+			if (rq == NULL || to == rq->ring)
+				continue;
 
-		trace_i915_gem_ring_sync_to(from, to, seqno);
-		ret = to->semaphore.sync_to(to, from, seqno);
+			ret = i915_request_sync(rq, semaphore, obj);
+			if (ret)
+				break;
+		}
 	}
-	if (!ret)
-		/* We use last_read_seqno because sync_to()
-		 * might have just caused seqno wrap under
-		 * the radar.
-		 */
-		from->semaphore.sync_seqno[idx] = obj->last_read_seqno;
 
 	return ret;
+
 }
 
 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
@@ -3381,14 +3537,16 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
 static int
 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
 {
-	if (obj->last_fenced_seqno) {
-		int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
-		if (ret)
-			return ret;
+	int ret;
 
-		obj->last_fenced_seqno = 0;
-	}
+	if (obj->last_fence.request == NULL)
+		return 0;
 
+	ret = i915_wait_request(obj->last_fence.request);
+	if (ret)
+		return ret;
+
+	i915_gem_object_retire__fence(obj);
 	return 0;
 }
 
@@ -3836,11 +3994,12 @@ int
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 {
 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+	struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
 	uint32_t old_write_domain, old_read_domains;
 	int ret;
 
 	/* Not valid to be called on unbound objects. */
-	if (!i915_gem_obj_bound_any(obj))
+	if (vma == NULL)
 		return -EINVAL;
 
 	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
@@ -3882,14 +4041,8 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 					    old_write_domain);
 
 	/* And bump the LRU for this access */
-	if (!obj->active) {
-		struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
-		if (vma)
-			list_move_tail(&vma->mm_list,
-				       &dev_priv->gtt.base.inactive_list);
-
-	}
-
+	list_move_tail(&vma->mm_list,
+		       &dev_priv->gtt.base.inactive_list);
 	return 0;
 }
 
@@ -4087,11 +4240,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	bool was_pin_display;
 	int ret;
 
-	if (pipelined != obj->ring) {
-		ret = i915_gem_object_sync(obj, pipelined);
-		if (ret)
-			return ret;
-	}
+	ret = i915_gem_object_sync(obj, pipelined, true);
+	if (ret)
+		return ret;
 
 	/* Mark the pin_display early so that we account for the
 	 * display coherency whilst setting up the cache domains.
@@ -4239,10 +4390,8 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
-	struct drm_i915_gem_request *request;
-	struct intel_engine_cs *ring = NULL;
+	struct i915_gem_request *rq;
 	unsigned reset_counter;
-	u32 seqno = 0;
 	int ret;
 
 	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
@@ -4254,23 +4403,22 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 		return ret;
 
 	spin_lock(&file_priv->mm.lock);
-	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
-		if (time_after_eq(request->emitted_jiffies, recent_enough))
+	list_for_each_entry(rq, &file_priv->mm.request_list, client_list) {
+		if (time_after_eq(rq->emitted_jiffies, recent_enough))
 			break;
-
-		ring = request->ring;
-		seqno = request->seqno;
 	}
+	rq = i915_request_get(&rq->client_list == &file_priv->mm.request_list ? NULL : rq);
 	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
 	spin_unlock(&file_priv->mm.lock);
 
-	if (seqno == 0)
+	if (rq == NULL)
 		return 0;
 
-	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL);
+	ret = __wait_request(rq, reset_counter, true, NULL, NULL);
 	if (ret == 0)
 		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
 
+	i915_request_put(rq);
 	return ret;
 }
 
@@ -4488,7 +4636,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_busy *args = data;
 	struct drm_i915_gem_object *obj;
-	int ret;
+	int ret, i;
 
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
@@ -4507,10 +4655,16 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	 */
 	ret = i915_gem_object_flush_active(obj);
 
-	args->busy = obj->active;
-	if (obj->ring) {
+	args->busy = 0;
+	if (obj->active) {
 		BUILD_BUG_ON(I915_NUM_RINGS > 16);
-		args->busy |= intel_ring_flag(obj->ring) << 16;
+		args->busy |= 1;
+		for (i = 0; i < I915_NUM_RINGS; i++)  {
+			if (obj->last_read[i].request == NULL)
+				continue;
+
+			args->busy |= 1 << (16 + i);
+		}
 	}
 
 	drm_gem_object_unreference(&obj->base);
@@ -4584,8 +4738,13 @@ unlock:
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			  const struct drm_i915_gem_object_ops *ops)
 {
+	int i;
+
 	INIT_LIST_HEAD(&obj->global_list);
-	INIT_LIST_HEAD(&obj->ring_list);
+	INIT_LIST_HEAD(&obj->last_fence.ring_list);
+	INIT_LIST_HEAD(&obj->last_write.ring_list);
+	for (i = 0; i < I915_NUM_RINGS; i++)
+		INIT_LIST_HEAD(&obj->last_read[i].ring_list);
 	INIT_LIST_HEAD(&obj->obj_exec_link);
 	INIT_LIST_HEAD(&obj->vma_list);
 
@@ -5117,7 +5276,9 @@ i915_gem_lastclose(struct drm_device *dev)
 static void
 init_ring_lists(struct intel_engine_cs *ring)
 {
-	INIT_LIST_HEAD(&ring->active_list);
+	INIT_LIST_HEAD(&ring->read_list);
+	INIT_LIST_HEAD(&ring->write_list);
+	INIT_LIST_HEAD(&ring->fence_list);
 	INIT_LIST_HEAD(&ring->request_list);
 }
 
@@ -5213,13 +5374,13 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 	 */
 	spin_lock(&file_priv->mm.lock);
 	while (!list_empty(&file_priv->mm.request_list)) {
-		struct drm_i915_gem_request *request;
+		struct i915_gem_request *rq;
 
-		request = list_first_entry(&file_priv->mm.request_list,
-					   struct drm_i915_gem_request,
-					   client_list);
-		list_del(&request->client_list);
-		request->file_priv = NULL;
+		rq = list_first_entry(&file_priv->mm.request_list,
+				      struct i915_gem_request,
+				      client_list);
+		list_del(&rq->client_list);
+		rq->file_priv = NULL;
 	}
 	spin_unlock(&file_priv->mm.lock);
 }
@@ -5503,15 +5664,27 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
 
-	/* This WARN has probably outlived its usefulness (callers already
-	 * WARN if they don't find the GGTT vma they expect). When removing,
-	 * remember to remove the pre-check in is_pin_display() as well */
-	if (WARN_ON(list_empty(&obj->vma_list)))
-		return NULL;
-
 	vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link);
 	if (vma->vm != obj_to_ggtt(obj))
 		return NULL;
 
 	return vma;
 }
+
+struct i915_gem_request *i915_gem_object_last_read(struct drm_i915_gem_object *obj)
+{
+	u32 seqno = 0;
+	struct i915_gem_request *rq = NULL;
+	int i;
+
+	/* This is approximate as seqno cannot be used across rings */
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		if (obj->last_read[i].request == NULL)
+			continue;
+
+		if (__i915_seqno_passed(obj->last_read[i].request->seqno, seqno))
+			rq = obj->last_read[i].request, seqno = rq->seqno;
+	}
+
+	return rq;
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 79dc77b..690e2dc 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -394,13 +394,9 @@ void i915_gem_context_reset(struct drm_device *dev)
 		if (!lctx)
 			continue;
 
-		if (dctx->legacy_hw_ctx.rcs_state && i == RCS) {
+		if (dctx->legacy_hw_ctx.rcs_state && i == RCS)
 			WARN_ON(i915_gem_obj_ggtt_pin(dctx->legacy_hw_ctx.rcs_state,
 						      get_context_alignment(dev), 0));
-			/* Fake a finish/inactive */
-			dctx->legacy_hw_ctx.rcs_state->base.write_domain = 0;
-			dctx->legacy_hw_ctx.rcs_state->active = 0;
-		}
 
 		if (lctx->legacy_hw_ctx.rcs_state && i == RCS)
 			i915_gem_object_ggtt_unpin(lctx->legacy_hw_ctx.rcs_state);
@@ -467,7 +463,6 @@ void i915_gem_context_fini(struct drm_device *dev)
 		WARN_ON(!dev_priv->ring[RCS].last_context);
 		if (dev_priv->ring[RCS].last_context == dctx) {
 			/* Fake switch to NULL context */
-			WARN_ON(dctx->legacy_hw_ctx.rcs_state->active);
 			i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state);
 			i915_gem_context_unreference(dctx);
 			dev_priv->ring[RCS].last_context = NULL;
@@ -741,8 +736,11 @@ static int do_switch(struct intel_engine_cs *ring,
 	 * MI_SET_CONTEXT instead of when the next seqno has completed.
 	 */
 	if (from != NULL) {
-		from->legacy_hw_ctx.rcs_state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), ring);
+		struct drm_i915_gem_object *from_obj = from->legacy_hw_ctx.rcs_state;
+
+		from_obj->base.pending_read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from_obj), ring, 0);
+
 		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
 		 * whole damn pipeline, we don't need to explicitly mark the
 		 * object dirty. The only exception is that the context must be
@@ -750,11 +748,10 @@ static int do_switch(struct intel_engine_cs *ring,
 		 * able to defer doing this until we know the object would be
 		 * swapped, but there is no way to do that yet.
 		 */
-		from->legacy_hw_ctx.rcs_state->dirty = 1;
-		BUG_ON(from->legacy_hw_ctx.rcs_state->ring != ring);
+		from_obj->dirty = 1;
 
 		/* obj is kept alive until the next request by its active ref */
-		i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
+		i915_gem_object_ggtt_unpin(from_obj);
 		i915_gem_context_unreference(from);
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_exec.c b/drivers/gpu/drm/i915/i915_gem_exec.c
index 57d4dde..787ea6f 100644
--- a/drivers/gpu/drm/i915/i915_gem_exec.c
+++ b/drivers/gpu/drm/i915/i915_gem_exec.c
@@ -45,7 +45,7 @@ static int i915_gem_exec_flush_object(struct drm_i915_gem_object *obj,
 {
 	int ret;
 
-	ret = i915_gem_object_sync(obj, ring);
+	ret = i915_gem_object_sync(obj, ring, false);
 	if (ret)
 		return ret;
 
@@ -65,11 +65,9 @@ static int i915_gem_exec_flush_object(struct drm_i915_gem_object *obj,
 static void i915_gem_exec_dirty_object(struct drm_i915_gem_object *obj,
 				       struct intel_engine_cs *ring)
 {
-	obj->base.read_domains = I915_GEM_DOMAIN_RENDER;
-	obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
-	i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), ring);
-	obj->last_write_seqno = intel_ring_get_seqno(ring);
-	obj->dirty = 1;
+	obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
+	obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
+	i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), ring, 0);
 
 	ring->gpu_caches_dirty = true;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0faab01..8f1c2a2 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -847,7 +847,8 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
 
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
-		ret = i915_gem_object_sync(obj, ring);
+
+		ret = i915_gem_object_sync(obj, ring, obj->base.pending_write_domain == 0);
 		if (ret)
 			return ret;
 
@@ -956,40 +957,20 @@ static void
 i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 				   struct intel_engine_cs *ring)
 {
-	u32 seqno = intel_ring_get_seqno(ring);
 	struct i915_vma *vma;
 
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-		struct drm_i915_gem_object *obj = vma->obj;
-		u32 old_read = obj->base.read_domains;
-		u32 old_write = obj->base.write_domain;
-
-		obj->base.write_domain = obj->base.pending_write_domain;
-		if (obj->base.write_domain == 0)
-			obj->base.pending_read_domains |= obj->base.read_domains;
-		obj->base.read_domains = obj->base.pending_read_domains;
-
-		i915_vma_move_to_active(vma, ring);
-		if (obj->base.write_domain) {
-			obj->dirty = 1;
-			obj->last_write_seqno = seqno;
+		unsigned fenced;
 
-			intel_fb_obj_invalidate(obj, ring);
-
-			/* update for the implicit flush after a batch */
-			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
-		}
+		fenced = 0;
 		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
-			obj->last_fenced_seqno = seqno;
-			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
-				struct drm_i915_private *dev_priv = to_i915(ring->dev);
-				list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
-					       &dev_priv->mm.fence_list);
-			}
+			fenced |= VMA_IS_FENCED;
+			if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
+				fenced |= VMA_HAS_FENCE;
 		}
 
-		trace_i915_gem_object_change_domain(obj, old_read, old_write);
+		i915_vma_move_to_active(vma, ring, fenced);
 	}
 }
 
@@ -1003,7 +984,7 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
 	ring->gpu_caches_dirty = true;
 
 	/* Add a breadcrumb for the completion of the batch buffer */
-	(void)__i915_add_request(ring, file, obj, NULL);
+	(void)__i915_add_request(ring, file, obj);
 }
 
 static int
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index e60be3f..fc1223c 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -159,9 +159,10 @@ int i915_gem_render_state_init(struct intel_engine_cs *ring)
 	if (ret)
 		goto out;
 
-	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
+	so.obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
+	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring, 0);
 
-	ret = __i915_add_request(ring, NULL, so.obj, NULL);
+	ret = __i915_add_request(ring, NULL, so.obj);
 	/* __i915_add_request moves object to inactive if it fails */
 out:
 	render_state_fini(&so);
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index af5d31a..e46fb34 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -326,7 +326,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 
 	if (ret == 0) {
 		obj->fence_dirty =
-			obj->last_fenced_seqno ||
+			obj->last_fence.request ||
 			obj->fence_reg != I915_FENCE_REG_NONE;
 		obj->tiling_mode = tiling_mode;
 		obj->stride = stride;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index ebc8529..584b863 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -572,7 +572,7 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
 	if (i915_gem_obj_bound(src, vm))
 		dst->gtt_offset = i915_gem_obj_offset(src, vm);
 	else
-		dst->gtt_offset = -1UL;
+		dst->gtt_offset = -1;
 
 	reloc_offset = dst->gtt_offset;
 	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
@@ -653,11 +653,12 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 		       struct i915_vma *vma)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
+	struct i915_gem_request *rq = i915_gem_object_last_read(obj);
 
 	err->size = obj->base.size;
 	err->name = obj->base.name;
-	err->rseqno = obj->last_read_seqno;
-	err->wseqno = obj->last_write_seqno;
+	err->rseqno = i915_request_seqno(rq);
+	err->wseqno = i915_request_seqno(obj->last_write.request);
 	err->gtt_offset = vma->node.start;
 	err->read_domains = obj->base.read_domains;
 	err->write_domain = obj->base.write_domain;
@@ -671,7 +672,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->dirty = obj->dirty;
 	err->purgeable = obj->madv != I915_MADV_WILLNEED;
 	err->userptr = obj->userptr.mm != NULL;
-	err->ring = obj->ring ? obj->ring->id : -1;
+	err->ring = i915_request_ring_id(rq);
 	err->cache_level = obj->cache_level;
 }
 
@@ -963,7 +964,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
 				  struct drm_i915_error_state *error)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_request *request;
+	struct i915_gem_request *rq;
 	int i, count;
 
 	for (i = 0; i < I915_NUM_RINGS; i++) {
@@ -978,17 +979,17 @@ static void i915_gem_record_rings(struct drm_device *dev,
 
 		i915_record_ring_state(dev, error, ring, &error->ring[i]);
 
-		request = i915_gem_find_active_request(ring);
-		if (request) {
+		rq = i915_gem_find_active_request(ring);
+		if (rq) {
 			/* We need to copy these to an anonymous buffer
 			 * as the simplest method to avoid being overwritten
 			 * by userspace.
 			 */
 			error->ring[i].batchbuffer =
 				i915_error_object_create(dev_priv,
-							 request->batch_obj,
-							 request->ctx ?
-							 request->ctx->vm :
+							 rq->batch_obj,
+							 rq->ctx ?
+							 rq->ctx->vm :
 							 &dev_priv->gtt.base);
 
 			if (HAS_BROKEN_CS_TLB(dev_priv))
@@ -996,11 +997,11 @@ static void i915_gem_record_rings(struct drm_device *dev,
 					i915_error_ggtt_object_create(dev_priv,
 							     ring->scratch.obj);
 
-			if (request->file_priv) {
+			if (rq->file_priv) {
 				struct task_struct *task;
 
 				rcu_read_lock();
-				task = pid_task(request->file_priv->file->pid,
+				task = pid_task(rq->file_priv->file->pid,
 						PIDTYPE_PID);
 				if (task) {
 					strcpy(error->ring[i].comm, task->comm);
@@ -1019,7 +1020,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
 		i915_gem_record_active_context(ring, error, &error->ring[i]);
 
 		count = 0;
-		list_for_each_entry(request, &ring->request_list, list)
+		list_for_each_entry(rq, &ring->request_list, list)
 			count++;
 
 		error->ring[i].num_requests = count;
@@ -1032,13 +1033,13 @@ static void i915_gem_record_rings(struct drm_device *dev,
 		}
 
 		count = 0;
-		list_for_each_entry(request, &ring->request_list, list) {
+		list_for_each_entry(rq, &ring->request_list, list) {
 			struct drm_i915_error_request *erq;
 
 			erq = &error->ring[i].requests[count++];
-			erq->seqno = request->seqno;
-			erq->jiffies = request->emitted_jiffies;
-			erq->tail = request->tail;
+			erq->seqno = rq->seqno;
+			erq->jiffies = rq->emitted_jiffies;
+			erq->tail = rq->tail;
 		}
 	}
 }
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 717c111..6d4f5a7 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2935,14 +2935,14 @@ static u32
 ring_last_seqno(struct intel_engine_cs *ring)
 {
 	return list_entry(ring->request_list.prev,
-			  struct drm_i915_gem_request, list)->seqno;
+			  struct i915_gem_request, list)->seqno;
 }
 
 static bool
 ring_idle(struct intel_engine_cs *ring, u32 seqno)
 {
 	return (list_empty(&ring->request_list) ||
-		i915_seqno_passed(seqno, ring_last_seqno(ring)));
+		__i915_seqno_passed(seqno, ring_last_seqno(ring)));
 }
 
 static bool
@@ -3057,7 +3057,7 @@ static int semaphore_passed(struct intel_engine_cs *ring)
 	if (signaller->hangcheck.deadlock >= I915_NUM_RINGS)
 		return -1;
 
-	if (i915_seqno_passed(signaller->get_seqno(signaller, false), seqno))
+	if (__i915_seqno_passed(signaller->get_seqno(signaller, false), seqno))
 		return 1;
 
 	/* cursory check for an unkickable deadlock */
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 75f423d..f1c2a28 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -17,16 +17,16 @@ static bool gpu_active(struct drm_i915_private *i915)
 	int i;
 
 	for_each_ring(ring, i915, i) {
-		struct drm_i915_gem_request *rq;
+		struct i915_gem_request *rq;
 
 		if (list_empty(&ring->request_list))
 			continue;
 
 		rq = list_last_entry(&ring->request_list,
-				     struct drm_i915_gem_request,
+				     struct i915_gem_request,
 				     list);
 
-		if (i915_seqno_passed(ring->get_seqno(ring, true), rq->seqno))
+		if (i915_request_complete(rq, true))
 			continue;
 
 		return true;
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 63f6875..0ebd85d 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -389,7 +389,7 @@ TRACE_EVENT(i915_gem_ring_dispatch,
 	    TP_fast_assign(
 			   __entry->dev = ring->dev->primary->index;
 			   __entry->ring = ring->id;
-			   __entry->seqno = intel_ring_get_seqno(ring),
+			   __entry->seqno = intel_ring_get_request(ring)->seqno,
 			   __entry->flags = flags;
 			   i915_trace_irq_get(ring, __entry->seqno);
 			   ),
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index d828f47..9b7931c 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9167,6 +9167,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
 	BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
 	atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
 
+	i915_request_put(work->flip_queued_request);
 	kfree(work);
 }
 
@@ -9548,7 +9549,7 @@ static bool use_mmio_flip(struct intel_engine_cs *ring,
 	else if (i915.use_mmio_flip > 0)
 		return true;
 	else
-		return ring != obj->ring;
+		return ring != i915_request_ring(obj->last_write.request);
 }
 
 static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
@@ -9581,25 +9582,22 @@ static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
 
 static int intel_postpone_flip(struct drm_i915_gem_object *obj)
 {
-	struct intel_engine_cs *ring;
+	struct i915_gem_request *rq = obj->last_write.request;
 	int ret;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
 
-	if (!obj->last_write_seqno)
-		return 0;
-
-	ring = obj->ring;
-
-	if (i915_seqno_passed(ring->get_seqno(ring, true),
-			      obj->last_write_seqno))
+	if (rq == NULL)
 		return 0;
 
-	ret = i915_gem_check_olr(ring, obj->last_write_seqno);
+	ret = i915_gem_check_olr(rq);
 	if (ret)
 		return ret;
 
-	if (WARN_ON(!ring->irq_get(ring)))
+	if (i915_request_complete(rq, true))
+		return 0;
+
+	if (WARN_ON(!rq->ring->irq_get(rq->ring)))
 		return 0;
 
 	return 1;
@@ -9625,7 +9623,7 @@ void intel_notify_mmio_flip(struct intel_engine_cs *ring)
 		if (ring->id != mmio_flip->ring_id)
 			continue;
 
-		if (i915_seqno_passed(seqno, mmio_flip->seqno)) {
+		if (__i915_seqno_passed(seqno, mmio_flip->seqno)) {
 			intel_do_mmio_flip(intel_crtc);
 			mmio_flip->seqno = 0;
 			ring->irq_put(ring);
@@ -9643,6 +9641,7 @@ static int intel_queue_mmio_flip(struct drm_device *dev,
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct i915_gem_request *rq;
 	unsigned long irq_flags;
 	int ret;
 
@@ -9657,16 +9656,20 @@ static int intel_queue_mmio_flip(struct drm_device *dev,
 		return 0;
 	}
 
+	rq = obj->last_write.request;
+	if (WARN_ON(rq == NULL))
+		return 0;
+
 	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
-	intel_crtc->mmio_flip.seqno = obj->last_write_seqno;
-	intel_crtc->mmio_flip.ring_id = obj->ring->id;
+	intel_crtc->mmio_flip.seqno = rq->seqno;
+	intel_crtc->mmio_flip.ring_id = rq->ring->id;
 	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
 
 	/*
 	 * Double check to catch cases where irq fired before
 	 * mmio flip data was ready
 	 */
-	intel_notify_mmio_flip(obj->ring);
+	intel_notify_mmio_flip(rq->ring);
 	return 0;
 }
 
@@ -9695,9 +9698,8 @@ static bool __intel_pageflip_stall_check(struct drm_device *dev,
 		return false;
 
 	if (work->flip_ready_vblank == 0) {
-		if (work->ring &&
-		    !i915_seqno_passed(work->ring->get_seqno(work->ring, true),
-				      work->flip_queued_seqno))
+		struct i915_gem_request *rq = work->flip_queued_request;
+		if (rq && !i915_request_complete(rq, true))
 			return false;
 
 		work->flip_ready_vblank = drm_vblank_count(dev, intel_crtc->pipe);
@@ -9758,6 +9760,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	enum pipe pipe = intel_crtc->pipe;
 	struct intel_unpin_work *work;
 	struct intel_engine_cs *ring;
+	struct i915_gem_request *rq;
 	unsigned long flags;
 	int ret;
 
@@ -9856,7 +9859,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	} else if (IS_IVYBRIDGE(dev)) {
 		ring = &dev_priv->ring[BCS];
 	} else if (INTEL_INFO(dev)->gen >= 7) {
-		ring = obj->ring;
+		ring = i915_request_ring(obj->last_write.request);
 		if (ring == NULL || ring->id != RCS)
 			ring = &dev_priv->ring[BCS];
 	} else {
@@ -9864,7 +9867,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	}
 
 	if (use_mmio_flip(ring, obj, page_flip_flags)) {
-		ret = intel_pin_and_fence_fb_obj(dev, obj, obj->ring);
+		ret = intel_pin_and_fence_fb_obj(dev, obj, i915_request_ring(obj->last_write.request));
 		if (ret)
 			goto cleanup_pending;
 
@@ -9876,8 +9879,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 		if (ret)
 			goto cleanup_unpin;
 
-		work->flip_queued_seqno = obj->last_write_seqno;
-		work->ring = obj->ring;
+		rq = obj->last_write.request;
 	} else {
 		ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
 		if (ret)
@@ -9891,10 +9893,10 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 		if (ret)
 			goto cleanup_unpin;
 
-		work->flip_queued_seqno = intel_ring_get_seqno(ring);
-		work->ring = ring;
+		rq = intel_ring_get_request(ring);
 	}
 
+	work->flip_queued_request = i915_request_get(rq);
 	work->flip_queued_vblank = drm_vblank_count(dev, intel_crtc->pipe);
 	work->enable_stall_check = true;
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 274f77c..5f336a3 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -657,14 +657,13 @@ struct intel_unpin_work {
 	struct drm_i915_gem_object *old_fb_obj;
 	struct drm_i915_gem_object *pending_flip_obj;
 	struct drm_pending_vblank_event *event;
-	struct intel_engine_cs *ring;
 	atomic_t pending;
 #define INTEL_FLIP_INACTIVE	0
 #define INTEL_FLIP_PENDING	1
 #define INTEL_FLIP_COMPLETE	2
 	u32 flip_count;
 	u32 gtt_offset;
-	u32 flip_queued_seqno;
+	struct i915_gem_request *flip_queued_request;
 	int flip_queued_vblank;
 	int flip_ready_vblank;
 	bool enable_stall_check;
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index d94af27..c709ca5 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -183,7 +183,7 @@ struct intel_overlay {
 	u32 flip_addr;
 	struct drm_i915_gem_object *reg_bo;
 	/* flip handling */
-	uint32_t last_flip_req;
+	struct i915_gem_request *flip_request;
 	void (*flip_tail)(struct intel_overlay *);
 };
 
@@ -209,29 +209,49 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
 		io_mapping_unmap(regs);
 }
 
-static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
-					 void (*tail)(struct intel_overlay *))
+/* recover from an interruption due to a signal
+ * We have to be careful not to repeat work forever and make forward progress. */
+static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
 {
-	struct drm_device *dev = overlay->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
 	int ret;
 
-	BUG_ON(overlay->last_flip_req);
-	ret = i915_add_request(ring, &overlay->last_flip_req);
-	if (ret)
-		return ret;
+	if (overlay->flip_request == NULL)
+		return 0;
 
-	overlay->flip_tail = tail;
-	ret = i915_wait_seqno(ring, overlay->last_flip_req);
+	ret = i915_wait_request(overlay->flip_request);
 	if (ret)
 		return ret;
-	i915_gem_retire_requests(dev);
 
-	overlay->last_flip_req = 0;
+	i915_request_put(overlay->flip_request);
+	overlay->flip_request = NULL;
+
+	i915_gem_retire_requests(overlay->dev);
+
+	if (overlay->flip_tail)
+		overlay->flip_tail(overlay);
+
 	return 0;
 }
 
+static int intel_overlay_add_request(struct intel_overlay *overlay,
+				     struct intel_engine_cs *ring,
+				     void (*tail)(struct intel_overlay *))
+{
+	BUG_ON(overlay->flip_request);
+	overlay->flip_request = i915_request_get(intel_ring_get_request(ring));
+	overlay->flip_tail = tail;
+
+	return i915_add_request(ring);
+}
+
+static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
+					 struct intel_engine_cs *ring,
+					 void (*tail)(struct intel_overlay *))
+{
+	intel_overlay_add_request(overlay, ring, tail);
+	return intel_overlay_recover_from_interrupt(overlay);
+}
+
 /* overlay needs to be disable in OCMD reg */
 static int intel_overlay_on(struct intel_overlay *overlay)
 {
@@ -253,9 +273,9 @@ static int intel_overlay_on(struct intel_overlay *overlay)
 	intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
 	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
 	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	__intel_ring_advance(ring);
 
-	return intel_overlay_do_wait_request(overlay, NULL);
+	return intel_overlay_do_wait_request(overlay, ring, NULL);
 }
 
 /* overlay needs to be enabled in OCMD reg */
@@ -285,15 +305,18 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
 
 	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
 	intel_ring_emit(ring, flip_addr);
-	intel_ring_advance(ring);
+	__intel_ring_advance(ring);
 
-	return i915_add_request(ring, &overlay->last_flip_req);
+	return intel_overlay_add_request(overlay, ring, NULL);
 }
 
 static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
 {
 	struct drm_i915_gem_object *obj = overlay->old_vid_bo;
 
+	i915_gem_track_fb(obj, NULL,
+			  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
+
 	i915_gem_object_ggtt_unpin(obj);
 	drm_gem_object_unreference(&obj->base);
 
@@ -353,33 +376,9 @@ static int intel_overlay_off(struct intel_overlay *overlay)
 		intel_ring_emit(ring, flip_addr);
 		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
 	}
-	intel_ring_advance(ring);
-
-	return intel_overlay_do_wait_request(overlay, intel_overlay_off_tail);
-}
-
-/* recover from an interruption due to a signal
- * We have to be careful not to repeat work forever an make forward progess. */
-static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
-{
-	struct drm_device *dev = overlay->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
-	int ret;
-
-	if (overlay->last_flip_req == 0)
-		return 0;
+	__intel_ring_advance(ring);
 
-	ret = i915_wait_seqno(ring, overlay->last_flip_req);
-	if (ret)
-		return ret;
-	i915_gem_retire_requests(dev);
-
-	if (overlay->flip_tail)
-		overlay->flip_tail(overlay);
-
-	overlay->last_flip_req = 0;
-	return 0;
+	return intel_overlay_do_wait_request(overlay, ring, intel_overlay_off_tail);
 }
 
 /* Wait for pending overlay flip and release old frame.
@@ -388,10 +387,8 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
  */
 static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 {
-	struct drm_device *dev = overlay->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
-	int ret;
+	struct drm_i915_private *dev_priv = to_i915(overlay->dev);
+	int ret = 0;
 
 	/* Only wait if there is actually an old frame to release to
 	 * guarantee forward progress.
@@ -400,6 +397,8 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 		return 0;
 
 	if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
+		struct intel_engine_cs *ring = &dev_priv->ring[RCS];
+
 		/* synchronous slowpath */
 		ret = intel_ring_begin(ring, 2);
 		if (ret)
@@ -407,20 +406,14 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 
 		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
 		intel_ring_emit(ring, MI_NOOP);
-		intel_ring_advance(ring);
+		__intel_ring_advance(ring);
 
-		ret = intel_overlay_do_wait_request(overlay,
+		ret = intel_overlay_do_wait_request(overlay, ring,
 						    intel_overlay_release_old_vid_tail);
-		if (ret)
-			return ret;
-	}
-
-	intel_overlay_release_old_vid_tail(overlay);
+	} else
+		intel_overlay_release_old_vid_tail(overlay);
 
-
-	i915_gem_track_fb(overlay->old_vid_bo, NULL,
-			  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
-	return 0;
+	return ret;
 }
 
 struct put_image_params {
@@ -827,12 +820,7 @@ int intel_overlay_switch_off(struct intel_overlay *overlay)
 	iowrite32(0, &regs->OCMD);
 	intel_overlay_unmap_regs(overlay, regs);
 
-	ret = intel_overlay_off(overlay);
-	if (ret != 0)
-		return ret;
-
-	intel_overlay_off_tail(overlay);
-	return 0;
+	return intel_overlay_off(overlay);
 }
 
 static int check_overlay_possible_on_crtc(struct intel_overlay *overlay,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7c5a6c5..ae96de5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -726,7 +726,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,
 					   PIPE_CONTROL_FLUSH_ENABLE);
 		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
 		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
-		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
+		intel_ring_emit(signaller, signaller->preallocated_request->seqno);
 		intel_ring_emit(signaller, 0);
 		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
 					   MI_SEMAPHORE_TARGET(waiter->id));
@@ -763,7 +763,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,
 		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
 					   MI_FLUSH_DW_USE_GTT);
 		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
-		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
+		intel_ring_emit(signaller, signaller->preallocated_request->seqno);
 		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
 					   MI_SEMAPHORE_TARGET(waiter->id));
 		intel_ring_emit(signaller, 0);
@@ -797,7 +797,7 @@ static int gen6_signal(struct intel_engine_cs *signaller,
 		if (mbox_reg != GEN6_NOSYNC) {
 			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
 			intel_ring_emit(signaller, mbox_reg);
-			intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
+			intel_ring_emit(signaller, signaller->preallocated_request->seqno);
 		}
 	}
 
@@ -832,7 +832,7 @@ gen6_add_request(struct intel_engine_cs *ring)
 
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
+	intel_ring_emit(ring, ring->preallocated_request->seqno);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	__intel_ring_advance(ring);
 
@@ -950,7 +950,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
 			PIPE_CONTROL_WRITE_FLUSH |
 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
 	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
+	intel_ring_emit(ring, ring->preallocated_request->seqno);
 	intel_ring_emit(ring, 0);
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
 	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
@@ -969,7 +969,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
 			PIPE_CONTROL_NOTIFY);
 	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
+	intel_ring_emit(ring, ring->preallocated_request->seqno);
 	intel_ring_emit(ring, 0);
 	__intel_ring_advance(ring);
 
@@ -1224,7 +1224,7 @@ i9xx_add_request(struct intel_engine_cs *ring)
 
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
+	intel_ring_emit(ring, ring->preallocated_request->seqno);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	__intel_ring_advance(ring);
 
@@ -1602,7 +1602,8 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 	}
 
 	ring->dev = dev;
-	INIT_LIST_HEAD(&ring->active_list);
+	INIT_LIST_HEAD(&ring->read_list);
+	INIT_LIST_HEAD(&ring->write_list);
 	INIT_LIST_HEAD(&ring->request_list);
 	ringbuf->size = 32 * PAGE_SIZE;
 	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
@@ -1662,8 +1663,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
 	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
 
 	intel_destroy_ringbuffer_obj(ringbuf);
-	ring->preallocated_lazy_request = NULL;
-	ring->outstanding_lazy_seqno = 0;
+	ring->preallocated_request = NULL;
 
 	if (ring->cleanup)
 		ring->cleanup(ring);
@@ -1679,8 +1679,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
 static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
 {
 	struct intel_ringbuffer *ringbuf = ring->buffer;
-	struct drm_i915_gem_request *request;
-	u32 seqno = 0;
+	struct i915_gem_request *rq;
 	int ret;
 
 	if (ringbuf->last_retired_head != -1) {
@@ -1692,17 +1691,15 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
 			return 0;
 	}
 
-	list_for_each_entry(request, &ring->request_list, list) {
-		if (__ring_space(request->tail, ringbuf->tail, ringbuf->size) >= n) {
-			seqno = request->seqno;
+	list_for_each_entry(rq, &ring->request_list, list) {
+		if (__ring_space(rq->tail, ringbuf->tail, ringbuf->size) >= n)
 			break;
-		}
 	}
 
-	if (seqno == 0)
+	if (rq == list_entry(&ring->request_list, typeof(*rq), list))
 		return -ENOSPC;
 
-	ret = i915_wait_seqno(ring, seqno);
+	ret = i915_wait_request(rq);
 	if (ret)
 		return ret;
 
@@ -1803,12 +1800,11 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
 
 int intel_ring_idle(struct intel_engine_cs *ring)
 {
-	u32 seqno;
 	int ret;
 
 	/* We need to add any requests required to flush the objects and ring */
-	if (ring->outstanding_lazy_seqno) {
-		ret = i915_add_request(ring, NULL);
+	if (ring->preallocated_request) {
+		ret = i915_add_request(ring);
 		if (ret)
 			return ret;
 	}
@@ -1817,30 +1813,36 @@ int intel_ring_idle(struct intel_engine_cs *ring)
 	if (list_empty(&ring->request_list))
 		return 0;
 
-	seqno = list_entry(ring->request_list.prev,
-			   struct drm_i915_gem_request,
-			   list)->seqno;
-
-	return i915_wait_seqno(ring, seqno);
+	return i915_wait_request(container_of(ring->request_list.prev,
+					      struct i915_gem_request,
+					      list));
 }
 
 static int
-intel_ring_alloc_seqno(struct intel_engine_cs *ring)
+intel_ring_alloc_request(struct intel_engine_cs *ring)
 {
-	if (ring->outstanding_lazy_seqno)
-		return 0;
+	struct i915_gem_request *rq;
+	int ret;
 
-	if (ring->preallocated_lazy_request == NULL) {
-		struct drm_i915_gem_request *request;
+	if (ring->preallocated_request)
+		return 0;
 
-		request = kmalloc(sizeof(*request), GFP_KERNEL);
-		if (request == NULL)
-			return -ENOMEM;
+	rq = kmalloc(sizeof(*rq), GFP_KERNEL);
+	if (rq == NULL)
+		return -ENOMEM;
 
-		ring->preallocated_lazy_request = request;
+	ret = i915_gem_get_seqno(ring->dev, &rq->seqno);
+	if (ret) {
+		kfree(rq);
+		return ret;
 	}
 
-	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
+	kref_init(&rq->kref);
+	rq->ring = ring;
+	rq->completed = false;
+
+	ring->preallocated_request = rq;
+	return 0;
 }
 
 static int __intel_ring_prepare(struct intel_engine_cs *ring,
@@ -1876,7 +1878,7 @@ int intel_ring_begin(struct intel_engine_cs *ring,
 		return ret;
 
 	/* Preallocate the olr before touching the ring, */
-	ret = intel_ring_alloc_seqno(ring);
+	ret = intel_ring_alloc_request(ring);
 	if (ret)
 		return ret;
 
@@ -1886,7 +1888,7 @@ int intel_ring_begin(struct intel_engine_cs *ring,
 		return ret;
 
 	/* but we may flush the seqno during prepare. */
-	ret = intel_ring_alloc_seqno(ring);
+	ret = intel_ring_alloc_request(ring);
 	if (ret)
 		return ret;
 
@@ -1921,7 +1923,7 @@ void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	BUG_ON(ring->outstanding_lazy_seqno);
+	BUG_ON(ring->preallocated_request);
 
 	if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
 		I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
@@ -2300,7 +2302,8 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
 	ring->cleanup = render_ring_cleanup;
 
 	ring->dev = dev;
-	INIT_LIST_HEAD(&ring->active_list);
+	INIT_LIST_HEAD(&ring->read_list);
+	INIT_LIST_HEAD(&ring->write_list);
 	INIT_LIST_HEAD(&ring->request_list);
 
 	ringbuf->size = size;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index dcd2e44..2a78051 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -222,7 +222,7 @@ struct  intel_engine_cs {
 	 *
 	 * A reference is held on the buffer while on this list.
 	 */
-	struct list_head active_list;
+	struct list_head read_list, write_list, fence_list;
 
 	/**
 	 * List of breadcrumbs associated with GPU requests currently
@@ -233,8 +233,7 @@ struct  intel_engine_cs {
 	/**
 	 * Do we have some not yet emitted requests outstanding?
 	 */
-	struct drm_i915_gem_request *preallocated_lazy_request;
-	u32 outstanding_lazy_seqno;
+	struct i915_gem_request *preallocated_request;
 	bool gpu_caches_dirty;
 	bool fbc_dirty;
 
@@ -393,10 +392,10 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
 	return ringbuf->tail;
 }
 
-static inline u32 intel_ring_get_seqno(struct intel_engine_cs *ring)
+static inline struct i915_gem_request *intel_ring_get_request(struct intel_engine_cs *ring)
 {
-	BUG_ON(ring->outstanding_lazy_seqno == 0);
-	return ring->outstanding_lazy_seqno;
+	BUG_ON(ring->preallocated_request == 0);
+	return ring->preallocated_request;
 }
 
 static inline void i915_trace_irq_get(struct intel_engine_cs *ring, u32 seqno)
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] drm/i915: s/seqno/request/ tracking inside objects
  2014-07-25 12:27 [PATCH] drm/i915: s/seqno/request/ tracking inside objects Chris Wilson
@ 2014-07-28 16:24 ` Daniel Vetter
  2014-07-28 20:44 ` Jesse Barnes
  1 sibling, 0 replies; 8+ messages in thread
From: Daniel Vetter @ 2014-07-28 16:24 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Daniel Vetter, intel-gfx, Brad Volkin

On Fri, Jul 25, 2014 at 01:27:00PM +0100, Chris Wilson wrote:
> At the heart of this change is that the seqno is a too low level of an
> abstraction to handle the growing complexities of command tracking, both
> with the introduction of multiple command queues with execbuffer and the
> potential for reordering with a scheduler. On top of the seqno we have
> the request. Conceptually this is just a fence, but it also has
> substantial bookkeeping of its own in order to track the context and
> batch in flight, for example. It is the central structure upon which we
> can extend with dependency tracking et al.
> 
> As regards the objects, they were using the seqno as a simple fence,
> upon which is check or even wait upon for command completion. This patch
> exchanges that seqno/ring pair with the request itself. For the
> majority, lifetime of the request is ordered by how we retire objects
> then requests. However, both the unlocked waits and probing elsewhere do
> not tie into the normal request lifetimes and so we need to introduce a
> kref. Extending the objects to use the request as the fence naturally
> extends to segregrating read/write fence tracking. This has significance
> for it reduces the number of semaphores we need to emit, reducing the
> likelihood of #54226, and improving performance overall.
> 
> NOTE: this is not against bare drm-intel-nightly and is likely to
> conflict with execlists...
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Oscar Mateo <oscar.mateo@intel.com>
> Cc: Brad Volkin <bradley.d.volkin@intel.com>

Ok, read through it and I like it overall. Also, right now is the perfect
time to merge it since we're right before the merge window. But this here
needs to be split up a bit to cut out prep patches. I've noticed a few
things in-line, but there's also the mechanical stuff (like dropping the
drm_ prefix from requests).
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c          |  37 +-
>  drivers/gpu/drm/i915/i915_drv.h              | 108 ++--
>  drivers/gpu/drm/i915/i915_gem.c              | 769 ++++++++++++++++-----------
>  drivers/gpu/drm/i915/i915_gem_context.c      |  19 +-
>  drivers/gpu/drm/i915/i915_gem_exec.c         |  10 +-
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  37 +-
>  drivers/gpu/drm/i915/i915_gem_render_state.c |   5 +-
>  drivers/gpu/drm/i915/i915_gem_tiling.c       |   2 +-
>  drivers/gpu/drm/i915/i915_gpu_error.c        |  35 +-
>  drivers/gpu/drm/i915/i915_irq.c              |   6 +-
>  drivers/gpu/drm/i915/i915_perf.c             |   6 +-
>  drivers/gpu/drm/i915/i915_trace.h            |   2 +-
>  drivers/gpu/drm/i915/intel_display.c         |  50 +-
>  drivers/gpu/drm/i915/intel_drv.h             |   3 +-
>  drivers/gpu/drm/i915/intel_overlay.c         | 118 ++--
>  drivers/gpu/drm/i915/intel_ringbuffer.c      |  83 +--
>  drivers/gpu/drm/i915/intel_ringbuffer.h      |  11 +-
>  17 files changed, 745 insertions(+), 556 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 406e630..676d5f1 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -122,10 +122,11 @@ static inline const char *get_global_flag(struct drm_i915_gem_object *obj)
>  static void
>  describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>  {
> +	struct i915_gem_request *rq = i915_gem_object_last_read(obj);
>  	struct i915_vma *vma;
>  	int pin_count = 0;
>  
> -	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %u %u %u%s%s%s",
> +	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
>  		   &obj->base,
>  		   get_pin_flag(obj),
>  		   get_tiling_flag(obj),
> @@ -133,9 +134,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>  		   obj->base.size / 1024,
>  		   obj->base.read_domains,
>  		   obj->base.write_domain,
> -		   obj->last_read_seqno,
> -		   obj->last_write_seqno,
> -		   obj->last_fenced_seqno,
> +		   i915_request_seqno(rq),
> +		   i915_request_seqno(obj->last_write.request),
> +		   i915_request_seqno(obj->last_fence.request),
>  		   i915_cache_level_str(obj->cache_level),
>  		   obj->dirty ? " dirty" : "",
>  		   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
> @@ -168,8 +169,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>  		*t = '\0';
>  		seq_printf(m, " (%s mappable)", s);
>  	}
> -	if (obj->ring != NULL)
> -		seq_printf(m, " (%s)", obj->ring->name);
> +	if (rq)
> +		seq_printf(m, " (%s)", rq->ring->name);
>  	if (obj->frontbuffer_bits)
>  		seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
>  }
> @@ -336,7 +337,7 @@ static int per_file_stats(int id, void *ptr, void *data)
>  			if (ppgtt->ctx && ppgtt->ctx->file_priv != stats->file_priv)
>  				continue;
>  
> -			if (obj->ring) /* XXX per-vma statistic */
> +			if (obj->active) /* XXX per-vma statistic */
>  				stats->active += obj->base.size;
>  			else
>  				stats->inactive += obj->base.size;
> @@ -346,7 +347,7 @@ static int per_file_stats(int id, void *ptr, void *data)
>  	} else {
>  		if (i915_gem_obj_ggtt_bound(obj)) {
>  			stats->global += obj->base.size;
> -			if (obj->ring)
> +			if (obj->active)
>  				stats->active += obj->base.size;
>  			else
>  				stats->inactive += obj->base.size;
> @@ -614,12 +615,12 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
>  				seq_printf(m, "Flip pending (waiting for vsync) on pipe %c (plane %c)\n",
>  					   pipe, plane);
>  			}
> -			if (work->ring)
> +			if (work->flip_queued_request) {
> +				struct i915_gem_request *rq = work->flip_queued_request;
>  				seq_printf(m, "Flip queued on %s at seqno %u, now %u\n",
> -						work->ring->name,
> -						work->flip_queued_seqno,
> -						work->ring->get_seqno(work->ring, true));
> -			else
> +						rq->ring->name, rq->seqno,
> +						rq->ring->get_seqno(rq->ring, true));
> +			} else
>  				seq_printf(m, "Flip not associated with any ring\n");
>  			seq_printf(m, "Flip queued on frame %d, (was ready on frame %d), now %d\n",
>  				   work->flip_queued_vblank,
> @@ -656,7 +657,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
>  	struct drm_device *dev = node->minor->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_engine_cs *ring;
> -	struct drm_i915_gem_request *gem_request;
> +	struct i915_gem_request *rq;
>  	int ret, count, i;
>  
>  	ret = mutex_lock_interruptible(&dev->struct_mutex);
> @@ -669,12 +670,10 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
>  			continue;
>  
>  		seq_printf(m, "%s requests:\n", ring->name);
> -		list_for_each_entry(gem_request,
> -				    &ring->request_list,
> -				    list) {
> +		list_for_each_entry(rq, &ring->request_list, list) {
>  			seq_printf(m, "    %d @ %d\n",
> -				   gem_request->seqno,
> -				   (int) (jiffies - gem_request->emitted_jiffies));
> +				   rq->seqno,
> +				   (int)(jiffies - rq->emitted_jiffies));
>  		}
>  		count++;
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 9837b0f..5794d096 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -187,6 +187,7 @@ enum hpd_pin {
>  struct drm_i915_private;
>  struct i915_mm_struct;
>  struct i915_mmu_object;
> +struct i915_gem_request;
>  
>  enum intel_dpll_id {
>  	DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
> @@ -1720,16 +1721,15 @@ struct drm_i915_gem_object {
>  	struct drm_mm_node *stolen;
>  	struct list_head global_list;
>  
> -	struct list_head ring_list;
>  	/** Used in execbuf to temporarily hold a ref */
>  	struct list_head obj_exec_link;
>  
>  	/**
>  	 * This is set if the object is on the active lists (has pending
> -	 * rendering and so a non-zero seqno), and is not set if it i s on
> -	 * inactive (ready to be unbound) list.
> +	 * rendering and so a submitted request), and is not set if it is on
> +	 * inactive (ready to be unbound) list. We track activity per engine.
>  	 */
> -	unsigned int active:1;
> +	unsigned int active:3;

Could we #define this and then add a BUILD_BUG_ON that 1 << shift >=
NUM_RINGS?
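
Rough sketch of what I mean (I915_BO_ACTIVE_BITS is a made-up name, pick
whatever fits, and the BUILD_BUG_ON can live in any function that's handy,
e.g. i915_gem_object_init()):

	/* i915_drv.h */
	#define I915_BO_ACTIVE_BITS 3
	...
	unsigned int active:I915_BO_ACTIVE_BITS;

	/* at function scope, e.g. in i915_gem_object_init() */
	BUILD_BUG_ON(I915_NUM_RINGS > (1 << I915_BO_ACTIVE_BITS) - 1);

That way the bitfield width can't silently fall behind if we ever grow more
rings.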

>  
>  	/**
>  	 * This is set if the object has been written to since last bound
> @@ -1797,13 +1797,11 @@ struct drm_i915_gem_object {
>  	void *dma_buf_vmapping;
>  	int vmapping_count;
>  
> -	struct intel_engine_cs *ring;
> -
> -	/** Breadcrumb of last rendering to the buffer. */
> -	uint32_t last_read_seqno;
> -	uint32_t last_write_seqno;
> -	/** Breadcrumb of last fenced GPU access to the buffer. */
> -	uint32_t last_fenced_seqno;
> +	/** Breadcrumbs of last rendering to the buffer. */
> +	struct {
> +		struct i915_gem_request *request;
> +		struct list_head ring_list;
> +	} last_write, last_read[I915_NUM_RINGS], last_fence;
>  
>  	/** Current tiling stride for the object, if it's tiled. */
>  	uint32_t stride;
> @@ -1836,6 +1834,8 @@ struct drm_i915_gem_object {
>  };
>  #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
>  
> +struct i915_gem_request *i915_gem_object_last_read(struct drm_i915_gem_object *obj);
> +
>  void i915_gem_track_fb(struct drm_i915_gem_object *old,
>  		       struct drm_i915_gem_object *new,
>  		       unsigned frontbuffer_bits);
> @@ -1850,7 +1850,9 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
>   * sequence-number comparisons on buffer last_rendering_seqnos, and associate
>   * an emission time with seqnos for tracking how far ahead of the GPU we are.
>   */
> -struct drm_i915_gem_request {
> +struct i915_gem_request {
> +	struct kref kref;
> +
>  	/** On Which ring this request was generated */
>  	struct intel_engine_cs *ring;
>  
> @@ -1878,8 +1880,60 @@ struct drm_i915_gem_request {
>  	struct drm_i915_file_private *file_priv;
>  	/** file_priv list entry for this request */
>  	struct list_head client_list;
> +
> +	bool completed:1;
>  };
>  
> +static inline struct intel_engine_cs *i915_request_ring(struct i915_gem_request *rq)
> +{
> +	return rq ? rq->ring : NULL;
> +}
> +
> +static inline int i915_request_ring_id(struct i915_gem_request *rq)
> +{
> +	return rq ? rq->ring->id : -1;
> +}
> +
> +static inline u32 i915_request_seqno(struct i915_gem_request *rq)
> +{
> +	return rq ? rq->seqno : 0;
> +}
> +
> +/**
> + * Returns true if seq1 is later than seq2.
> + */
> +static inline bool
> +__i915_seqno_passed(uint32_t seq1, uint32_t seq2)
> +{
> +	return (int32_t)(seq1 - seq2) >= 0;
> +}
> +
> +static inline bool
> +i915_request_complete(struct i915_gem_request *rq, bool lazy)
> +{
> +	if (!rq->completed)
> +		rq->completed = __i915_seqno_passed(rq->ring->get_seqno(rq->ring, lazy),
> +						    rq->seqno);
> +	return rq->completed;
> +}
> +
> +static inline struct i915_gem_request *
> +i915_request_get(struct i915_gem_request *rq)
> +{
> +	if (rq)
> +		kref_get(&rq->kref);
> +	return rq;
> +}
> +
> +void __i915_request_free(struct kref *kref);
> +
> +static inline void
> +i915_request_put(struct i915_gem_request *rq)
> +{
> +	if (rq)
> +		kref_put(&rq->kref, __i915_request_free);
> +}
> +
>  struct drm_i915_file_private {
>  	struct drm_i915_private *dev_priv;
>  	struct drm_file *file;
> @@ -2335,22 +2389,18 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>  
>  int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
>  int i915_gem_object_sync(struct drm_i915_gem_object *obj,
> -			 struct intel_engine_cs *to);
> +			 struct intel_engine_cs *to,
> +			 bool readonly);
>  void i915_vma_move_to_active(struct i915_vma *vma,
> -			     struct intel_engine_cs *ring);
> +			     struct intel_engine_cs *ring,
> +			     unsigned fenced);
> +#define VMA_IS_FENCED 0x1
> +#define VMA_HAS_FENCE 0x2
>  int i915_gem_dumb_create(struct drm_file *file_priv,
>  			 struct drm_device *dev,
>  			 struct drm_mode_create_dumb *args);
>  int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
>  		      uint32_t handle, uint64_t *offset);
> -/**
> - * Returns true if seq1 is later than seq2.
> - */
> -static inline bool
> -i915_seqno_passed(uint32_t seq1, uint32_t seq2)
> -{
> -	return (int32_t)(seq1 - seq2) >= 0;
> -}
>  
>  int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
>  int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
> @@ -2360,14 +2410,14 @@ int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
>  bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj);
>  void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj);
>  
> -struct drm_i915_gem_request *
> +struct i915_gem_request *
>  i915_gem_find_active_request(struct intel_engine_cs *ring);
>  
>  bool i915_gem_retire_requests(struct drm_device *dev);
>  void i915_gem_retire_requests_ring(struct intel_engine_cs *ring);
>  int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
>  				      bool interruptible);
> -int __must_check i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno);
> +int __must_check i915_gem_check_olr(struct i915_gem_request *rq);
>  
>  static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
>  {
> @@ -2411,12 +2461,10 @@ int __must_check i915_gpu_idle(struct drm_device *dev);
>  int __must_check i915_gem_suspend(struct drm_device *dev);
>  int __i915_add_request(struct intel_engine_cs *ring,
>  		       struct drm_file *file,
> -		       struct drm_i915_gem_object *batch_obj,
> -		       u32 *seqno);
> -#define i915_add_request(ring, seqno) \
> -	__i915_add_request(ring, NULL, NULL, seqno)
> -int __must_check i915_wait_seqno(struct intel_engine_cs *ring,
> -				 uint32_t seqno);
> +		       struct drm_i915_gem_object *batch_obj);
> +#define i915_add_request(ring) \
> +	__i915_add_request(ring, NULL, NULL)
> +int __must_check i915_wait_request(struct i915_gem_request *rq);
>  int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
>  int __must_check
>  i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index f3ad6fb..d208658 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -48,8 +48,6 @@ static __must_check int
>  i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
>  					    struct drm_i915_file_private *file_priv,
>  					    bool readonly);
> -static void
> -i915_gem_object_retire(struct drm_i915_gem_object *obj);
>  
>  static void i915_gem_write_fence(struct drm_device *dev, int reg,
>  				 struct drm_i915_gem_object *obj);
> @@ -118,6 +116,73 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
>  	spin_unlock(&dev_priv->mm.object_stat_lock);
>  }
>  
> +static void
> +i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
> +{
> +	intel_fb_obj_flush(obj, true);
> +	obj->last_write.request = NULL;
> +	list_del_init(&obj->last_write.ring_list);
> +}
> +
> +static void
> +i915_gem_object_retire__fence(struct drm_i915_gem_object *obj)
> +{
> +	obj->last_fence.request = NULL;
> +	list_del_init(&obj->last_fence.ring_list);
> +}
> +
> +static void
> +i915_gem_object_retire__read(struct drm_i915_gem_object *obj,
> +			     struct intel_engine_cs *ring)
> +{
> +	struct i915_vma *vma;
> +
> +	BUG_ON(obj->active == 0);
> +	BUG_ON(obj->base.write_domain);
> +
> +	obj->last_read[ring->id].request = NULL;
> +	list_del_init(&obj->last_read[ring->id].ring_list);
> +
> +	if (--obj->active)
> +		return;
> +
> +	BUG_ON(obj->last_write.request);
> +	BUG_ON(obj->last_fence.request);
> +
> +	list_for_each_entry(vma, &obj->vma_list, vma_link) {
> +		if (!list_empty(&vma->mm_list))
> +			list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
> +	}
> +
> +	drm_gem_object_unreference(&obj->base);
> +
> +	WARN_ON(i915_verify_lists(dev));
> +}
> +
> +static void
> +i915_gem_object_retire(struct drm_i915_gem_object *obj)
> +{
> +	struct i915_gem_request *rq;
> +	int i;
> +
> +	if (!obj->active)
> +		return;
> +
> +	rq = obj->last_write.request;
> +	if (rq && i915_request_complete(rq, true))
> +		i915_gem_object_retire__write(obj);
> +
> +	rq = obj->last_fence.request;
> +	if (rq && i915_request_complete(rq, true))
> +		i915_gem_object_retire__fence(obj);
> +
> +	for (i = 0; i < I915_NUM_RINGS; i++) {
> +		rq = obj->last_read[i].request;
> +		if (rq && i915_request_complete(rq, true))
> +			i915_gem_object_retire__read(obj, rq->ring);
> +	}
> +}
> +
>  static int
>  i915_gem_wait_for_error(struct i915_gpu_error *error)
>  {
> @@ -1337,15 +1402,15 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
>   * equal.
>   */
>  int
> -i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno)
> +i915_gem_check_olr(struct i915_gem_request *rq)
>  {
>  	int ret;
>  
> -	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> +	BUG_ON(!mutex_is_locked(&rq->ring->dev->struct_mutex));
>  
>  	ret = 0;
> -	if (seqno == ring->outstanding_lazy_seqno)
> -		ret = i915_add_request(ring, NULL);
> +	if (rq == rq->ring->preallocated_request)
> +		ret = i915_add_request(rq->ring);
>  
>  	return ret;
>  }
> @@ -1370,9 +1435,8 @@ static bool can_wait_boost(struct drm_i915_file_private *file_priv)
>  }
>  
>  /**
> - * __wait_seqno - wait until execution of seqno has finished
> - * @ring: the ring expected to report seqno
> - * @seqno: duh!
> + * __wait_request - wait until execution of request has finished
> + * @request: the request to wait upon
>   * @reset_counter: reset sequence associated with the given seqno
>   * @interruptible: do an interruptible wait (normally yes)
>   * @timeout: in - how long to wait (NULL forever); out - how much time remaining
> @@ -1387,24 +1451,26 @@ static bool can_wait_boost(struct drm_i915_file_private *file_priv)
>   * Returns 0 if the seqno was found within the alloted time. Else returns the
>   * errno with remaining time filled in timeout argument.
>   */
> -static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
> -			unsigned reset_counter,
> -			bool interruptible,
> -			struct timespec *timeout,
> -			struct drm_i915_file_private *file_priv)
> +static int __wait_request(struct i915_gem_request *rq,
> +			  unsigned reset_counter,
> +			  bool interruptible,
> +			  struct timespec *timeout,
> +			  struct drm_i915_file_private *file_priv)
>  {
> +	struct intel_engine_cs *ring = rq->ring;
>  	struct drm_device *dev = ring->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_i915_private *dev_priv = to_i915(dev);
>  	const bool irq_test_in_progress =
>  		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
>  	struct timespec before, now;
>  	DEFINE_WAIT(wait);
>  	unsigned long timeout_expire;
> +	u32 seqno = rq->seqno;
>  	int ret;
>  
>  	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
>  
> -	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
> +	if (i915_request_complete(rq, true))
>  		return 0;
>  
>  	timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
> @@ -1440,7 +1506,7 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
>  			break;
>  		}
>  
> -		if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) {
> +		if (i915_request_complete(rq, false)) {
>  			ret = 0;
>  			break;
>  		}
> @@ -1494,46 +1560,30 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
>   * request and object lists appropriately for that event.
>   */
>  int
> -i915_wait_seqno(struct intel_engine_cs *ring, uint32_t seqno)
> +i915_wait_request(struct i915_gem_request *rq)
>  {
> -	struct drm_device *dev = ring->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	bool interruptible = dev_priv->mm.interruptible;
> +	struct drm_device *dev = rq->ring->dev;
> +	struct drm_i915_private *dev_priv = to_i915(dev);
>  	int ret;
>  
> -	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
> -	BUG_ON(seqno == 0);
> +	if (WARN_ON(!mutex_is_locked(&dev->struct_mutex)))
> +		return -EINVAL;
> +
> +	if (i915_request_complete(rq, true))
> +		return 0;
>  
> -	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
> +	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
> +				   dev_priv->mm.interruptible);
>  	if (ret)
>  		return ret;
>  
> -	ret = i915_gem_check_olr(ring, seqno);
> +	ret = i915_gem_check_olr(rq);
>  	if (ret)
>  		return ret;
>  
> -	return __wait_seqno(ring, seqno,
> -			    atomic_read(&dev_priv->gpu_error.reset_counter),
> -			    interruptible, NULL, NULL);
> -}
> -
> -static int
> -i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
> -				     struct intel_engine_cs *ring)
> -{
> -	if (!obj->active)
> -		return 0;
> -
> -	/* Manually manage the write flush as we may have not yet
> -	 * retired the buffer.
> -	 *
> -	 * Note that the last_write_seqno is always the earlier of
> -	 * the two (read/write) seqno, so if we haved successfully waited,
> -	 * we know we have passed the last write.
> -	 */
> -	obj->last_write_seqno = 0;
> -
> -	return 0;
> +	return __wait_request(rq,
> +			      atomic_read(&dev_priv->gpu_error.reset_counter),
> +			      dev_priv->mm.interruptible, NULL, NULL);
>  }
>  
>  /**
> @@ -1544,19 +1594,37 @@ static __must_check int
>  i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
>  			       bool readonly)
>  {
> -	struct intel_engine_cs *ring = obj->ring;
> -	u32 seqno;
> -	int ret;
> +	int i, ret;
>  
> -	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
> -	if (seqno == 0)
> -		return 0;
> +	if (readonly) {
> +		if (obj->last_write.request == NULL)
> +			return 0;
>  
> -	ret = i915_wait_seqno(ring, seqno);
> -	if (ret)
> -		return ret;
> +		ret = i915_wait_request(obj->last_write.request);
> +		if (ret)
> +			return ret;
> +	} else {
> +		for (i = 0; i < I915_NUM_RINGS; i++) {
> +			if (obj->last_read[i].request == NULL)
> +				continue;
> +
> +			ret = i915_wait_request(obj->last_read[i].request);
> +			if (ret)
> +				return ret;
> +		}
> +	}
>  
> -	return i915_gem_object_wait_rendering__tail(obj, ring);
> +	/* Manually manage the write flush as we may have not yet
> +	 * retired the buffer.
> +	 *
> +	 * Note that the last_write_seqno is always the earlier of
> +	 * the two (read/write) seqno, so if we haved successfully waited,
> +	 * we know we have passed the last write.
> +	 */
> +	if (obj->last_write.request)
> +		i915_gem_object_retire__write(obj);
> +
> +	return 0;
>  }
>  
>  /* A nonblocking variant of the above wait. This is a highly dangerous routine
> @@ -1569,34 +1637,48 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
>  {
>  	struct drm_device *dev = obj->base.dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_engine_cs *ring = obj->ring;
> +	struct i915_gem_request *rq[I915_NUM_RINGS] = {};
>  	unsigned reset_counter;
> -	u32 seqno;
> -	int ret;
> +	int i, n, ret;
>  
>  	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
>  	BUG_ON(!dev_priv->mm.interruptible);
>  
> -	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
> -	if (seqno == 0)
> +	n = 0;
> +	if (readonly) {
> +		if (obj->last_write.request)
> +			rq[n++] = i915_request_get(obj->last_write.request);
> +	} else {
> +		for (i = 0; i < I915_NUM_RINGS; i++)
> +			if (obj->last_read[i].request)
> +				rq[n++] = i915_request_get(obj->last_read[i].request);
> +	}
> +	if (n == 0)
>  		return 0;
>  
>  	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
>  	if (ret)
> -		return ret;
> +		goto out;
>  
> -	ret = i915_gem_check_olr(ring, seqno);
> -	if (ret)
> -		return ret;
> +	for (i = 0; i < n; i++) {
> +		ret = i915_gem_check_olr(rq[i]);
> +		if (ret)
> +			goto out;
> +	}
>  
>  	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
>  	mutex_unlock(&dev->struct_mutex);
> -	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file_priv);
> +
> +	for (i = 0; ret == 0 && i < n; i++)
> +		ret = __wait_request(rq[i], reset_counter, true, NULL, file_priv);
> +
>  	mutex_lock(&dev->struct_mutex);
> -	if (ret)
> -		return ret;
>  
> -	return i915_gem_object_wait_rendering__tail(obj, ring);
> +out:
> +	for (i = 0; i < n; i++)
> +		i915_request_put(rq[i]);
> +
> +	return ret;
>  }
>  
>  /**
> @@ -2387,78 +2469,57 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>  	return 0;
>  }
>  
> -static void
> -i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
> -			       struct intel_engine_cs *ring)
> -{
> -	u32 seqno = intel_ring_get_seqno(ring);
> -
> -	BUG_ON(ring == NULL);
> -	if (obj->ring != ring && obj->last_write_seqno) {
> -		/* Keep the seqno relative to the current ring */
> -		obj->last_write_seqno = seqno;
> -	}
> -	obj->ring = ring;
> -
> -	/* Add a reference if we're newly entering the active list. */
> -	if (!obj->active) {
> -		drm_gem_object_reference(&obj->base);
> -		obj->active = 1;
> -	}
> -
> -	list_move_tail(&obj->ring_list, &ring->active_list);
> -
> -	obj->last_read_seqno = seqno;
> -}
> -
>  void i915_vma_move_to_active(struct i915_vma *vma,
> -			     struct intel_engine_cs *ring)
> +			     struct intel_engine_cs *ring,
> +			     unsigned fenced)
>  {
> -	list_move_tail(&vma->mm_list, &vma->vm->active_list);
> -	return i915_gem_object_move_to_active(vma->obj, ring);
> -}
> +	struct drm_i915_gem_object *obj = vma->obj;
> +	struct i915_gem_request *rq = intel_ring_get_request(ring);
> +	u32 old_read = obj->base.read_domains;
> +	u32 old_write = obj->base.write_domain;
>  
> -static void
> -i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
> -{
> -	struct i915_vma *vma;
> +	BUG_ON(rq == NULL);
>  
> -	BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
> -	BUG_ON(!obj->active);
> +	obj->base.write_domain = obj->base.pending_write_domain;
> +	if (obj->base.write_domain == 0)
> +		obj->base.pending_read_domains |= obj->base.read_domains;
> +	obj->base.read_domains = obj->base.pending_read_domains;
>  
> -	list_for_each_entry(vma, &obj->vma_list, vma_link) {
> -		if (!list_empty(&vma->mm_list))
> -			list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
> -	}
> -
> -	intel_fb_obj_flush(obj, true);
> -
> -	list_del_init(&obj->ring_list);
> -	obj->ring = NULL;
> +	obj->base.pending_read_domains = 0;
> +	obj->base.pending_write_domain = 0;
>  
> -	obj->last_read_seqno = 0;
> -	obj->last_write_seqno = 0;
> -	obj->base.write_domain = 0;
> +	trace_i915_gem_object_change_domain(obj, old_read, old_write);
> +	if (obj->base.read_domains == 0)
> +		return;
>  
> -	obj->last_fenced_seqno = 0;
> +	/* Add a reference if we're newly entering the active list. */
> +	if (obj->last_read[ring->id].request == NULL && obj->active++ == 0)
> +		drm_gem_object_reference(&obj->base);
>  
> -	obj->active = 0;
> -	drm_gem_object_unreference(&obj->base);
> +	obj->last_read[ring->id].request = rq;
> +	list_move_tail(&obj->last_read[ring->id].ring_list, &ring->read_list);
>  
> -	WARN_ON(i915_verify_lists(dev));
> -}
> +	if (obj->base.write_domain) {
> +		obj->dirty = 1;
> +		obj->last_write.request = rq;
> +		list_move_tail(&obj->last_write.ring_list, &ring->write_list);
> +		intel_fb_obj_invalidate(obj, ring);
>  
> -static void
> -i915_gem_object_retire(struct drm_i915_gem_object *obj)
> -{
> -	struct intel_engine_cs *ring = obj->ring;
> +		/* update for the implicit flush after a batch */
> +		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
> +	}
>  
> -	if (ring == NULL)
> -		return;
> +	if (fenced) {
> +		obj->last_fence.request = rq;
> +		list_move_tail(&obj->last_fence.ring_list, &ring->fence_list);
> +		if (fenced & 2) {

Please use the #define here ...
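
i.e. a minimal sketch using the flag you already add above:

	if (fenced & VMA_HAS_FENCE) {

so the magic 2 doesn't leak out of the header.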

> +			struct drm_i915_private *dev_priv = to_i915(ring->dev);
> +			list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
> +					&dev_priv->mm.fence_list);
> +		}
> +	}
>  
> -	if (i915_seqno_passed(ring->get_seqno(ring, true),
> -			      obj->last_read_seqno))
> -		i915_gem_object_move_to_inactive(obj);
> +	list_move_tail(&vma->mm_list, &vma->vm->active_list);
>  }
>  
>  static int
> @@ -2533,11 +2594,10 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
>  
>  int __i915_add_request(struct intel_engine_cs *ring,
>  		       struct drm_file *file,
> -		       struct drm_i915_gem_object *obj,
> -		       u32 *out_seqno)
> +		       struct drm_i915_gem_object *obj)
>  {
>  	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> -	struct drm_i915_gem_request *request;
> +	struct i915_gem_request *rq;
>  	u32 request_ring_position, request_start;
>  	int ret;
>  
> @@ -2553,8 +2613,8 @@ int __i915_add_request(struct intel_engine_cs *ring,
>  	if (ret)
>  		return ret;
>  
> -	request = ring->preallocated_lazy_request;
> -	if (WARN_ON(request == NULL))
> +	rq = ring->preallocated_request;
> +	if (WARN_ON(rq == NULL))
>  		return -ENOMEM;
>  
>  	/* Record the position of the start of the request so that
> @@ -2568,10 +2628,8 @@ int __i915_add_request(struct intel_engine_cs *ring,
>  	if (ret)
>  		return ret;
>  
> -	request->seqno = intel_ring_get_seqno(ring);
> -	request->ring = ring;
> -	request->head = request_start;
> -	request->tail = request_ring_position;
> +	rq->head = request_start;
> +	rq->tail = request_ring_position;
>  
>  	/* Whilst this request exists, batch_obj will be on the
>  	 * active_list, and so will hold the active reference. Only when this
> @@ -2579,32 +2637,31 @@ int __i915_add_request(struct intel_engine_cs *ring,
>  	 * inactive_list and lose its active reference. Hence we do not need
>  	 * to explicitly hold another reference here.
>  	 */
> -	request->batch_obj = obj;
> +	rq->batch_obj = obj;
>  
>  	/* Hold a reference to the current context so that we can inspect
>  	 * it later in case a hangcheck error event fires.
>  	 */
> -	request->ctx = ring->last_context;
> -	if (request->ctx)
> -		i915_gem_context_reference(request->ctx);
> +	rq->ctx = ring->last_context;
> +	if (rq->ctx)
> +		i915_gem_context_reference(rq->ctx);
>  
> -	request->emitted_jiffies = jiffies;
> -	list_add_tail(&request->list, &ring->request_list);
> -	request->file_priv = NULL;
> +	rq->emitted_jiffies = jiffies;
> +	list_add_tail(&rq->list, &ring->request_list);
> +	rq->file_priv = NULL;
>  
>  	if (file) {
>  		struct drm_i915_file_private *file_priv = file->driver_priv;
>  
>  		spin_lock(&file_priv->mm.lock);
> -		request->file_priv = file_priv;
> -		list_add_tail(&request->client_list,
> +		rq->file_priv = file_priv;
> +		list_add_tail(&rq->client_list,
>  			      &file_priv->mm.request_list);
>  		spin_unlock(&file_priv->mm.lock);
>  	}
>  
> -	trace_i915_gem_request_add(ring, request->seqno);
> -	ring->outstanding_lazy_seqno = 0;
> -	ring->preallocated_lazy_request = NULL;
> +	trace_i915_gem_request_add(ring, rq->seqno);
> +	ring->preallocated_request = NULL;
>  
>  	if (!dev_priv->ums.mm_suspended) {
>  		i915_queue_hangcheck(ring->dev);
> @@ -2616,22 +2673,20 @@ int __i915_add_request(struct intel_engine_cs *ring,
>  		intel_mark_busy(dev_priv->dev);
>  	}
>  
> -	if (out_seqno)
> -		*out_seqno = request->seqno;
>  	return 0;
>  }
>  
>  static inline void
> -i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
> +i915_gem_request_remove_from_client(struct i915_gem_request *rq)
>  {
> -	struct drm_i915_file_private *file_priv = request->file_priv;
> +	struct drm_i915_file_private *file_priv = rq->file_priv;
>  
>  	if (!file_priv)
>  		return;
>  
>  	spin_lock(&file_priv->mm.lock);
> -	list_del(&request->client_list);
> -	request->file_priv = NULL;
> +	list_del(&rq->client_list);
> +	rq->file_priv = NULL;
>  	spin_unlock(&file_priv->mm.lock);
>  }
>  
> @@ -2679,30 +2734,37 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
>  	}
>  }
>  
> -static void i915_gem_free_request(struct drm_i915_gem_request *request)
> +void __i915_request_free(struct kref *kref)
> +{
> +	struct i915_gem_request *rq = container_of(kref, struct i915_gem_request, kref);
> +	kfree(rq);
> +}
> +
> +static void i915_request_retire(struct i915_gem_request *rq)
>  {
> -	list_del(&request->list);
> -	i915_gem_request_remove_from_client(request);
> +	rq->completed = true;
> +
> +	list_del(&rq->list);
> +	i915_gem_request_remove_from_client(rq);
>  
> -	if (request->ctx)
> -		i915_gem_context_unreference(request->ctx);
> +	if (rq->ctx) {
> +		i915_gem_context_unreference(rq->ctx);
> +		rq->ctx = NULL;
> +	}
>  
> -	kfree(request);
> +	i915_request_put(rq);
>  }
>  
> -struct drm_i915_gem_request *
> +struct i915_gem_request *
>  i915_gem_find_active_request(struct intel_engine_cs *ring)
>  {
> -	struct drm_i915_gem_request *request;
> -	u32 completed_seqno;
> +	struct i915_gem_request *rq;
>  
> -	completed_seqno = ring->get_seqno(ring, false);
> -
> -	list_for_each_entry(request, &ring->request_list, list) {
> -		if (i915_seqno_passed(completed_seqno, request->seqno))
> +	list_for_each_entry(rq, &ring->request_list, list) {
> +		if (i915_request_complete(rq, false))
>  			continue;
>  
> -		return request;
> +		return rq;
>  	}
>  
>  	return NULL;
> @@ -2711,33 +2773,53 @@ i915_gem_find_active_request(struct intel_engine_cs *ring)
>  static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
>  				       struct intel_engine_cs *ring)
>  {
> -	struct drm_i915_gem_request *request;
> +	struct i915_gem_request *rq;
>  	bool ring_hung;
>  
> -	request = i915_gem_find_active_request(ring);
> +	rq = i915_gem_find_active_request(ring);
>  
> -	if (request == NULL)
> +	if (rq == NULL)
>  		return;
>  
>  	ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
>  
> -	i915_set_reset_status(dev_priv, request->ctx, ring_hung);
> +	i915_set_reset_status(dev_priv, rq->ctx, ring_hung);
>  
> -	list_for_each_entry_continue(request, &ring->request_list, list)
> -		i915_set_reset_status(dev_priv, request->ctx, false);
> +	list_for_each_entry_continue(rq, &ring->request_list, list)
> +		i915_set_reset_status(dev_priv, rq->ctx, false);
>  }
>  
>  static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
>  					struct intel_engine_cs *ring)
>  {
> -	while (!list_empty(&ring->active_list)) {
> +	while (!list_empty(&ring->write_list)) {
>  		struct drm_i915_gem_object *obj;
>  
> -		obj = list_first_entry(&ring->active_list,
> +		obj = list_first_entry(&ring->write_list,
>  				       struct drm_i915_gem_object,
> -				       ring_list);
> +				       last_write.ring_list);
>  
> -		i915_gem_object_move_to_inactive(obj);
> +		i915_gem_object_retire__write(obj);
> +	}
> +
> +	while (!list_empty(&ring->fence_list)) {
> +		struct drm_i915_gem_object *obj;
> +
> +		obj = list_first_entry(&ring->fence_list,
> +				       struct drm_i915_gem_object,
> +				       last_fence.ring_list);
> +
> +		i915_gem_object_retire__fence(obj);
> +	}
> +
> +	while (!list_empty(&ring->read_list)) {
> +		struct drm_i915_gem_object *obj;
> +
> +		obj = list_first_entry(&ring->read_list,
> +				       struct drm_i915_gem_object,
> +				       last_read[ring->id].ring_list);
> +
> +		i915_gem_object_retire__read(obj, ring);
>  	}
>  
>  	/*
> @@ -2748,19 +2830,18 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
>  	 * the request.
>  	 */
>  	while (!list_empty(&ring->request_list)) {
> -		struct drm_i915_gem_request *request;
> +		struct i915_gem_request *rq;
>  
> -		request = list_first_entry(&ring->request_list,
> -					   struct drm_i915_gem_request,
> -					   list);
> +		rq = list_first_entry(&ring->request_list,
> +				      struct i915_gem_request,
> +				      list);
>  
> -		i915_gem_free_request(request);
> +		i915_request_retire(rq);
>  	}
>  
>  	/* These may not have been flush before the reset, do so now */
> -	kfree(ring->preallocated_lazy_request);
> -	ring->preallocated_lazy_request = NULL;
> -	ring->outstanding_lazy_seqno = 0;
> +	kfree(ring->preallocated_request);
> +	ring->preallocated_request = NULL;
>  }
>  
>  void i915_gem_restore_fences(struct drm_device *dev)
> @@ -2825,43 +2906,71 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
>  	 * by the ringbuffer to the flushing/inactive lists as appropriate,
>  	 * before we free the context associated with the requests.
>  	 */
> -	while (!list_empty(&ring->active_list)) {
> +	while (!list_empty(&ring->write_list)) {
> +		struct drm_i915_gem_object *obj;
> +
> +		obj = list_first_entry(&ring->write_list,
> +				       struct drm_i915_gem_object,
> +				       last_write.ring_list);
> +
> +		if (!__i915_seqno_passed(seqno,
> +					 obj->last_write.request->seqno))
> +			break;
> +
> +		i915_gem_object_retire__write(obj);
> +	}
> +
> +	while (!list_empty(&ring->fence_list)) {
>  		struct drm_i915_gem_object *obj;
>  
> -		obj = list_first_entry(&ring->active_list,
> -				      struct drm_i915_gem_object,
> -				      ring_list);
> +		obj = list_first_entry(&ring->fence_list,
> +				       struct drm_i915_gem_object,
> +				       last_fence.ring_list);
>  
> -		if (!i915_seqno_passed(seqno, obj->last_read_seqno))
> +		if (!__i915_seqno_passed(seqno,
> +					 obj->last_fence.request->seqno))
>  			break;
>  
> -		i915_gem_object_move_to_inactive(obj);
> +		i915_gem_object_retire__fence(obj);
>  	}
>  
> +	while (!list_empty(&ring->read_list)) {
> +		struct drm_i915_gem_object *obj;
> +
> +		obj = list_first_entry(&ring->read_list,
> +				       struct drm_i915_gem_object,
> +				       last_read[ring->id].ring_list);
> +
> +		if (!__i915_seqno_passed(seqno,
> +					 obj->last_read[ring->id].request->seqno))
> +			break;
> +
> +		i915_gem_object_retire__read(obj, ring);
> +	}
>  
>  	while (!list_empty(&ring->request_list)) {
> -		struct drm_i915_gem_request *request;
> +		struct i915_gem_request *rq;
>  
> -		request = list_first_entry(&ring->request_list,
> -					   struct drm_i915_gem_request,
> -					   list);
> +		rq = list_first_entry(&ring->request_list,
> +				      struct i915_gem_request,
> +				      list);
>  
> -		if (!i915_seqno_passed(seqno, request->seqno))
> +		if (!__i915_seqno_passed(seqno, rq->seqno))
>  			break;
>  
> -		trace_i915_gem_request_retire(ring, request->seqno);
> +		trace_i915_gem_request_retire(ring, rq->seqno);
>  		/* We know the GPU must have read the request to have
>  		 * sent us the seqno + interrupt, so use the position
>  		 * of tail of the request to update the last known position
>  		 * of the GPU head.
>  		 */
> -		ring->buffer->last_retired_head = request->tail;
> +		ring->buffer->last_retired_head = rq->tail;
>  
> -		i915_gem_free_request(request);
> +		i915_request_retire(rq);
>  	}
>  
>  	if (unlikely(ring->trace_irq_seqno &&
> -		     i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
> +		     __i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
>  		ring->irq_put(ring);
>  		ring->trace_irq_seqno = 0;
>  	}
> @@ -2926,14 +3035,23 @@ i915_gem_idle_work_handler(struct work_struct *work)
>  static int
>  i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
>  {
> -	int ret;
> +	int i;
>  
> -	if (obj->active) {
> -		ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
> +	if (!obj->active)
> +		return 0;
> +
> +	for (i = 0; i < I915_NUM_RINGS; i++) {
> +		struct i915_gem_request *rq = obj->last_read[i].request;
> +		int ret;
> +
> +		if (rq == NULL)
> +			continue;
> +
> +		ret = i915_gem_check_olr(rq);
>  		if (ret)
>  			return ret;
>  
> -		i915_gem_retire_requests_ring(obj->ring);
> +		i915_gem_retire_requests_ring(rq->ring);
>  	}
>  
>  	return 0;
> @@ -2967,11 +3085,10 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct drm_i915_gem_wait *args = data;
>  	struct drm_i915_gem_object *obj;
> -	struct intel_engine_cs *ring = NULL;
>  	struct timespec timeout_stack, *timeout = NULL;
> +	struct i915_gem_request *rq[I915_NUM_RINGS] = {};
>  	unsigned reset_counter;
> -	u32 seqno = 0;
> -	int ret = 0;
> +	int i, n, ret = 0;
>  
>  	if (args->timeout_ns >= 0) {
>  		timeout_stack = ns_to_timespec(args->timeout_ns);
> @@ -2993,13 +3110,8 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  	if (ret)
>  		goto out;
>  
> -	if (obj->active) {
> -		seqno = obj->last_read_seqno;
> -		ring = obj->ring;
> -	}
> -
> -	if (seqno == 0)
> -		 goto out;
> +	if (!obj->active)
> +		goto out;
>  
>  	/* Do this after OLR check to make sure we make forward progress polling
>  	 * on this IOCTL with a 0 timeout (like busy ioctl)
> @@ -3009,11 +3121,25 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  		goto out;
>  	}
>  
> +	for (i = n = 0; i < I915_NUM_RINGS; i++) {
> +		if (obj->last_read[i].request == NULL)
> +			continue;
> +
> +		rq[n++] = i915_request_get(obj->last_read[i].request);
> +	}
> +
>  	drm_gem_object_unreference(&obj->base);
> +
>  	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
>  	mutex_unlock(&dev->struct_mutex);
>  
> -	ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv);
> +	for (i = 0; i < n; i++) {
> +		if (ret == 0)
> +			ret = __wait_request(rq[i], reset_counter, true, timeout, file->driver_priv);
> +
> +		i915_request_put(rq[i]);
> +	}
> +
>  	if (timeout)
>  		args->timeout_ns = timespec_to_ns(timeout);
>  	return ret;
> @@ -3024,6 +3150,45 @@ out:
>  	return ret;
>  }
>  
> +static int
> +i915_request_sync(struct i915_gem_request *rq,
> +		  struct intel_engine_cs *to,
> +		  struct drm_i915_gem_object *obj)
> +{
> +	int ret, idx;
> +
> +	if (to == NULL)
> +		return i915_wait_request(rq);
> +
> +	/* XXX this is broken by VEBOX+ */
> +	idx = intel_ring_sync_index(rq->ring, to);
> +
> +	/* Optimization: Avoid semaphore sync when we are sure we already
> +	 * waited for an object with higher seqno */
> +	if (rq->seqno <= rq->ring->semaphore.sync_seqno[idx])
> +		return 0;
> +
> +	ret = i915_gem_check_olr(rq);
> +	if (ret)
> +		return ret;
> +
> +	if (!i915_request_complete(rq, true)) {
> +		trace_i915_gem_ring_sync_to(rq->ring, to, rq->seqno);
> +		ret = to->semaphore.sync_to(to, rq->ring, rq->seqno);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	/* We must recheck the last_read request because sync_to()
> +	 * might have just caused seqno wrap under
> +	 * the radar.
> +	 */
> +	if (obj->last_read[rq->ring->id].request == rq)
> +		rq->ring->semaphore.sync_seqno[idx] = rq->seqno;
> +
> +	return 0;
> +}
> +
>  /**
>   * i915_gem_object_sync - sync an object to a ring.
>   *
> @@ -3038,44 +3203,35 @@ out:
>   */
>  int
>  i915_gem_object_sync(struct drm_i915_gem_object *obj,
> -		     struct intel_engine_cs *to)
> +		     struct intel_engine_cs *to,
> +		     bool readonly)
>  {
> -	struct intel_engine_cs *from = obj->ring;
> -	u32 seqno;
> -	int ret, idx;
> +	struct i915_gem_request *rq;
> +	struct intel_engine_cs *semaphore;
> +	int ret = 0, i;
>  
> -	if (from == NULL || to == from)
> -		return 0;
> +	semaphore = NULL;
> +	if (i915_semaphore_is_enabled(obj->base.dev))
> +		semaphore = to;
>  
> -	if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
> -		return i915_gem_object_wait_rendering(obj, false);
> -
> -	/* XXX this is broken by VEBOX+ */
> -	idx = intel_ring_sync_index(from, to);
> -
> -	seqno = obj->last_read_seqno;
> -	/* Optimization: Avoid semaphore sync when we are sure we already
> -	 * waited for an object with higher seqno */
> -	if (seqno <= from->semaphore.sync_seqno[idx])
> -		return 0;
> -
> -	ret = 0;
> -	if (!i915_seqno_passed(from->get_seqno(from, true), seqno)) {
> -		ret = i915_gem_check_olr(from, seqno);
> -		if (ret)
> -			return ret;
> +	if (readonly) {
> +		rq = obj->last_write.request;
> +		if (rq != NULL && to != rq->ring)
> +			ret = i915_request_sync(rq, semaphore, obj);
> +	} else {
> +		for (i = 0; i < I915_NUM_RINGS; i++) {
> +			rq = obj->last_read[i].request;
> +			if (rq == NULL || to == rq->ring)
> +				continue;
>  
> -		trace_i915_gem_ring_sync_to(from, to, seqno);
> -		ret = to->semaphore.sync_to(to, from, seqno);
> +			ret = i915_request_sync(rq, semaphore, obj);
> +			if (ret)
> +				break;
> +		}
>  	}
> -	if (!ret)
> -		/* We use last_read_seqno because sync_to()
> -		 * might have just caused seqno wrap under
> -		 * the radar.
> -		 */
> -		from->semaphore.sync_seqno[idx] = obj->last_read_seqno;
>  
>  	return ret;
> +
>  }
>  
>  static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
> @@ -3381,14 +3537,16 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
>  static int
>  i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
>  {
> -	if (obj->last_fenced_seqno) {
> -		int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
> -		if (ret)
> -			return ret;
> +	int ret;
>  
> -		obj->last_fenced_seqno = 0;
> -	}
> +	if (obj->last_fence.request == NULL)
> +		return 0;
>  
> +	ret = i915_wait_request(obj->last_fence.request);
> +	if (ret)
> +		return ret;
> +
> +	i915_gem_object_retire__fence(obj);
>  	return 0;
>  }
>  
> @@ -3836,11 +3994,12 @@ int
>  i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>  {
>  	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> +	struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
>  	uint32_t old_write_domain, old_read_domains;
>  	int ret;
>  
>  	/* Not valid to be called on unbound objects. */
> -	if (!i915_gem_obj_bound_any(obj))
> +	if (vma == NULL)
>  		return -EINVAL;
>  
>  	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
> @@ -3882,14 +4041,8 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>  					    old_write_domain);
>  
>  	/* And bump the LRU for this access */
> -	if (!obj->active) {
> -		struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
> -		if (vma)
> -			list_move_tail(&vma->mm_list,
> -				       &dev_priv->gtt.base.inactive_list);
> -
> -	}
> -
> +	list_move_tail(&vma->mm_list,
> +		       &dev_priv->gtt.base.inactive_list);

We've lost the obj->active check here and I didn't spot anything that
would justify that.
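
Keeping the check would just be (sketch, using the new local vma):

	if (!obj->active)
		list_move_tail(&vma->mm_list,
			       &dev_priv->gtt.base.inactive_list);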

>  	return 0;
>  }
>  
> @@ -4087,11 +4240,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  	bool was_pin_display;
>  	int ret;
>  
> -	if (pipelined != obj->ring) {
> -		ret = i915_gem_object_sync(obj, pipelined);
> -		if (ret)
> -			return ret;
> -	}
> +	ret = i915_gem_object_sync(obj, pipelined, true);
> +	if (ret)
> +		return ret;
>  
>  	/* Mark the pin_display early so that we account for the
>  	 * display coherency whilst setting up the cache domains.
> @@ -4239,10 +4390,8 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct drm_i915_file_private *file_priv = file->driver_priv;
>  	unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
> -	struct drm_i915_gem_request *request;
> -	struct intel_engine_cs *ring = NULL;
> +	struct i915_gem_request *rq;
>  	unsigned reset_counter;
> -	u32 seqno = 0;
>  	int ret;
>  
>  	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
> @@ -4254,23 +4403,22 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
>  		return ret;
>  
>  	spin_lock(&file_priv->mm.lock);
> -	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
> -		if (time_after_eq(request->emitted_jiffies, recent_enough))
> +	list_for_each_entry(rq, &file_priv->mm.request_list, client_list) {
> +		if (time_after_eq(rq->emitted_jiffies, recent_enough))
>  			break;
> -
> -		ring = request->ring;
> -		seqno = request->seqno;
>  	}
> +	rq = i915_request_get(&rq->client_list == &file_priv->mm.request_list ? NULL : rq);
>  	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
>  	spin_unlock(&file_priv->mm.lock);
>  
> -	if (seqno == 0)
> +	if (rq == NULL)
>  		return 0;
>  
> -	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL);
> +	ret = __wait_request(rq, reset_counter, true, NULL, NULL);
>  	if (ret == 0)
>  		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
>  
> +	i915_request_put(rq);
>  	return ret;
>  }
>  
> @@ -4488,7 +4636,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>  {
>  	struct drm_i915_gem_busy *args = data;
>  	struct drm_i915_gem_object *obj;
> -	int ret;
> +	int ret, i;
>  
>  	ret = i915_mutex_lock_interruptible(dev);
>  	if (ret)
> @@ -4507,10 +4655,16 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>  	 */
>  	ret = i915_gem_object_flush_active(obj);
>  
> -	args->busy = obj->active;
> -	if (obj->ring) {
> +	args->busy = 0;
> +	if (obj->active) {
>  		BUILD_BUG_ON(I915_NUM_RINGS > 16);

Hm, this suggests we should size active to be 4 bits. Just to stay
consistent.
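
i.e. something like this in struct drm_i915_gem_object (only a sketch,
whether it ends up as a counter or a per-ring mask is a separate
question):

	unsigned int active : 4;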

> -		args->busy |= intel_ring_flag(obj->ring) << 16;
> +		args->busy |= 1;
> +		for (i = 0; i < I915_NUM_RINGS; i++)  {
> +			if (obj->last_read[i].request == NULL)
> +				continue;
> +
> +			args->busy |= 1 << (16 + i);
> +		}
>  	}
>  
>  	drm_gem_object_unreference(&obj->base);
> @@ -4584,8 +4738,13 @@ unlock:
>  void i915_gem_object_init(struct drm_i915_gem_object *obj,
>  			  const struct drm_i915_gem_object_ops *ops)
>  {
> +	int i;
> +
>  	INIT_LIST_HEAD(&obj->global_list);
> -	INIT_LIST_HEAD(&obj->ring_list);
> +	INIT_LIST_HEAD(&obj->last_fence.ring_list);
> +	INIT_LIST_HEAD(&obj->last_write.ring_list);
> +	for (i = 0; i < I915_NUM_RINGS; i++)
> +		INIT_LIST_HEAD(&obj->last_read[i].ring_list);
>  	INIT_LIST_HEAD(&obj->obj_exec_link);
>  	INIT_LIST_HEAD(&obj->vma_list);
>  
> @@ -5117,7 +5276,9 @@ i915_gem_lastclose(struct drm_device *dev)
>  static void
>  init_ring_lists(struct intel_engine_cs *ring)
>  {
> -	INIT_LIST_HEAD(&ring->active_list);
> +	INIT_LIST_HEAD(&ring->read_list);
> +	INIT_LIST_HEAD(&ring->write_list);
> +	INIT_LIST_HEAD(&ring->fence_list);
>  	INIT_LIST_HEAD(&ring->request_list);
>  }
>  
> @@ -5213,13 +5374,13 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
>  	 */
>  	spin_lock(&file_priv->mm.lock);
>  	while (!list_empty(&file_priv->mm.request_list)) {
> -		struct drm_i915_gem_request *request;
> +		struct i915_gem_request *rq;
>  
> -		request = list_first_entry(&file_priv->mm.request_list,
> -					   struct drm_i915_gem_request,
> -					   client_list);
> -		list_del(&request->client_list);
> -		request->file_priv = NULL;
> +		rq = list_first_entry(&file_priv->mm.request_list,
> +				      struct i915_gem_request,
> +				      client_list);
> +		list_del(&rq->client_list);
> +		rq->file_priv = NULL;
>  	}
>  	spin_unlock(&file_priv->mm.lock);
>  }
> @@ -5503,15 +5664,27 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
>  {
>  	struct i915_vma *vma;
>  
> -	/* This WARN has probably outlived its usefulness (callers already
> -	 * WARN if they don't find the GGTT vma they expect). When removing,
> -	 * remember to remove the pre-check in is_pin_display() as well */
> -	if (WARN_ON(list_empty(&obj->vma_list)))
> -		return NULL;
> -

Smells like a separate patch. Maybe do it up-front if taking it out is too
invasive.

>  	vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link);
>  	if (vma->vm != obj_to_ggtt(obj))
>  		return NULL;
>  
>  	return vma;
>  }
> +
> +struct i915_gem_request *i915_gem_object_last_read(struct drm_i915_gem_object *obj)

This one needs a big warning that it's only suitable as a hint for error
state and debugfs. If execbuf gets stuck in the slowpath we might end up
with slightly out-of-order reads (since now they don't sync cross-engine
any more).
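
Something along these lines above the function, wording up to you:

	/*
	 * NB: This is only an approximation, good enough for error capture
	 * and debugfs.  Reads are no longer serialised across engines, so
	 * if execbuf falls back to the slowpath the requests may be emitted
	 * slightly out of order, and the request returned here is only a
	 * hint.  Do not use it for anything that needs to be correct.
	 */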

> +{
> +	u32 seqno = 0;
> +	struct i915_gem_request *rq = NULL;
> +	int i;
> +
> +	/* This is approximate as seqno cannot be used across rings */
> +	for (i = 0; i < I915_NUM_RINGS; i++) {
> +		if (obj->last_read[i].request == NULL)
> +			continue;
> +
> +		if (__i915_seqno_passed(obj->last_read[i].request->seqno, seqno))
> +			rq = obj->last_read[i].request, seqno = rq->seqno;
> +	}
> +
> +	return rq;
> +}
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 79dc77b..690e2dc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -394,13 +394,9 @@ void i915_gem_context_reset(struct drm_device *dev)
>  		if (!lctx)
>  			continue;
>  
> -		if (dctx->legacy_hw_ctx.rcs_state && i == RCS) {
> +		if (dctx->legacy_hw_ctx.rcs_state && i == RCS)
>  			WARN_ON(i915_gem_obj_ggtt_pin(dctx->legacy_hw_ctx.rcs_state,
>  						      get_context_alignment(dev), 0));
> -			/* Fake a finish/inactive */
> -			dctx->legacy_hw_ctx.rcs_state->base.write_domain = 0;
> -			dctx->legacy_hw_ctx.rcs_state->active = 0;
> -		}

Again tastes like a separate patch for up-front merging.

>  
>  		if (lctx->legacy_hw_ctx.rcs_state && i == RCS)
>  			i915_gem_object_ggtt_unpin(lctx->legacy_hw_ctx.rcs_state);
> @@ -467,7 +463,6 @@ void i915_gem_context_fini(struct drm_device *dev)
>  		WARN_ON(!dev_priv->ring[RCS].last_context);
>  		if (dev_priv->ring[RCS].last_context == dctx) {
>  			/* Fake switch to NULL context */
> -			WARN_ON(dctx->legacy_hw_ctx.rcs_state->active);
>  			i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state);
>  			i915_gem_context_unreference(dctx);
>  			dev_priv->ring[RCS].last_context = NULL;
> @@ -741,8 +736,11 @@ static int do_switch(struct intel_engine_cs *ring,
>  	 * MI_SET_CONTEXT instead of when the next seqno has completed.
>  	 */
>  	if (from != NULL) {
> -		from->legacy_hw_ctx.rcs_state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> -		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), ring);
> +		struct drm_i915_gem_object *from_obj = from->legacy_hw_ctx.rcs_state;
> +
> +		from_obj->base.pending_read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> +		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from_obj), ring, 0);
> +
>  		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
>  		 * whole damn pipeline, we don't need to explicitly mark the
>  		 * object dirty. The only exception is that the context must be
> @@ -750,11 +748,10 @@ static int do_switch(struct intel_engine_cs *ring,
>  		 * able to defer doing this until we know the object would be
>  		 * swapped, but there is no way to do that yet.
>  		 */
> -		from->legacy_hw_ctx.rcs_state->dirty = 1;
> -		BUG_ON(from->legacy_hw_ctx.rcs_state->ring != ring);
> +		from_obj->dirty = 1;
>  
>  		/* obj is kept alive until the next request by its active ref */
> -		i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
> +		i915_gem_object_ggtt_unpin(from_obj);
>  		i915_gem_context_unreference(from);
>  	}
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_exec.c b/drivers/gpu/drm/i915/i915_gem_exec.c
> index 57d4dde..787ea6f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_exec.c
> +++ b/drivers/gpu/drm/i915/i915_gem_exec.c
> @@ -45,7 +45,7 @@ static int i915_gem_exec_flush_object(struct drm_i915_gem_object *obj,
>  {
>  	int ret;
>  
> -	ret = i915_gem_object_sync(obj, ring);
> +	ret = i915_gem_object_sync(obj, ring, false);
>  	if (ret)
>  		return ret;
>  
> @@ -65,11 +65,9 @@ static int i915_gem_exec_flush_object(struct drm_i915_gem_object *obj,
>  static void i915_gem_exec_dirty_object(struct drm_i915_gem_object *obj,
>  				       struct intel_engine_cs *ring)
>  {
> -	obj->base.read_domains = I915_GEM_DOMAIN_RENDER;
> -	obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
> -	i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), ring);
> -	obj->last_write_seqno = intel_ring_get_seqno(ring);
> -	obj->dirty = 1;

Would be nice to split out the semantic change of moving dirty = 1 into
move_to_active.

> +	obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
> +	obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
> +	i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), ring, 0);
>  
>  	ring->gpu_caches_dirty = true;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 0faab01..8f1c2a2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -847,7 +847,8 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
>  
>  	list_for_each_entry(vma, vmas, exec_list) {
>  		struct drm_i915_gem_object *obj = vma->obj;
> -		ret = i915_gem_object_sync(obj, ring);
> +
> +		ret = i915_gem_object_sync(obj, ring, obj->base.pending_write_domain == 0);
>  		if (ret)
>  			return ret;
>  
> @@ -956,40 +957,20 @@ static void
>  i915_gem_execbuffer_move_to_active(struct list_head *vmas,
>  				   struct intel_engine_cs *ring)
>  {
> -	u32 seqno = intel_ring_get_seqno(ring);
>  	struct i915_vma *vma;
>  
>  	list_for_each_entry(vma, vmas, exec_list) {
>  		struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> -		struct drm_i915_gem_object *obj = vma->obj;
> -		u32 old_read = obj->base.read_domains;
> -		u32 old_write = obj->base.write_domain;
> -
> -		obj->base.write_domain = obj->base.pending_write_domain;
> -		if (obj->base.write_domain == 0)
> -			obj->base.pending_read_domains |= obj->base.read_domains;
> -		obj->base.read_domains = obj->base.pending_read_domains;
> -
> -		i915_vma_move_to_active(vma, ring);
> -		if (obj->base.write_domain) {
> -			obj->dirty = 1;
> -			obj->last_write_seqno = seqno;
> +		unsigned fenced;
>  
> -			intel_fb_obj_invalidate(obj, ring);
> -
> -			/* update for the implicit flush after a batch */
> -			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
> -		}
> +		fenced = 0;
>  		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
> -			obj->last_fenced_seqno = seqno;
> -			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
> -				struct drm_i915_private *dev_priv = to_i915(ring->dev);
> -				list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
> -					       &dev_priv->mm.fence_list);
> -			}
> +			fenced |= VMA_IS_FENCED;
> +			if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
> +				fenced |= VMA_HAS_FENCE;
>  		}
>  
> -		trace_i915_gem_object_change_domain(obj, old_read, old_write);
> +		i915_vma_move_to_active(vma, ring, fenced);
>  	}
>  }
>  
> @@ -1003,7 +984,7 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
>  	ring->gpu_caches_dirty = true;
>  
>  	/* Add a breadcrumb for the completion of the batch buffer */
> -	(void)__i915_add_request(ring, file, obj, NULL);
> +	(void)__i915_add_request(ring, file, obj);
>  }
>  
>  static int
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index e60be3f..fc1223c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -159,9 +159,10 @@ int i915_gem_render_state_init(struct intel_engine_cs *ring)
>  	if (ret)
>  		goto out;
>  
> -	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
> +	so.obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
> +	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring, 0);
>  
> -	ret = __i915_add_request(ring, NULL, so.obj, NULL);
> +	ret = __i915_add_request(ring, NULL, so.obj);
>  	/* __i915_add_request moves object to inactive if it fails */
>  out:
>  	render_state_fini(&so);
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index af5d31a..e46fb34 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -326,7 +326,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
>  
>  	if (ret == 0) {
>  		obj->fence_dirty =
> -			obj->last_fenced_seqno ||
> +			obj->last_fence.request ||
>  			obj->fence_reg != I915_FENCE_REG_NONE;
>  		obj->tiling_mode = tiling_mode;
>  		obj->stride = stride;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index ebc8529..584b863 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -572,7 +572,7 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
>  	if (i915_gem_obj_bound(src, vm))
>  		dst->gtt_offset = i915_gem_obj_offset(src, vm);
>  	else
> -		dst->gtt_offset = -1UL;
> +		dst->gtt_offset = -1;

Spurious change?

>  
>  	reloc_offset = dst->gtt_offset;
>  	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
> @@ -653,11 +653,12 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>  		       struct i915_vma *vma)
>  {
>  	struct drm_i915_gem_object *obj = vma->obj;
> +	struct i915_gem_request *rq = i915_gem_object_last_read(obj);
>  
>  	err->size = obj->base.size;
>  	err->name = obj->base.name;
> -	err->rseqno = obj->last_read_seqno;
> -	err->wseqno = obj->last_write_seqno;
> +	err->rseqno = i915_request_seqno(rq);
> +	err->wseqno = i915_request_seqno(obj->last_write.request);
>  	err->gtt_offset = vma->node.start;
>  	err->read_domains = obj->base.read_domains;
>  	err->write_domain = obj->base.write_domain;
> @@ -671,7 +672,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>  	err->dirty = obj->dirty;
>  	err->purgeable = obj->madv != I915_MADV_WILLNEED;
>  	err->userptr = obj->userptr.mm != NULL;
> -	err->ring = obj->ring ? obj->ring->id : -1;
> +	err->ring = i915_request_ring_id(rq);
>  	err->cache_level = obj->cache_level;
>  }
>  
> @@ -963,7 +964,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  				  struct drm_i915_error_state *error)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct drm_i915_gem_request *request;
> +	struct i915_gem_request *rq;
>  	int i, count;
>  
>  	for (i = 0; i < I915_NUM_RINGS; i++) {
> @@ -978,17 +979,17 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  
>  		i915_record_ring_state(dev, error, ring, &error->ring[i]);
>  
> -		request = i915_gem_find_active_request(ring);
> -		if (request) {
> +		rq = i915_gem_find_active_request(ring);

This reminds me that our locking for the error state capture and also the
guilty batch determination is fairly ... nonexistent. This will be a fun
problem to fix once we make reset more common with per-engine resets and
short-lived timers for media workloads. Anyway, unrelated comment.

> +		if (rq) {
>  			/* We need to copy these to an anonymous buffer
>  			 * as the simplest method to avoid being overwritten
>  			 * by userspace.
>  			 */
>  			error->ring[i].batchbuffer =
>  				i915_error_object_create(dev_priv,
> -							 request->batch_obj,
> -							 request->ctx ?
> -							 request->ctx->vm :
> +							 rq->batch_obj,
> +							 rq->ctx ?
> +							 rq->ctx->vm :
>  							 &dev_priv->gtt.base);
>  
>  			if (HAS_BROKEN_CS_TLB(dev_priv))
> @@ -996,11 +997,11 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  					i915_error_ggtt_object_create(dev_priv,
>  							     ring->scratch.obj);
>  
> -			if (request->file_priv) {
> +			if (rq->file_priv) {
>  				struct task_struct *task;
>  
>  				rcu_read_lock();
> -				task = pid_task(request->file_priv->file->pid,
> +				task = pid_task(rq->file_priv->file->pid,
>  						PIDTYPE_PID);
>  				if (task) {
>  					strcpy(error->ring[i].comm, task->comm);
> @@ -1019,7 +1020,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  		i915_gem_record_active_context(ring, error, &error->ring[i]);
>  
>  		count = 0;
> -		list_for_each_entry(request, &ring->request_list, list)
> +		list_for_each_entry(rq, &ring->request_list, list)
>  			count++;
>  
>  		error->ring[i].num_requests = count;
> @@ -1032,13 +1033,13 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  		}
>  
>  		count = 0;
> -		list_for_each_entry(request, &ring->request_list, list) {
> +		list_for_each_entry(rq, &ring->request_list, list) {
>  			struct drm_i915_error_request *erq;
>  
>  			erq = &error->ring[i].requests[count++];
> -			erq->seqno = request->seqno;
> -			erq->jiffies = request->emitted_jiffies;
> -			erq->tail = request->tail;
> +			erq->seqno = rq->seqno;
> +			erq->jiffies = rq->emitted_jiffies;
> +			erq->tail = rq->tail;
>  		}
>  	}
>  }
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 717c111..6d4f5a7 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2935,14 +2935,14 @@ static u32
>  ring_last_seqno(struct intel_engine_cs *ring)
>  {
>  	return list_entry(ring->request_list.prev,
> -			  struct drm_i915_gem_request, list)->seqno;
> +			  struct i915_gem_request, list)->seqno;
>  }
>  
>  static bool
>  ring_idle(struct intel_engine_cs *ring, u32 seqno)
>  {
>  	return (list_empty(&ring->request_list) ||
> -		i915_seqno_passed(seqno, ring_last_seqno(ring)));
> +		__i915_seqno_passed(seqno, ring_last_seqno(ring)));
>  }
>  
>  static bool
> @@ -3057,7 +3057,7 @@ static int semaphore_passed(struct intel_engine_cs *ring)
>  	if (signaller->hangcheck.deadlock >= I915_NUM_RINGS)
>  		return -1;
>  
> -	if (i915_seqno_passed(signaller->get_seqno(signaller, false), seqno))
> +	if (__i915_seqno_passed(signaller->get_seqno(signaller, false), seqno))
>  		return 1;
>  
>  	/* cursory check for an unkickable deadlock */
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 75f423d..f1c2a28 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -17,16 +17,16 @@ static bool gpu_active(struct drm_i915_private *i915)
>  	int i;
>  
>  	for_each_ring(ring, i915, i) {
> -		struct drm_i915_gem_request *rq;
> +		struct i915_gem_request *rq;
>  
>  		if (list_empty(&ring->request_list))
>  			continue;
>  
>  		rq = list_last_entry(&ring->request_list,
> -				     struct drm_i915_gem_request,
> +				     struct i915_gem_request,
>  				     list);
>  
> -		if (i915_seqno_passed(ring->get_seqno(ring, true), rq->seqno))
> +		if (i915_request_complete(rq, true))
>  			continue;
>  
>  		return true;
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index 63f6875..0ebd85d 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -389,7 +389,7 @@ TRACE_EVENT(i915_gem_ring_dispatch,
>  	    TP_fast_assign(
>  			   __entry->dev = ring->dev->primary->index;
>  			   __entry->ring = ring->id;
> -			   __entry->seqno = intel_ring_get_seqno(ring),
> +			   __entry->seqno = intel_ring_get_request(ring)->seqno,
>  			   __entry->flags = flags;
>  			   i915_trace_irq_get(ring, __entry->seqno);
>  			   ),
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index d828f47..9b7931c 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -9167,6 +9167,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
>  	BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
>  	atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
>  
> +	i915_request_put(work->flip_queued_request);
>  	kfree(work);
>  }
>  
> @@ -9548,7 +9549,7 @@ static bool use_mmio_flip(struct intel_engine_cs *ring,
>  	else if (i915.use_mmio_flip > 0)
>  		return true;
>  	else
> -		return ring != obj->ring;
> +		return ring != i915_request_ring(obj->last_write.request);
>  }
>  
>  static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
> @@ -9581,25 +9582,22 @@ static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
>  
>  static int intel_postpone_flip(struct drm_i915_gem_object *obj)
>  {
> -	struct intel_engine_cs *ring;
> +	struct i915_gem_request *rq = obj->last_write.request;
>  	int ret;
>  
>  	lockdep_assert_held(&obj->base.dev->struct_mutex);
>  
> -	if (!obj->last_write_seqno)
> -		return 0;
> -
> -	ring = obj->ring;
> -
> -	if (i915_seqno_passed(ring->get_seqno(ring, true),
> -			      obj->last_write_seqno))
> +	if (rq == NULL)
>  		return 0;
>  
> -	ret = i915_gem_check_olr(ring, obj->last_write_seqno);
> +	ret = i915_gem_check_olr(rq);
>  	if (ret)
>  		return ret;
>  
> -	if (WARN_ON(!ring->irq_get(ring)))
> +	if (i915_request_complete(rq, true))
> +		return 0;
> +
> +	if (WARN_ON(!rq->ring->irq_get(rq->ring)))
>  		return 0;
>  
>  	return 1;
> @@ -9625,7 +9623,7 @@ void intel_notify_mmio_flip(struct intel_engine_cs *ring)
>  		if (ring->id != mmio_flip->ring_id)
>  			continue;
>  
> -		if (i915_seqno_passed(seqno, mmio_flip->seqno)) {
> +		if (__i915_seqno_passed(seqno, mmio_flip->seqno)) {
>  			intel_do_mmio_flip(intel_crtc);
>  			mmio_flip->seqno = 0;
>  			ring->irq_put(ring);
> @@ -9643,6 +9641,7 @@ static int intel_queue_mmio_flip(struct drm_device *dev,
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> +	struct i915_gem_request *rq;
>  	unsigned long irq_flags;
>  	int ret;
>  
> @@ -9657,16 +9656,20 @@ static int intel_queue_mmio_flip(struct drm_device *dev,
>  		return 0;
>  	}
>  
> +	rq = obj->last_write.request;
> +	if (WARN_ON(rq == NULL))
> +		return 0;
> +
>  	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
> -	intel_crtc->mmio_flip.seqno = obj->last_write_seqno;
> -	intel_crtc->mmio_flip.ring_id = obj->ring->id;
> +	intel_crtc->mmio_flip.seqno = rq->seqno;
> +	intel_crtc->mmio_flip.ring_id = rq->ring->id;
>  	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
>  
>  	/*
>  	 * Double check to catch cases where irq fired before
>  	 * mmio flip data was ready
>  	 */
> -	intel_notify_mmio_flip(obj->ring);
> +	intel_notify_mmio_flip(rq->ring);
>  	return 0;
>  }
>  
> @@ -9695,9 +9698,8 @@ static bool __intel_pageflip_stall_check(struct drm_device *dev,
>  		return false;
>  
>  	if (work->flip_ready_vblank == 0) {
> -		if (work->ring &&
> -		    !i915_seqno_passed(work->ring->get_seqno(work->ring, true),
> -				      work->flip_queued_seqno))
> +		struct i915_gem_request *rq = work->flip_queued_request;
> +		if (rq && !i915_request_complete(rq, true))
>  			return false;
>  
>  		work->flip_ready_vblank = drm_vblank_count(dev, intel_crtc->pipe);
> @@ -9758,6 +9760,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  	enum pipe pipe = intel_crtc->pipe;
>  	struct intel_unpin_work *work;
>  	struct intel_engine_cs *ring;
> +	struct i915_gem_request *rq;
>  	unsigned long flags;
>  	int ret;
>  
> @@ -9856,7 +9859,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  	} else if (IS_IVYBRIDGE(dev)) {
>  		ring = &dev_priv->ring[BCS];
>  	} else if (INTEL_INFO(dev)->gen >= 7) {
> -		ring = obj->ring;
> +		ring = i915_request_ring(obj->last_write.request);
>  		if (ring == NULL || ring->id != RCS)
>  			ring = &dev_priv->ring[BCS];
>  	} else {
> @@ -9864,7 +9867,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  	}
>  
>  	if (use_mmio_flip(ring, obj, page_flip_flags)) {
> -		ret = intel_pin_and_fence_fb_obj(dev, obj, obj->ring);
> +		ret = intel_pin_and_fence_fb_obj(dev, obj, i915_request_ring(obj->last_write.request));
>  		if (ret)
>  			goto cleanup_pending;
>  
> @@ -9876,8 +9879,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  		if (ret)
>  			goto cleanup_unpin;
>  
> -		work->flip_queued_seqno = obj->last_write_seqno;
> -		work->ring = obj->ring;
> +		rq = obj->last_write.request;
>  	} else {
>  		ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
>  		if (ret)
> @@ -9891,10 +9893,10 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  		if (ret)
>  			goto cleanup_unpin;
>  
> -		work->flip_queued_seqno = intel_ring_get_seqno(ring);
> -		work->ring = ring;
> +		rq = intel_ring_get_request(ring);
>  	}
>  
> +	work->flip_queued_request = i915_request_get(rq);
>  	work->flip_queued_vblank = drm_vblank_count(dev, intel_crtc->pipe);
>  	work->enable_stall_check = true;
>  
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 274f77c..5f336a3 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -657,14 +657,13 @@ struct intel_unpin_work {
>  	struct drm_i915_gem_object *old_fb_obj;
>  	struct drm_i915_gem_object *pending_flip_obj;
>  	struct drm_pending_vblank_event *event;
> -	struct intel_engine_cs *ring;
>  	atomic_t pending;
>  #define INTEL_FLIP_INACTIVE	0
>  #define INTEL_FLIP_PENDING	1
>  #define INTEL_FLIP_COMPLETE	2
>  	u32 flip_count;
>  	u32 gtt_offset;
> -	u32 flip_queued_seqno;
> +	struct i915_gem_request *flip_queued_request;
>  	int flip_queued_vblank;
>  	int flip_ready_vblank;
>  	bool enable_stall_check;
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index d94af27..c709ca5 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -183,7 +183,7 @@ struct intel_overlay {
>  	u32 flip_addr;
>  	struct drm_i915_gem_object *reg_bo;
>  	/* flip handling */
> -	uint32_t last_flip_req;
> +	struct i915_gem_request *flip_request;
>  	void (*flip_tail)(struct intel_overlay *);
>  };
>  
> @@ -209,29 +209,49 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
>  		io_mapping_unmap(regs);
>  }
>  
> -static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
> -					 void (*tail)(struct intel_overlay *))
> +/* recover from an interruption due to a signal
> + * We have to be careful not to repeat work forever and make forward progress. */
> +static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
>  {
> -	struct drm_device *dev = overlay->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
>  	int ret;
>  
> -	BUG_ON(overlay->last_flip_req);
> -	ret = i915_add_request(ring, &overlay->last_flip_req);
> -	if (ret)
> -		return ret;
> +	if (overlay->flip_request == NULL)
> +		return 0;
>  
> -	overlay->flip_tail = tail;
> -	ret = i915_wait_seqno(ring, overlay->last_flip_req);
> +	ret = i915_wait_request(overlay->flip_request);
>  	if (ret)
>  		return ret;
> -	i915_gem_retire_requests(dev);
>  
> -	overlay->last_flip_req = 0;
> +	i915_request_put(overlay->flip_request);
> +	overlay->flip_request = NULL;
> +
> +	i915_gem_retire_requests(overlay->dev);
> +
> +	if (overlay->flip_tail)
> +		overlay->flip_tail(overlay);
> +
>  	return 0;
>  }
>  
> +static int intel_overlay_add_request(struct intel_overlay *overlay,
> +				     struct intel_engine_cs *ring,
> +				     void (*tail)(struct intel_overlay *))
> +{
> +	BUG_ON(overlay->flip_request);
> +	overlay->flip_request = i915_request_get(intel_ring_get_request(ring));
> +	overlay->flip_tail = tail;
> +
> +	return i915_add_request(ring);
> +}
> +
> +static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
> +					 struct intel_engine_cs *ring,
> +					 void (*tail)(struct intel_overlay *))
> +{
> +	intel_overlay_add_request(overlay, ring, tail);
> +	return intel_overlay_recover_from_interrupt(overlay);
> +}
> +
>  /* overlay needs to be disable in OCMD reg */
>  static int intel_overlay_on(struct intel_overlay *overlay)
>  {
> @@ -253,9 +273,9 @@ static int intel_overlay_on(struct intel_overlay *overlay)
>  	intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
>  	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
>  	intel_ring_emit(ring, MI_NOOP);
> -	intel_ring_advance(ring);
> +	__intel_ring_advance(ring);
>  
> -	return intel_overlay_do_wait_request(overlay, NULL);
> +	return intel_overlay_do_wait_request(overlay, ring, NULL);
>  }
>  
>  /* overlay needs to be enabled in OCMD reg */
> @@ -285,15 +305,18 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
>  
>  	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
>  	intel_ring_emit(ring, flip_addr);
> -	intel_ring_advance(ring);
> +	__intel_ring_advance(ring);
>  
> -	return i915_add_request(ring, &overlay->last_flip_req);
> +	return intel_overlay_add_request(overlay, ring, NULL);
>  }
>  
>  static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
>  {
>  	struct drm_i915_gem_object *obj = overlay->old_vid_bo;
>  
> +	i915_gem_track_fb(obj, NULL,
> +			  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
> +
>  	i915_gem_object_ggtt_unpin(obj);
>  	drm_gem_object_unreference(&obj->base);
>  
> @@ -353,33 +376,9 @@ static int intel_overlay_off(struct intel_overlay *overlay)
>  		intel_ring_emit(ring, flip_addr);
>  		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
>  	}
> -	intel_ring_advance(ring);
> -
> -	return intel_overlay_do_wait_request(overlay, intel_overlay_off_tail);
> -}
> -
> -/* recover from an interruption due to a signal
> - * We have to be careful not to repeat work forever an make forward progess. */
> -static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
> -{
> -	struct drm_device *dev = overlay->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> -	int ret;
> -
> -	if (overlay->last_flip_req == 0)
> -		return 0;
> +	__intel_ring_advance(ring);
>  
> -	ret = i915_wait_seqno(ring, overlay->last_flip_req);
> -	if (ret)
> -		return ret;
> -	i915_gem_retire_requests(dev);
> -
> -	if (overlay->flip_tail)
> -		overlay->flip_tail(overlay);
> -
> -	overlay->last_flip_req = 0;
> -	return 0;
> +	return intel_overlay_do_wait_request(overlay, ring, intel_overlay_off_tail);
>  }
>  
>  /* Wait for pending overlay flip and release old frame.
> @@ -388,10 +387,8 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
>   */
>  static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
>  {
> -	struct drm_device *dev = overlay->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> -	int ret;
> +	struct drm_i915_private *dev_priv = to_i915(overlay->dev);
> +	int ret = 0;
>  
>  	/* Only wait if there is actually an old frame to release to
>  	 * guarantee forward progress.
> @@ -400,6 +397,8 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
>  		return 0;
>  
>  	if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
> +		struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> +
>  		/* synchronous slowpath */
>  		ret = intel_ring_begin(ring, 2);
>  		if (ret)
> @@ -407,20 +406,14 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
>  
>  		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
>  		intel_ring_emit(ring, MI_NOOP);
> -		intel_ring_advance(ring);
> +		__intel_ring_advance(ring);
>  
> -		ret = intel_overlay_do_wait_request(overlay,
> +		ret = intel_overlay_do_wait_request(overlay, ring,
>  						    intel_overlay_release_old_vid_tail);
> -		if (ret)
> -			return ret;
> -	}
> -
> -	intel_overlay_release_old_vid_tail(overlay);
> +	} else
> +		intel_overlay_release_old_vid_tail(overlay);
>  
> -
> -	i915_gem_track_fb(overlay->old_vid_bo, NULL,
> -			  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
> -	return 0;
> +	return ret;
>  }
>  
>  struct put_image_params {
> @@ -827,12 +820,7 @@ int intel_overlay_switch_off(struct intel_overlay *overlay)
>  	iowrite32(0, &regs->OCMD);
>  	intel_overlay_unmap_regs(overlay, regs);
>  
> -	ret = intel_overlay_off(overlay);
> -	if (ret != 0)
> -		return ret;
> -
> -	intel_overlay_off_tail(overlay);
> -	return 0;
> +	return intel_overlay_off(overlay);
>  }
>  
>  static int check_overlay_possible_on_crtc(struct intel_overlay *overlay,
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 7c5a6c5..ae96de5 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -726,7 +726,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,
>  					   PIPE_CONTROL_FLUSH_ENABLE);
>  		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
>  		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
> -		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
> +		intel_ring_emit(signaller, signaller->preallocated_request->seqno);
>  		intel_ring_emit(signaller, 0);
>  		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
>  					   MI_SEMAPHORE_TARGET(waiter->id));
> @@ -763,7 +763,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,
>  		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
>  					   MI_FLUSH_DW_USE_GTT);
>  		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
> -		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
> +		intel_ring_emit(signaller, signaller->preallocated_request->seqno);
>  		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
>  					   MI_SEMAPHORE_TARGET(waiter->id));
>  		intel_ring_emit(signaller, 0);
> @@ -797,7 +797,7 @@ static int gen6_signal(struct intel_engine_cs *signaller,
>  		if (mbox_reg != GEN6_NOSYNC) {
>  			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
>  			intel_ring_emit(signaller, mbox_reg);
> -			intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
> +			intel_ring_emit(signaller, signaller->preallocated_request->seqno);
>  		}
>  	}
>  
> @@ -832,7 +832,7 @@ gen6_add_request(struct intel_engine_cs *ring)
>  
>  	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
>  	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
> -	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
> +	intel_ring_emit(ring, ring->preallocated_request->seqno);
>  	intel_ring_emit(ring, MI_USER_INTERRUPT);
>  	__intel_ring_advance(ring);
>  
> @@ -950,7 +950,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
>  			PIPE_CONTROL_WRITE_FLUSH |
>  			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
>  	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
> -	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
> +	intel_ring_emit(ring, ring->preallocated_request->seqno);
>  	intel_ring_emit(ring, 0);
>  	PIPE_CONTROL_FLUSH(ring, scratch_addr);
>  	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
> @@ -969,7 +969,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
>  			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
>  			PIPE_CONTROL_NOTIFY);
>  	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
> -	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
> +	intel_ring_emit(ring, ring->preallocated_request->seqno);
>  	intel_ring_emit(ring, 0);
>  	__intel_ring_advance(ring);
>  
> @@ -1224,7 +1224,7 @@ i9xx_add_request(struct intel_engine_cs *ring)
>  
>  	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
>  	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
> -	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
> +	intel_ring_emit(ring, ring->preallocated_request->seqno);
>  	intel_ring_emit(ring, MI_USER_INTERRUPT);
>  	__intel_ring_advance(ring);
>  
> @@ -1602,7 +1602,8 @@ static int intel_init_ring_buffer(struct drm_device *dev,
>  	}
>  
>  	ring->dev = dev;
> -	INIT_LIST_HEAD(&ring->active_list);
> +	INIT_LIST_HEAD(&ring->read_list);
> +	INIT_LIST_HEAD(&ring->write_list);
>  	INIT_LIST_HEAD(&ring->request_list);
>  	ringbuf->size = 32 * PAGE_SIZE;
>  	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
> @@ -1662,8 +1663,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
>  	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
>  
>  	intel_destroy_ringbuffer_obj(ringbuf);
> -	ring->preallocated_lazy_request = NULL;
> -	ring->outstanding_lazy_seqno = 0;
> +	ring->preallocated_request = NULL;
>  
>  	if (ring->cleanup)
>  		ring->cleanup(ring);
> @@ -1679,8 +1679,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
>  static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
>  {
>  	struct intel_ringbuffer *ringbuf = ring->buffer;
> -	struct drm_i915_gem_request *request;
> -	u32 seqno = 0;
> +	struct i915_gem_request *rq;
>  	int ret;
>  
>  	if (ringbuf->last_retired_head != -1) {
> @@ -1692,17 +1691,15 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
>  			return 0;
>  	}
>  
> -	list_for_each_entry(request, &ring->request_list, list) {
> -		if (__ring_space(request->tail, ringbuf->tail, ringbuf->size) >= n) {
> -			seqno = request->seqno;
> +	list_for_each_entry(rq, &ring->request_list, list) {
> +		if (__ring_space(rq->tail, ringbuf->tail, ringbuf->size) >= n)
>  			break;
> -		}
>  	}
>  
> -	if (seqno == 0)
> +	if (rq == list_entry(&ring->request_list, typeof(*rq), list))
>  		return -ENOSPC;
>  
> -	ret = i915_wait_seqno(ring, seqno);
> +	ret = i915_wait_request(rq);
>  	if (ret)
>  		return ret;
>  
> @@ -1803,12 +1800,11 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
>  
>  int intel_ring_idle(struct intel_engine_cs *ring)
>  {
> -	u32 seqno;
>  	int ret;
>  
>  	/* We need to add any requests required to flush the objects and ring */
> -	if (ring->outstanding_lazy_seqno) {
> -		ret = i915_add_request(ring, NULL);
> +	if (ring->preallocated_request) {
> +		ret = i915_add_request(ring);
>  		if (ret)
>  			return ret;
>  	}
> @@ -1817,30 +1813,36 @@ int intel_ring_idle(struct intel_engine_cs *ring)
>  	if (list_empty(&ring->request_list))
>  		return 0;
>  
> -	seqno = list_entry(ring->request_list.prev,
> -			   struct drm_i915_gem_request,
> -			   list)->seqno;
> -
> -	return i915_wait_seqno(ring, seqno);
> +	return i915_wait_request(container_of(ring->request_list.prev,
> +					      struct i915_gem_request,
> +					      list));
>  }
>  
>  static int
> -intel_ring_alloc_seqno(struct intel_engine_cs *ring)
> +intel_ring_alloc_request(struct intel_engine_cs *ring)
>  {
> -	if (ring->outstanding_lazy_seqno)
> -		return 0;
> +	struct i915_gem_request *rq;
> +	int ret;
>  
> -	if (ring->preallocated_lazy_request == NULL) {
> -		struct drm_i915_gem_request *request;
> +	if (ring->preallocated_request)
> +		return 0;
>  
> -		request = kmalloc(sizeof(*request), GFP_KERNEL);
> -		if (request == NULL)
> -			return -ENOMEM;
> +	rq = kmalloc(sizeof(*rq), GFP_KERNEL);
> +	if (rq == NULL)
> +		return -ENOMEM;
>  
> -		ring->preallocated_lazy_request = request;
> +	ret = i915_gem_get_seqno(ring->dev, &rq->seqno);
> +	if (ret) {
> +		kfree(rq);
> +		return ret;
>  	}
>  
> -	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
> +	kref_init(&rq->kref);
> +	rq->ring = ring;
> +	rq->completed = false;
> +
> +	ring->preallocated_request = rq;
> +	return 0;
>  }
>  
>  static int __intel_ring_prepare(struct intel_engine_cs *ring,
> @@ -1876,7 +1878,7 @@ int intel_ring_begin(struct intel_engine_cs *ring,
>  		return ret;
>  
>  	/* Preallocate the olr before touching the ring, */
> -	ret = intel_ring_alloc_seqno(ring);
> +	ret = intel_ring_alloc_request(ring);
>  	if (ret)
>  		return ret;
>  
> @@ -1886,7 +1888,7 @@ int intel_ring_begin(struct intel_engine_cs *ring,
>  		return ret;
>  
>  	/* but we may flush the seqno during prepare. */
> -	ret = intel_ring_alloc_seqno(ring);
> +	ret = intel_ring_alloc_request(ring);
>  	if (ret)
>  		return ret;
>  
> @@ -1921,7 +1923,7 @@ void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
>  	struct drm_device *dev = ring->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  
> -	BUG_ON(ring->outstanding_lazy_seqno);
> +	BUG_ON(ring->preallocated_request);
>  
>  	if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
>  		I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
> @@ -2300,7 +2302,8 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
>  	ring->cleanup = render_ring_cleanup;
>  
>  	ring->dev = dev;
> -	INIT_LIST_HEAD(&ring->active_list);
> +	INIT_LIST_HEAD(&ring->read_list);
> +	INIT_LIST_HEAD(&ring->write_list);
>  	INIT_LIST_HEAD(&ring->request_list);
>  
>  	ringbuf->size = size;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index dcd2e44..2a78051 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -222,7 +222,7 @@ struct  intel_engine_cs {
>  	 *
>  	 * A reference is held on the buffer while on this list.
>  	 */
> -	struct list_head active_list;
> +	struct list_head read_list, write_list, fence_list;
>  
>  	/**
>  	 * List of breadcrumbs associated with GPU requests currently
> @@ -233,8 +233,7 @@ struct  intel_engine_cs {
>  	/**
>  	 * Do we have some not yet emitted requests outstanding?
>  	 */
> -	struct drm_i915_gem_request *preallocated_lazy_request;
> -	u32 outstanding_lazy_seqno;
> +	struct i915_gem_request *preallocated_request;
>  	bool gpu_caches_dirty;
>  	bool fbc_dirty;
>  
> @@ -393,10 +392,10 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
>  	return ringbuf->tail;
>  }
>  
> -static inline u32 intel_ring_get_seqno(struct intel_engine_cs *ring)
> +static inline struct i915_gem_request *intel_ring_get_request(struct intel_engine_cs *ring)
>  {
> -	BUG_ON(ring->outstanding_lazy_seqno == 0);
> -	return ring->outstanding_lazy_seqno;
> +	BUG_ON(ring->preallocated_request == 0);
> +	return ring->preallocated_request;
>  }
>  
>  static inline void i915_trace_irq_get(struct intel_engine_cs *ring, u32 seqno)
> -- 
> 1.9.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] drm/i915: s/seqno/request/ tracking inside objects
  2014-07-25 12:27 [PATCH] drm/i915: s/seqno/request/ tracking inside objects Chris Wilson
  2014-07-28 16:24 ` Daniel Vetter
@ 2014-07-28 20:44 ` Jesse Barnes
  2014-07-29  6:53   ` Chris Wilson
  2014-07-29  7:29   ` Chris Wilson
  1 sibling, 2 replies; 8+ messages in thread
From: Jesse Barnes @ 2014-07-28 20:44 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Daniel Vetter, intel-gfx, Brad Volkin

On Fri, 25 Jul 2014 13:27:00 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> @@ -614,12 +615,12 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
>  				seq_printf(m, "Flip pending (waiting for vsync) on pipe %c (plane %c)\n",
>  					   pipe, plane);
>  			}
> -			if (work->ring)
> +			if (work->flip_queued_request) {
> +				struct i915_gem_request *rq = work->flip_queued_request;
>  				seq_printf(m, "Flip queued on %s at seqno %u, now %u\n",
> -						work->ring->name,
> -						work->flip_queued_seqno,
> -						work->ring->get_seqno(work->ring, true));
> -			else
> +						rq->ring->name, rq->seqno,
> +						rq->ring->get_seqno(rq->ring, true));
> +			} else
>  				seq_printf(m, "Flip not associated with any ring\n");

I wonder if the overlay, flip queue and unlocked wait changes could be
split out somehow; I think they're the trickiest part of the change...

It does look like you're doing get/puts in the right places, though I
didn't check the error paths (and I'm not familiar at all with the
overlay bits, I guess Daniel will have to look at that).

>  			seq_printf(m, "Flip queued on frame %d, (was ready on frame %d), now %d\n",
>  				   work->flip_queued_vblank,
> @@ -656,7 +657,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
>  	struct drm_device *dev = node->minor->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_engine_cs *ring;
> -	struct drm_i915_gem_request *gem_request;
> +	struct i915_gem_request *rq;
>  	int ret, count, i;
>  
>  	ret = mutex_lock_interruptible(&dev->struct_mutex);
> @@ -669,12 +670,10 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
>  			continue;
>  
>  		seq_printf(m, "%s requests:\n", ring->name);
> -		list_for_each_entry(gem_request,
> -				    &ring->request_list,
> -				    list) {
> +		list_for_each_entry(rq, &ring->request_list, list) {
>  			seq_printf(m, "    %d @ %d\n",
> -				   gem_request->seqno,
> -				   (int) (jiffies - gem_request->emitted_jiffies));
> +				   rq->seqno,
> +				   (int)(jiffies - rq->emitted_jiffies));
>  		}
>  		count++;
>  	}

Semi gratuitous rename (though I know you renamed the struct to catch
all the users).

> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 9837b0f..5794d096 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -187,6 +187,7 @@ enum hpd_pin {
>  struct drm_i915_private;
>  struct i915_mm_struct;
>  struct i915_mmu_object;
> +struct i915_gem_request;
>  
>  enum intel_dpll_id {
>  	DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
> @@ -1720,16 +1721,15 @@ struct drm_i915_gem_object {
>  	struct drm_mm_node *stolen;
>  	struct list_head global_list;
>  
> -	struct list_head ring_list;
>  	/** Used in execbuf to temporarily hold a ref */
>  	struct list_head obj_exec_link;
>  
>  	/**
>  	 * This is set if the object is on the active lists (has pending
> -	 * rendering and so a non-zero seqno), and is not set if it i s on
> -	 * inactive (ready to be unbound) list.
> +	 * rendering and so a submitted request), and is not set if it is on
> +	 * inactive (ready to be unbound) list. We track activity per engine.
>  	 */
> -	unsigned int active:1;
> +	unsigned int active:3;

I wonder if it would be better to be explicit and have a num_rings
sized array here then.  We need 4 bits anyway for the second video ring
right?  We'd need a wrapper to check for active then though...
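
Roughly what I have in mind (just a sketch, the helper name is made up
and it is not part of the patch as posted):

static inline bool
i915_gem_object_is_active(struct drm_i915_gem_object *obj)
{
	int i;

	/* Derive "active" from the per-engine tracking rather than
	 * keeping a separate counter in the object.
	 */
	for (i = 0; i < I915_NUM_RINGS; i++)
		if (obj->last_read[i].request)
			return true;

	return false;
}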

> @@ -1850,7 +1850,9 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
>   * sequence-number comparisons on buffer last_rendering_seqnos, and associate
>   * an emission time with seqnos for tracking how far ahead of the GPU we are.
>   */
> -struct drm_i915_gem_request {
> +struct i915_gem_request {
> +	struct kref kref;
> +
>  	/** On Which ring this request was generated */
>  	struct intel_engine_cs *ring;
>  
> @@ -1878,8 +1880,60 @@ struct drm_i915_gem_request {
>  	struct drm_i915_file_private *file_priv;
>  	/** file_priv list entry for this request */
>  	struct list_head client_list;
> +
> +	bool completed:1;
>  };

If Daniel pulls in Greg's tree, the kref could turn into a fence and
the completed could be removed.

>  
> +static inline struct intel_engine_cs *i915_request_ring(struct i915_gem_request *rq)
> +{
> +	return rq ? rq->ring : NULL;
> +}
> +
> +static inline int i915_request_ring_id(struct i915_gem_request *rq)
> +{
> +	return rq ? rq->ring->id : -1;
> +}
> +
> +static inline u32 i915_request_seqno(struct i915_gem_request *rq)
> +{
> +	return rq ? rq->seqno : 0;
> +}
> +
> +/**
> + * Returns true if seq1 is later than seq2.
> + */
> +static inline bool
> +__i915_seqno_passed(uint32_t seq1, uint32_t seq2)
> +{
> +	return (int32_t)(seq1 - seq2) >= 0;
> +}
> +
> +static inline bool
> +i915_request_complete(struct i915_gem_request *rq, bool lazy)
> +{
> +	if (!rq->completed)
> +		rq->completed = __i915_seqno_passed(rq->ring->get_seqno(rq->ring, lazy),
> +						    rq->seqno);
> +	return rq->completed;
> +}
> +
> +static inline struct i915_gem_request *
> +i915_request_get(struct i915_gem_request *rq)
> +{
> +	if (rq)
> +		kref_get(&rq->kref);
> +	return rq;
> +}
> +
> +void __i915_request_free(struct kref *kref);
> +
> +static inline void
> +i915_request_put(struct i915_gem_request *rq)
> +{
> +	if (rq)
> +		kref_put(&rq->kref, __i915_request_free);
> +}

Several of these helpers could be pushed separately to reduce the patch
size.

> +static void
> +i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
> +{
> +	intel_fb_obj_flush(obj, true);
> +	obj->last_write.request = NULL;
> +	list_del_init(&obj->last_write.ring_list);
> +}
> +
> +static void
> +i915_gem_object_retire__fence(struct drm_i915_gem_object *obj)
> +{
> +	obj->last_fence.request = NULL;
> +	list_del_init(&obj->last_fence.ring_list);
> +}
> +
> +static void
> +i915_gem_object_retire__read(struct drm_i915_gem_object *obj,
> +			     struct intel_engine_cs *ring)
> +{
> +	struct i915_vma *vma;
> +
> +	BUG_ON(obj->active == 0);
> +	BUG_ON(obj->base.write_domain);
> +
> +	obj->last_read[ring->id].request = NULL;
> +	list_del_init(&obj->last_read[ring->id].ring_list);
> +
> +	if (--obj->active)
> +		return;
> +
> +	BUG_ON(obj->last_write.request);
> +	BUG_ON(obj->last_fence.request);
> +
> +	list_for_each_entry(vma, &obj->vma_list, vma_link) {
> +		if (!list_empty(&vma->mm_list))
> +			list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
> +	}
> +
> +	drm_gem_object_unreference(&obj->base);
> +
> +	WARN_ON(i915_verify_lists(dev));
> +}

I notice this __ notation elsewhere too; I guess you're using it to
denote internal functions rather than a __ prefix?  Maybe we could use
gcc embedded functions... :)

> +
> +static void
> +i915_gem_object_retire(struct drm_i915_gem_object *obj)
> +{
> +	struct i915_gem_request *rq;
> +	int i;
> +
> +	if (!obj->active)
> +		return;
> +
> +	rq = obj->last_write.request;
> +	if (rq && i915_request_complete(rq, true))
> +		i915_gem_object_retire__write(obj);
> +
> +	rq = obj->last_fence.request;
> +	if (rq && i915_request_complete(rq, true))
> +		i915_gem_object_retire__fence(obj);
> +
> +	for (i = 0; i < I915_NUM_RINGS; i++) {
> +		rq = obj->last_read[i].request;
> +		if (rq && i915_request_complete(rq, true))
> +			i915_gem_object_retire__read(obj, rq->ring);
> +	}
> +}

Unrelated comment on GEM in general: it seems like the callers of this
all use it after they've waited on the object; maybe it should just
be called from wait_rendering() to avoid any potential trouble?  The
name is a bit ambiguous; it could be taken to mean that it does the
idling.

>
> @@ -1544,19 +1594,37 @@ static __must_check int
>  i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
>  			       bool readonly)
>  {
> -	struct intel_engine_cs *ring = obj->ring;
> -	u32 seqno;
> -	int ret;
> +	int i, ret;
>  
> -	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
> -	if (seqno == 0)
> -		return 0;
> +	if (readonly) {
> +		if (obj->last_write.request == NULL)
> +			return 0;
>  
> -	ret = i915_wait_seqno(ring, seqno);
> -	if (ret)
> -		return ret;
> +		ret = i915_wait_request(obj->last_write.request);
> +		if (ret)
> +			return ret;
> +	} else {
> +		for (i = 0; i < I915_NUM_RINGS; i++) {
> +			if (obj->last_read[i].request == NULL)
> +				continue;
> +
> +			ret = i915_wait_request(obj->last_read[i].request);
> +			if (ret)
> +				return ret;
> +		}
> +	}
>  
> -	return i915_gem_object_wait_rendering__tail(obj, ring);
> +	/* Manually manage the write flush as we may have not yet
> +	 * retired the buffer.
> +	 *
> +	 * Note that the last_write_seqno is always the earlier of
> +	 * the two (read/write) seqno, so if we haved successfully waited,
> +	 * we know we have passed the last write.
> +	 */

Does this comment belong above where we do the read checking?

> +	if (obj->last_write.request)
> +		i915_gem_object_retire__write(obj);
> +
> +	return 0;
>  }

> @@ -1569,34 +1637,48 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
>  {
>  	struct drm_device *dev = obj->base.dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_engine_cs *ring = obj->ring;
> +	struct i915_gem_request *rq[I915_NUM_RINGS] = {};
>  	unsigned reset_counter;
> -	u32 seqno;
> -	int ret;
> +	int i, n, ret;
>  
>  	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
>  	BUG_ON(!dev_priv->mm.interruptible);
>  
> -	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
> -	if (seqno == 0)
> +	n = 0;
> +	if (readonly) {
> +		if (obj->last_write.request)
> +			rq[n++] = i915_request_get(obj->last_write.request);
> +	} else {
> +		for (i = 0; i < I915_NUM_RINGS; i++)
> +			if (obj->last_read[i].request)
> +				rq[n++] = i915_request_get(obj->last_read[i].request);
> +	}

There's a similar pattern elsewhere (though w/o the readonly checking),
I wonder if we can add a small function for this?
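
Something like this is what I'm thinking of (name and signature
invented here, not in the patch as posted):

static int
i915_gem_object_get_requests(struct drm_i915_gem_object *obj,
			     bool readonly,
			     struct i915_gem_request *rq[I915_NUM_RINGS])
{
	int i, n = 0;

	/* Collect referenced copies of the outstanding requests so the
	 * caller can drop struct_mutex and wait on them safely.
	 */
	if (readonly) {
		if (obj->last_write.request)
			rq[n++] = i915_request_get(obj->last_write.request);
	} else {
		for (i = 0; i < I915_NUM_RINGS; i++)
			if (obj->last_read[i].request)
				rq[n++] = i915_request_get(obj->last_read[i].request);
	}

	return n;
}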

> +	if (n == 0)
>  		return 0;
>  
>  	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
>  	if (ret)
> -		return ret;
> +		goto out;
>  
> -	ret = i915_gem_check_olr(ring, seqno);
> -	if (ret)
> -		return ret;
> +	for (i = 0; i < n; i++) {
> +		ret = i915_gem_check_olr(rq[i]);
> +		if (ret)
> +			goto out;
> +	}
>  
>  	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
>  	mutex_unlock(&dev->struct_mutex);
> -	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file_priv);
> +
> +	for (i = 0; ret == 0 && i < n; i++)
> +		ret = __wait_request(rq[i], reset_counter, true, NULL, file_priv);
> +
>  	mutex_lock(&dev->struct_mutex);
> -	if (ret)
> -		return ret;
>  
> -	return i915_gem_object_wait_rendering__tail(obj, ring);
> +out:
> +	for (i = 0; i < n; i++)
> +		i915_request_put(rq[i]);
> +
> +	return ret;
>  }

Looks like you got the error path/put handling correct here.

>  void i915_vma_move_to_active(struct i915_vma *vma,
> -			     struct intel_engine_cs *ring)
> +			     struct intel_engine_cs *ring,
> +			     unsigned fenced)
>  {
> -	list_move_tail(&vma->mm_list, &vma->vm->active_list);
> -	return i915_gem_object_move_to_active(vma->obj, ring);
> -}
> +	struct drm_i915_gem_object *obj = vma->obj;
> +	struct i915_gem_request *rq = intel_ring_get_request(ring);
> +	u32 old_read = obj->base.read_domains;
> +	u32 old_write = obj->base.write_domain;

I'm already getting confused about which fence we're talking about.
But I guess that's for me to worry about when I add a fence obj.

> @@ -2533,11 +2594,10 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
>  
>  int __i915_add_request(struct intel_engine_cs *ring,
>  		       struct drm_file *file,
> -		       struct drm_i915_gem_object *obj,
> -		       u32 *out_seqno)
> +		       struct drm_i915_gem_object *obj)
>  {
>  	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> -	struct drm_i915_gem_request *request;
> +	struct i915_gem_request *rq;
>  	u32 request_ring_position, request_start;
>  	int ret;
>  
> @@ -2553,8 +2613,8 @@ int __i915_add_request(struct intel_engine_cs *ring,
>  	if (ret)
>  		return ret;
>  
> -	request = ring->preallocated_lazy_request;
> -	if (WARN_ON(request == NULL))
> +	rq = ring->preallocated_request;
> +	if (WARN_ON(rq == NULL))
>  		return -ENOMEM;
>  
>  	/* Record the position of the start of the request so that
> @@ -2568,10 +2628,8 @@ int __i915_add_request(struct intel_engine_cs
> *ring, if (ret)
>  		return ret;
>  
> -	request->seqno = intel_ring_get_seqno(ring);
> -	request->ring = ring;
> -	request->head = request_start;
> -	request->tail = request_ring_position;
> +	rq->head = request_start;
> +	rq->tail = request_ring_position;
>  
>  	/* Whilst this request exists, batch_obj will be on the
>  	 * active_list, and so will hold the active reference. Only when this
> @@ -2579,32 +2637,31 @@ int __i915_add_request(struct intel_engine_cs *ring,
>  	 * inactive_list and lose its active reference. Hence we do not need
>  	 * to explicitly hold another reference here.
>  	 */
> -	request->batch_obj = obj;
> +	rq->batch_obj = obj;
>  
>  	/* Hold a reference to the current context so that we can inspect
>  	 * it later in case a hangcheck error event fires.
>  	 */
> -	request->ctx = ring->last_context;
> -	if (request->ctx)
> -		i915_gem_context_reference(request->ctx);
> +	rq->ctx = ring->last_context;
> +	if (rq->ctx)
> +		i915_gem_context_reference(rq->ctx);
>  
> -	request->emitted_jiffies = jiffies;
> -	list_add_tail(&request->list, &ring->request_list);
> -	request->file_priv = NULL;
> +	rq->emitted_jiffies = jiffies;
> +	list_add_tail(&rq->list, &ring->request_list);
> +	rq->file_priv = NULL;
>  
>  	if (file) {
>  		struct drm_i915_file_private *file_priv = file->driver_priv;
>  
>  		spin_lock(&file_priv->mm.lock);
> -		request->file_priv = file_priv;
> -		list_add_tail(&request->client_list,
> +		rq->file_priv = file_priv;
> +		list_add_tail(&rq->client_list,
>  			      &file_priv->mm.request_list);
>  		spin_unlock(&file_priv->mm.lock);
>  	}
>  
> -	trace_i915_gem_request_add(ring, request->seqno);
> -	ring->outstanding_lazy_seqno = 0;
> -	ring->preallocated_lazy_request = NULL;
> +	trace_i915_gem_request_add(ring, rq->seqno);
> +	ring->preallocated_request = NULL;
>  
>  	if (!dev_priv->ums.mm_suspended) {
>  		i915_queue_hangcheck(ring->dev);
> @@ -2616,22 +2673,20 @@ int __i915_add_request(struct intel_engine_cs *ring,
>  		intel_mark_busy(dev_priv->dev);
>  	}
>  
> -	if (out_seqno)
> -		*out_seqno = request->seqno;
>  	return 0;
>  }

Nearly gratuitous rename.  But only nearly.

>  
>  static inline void
> -i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
> +i915_gem_request_remove_from_client(struct i915_gem_request *rq)
>  {
> -	struct drm_i915_file_private *file_priv = request->file_priv;
> +	struct drm_i915_file_private *file_priv = rq->file_priv;
>  
>  	if (!file_priv)
>  		return;
>  
>  	spin_lock(&file_priv->mm.lock);
> -	list_del(&request->client_list);
> -	request->file_priv = NULL;
> +	list_del(&rq->client_list);
> +	rq->file_priv = NULL;
>  	spin_unlock(&file_priv->mm.lock);
>  }

Gratuitous!  But still ok.

>  
> @@ -2679,30 +2734,37 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
>  	}
>  }
>  
> -static void i915_gem_free_request(struct drm_i915_gem_request *request)
> +void __i915_request_free(struct kref *kref)
> +{
> +	struct i915_gem_request *rq = container_of(kref, struct i915_gem_request, kref);
> +	kfree(rq);
> +}
> +
> +static void i915_request_retire(struct i915_gem_request *rq)
>  {
> -	list_del(&request->list);
> -	i915_gem_request_remove_from_client(request);
> +	rq->completed = true;
> +
> +	list_del(&rq->list);
> +	i915_gem_request_remove_from_client(rq);
>  
> -	if (request->ctx)
> -		i915_gem_context_unreference(request->ctx);
> +	if (rq->ctx) {
> +		i915_gem_context_unreference(rq->ctx);
> +		rq->ctx = NULL;
> +	}
>  
> -	kfree(request);
> +	i915_request_put(rq);
>  }

I wonder if we can somehow always use this function instead of having
both obj and request retire functions.  Maybe if we moved obj retire
into the rendering sync functions we'd have a little less pseudo
duplication.


> @@ -2967,11 +3085,10 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct drm_i915_gem_wait *args = data;
>  	struct drm_i915_gem_object *obj;
> -	struct intel_engine_cs *ring = NULL;
>  	struct timespec timeout_stack, *timeout = NULL;
> +	struct i915_gem_request *rq[I915_NUM_RINGS] = {};
>  	unsigned reset_counter;
> -	u32 seqno = 0;
> -	int ret = 0;
> +	int i, n, ret = 0;
>  
>  	if (args->timeout_ns >= 0) {
>  		timeout_stack = ns_to_timespec(args->timeout_ns);
> @@ -2993,13 +3110,8 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  	if (ret)
>  		goto out;
>  
> -	if (obj->active) {
> -		seqno = obj->last_read_seqno;
> -		ring = obj->ring;
> -	}
> -
> -	if (seqno == 0)
> -		 goto out;
> +	if (!obj->active)
> +		goto out;
>  
>  	/* Do this after OLR check to make sure we make forward progress polling
>  	 * on this IOCTL with a 0 timeout (like busy ioctl)
> @@ -3009,11 +3121,25 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  		goto out;
>  	}
>  
> +	for (i = n = 0; i < I915_NUM_RINGS; i++) {
> +		if (obj->last_read[i].request == NULL)
> +			continue;
> +
> +		rq[n++] = i915_request_get(obj->last_read[i].request);
> +	}
> +
>  	drm_gem_object_unreference(&obj->base);
> +
>  	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
>  	mutex_unlock(&dev->struct_mutex);
>  
> -	ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv);
> +	for (i = 0; i < n; i++) {
> +		if (ret == 0)
> +			ret = __wait_request(rq[i], reset_counter, true, timeout, file->driver_priv);
> +
> +		i915_request_put(rq[i]);
> +	}
> +
>  	if (timeout)
>  		args->timeout_ns = timespec_to_ns(timeout);
>  	return ret;
> @@ -3024,6 +3150,45 @@ out:
>  	return ret;
>  }

request_retire does a put, where is the corresponding get so that we
don't lose the object when the lock is dropped here or in the
nonblocking wait?  Or should request_retire not do a put?


>  
> +static int
> +i915_request_sync(struct i915_gem_request *rq,
> +		  struct intel_engine_cs *to,
> +		  struct drm_i915_gem_object *obj)
> +{
> +	int ret, idx;
> +
> +	if (to == NULL)
> +		return i915_wait_request(rq);
> +
> +	/* XXX this is broken by VEBOX+ */

What does this comment mean?  How does VEBOX break this?  Does it not
have semaphore support or something?

> @@ -3038,44 +3203,35 @@ out:
>   */
>  int
>  i915_gem_object_sync(struct drm_i915_gem_object *obj,
> -		     struct intel_engine_cs *to)
> +		     struct intel_engine_cs *to,
> +		     bool readonly)
>  {

It might be nice to have sync_read/sync_write functions instead, since
looking up bool args or unnamed enums is a pain.

>  static bool
>  ring_idle(struct intel_engine_cs *ring, u32 seqno)
>  {
>  	return (list_empty(&ring->request_list) ||
> -		i915_seqno_passed(seqno, ring_last_seqno(ring)));
> +		__i915_seqno_passed(seqno, ring_last_seqno(ring)));
>  }

Unrelated question: why do we have two checks here?  Shouldn't the
seqno check always be sufficient?  Or is the list_empty check for the
uninitialized case?

> @@ -9856,7 +9859,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  	} else if (IS_IVYBRIDGE(dev)) {
>  		ring = &dev_priv->ring[BCS];
>  	} else if (INTEL_INFO(dev)->gen >= 7) {
> -		ring = obj->ring;
> +		ring = i915_request_ring(obj->last_write.request);
>  		if (ring == NULL || ring->id != RCS)
>  			ring = &dev_priv->ring[BCS];
>  	} else {

I like the flip changes, they make things easier to follow in general.

>  static int
> -intel_ring_alloc_seqno(struct intel_engine_cs *ring)
> +intel_ring_alloc_request(struct intel_engine_cs *ring)
>  {
> -	if (ring->outstanding_lazy_seqno)
> -		return 0;
> +	struct i915_gem_request *rq;
> +	int ret;
>  
> -	if (ring->preallocated_lazy_request == NULL) {
> -		struct drm_i915_gem_request *request;
> +	if (ring->preallocated_request)
> +		return 0;
>  
> -		request = kmalloc(sizeof(*request), GFP_KERNEL);
> -		if (request == NULL)
> -			return -ENOMEM;
> +	rq = kmalloc(sizeof(*rq), GFP_KERNEL);
> +	if (rq == NULL)
> +		return -ENOMEM;
>  
> -		ring->preallocated_lazy_request = request;
> +	ret = i915_gem_get_seqno(ring->dev, &rq->seqno);
> +	if (ret) {
> +		kfree(rq);
> +		return ret;
>  	}
>  
> -	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
> +	kref_init(&rq->kref);
> +	rq->ring = ring;
> +	rq->completed = false;
> +
> +	ring->preallocated_request = rq;
> +	return 0;
>  }

Makes me wonder if we should have our own slab for the objs.  Might
save a bit of mem and/or perf?  But then could reduce our cache hit
rate, dunno.

Overall this gets my:
Fatigued-reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
given its size and with the changes/comments above addressed.  Also,
someone else will have to check over the overlay and perf bits; I
assume those are mostly mechanical, but I'm not that familiar with
those bits.

Thanks,
Jesse

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] drm/i915: s/seqno/request/ tracking inside objects
  2014-07-28 20:44 ` Jesse Barnes
@ 2014-07-29  6:53   ` Chris Wilson
  2014-07-29  7:29   ` Chris Wilson
  1 sibling, 0 replies; 8+ messages in thread
From: Chris Wilson @ 2014-07-29  6:53 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Daniel Vetter, intel-gfx, Brad Volkin

On Mon, Jul 28, 2014 at 01:44:12PM -0700, Jesse Barnes wrote:
> > +static void
> > +i915_gem_object_retire(struct drm_i915_gem_object *obj)
> > +{
> > +	struct i915_gem_request *rq;
> > +	int i;
> > +
> > +	if (!obj->active)
> > +		return;
> > +
> > +	rq = obj->last_write.request;
> > +	if (rq && i915_request_complete(rq, true))
> > +		i915_gem_object_retire__write(obj);
> > +
> > +	rq = obj->last_fence.request;
> > +	if (rq && i915_request_complete(rq, true))
> > +		i915_gem_object_retire__fence(obj);
> > +
> > +	for (i = 0; i < I915_NUM_RINGS; i++) {
> > +		rq = obj->last_read[i].request;
> > +		if (rq && i915_request_complete(rq, true))
> > +			i915_gem_object_retire__read(obj, rq->ring);
> > +	}
> > +}
> 
> Unrelated comment on GEM in general: it seems like the callers of this
> all use it after they've waited on the object; maybe it should just
> be called from wait_rendering() to avoid any potential trouble?  The
> name is a bit ambiguous; it could be taken to mean that it does the
> idling.

No. There be dragons. Quite a few of the (indirect) callers of
wait_rendering() cannot handle the recursive freeing of requests/objects
and so we must carefully inspect each caller.
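
To illustrate the sort of thing that bites (a made-up caller, not code
from the patch): if wait_rendering() retired implicitly, anything
iterating the request or active lists across the wait would be left
holding stale pointers.

static int example_flush_requests(struct intel_engine_cs *ring)
{
	struct i915_gem_request *rq, *next;

	list_for_each_entry_safe(rq, next, &ring->request_list, list) {
		int ret = i915_wait_request(rq);
		if (ret)
			return ret;
		/* If the wait also retired, both rq and the prefetched
		 * next pointer may already have been freed.
		 */
	}

	return 0;
}
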
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] drm/i915: s/seqno/request/ tracking inside objects
  2014-07-28 20:44 ` Jesse Barnes
  2014-07-29  6:53   ` Chris Wilson
@ 2014-07-29  7:29   ` Chris Wilson
  2014-07-29 10:41     ` Daniel Vetter
  1 sibling, 1 reply; 8+ messages in thread
From: Chris Wilson @ 2014-07-29  7:29 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Daniel Vetter, intel-gfx, Brad Volkin

On Mon, Jul 28, 2014 at 01:44:12PM -0700, Jesse Barnes wrote:
> > +static void i915_request_retire(struct i915_gem_request *rq)
> >  {
> > -	list_del(&request->list);
> > -	i915_gem_request_remove_from_client(request);
> > +	rq->completed = true;
> > +
> > +	list_del(&rq->list);
> > +	i915_gem_request_remove_from_client(rq);
> >  
> > -	if (request->ctx)
> > -		i915_gem_context_unreference(request->ctx);
> > +	if (rq->ctx) {
> > +		i915_gem_context_unreference(rq->ctx);
> > +		rq->ctx = NULL;
> > +	}
> >  
> > -	kfree(request);
> > +	i915_request_put(rq);
> >  }
> 
> I wonder if we can somehow always use this function instead of having
> both obj and request retire functions.  Maybe if we moved obj retire
> into the rendering sync functions we'd have a little less pseudo
> duplication.

I kept it as it was as I was trying to avoid increasing the number of
calls to ring->get_seqno. Elsewhere converting to a fence is good at
reducing the number of calls (as that completed:1 is likely to catch
most cases). But here I think we really do want to treat this en masse.

> request_retire does a put, where is the corresponding get so that we
> don't lose the object when the lock is dropped here or in the
> nonblocking wait?  Or should request_retire not do a put?

Ownership flows from:
  intel_ring_begin -> i915_add_request -> retire_request.
Everyone else has to grab their own references.
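
In other words, the rule the unlocked waiters already follow (sketch
only; the wrapper is made up and __wait_request() is static to
i915_gem.c):

static int example_unlocked_wait(struct drm_i915_gem_object *obj,
				 struct drm_device *dev,
				 unsigned reset_counter,
				 struct drm_i915_file_private *file_priv)
{
	struct i915_gem_request *rq;
	int ret;

	/* Take our own reference before dropping struct_mutex... */
	rq = i915_request_get(obj->last_write.request);
	mutex_unlock(&dev->struct_mutex);

	ret = __wait_request(rq, reset_counter, true, NULL, file_priv);

	mutex_lock(&dev->struct_mutex);
	/* ...and drop it again; the request's own reference is still
	 * owned by the ring and released by i915_request_retire().
	 */
	i915_request_put(rq);
	return ret;
}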

> > +static int
> > +i915_request_sync(struct i915_gem_request *rq,
> > +		  struct intel_engine_cs *to,
> > +		  struct drm_i915_gem_object *obj)
> > +{
> > +	int ret, idx;
> > +
> > +	if (to == NULL)
> > +		return i915_wait_request(rq);
> > +
> > +	/* XXX this is broken by VEBOX+ */
> 
> What does this comment mean?  How does VEBOX break this?  Does it not
> have semaphore support or something?

It's a very old comment. In theory it should have been fixed by the recent
gen8 semaphore work.

> > @@ -3038,44 +3203,35 @@ out:
> >   */
> >  int
> >  i915_gem_object_sync(struct drm_i915_gem_object *obj,
> > -		     struct intel_engine_cs *to)
> > +		     struct intel_engine_cs *to,
> > +		     bool readonly)
> >  {
> 
> It might be nice to have sync_read/sync_write functions instead, since
> looking up bool args or unnamed enums is a pain.

Not convinced since it is used in a single location and two function
calls would look inelegant - but we could switch to PROT_READ |
PROT_WRITE throughout.
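
i.e. something along the lines of (hypothetical signature, not in this
patch):

int i915_gem_object_sync(struct drm_i915_gem_object *obj,
			 struct intel_engine_cs *to,
			 unsigned prot); /* PROT_READ or PROT_READ | PROT_WRITE */

so that the call site reads naturally:

	ret = i915_gem_object_sync(obj, ring,
				   obj->base.pending_write_domain ?
				   PROT_READ | PROT_WRITE : PROT_READ);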

> >  static bool
> >  ring_idle(struct intel_engine_cs *ring, u32 seqno)
> >  {
> >  	return (list_empty(&ring->request_list) ||
> > -		i915_seqno_passed(seqno, ring_last_seqno(ring)));
> > +		__i915_seqno_passed(seqno, ring_last_seqno(ring)));
> >  }
> 
> Unrelated question: why do we have two checks here?  Shouldn't the
> seqno check always be sufficient?  Or is the list_empty check for the
> uninitialized case?

We can't test the seqno without testing that the pointer is valid -
that's the first check. And since we retire lazily we cannot rely on the
request_list being empty itself.
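
For reference, ring_last_seqno() just peeks at the tail of the request
list (reconstructed from memory, so treat as a sketch):

static u32 ring_last_seqno(struct intel_engine_cs *ring)
{
	return list_entry(ring->request_list.prev,
			  struct i915_gem_request, list)->seqno;
}

which is only safe to call once list_empty() has been ruled out.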

> >  static int
> > -intel_ring_alloc_seqno(struct intel_engine_cs *ring)
> > +intel_ring_alloc_request(struct intel_engine_cs *ring)
> >  {
> > -	if (ring->outstanding_lazy_seqno)
> > -		return 0;
> > +	struct i915_gem_request *rq;
> > +	int ret;
> >  
> > -	if (ring->preallocated_lazy_request == NULL) {
> > -		struct drm_i915_gem_request *request;
> > +	if (ring->preallocated_request)
> > +		return 0;
> >  
> > -		request = kmalloc(sizeof(*request), GFP_KERNEL);
> > -		if (request == NULL)
> > -			return -ENOMEM;
> > +	rq = kmalloc(sizeof(*rq), GFP_KERNEL);
> > +	if (rq == NULL)
> > +		return -ENOMEM;
> >  
> > -		ring->preallocated_lazy_request = request;
> > +	ret = i915_gem_get_seqno(ring->dev, &rq->seqno);
> > +	if (ret) {
> > +		kfree(rq);
> > +		return ret;
> >  	}
> >  
> > -	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
> > +	kref_init(&rq->kref);
> > +	rq->ring = ring;
> > +	rq->completed = false;
> > +
> > +	ring->preallocated_request = rq;
> > +	return 0;
> >  }
> 
> Makes me wonder if we should have our own slab for the objs.  Might
> save a bit of mem and/or perf?  But then could reduce our cache hit
> rate, dunno.

It's just a case of whether we are in a general slab or a named slab.
Having a named slab is nice for debugging, and we definitely have a high
enough reuse rate to justify our own slab.
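
A named slab would be a small, self-contained follow-up along these
lines (sketch only; the names and the init hook are made up):

static struct kmem_cache *i915_request_slab;

static int i915_request_slab_init(void)
{
	i915_request_slab = KMEM_CACHE(i915_gem_request, SLAB_HWCACHE_ALIGN);
	return i915_request_slab ? 0 : -ENOMEM;
}

/* intel_ring_alloc_request() would then allocate with
 * kmem_cache_zalloc(i915_request_slab, GFP_KERNEL) and
 * __i915_request_free() would release with kmem_cache_free().
 */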

Elsewhere some useful comments deleted to save electrons :-)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] drm/i915: s/seqno/request/ tracking inside objects
  2014-07-29  7:29   ` Chris Wilson
@ 2014-07-29 10:41     ` Daniel Vetter
  2014-07-29 14:54       ` Jesse Barnes
  0 siblings, 1 reply; 8+ messages in thread
From: Daniel Vetter @ 2014-07-29 10:41 UTC (permalink / raw)
  To: Chris Wilson, Jesse Barnes, intel-gfx, Daniel Vetter,
	Oscar Mateo, Brad Volkin

On Tue, Jul 29, 2014 at 08:29:53AM +0100, Chris Wilson wrote:
> On Mon, Jul 28, 2014 at 01:44:12PM -0700, Jesse Barnes wrote:
> > > @@ -3038,44 +3203,35 @@ out:
> > >   */
> > >  int
> > >  i915_gem_object_sync(struct drm_i915_gem_object *obj,
> > > -		     struct intel_engine_cs *to)
> > > +		     struct intel_engine_cs *to,
> > > +		     bool readonly)
> > >  {
> > 
> > It might be nice to have sync_read/sync_write functions instead, since
> > looking up bool args or unnamed enums is a pain.
> 
> Not convinced since it is used in a single location and two function
> calls would look unelegant - but we could switch to PROT_READ |
> PROT_WRITE throughout.

Switching to PROT_READ/WRITE might be nice as a general cleanup (or some
other named thing) since read_only/write booleans are all over the place.
So I like this, but definitely something for a separate patch.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] drm/i915: s/seqno/request/ tracking inside objects
  2014-07-29 10:41     ` Daniel Vetter
@ 2014-07-29 14:54       ` Jesse Barnes
  0 siblings, 0 replies; 8+ messages in thread
From: Jesse Barnes @ 2014-07-29 14:54 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Daniel Vetter, intel-gfx, Brad Volkin

On Tue, 29 Jul 2014 12:41:26 +0200
Daniel Vetter <daniel@ffwll.ch> wrote:

> On Tue, Jul 29, 2014 at 08:29:53AM +0100, Chris Wilson wrote:
> > On Mon, Jul 28, 2014 at 01:44:12PM -0700, Jesse Barnes wrote:
> > > > @@ -3038,44 +3203,35 @@ out:
> > > >   */
> > > >  int
> > > >  i915_gem_object_sync(struct drm_i915_gem_object *obj,
> > > > -		     struct intel_engine_cs *to)
> > > > +		     struct intel_engine_cs *to,
> > > > +		     bool readonly)
> > > >  {
> > > 
> > > It might be nice to have sync_read/sync_write functions instead, since
> > > looking up bool args or unnamed enums is a pain.
> > 
> > Not convinced since it is used in a single location and two function
> > calls would look unelegant - but we could switch to PROT_READ |
> > PROT_WRITE throughout.
> 
> Switching to PROT_READ/WRITE might be nice as a general cleanup (or some
> other named thing) since read_only/write booleans are all over the place.
> So I like this, but definitely something for a separate patch.

Agreed, that's a good idea.

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH] drm/i915: s/seqno/request/ tracking inside objects
@ 2014-07-25 12:19 Chris Wilson
  0 siblings, 0 replies; 8+ messages in thread
From: Chris Wilson @ 2014-07-25 12:19 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, Brad Volkin

At the heart of this change is that the seqno is a too low level of an
abstraction to handle the growing complexities of command tracking, both
with the introduction of multiple command queues with execbuffer and the
potential for reordering with a scheduler. On top of the seqno we have
the request. Conceptually this is just a fence, but it also has
substantial bookkeeping of its own in order to track the context and
batch in flight, for example. It is the central structure upon which we
can extend with dependency tracking et al.

As regards the objects, they were using the seqno as a simple fence,
upon which is check or even wait upon for command completion. This patch
exchanges that seqno/ring pair with the request itself. For the
majority, lifetime of the request is ordered by how we retire objects
then requests. However, both the unlocked waits and probing elsewhere do
not tie into the normal request lifetimes and so we need to introduce a
kref. Extending the objects to use the request as the fence naturally
extends to segregrating read/write fence tracking. This has significance
for it reduces the number of semaphores we need to emit, reducing the
likelihood of #54226, and improving performance overall.

NOTE: this is not against bare drm-intel-nightly and is likely to
conflict with execlists...

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Oscar Mateo <oscar.mateo@intel.com>
Cc: Brad Volkin <bradley.d.volkin@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c          |  37 +-
 drivers/gpu/drm/i915/i915_drv.h              | 108 ++--
 drivers/gpu/drm/i915/i915_gem.c              | 769 ++++++++++++++++-----------
 drivers/gpu/drm/i915/i915_gem_context.c      |  19 +-
 drivers/gpu/drm/i915/i915_gem_exec.c         |  10 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  37 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c |   5 +-
 drivers/gpu/drm/i915/i915_gem_tiling.c       |   2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c        |  35 +-
 drivers/gpu/drm/i915/i915_irq.c              |   6 +-
 drivers/gpu/drm/i915/i915_perf.c             |   6 +-
 drivers/gpu/drm/i915/i915_trace.h            |   2 +-
 drivers/gpu/drm/i915/intel_display.c         |  50 +-
 drivers/gpu/drm/i915/intel_drv.h             |   3 +-
 drivers/gpu/drm/i915/intel_overlay.c         | 118 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c      |  83 +--
 drivers/gpu/drm/i915/intel_ringbuffer.h      |  11 +-
 17 files changed, 745 insertions(+), 556 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 406e630..676d5f1 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -122,10 +122,11 @@ static inline const char *get_global_flag(struct drm_i915_gem_object *obj)
 static void
 describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 {
+	struct i915_gem_request *rq = i915_gem_object_last_read(obj);
 	struct i915_vma *vma;
 	int pin_count = 0;
 
-	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %u %u %u%s%s%s",
+	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
 		   &obj->base,
 		   get_pin_flag(obj),
 		   get_tiling_flag(obj),
@@ -133,9 +134,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		   obj->base.size / 1024,
 		   obj->base.read_domains,
 		   obj->base.write_domain,
-		   obj->last_read_seqno,
-		   obj->last_write_seqno,
-		   obj->last_fenced_seqno,
+		   i915_request_seqno(rq),
+		   i915_request_seqno(obj->last_write.request),
+		   i915_request_seqno(obj->last_fence.request),
 		   i915_cache_level_str(obj->cache_level),
 		   obj->dirty ? " dirty" : "",
 		   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
@@ -168,8 +169,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		*t = '\0';
 		seq_printf(m, " (%s mappable)", s);
 	}
-	if (obj->ring != NULL)
-		seq_printf(m, " (%s)", obj->ring->name);
+	if (rq)
+		seq_printf(m, " (%s)", rq->ring->name);
 	if (obj->frontbuffer_bits)
 		seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
 }
@@ -336,7 +337,7 @@ static int per_file_stats(int id, void *ptr, void *data)
 			if (ppgtt->ctx && ppgtt->ctx->file_priv != stats->file_priv)
 				continue;
 
-			if (obj->ring) /* XXX per-vma statistic */
+			if (obj->active) /* XXX per-vma statistic */
 				stats->active += obj->base.size;
 			else
 				stats->inactive += obj->base.size;
@@ -346,7 +347,7 @@ static int per_file_stats(int id, void *ptr, void *data)
 	} else {
 		if (i915_gem_obj_ggtt_bound(obj)) {
 			stats->global += obj->base.size;
-			if (obj->ring)
+			if (obj->active)
 				stats->active += obj->base.size;
 			else
 				stats->inactive += obj->base.size;
@@ -614,12 +615,12 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
 				seq_printf(m, "Flip pending (waiting for vsync) on pipe %c (plane %c)\n",
 					   pipe, plane);
 			}
-			if (work->ring)
+			if (work->flip_queued_request) {
+				struct i915_gem_request *rq = work->flip_queued_request;
 				seq_printf(m, "Flip queued on %s at seqno %u, now %u\n",
-						work->ring->name,
-						work->flip_queued_seqno,
-						work->ring->get_seqno(work->ring, true));
-			else
+						rq->ring->name, rq->seqno,
+						rq->ring->get_seqno(rq->ring, true));
+			} else
 				seq_printf(m, "Flip not associated with any ring\n");
 			seq_printf(m, "Flip queued on frame %d, (was ready on frame %d), now %d\n",
 				   work->flip_queued_vblank,
@@ -656,7 +657,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring;
-	struct drm_i915_gem_request *gem_request;
+	struct i915_gem_request *rq;
 	int ret, count, i;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
@@ -669,12 +670,10 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
 			continue;
 
 		seq_printf(m, "%s requests:\n", ring->name);
-		list_for_each_entry(gem_request,
-				    &ring->request_list,
-				    list) {
+		list_for_each_entry(rq, &ring->request_list, list) {
 			seq_printf(m, "    %d @ %d\n",
-				   gem_request->seqno,
-				   (int) (jiffies - gem_request->emitted_jiffies));
+				   rq->seqno,
+				   (int)(jiffies - rq->emitted_jiffies));
 		}
 		count++;
 	}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9837b0f..5794d096 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -187,6 +187,7 @@ enum hpd_pin {
 struct drm_i915_private;
 struct i915_mm_struct;
 struct i915_mmu_object;
+struct i915_gem_request;
 
 enum intel_dpll_id {
 	DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
@@ -1720,16 +1721,15 @@ struct drm_i915_gem_object {
 	struct drm_mm_node *stolen;
 	struct list_head global_list;
 
-	struct list_head ring_list;
 	/** Used in execbuf to temporarily hold a ref */
 	struct list_head obj_exec_link;
 
 	/**
 	 * This is set if the object is on the active lists (has pending
-	 * rendering and so a non-zero seqno), and is not set if it i s on
-	 * inactive (ready to be unbound) list.
+	 * rendering and so a submitted request), and is not set if it is on
+	 * inactive (ready to be unbound) list. We track activity per engine.
 	 */
-	unsigned int active:1;
+	unsigned int active:3;
 
 	/**
 	 * This is set if the object has been written to since last bound
@@ -1797,13 +1797,11 @@ struct drm_i915_gem_object {
 	void *dma_buf_vmapping;
 	int vmapping_count;
 
-	struct intel_engine_cs *ring;
-
-	/** Breadcrumb of last rendering to the buffer. */
-	uint32_t last_read_seqno;
-	uint32_t last_write_seqno;
-	/** Breadcrumb of last fenced GPU access to the buffer. */
-	uint32_t last_fenced_seqno;
+	/** Breadcrumbs of last rendering to the buffer. */
+	struct {
+		struct i915_gem_request *request;
+		struct list_head ring_list;
+	} last_write, last_read[I915_NUM_RINGS], last_fence;
 
 	/** Current tiling stride for the object, if it's tiled. */
 	uint32_t stride;
@@ -1836,6 +1834,8 @@ struct drm_i915_gem_object {
 };
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
+struct i915_gem_request *i915_gem_object_last_read(struct drm_i915_gem_object *obj);
+
 void i915_gem_track_fb(struct drm_i915_gem_object *old,
 		       struct drm_i915_gem_object *new,
 		       unsigned frontbuffer_bits);
@@ -1850,7 +1850,9 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
  * sequence-number comparisons on buffer last_rendering_seqnos, and associate
  * an emission time with seqnos for tracking how far ahead of the GPU we are.
  */
-struct drm_i915_gem_request {
+struct i915_gem_request {
+	struct kref kref;
+
 	/** On Which ring this request was generated */
 	struct intel_engine_cs *ring;
 
@@ -1878,8 +1880,60 @@ struct drm_i915_gem_request {
 	struct drm_i915_file_private *file_priv;
 	/** file_priv list entry for this request */
 	struct list_head client_list;
+
+	bool completed:1;
 };
 
+static inline struct intel_engine_cs *i915_request_ring(struct i915_gem_request *rq)
+{
+	return rq ? rq->ring : NULL;
+}
+
+static inline int i915_request_ring_id(struct i915_gem_request *rq)
+{
+	return rq ? rq->ring->id : -1;
+}
+
+static inline u32 i915_request_seqno(struct i915_gem_request *rq)
+{
+	return rq ? rq->seqno : 0;
+}
+
+/**
+ * Returns true if seq1 is later than seq2.
+ */
+static inline bool
+__i915_seqno_passed(uint32_t seq1, uint32_t seq2)
+{
+	return (int32_t)(seq1 - seq2) >= 0;
+}
+
+static inline bool
+i915_request_complete(struct i915_gem_request *rq, bool lazy)
+{
+	if (!rq->completed)
+		rq->completed = __i915_seqno_passed(rq->ring->get_seqno(rq->ring, lazy),
+						    rq->seqno);
+	return rq->completed;
+}
+
+static inline struct i915_gem_request *
+i915_request_get(struct i915_gem_request *rq)
+{
+	if (rq)
+		kref_get(&rq->kref);
+	return rq;
+}
+
+void __i915_request_free(struct kref *kref);
+
+static inline void
+i915_request_put(struct i915_gem_request *rq)
+{
+	if (rq)
+		kref_put(&rq->kref, __i915_request_free);
+}
+
 struct drm_i915_file_private {
 	struct drm_i915_private *dev_priv;
 	struct drm_file *file;
@@ -2335,22 +2389,18 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
-			 struct intel_engine_cs *to);
+			 struct intel_engine_cs *to,
+			 bool readonly);
 void i915_vma_move_to_active(struct i915_vma *vma,
-			     struct intel_engine_cs *ring);
+			     struct intel_engine_cs *ring,
+			     unsigned fenced);
+#define VMA_IS_FENCED 0x1
+#define VMA_HAS_FENCE 0x2
 int i915_gem_dumb_create(struct drm_file *file_priv,
 			 struct drm_device *dev,
 			 struct drm_mode_create_dumb *args);
 int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
 		      uint32_t handle, uint64_t *offset);
-/**
- * Returns true if seq1 is later than seq2.
- */
-static inline bool
-i915_seqno_passed(uint32_t seq1, uint32_t seq2)
-{
-	return (int32_t)(seq1 - seq2) >= 0;
-}
 
 int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
 int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
@@ -2360,14 +2410,14 @@ int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
 bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj);
 void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj);
 
-struct drm_i915_gem_request *
+struct i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *ring);
 
 bool i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_retire_requests_ring(struct intel_engine_cs *ring);
 int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
 				      bool interruptible);
-int __must_check i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno);
+int __must_check i915_gem_check_olr(struct i915_gem_request *rq);
 
 static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
 {
@@ -2411,12 +2461,10 @@ int __must_check i915_gpu_idle(struct drm_device *dev);
 int __must_check i915_gem_suspend(struct drm_device *dev);
 int __i915_add_request(struct intel_engine_cs *ring,
 		       struct drm_file *file,
-		       struct drm_i915_gem_object *batch_obj,
-		       u32 *seqno);
-#define i915_add_request(ring, seqno) \
-	__i915_add_request(ring, NULL, NULL, seqno)
-int __must_check i915_wait_seqno(struct intel_engine_cs *ring,
-				 uint32_t seqno);
+		       struct drm_i915_gem_object *batch_obj);
+#define i915_add_request(ring) \
+	__i915_add_request(ring, NULL, NULL)
+int __must_check i915_wait_request(struct i915_gem_request *rq);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 int __must_check
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f3ad6fb..d208658 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -48,8 +48,6 @@ static __must_check int
 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
 					    struct drm_i915_file_private *file_priv,
 					    bool readonly);
-static void
-i915_gem_object_retire(struct drm_i915_gem_object *obj);
 
 static void i915_gem_write_fence(struct drm_device *dev, int reg,
 				 struct drm_i915_gem_object *obj);
@@ -118,6 +116,73 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
 	spin_unlock(&dev_priv->mm.object_stat_lock);
 }
 
+static void
+i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
+{
+	intel_fb_obj_flush(obj, true);
+	obj->last_write.request = NULL;
+	list_del_init(&obj->last_write.ring_list);
+}
+
+static void
+i915_gem_object_retire__fence(struct drm_i915_gem_object *obj)
+{
+	obj->last_fence.request = NULL;
+	list_del_init(&obj->last_fence.ring_list);
+}
+
+static void
+i915_gem_object_retire__read(struct drm_i915_gem_object *obj,
+			     struct intel_engine_cs *ring)
+{
+	struct i915_vma *vma;
+
+	BUG_ON(obj->active == 0);
+	BUG_ON(obj->base.write_domain);
+
+	obj->last_read[ring->id].request = NULL;
+	list_del_init(&obj->last_read[ring->id].ring_list);
+
+	if (--obj->active)
+		return;
+
+	BUG_ON(obj->last_write.request);
+	BUG_ON(obj->last_fence.request);
+
+	list_for_each_entry(vma, &obj->vma_list, vma_link) {
+		if (!list_empty(&vma->mm_list))
+			list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
+	}
+
+	drm_gem_object_unreference(&obj->base);
+
+	WARN_ON(i915_verify_lists(dev));
+}
+
+static void
+i915_gem_object_retire(struct drm_i915_gem_object *obj)
+{
+	struct i915_gem_request *rq;
+	int i;
+
+	if (!obj->active)
+		return;
+
+	rq = obj->last_write.request;
+	if (rq && i915_request_complete(rq, true))
+		i915_gem_object_retire__write(obj);
+
+	rq = obj->last_fence.request;
+	if (rq && i915_request_complete(rq, true))
+		i915_gem_object_retire__fence(obj);
+
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		rq = obj->last_read[i].request;
+		if (rq && i915_request_complete(rq, true))
+			i915_gem_object_retire__read(obj, rq->ring);
+	}
+}
+
 static int
 i915_gem_wait_for_error(struct i915_gpu_error *error)
 {
@@ -1337,15 +1402,15 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
  * equal.
  */
 int
-i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno)
+i915_gem_check_olr(struct i915_gem_request *rq)
 {
 	int ret;
 
-	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
+	BUG_ON(!mutex_is_locked(&rq->ring->dev->struct_mutex));
 
 	ret = 0;
-	if (seqno == ring->outstanding_lazy_seqno)
-		ret = i915_add_request(ring, NULL);
+	if (rq == rq->ring->preallocated_request)
+		ret = i915_add_request(rq->ring);
 
 	return ret;
 }
@@ -1370,9 +1435,8 @@ static bool can_wait_boost(struct drm_i915_file_private *file_priv)
 }
 
 /**
- * __wait_seqno - wait until execution of seqno has finished
- * @ring: the ring expected to report seqno
- * @seqno: duh!
+ * __wait_request - wait until execution of request has finished
+ * @request: the request to wait upon
  * @reset_counter: reset sequence associated with the given seqno
  * @interruptible: do an interruptible wait (normally yes)
  * @timeout: in - how long to wait (NULL forever); out - how much time remaining
@@ -1387,24 +1451,26 @@ static bool can_wait_boost(struct drm_i915_file_private *file_priv)
  * Returns 0 if the seqno was found within the alloted time. Else returns the
  * errno with remaining time filled in timeout argument.
  */
-static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
-			unsigned reset_counter,
-			bool interruptible,
-			struct timespec *timeout,
-			struct drm_i915_file_private *file_priv)
+static int __wait_request(struct i915_gem_request *rq,
+			  unsigned reset_counter,
+			  bool interruptible,
+			  struct timespec *timeout,
+			  struct drm_i915_file_private *file_priv)
 {
+	struct intel_engine_cs *ring = rq->ring;
 	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = to_i915(dev);
 	const bool irq_test_in_progress =
 		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
 	struct timespec before, now;
 	DEFINE_WAIT(wait);
 	unsigned long timeout_expire;
+	u32 seqno = rq->seqno;
 	int ret;
 
 	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
 
-	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
+	if (i915_request_complete(rq, true))
 		return 0;
 
 	timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
@@ -1440,7 +1506,7 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
 			break;
 		}
 
-		if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) {
+		if (i915_request_complete(rq, false)) {
 			ret = 0;
 			break;
 		}
@@ -1494,46 +1560,30 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
  * request and object lists appropriately for that event.
  */
 int
-i915_wait_seqno(struct intel_engine_cs *ring, uint32_t seqno)
+i915_wait_request(struct i915_gem_request *rq)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	bool interruptible = dev_priv->mm.interruptible;
+	struct drm_device *dev = rq->ring->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
 	int ret;
 
-	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
-	BUG_ON(seqno == 0);
+	if (WARN_ON(!mutex_is_locked(&dev->struct_mutex)))
+		return -EINVAL;
+
+	if (i915_request_complete(rq, true))
+		return 0;
 
-	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
+	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
+				   dev_priv->mm.interruptible);
 	if (ret)
 		return ret;
 
-	ret = i915_gem_check_olr(ring, seqno);
+	ret = i915_gem_check_olr(rq);
 	if (ret)
 		return ret;
 
-	return __wait_seqno(ring, seqno,
-			    atomic_read(&dev_priv->gpu_error.reset_counter),
-			    interruptible, NULL, NULL);
-}
-
-static int
-i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
-				     struct intel_engine_cs *ring)
-{
-	if (!obj->active)
-		return 0;
-
-	/* Manually manage the write flush as we may have not yet
-	 * retired the buffer.
-	 *
-	 * Note that the last_write_seqno is always the earlier of
-	 * the two (read/write) seqno, so if we haved successfully waited,
-	 * we know we have passed the last write.
-	 */
-	obj->last_write_seqno = 0;
-
-	return 0;
+	return __wait_request(rq,
+			      atomic_read(&dev_priv->gpu_error.reset_counter),
+			      dev_priv->mm.interruptible, NULL, NULL);
 }
 
 /**
@@ -1544,19 +1594,37 @@ static __must_check int
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 			       bool readonly)
 {
-	struct intel_engine_cs *ring = obj->ring;
-	u32 seqno;
-	int ret;
+	int i, ret;
 
-	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
-	if (seqno == 0)
-		return 0;
+	if (readonly) {
+		if (obj->last_write.request == NULL)
+			return 0;
 
-	ret = i915_wait_seqno(ring, seqno);
-	if (ret)
-		return ret;
+		ret = i915_wait_request(obj->last_write.request);
+		if (ret)
+			return ret;
+	} else {
+		for (i = 0; i < I915_NUM_RINGS; i++) {
+			if (obj->last_read[i].request == NULL)
+				continue;
+
+			ret = i915_wait_request(obj->last_read[i].request);
+			if (ret)
+				return ret;
+		}
+	}
 
-	return i915_gem_object_wait_rendering__tail(obj, ring);
+	/* Manually manage the write flush as we may have not yet
+	 * retired the buffer.
+	 *
+	 * Note that the last_write_seqno is always the earlier of
+	 * the two (read/write) seqno, so if we haved successfully waited,
+	 * we know we have passed the last write.
+	 */
+	if (obj->last_write.request)
+		i915_gem_object_retire__write(obj);
+
+	return 0;
 }
 
 /* A nonblocking variant of the above wait. This is a highly dangerous routine
@@ -1569,34 +1637,48 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring = obj->ring;
+	struct i915_gem_request *rq[I915_NUM_RINGS] = {};
 	unsigned reset_counter;
-	u32 seqno;
-	int ret;
+	int i, n, ret;
 
 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
 	BUG_ON(!dev_priv->mm.interruptible);
 
-	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
-	if (seqno == 0)
+	n = 0;
+	if (readonly) {
+		if (obj->last_write.request)
+			rq[n++] = i915_request_get(obj->last_write.request);
+	} else {
+		for (i = 0; i < I915_NUM_RINGS; i++)
+			if (obj->last_read[i].request)
+				rq[n++] = i915_request_get(obj->last_read[i].request);
+	}
+	if (n == 0)
 		return 0;
 
 	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
 	if (ret)
-		return ret;
+		goto out;
 
-	ret = i915_gem_check_olr(ring, seqno);
-	if (ret)
-		return ret;
+	for (i = 0; i < n; i++) {
+		ret = i915_gem_check_olr(rq[i]);
+		if (ret)
+			goto out;
+	}
 
 	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
 	mutex_unlock(&dev->struct_mutex);
-	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file_priv);
+
+	for (i = 0; ret == 0 && i < n; i++)
+		ret = __wait_request(rq[i], reset_counter, true, NULL, file_priv);
+
 	mutex_lock(&dev->struct_mutex);
-	if (ret)
-		return ret;
 
-	return i915_gem_object_wait_rendering__tail(obj, ring);
+out:
+	for (i = 0; i < n; i++)
+		i915_request_put(rq[i]);
+
+	return ret;
 }
 
 /**
@@ -2387,78 +2469,57 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
-static void
-i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
-			       struct intel_engine_cs *ring)
-{
-	u32 seqno = intel_ring_get_seqno(ring);
-
-	BUG_ON(ring == NULL);
-	if (obj->ring != ring && obj->last_write_seqno) {
-		/* Keep the seqno relative to the current ring */
-		obj->last_write_seqno = seqno;
-	}
-	obj->ring = ring;
-
-	/* Add a reference if we're newly entering the active list. */
-	if (!obj->active) {
-		drm_gem_object_reference(&obj->base);
-		obj->active = 1;
-	}
-
-	list_move_tail(&obj->ring_list, &ring->active_list);
-
-	obj->last_read_seqno = seqno;
-}
-
 void i915_vma_move_to_active(struct i915_vma *vma,
-			     struct intel_engine_cs *ring)
+			     struct intel_engine_cs *ring,
+			     unsigned fenced)
 {
-	list_move_tail(&vma->mm_list, &vma->vm->active_list);
-	return i915_gem_object_move_to_active(vma->obj, ring);
-}
+	struct drm_i915_gem_object *obj = vma->obj;
+	struct i915_gem_request *rq = intel_ring_get_request(ring);
+	u32 old_read = obj->base.read_domains;
+	u32 old_write = obj->base.write_domain;
 
-static void
-i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
-{
-	struct i915_vma *vma;
+	BUG_ON(rq == NULL);
 
-	BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
-	BUG_ON(!obj->active);
+	obj->base.write_domain = obj->base.pending_write_domain;
+	if (obj->base.write_domain == 0)
+		obj->base.pending_read_domains |= obj->base.read_domains;
+	obj->base.read_domains = obj->base.pending_read_domains;
 
-	list_for_each_entry(vma, &obj->vma_list, vma_link) {
-		if (!list_empty(&vma->mm_list))
-			list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
-	}
-
-	intel_fb_obj_flush(obj, true);
-
-	list_del_init(&obj->ring_list);
-	obj->ring = NULL;
+	obj->base.pending_read_domains = 0;
+	obj->base.pending_write_domain = 0;
 
-	obj->last_read_seqno = 0;
-	obj->last_write_seqno = 0;
-	obj->base.write_domain = 0;
+	trace_i915_gem_object_change_domain(obj, old_read, old_write);
+	if (obj->base.read_domains == 0)
+		return;
 
-	obj->last_fenced_seqno = 0;
+	/* Add a reference if we're newly entering the active list. */
+	if (obj->last_read[ring->id].request == NULL && obj->active++ == 0)
+		drm_gem_object_reference(&obj->base);
 
-	obj->active = 0;
-	drm_gem_object_unreference(&obj->base);
+	obj->last_read[ring->id].request = rq;
+	list_move_tail(&obj->last_read[ring->id].ring_list, &ring->read_list);
 
-	WARN_ON(i915_verify_lists(dev));
-}
+	if (obj->base.write_domain) {
+		obj->dirty = 1;
+		obj->last_write.request = rq;
+		list_move_tail(&obj->last_write.ring_list, &ring->write_list);
+		intel_fb_obj_invalidate(obj, ring);
 
-static void
-i915_gem_object_retire(struct drm_i915_gem_object *obj)
-{
-	struct intel_engine_cs *ring = obj->ring;
+		/* update for the implicit flush after a batch */
+		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
+	}
 
-	if (ring == NULL)
-		return;
+	if (fenced) {
+		obj->last_fence.request = rq;
+		list_move_tail(&obj->last_fence.ring_list, &ring->fence_list);
+		if (fenced & 2) {
+			struct drm_i915_private *dev_priv = to_i915(ring->dev);
+			list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
+					&dev_priv->mm.fence_list);
+		}
+	}
 
-	if (i915_seqno_passed(ring->get_seqno(ring, true),
-			      obj->last_read_seqno))
-		i915_gem_object_move_to_inactive(obj);
+	list_move_tail(&vma->mm_list, &vma->vm->active_list);
 }
 
 static int
@@ -2533,11 +2594,10 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
 
 int __i915_add_request(struct intel_engine_cs *ring,
 		       struct drm_file *file,
-		       struct drm_i915_gem_object *obj,
-		       u32 *out_seqno)
+		       struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	struct drm_i915_gem_request *request;
+	struct i915_gem_request *rq;
 	u32 request_ring_position, request_start;
 	int ret;
 
@@ -2553,8 +2613,8 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	if (ret)
 		return ret;
 
-	request = ring->preallocated_lazy_request;
-	if (WARN_ON(request == NULL))
+	rq = ring->preallocated_request;
+	if (WARN_ON(rq == NULL))
 		return -ENOMEM;
 
 	/* Record the position of the start of the request so that
@@ -2568,10 +2628,8 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	if (ret)
 		return ret;
 
-	request->seqno = intel_ring_get_seqno(ring);
-	request->ring = ring;
-	request->head = request_start;
-	request->tail = request_ring_position;
+	rq->head = request_start;
+	rq->tail = request_ring_position;
 
 	/* Whilst this request exists, batch_obj will be on the
 	 * active_list, and so will hold the active reference. Only when this
@@ -2579,32 +2637,31 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	 * inactive_list and lose its active reference. Hence we do not need
 	 * to explicitly hold another reference here.
 	 */
-	request->batch_obj = obj;
+	rq->batch_obj = obj;
 
 	/* Hold a reference to the current context so that we can inspect
 	 * it later in case a hangcheck error event fires.
 	 */
-	request->ctx = ring->last_context;
-	if (request->ctx)
-		i915_gem_context_reference(request->ctx);
+	rq->ctx = ring->last_context;
+	if (rq->ctx)
+		i915_gem_context_reference(rq->ctx);
 
-	request->emitted_jiffies = jiffies;
-	list_add_tail(&request->list, &ring->request_list);
-	request->file_priv = NULL;
+	rq->emitted_jiffies = jiffies;
+	list_add_tail(&rq->list, &ring->request_list);
+	rq->file_priv = NULL;
 
 	if (file) {
 		struct drm_i915_file_private *file_priv = file->driver_priv;
 
 		spin_lock(&file_priv->mm.lock);
-		request->file_priv = file_priv;
-		list_add_tail(&request->client_list,
+		rq->file_priv = file_priv;
+		list_add_tail(&rq->client_list,
 			      &file_priv->mm.request_list);
 		spin_unlock(&file_priv->mm.lock);
 	}
 
-	trace_i915_gem_request_add(ring, request->seqno);
-	ring->outstanding_lazy_seqno = 0;
-	ring->preallocated_lazy_request = NULL;
+	trace_i915_gem_request_add(ring, rq->seqno);
+	ring->preallocated_request = NULL;
 
 	if (!dev_priv->ums.mm_suspended) {
 		i915_queue_hangcheck(ring->dev);
@@ -2616,22 +2673,20 @@ int __i915_add_request(struct intel_engine_cs *ring,
 		intel_mark_busy(dev_priv->dev);
 	}
 
-	if (out_seqno)
-		*out_seqno = request->seqno;
 	return 0;
 }
 
 static inline void
-i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
+i915_gem_request_remove_from_client(struct i915_gem_request *rq)
 {
-	struct drm_i915_file_private *file_priv = request->file_priv;
+	struct drm_i915_file_private *file_priv = rq->file_priv;
 
 	if (!file_priv)
 		return;
 
 	spin_lock(&file_priv->mm.lock);
-	list_del(&request->client_list);
-	request->file_priv = NULL;
+	list_del(&rq->client_list);
+	rq->file_priv = NULL;
 	spin_unlock(&file_priv->mm.lock);
 }
 
@@ -2679,30 +2734,37 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
 	}
 }
 
-static void i915_gem_free_request(struct drm_i915_gem_request *request)
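+/* kref release callback: frees the request once its last reference
+ * has been dropped.
+ */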
+void __i915_request_free(struct kref *kref)
+{
+	struct i915_gem_request *rq = container_of(kref, struct i915_gem_request, kref);
+	kfree(rq);
+}
+
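+/* Retire a completed request: unlink it from the ring and its client,
+ * drop its context reference and the reference held by the request list.
+ */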
+static void i915_request_retire(struct i915_gem_request *rq)
 {
-	list_del(&request->list);
-	i915_gem_request_remove_from_client(request);
+	rq->completed = true;
+
+	list_del(&rq->list);
+	i915_gem_request_remove_from_client(rq);
 
-	if (request->ctx)
-		i915_gem_context_unreference(request->ctx);
+	if (rq->ctx) {
+		i915_gem_context_unreference(rq->ctx);
+		rq->ctx = NULL;
+	}
 
-	kfree(request);
+	i915_request_put(rq);
 }
 
-struct drm_i915_gem_request *
+struct i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *ring)
 {
-	struct drm_i915_gem_request *request;
-	u32 completed_seqno;
+	struct i915_gem_request *rq;
 
-	completed_seqno = ring->get_seqno(ring, false);
-
-	list_for_each_entry(request, &ring->request_list, list) {
-		if (i915_seqno_passed(completed_seqno, request->seqno))
+	list_for_each_entry(rq, &ring->request_list, list) {
+		if (i915_request_complete(rq, false))
 			continue;
 
-		return request;
+		return rq;
 	}
 
 	return NULL;
@@ -2711,33 +2773,53 @@ i915_gem_find_active_request(struct intel_engine_cs *ring)
 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
 				       struct intel_engine_cs *ring)
 {
-	struct drm_i915_gem_request *request;
+	struct i915_gem_request *rq;
 	bool ring_hung;
 
-	request = i915_gem_find_active_request(ring);
+	rq = i915_gem_find_active_request(ring);
 
-	if (request == NULL)
+	if (rq == NULL)
 		return;
 
 	ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
 
-	i915_set_reset_status(dev_priv, request->ctx, ring_hung);
+	i915_set_reset_status(dev_priv, rq->ctx, ring_hung);
 
-	list_for_each_entry_continue(request, &ring->request_list, list)
-		i915_set_reset_status(dev_priv, request->ctx, false);
+	list_for_each_entry_continue(rq, &ring->request_list, list)
+		i915_set_reset_status(dev_priv, rq->ctx, false);
 }
 
 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 					struct intel_engine_cs *ring)
 {
-	while (!list_empty(&ring->active_list)) {
+	while (!list_empty(&ring->write_list)) {
 		struct drm_i915_gem_object *obj;
 
-		obj = list_first_entry(&ring->active_list,
+		obj = list_first_entry(&ring->write_list,
 				       struct drm_i915_gem_object,
-				       ring_list);
+				       last_write.ring_list);
 
-		i915_gem_object_move_to_inactive(obj);
+		i915_gem_object_retire__write(obj);
+	}
+
+	while (!list_empty(&ring->fence_list)) {
+		struct drm_i915_gem_object *obj;
+
+		obj = list_first_entry(&ring->fence_list,
+				       struct drm_i915_gem_object,
+				       last_fence.ring_list);
+
+		i915_gem_object_retire__fence(obj);
+	}
+
+	while (!list_empty(&ring->read_list)) {
+		struct drm_i915_gem_object *obj;
+
+		obj = list_first_entry(&ring->read_list,
+				       struct drm_i915_gem_object,
+				       last_read[ring->id].ring_list);
+
+		i915_gem_object_retire__read(obj, ring);
 	}
 
 	/*
@@ -2748,19 +2830,18 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 	 * the request.
 	 */
 	while (!list_empty(&ring->request_list)) {
-		struct drm_i915_gem_request *request;
+		struct i915_gem_request *rq;
 
-		request = list_first_entry(&ring->request_list,
-					   struct drm_i915_gem_request,
-					   list);
+		rq = list_first_entry(&ring->request_list,
+				      struct i915_gem_request,
+				      list);
 
-		i915_gem_free_request(request);
+		i915_request_retire(rq);
 	}
 
 	/* These may not have been flush before the reset, do so now */
-	kfree(ring->preallocated_lazy_request);
-	ring->preallocated_lazy_request = NULL;
-	ring->outstanding_lazy_seqno = 0;
+	kfree(ring->preallocated_request);
+	ring->preallocated_request = NULL;
 }
 
 void i915_gem_restore_fences(struct drm_device *dev)
@@ -2825,43 +2906,71 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 	 * by the ringbuffer to the flushing/inactive lists as appropriate,
 	 * before we free the context associated with the requests.
 	 */
-	while (!list_empty(&ring->active_list)) {
+	while (!list_empty(&ring->write_list)) {
+		struct drm_i915_gem_object *obj;
+
+		obj = list_first_entry(&ring->write_list,
+				       struct drm_i915_gem_object,
+				       last_write.ring_list);
+
+		if (!__i915_seqno_passed(seqno,
+					 obj->last_write.request->seqno))
+			break;
+
+		i915_gem_object_retire__write(obj);
+	}
+
+	while (!list_empty(&ring->fence_list)) {
 		struct drm_i915_gem_object *obj;
 
-		obj = list_first_entry(&ring->active_list,
-				      struct drm_i915_gem_object,
-				      ring_list);
+		obj = list_first_entry(&ring->fence_list,
+				       struct drm_i915_gem_object,
+				       last_fence.ring_list);
 
-		if (!i915_seqno_passed(seqno, obj->last_read_seqno))
+		if (!__i915_seqno_passed(seqno,
+					 obj->last_fence.request->seqno))
 			break;
 
-		i915_gem_object_move_to_inactive(obj);
+		i915_gem_object_retire__fence(obj);
 	}
 
+	while (!list_empty(&ring->read_list)) {
+		struct drm_i915_gem_object *obj;
+
+		obj = list_first_entry(&ring->read_list,
+				       struct drm_i915_gem_object,
+				       last_read[ring->id].ring_list);
+
+		if (!__i915_seqno_passed(seqno,
+					 obj->last_read[ring->id].request->seqno))
+			break;
+
+		i915_gem_object_retire__read(obj, ring);
+	}
 
 	while (!list_empty(&ring->request_list)) {
-		struct drm_i915_gem_request *request;
+		struct i915_gem_request *rq;
 
-		request = list_first_entry(&ring->request_list,
-					   struct drm_i915_gem_request,
-					   list);
+		rq = list_first_entry(&ring->request_list,
+				      struct i915_gem_request,
+				      list);
 
-		if (!i915_seqno_passed(seqno, request->seqno))
+		if (!__i915_seqno_passed(seqno, rq->seqno))
 			break;
 
-		trace_i915_gem_request_retire(ring, request->seqno);
+		trace_i915_gem_request_retire(ring, rq->seqno);
 		/* We know the GPU must have read the request to have
 		 * sent us the seqno + interrupt, so use the position
 		 * of tail of the request to update the last known position
 		 * of the GPU head.
 		 */
-		ring->buffer->last_retired_head = request->tail;
+		ring->buffer->last_retired_head = rq->tail;
 
-		i915_gem_free_request(request);
+		i915_request_retire(rq);
 	}
 
 	if (unlikely(ring->trace_irq_seqno &&
-		     i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
+		     __i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
 		ring->irq_put(ring);
 		ring->trace_irq_seqno = 0;
 	}
@@ -2926,14 +3035,23 @@ i915_gem_idle_work_handler(struct work_struct *work)
 static int
 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
 {
-	int ret;
+	int i;
 
-	if (obj->active) {
-		ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
+	if (!obj->active)
+		return 0;
+
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		struct i915_gem_request *rq = obj->last_read[i].request;
+		int ret;
+
+		if (rq == NULL)
+			continue;
+
+		ret = i915_gem_check_olr(rq);
 		if (ret)
 			return ret;
 
-		i915_gem_retire_requests_ring(obj->ring);
+		i915_gem_retire_requests_ring(rq->ring);
 	}
 
 	return 0;
@@ -2967,11 +3085,10 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_wait *args = data;
 	struct drm_i915_gem_object *obj;
-	struct intel_engine_cs *ring = NULL;
 	struct timespec timeout_stack, *timeout = NULL;
+	struct i915_gem_request *rq[I915_NUM_RINGS] = {};
 	unsigned reset_counter;
-	u32 seqno = 0;
-	int ret = 0;
+	int i, n, ret = 0;
 
 	if (args->timeout_ns >= 0) {
 		timeout_stack = ns_to_timespec(args->timeout_ns);
@@ -2993,13 +3110,8 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (ret)
 		goto out;
 
-	if (obj->active) {
-		seqno = obj->last_read_seqno;
-		ring = obj->ring;
-	}
-
-	if (seqno == 0)
-		 goto out;
+	if (!obj->active)
+		goto out;
 
 	/* Do this after OLR check to make sure we make forward progress polling
 	 * on this IOCTL with a 0 timeout (like busy ioctl)
@@ -3009,11 +3121,25 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		goto out;
 	}
 
+	for (i = n = 0; i < I915_NUM_RINGS; i++) {
+		if (obj->last_read[i].request == NULL)
+			continue;
+
+		rq[n++] = i915_request_get(obj->last_read[i].request);
+	}
+
 	drm_gem_object_unreference(&obj->base);
+
 	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
 	mutex_unlock(&dev->struct_mutex);
 
-	ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv);
+	for (i = 0; i < n; i++) {
+		if (ret == 0)
+			ret = __wait_request(rq[i], reset_counter, true, timeout, file->driver_priv);
+
+		i915_request_put(rq[i]);
+	}
+
 	if (timeout)
 		args->timeout_ns = timespec_to_ns(timeout);
 	return ret;
@@ -3024,6 +3150,45 @@ out:
 	return ret;
 }
 
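+/* Make the target ring @to wait for @rq to complete before it accesses
+ * @obj: either block on the CPU (when no semaphore target is given) or
+ * emit a semaphore wait on @to for @rq's ring.
+ */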
+static int
+i915_request_sync(struct i915_gem_request *rq,
+		  struct intel_engine_cs *to,
+		  struct drm_i915_gem_object *obj)
+{
+	int ret, idx;
+
+	if (to == NULL)
+		return i915_wait_request(rq);
+
+	/* XXX this is broken by VEBOX+ */
+	idx = intel_ring_sync_index(rq->ring, to);
+
+	/* Optimization: Avoid semaphore sync when we are sure we already
+	 * waited for an object with higher seqno */
+	if (rq->seqno <= rq->ring->semaphore.sync_seqno[idx])
+		return 0;
+
+	ret = i915_gem_check_olr(rq);
+	if (ret)
+		return ret;
+
+	if (!i915_request_complete(rq, true)) {
+		trace_i915_gem_ring_sync_to(rq->ring, to, rq->seqno);
+		ret = to->semaphore.sync_to(to, rq->ring, rq->seqno);
+		if (ret)
+			return ret;
+	}
+
+	/* We must recheck the last_read request because sync_to()
+	 * might have just caused seqno wrap under
+	 * the radar.
+	 */
+	if (obj->last_read[rq->ring->id].request == rq)
+		rq->ring->semaphore.sync_seqno[idx] = rq->seqno;
+
+	return 0;
+}
+
 /**
  * i915_gem_object_sync - sync an object to a ring.
  *
@@ -3038,44 +3203,35 @@ out:
  */
 int
 i915_gem_object_sync(struct drm_i915_gem_object *obj,
-		     struct intel_engine_cs *to)
+		     struct intel_engine_cs *to,
+		     bool readonly)
 {
-	struct intel_engine_cs *from = obj->ring;
-	u32 seqno;
-	int ret, idx;
+	struct i915_gem_request *rq;
+	struct intel_engine_cs *semaphore;
+	int ret = 0, i;
 
-	if (from == NULL || to == from)
-		return 0;
+	semaphore = NULL;
+	if (i915_semaphore_is_enabled(obj->base.dev))
+		semaphore = to;
 
-	if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
-		return i915_gem_object_wait_rendering(obj, false);
-
-	/* XXX this is broken by VEBOX+ */
-	idx = intel_ring_sync_index(from, to);
-
-	seqno = obj->last_read_seqno;
-	/* Optimization: Avoid semaphore sync when we are sure we already
-	 * waited for an object with higher seqno */
-	if (seqno <= from->semaphore.sync_seqno[idx])
-		return 0;
-
-	ret = 0;
-	if (!i915_seqno_passed(from->get_seqno(from, true), seqno)) {
-		ret = i915_gem_check_olr(from, seqno);
-		if (ret)
-			return ret;
+	if (readonly) {
+		rq = obj->last_write.request;
+		if (rq != NULL && to != rq->ring)
+			ret = i915_request_sync(rq, semaphore, obj);
+	} else {
+		for (i = 0; i < I915_NUM_RINGS; i++) {
+			rq = obj->last_read[i].request;
+			if (rq == NULL || to == rq->ring)
+				continue;
 
-		trace_i915_gem_ring_sync_to(from, to, seqno);
-		ret = to->semaphore.sync_to(to, from, seqno);
+			ret = i915_request_sync(rq, semaphore, obj);
+			if (ret)
+				break;
+		}
 	}
-	if (!ret)
-		/* We use last_read_seqno because sync_to()
-		 * might have just caused seqno wrap under
-		 * the radar.
-		 */
-		from->semaphore.sync_seqno[idx] = obj->last_read_seqno;
 
 	return ret;
+
 }
 
 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
@@ -3381,14 +3537,16 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
 static int
 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
 {
-	if (obj->last_fenced_seqno) {
-		int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
-		if (ret)
-			return ret;
+	int ret;
 
-		obj->last_fenced_seqno = 0;
-	}
+	if (obj->last_fence.request == NULL)
+		return 0;
 
+	ret = i915_wait_request(obj->last_fence.request);
+	if (ret)
+		return ret;
+
+	i915_gem_object_retire__fence(obj);
 	return 0;
 }
 
@@ -3836,11 +3994,12 @@ int
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 {
 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+	struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
 	uint32_t old_write_domain, old_read_domains;
 	int ret;
 
 	/* Not valid to be called on unbound objects. */
-	if (!i915_gem_obj_bound_any(obj))
+	if (vma == NULL)
 		return -EINVAL;
 
 	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
@@ -3882,14 +4041,8 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 					    old_write_domain);
 
 	/* And bump the LRU for this access */
-	if (!obj->active) {
-		struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
-		if (vma)
-			list_move_tail(&vma->mm_list,
-				       &dev_priv->gtt.base.inactive_list);
-
-	}
-
+	list_move_tail(&vma->mm_list,
+		       &dev_priv->gtt.base.inactive_list);
 	return 0;
 }
 
@@ -4087,11 +4240,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	bool was_pin_display;
 	int ret;
 
-	if (pipelined != obj->ring) {
-		ret = i915_gem_object_sync(obj, pipelined);
-		if (ret)
-			return ret;
-	}
+	ret = i915_gem_object_sync(obj, pipelined, true);
+	if (ret)
+		return ret;
 
 	/* Mark the pin_display early so that we account for the
 	 * display coherency whilst setting up the cache domains.
@@ -4239,10 +4390,8 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
-	struct drm_i915_gem_request *request;
-	struct intel_engine_cs *ring = NULL;
+	struct i915_gem_request *rq;
 	unsigned reset_counter;
-	u32 seqno = 0;
 	int ret;
 
 	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
@@ -4254,23 +4403,22 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 		return ret;
 
 	spin_lock(&file_priv->mm.lock);
-	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
-		if (time_after_eq(request->emitted_jiffies, recent_enough))
+	list_for_each_entry(rq, &file_priv->mm.request_list, client_list) {
+		if (time_after_eq(rq->emitted_jiffies, recent_enough))
 			break;
-
-		ring = request->ring;
-		seqno = request->seqno;
 	}
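+	/* If the loop completed without breaking, rq now points at the list
+	 * head and there is no request to throttle on.
+	 */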
+	rq = i915_request_get(&rq->client_list == &file_priv->mm.request_list ? NULL : rq);
 	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
 	spin_unlock(&file_priv->mm.lock);
 
-	if (seqno == 0)
+	if (rq == NULL)
 		return 0;
 
-	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL);
+	ret = __wait_request(rq, reset_counter, true, NULL, NULL);
 	if (ret == 0)
 		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
 
+	i915_request_put(rq);
 	return ret;
 }
 
@@ -4488,7 +4636,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_busy *args = data;
 	struct drm_i915_gem_object *obj;
-	int ret;
+	int ret, i;
 
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
@@ -4507,10 +4655,16 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	 */
 	ret = i915_gem_object_flush_active(obj);
 
-	args->busy = obj->active;
-	if (obj->ring) {
+	args->busy = 0;
+	if (obj->active) {
 		BUILD_BUG_ON(I915_NUM_RINGS > 16);
-		args->busy |= intel_ring_flag(obj->ring) << 16;
+		args->busy |= 1;
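+		/* Report which rings still have the object queued for reading. */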
+		for (i = 0; i < I915_NUM_RINGS; i++)  {
+			if (obj->last_read[i].request == NULL)
+				continue;
+
+			args->busy |= 1 << (16 + i);
+		}
 	}
 
 	drm_gem_object_unreference(&obj->base);
@@ -4584,8 +4738,13 @@ unlock:
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			  const struct drm_i915_gem_object_ops *ops)
 {
+	int i;
+
 	INIT_LIST_HEAD(&obj->global_list);
-	INIT_LIST_HEAD(&obj->ring_list);
+	INIT_LIST_HEAD(&obj->last_fence.ring_list);
+	INIT_LIST_HEAD(&obj->last_write.ring_list);
+	for (i = 0; i < I915_NUM_RINGS; i++)
+		INIT_LIST_HEAD(&obj->last_read[i].ring_list);
 	INIT_LIST_HEAD(&obj->obj_exec_link);
 	INIT_LIST_HEAD(&obj->vma_list);
 
@@ -5117,7 +5276,9 @@ i915_gem_lastclose(struct drm_device *dev)
 static void
 init_ring_lists(struct intel_engine_cs *ring)
 {
-	INIT_LIST_HEAD(&ring->active_list);
+	INIT_LIST_HEAD(&ring->read_list);
+	INIT_LIST_HEAD(&ring->write_list);
+	INIT_LIST_HEAD(&ring->fence_list);
 	INIT_LIST_HEAD(&ring->request_list);
 }
 
@@ -5213,13 +5374,13 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 	 */
 	spin_lock(&file_priv->mm.lock);
 	while (!list_empty(&file_priv->mm.request_list)) {
-		struct drm_i915_gem_request *request;
+		struct i915_gem_request *rq;
 
-		request = list_first_entry(&file_priv->mm.request_list,
-					   struct drm_i915_gem_request,
-					   client_list);
-		list_del(&request->client_list);
-		request->file_priv = NULL;
+		rq = list_first_entry(&file_priv->mm.request_list,
+				      struct i915_gem_request,
+				      client_list);
+		list_del(&rq->client_list);
+		rq->file_priv = NULL;
 	}
 	spin_unlock(&file_priv->mm.lock);
 }
@@ -5503,15 +5664,27 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
 
-	/* This WARN has probably outlived its usefulness (callers already
-	 * WARN if they don't find the GGTT vma they expect). When removing,
-	 * remember to remove the pre-check in is_pin_display() as well */
-	if (WARN_ON(list_empty(&obj->vma_list)))
-		return NULL;
-
 	vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link);
 	if (vma->vm != obj_to_ggtt(obj))
 		return NULL;
 
 	return vma;
 }
+
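+/* Return the most recent outstanding read request for the object across
+ * all rings, or NULL if the object is idle.
+ */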
+struct i915_gem_request *i915_gem_object_last_read(struct drm_i915_gem_object *obj)
+{
+	u32 seqno = 0;
+	struct i915_gem_request *rq = NULL;
+	int i;
+
+	/* This is approximate as seqno cannot be used across rings */
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		if (obj->last_read[i].request == NULL)
+			continue;
+
+		if (__i915_seqno_passed(obj->last_read[i].request->seqno, seqno))
+			rq = obj->last_read[i].request, seqno = rq->seqno;
+	}
+
+	return rq;
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 79dc77b..690e2dc 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -394,13 +394,9 @@ void i915_gem_context_reset(struct drm_device *dev)
 		if (!lctx)
 			continue;
 
-		if (dctx->legacy_hw_ctx.rcs_state && i == RCS) {
+		if (dctx->legacy_hw_ctx.rcs_state && i == RCS)
 			WARN_ON(i915_gem_obj_ggtt_pin(dctx->legacy_hw_ctx.rcs_state,
 						      get_context_alignment(dev), 0));
-			/* Fake a finish/inactive */
-			dctx->legacy_hw_ctx.rcs_state->base.write_domain = 0;
-			dctx->legacy_hw_ctx.rcs_state->active = 0;
-		}
 
 		if (lctx->legacy_hw_ctx.rcs_state && i == RCS)
 			i915_gem_object_ggtt_unpin(lctx->legacy_hw_ctx.rcs_state);
@@ -467,7 +463,6 @@ void i915_gem_context_fini(struct drm_device *dev)
 		WARN_ON(!dev_priv->ring[RCS].last_context);
 		if (dev_priv->ring[RCS].last_context == dctx) {
 			/* Fake switch to NULL context */
-			WARN_ON(dctx->legacy_hw_ctx.rcs_state->active);
 			i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state);
 			i915_gem_context_unreference(dctx);
 			dev_priv->ring[RCS].last_context = NULL;
@@ -741,8 +736,11 @@ static int do_switch(struct intel_engine_cs *ring,
 	 * MI_SET_CONTEXT instead of when the next seqno has completed.
 	 */
 	if (from != NULL) {
-		from->legacy_hw_ctx.rcs_state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), ring);
+		struct drm_i915_gem_object *from_obj = from->legacy_hw_ctx.rcs_state;
+
+		from_obj->base.pending_read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from_obj), ring, 0);
+
 		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
 		 * whole damn pipeline, we don't need to explicitly mark the
 		 * object dirty. The only exception is that the context must be
@@ -750,11 +748,10 @@ static int do_switch(struct intel_engine_cs *ring,
 		 * able to defer doing this until we know the object would be
 		 * swapped, but there is no way to do that yet.
 		 */
-		from->legacy_hw_ctx.rcs_state->dirty = 1;
-		BUG_ON(from->legacy_hw_ctx.rcs_state->ring != ring);
+		from_obj->dirty = 1;
 
 		/* obj is kept alive until the next request by its active ref */
-		i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
+		i915_gem_object_ggtt_unpin(from_obj);
 		i915_gem_context_unreference(from);
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_exec.c b/drivers/gpu/drm/i915/i915_gem_exec.c
index 57d4dde..787ea6f 100644
--- a/drivers/gpu/drm/i915/i915_gem_exec.c
+++ b/drivers/gpu/drm/i915/i915_gem_exec.c
@@ -45,7 +45,7 @@ static int i915_gem_exec_flush_object(struct drm_i915_gem_object *obj,
 {
 	int ret;
 
-	ret = i915_gem_object_sync(obj, ring);
+	ret = i915_gem_object_sync(obj, ring, false);
 	if (ret)
 		return ret;
 
@@ -65,11 +65,9 @@ static int i915_gem_exec_flush_object(struct drm_i915_gem_object *obj,
 static void i915_gem_exec_dirty_object(struct drm_i915_gem_object *obj,
 				       struct intel_engine_cs *ring)
 {
-	obj->base.read_domains = I915_GEM_DOMAIN_RENDER;
-	obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
-	i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), ring);
-	obj->last_write_seqno = intel_ring_get_seqno(ring);
-	obj->dirty = 1;
+	obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
+	obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
+	i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), ring, 0);
 
 	ring->gpu_caches_dirty = true;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0faab01..8f1c2a2 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -847,7 +847,8 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
 
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
-		ret = i915_gem_object_sync(obj, ring);
+
+		ret = i915_gem_object_sync(obj, ring, obj->base.pending_write_domain == 0);
 		if (ret)
 			return ret;
 
@@ -956,40 +957,20 @@ static void
 i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 				   struct intel_engine_cs *ring)
 {
-	u32 seqno = intel_ring_get_seqno(ring);
 	struct i915_vma *vma;
 
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-		struct drm_i915_gem_object *obj = vma->obj;
-		u32 old_read = obj->base.read_domains;
-		u32 old_write = obj->base.write_domain;
-
-		obj->base.write_domain = obj->base.pending_write_domain;
-		if (obj->base.write_domain == 0)
-			obj->base.pending_read_domains |= obj->base.read_domains;
-		obj->base.read_domains = obj->base.pending_read_domains;
-
-		i915_vma_move_to_active(vma, ring);
-		if (obj->base.write_domain) {
-			obj->dirty = 1;
-			obj->last_write_seqno = seqno;
+		unsigned fenced;
 
-			intel_fb_obj_invalidate(obj, ring);
-
-			/* update for the implicit flush after a batch */
-			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
-		}
+		fenced = 0;
 		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
-			obj->last_fenced_seqno = seqno;
-			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
-				struct drm_i915_private *dev_priv = to_i915(ring->dev);
-				list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
-					       &dev_priv->mm.fence_list);
-			}
+			fenced |= VMA_IS_FENCED;
+			if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
+				fenced |= VMA_HAS_FENCE;
 		}
 
-		trace_i915_gem_object_change_domain(obj, old_read, old_write);
+		i915_vma_move_to_active(vma, ring, fenced);
 	}
 }
 
@@ -1003,7 +984,7 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
 	ring->gpu_caches_dirty = true;
 
 	/* Add a breadcrumb for the completion of the batch buffer */
-	(void)__i915_add_request(ring, file, obj, NULL);
+	(void)__i915_add_request(ring, file, obj);
 }
 
 static int
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index e60be3f..fc1223c 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -159,9 +159,10 @@ int i915_gem_render_state_init(struct intel_engine_cs *ring)
 	if (ret)
 		goto out;
 
-	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
+	so.obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
+	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring, 0);
 
-	ret = __i915_add_request(ring, NULL, so.obj, NULL);
+	ret = __i915_add_request(ring, NULL, so.obj);
 	/* __i915_add_request moves object to inactive if it fails */
 out:
 	render_state_fini(&so);
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index af5d31a..e46fb34 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -326,7 +326,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 
 	if (ret == 0) {
 		obj->fence_dirty =
-			obj->last_fenced_seqno ||
+			obj->last_fence.request ||
 			obj->fence_reg != I915_FENCE_REG_NONE;
 		obj->tiling_mode = tiling_mode;
 		obj->stride = stride;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index ebc8529..584b863 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -572,7 +572,7 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
 	if (i915_gem_obj_bound(src, vm))
 		dst->gtt_offset = i915_gem_obj_offset(src, vm);
 	else
-		dst->gtt_offset = -1UL;
+		dst->gtt_offset = -1;
 
 	reloc_offset = dst->gtt_offset;
 	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
@@ -653,11 +653,12 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 		       struct i915_vma *vma)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
+	struct i915_gem_request *rq = i915_gem_object_last_read(obj);
 
 	err->size = obj->base.size;
 	err->name = obj->base.name;
-	err->rseqno = obj->last_read_seqno;
-	err->wseqno = obj->last_write_seqno;
+	err->rseqno = i915_request_seqno(rq);
+	err->wseqno = i915_request_seqno(obj->last_write.request);
 	err->gtt_offset = vma->node.start;
 	err->read_domains = obj->base.read_domains;
 	err->write_domain = obj->base.write_domain;
@@ -671,7 +672,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->dirty = obj->dirty;
 	err->purgeable = obj->madv != I915_MADV_WILLNEED;
 	err->userptr = obj->userptr.mm != NULL;
-	err->ring = obj->ring ? obj->ring->id : -1;
+	err->ring = i915_request_ring_id(rq);
 	err->cache_level = obj->cache_level;
 }
 
@@ -963,7 +964,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
 				  struct drm_i915_error_state *error)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_request *request;
+	struct i915_gem_request *rq;
 	int i, count;
 
 	for (i = 0; i < I915_NUM_RINGS; i++) {
@@ -978,17 +979,17 @@ static void i915_gem_record_rings(struct drm_device *dev,
 
 		i915_record_ring_state(dev, error, ring, &error->ring[i]);
 
-		request = i915_gem_find_active_request(ring);
-		if (request) {
+		rq = i915_gem_find_active_request(ring);
+		if (rq) {
 			/* We need to copy these to an anonymous buffer
 			 * as the simplest method to avoid being overwritten
 			 * by userspace.
 			 */
 			error->ring[i].batchbuffer =
 				i915_error_object_create(dev_priv,
-							 request->batch_obj,
-							 request->ctx ?
-							 request->ctx->vm :
+							 rq->batch_obj,
+							 rq->ctx ?
+							 rq->ctx->vm :
 							 &dev_priv->gtt.base);
 
 			if (HAS_BROKEN_CS_TLB(dev_priv))
@@ -996,11 +997,11 @@ static void i915_gem_record_rings(struct drm_device *dev,
 					i915_error_ggtt_object_create(dev_priv,
 							     ring->scratch.obj);
 
-			if (request->file_priv) {
+			if (rq->file_priv) {
 				struct task_struct *task;
 
 				rcu_read_lock();
-				task = pid_task(request->file_priv->file->pid,
+				task = pid_task(rq->file_priv->file->pid,
 						PIDTYPE_PID);
 				if (task) {
 					strcpy(error->ring[i].comm, task->comm);
@@ -1019,7 +1020,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
 		i915_gem_record_active_context(ring, error, &error->ring[i]);
 
 		count = 0;
-		list_for_each_entry(request, &ring->request_list, list)
+		list_for_each_entry(rq, &ring->request_list, list)
 			count++;
 
 		error->ring[i].num_requests = count;
@@ -1032,13 +1033,13 @@ static void i915_gem_record_rings(struct drm_device *dev,
 		}
 
 		count = 0;
-		list_for_each_entry(request, &ring->request_list, list) {
+		list_for_each_entry(rq, &ring->request_list, list) {
 			struct drm_i915_error_request *erq;
 
 			erq = &error->ring[i].requests[count++];
-			erq->seqno = request->seqno;
-			erq->jiffies = request->emitted_jiffies;
-			erq->tail = request->tail;
+			erq->seqno = rq->seqno;
+			erq->jiffies = rq->emitted_jiffies;
+			erq->tail = rq->tail;
 		}
 	}
 }
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 717c111..6d4f5a7 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2935,14 +2935,14 @@ static u32
 ring_last_seqno(struct intel_engine_cs *ring)
 {
 	return list_entry(ring->request_list.prev,
-			  struct drm_i915_gem_request, list)->seqno;
+			  struct i915_gem_request, list)->seqno;
 }
 
 static bool
 ring_idle(struct intel_engine_cs *ring, u32 seqno)
 {
 	return (list_empty(&ring->request_list) ||
-		i915_seqno_passed(seqno, ring_last_seqno(ring)));
+		__i915_seqno_passed(seqno, ring_last_seqno(ring)));
 }
 
 static bool
@@ -3057,7 +3057,7 @@ static int semaphore_passed(struct intel_engine_cs *ring)
 	if (signaller->hangcheck.deadlock >= I915_NUM_RINGS)
 		return -1;
 
-	if (i915_seqno_passed(signaller->get_seqno(signaller, false), seqno))
+	if (__i915_seqno_passed(signaller->get_seqno(signaller, false), seqno))
 		return 1;
 
 	/* cursory check for an unkickable deadlock */
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 75f423d..f1c2a28 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -17,16 +17,16 @@ static bool gpu_active(struct drm_i915_private *i915)
 	int i;
 
 	for_each_ring(ring, i915, i) {
-		struct drm_i915_gem_request *rq;
+		struct i915_gem_request *rq;
 
 		if (list_empty(&ring->request_list))
 			continue;
 
 		rq = list_last_entry(&ring->request_list,
-				     struct drm_i915_gem_request,
+				     struct i915_gem_request,
 				     list);
 
-		if (i915_seqno_passed(ring->get_seqno(ring, true), rq->seqno))
+		if (i915_request_complete(rq, true))
 			continue;
 
 		return true;
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 63f6875..0ebd85d 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -389,7 +389,7 @@ TRACE_EVENT(i915_gem_ring_dispatch,
 	    TP_fast_assign(
 			   __entry->dev = ring->dev->primary->index;
 			   __entry->ring = ring->id;
-			   __entry->seqno = intel_ring_get_seqno(ring),
+			   __entry->seqno = intel_ring_get_request(ring)->seqno,
 			   __entry->flags = flags;
 			   i915_trace_irq_get(ring, __entry->seqno);
 			   ),
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index d828f47..9b7931c 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9167,6 +9167,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
 	BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
 	atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
 
+	i915_request_put(work->flip_queued_request);
 	kfree(work);
 }
 
@@ -9548,7 +9549,7 @@ static bool use_mmio_flip(struct intel_engine_cs *ring,
 	else if (i915.use_mmio_flip > 0)
 		return true;
 	else
-		return ring != obj->ring;
+		return ring != i915_request_ring(obj->last_write.request);
 }
 
 static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
@@ -9581,25 +9582,22 @@ static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
 
 static int intel_postpone_flip(struct drm_i915_gem_object *obj)
 {
-	struct intel_engine_cs *ring;
+	struct i915_gem_request *rq = obj->last_write.request;
 	int ret;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
 
-	if (!obj->last_write_seqno)
-		return 0;
-
-	ring = obj->ring;
-
-	if (i915_seqno_passed(ring->get_seqno(ring, true),
-			      obj->last_write_seqno))
+	if (rq == NULL)
 		return 0;
 
-	ret = i915_gem_check_olr(ring, obj->last_write_seqno);
+	ret = i915_gem_check_olr(rq);
 	if (ret)
 		return ret;
 
-	if (WARN_ON(!ring->irq_get(ring)))
+	if (i915_request_complete(rq, true))
+		return 0;
+
+	if (WARN_ON(!rq->ring->irq_get(rq->ring)))
 		return 0;
 
 	return 1;
@@ -9625,7 +9623,7 @@ void intel_notify_mmio_flip(struct intel_engine_cs *ring)
 		if (ring->id != mmio_flip->ring_id)
 			continue;
 
-		if (i915_seqno_passed(seqno, mmio_flip->seqno)) {
+		if (__i915_seqno_passed(seqno, mmio_flip->seqno)) {
 			intel_do_mmio_flip(intel_crtc);
 			mmio_flip->seqno = 0;
 			ring->irq_put(ring);
@@ -9643,6 +9641,7 @@ static int intel_queue_mmio_flip(struct drm_device *dev,
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct i915_gem_request *rq;
 	unsigned long irq_flags;
 	int ret;
 
@@ -9657,16 +9656,20 @@ static int intel_queue_mmio_flip(struct drm_device *dev,
 		return 0;
 	}
 
+	rq = obj->last_write.request;
+	if (WARN_ON(rq == NULL))
+		return 0;
+
 	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
-	intel_crtc->mmio_flip.seqno = obj->last_write_seqno;
-	intel_crtc->mmio_flip.ring_id = obj->ring->id;
+	intel_crtc->mmio_flip.seqno = rq->seqno;
+	intel_crtc->mmio_flip.ring_id = rq->ring->id;
 	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
 
 	/*
 	 * Double check to catch cases where irq fired before
 	 * mmio flip data was ready
 	 */
-	intel_notify_mmio_flip(obj->ring);
+	intel_notify_mmio_flip(rq->ring);
 	return 0;
 }
 
@@ -9695,9 +9698,8 @@ static bool __intel_pageflip_stall_check(struct drm_device *dev,
 		return false;
 
 	if (work->flip_ready_vblank == 0) {
-		if (work->ring &&
-		    !i915_seqno_passed(work->ring->get_seqno(work->ring, true),
-				      work->flip_queued_seqno))
+		struct i915_gem_request *rq = work->flip_queued_request;
+		if (rq && !i915_request_complete(rq, true))
 			return false;
 
 		work->flip_ready_vblank = drm_vblank_count(dev, intel_crtc->pipe);
@@ -9758,6 +9760,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	enum pipe pipe = intel_crtc->pipe;
 	struct intel_unpin_work *work;
 	struct intel_engine_cs *ring;
+	struct i915_gem_request *rq;
 	unsigned long flags;
 	int ret;
 
@@ -9856,7 +9859,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	} else if (IS_IVYBRIDGE(dev)) {
 		ring = &dev_priv->ring[BCS];
 	} else if (INTEL_INFO(dev)->gen >= 7) {
-		ring = obj->ring;
+		ring = i915_request_ring(obj->last_write.request);
 		if (ring == NULL || ring->id != RCS)
 			ring = &dev_priv->ring[BCS];
 	} else {
@@ -9864,7 +9867,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	}
 
 	if (use_mmio_flip(ring, obj, page_flip_flags)) {
-		ret = intel_pin_and_fence_fb_obj(dev, obj, obj->ring);
+		ret = intel_pin_and_fence_fb_obj(dev, obj, i915_request_ring(obj->last_write.request));
 		if (ret)
 			goto cleanup_pending;
 
@@ -9876,8 +9879,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 		if (ret)
 			goto cleanup_unpin;
 
-		work->flip_queued_seqno = obj->last_write_seqno;
-		work->ring = obj->ring;
+		rq = obj->last_write.request;
 	} else {
 		ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
 		if (ret)
@@ -9891,10 +9893,10 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 		if (ret)
 			goto cleanup_unpin;
 
-		work->flip_queued_seqno = intel_ring_get_seqno(ring);
-		work->ring = ring;
+		rq = intel_ring_get_request(ring);
 	}
 
+	work->flip_queued_request = i915_request_get(rq);
 	work->flip_queued_vblank = drm_vblank_count(dev, intel_crtc->pipe);
 	work->enable_stall_check = true;
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 274f77c..5f336a3 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -657,14 +657,13 @@ struct intel_unpin_work {
 	struct drm_i915_gem_object *old_fb_obj;
 	struct drm_i915_gem_object *pending_flip_obj;
 	struct drm_pending_vblank_event *event;
-	struct intel_engine_cs *ring;
 	atomic_t pending;
 #define INTEL_FLIP_INACTIVE	0
 #define INTEL_FLIP_PENDING	1
 #define INTEL_FLIP_COMPLETE	2
 	u32 flip_count;
 	u32 gtt_offset;
-	u32 flip_queued_seqno;
+	struct i915_gem_request *flip_queued_request;
 	int flip_queued_vblank;
 	int flip_ready_vblank;
 	bool enable_stall_check;
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index d94af27..c709ca5 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -183,7 +183,7 @@ struct intel_overlay {
 	u32 flip_addr;
 	struct drm_i915_gem_object *reg_bo;
 	/* flip handling */
-	uint32_t last_flip_req;
+	struct i915_gem_request *flip_request;
 	void (*flip_tail)(struct intel_overlay *);
 };
 
@@ -209,29 +209,49 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
 		io_mapping_unmap(regs);
 }
 
-static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
-					 void (*tail)(struct intel_overlay *))
+/* recover from an interruption due to a signal
+ * We have to be careful not to repeat work forever and make forward progress. */
+static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
 {
-	struct drm_device *dev = overlay->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
 	int ret;
 
-	BUG_ON(overlay->last_flip_req);
-	ret = i915_add_request(ring, &overlay->last_flip_req);
-	if (ret)
-		return ret;
+	if (overlay->flip_request == NULL)
+		return 0;
 
-	overlay->flip_tail = tail;
-	ret = i915_wait_seqno(ring, overlay->last_flip_req);
+	ret = i915_wait_request(overlay->flip_request);
 	if (ret)
 		return ret;
-	i915_gem_retire_requests(dev);
 
-	overlay->last_flip_req = 0;
+	i915_request_put(overlay->flip_request);
+	overlay->flip_request = NULL;
+
+	i915_gem_retire_requests(overlay->dev);
+
+	if (overlay->flip_tail)
+		overlay->flip_tail(overlay);
+
 	return 0;
 }
 
+static int intel_overlay_add_request(struct intel_overlay *overlay,
+				     struct intel_engine_cs *ring,
+				     void (*tail)(struct intel_overlay *))
+{
+	BUG_ON(overlay->flip_request);
+	overlay->flip_request = i915_request_get(intel_ring_get_request(ring));
+	overlay->flip_tail = tail;
+
+	return i915_add_request(ring);
+}
+
+static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
+					 struct intel_engine_cs *ring,
+					 void (*tail)(struct intel_overlay *))
+{
+	intel_overlay_add_request(overlay, ring, tail);
+	return intel_overlay_recover_from_interrupt(overlay);
+}
+
 /* overlay needs to be disable in OCMD reg */
 static int intel_overlay_on(struct intel_overlay *overlay)
 {
@@ -253,9 +273,9 @@ static int intel_overlay_on(struct intel_overlay *overlay)
 	intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
 	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
 	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	__intel_ring_advance(ring);
 
-	return intel_overlay_do_wait_request(overlay, NULL);
+	return intel_overlay_do_wait_request(overlay, ring, NULL);
 }
 
 /* overlay needs to be enabled in OCMD reg */
@@ -285,15 +305,18 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
 
 	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
 	intel_ring_emit(ring, flip_addr);
-	intel_ring_advance(ring);
+	__intel_ring_advance(ring);
 
-	return i915_add_request(ring, &overlay->last_flip_req);
+	return intel_overlay_add_request(overlay, ring, NULL);
 }
 
 static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
 {
 	struct drm_i915_gem_object *obj = overlay->old_vid_bo;
 
+	i915_gem_track_fb(obj, NULL,
+			  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
+
 	i915_gem_object_ggtt_unpin(obj);
 	drm_gem_object_unreference(&obj->base);
 
@@ -353,33 +376,9 @@ static int intel_overlay_off(struct intel_overlay *overlay)
 		intel_ring_emit(ring, flip_addr);
 		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
 	}
-	intel_ring_advance(ring);
-
-	return intel_overlay_do_wait_request(overlay, intel_overlay_off_tail);
-}
-
-/* recover from an interruption due to a signal
- * We have to be careful not to repeat work forever an make forward progess. */
-static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
-{
-	struct drm_device *dev = overlay->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
-	int ret;
-
-	if (overlay->last_flip_req == 0)
-		return 0;
+	__intel_ring_advance(ring);
 
-	ret = i915_wait_seqno(ring, overlay->last_flip_req);
-	if (ret)
-		return ret;
-	i915_gem_retire_requests(dev);
-
-	if (overlay->flip_tail)
-		overlay->flip_tail(overlay);
-
-	overlay->last_flip_req = 0;
-	return 0;
+	return intel_overlay_do_wait_request(overlay, ring, intel_overlay_off_tail);
 }
 
 /* Wait for pending overlay flip and release old frame.
@@ -388,10 +387,8 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
  */
 static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 {
-	struct drm_device *dev = overlay->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
-	int ret;
+	struct drm_i915_private *dev_priv = to_i915(overlay->dev);
+	int ret = 0;
 
 	/* Only wait if there is actually an old frame to release to
 	 * guarantee forward progress.
@@ -400,6 +397,8 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 		return 0;
 
 	if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
+		struct intel_engine_cs *ring = &dev_priv->ring[RCS];
+
 		/* synchronous slowpath */
 		ret = intel_ring_begin(ring, 2);
 		if (ret)
@@ -407,20 +406,14 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 
 		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
 		intel_ring_emit(ring, MI_NOOP);
-		intel_ring_advance(ring);
+		__intel_ring_advance(ring);
 
-		ret = intel_overlay_do_wait_request(overlay,
+		ret = intel_overlay_do_wait_request(overlay, ring,
 						    intel_overlay_release_old_vid_tail);
-		if (ret)
-			return ret;
-	}
-
-	intel_overlay_release_old_vid_tail(overlay);
+	} else
+		intel_overlay_release_old_vid_tail(overlay);
 
-
-	i915_gem_track_fb(overlay->old_vid_bo, NULL,
-			  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
-	return 0;
+	return ret;
 }
 
 struct put_image_params {
@@ -827,12 +820,7 @@ int intel_overlay_switch_off(struct intel_overlay *overlay)
 	iowrite32(0, &regs->OCMD);
 	intel_overlay_unmap_regs(overlay, regs);
 
-	ret = intel_overlay_off(overlay);
-	if (ret != 0)
-		return ret;
-
-	intel_overlay_off_tail(overlay);
-	return 0;
+	return intel_overlay_off(overlay);
 }
 
 static int check_overlay_possible_on_crtc(struct intel_overlay *overlay,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7c5a6c5..ae96de5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -726,7 +726,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,
 					   PIPE_CONTROL_FLUSH_ENABLE);
 		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
 		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
-		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
+		intel_ring_emit(signaller, signaller->preallocated_request->seqno);
 		intel_ring_emit(signaller, 0);
 		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
 					   MI_SEMAPHORE_TARGET(waiter->id));
@@ -763,7 +763,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,
 		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
 					   MI_FLUSH_DW_USE_GTT);
 		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
-		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
+		intel_ring_emit(signaller, signaller->preallocated_request->seqno);
 		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
 					   MI_SEMAPHORE_TARGET(waiter->id));
 		intel_ring_emit(signaller, 0);
@@ -797,7 +797,7 @@ static int gen6_signal(struct intel_engine_cs *signaller,
 		if (mbox_reg != GEN6_NOSYNC) {
 			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
 			intel_ring_emit(signaller, mbox_reg);
-			intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
+			intel_ring_emit(signaller, signaller->preallocated_request->seqno);
 		}
 	}
 
@@ -832,7 +832,7 @@ gen6_add_request(struct intel_engine_cs *ring)
 
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
+	intel_ring_emit(ring, ring->preallocated_request->seqno);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	__intel_ring_advance(ring);
 
@@ -950,7 +950,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
 			PIPE_CONTROL_WRITE_FLUSH |
 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
 	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
+	intel_ring_emit(ring, ring->preallocated_request->seqno);
 	intel_ring_emit(ring, 0);
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
 	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
@@ -969,7 +969,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
 			PIPE_CONTROL_NOTIFY);
 	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
+	intel_ring_emit(ring, ring->preallocated_request->seqno);
 	intel_ring_emit(ring, 0);
 	__intel_ring_advance(ring);
 
@@ -1224,7 +1224,7 @@ i9xx_add_request(struct intel_engine_cs *ring)
 
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
+	intel_ring_emit(ring, ring->preallocated_request->seqno);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	__intel_ring_advance(ring);
 
@@ -1602,7 +1602,8 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 	}
 
 	ring->dev = dev;
-	INIT_LIST_HEAD(&ring->active_list);
+	INIT_LIST_HEAD(&ring->read_list);
+	INIT_LIST_HEAD(&ring->write_list);
 	INIT_LIST_HEAD(&ring->request_list);
 	ringbuf->size = 32 * PAGE_SIZE;
 	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
@@ -1662,8 +1663,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
 	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
 
 	intel_destroy_ringbuffer_obj(ringbuf);
-	ring->preallocated_lazy_request = NULL;
-	ring->outstanding_lazy_seqno = 0;
+	ring->preallocated_request = NULL;
 
 	if (ring->cleanup)
 		ring->cleanup(ring);
@@ -1679,8 +1679,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
 static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
 {
 	struct intel_ringbuffer *ringbuf = ring->buffer;
-	struct drm_i915_gem_request *request;
-	u32 seqno = 0;
+	struct i915_gem_request *rq;
 	int ret;
 
 	if (ringbuf->last_retired_head != -1) {
@@ -1692,17 +1691,15 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
 			return 0;
 	}
 
-	list_for_each_entry(request, &ring->request_list, list) {
-		if (__ring_space(request->tail, ringbuf->tail, ringbuf->size) >= n) {
-			seqno = request->seqno;
+	list_for_each_entry(rq, &ring->request_list, list) {
+		if (__ring_space(rq->tail, ringbuf->tail, ringbuf->size) >= n)
 			break;
-		}
 	}
 
-	if (seqno == 0)
+	if (rq == list_entry(&ring->request_list, typeof(*rq), list))
 		return -ENOSPC;
 
-	ret = i915_wait_seqno(ring, seqno);
+	ret = i915_wait_request(rq);
 	if (ret)
 		return ret;
 
@@ -1803,12 +1800,11 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
 
 int intel_ring_idle(struct intel_engine_cs *ring)
 {
-	u32 seqno;
 	int ret;
 
 	/* We need to add any requests required to flush the objects and ring */
-	if (ring->outstanding_lazy_seqno) {
-		ret = i915_add_request(ring, NULL);
+	if (ring->preallocated_request) {
+		ret = i915_add_request(ring);
 		if (ret)
 			return ret;
 	}
@@ -1817,30 +1813,36 @@ int intel_ring_idle(struct intel_engine_cs *ring)
 	if (list_empty(&ring->request_list))
 		return 0;
 
-	seqno = list_entry(ring->request_list.prev,
-			   struct drm_i915_gem_request,
-			   list)->seqno;
-
-	return i915_wait_seqno(ring, seqno);
+	return i915_wait_request(container_of(ring->request_list.prev,
+					      struct i915_gem_request,
+					      list));
 }
 
 static int
-intel_ring_alloc_seqno(struct intel_engine_cs *ring)
+intel_ring_alloc_request(struct intel_engine_cs *ring)
 {
-	if (ring->outstanding_lazy_seqno)
-		return 0;
+	struct i915_gem_request *rq;
+	int ret;
 
-	if (ring->preallocated_lazy_request == NULL) {
-		struct drm_i915_gem_request *request;
+	if (ring->preallocated_request)
+		return 0;
 
-		request = kmalloc(sizeof(*request), GFP_KERNEL);
-		if (request == NULL)
-			return -ENOMEM;
+	rq = kmalloc(sizeof(*rq), GFP_KERNEL);
+	if (rq == NULL)
+		return -ENOMEM;
 
-		ring->preallocated_lazy_request = request;
+	ret = i915_gem_get_seqno(ring->dev, &rq->seqno);
+	if (ret) {
+		kfree(rq);
+		return ret;
 	}
 
-	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
+	kref_init(&rq->kref);
+	rq->ring = ring;
+	rq->completed = false;
+
+	ring->preallocated_request = rq;
+	return 0;
 }
 
 static int __intel_ring_prepare(struct intel_engine_cs *ring,
@@ -1876,7 +1878,7 @@ int intel_ring_begin(struct intel_engine_cs *ring,
 		return ret;
 
 	/* Preallocate the olr before touching the ring, */
-	ret = intel_ring_alloc_seqno(ring);
+	ret = intel_ring_alloc_request(ring);
 	if (ret)
 		return ret;
 
@@ -1886,7 +1888,7 @@ int intel_ring_begin(struct intel_engine_cs *ring,
 		return ret;
 
 	/* but we may flush the seqno during prepare. */
-	ret = intel_ring_alloc_seqno(ring);
+	ret = intel_ring_alloc_request(ring);
 	if (ret)
 		return ret;
 
@@ -1921,7 +1923,7 @@ void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	BUG_ON(ring->outstanding_lazy_seqno);
+	BUG_ON(ring->preallocated_request);
 
 	if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
 		I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
@@ -2300,7 +2302,8 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
 	ring->cleanup = render_ring_cleanup;
 
 	ring->dev = dev;
-	INIT_LIST_HEAD(&ring->active_list);
+	INIT_LIST_HEAD(&ring->read_list);
+	INIT_LIST_HEAD(&ring->write_list);
 	INIT_LIST_HEAD(&ring->request_list);
 
 	ringbuf->size = size;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index dcd2e44..2a78051 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -222,7 +222,7 @@ struct  intel_engine_cs {
 	 *
 	 * A reference is held on the buffer while on this list.
 	 */
-	struct list_head active_list;
+	struct list_head read_list, write_list, fence_list;
 
 	/**
 	 * List of breadcrumbs associated with GPU requests currently
@@ -233,8 +233,7 @@ struct  intel_engine_cs {
 	/**
 	 * Do we have some not yet emitted requests outstanding?
 	 */
-	struct drm_i915_gem_request *preallocated_lazy_request;
-	u32 outstanding_lazy_seqno;
+	struct i915_gem_request *preallocated_request;
 	bool gpu_caches_dirty;
 	bool fbc_dirty;
 
@@ -393,10 +392,10 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
 	return ringbuf->tail;
 }
 
-static inline u32 intel_ring_get_seqno(struct intel_engine_cs *ring)
+static inline struct i915_gem_request *intel_ring_get_request(struct intel_engine_cs *ring)
 {
-	BUG_ON(ring->outstanding_lazy_seqno == 0);
-	return ring->outstanding_lazy_seqno;
+	BUG_ON(ring->preallocated_request == NULL);
+	return ring->preallocated_request;
 }
 
 static inline void i915_trace_irq_get(struct intel_engine_cs *ring, u32 seqno)
-- 
1.9.1
