* [PATCH 1/5] drm/i915: Print captured bo for all VM in error state
@ 2014-08-12 19:05 Chris Wilson
  2014-08-12 19:05 ` [PATCH 2/5] drm/i915: Do not access stolen memory directly by the CPU, even for error capture Chris Wilson
                   ` (4 more replies)
  0 siblings, 5 replies; 20+ messages in thread
From: Chris Wilson @ 2014-08-12 19:05 UTC (permalink / raw)
  To: intel-gfx

The current error state harks back to the era of just a single VM. For
full-ppgtt, we capture every bo on every VM. It behoves us to then print
every bo for every VM, which we currently fail to do and so miss vital
information in the error state.

v2: Use the vma address rather than -1!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h       |  2 +
 drivers/gpu/drm/i915/i915_gpu_error.c | 80 ++++++++++++++++++++++++-----------
 2 files changed, 58 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1bf2cea..e0dcd70 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -396,6 +396,7 @@ struct drm_i915_error_state {
 		pid_t pid;
 		char comm[TASK_COMM_LEN];
 	} ring[I915_NUM_RINGS];
+
 	struct drm_i915_error_buffer {
 		u32 size;
 		u32 name;
@@ -414,6 +415,7 @@ struct drm_i915_error_state {
 	} **active_bo, **pinned_bo;
 
 	u32 *active_bo_count, *pinned_bo_count;
+	u32 vm_count;
 };
 
 struct intel_connector;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index fc11ac6..35e70d5 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -192,10 +192,10 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
 				struct drm_i915_error_buffer *err,
 				int count)
 {
-	err_printf(m, "%s [%d]:\n", name, count);
+	err_printf(m, "  %s [%d]:\n", name, count);
 
 	while (count--) {
-		err_printf(m, "  %08x %8u %02x %02x %x %x",
+		err_printf(m, "    %08x %8u %02x %02x %x %x",
 			   err->gtt_offset,
 			   err->size,
 			   err->read_domains,
@@ -393,15 +393,17 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 		i915_ring_error_state(m, dev, &error->ring[i]);
 	}
 
-	if (error->active_bo)
+	for (i = 0; i < error->vm_count; i++) {
+		err_printf(m, "vm[%d]\n", i);
+
 		print_error_buffers(m, "Active",
-				    error->active_bo[0],
-				    error->active_bo_count[0]);
+				    error->active_bo[i],
+				    error->active_bo_count[i]);
 
-	if (error->pinned_bo)
 		print_error_buffers(m, "Pinned",
-				    error->pinned_bo[0],
-				    error->pinned_bo_count[0]);
+				    error->pinned_bo[i],
+				    error->pinned_bo_count[i]);
+	}
 
 	for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
 		obj = error->ring[i].batchbuffer;
@@ -644,13 +646,15 @@ unwind:
 				       (src)->base.size>>PAGE_SHIFT)
 
 static void capture_bo(struct drm_i915_error_buffer *err,
-		       struct drm_i915_gem_object *obj)
+		       struct i915_vma *vma)
 {
+	struct drm_i915_gem_object *obj = vma->obj;
+
 	err->size = obj->base.size;
 	err->name = obj->base.name;
 	err->rseqno = obj->last_read_seqno;
 	err->wseqno = obj->last_write_seqno;
-	err->gtt_offset = i915_gem_obj_ggtt_offset(obj);
+	err->gtt_offset = vma->node.start;
 	err->read_domains = obj->base.read_domains;
 	err->write_domain = obj->base.write_domain;
 	err->fence_reg = obj->fence_reg;
@@ -674,7 +678,7 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err,
 	int i = 0;
 
 	list_for_each_entry(vma, head, mm_list) {
-		capture_bo(err++, vma->obj);
+		capture_bo(err++, vma);
 		if (++i == count)
 			break;
 	}
@@ -683,21 +687,27 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err,
 }
 
 static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
-			     int count, struct list_head *head)
+			     int count, struct list_head *head,
+			     struct i915_address_space *vm)
 {
 	struct drm_i915_gem_object *obj;
-	int i = 0;
+	struct drm_i915_error_buffer * const first = err;
+	struct drm_i915_error_buffer * const last = err + count;
 
 	list_for_each_entry(obj, head, global_list) {
-		if (!i915_gem_obj_is_pinned(obj))
-			continue;
+		struct i915_vma *vma;
 
-		capture_bo(err++, obj);
-		if (++i == count)
+		if (err == last)
 			break;
+
+		list_for_each_entry(vma, &obj->vma_list, vma_link)
+			if (vma->vm == vm && vma->pin_count > 0) {
+				capture_bo(err++, vma);
+				break;
+			}
 	}
 
-	return i;
+	return err - first;
 }
 
 /* Generate a semi-unique error code. The code is not meant to have meaning, The
@@ -1053,9 +1063,14 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
 	list_for_each_entry(vma, &vm->active_list, mm_list)
 		i++;
 	error->active_bo_count[ndx] = i;
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
-		if (i915_gem_obj_is_pinned(obj))
-			i++;
+
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
+		list_for_each_entry(vma, &obj->vma_list, vma_link)
+			if (vma->vm == vm && vma->pin_count > 0) {
+				i++;
+				break;
+			}
+	}
 	error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx];
 
 	if (i) {
@@ -1074,7 +1089,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
 		error->pinned_bo_count[ndx] =
 			capture_pinned_bo(pinned_bo,
 					  error->pinned_bo_count[ndx],
-					  &dev_priv->mm.bound_list);
+					  &dev_priv->mm.bound_list, vm);
 	error->active_bo[ndx] = active_bo;
 	error->pinned_bo[ndx] = pinned_bo;
 }
@@ -1095,8 +1110,25 @@ static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
 	error->pinned_bo_count = kcalloc(cnt, sizeof(*error->pinned_bo_count),
 					 GFP_ATOMIC);
 
-	list_for_each_entry(vm, &dev_priv->vm_list, global_link)
-		i915_gem_capture_vm(dev_priv, error, vm, i++);
+	if (error->active_bo == NULL ||
+	    error->pinned_bo == NULL ||
+	    error->active_bo_count == NULL ||
+	    error->pinned_bo_count == NULL) {
+		kfree(error->active_bo);
+		kfree(error->active_bo_count);
+		kfree(error->pinned_bo);
+		kfree(error->pinned_bo_count);
+
+		error->active_bo = NULL;
+		error->active_bo_count = NULL;
+		error->pinned_bo = NULL;
+		error->pinned_bo_count = NULL;
+	} else {
+		list_for_each_entry(vm, &dev_priv->vm_list, global_link)
+			i915_gem_capture_vm(dev_priv, error, vm, i++);
+
+		error->vm_count = cnt;
+	}
 }
 
 /* Capture all registers which don't fit into another category. */
-- 
1.9.1


* [PATCH 2/5] drm/i915: Do not access stolen memory directly by the CPU, even for error capture
  2014-08-12 19:05 [PATCH 1/5] drm/i915: Print captured bo for all VM in error state Chris Wilson
@ 2014-08-12 19:05 ` Chris Wilson
  2014-08-14 14:51   ` Mika Kuoppala
  2014-08-15 11:11   ` Mika Kuoppala
  2014-08-12 19:05 ` [PATCH 3/5] drm/i915: Remove num_pages parameter to i915_error_object_create() Chris Wilson
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 20+ messages in thread
From: Chris Wilson @ 2014-08-12 19:05 UTC (permalink / raw)
  To: intel-gfx

For stolen pages, since it is verboten to access them directly on many
architectures, we have to read them through the GTT aperture. If they
are not accessible through the aperture, then we have to abort.

This was complicated by

commit 8b6124a633d8095b0c8364f585edff9c59568a96
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Thu Jan 30 14:38:16 2014 +0000

    drm/i915: Don't access snooped pages through the GTT (even for error capture)

and the desire to use stolen memory for ringbuffers, contexts and
batches in the future.
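
To gather the resulting policy in one place, here is a condensed, hypothetical sketch (simplified names and fields, not the literal driver code) of how the capture path now decides between reading through the GGTT aperture, reading via kmap, or abandoning the capture of that object:

struct capture_src {			/* illustration only */
	bool is_stolen;			/* backed by stolen memory */
	bool is_uncached;		/* cache_level == I915_CACHE_NONE */
	bool has_ggtt_mapping;		/* bound into the global GTT */
	bool in_ggtt_vm;		/* the vm being captured is the GGTT */
	bool fits_in_aperture;		/* offset + size <= mappable_end */
	bool has_llc;			/* CPU snoops the GPU caches */
};

/* Returns true if the pages should be read through the aperture;
 * *abort is set when the object cannot be captured safely at all. */
static bool capture_use_aperture(const struct capture_src *src, bool *abort)
{
	bool use_ggtt = src->is_uncached && src->in_ggtt_vm &&
			src->has_ggtt_mapping && src->fits_in_aperture;

	*abort = false;

	/* Stolen pages may only be read through the aperture. */
	if (src->is_stolen) {
		use_ggtt = true;
		if (!src->has_ggtt_mapping || !src->fits_in_aperture)
			*abort = true;
	}

	/* Snooped (cached) pages must not be read through the aperture
	 * on parts without LLC. */
	if (use_ggtt && !src->is_uncached && !src->has_llc)
		*abort = true;

	return use_ggtt;
}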

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 50 ++++++++++++++++++++++-------------
 1 file changed, 31 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 35e70d5..6d280c07 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -561,10 +561,11 @@ static struct drm_i915_error_object *
 i915_error_object_create_sized(struct drm_i915_private *dev_priv,
 			       struct drm_i915_gem_object *src,
 			       struct i915_address_space *vm,
-			       const int num_pages)
+			       int num_pages)
 {
 	struct drm_i915_error_object *dst;
-	int i;
+	bool use_ggtt;
+	int i = 0;
 	u32 reloc_offset;
 
 	if (src == NULL || src->pages == NULL)
@@ -574,8 +575,32 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
 	if (dst == NULL)
 		return NULL;
 
-	reloc_offset = dst->gtt_offset = i915_gem_obj_offset(src, vm);
-	for (i = 0; i < num_pages; i++) {
+	dst->gtt_offset = i915_gem_obj_offset(src, vm);
+
+	reloc_offset = dst->gtt_offset;
+	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
+		    i915_is_ggtt(vm) &&
+		    src->has_global_gtt_mapping &&
+		    reloc_offset + num_pages * PAGE_SIZE <= dev_priv->gtt.mappable_end);
+
+	/* Cannot access stolen address directly, try to use the aperture */
+	if (src->stolen) {
+		use_ggtt = true;
+
+		if (!src->has_global_gtt_mapping)
+			goto unwind;
+
+		reloc_offset = i915_gem_obj_ggtt_offset(src);
+		if (reloc_offset + num_pages * PAGE_SIZE > dev_priv->gtt.mappable_end)
+			goto unwind;
+	}
+
+	/* Cannot access snooped pages through the aperture */
+	if (use_ggtt && src->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv->dev))
+		goto unwind;
+
+	dst->page_count = num_pages;
+	while (num_pages--) {
 		unsigned long flags;
 		void *d;
 
@@ -584,10 +609,7 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
 			goto unwind;
 
 		local_irq_save(flags);
-		if (src->cache_level == I915_CACHE_NONE &&
-		    reloc_offset < dev_priv->gtt.mappable_end &&
-		    src->has_global_gtt_mapping &&
-		    i915_is_ggtt(vm)) {
+		if (use_ggtt) {
 			void __iomem *s;
 
 			/* Simply ignore tiling or any overlapping fence.
@@ -599,14 +621,6 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
 						     reloc_offset);
 			memcpy_fromio(d, s, PAGE_SIZE);
 			io_mapping_unmap_atomic(s);
-		} else if (src->stolen) {
-			unsigned long offset;
-
-			offset = dev_priv->mm.stolen_base;
-			offset += src->stolen->start;
-			offset += i << PAGE_SHIFT;
-
-			memcpy_fromio(d, (void __iomem *) offset, PAGE_SIZE);
 		} else {
 			struct page *page;
 			void *s;
@@ -623,11 +637,9 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
 		}
 		local_irq_restore(flags);
 
-		dst->pages[i] = d;
-
+		dst->pages[i++] = d;
 		reloc_offset += PAGE_SIZE;
 	}
-	dst->page_count = num_pages;
 
 	return dst;
 
-- 
1.9.1


* [PATCH 3/5] drm/i915: Remove num_pages parameter to i915_error_object_create()
  2014-08-12 19:05 [PATCH 1/5] drm/i915: Print captured bo for all VM in error state Chris Wilson
  2014-08-12 19:05 ` [PATCH 2/5] drm/i915: Do not access stolen memory directly by the CPU, even for error capture Chris Wilson
@ 2014-08-12 19:05 ` Chris Wilson
  2014-08-15 18:07   ` Mika Kuoppala
  2014-08-12 19:05 ` [PATCH 4/5] drm/i915: Suppress a WARN on reading an object back for a GPU hang Chris Wilson
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 20+ messages in thread
From: Chris Wilson @ 2014-08-12 19:05 UTC (permalink / raw)
  To: intel-gfx

For cleanliness, i915_error_object_create() was written to handle the
NULL pointer in a central location. The macro that wrapped it and passed
it a num_pages to use was not safe. As we now never limit the num_pages
to use (we did so at one point to only capture the first page of the
context), we can remove the redundant macro and be NULL safe again.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 6d280c07..726e6b1 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -558,12 +558,12 @@ static void i915_error_state_free(struct kref *error_ref)
 }
 
 static struct drm_i915_error_object *
-i915_error_object_create_sized(struct drm_i915_private *dev_priv,
-			       struct drm_i915_gem_object *src,
-			       struct i915_address_space *vm,
-			       int num_pages)
+i915_error_object_create(struct drm_i915_private *dev_priv,
+			 struct drm_i915_gem_object *src,
+			 struct i915_address_space *vm)
 {
 	struct drm_i915_error_object *dst;
+	int num_pages;
 	bool use_ggtt;
 	int i = 0;
 	u32 reloc_offset;
@@ -571,6 +571,8 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
 	if (src == NULL || src->pages == NULL)
 		return NULL;
 
+	num_pages = src->base.size >> PAGE_SHIFT;
+
 	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC);
 	if (dst == NULL)
 		return NULL;
@@ -649,13 +651,8 @@ unwind:
 	kfree(dst);
 	return NULL;
 }
-#define i915_error_object_create(dev_priv, src, vm) \
-	i915_error_object_create_sized((dev_priv), (src), (vm), \
-				       (src)->base.size>>PAGE_SHIFT)
-
 #define i915_error_ggtt_object_create(dev_priv, src) \
-	i915_error_object_create_sized((dev_priv), (src), &(dev_priv)->gtt.base, \
-				       (src)->base.size>>PAGE_SHIFT)
+	i915_error_object_create((dev_priv), (src), &(dev_priv)->gtt.base)
 
 static void capture_bo(struct drm_i915_error_buffer *err,
 		       struct i915_vma *vma)
@@ -1004,8 +1001,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
 							 request->batch_obj,
 							 vm);
 
-			if (HAS_BROKEN_CS_TLB(dev_priv->dev) &&
-			    ring->scratch.obj)
+			if (HAS_BROKEN_CS_TLB(dev_priv->dev))
 				error->ring[i].wa_batchbuffer =
 					i915_error_ggtt_object_create(dev_priv,
 							     ring->scratch.obj);
@@ -1027,9 +1023,8 @@ static void i915_gem_record_rings(struct drm_device *dev,
 		error->ring[i].ringbuffer =
 			i915_error_ggtt_object_create(dev_priv, ring->buffer->obj);
 
-		if (ring->status_page.obj)
-			error->ring[i].hws_page =
-				i915_error_ggtt_object_create(dev_priv, ring->status_page.obj);
+		error->ring[i].hws_page =
+			i915_error_ggtt_object_create(dev_priv, ring->status_page.obj);
 
 		i915_gem_record_active_context(ring, error, &error->ring[i]);
 
-- 
1.9.1


* [PATCH 4/5] drm/i915: Suppress a WARN on reading an object back for a GPU hang
  2014-08-12 19:05 [PATCH 1/5] drm/i915: Print captured bo for all VM in error state Chris Wilson
  2014-08-12 19:05 ` [PATCH 2/5] drm/i915: Do not access stolen memory directly by the CPU, even for error capture Chris Wilson
  2014-08-12 19:05 ` [PATCH 3/5] drm/i915: Remove num_pages parameter to i915_error_object_create() Chris Wilson
@ 2014-08-12 19:05 ` Chris Wilson
  2014-08-15 18:09   ` Mika Kuoppala
  2014-08-12 19:05 ` [PATCH 5/5] drm/i915: s/seqno/request/ tracking inside objects Chris Wilson
  2014-08-13 14:50 ` [PATCH 1/5] drm/i915: Print captured bo for all VM in error state Mika Kuoppala
  4 siblings, 1 reply; 20+ messages in thread
From: Chris Wilson @ 2014-08-12 19:05 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 726e6b1..1e05414 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -577,7 +577,10 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
 	if (dst == NULL)
 		return NULL;
 
-	dst->gtt_offset = i915_gem_obj_offset(src, vm);
+	if (i915_gem_obj_bound(src, vm))
+		dst->gtt_offset = i915_gem_obj_offset(src, vm);
+	else
+		dst->gtt_offset = -1;
 
 	reloc_offset = dst->gtt_offset;
 	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
-- 
1.9.1


* [PATCH 5/5] drm/i915: s/seqno/request/ tracking inside objects
  2014-08-12 19:05 [PATCH 1/5] drm/i915: Print captured bo for all VM in error state Chris Wilson
                   ` (2 preceding siblings ...)
  2014-08-12 19:05 ` [PATCH 4/5] drm/i915: Suppress a WARN on reading an object back for a GPU hang Chris Wilson
@ 2014-08-12 19:05 ` Chris Wilson
  2014-08-27  9:55   ` Daniel Vetter
  2014-08-13 14:50 ` [PATCH 1/5] drm/i915: Print captured bo for all VM in error state Mika Kuoppala
  4 siblings, 1 reply; 20+ messages in thread
From: Chris Wilson @ 2014-08-12 19:05 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, Kukanova, Svetlana, Brad Volkin

At the heart of this change is that the seqno is too low-level an
abstraction to handle the growing complexities of command tracking, both
with the introduction of multiple command queues with execbuffer and the
potential for reordering with a scheduler. On top of the seqno we have
the request. Conceptually this is just a fence, but it also has
substantial bookkeeping of its own in order to track the context and
batch in flight, for example. It is the central structure which we can
extend with dependency tracking et al.

As regards the objects, they were using the seqno as a simple fence,
which we check or even wait upon for command completion. This patch
exchanges that seqno/ring pair with the request itself. For the
majority, the lifetime of the request is ordered by how we retire
objects and then requests. However, both the unlocked waits and probing
elsewhere do not tie into the normal request lifetimes and so we need
to introduce a kref. Extending the objects to use the request as the
fence naturally extends to segregating read/write fence tracking. This
is significant as it reduces the number of semaphores we need to emit,
reducing the likelihood of #54226, and improving performance overall.
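
As a rough standalone sketch of the bookkeeping this buys us (hypothetical types; NUM_RINGS and the wait callback stand in for the real driver structures): each object keeps one write fence plus one read fence per engine, so a new reader only has to wait for the last GPU write (read-after-write), whereas a new writer has to wait for every outstanding read on every engine (write-after-read).

#include <stdbool.h>
#include <stddef.h>

#define NUM_RINGS 4			/* illustration only */

struct request { unsigned seqno; };	/* stands in for i915_gem_request */

struct object {
	struct request *last_write;
	struct request *last_read[NUM_RINGS];
};

static void wait_rendering(struct object *obj, bool readonly,
			   void (*wait)(struct request *))
{
	int i;

	if (readonly) {
		/* A reader only conflicts with the last write. */
		if (obj->last_write)
			wait(obj->last_write);
		return;
	}

	/* A writer must wait for all outstanding reads. */
	for (i = 0; i < NUM_RINGS; i++)
		if (obj->last_read[i])
			wait(obj->last_read[i]);
}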

v2: Rebase and split out the orthogonal tweaks.

Something silly happened with this patch. It seemed to nullify our
earlier seqno-vs-interrupt w/a. I could not spot why, but gen6+ started
to fail with missed interrupts (a good test of our robustness handling).
So I ripped out the existing ACTHD read and replaced it with a RING_HEAD
read to manually check whether the request is complete. That also had
the nice consequence of forcing __wait_request() to be the central
arbiter of request completion.
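
The idea behind that check, as a simplified sketch (plain modular arithmetic, not the driver's ring-space helper): measure positions forward from the current software tail and ask whether the hardware HEAD has advanced at least as far as the tail position recorded when the request was emitted.

/* Byte offsets into a ring whose size is a power of two. */
static unsigned ring_offset_from(unsigned base, unsigned pos, unsigned size)
{
	return (pos - base) & (size - 1);
}

/* True if HEAD has consumed the ring at least up to the position
 * recorded at request emission, i.e. the request must have executed
 * even if its seqno write is not yet visible to the CPU. */
static bool head_passed_request(unsigned head, unsigned rq_tail,
				unsigned ring_tail, unsigned size)
{
	return ring_offset_from(ring_tail, head, size) >=
	       ring_offset_from(ring_tail, rq_tail, size);
}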

The keener-eyed reviewer will also spot that the reset_counter is moved
into the request, simplifying __wait_request() callsites and reducing the
number of atomic reads by virtue of moving the check for a pending GPU
reset to the endpoints of GPU access.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Oscar Mateo <oscar.mateo@intel.com>
Cc: Brad Volkin <bradley.d.volkin@intel.com>
Cc: "Kukanova, Svetlana" <svetlana.kukanova@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c          |  29 +-
 drivers/gpu/drm/i915/i915_dma.c              |   2 +
 drivers/gpu/drm/i915/i915_drv.h              | 121 ++--
 drivers/gpu/drm/i915/i915_gem.c              | 850 +++++++++++++++++----------
 drivers/gpu/drm/i915/i915_gem_context.c      |  19 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  37 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c |   5 +-
 drivers/gpu/drm/i915/i915_gem_tiling.c       |   2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c        |  36 +-
 drivers/gpu/drm/i915/i915_irq.c              |  28 +-
 drivers/gpu/drm/i915/i915_trace.h            |   4 +-
 drivers/gpu/drm/i915/intel_display.c         | 151 ++---
 drivers/gpu/drm/i915/intel_drv.h             |   8 +-
 drivers/gpu/drm/i915/intel_lrc.c             | 115 +---
 drivers/gpu/drm/i915/intel_overlay.c         | 118 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c      | 164 +++---
 drivers/gpu/drm/i915/intel_ringbuffer.h      |  19 +-
 17 files changed, 922 insertions(+), 786 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d42db6b..604a73a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -122,10 +122,11 @@ static inline const char *get_global_flag(struct drm_i915_gem_object *obj)
 static void
 describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 {
+	struct i915_gem_request *rq = i915_gem_object_last_read(obj);
 	struct i915_vma *vma;
 	int pin_count = 0;
 
-	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %u %u %u%s%s%s",
+	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
 		   &obj->base,
 		   get_pin_flag(obj),
 		   get_tiling_flag(obj),
@@ -133,9 +134,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		   obj->base.size / 1024,
 		   obj->base.read_domains,
 		   obj->base.write_domain,
-		   obj->last_read_seqno,
-		   obj->last_write_seqno,
-		   obj->last_fenced_seqno,
+		   i915_request_seqno(rq),
+		   i915_request_seqno(obj->last_write.request),
+		   i915_request_seqno(obj->last_fence.request),
 		   i915_cache_level_str(obj->cache_level),
 		   obj->dirty ? " dirty" : "",
 		   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
@@ -168,8 +169,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		*t = '\0';
 		seq_printf(m, " (%s mappable)", s);
 	}
-	if (obj->ring != NULL)
-		seq_printf(m, " (%s)", obj->ring->name);
+	if (rq)
+		seq_printf(m, " (%s)", rq->ring->name);
 	if (obj->frontbuffer_bits)
 		seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
 }
@@ -336,7 +337,7 @@ static int per_file_stats(int id, void *ptr, void *data)
 			if (ppgtt->file_priv != stats->file_priv)
 				continue;
 
-			if (obj->ring) /* XXX per-vma statistic */
+			if (obj->active) /* XXX per-vma statistic */
 				stats->active += obj->base.size;
 			else
 				stats->inactive += obj->base.size;
@@ -346,7 +347,7 @@ static int per_file_stats(int id, void *ptr, void *data)
 	} else {
 		if (i915_gem_obj_ggtt_bound(obj)) {
 			stats->global += obj->base.size;
-			if (obj->ring)
+			if (obj->active)
 				stats->active += obj->base.size;
 			else
 				stats->inactive += obj->base.size;
@@ -574,7 +575,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring;
-	struct drm_i915_gem_request *gem_request;
+	struct i915_gem_request *rq;
 	int ret, count, i;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
@@ -587,12 +588,10 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
 			continue;
 
 		seq_printf(m, "%s requests:\n", ring->name);
-		list_for_each_entry(gem_request,
-				    &ring->request_list,
-				    list) {
+		list_for_each_entry(rq, &ring->request_list, list) {
 			seq_printf(m, "    %d @ %d\n",
-				   gem_request->seqno,
-				   (int) (jiffies - gem_request->emitted_jiffies));
+				   rq->seqno,
+				   (int)(jiffies - rq->emitted_jiffies));
 		}
 		count++;
 	}
@@ -609,7 +608,7 @@ static void i915_ring_seqno_info(struct seq_file *m,
 {
 	if (ring->get_seqno) {
 		seq_printf(m, "Current sequence (%s): %u\n",
-			   ring->name, ring->get_seqno(ring, false));
+			   ring->name, ring->get_seqno(ring));
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 04dd611..ba7f15c 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1598,6 +1598,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	/* For the ugly agnostic INTEL_INFO macro */
 	BUILD_BUG_ON(sizeof(*dev_priv) == sizeof(*dev));
 
+	BUILD_BUG_ON(I915_NUM_RINGS >= (1 << I915_NUM_RING_BITS));
+
 	dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL);
 	if (dev_priv == NULL)
 		return -ENOMEM;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e0dcd70..c3563a0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -191,6 +191,7 @@ enum hpd_pin {
 
 struct drm_i915_private;
 struct i915_mmu_object;
+struct i915_gem_request;
 
 enum intel_dpll_id {
 	DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
@@ -1740,16 +1741,15 @@ struct drm_i915_gem_object {
 	struct drm_mm_node *stolen;
 	struct list_head global_list;
 
-	struct list_head ring_list;
 	/** Used in execbuf to temporarily hold a ref */
 	struct list_head obj_exec_link;
 
 	/**
 	 * This is set if the object is on the active lists (has pending
-	 * rendering and so a non-zero seqno), and is not set if it i s on
-	 * inactive (ready to be unbound) list.
+	 * rendering and so a submitted request), and is not set if it is on
+	 * inactive (ready to be unbound) list. We track activity per engine.
 	 */
-	unsigned int active:1;
+	unsigned int active:I915_NUM_RING_BITS;
 
 	/**
 	 * This is set if the object has been written to since last bound
@@ -1817,13 +1817,11 @@ struct drm_i915_gem_object {
 	void *dma_buf_vmapping;
 	int vmapping_count;
 
-	struct intel_engine_cs *ring;
-
-	/** Breadcrumb of last rendering to the buffer. */
-	uint32_t last_read_seqno;
-	uint32_t last_write_seqno;
-	/** Breadcrumb of last fenced GPU access to the buffer. */
-	uint32_t last_fenced_seqno;
+	/** Breadcrumbs of last rendering to the buffer. */
+	struct {
+		struct i915_gem_request *request;
+		struct list_head ring_list;
+	} last_write, last_read[I915_NUM_RINGS], last_fence;
 
 	/** Current tiling stride for the object, if it's tiled. */
 	uint32_t stride;
@@ -1856,6 +1854,8 @@ struct drm_i915_gem_object {
 };
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
+struct i915_gem_request *i915_gem_object_last_read(struct drm_i915_gem_object *obj);
+
 void i915_gem_track_fb(struct drm_i915_gem_object *old,
 		       struct drm_i915_gem_object *new,
 		       unsigned frontbuffer_bits);
@@ -1870,10 +1870,14 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
  * sequence-number comparisons on buffer last_rendering_seqnos, and associate
  * an emission time with seqnos for tracking how far ahead of the GPU we are.
  */
-struct drm_i915_gem_request {
+struct i915_gem_request {
+	struct kref kref;
+
 	/** On Which ring this request was generated */
 	struct intel_engine_cs *ring;
 
+	unsigned reset_counter;
+
 	/** GEM sequence number associated with this request. */
 	uint32_t seqno;
 
@@ -1898,8 +1902,64 @@ struct drm_i915_gem_request {
 	struct drm_i915_file_private *file_priv;
 	/** file_priv list entry for this request */
 	struct list_head client_list;
+
+	bool completed:1;
 };
 
+static inline struct intel_engine_cs *i915_request_ring(struct i915_gem_request *rq)
+{
+	return rq ? rq->ring : NULL;
+}
+
+static inline int i915_request_ring_id(struct i915_gem_request *rq)
+{
+	return rq ? rq->ring->id : -1;
+}
+
+static inline u32 i915_request_seqno(struct i915_gem_request *rq)
+{
+	return rq ? rq->seqno : 0;
+}
+
+/**
+ * Returns true if seq1 is later than seq2.
+ */
+static inline bool
+__i915_seqno_passed(uint32_t seq1, uint32_t seq2)
+{
+	return (int32_t)(seq1 - seq2) >= 0;
+}
+
+static inline bool
+i915_request_complete(struct i915_gem_request *rq)
+{
+	if (!rq->completed &&
+	    __i915_seqno_passed(rq->ring->get_seqno(rq->ring),
+				rq->seqno))
+		rq->completed = true;
+	return rq->completed;
+}
+
+static inline struct i915_gem_request *
+i915_request_get(struct i915_gem_request *rq)
+{
+	if (rq)
+		kref_get(&rq->kref);
+	return rq;
+}
+
+void __i915_request_free(struct kref *kref);
+
+struct i915_gem_request *i915_gem_seqno_to_request(struct intel_engine_cs *ring,
+						   u32 seqno);
+
+static inline void
+i915_request_put(struct i915_gem_request *rq)
+{
+	if (rq)
+		kref_put(&rq->kref, __i915_request_free);
+}
+
 struct drm_i915_file_private {
 	struct drm_i915_private *dev_priv;
 	struct drm_file *file;
@@ -2368,22 +2428,18 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
-			 struct intel_engine_cs *to);
+			 struct intel_engine_cs *to,
+			 bool readonly);
 void i915_vma_move_to_active(struct i915_vma *vma,
-			     struct intel_engine_cs *ring);
+			     struct intel_engine_cs *ring,
+			     unsigned fenced);
+#define VMA_IS_FENCED 0x1
+#define VMA_HAS_FENCE 0x2
 int i915_gem_dumb_create(struct drm_file *file_priv,
 			 struct drm_device *dev,
 			 struct drm_mode_create_dumb *args);
 int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
 		      uint32_t handle, uint64_t *offset);
-/**
- * Returns true if seq1 is later than seq2.
- */
-static inline bool
-i915_seqno_passed(uint32_t seq1, uint32_t seq2)
-{
-	return (int32_t)(seq1 - seq2) >= 0;
-}
 
 int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
 int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
@@ -2393,14 +2449,15 @@ int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
 bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj);
 void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj);
 
-struct drm_i915_gem_request *
+struct i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *ring);
 
 bool i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_retire_requests_ring(struct intel_engine_cs *ring);
 int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
-				      bool interruptible);
-int __must_check i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno);
+				      bool interruptible,
+				      unsigned *reset_counter);
+int __must_check i915_gem_check_olr(struct i915_gem_request *rq);
 
 static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
 {
@@ -2443,12 +2500,12 @@ int __must_check i915_gpu_idle(struct drm_device *dev);
 int __must_check i915_gem_suspend(struct drm_device *dev);
 int __i915_add_request(struct intel_engine_cs *ring,
 		       struct drm_file *file,
-		       struct drm_i915_gem_object *batch_obj,
-		       u32 *seqno);
-#define i915_add_request(ring, seqno) \
-	__i915_add_request(ring, NULL, NULL, seqno)
-int __must_check i915_wait_seqno(struct intel_engine_cs *ring,
-				 uint32_t seqno);
+		       struct drm_i915_gem_object *batch_obj);
+#define i915_add_request(ring) \
+	__i915_add_request(ring, NULL, NULL)
+int __must_check i915_wait_request(struct i915_gem_request *rq);
+int __i915_request_wait(struct i915_gem_request *rq,
+			bool interruptible);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 int __must_check
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
@@ -2776,8 +2833,6 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data,
 int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data,
 			       struct drm_file *file);
 
-void intel_notify_mmio_flip(struct intel_engine_cs *ring);
-
 /* overlay */
 extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
 extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6c2f0b8..9c8c881 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -44,9 +44,6 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *o
 static __must_check int
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 			       bool readonly);
-static void
-i915_gem_object_retire(struct drm_i915_gem_object *obj);
-
 static void i915_gem_write_fence(struct drm_device *dev, int reg,
 				 struct drm_i915_gem_object *obj);
 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
@@ -108,6 +105,85 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
 	spin_unlock(&dev_priv->mm.object_stat_lock);
 }
 
+static void
+i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
+{
+	intel_fb_obj_flush(obj, true);
+	obj->last_write.request = NULL;
+	list_del_init(&obj->last_write.ring_list);
+}
+
+static void
+i915_gem_object_retire__fence(struct drm_i915_gem_object *obj)
+{
+	obj->last_fence.request = NULL;
+	list_del_init(&obj->last_fence.ring_list);
+}
+
+static void
+i915_gem_object_retire__read(struct drm_i915_gem_object *obj,
+			     struct intel_engine_cs *ring)
+{
+	struct i915_vma *vma;
+
+	BUG_ON(obj->active == 0);
+	BUG_ON(obj->base.write_domain);
+
+	obj->last_read[ring->id].request = NULL;
+	list_del_init(&obj->last_read[ring->id].ring_list);
+
+	if (--obj->active)
+		return;
+
+	BUG_ON(obj->last_write.request);
+	BUG_ON(obj->last_fence.request);
+
+	list_for_each_entry(vma, &obj->vma_list, vma_link) {
+		if (!list_empty(&vma->mm_list))
+			list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
+	}
+
+	drm_gem_object_unreference(&obj->base);
+
+	WARN_ON(i915_verify_lists(dev));
+}
+
+static void
+i915_gem_object_retire(struct drm_i915_gem_object *obj)
+{
+	struct i915_gem_request *rq;
+	int i;
+
+	if (!obj->active)
+		return;
+
+	rq = obj->last_write.request;
+	if (rq && i915_request_complete(rq))
+		i915_gem_object_retire__write(obj);
+
+	rq = obj->last_fence.request;
+	if (rq && i915_request_complete(rq))
+		i915_gem_object_retire__fence(obj);
+
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		rq = obj->last_read[i].request;
+		if (rq && i915_request_complete(rq)) {
+			/* Although we just checked these above, the hardware
+			 * may have just completed them in the interval and
+			 * to keep the request lifetimes correct, we must
+			 * retire write/fence before read.
+			 */
+			if (i915_request_ring_id(obj->last_write.request) == i)
+				i915_gem_object_retire__write(obj);
+
+			if (i915_request_ring_id(obj->last_fence.request) == i)
+				i915_gem_object_retire__fence(obj);
+
+			i915_gem_object_retire__read(obj, rq->ring);
+		}
+	}
+}
+
 static int
 i915_gem_wait_for_error(struct i915_gpu_error *error)
 {
@@ -1073,9 +1149,12 @@ unlock:
 
 int
 i915_gem_check_wedge(struct i915_gpu_error *error,
-		     bool interruptible)
+		     bool interruptible,
+		     unsigned *reset_counter)
 {
-	if (i915_reset_in_progress(error)) {
+	unsigned wedge = atomic_read(&error->reset_counter);
+
+	if (wedge & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED)) {
 		/* Non-interruptible callers can't handle -EAGAIN, hence return
 		 * -EIO unconditionally for these. */
 		if (!interruptible)
@@ -1088,6 +1167,10 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
 		return -EAGAIN;
 	}
 
+	if (*reset_counter && *reset_counter != wedge)
+		return -EAGAIN;
+
+	*reset_counter = wedge;
 	return 0;
 }
 
@@ -1096,15 +1179,15 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
  * equal.
  */
 int
-i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno)
+i915_gem_check_olr(struct i915_gem_request *rq)
 {
 	int ret;
 
-	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
+	BUG_ON(!mutex_is_locked(&rq->ring->dev->struct_mutex));
 
 	ret = 0;
-	if (seqno == ring->outstanding_lazy_seqno)
-		ret = i915_add_request(ring, NULL);
+	if (rq == rq->ring->preallocated_request)
+		ret = i915_add_request(rq->ring);
 
 	return ret;
 }
@@ -1128,32 +1211,50 @@ static bool can_wait_boost(struct drm_i915_file_private *file_priv)
 	return !atomic_xchg(&file_priv->rps_wait_boost, true);
 }
 
+static bool __i915_request_complete__wa(struct i915_gem_request *rq)
+{
+	struct intel_engine_cs *ring = rq->ring;
+	struct drm_i915_private *dev_priv = to_i915(ring->dev);
+	unsigned head, tail;
+
+	if (i915_request_complete(rq))
+		return true;
+
+	/* Sadly not all architectures are coherent wrt to the seqno
+	 * write being visible before the CPU is woken up by the
+	 * interrupt. In order to avoid going to sleep without seeing
+	 * the last seqno and never waking up again, we explicity check
+	 * whether the ring has advanced past our request. The uncached
+	 * register read (which requires waking the GT up) is pure brute
+	 * force, and only just enough.
+	 */
+	head = __intel_ring_space(I915_READ_HEAD(ring) & HEAD_ADDR,
+				  ring->buffer->tail, ring->buffer->size);
+	tail = __intel_ring_space(rq->tail,
+				  ring->buffer->tail, ring->buffer->size);
+	if (head >= tail)
+		rq->completed = true;
+
+	return rq->completed;
+}
+
 /**
- * __wait_seqno - wait until execution of seqno has finished
- * @ring: the ring expected to report seqno
- * @seqno: duh!
- * @reset_counter: reset sequence associated with the given seqno
+ * __wait_request - wait until execution of request has finished
+ * @request: the request to wait upon
  * @interruptible: do an interruptible wait (normally yes)
  * @timeout: in - how long to wait (NULL forever); out - how much time remaining
  *
- * Note: It is of utmost importance that the passed in seqno and reset_counter
- * values have been read by the caller in an smp safe manner. Where read-side
- * locks are involved, it is sufficient to read the reset_counter before
- * unlocking the lock that protects the seqno. For lockless tricks, the
- * reset_counter _must_ be read before, and an appropriate smp_rmb must be
- * inserted.
- *
- * Returns 0 if the seqno was found within the alloted time. Else returns the
+ * Returns 0 if the request was completed within the alloted time. Else returns the
  * errno with remaining time filled in timeout argument.
  */
-static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
-			unsigned reset_counter,
-			bool interruptible,
-			struct timespec *timeout,
-			struct drm_i915_file_private *file_priv)
+static int __wait_request(struct i915_gem_request *rq,
+			  bool interruptible,
+			  struct timespec *timeout,
+			  struct drm_i915_file_private *file_priv)
 {
+	struct intel_engine_cs *ring = rq->ring;
 	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = to_i915(dev);
 	const bool irq_test_in_progress =
 		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
 	struct timespec before, now;
@@ -1163,7 +1264,7 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
 
 	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
 
-	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
+	if (i915_request_complete(rq))
 		return 0;
 
 	timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
@@ -1180,7 +1281,7 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
 		return -ENODEV;
 
 	/* Record current time in case interrupted by signal, or wedged */
-	trace_i915_gem_request_wait_begin(ring, seqno);
+	trace_i915_gem_request_wait_begin(ring, rq->seqno);
 	getrawmonotonic(&before);
 	for (;;) {
 		struct timer_list timer;
@@ -1190,19 +1291,12 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
 
 		/* We need to check whether any gpu reset happened in between
 		 * the caller grabbing the seqno and now ... */
-		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
-			/* ... but upgrade the -EAGAIN to an -EIO if the gpu
-			 * is truely gone. */
-			ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
-			if (ret == 0)
-				ret = -EAGAIN;
+		ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible, &rq->reset_counter);
+		if (ret)
 			break;
-		}
 
-		if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) {
-			ret = 0;
+		if (__i915_request_complete__wa(rq))
 			break;
-		}
 
 		if (interruptible && signal_pending(current)) {
 			ret = -ERESTARTSYS;
@@ -1231,7 +1325,7 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
 		}
 	}
 	getrawmonotonic(&now);
-	trace_i915_gem_request_wait_end(ring, seqno);
+	trace_i915_gem_request_wait_end(ring, rq->seqno);
 
 	if (!irq_test_in_progress)
 		ring->irq_put(ring);
@@ -1253,46 +1347,28 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
  * request and object lists appropriately for that event.
  */
 int
-i915_wait_seqno(struct intel_engine_cs *ring, uint32_t seqno)
+i915_wait_request(struct i915_gem_request *rq)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	bool interruptible = dev_priv->mm.interruptible;
+	struct drm_device *dev = rq->ring->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
 	int ret;
 
-	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
-	BUG_ON(seqno == 0);
-
-	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
-	if (ret)
-		return ret;
+	if (WARN_ON(!mutex_is_locked(&dev->struct_mutex)))
+		return -EINVAL;
 
-	ret = i915_gem_check_olr(ring, seqno);
+	ret = i915_gem_check_olr(rq);
 	if (ret)
 		return ret;
 
-	return __wait_seqno(ring, seqno,
-			    atomic_read(&dev_priv->gpu_error.reset_counter),
-			    interruptible, NULL, NULL);
+	return __wait_request(rq, dev_priv->mm.interruptible,
+			      NULL, NULL);
 }
 
-static int
-i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
-				     struct intel_engine_cs *ring)
+int
+__i915_request_wait(struct i915_gem_request *rq,
+		    bool interruptible)
 {
-	if (!obj->active)
-		return 0;
-
-	/* Manually manage the write flush as we may have not yet
-	 * retired the buffer.
-	 *
-	 * Note that the last_write_seqno is always the earlier of
-	 * the two (read/write) seqno, so if we haved successfully waited,
-	 * we know we have passed the last write.
-	 */
-	obj->last_write_seqno = 0;
-
-	return 0;
+	return __wait_request(rq, interruptible, NULL, NULL);
 }
 
 /**
@@ -1303,19 +1379,27 @@ static __must_check int
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 			       bool readonly)
 {
-	struct intel_engine_cs *ring = obj->ring;
-	u32 seqno;
-	int ret;
+	int i, ret;
 
-	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
-	if (seqno == 0)
-		return 0;
+	if (readonly) {
+		if (obj->last_write.request == NULL)
+			return 0;
 
-	ret = i915_wait_seqno(ring, seqno);
-	if (ret)
-		return ret;
+		ret = i915_wait_request(obj->last_write.request);
+		if (ret)
+			return ret;
+	} else {
+		for (i = 0; i < I915_NUM_RINGS; i++) {
+			if (obj->last_read[i].request == NULL)
+				continue;
 
-	return i915_gem_object_wait_rendering__tail(obj, ring);
+			ret = i915_wait_request(obj->last_read[i].request);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
 }
 
 /* A nonblocking variant of the above wait. This is a highly dangerous routine
@@ -1328,34 +1412,42 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring = obj->ring;
-	unsigned reset_counter;
-	u32 seqno;
-	int ret;
+	struct i915_gem_request *rq[I915_NUM_RINGS] = {};
+	int i, n, ret;
 
 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
 	BUG_ON(!dev_priv->mm.interruptible);
 
-	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
-	if (seqno == 0)
+	n = 0;
+	if (readonly) {
+		if (obj->last_write.request)
+			rq[n++] = i915_request_get(obj->last_write.request);
+	} else {
+		for (i = 0; i < I915_NUM_RINGS; i++)
+			if (obj->last_read[i].request)
+				rq[n++] = i915_request_get(obj->last_read[i].request);
+	}
+	if (n == 0)
 		return 0;
 
-	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
-	if (ret)
-		return ret;
-
-	ret = i915_gem_check_olr(ring, seqno);
-	if (ret)
-		return ret;
+	for (i = 0; i < n; i++) {
+		ret = i915_gem_check_olr(rq[i]);
+		if (ret)
+			goto out;
+	}
 
-	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
 	mutex_unlock(&dev->struct_mutex);
-	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file_priv);
+
+	for (i = 0; ret == 0 && i < n; i++)
+		ret = __wait_request(rq[i], true, NULL, file_priv);
+
 	mutex_lock(&dev->struct_mutex);
-	if (ret)
-		return ret;
 
-	return i915_gem_object_wait_rendering__tail(obj, ring);
+out:
+	for (i = 0; i < n; i++)
+		i915_request_put(rq[i]);
+
+	return ret;
 }
 
 /**
@@ -2157,81 +2249,57 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
-static void
-i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
-			       struct intel_engine_cs *ring)
-{
-	u32 seqno = intel_ring_get_seqno(ring);
-
-	BUG_ON(ring == NULL);
-	if (obj->ring != ring && obj->last_write_seqno) {
-		/* Keep the seqno relative to the current ring */
-		obj->last_write_seqno = seqno;
-	}
-	obj->ring = ring;
-
-	/* Add a reference if we're newly entering the active list. */
-	if (!obj->active) {
-		drm_gem_object_reference(&obj->base);
-		obj->active = 1;
-	}
-
-	list_move_tail(&obj->ring_list, &ring->active_list);
-
-	obj->last_read_seqno = seqno;
-}
-
 void i915_vma_move_to_active(struct i915_vma *vma,
-			     struct intel_engine_cs *ring)
+			     struct intel_engine_cs *ring,
+			     unsigned fenced)
 {
-	list_move_tail(&vma->mm_list, &vma->vm->active_list);
-	return i915_gem_object_move_to_active(vma->obj, ring);
-}
-
-static void
-i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-	struct i915_address_space *vm;
-	struct i915_vma *vma;
+	struct drm_i915_gem_object *obj = vma->obj;
+	struct i915_gem_request *rq = intel_ring_get_request(ring);
+	u32 old_read = obj->base.read_domains;
+	u32 old_write = obj->base.write_domain;
 
-	BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
-	BUG_ON(!obj->active);
+	BUG_ON(rq == NULL);
 
-	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
-		vma = i915_gem_obj_to_vma(obj, vm);
-		if (vma && !list_empty(&vma->mm_list))
-			list_move_tail(&vma->mm_list, &vm->inactive_list);
-	}
-
-	intel_fb_obj_flush(obj, true);
+	obj->base.write_domain = obj->base.pending_write_domain;
+	if (obj->base.write_domain == 0)
+		obj->base.pending_read_domains |= obj->base.read_domains;
+	obj->base.read_domains = obj->base.pending_read_domains;
 
-	list_del_init(&obj->ring_list);
-	obj->ring = NULL;
+	obj->base.pending_read_domains = 0;
+	obj->base.pending_write_domain = 0;
 
-	obj->last_read_seqno = 0;
-	obj->last_write_seqno = 0;
-	obj->base.write_domain = 0;
+	trace_i915_gem_object_change_domain(obj, old_read, old_write);
+	if (obj->base.read_domains == 0)
+		return;
 
-	obj->last_fenced_seqno = 0;
+	/* Add a reference if we're newly entering the active list. */
+	if (obj->last_read[ring->id].request == NULL && obj->active++ == 0)
+		drm_gem_object_reference(&obj->base);
 
-	obj->active = 0;
-	drm_gem_object_unreference(&obj->base);
+	obj->last_read[ring->id].request = rq;
+	list_move_tail(&obj->last_read[ring->id].ring_list, &ring->read_list);
 
-	WARN_ON(i915_verify_lists(dev));
-}
+	if (obj->base.write_domain) {
+		obj->dirty = 1;
+		obj->last_write.request = rq;
+		list_move_tail(&obj->last_write.ring_list, &ring->write_list);
+		intel_fb_obj_invalidate(obj, ring);
 
-static void
-i915_gem_object_retire(struct drm_i915_gem_object *obj)
-{
-	struct intel_engine_cs *ring = obj->ring;
+		/* update for the implicit flush after a batch */
+		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
+	}
 
-	if (ring == NULL)
-		return;
+	if (fenced & VMA_IS_FENCED) {
+		obj->last_fence.request = rq;
+		list_move_tail(&obj->last_fence.ring_list, &ring->fence_list);
+		if (fenced & VMA_HAS_FENCE) {
+			struct drm_i915_private *dev_priv = to_i915(ring->dev);
+			list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
+					&dev_priv->mm.fence_list);
+		}
+	}
 
-	if (i915_seqno_passed(ring->get_seqno(ring, true),
-			      obj->last_read_seqno))
-		i915_gem_object_move_to_inactive(obj);
+	list_move_tail(&vma->mm_list, &vma->vm->active_list);
 }
 
 static int
@@ -2306,11 +2374,10 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
 
 int __i915_add_request(struct intel_engine_cs *ring,
 		       struct drm_file *file,
-		       struct drm_i915_gem_object *obj,
-		       u32 *out_seqno)
+		       struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	struct drm_i915_gem_request *request;
+	struct i915_gem_request *rq;
 	u32 request_ring_position, request_start;
 	int ret;
 
@@ -2326,10 +2393,16 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	if (ret)
 		return ret;
 
-	request = ring->preallocated_lazy_request;
-	if (WARN_ON(request == NULL))
+	rq = ring->preallocated_request;
+	if (WARN_ON(rq == NULL))
 		return -ENOMEM;
 
+	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
+				   dev_priv->mm.interruptible,
+				   &rq->reset_counter);
+	if (ret)
+		return ret;
+
 	/* Record the position of the start of the request so that
 	 * should we detect the updated seqno part-way through the
 	 * GPU processing the request, we never over-estimate the
@@ -2341,10 +2414,8 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	if (ret)
 		return ret;
 
-	request->seqno = intel_ring_get_seqno(ring);
-	request->ring = ring;
-	request->head = request_start;
-	request->tail = request_ring_position;
+	rq->head = request_start;
+	rq->tail = request_ring_position;
 
 	/* Whilst this request exists, batch_obj will be on the
 	 * active_list, and so will hold the active reference. Only when this
@@ -2352,32 +2423,31 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	 * inactive_list and lose its active reference. Hence we do not need
 	 * to explicitly hold another reference here.
 	 */
-	request->batch_obj = obj;
+	rq->batch_obj = obj;
 
 	/* Hold a reference to the current context so that we can inspect
 	 * it later in case a hangcheck error event fires.
 	 */
-	request->ctx = ring->last_context;
-	if (request->ctx)
-		i915_gem_context_reference(request->ctx);
+	rq->ctx = ring->last_context;
+	if (rq->ctx)
+		i915_gem_context_reference(rq->ctx);
 
-	request->emitted_jiffies = jiffies;
-	list_add_tail(&request->list, &ring->request_list);
-	request->file_priv = NULL;
+	rq->emitted_jiffies = jiffies;
+	list_add_tail(&rq->list, &ring->request_list);
+	rq->file_priv = NULL;
 
 	if (file) {
 		struct drm_i915_file_private *file_priv = file->driver_priv;
 
 		spin_lock(&file_priv->mm.lock);
-		request->file_priv = file_priv;
-		list_add_tail(&request->client_list,
+		rq->file_priv = file_priv;
+		list_add_tail(&rq->client_list,
 			      &file_priv->mm.request_list);
 		spin_unlock(&file_priv->mm.lock);
 	}
 
-	trace_i915_gem_request_add(ring, request->seqno);
-	ring->outstanding_lazy_seqno = 0;
-	ring->preallocated_lazy_request = NULL;
+	trace_i915_gem_request_add(ring, rq->seqno);
+	ring->preallocated_request = NULL;
 
 	if (!dev_priv->ums.mm_suspended) {
 		i915_queue_hangcheck(ring->dev);
@@ -2389,22 +2459,20 @@ int __i915_add_request(struct intel_engine_cs *ring,
 		intel_mark_busy(dev_priv->dev);
 	}
 
-	if (out_seqno)
-		*out_seqno = request->seqno;
 	return 0;
 }
 
 static inline void
-i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
+i915_gem_request_remove_from_client(struct i915_gem_request *rq)
 {
-	struct drm_i915_file_private *file_priv = request->file_priv;
+	struct drm_i915_file_private *file_priv = rq->file_priv;
 
 	if (!file_priv)
 		return;
 
 	spin_lock(&file_priv->mm.lock);
-	list_del(&request->client_list);
-	request->file_priv = NULL;
+	list_del(&rq->client_list);
+	rq->file_priv = NULL;
 	spin_unlock(&file_priv->mm.lock);
 }
 
@@ -2452,30 +2520,37 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
 	}
 }
 
-static void i915_gem_free_request(struct drm_i915_gem_request *request)
+void __i915_request_free(struct kref *kref)
 {
-	list_del(&request->list);
-	i915_gem_request_remove_from_client(request);
+	struct i915_gem_request *rq = container_of(kref, struct i915_gem_request, kref);
+	kfree(rq);
+}
 
-	if (request->ctx)
-		i915_gem_context_unreference(request->ctx);
+static void i915_request_retire(struct i915_gem_request *rq)
+{
+	rq->completed = true;
 
-	kfree(request);
+	list_del(&rq->list);
+	i915_gem_request_remove_from_client(rq);
+
+	if (rq->ctx) {
+		i915_gem_context_unreference(rq->ctx);
+		rq->ctx = NULL;
+	}
+
+	i915_request_put(rq);
 }
 
-struct drm_i915_gem_request *
+struct i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *ring)
 {
-	struct drm_i915_gem_request *request;
-	u32 completed_seqno;
-
-	completed_seqno = ring->get_seqno(ring, false);
+	struct i915_gem_request *rq;
 
-	list_for_each_entry(request, &ring->request_list, list) {
-		if (i915_seqno_passed(completed_seqno, request->seqno))
+	list_for_each_entry(rq, &ring->request_list, list) {
+		if (i915_request_complete(rq))
 			continue;
 
-		return request;
+		return rq;
 	}
 
 	return NULL;
@@ -2484,33 +2559,53 @@ i915_gem_find_active_request(struct intel_engine_cs *ring)
 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
 				       struct intel_engine_cs *ring)
 {
-	struct drm_i915_gem_request *request;
+	struct i915_gem_request *rq;
 	bool ring_hung;
 
-	request = i915_gem_find_active_request(ring);
+	rq = i915_gem_find_active_request(ring);
 
-	if (request == NULL)
+	if (rq == NULL)
 		return;
 
 	ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
 
-	i915_set_reset_status(dev_priv, request->ctx, ring_hung);
+	i915_set_reset_status(dev_priv, rq->ctx, ring_hung);
 
-	list_for_each_entry_continue(request, &ring->request_list, list)
-		i915_set_reset_status(dev_priv, request->ctx, false);
+	list_for_each_entry_continue(rq, &ring->request_list, list)
+		i915_set_reset_status(dev_priv, rq->ctx, false);
 }
 
 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 					struct intel_engine_cs *ring)
 {
-	while (!list_empty(&ring->active_list)) {
+	while (!list_empty(&ring->write_list)) {
 		struct drm_i915_gem_object *obj;
 
-		obj = list_first_entry(&ring->active_list,
+		obj = list_first_entry(&ring->write_list,
 				       struct drm_i915_gem_object,
-				       ring_list);
+				       last_write.ring_list);
 
-		i915_gem_object_move_to_inactive(obj);
+		i915_gem_object_retire__write(obj);
+	}
+
+	while (!list_empty(&ring->fence_list)) {
+		struct drm_i915_gem_object *obj;
+
+		obj = list_first_entry(&ring->fence_list,
+				       struct drm_i915_gem_object,
+				       last_fence.ring_list);
+
+		i915_gem_object_retire__fence(obj);
+	}
+
+	while (!list_empty(&ring->read_list)) {
+		struct drm_i915_gem_object *obj;
+
+		obj = list_first_entry(&ring->read_list,
+				       struct drm_i915_gem_object,
+				       last_read[ring->id].ring_list);
+
+		i915_gem_object_retire__read(obj, ring);
 	}
 
 	/*
@@ -2521,19 +2616,18 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 	 * the request.
 	 */
 	while (!list_empty(&ring->request_list)) {
-		struct drm_i915_gem_request *request;
+		struct i915_gem_request *rq;
 
-		request = list_first_entry(&ring->request_list,
-					   struct drm_i915_gem_request,
-					   list);
+		rq = list_first_entry(&ring->request_list,
+				      struct i915_gem_request,
+				      list);
 
-		i915_gem_free_request(request);
+		i915_request_retire(rq);
 	}
 
 	/* These may not have been flush before the reset, do so now */
-	kfree(ring->preallocated_lazy_request);
-	ring->preallocated_lazy_request = NULL;
-	ring->outstanding_lazy_seqno = 0;
+	kfree(ring->preallocated_request);
+	ring->preallocated_request = NULL;
 }
 
 void i915_gem_restore_fences(struct drm_device *dev)
@@ -2592,49 +2686,77 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 
 	WARN_ON(i915_verify_lists(ring->dev));
 
-	seqno = ring->get_seqno(ring, true);
+	seqno = ring->get_seqno(ring);
 
 	/* Move any buffers on the active list that are no longer referenced
 	 * by the ringbuffer to the flushing/inactive lists as appropriate,
 	 * before we free the context associated with the requests.
 	 */
-	while (!list_empty(&ring->active_list)) {
+	while (!list_empty(&ring->write_list)) {
+		struct drm_i915_gem_object *obj;
+
+		obj = list_first_entry(&ring->write_list,
+				       struct drm_i915_gem_object,
+				       last_write.ring_list);
+
+		if (!__i915_seqno_passed(seqno,
+					 obj->last_write.request->seqno))
+			break;
+
+		i915_gem_object_retire__write(obj);
+	}
+
+	while (!list_empty(&ring->fence_list)) {
 		struct drm_i915_gem_object *obj;
 
-		obj = list_first_entry(&ring->active_list,
-				      struct drm_i915_gem_object,
-				      ring_list);
+		obj = list_first_entry(&ring->fence_list,
+				       struct drm_i915_gem_object,
+				       last_fence.ring_list);
 
-		if (!i915_seqno_passed(seqno, obj->last_read_seqno))
+		if (!__i915_seqno_passed(seqno,
+					 obj->last_fence.request->seqno))
 			break;
 
-		i915_gem_object_move_to_inactive(obj);
+		i915_gem_object_retire__fence(obj);
 	}
 
+	while (!list_empty(&ring->read_list)) {
+		struct drm_i915_gem_object *obj;
+
+		obj = list_first_entry(&ring->read_list,
+				       struct drm_i915_gem_object,
+				       last_read[ring->id].ring_list);
+
+		if (!__i915_seqno_passed(seqno,
+					 obj->last_read[ring->id].request->seqno))
+			break;
+
+		i915_gem_object_retire__read(obj, ring);
+	}
 
 	while (!list_empty(&ring->request_list)) {
-		struct drm_i915_gem_request *request;
+		struct i915_gem_request *rq;
 
-		request = list_first_entry(&ring->request_list,
-					   struct drm_i915_gem_request,
-					   list);
+		rq = list_first_entry(&ring->request_list,
+				      struct i915_gem_request,
+				      list);
 
-		if (!i915_seqno_passed(seqno, request->seqno))
+		if (!__i915_seqno_passed(seqno, rq->seqno))
 			break;
 
-		trace_i915_gem_request_retire(ring, request->seqno);
+		trace_i915_gem_request_retire(ring, rq->seqno);
 		/* We know the GPU must have read the request to have
 		 * sent us the seqno + interrupt, so use the position
 		 * of tail of the request to update the last known position
 		 * of the GPU head.
 		 */
-		ring->buffer->last_retired_head = request->tail;
+		ring->buffer->last_retired_head = rq->tail;
 
-		i915_gem_free_request(request);
+		i915_request_retire(rq);
 	}
 
 	if (unlikely(ring->trace_irq_seqno &&
-		     i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
+		     __i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
 		ring->irq_put(ring);
 		ring->trace_irq_seqno = 0;
 	}
@@ -2699,14 +2821,23 @@ i915_gem_idle_work_handler(struct work_struct *work)
 static int
 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
 {
-	int ret;
+	int i;
 
-	if (obj->active) {
-		ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
+	if (!obj->active)
+		return 0;
+
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		struct i915_gem_request *rq = obj->last_read[i].request;
+		int ret;
+
+		if (rq == NULL)
+			continue;
+
+		ret = i915_gem_check_olr(rq);
 		if (ret)
 			return ret;
 
-		i915_gem_retire_requests_ring(obj->ring);
+		i915_gem_retire_requests_ring(rq->ring);
 	}
 
 	return 0;
@@ -2737,14 +2868,11 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
 int
 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_wait *args = data;
 	struct drm_i915_gem_object *obj;
-	struct intel_engine_cs *ring = NULL;
 	struct timespec timeout_stack, *timeout = NULL;
-	unsigned reset_counter;
-	u32 seqno = 0;
-	int ret = 0;
+	struct i915_gem_request *rq[I915_NUM_RINGS] = {};
+	int i, n, ret = 0;
 
 	if (args->timeout_ns >= 0) {
 		timeout_stack = ns_to_timespec(args->timeout_ns);
@@ -2766,13 +2894,8 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (ret)
 		goto out;
 
-	if (obj->active) {
-		seqno = obj->last_read_seqno;
-		ring = obj->ring;
-	}
-
-	if (seqno == 0)
-		 goto out;
+	if (!obj->active)
+		goto out;
 
 	/* Do this after OLR check to make sure we make forward progress polling
 	 * on this IOCTL with a 0 timeout (like busy ioctl)
@@ -2782,11 +2905,23 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		goto out;
 	}
 
+	for (i = n = 0; i < I915_NUM_RINGS; i++) {
+		if (obj->last_read[i].request == NULL)
+			continue;
+
+		rq[n++] = i915_request_get(obj->last_read[i].request);
+	}
+
 	drm_gem_object_unreference(&obj->base);
-	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
 	mutex_unlock(&dev->struct_mutex);
 
-	ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv);
+	for (i = 0; i < n; i++) {
+		if (ret == 0)
+			ret = __wait_request(rq[i], true, timeout, file->driver_priv);
+
+		i915_request_put(rq[i]);
+	}
+
 	if (timeout)
 		args->timeout_ns = timespec_to_ns(timeout);
 	return ret;
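
As an illustrative aside (not part of the patch): the reworked wait ioctl above snapshots a reference to every outstanding read request while the mutex is held, drops the mutex, and only then waits on each request in turn. A toy, userspace-compilable sketch of that pattern follows; all names in it are hypothetical stand-ins, not driver API.

#include <stdlib.h>

#define NUM_RINGS 5

/* Toy model of the wait pattern above: snapshot references under the lock,
 * unlock, then wait, so the lock is never held across a blocking wait. */
struct toy_request {
	int refcount;
	int completed;
};

static struct toy_request *toy_request_get(struct toy_request *rq)
{
	rq->refcount++;
	return rq;
}

static void toy_request_put(struct toy_request *rq)
{
	if (--rq->refcount == 0)
		free(rq);	/* requests are assumed heap-allocated */
}

static int toy_wait(struct toy_request *rq)
{
	rq->completed = 1;	/* stand-in for sleeping on the GPU seqno */
	return 0;
}

static int toy_wait_object(struct toy_request *last_read[NUM_RINGS])
{
	struct toy_request *snap[NUM_RINGS];
	int i, n = 0, ret = 0;

	/* "locked": snapshot a reference to each outstanding read request */
	for (i = 0; i < NUM_RINGS; i++)
		if (last_read[i])
			snap[n++] = toy_request_get(last_read[i]);
	/* "unlocked" from here on */

	/* wait outside the lock; always drop the snapshot references */
	for (i = 0; i < n; i++) {
		if (ret == 0)
			ret = toy_wait(snap[i]);
		toy_request_put(snap[i]);
	}
	return ret;
}
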
@@ -2797,6 +2932,41 @@ out:
 	return ret;
 }
 
+static int
+i915_request_sync(struct i915_gem_request *rq,
+		  struct intel_engine_cs *to,
+		  struct drm_i915_gem_object *obj)
+{
+	int ret, idx;
+
+	if (to == NULL)
+		return i915_wait_request(rq);
+
+	idx = intel_ring_sync_index(rq->ring, to);
+	if (rq->seqno <= rq->ring->semaphore.sync_seqno[idx])
+		return 0;
+
+	ret = i915_gem_check_olr(rq);
+	if (ret)
+		return ret;
+
+	if (!i915_request_complete(rq)) {
+		trace_i915_gem_ring_sync_to(rq->ring, to, rq->seqno);
+		ret = to->semaphore.sync_to(to, rq->ring, rq->seqno);
+		if (ret)
+			return ret;
+	}
+
+	/* We must recheck last_read_request because sync_to()
+	 * might have just caused seqno wrap under
+	 * the radar.
+	 */
+	if (obj->last_read[rq->ring->id].request == rq)
+		rq->ring->semaphore.sync_seqno[idx] = rq->seqno;
+
+	return 0;
+}
+
 /**
  * i915_gem_object_sync - sync an object to a ring.
  *
@@ -2811,40 +2981,36 @@ out:
  */
 int
 i915_gem_object_sync(struct drm_i915_gem_object *obj,
-		     struct intel_engine_cs *to)
+		     struct intel_engine_cs *to,
+		     bool readonly)
 {
-	struct intel_engine_cs *from = obj->ring;
-	u32 seqno;
-	int ret, idx;
-
-	if (from == NULL || to == from)
-		return 0;
+	struct i915_gem_request *rq;
+	struct intel_engine_cs *semaphore;
+	int ret = 0, i;
 
-	if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
-		return i915_gem_object_wait_rendering(obj, false);
+	semaphore = NULL;
+	if (i915_semaphore_is_enabled(obj->base.dev))
+		semaphore = to;
 
-	idx = intel_ring_sync_index(from, to);
-
-	seqno = obj->last_read_seqno;
-	/* Optimization: Avoid semaphore sync when we are sure we already
-	 * waited for an object with higher seqno */
-	if (seqno <= from->semaphore.sync_seqno[idx])
-		return 0;
-
-	ret = i915_gem_check_olr(obj->ring, seqno);
-	if (ret)
-		return ret;
+	if (readonly) {
+		rq = obj->last_write.request;
+		if (rq != NULL && to != rq->ring)
+			ret = i915_request_sync(rq, semaphore, obj);
+	} else {
+		for (i = 0; i < I915_NUM_RINGS; i++) {
+			rq = obj->last_read[i].request;
+			if (rq == NULL || to == rq->ring)
+				continue;
 
-	trace_i915_gem_ring_sync_to(from, to, seqno);
-	ret = to->semaphore.sync_to(to, from, seqno);
-	if (!ret)
-		/* We use last_read_seqno because sync_to()
-		 * might have just caused seqno wrap under
-		 * the radar.
-		 */
-		from->semaphore.sync_seqno[idx] = obj->last_read_seqno;
+			ret = i915_request_sync(rq, semaphore, obj);
+			if (ret)
+				break;
+		}
+	}
 
+	i915_gem_object_retire(obj);
 	return ret;
+
 }
 
 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
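
As an illustrative aside (not part of the patch): the new readonly parameter above encodes that a reader only has to be ordered against the last writer, whereas a writer has to be ordered against every ring that still holds an outstanding read. A toy, userspace-compilable model of that rule follows; the names are hypothetical, not the driver's structures.

#include <stdbool.h>

#define NUM_RINGS 5

struct toy_object {
	int last_write_ring;		/* -1 if there is no outstanding write */
	bool last_read[NUM_RINGS];	/* rings with an outstanding read */
};

/* Returns true if ring 'to' may proceed without any wait or semaphore. */
static bool toy_can_access(const struct toy_object *obj, int to, bool readonly)
{
	int i;

	if (readonly)
		return obj->last_write_ring < 0 || obj->last_write_ring == to;

	for (i = 0; i < NUM_RINGS; i++)
		if (obj->last_read[i] && i != to)
			return false;
	return true;
}
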
@@ -3150,14 +3316,16 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
 static int
 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
 {
-	if (obj->last_fenced_seqno) {
-		int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
-		if (ret)
-			return ret;
+	int ret;
 
-		obj->last_fenced_seqno = 0;
-	}
+	if (obj->last_fence.request == NULL)
+		return 0;
 
+	ret = i915_wait_request(obj->last_fence.request);
+	if (ret)
+		return ret;
+
+	i915_gem_object_retire__fence(obj);
 	return 0;
 }
 
@@ -3822,11 +3990,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	bool was_pin_display;
 	int ret;
 
-	if (pipelined != obj->ring) {
-		ret = i915_gem_object_sync(obj, pipelined);
-		if (ret)
-			return ret;
-	}
+	ret = i915_gem_object_sync(obj, pipelined, true);
+	if (ret)
+		return ret;
 
 	/* Mark the pin_display early so that we account for the
 	 * display coherency whilst setting up the cache domains.
@@ -3974,38 +4140,35 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
-	struct drm_i915_gem_request *request;
-	struct intel_engine_cs *ring = NULL;
-	unsigned reset_counter;
-	u32 seqno = 0;
+	struct i915_gem_request *rq, *iter;
 	int ret;
 
 	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
 	if (ret)
 		return ret;
 
-	ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
-	if (ret)
-		return ret;
+	/* used for querying whether the GPU is wedged by legacy userspace */
+	if (i915_terminally_wedged(&dev_priv->gpu_error))
+		return -EIO;
 
 	spin_lock(&file_priv->mm.lock);
-	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
-		if (time_after_eq(request->emitted_jiffies, recent_enough))
+	rq = NULL;
+	list_for_each_entry(iter, &file_priv->mm.request_list, client_list) {
+		if (time_after_eq(iter->emitted_jiffies, recent_enough))
 			break;
-
-		ring = request->ring;
-		seqno = request->seqno;
+		rq = iter;
 	}
-	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
+	rq = i915_request_get(rq);
 	spin_unlock(&file_priv->mm.lock);
 
-	if (seqno == 0)
+	if (rq == NULL)
 		return 0;
 
-	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL);
+	ret = __wait_request(rq, true, NULL, NULL);
 	if (ret == 0)
 		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
 
+	i915_request_put(rq);
 	return ret;
 }
 
@@ -4219,7 +4382,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_busy *args = data;
 	struct drm_i915_gem_object *obj;
-	int ret;
+	int ret, i;
 
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
@@ -4238,10 +4401,16 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	 */
 	ret = i915_gem_object_flush_active(obj);
 
-	args->busy = obj->active;
-	if (obj->ring) {
+	args->busy = 0;
+	if (obj->active) {
 		BUILD_BUG_ON(I915_NUM_RINGS > 16);
-		args->busy |= intel_ring_flag(obj->ring) << 16;
+		args->busy |= 1;
+		for (i = 0; i < I915_NUM_RINGS; i++)  {
+			if (obj->last_read[i].request == NULL)
+				continue;
+
+			args->busy |= 1 << (16 + i);
+		}
 	}
 
 	drm_gem_object_unreference(&obj->base);
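
As an illustrative aside (not part of the patch): with the hunk above, bit 0 of args->busy reports that the object is still active at all, while bit (16 + i) reports that ring i still holds an outstanding read. A minimal userspace-style sketch of decoding that layout; the names are hypothetical.

#include <stdint.h>
#include <stdio.h>

#define NUM_RINGS 5	/* matches I915_NUM_RINGS in this series */

/* Decode the busy bitmask produced by the hunk above. */
static void decode_busy(uint32_t busy)
{
	int i;

	if (!(busy & 1)) {
		printf("object is idle\n");
		return;
	}

	for (i = 0; i < NUM_RINGS; i++)
		if (busy & (1u << (16 + i)))
			printf("still being read by ring %d\n", i);
}
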
@@ -4307,8 +4476,13 @@ unlock:
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			  const struct drm_i915_gem_object_ops *ops)
 {
+	int i;
+
 	INIT_LIST_HEAD(&obj->global_list);
-	INIT_LIST_HEAD(&obj->ring_list);
+	INIT_LIST_HEAD(&obj->last_fence.ring_list);
+	INIT_LIST_HEAD(&obj->last_write.ring_list);
+	for (i = 0; i < I915_NUM_RINGS; i++)
+		INIT_LIST_HEAD(&obj->last_read[i].ring_list);
 	INIT_LIST_HEAD(&obj->obj_exec_link);
 	INIT_LIST_HEAD(&obj->vma_list);
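
As an illustrative aside (not part of the patch): the initialisation above reflects the new per-object tracking shape, i.e. one read slot per ring plus single write and fence slots, each carrying its request and a link on the owning ring's list. A toy sketch of that shape; these are not the driver's real structures.

#define NUM_RINGS 5

struct toy_request;			/* opaque stand-in for i915_gem_request */
struct toy_list_head { struct toy_list_head *prev, *next; };

/* One slot per kind of activity: the request that last used the object
 * for that purpose, plus the link on the owning ring's list. */
struct toy_active_slot {
	struct toy_request *request;	/* NULL when idle */
	struct toy_list_head ring_list;
};

struct toy_object_tracking {
	struct toy_active_slot last_read[NUM_RINGS];
	struct toy_active_slot last_write;
	struct toy_active_slot last_fence;
};
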
 
@@ -4876,7 +5050,9 @@ i915_gem_lastclose(struct drm_device *dev)
 static void
 init_ring_lists(struct intel_engine_cs *ring)
 {
-	INIT_LIST_HEAD(&ring->active_list);
+	INIT_LIST_HEAD(&ring->read_list);
+	INIT_LIST_HEAD(&ring->write_list);
+	INIT_LIST_HEAD(&ring->fence_list);
 	INIT_LIST_HEAD(&ring->request_list);
 }
 
@@ -4972,13 +5148,13 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 	 */
 	spin_lock(&file_priv->mm.lock);
 	while (!list_empty(&file_priv->mm.request_list)) {
-		struct drm_i915_gem_request *request;
+		struct i915_gem_request *rq;
 
-		request = list_first_entry(&file_priv->mm.request_list,
-					   struct drm_i915_gem_request,
-					   client_list);
-		list_del(&request->client_list);
-		request->file_priv = NULL;
+		rq = list_first_entry(&file_priv->mm.request_list,
+				      struct i915_gem_request,
+				      client_list);
+		list_del(&rq->client_list);
+		rq->file_priv = NULL;
 	}
 	spin_unlock(&file_priv->mm.lock);
 }
@@ -5266,3 +5442,37 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
 
 	return vma;
 }
+
+struct i915_gem_request *i915_gem_object_last_read(struct drm_i915_gem_object *obj)
+{
+	u32 seqno = 0;
+	struct i915_gem_request *rq = NULL;
+	int i;
+
+	/* This is approximate as seqno cannot be used across rings */
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		if (obj->last_read[i].request == NULL)
+			continue;
+
+		if (__i915_seqno_passed(obj->last_read[i].request->seqno, seqno))
+			rq = obj->last_read[i].request, seqno = rq->seqno;
+	}
+
+	return rq;
+}
+
+struct i915_gem_request *i915_gem_seqno_to_request(struct intel_engine_cs *ring,
+						   u32 seqno)
+{
+	struct i915_gem_request *rq;
+
+	list_for_each_entry(rq, &ring->request_list, list) {
+		if (rq->seqno == seqno)
+			return rq;
+
+		if (__i915_seqno_passed(seqno, rq->seqno))
+			break;
+	}
+
+	return NULL;
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 9683e62..5cc1e98 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -303,13 +303,9 @@ void i915_gem_context_reset(struct drm_device *dev)
 		if (!lctx)
 			continue;
 
-		if (dctx->legacy_hw_ctx.rcs_state && i == RCS) {
+		if (dctx->legacy_hw_ctx.rcs_state && i == RCS)
 			WARN_ON(i915_gem_obj_ggtt_pin(dctx->legacy_hw_ctx.rcs_state,
 						      get_context_alignment(dev), 0));
-			/* Fake a finish/inactive */
-			dctx->legacy_hw_ctx.rcs_state->base.write_domain = 0;
-			dctx->legacy_hw_ctx.rcs_state->active = 0;
-		}
 
 		if (lctx->legacy_hw_ctx.rcs_state && i == RCS)
 			i915_gem_object_ggtt_unpin(lctx->legacy_hw_ctx.rcs_state);
@@ -385,7 +381,6 @@ void i915_gem_context_fini(struct drm_device *dev)
 		WARN_ON(!dev_priv->ring[RCS].last_context);
 		if (dev_priv->ring[RCS].last_context == dctx) {
 			/* Fake switch to NULL context */
-			WARN_ON(dctx->legacy_hw_ctx.rcs_state->active);
 			i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state);
 			i915_gem_context_unreference(dctx);
 			dev_priv->ring[RCS].last_context = NULL;
@@ -613,8 +608,11 @@ static int do_switch(struct intel_engine_cs *ring,
 	 * MI_SET_CONTEXT instead of when the next seqno has completed.
 	 */
 	if (from != NULL) {
-		from->legacy_hw_ctx.rcs_state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), ring);
+		struct drm_i915_gem_object *from_obj = from->legacy_hw_ctx.rcs_state;
+
+		from_obj->base.pending_read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from_obj), ring, 0);
+
 		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
 		 * whole damn pipeline, we don't need to explicitly mark the
 		 * object dirty. The only exception is that the context must be
@@ -622,11 +620,10 @@ static int do_switch(struct intel_engine_cs *ring,
 		 * able to defer doing this until we know the object would be
 		 * swapped, but there is no way to do that yet.
 		 */
-		from->legacy_hw_ctx.rcs_state->dirty = 1;
-		BUG_ON(from->legacy_hw_ctx.rcs_state->ring != ring);
+		from_obj->dirty = 1;
 
 		/* obj is kept alive until the next request by its active ref */
-		i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
+		i915_gem_object_ggtt_unpin(from_obj);
 		i915_gem_context_unreference(from);
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 1a0611b..13a2f13 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -832,7 +832,8 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
 
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
-		ret = i915_gem_object_sync(obj, ring);
+
+		ret = i915_gem_object_sync(obj, ring, obj->base.pending_write_domain == 0);
 		if (ret)
 			return ret;
 
@@ -946,40 +947,20 @@ void
 i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 				   struct intel_engine_cs *ring)
 {
-	u32 seqno = intel_ring_get_seqno(ring);
 	struct i915_vma *vma;
 
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-		struct drm_i915_gem_object *obj = vma->obj;
-		u32 old_read = obj->base.read_domains;
-		u32 old_write = obj->base.write_domain;
-
-		obj->base.write_domain = obj->base.pending_write_domain;
-		if (obj->base.write_domain == 0)
-			obj->base.pending_read_domains |= obj->base.read_domains;
-		obj->base.read_domains = obj->base.pending_read_domains;
-
-		i915_vma_move_to_active(vma, ring);
-		if (obj->base.write_domain) {
-			obj->dirty = 1;
-			obj->last_write_seqno = seqno;
+		unsigned fenced;
 
-			intel_fb_obj_invalidate(obj, ring);
-
-			/* update for the implicit flush after a batch */
-			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
-		}
+		fenced = 0;
 		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
-			obj->last_fenced_seqno = seqno;
-			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
-				struct drm_i915_private *dev_priv = to_i915(ring->dev);
-				list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
-					       &dev_priv->mm.fence_list);
-			}
+			fenced |= VMA_IS_FENCED;
+			if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
+				fenced |= VMA_HAS_FENCE;
 		}
 
-		trace_i915_gem_object_change_domain(obj, old_read, old_write);
+		i915_vma_move_to_active(vma, ring, fenced);
 	}
 }
 
@@ -993,7 +974,7 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
 	ring->gpu_caches_dirty = true;
 
 	/* Add a breadcrumb for the completion of the batch buffer */
-	(void)__i915_add_request(ring, file, obj, NULL);
+	(void)__i915_add_request(ring, file, obj);
 }
 
 static int
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index e60be3f..fc1223c 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -159,9 +159,10 @@ int i915_gem_render_state_init(struct intel_engine_cs *ring)
 	if (ret)
 		goto out;
 
-	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
+	so.obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
+	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring, 0);
 
-	ret = __i915_add_request(ring, NULL, so.obj, NULL);
+	ret = __i915_add_request(ring, NULL, so.obj);
 	/* __i915_add_request moves object to inactive if it fails */
 out:
 	render_state_fini(&so);
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 7e623bf..a45651d 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -376,7 +376,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 
 		if (ret == 0) {
 			obj->fence_dirty =
-				obj->last_fenced_seqno ||
+				obj->last_fence.request ||
 				obj->fence_reg != I915_FENCE_REG_NONE;
 
 			obj->tiling_mode = args->tiling_mode;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 1e05414..fb1041f 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -661,11 +661,12 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 		       struct i915_vma *vma)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
+	struct i915_gem_request *rq = i915_gem_object_last_read(obj);
 
 	err->size = obj->base.size;
 	err->name = obj->base.name;
-	err->rseqno = obj->last_read_seqno;
-	err->wseqno = obj->last_write_seqno;
+	err->rseqno = i915_request_seqno(rq);
+	err->wseqno = i915_request_seqno(obj->last_write.request);
 	err->gtt_offset = vma->node.start;
 	err->read_domains = obj->base.read_domains;
 	err->write_domain = obj->base.write_domain;
@@ -679,7 +680,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->dirty = obj->dirty;
 	err->purgeable = obj->madv != I915_MADV_WILLNEED;
 	err->userptr = obj->userptr.mm != NULL;
-	err->ring = obj->ring ? obj->ring->id : -1;
+	err->ring = i915_request_ring_id(rq);
 	err->cache_level = obj->cache_level;
 }
 
@@ -877,8 +878,8 @@ static void i915_record_ring_state(struct drm_device *dev,
 
 	ering->waiting = waitqueue_active(&ring->irq_queue);
 	ering->instpm = I915_READ(RING_INSTPM(ring->mmio_base));
-	ering->seqno = ring->get_seqno(ring, false);
 	ering->acthd = intel_ring_get_active_head(ring);
+	ering->seqno = ring->get_seqno(ring);
 	ering->head = I915_READ_HEAD(ring);
 	ering->tail = I915_READ_TAIL(ring);
 	ering->ctl = I915_READ_CTL(ring);
@@ -972,7 +973,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
 				  struct drm_i915_error_state *error)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_request *request;
+	struct i915_gem_request *rq;
 	int i, count;
 
 	for (i = 0; i < I915_NUM_RINGS; i++) {
@@ -987,13 +988,12 @@ static void i915_gem_record_rings(struct drm_device *dev,
 
 		i915_record_ring_state(dev, error, ring, &error->ring[i]);
 
-		request = i915_gem_find_active_request(ring);
-		if (request) {
+		rq = i915_gem_find_active_request(ring);
+		if (rq) {
 			struct i915_address_space *vm;
 
-			vm = request->ctx && request->ctx->ppgtt ?
-				&request->ctx->ppgtt->base :
-				&dev_priv->gtt.base;
+			vm = rq->ctx && rq->ctx->ppgtt ?
+				&rq->ctx->ppgtt->base : &dev_priv->gtt.base;
 
 			/* We need to copy these to an anonymous buffer
 			 * as the simplest method to avoid being overwritten
@@ -1001,7 +1001,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
 			 */
 			error->ring[i].batchbuffer =
 				i915_error_object_create(dev_priv,
-							 request->batch_obj,
+							 rq->batch_obj,
 							 vm);
 
 			if (HAS_BROKEN_CS_TLB(dev_priv->dev))
@@ -1009,11 +1009,11 @@ static void i915_gem_record_rings(struct drm_device *dev,
 					i915_error_ggtt_object_create(dev_priv,
 							     ring->scratch.obj);
 
-			if (request->file_priv) {
+			if (rq->file_priv) {
 				struct task_struct *task;
 
 				rcu_read_lock();
-				task = pid_task(request->file_priv->file->pid,
+				task = pid_task(rq->file_priv->file->pid,
 						PIDTYPE_PID);
 				if (task) {
 					strcpy(error->ring[i].comm, task->comm);
@@ -1032,7 +1032,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
 		i915_gem_record_active_context(ring, error, &error->ring[i]);
 
 		count = 0;
-		list_for_each_entry(request, &ring->request_list, list)
+		list_for_each_entry(rq, &ring->request_list, list)
 			count++;
 
 		error->ring[i].num_requests = count;
@@ -1045,13 +1045,13 @@ static void i915_gem_record_rings(struct drm_device *dev,
 		}
 
 		count = 0;
-		list_for_each_entry(request, &ring->request_list, list) {
+		list_for_each_entry(rq, &ring->request_list, list) {
 			struct drm_i915_error_request *erq;
 
 			erq = &error->ring[i].requests[count++];
-			erq->seqno = request->seqno;
-			erq->jiffies = request->emitted_jiffies;
-			erq->tail = request->tail;
+			erq->seqno = rq->seqno;
+			erq->jiffies = rq->emitted_jiffies;
+			erq->tail = rq->tail;
 		}
 	}
 }
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b1bb88f..2dab019 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1265,9 +1265,6 @@ static void notify_ring(struct drm_device *dev,
 
 	trace_i915_gem_request_complete(ring);
 
-	if (drm_core_check_feature(dev, DRIVER_MODESET))
-		intel_notify_mmio_flip(ring);
-
 	wake_up_all(&ring->irq_queue);
 	i915_queue_hangcheck(dev);
 }
@@ -3041,18 +3038,15 @@ static void gen8_disable_vblank(struct drm_device *dev, int pipe)
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
 
-static u32
-ring_last_seqno(struct intel_engine_cs *ring)
-{
-	return list_entry(ring->request_list.prev,
-			  struct drm_i915_gem_request, list)->seqno;
-}
-
 static bool
-ring_idle(struct intel_engine_cs *ring, u32 seqno)
+ring_idle(struct intel_engine_cs *ring)
 {
-	return (list_empty(&ring->request_list) ||
-		i915_seqno_passed(seqno, ring_last_seqno(ring)));
+	if (list_empty(&ring->request_list))
+		return true;
+
+	return i915_request_complete(list_entry(ring->request_list.prev,
+						struct i915_gem_request,
+						list));
 }
 
 static bool
@@ -3155,6 +3149,7 @@ static int semaphore_passed(struct intel_engine_cs *ring)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	struct intel_engine_cs *signaller;
+	struct i915_gem_request *rq;
 	u32 seqno;
 
 	ring->hangcheck.deadlock++;
@@ -3167,7 +3162,8 @@ static int semaphore_passed(struct intel_engine_cs *ring)
 	if (signaller->hangcheck.deadlock >= I915_NUM_RINGS)
 		return -1;
 
-	if (i915_seqno_passed(signaller->get_seqno(signaller, false), seqno))
+	rq = i915_gem_seqno_to_request(ring, seqno);
+	if (rq == NULL || i915_request_complete(rq))
 		return 1;
 
 	/* cursory check for an unkickable deadlock */
@@ -3268,11 +3264,11 @@ static void i915_hangcheck_elapsed(unsigned long data)
 
 		semaphore_clear_deadlocks(dev_priv);
 
-		seqno = ring->get_seqno(ring, false);
 		acthd = intel_ring_get_active_head(ring);
+		seqno = ring->get_seqno(ring);
 
 		if (ring->hangcheck.seqno == seqno) {
-			if (ring_idle(ring, seqno)) {
+			if (ring_idle(ring)) {
 				ring->hangcheck.action = HANGCHECK_IDLE;
 
 				if (waitqueue_active(&ring->irq_queue)) {
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index f5aa006..0072d17 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -365,7 +365,7 @@ TRACE_EVENT(i915_gem_ring_dispatch,
 	    TP_fast_assign(
 			   __entry->dev = ring->dev->primary->index;
 			   __entry->ring = ring->id;
-			   __entry->seqno = seqno;
+			   __entry->seqno = intel_ring_get_seqno(ring);
 			   __entry->flags = flags;
 			   i915_trace_irq_get(ring, seqno);
 			   ),
@@ -435,7 +435,7 @@ TRACE_EVENT(i915_gem_request_complete,
 	    TP_fast_assign(
 			   __entry->dev = ring->dev->primary->index;
 			   __entry->ring = ring->id;
-			   __entry->seqno = ring->get_seqno(ring, false);
+			   __entry->seqno = ring->get_seqno(ring);
 			   ),
 
 	    TP_printk("dev=%u, ring=%u, seqno=%u",
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index a1cf052..4432fe8 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9065,6 +9065,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
 	BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
 	atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
 
+	i915_request_put(work->flip_queued_request);
 	kfree(work);
 }
 
@@ -9455,7 +9456,7 @@ static bool use_mmio_flip(struct intel_engine_cs *ring,
 	else if (i915.enable_execlists)
 		return true;
 	else
-		return ring != obj->ring;
+		return ring != i915_request_ring(obj->last_write.request);
 }
 
 static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
@@ -9486,94 +9487,54 @@ static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
 	POSTING_READ(DSPSURF(intel_crtc->plane));
 }
 
-static int intel_postpone_flip(struct drm_i915_gem_object *obj)
-{
-	struct intel_engine_cs *ring;
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	if (!obj->last_write_seqno)
-		return 0;
-
-	ring = obj->ring;
-
-	if (i915_seqno_passed(ring->get_seqno(ring, true),
-			      obj->last_write_seqno))
-		return 0;
-
-	ret = i915_gem_check_olr(ring, obj->last_write_seqno);
-	if (ret)
-		return ret;
-
-	if (WARN_ON(!ring->irq_get(ring)))
-		return 0;
-
-	return 1;
-}
+struct flip_work {
+	struct work_struct work;
+	struct i915_gem_request *rq;
+	struct intel_crtc *crtc;
+};
 
-void intel_notify_mmio_flip(struct intel_engine_cs *ring)
+static void intel_mmio_flip_work(struct work_struct *work)
 {
-	struct drm_i915_private *dev_priv = to_i915(ring->dev);
-	struct intel_crtc *intel_crtc;
-	unsigned long irq_flags;
-	u32 seqno;
-
-	seqno = ring->get_seqno(ring, false);
-
-	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
-	for_each_intel_crtc(ring->dev, intel_crtc) {
-		struct intel_mmio_flip *mmio_flip;
+	struct flip_work *flip = container_of(work, struct flip_work, work);
 
-		mmio_flip = &intel_crtc->mmio_flip;
-		if (mmio_flip->seqno == 0)
-			continue;
-
-		if (ring->id != mmio_flip->ring_id)
-			continue;
+	if (__i915_request_wait(flip->rq, false) == 0)
+		intel_do_mmio_flip(flip->crtc);
 
-		if (i915_seqno_passed(seqno, mmio_flip->seqno)) {
-			intel_do_mmio_flip(intel_crtc);
-			mmio_flip->seqno = 0;
-			ring->irq_put(ring);
-		}
-	}
-	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
+	i915_request_put(flip->rq);
+	kfree(flip);
 }
 
-static int intel_queue_mmio_flip(struct drm_device *dev,
-				 struct drm_crtc *crtc,
-				 struct drm_framebuffer *fb,
-				 struct drm_i915_gem_object *obj,
-				 struct intel_engine_cs *ring,
-				 uint32_t flags)
+static int intel_queue_mmio_flip(struct intel_crtc *crtc,
+				 struct i915_gem_request *rq)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	unsigned long irq_flags;
+	struct flip_work *flip;
 	int ret;
 
-	if (WARN_ON(intel_crtc->mmio_flip.seqno))
+	if (WARN_ON(crtc->mmio_flip))
 		return -EBUSY;
 
-	ret = intel_postpone_flip(obj);
-	if (ret < 0)
+	if (rq == NULL) {
+		intel_do_mmio_flip(crtc);
+		return 0;
+	}
+
+	ret = i915_gem_check_olr(rq);
+	if (ret)
 		return ret;
-	if (ret == 0) {
-		intel_do_mmio_flip(intel_crtc);
+
+	if (i915_request_complete(rq)) {
+		intel_do_mmio_flip(crtc);
 		return 0;
 	}
 
-	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
-	intel_crtc->mmio_flip.seqno = obj->last_write_seqno;
-	intel_crtc->mmio_flip.ring_id = obj->ring->id;
-	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
+	flip = kmalloc(sizeof(*flip), GFP_KERNEL);
+	if (flip == NULL)
+		return -ENOMEM;
 
-	/*
-	 * Double check to catch cases where irq fired before
-	 * mmio flip data was ready
-	 */
-	intel_notify_mmio_flip(obj->ring);
+	INIT_WORK(&flip->work, intel_mmio_flip_work);
+	flip->rq = i915_request_get(rq);
+	flip->crtc = crtc;
+	schedule_work(&flip->work);
 	return 0;
 }
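
As an illustrative aside (not part of the patch): intel_queue_mmio_flip() above now flips immediately when there is no outstanding write, or when that write has already completed, and otherwise hands the flip to a worker that waits on the request first. A toy sketch of that decision; the names are hypothetical.

#include <stdbool.h>
#include <stddef.h>

struct toy_request {
	bool completed;
};

enum toy_action { FLIP_NOW, FLIP_FROM_WORKER };

/* Flip immediately unless the last write is still in flight, in which case
 * the flip is deferred to a worker that waits on the request first. */
static enum toy_action toy_queue_flip(const struct toy_request *last_write)
{
	if (last_write == NULL || last_write->completed)
		return FLIP_NOW;

	return FLIP_FROM_WORKER;	/* schedule_work() in the real driver */
}
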
 
@@ -9587,6 +9548,7 @@ static int intel_default_queue_flip(struct drm_device *dev,
 	return -ENODEV;
 }
 
+
 static int intel_crtc_page_flip(struct drm_crtc *crtc,
 				struct drm_framebuffer *fb,
 				struct drm_pending_vblank_event *event,
@@ -9600,6 +9562,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	enum pipe pipe = intel_crtc->pipe;
 	struct intel_unpin_work *work;
 	struct intel_engine_cs *ring;
+	struct i915_gem_request *rq;
 	unsigned long flags;
 	int ret;
 
@@ -9684,28 +9647,44 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	} else if (IS_IVYBRIDGE(dev)) {
 		ring = &dev_priv->ring[BCS];
 	} else if (INTEL_INFO(dev)->gen >= 7) {
-		ring = obj->ring;
+		ring = i915_request_ring(obj->last_write.request);
 		if (ring == NULL || ring->id != RCS)
 			ring = &dev_priv->ring[BCS];
 	} else {
 		ring = &dev_priv->ring[RCS];
 	}
 
-	ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
-	if (ret)
-		goto cleanup_pending;
+	if (use_mmio_flip(ring, obj)) {
+		rq = obj->last_write.request;
 
-	work->gtt_offset =
-		i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset;
+		ret = intel_pin_and_fence_fb_obj(dev, obj, i915_request_ring(rq));
+		if (ret)
+			goto cleanup_pending;
+
+		work->gtt_offset =
+			i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset;
+
+		ret = intel_queue_mmio_flip(intel_crtc, rq);
+		if (ret)
+			goto cleanup_unpin;
+	} else {
+		ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
+		if (ret)
+			goto cleanup_pending;
+
+		work->gtt_offset =
+			i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset;
 
-	if (use_mmio_flip(ring, obj))
-		ret = intel_queue_mmio_flip(dev, crtc, fb, obj, ring,
-					    page_flip_flags);
-	else
 		ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, ring,
-				page_flip_flags);
-	if (ret)
-		goto cleanup_unpin;
+						   page_flip_flags);
+		if (ret)
+			goto cleanup_unpin;
+
+		rq = intel_ring_get_request(ring);
+	}
+
+	work->flip_queued_request = i915_request_get(rq);
+	work->enable_stall_check = true;
 
 	i915_gem_track_fb(work->old_fb_obj, obj,
 			  INTEL_FRONTBUFFER_PRIMARY(pipe));
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 1b3d1d7..617af38 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -372,11 +372,6 @@ struct intel_pipe_wm {
 	bool sprites_scaled;
 };
 
-struct intel_mmio_flip {
-	u32 seqno;
-	u32 ring_id;
-};
-
 struct intel_crtc {
 	struct drm_crtc base;
 	enum pipe pipe;
@@ -426,7 +421,7 @@ struct intel_crtc {
 	} wm;
 
 	int scanline_offset;
-	struct intel_mmio_flip mmio_flip;
+	struct i915_gem_request *mmio_flip;
 };
 
 struct intel_plane_wm_parameters {
@@ -657,6 +652,7 @@ struct intel_unpin_work {
 #define INTEL_FLIP_COMPLETE	2
 	u32 flip_count;
 	u32 gtt_offset;
+	struct i915_gem_request *flip_queued_request;
 	bool enable_stall_check;
 };
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6b5f416..bbcc0e6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -122,7 +122,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
 
-		ret = i915_gem_object_sync(obj, ring);
+		ret = i915_gem_object_sync(obj, ring, obj->base.pending_write_domain == 0);
 		if (ret)
 			return ret;
 
@@ -262,30 +262,11 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
 	/* TODO: how to submit a context to the ELSP is not here yet */
 }
 
-static int logical_ring_alloc_seqno(struct intel_engine_cs *ring)
-{
-	if (ring->outstanding_lazy_seqno)
-		return 0;
-
-	if (ring->preallocated_lazy_request == NULL) {
-		struct drm_i915_gem_request *request;
-
-		request = kmalloc(sizeof(*request), GFP_KERNEL);
-		if (request == NULL)
-			return -ENOMEM;
-
-		ring->preallocated_lazy_request = request;
-	}
-
-	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
-}
-
-static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
-				     int bytes)
+static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
+				       int bytes)
 {
 	struct intel_engine_cs *ring = ringbuf->ring;
-	struct drm_i915_gem_request *request;
-	u32 seqno = 0;
+	struct i915_gem_request *rq;
 	int ret;
 
 	if (ringbuf->last_retired_head != -1) {
@@ -297,24 +278,20 @@ static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
 			return 0;
 	}
 
-	list_for_each_entry(request, &ring->request_list, list) {
-		if (__intel_ring_space(request->tail, ringbuf->tail,
-				       ringbuf->size) >= bytes) {
-			seqno = request->seqno;
+	list_for_each_entry(rq, &ring->request_list, list)
+		if (__intel_ring_space(rq->tail, ringbuf->tail, ringbuf->size) >= bytes)
 			break;
-		}
-	}
 
-	if (seqno == 0)
+	if (rq == list_entry(&ring->request_list, typeof(*rq), list))
 		return -ENOSPC;
 
-	ret = i915_wait_seqno(ring, seqno);
+	ret = i915_wait_request(rq);
 	if (ret)
 		return ret;
 
+	i915_gem_retire_requests_ring(ring);
 	/* TODO: make sure we update the right ringbuffer's last_retired_head
 	 * when retiring requests */
-	i915_gem_retire_requests_ring(ring);
 	ringbuf->head = ringbuf->last_retired_head;
 	ringbuf->last_retired_head = -1;
 
@@ -322,58 +299,6 @@ static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
 	return 0;
 }
 
-static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
-				       int bytes)
-{
-	struct intel_engine_cs *ring = ringbuf->ring;
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	unsigned long end;
-	int ret;
-
-	ret = logical_ring_wait_request(ringbuf, bytes);
-	if (ret != -ENOSPC)
-		return ret;
-
-	/* Force the context submission in case we have been skipping it */
-	intel_logical_ring_advance_and_submit(ringbuf);
-
-	/* With GEM the hangcheck timer should kick us out of the loop,
-	 * leaving it early runs the risk of corrupting GEM state (due
-	 * to running on almost untested codepaths). But on resume
-	 * timers don't work yet, so prevent a complete hang in that
-	 * case by choosing an insanely large timeout. */
-	end = jiffies + 60 * HZ;
-
-	do {
-		ringbuf->head = I915_READ_HEAD(ring);
-		ringbuf->space = intel_ring_space(ringbuf);
-		if (ringbuf->space >= bytes) {
-			ret = 0;
-			break;
-		}
-
-		msleep(1);
-
-		if (dev_priv->mm.interruptible && signal_pending(current)) {
-			ret = -ERESTARTSYS;
-			break;
-		}
-
-		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-					   dev_priv->mm.interruptible);
-		if (ret)
-			break;
-
-		if (time_after(jiffies, end)) {
-			ret = -EBUSY;
-			break;
-		}
-	} while (1);
-
-	return ret;
-}
-
 static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf)
 {
 	uint32_t __iomem *virt;
@@ -419,21 +344,14 @@ static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes)
 int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
 {
 	struct intel_engine_cs *ring = ringbuf->ring;
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
 
-	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-				   dev_priv->mm.interruptible);
-	if (ret)
-		return ret;
-
 	ret = logical_ring_prepare(ringbuf, num_dwords * sizeof(uint32_t));
 	if (ret)
 		return ret;
 
 	/* Preallocate the olr before touching the ring */
-	ret = logical_ring_alloc_seqno(ring);
+	ret = intel_ring_alloc_request(ring);
 	if (ret)
 		return ret;
 
@@ -620,7 +538,7 @@ static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf,
 	return 0;
 }
 
-static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
+static u32 gen8_get_seqno(struct intel_engine_cs *ring)
 {
 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 }
@@ -648,7 +566,7 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
 				(ring->status_page.gfx_addr +
 				(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)));
 	intel_logical_ring_emit(ringbuf, 0);
-	intel_logical_ring_emit(ringbuf, ring->outstanding_lazy_seqno);
+	intel_logical_ring_emit(ringbuf, intel_ring_get_seqno(ring));
 	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
 	intel_logical_ring_emit(ringbuf, MI_NOOP);
 	intel_logical_ring_advance_and_submit(ringbuf);
@@ -665,8 +583,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
 
 	intel_logical_ring_stop(ring);
 	WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
-	ring->preallocated_lazy_request = NULL;
-	ring->outstanding_lazy_seqno = 0;
+
+	kfree(ring->preallocated_request);
+	ring->preallocated_request = NULL;
 
 	if (ring->cleanup)
 		ring->cleanup(ring);
@@ -689,7 +608,9 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
 	ring->buffer = NULL;
 
 	ring->dev = dev;
-	INIT_LIST_HEAD(&ring->active_list);
+	INIT_LIST_HEAD(&ring->read_list);
+	INIT_LIST_HEAD(&ring->write_list);
+	INIT_LIST_HEAD(&ring->fence_list);
 	INIT_LIST_HEAD(&ring->request_list);
 	init_waitqueue_head(&ring->irq_queue);
 
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index dc2f4f2..42ebbf9 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -182,7 +182,7 @@ struct intel_overlay {
 	u32 flip_addr;
 	struct drm_i915_gem_object *reg_bo;
 	/* flip handling */
-	uint32_t last_flip_req;
+	struct i915_gem_request *flip_request;
 	void (*flip_tail)(struct intel_overlay *);
 };
 
@@ -208,29 +208,49 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
 		io_mapping_unmap(regs);
 }
 
-static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
-					 void (*tail)(struct intel_overlay *))
+/* recover from an interruption due to a signal
+ * We have to be careful not to repeat work forever and make forward progress. */
+static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
 {
-	struct drm_device *dev = overlay->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
 	int ret;
 
-	BUG_ON(overlay->last_flip_req);
-	ret = i915_add_request(ring, &overlay->last_flip_req);
-	if (ret)
-		return ret;
+	if (overlay->flip_request == NULL)
+		return 0;
 
-	overlay->flip_tail = tail;
-	ret = i915_wait_seqno(ring, overlay->last_flip_req);
+	ret = i915_wait_request(overlay->flip_request);
 	if (ret)
 		return ret;
-	i915_gem_retire_requests(dev);
 
-	overlay->last_flip_req = 0;
+	i915_request_put(overlay->flip_request);
+	overlay->flip_request = NULL;
+
+	i915_gem_retire_requests(overlay->dev);
+
+	if (overlay->flip_tail)
+		overlay->flip_tail(overlay);
+
 	return 0;
 }
 
+static int intel_overlay_add_request(struct intel_overlay *overlay,
+				     struct intel_engine_cs *ring,
+				     void (*tail)(struct intel_overlay *))
+{
+	BUG_ON(overlay->flip_request);
+	overlay->flip_request = i915_request_get(intel_ring_get_request(ring));
+	overlay->flip_tail = tail;
+
+	return i915_add_request(ring);
+}
+
+static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
+					 struct intel_engine_cs *ring,
+					 void (*tail)(struct intel_overlay *))
+{
+	intel_overlay_add_request(overlay, ring, tail);
+	return intel_overlay_recover_from_interrupt(overlay);
+}
+
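
As an illustrative aside (not part of the patch): tracking the overlay flip by request lets the recovery path wait on it, run the deferred tail work exactly once and clear the slot, so an interrupted wait can simply be retried without re-emitting commands. A toy sketch of that flow; all names are hypothetical.

#include <stddef.h>

struct toy_request;			/* opaque stand-in for i915_gem_request */

struct toy_overlay {
	struct toy_request *flip_request;	/* NULL when no flip pending */
	void (*flip_tail)(struct toy_overlay *);
};

static int toy_overlay_recover(struct toy_overlay *overlay,
			       int (*wait)(struct toy_request *),
			       void (*put)(struct toy_request *))
{
	int ret;

	if (overlay->flip_request == NULL)
		return 0;

	ret = wait(overlay->flip_request);
	if (ret)
		return ret;	/* e.g. interrupted by a signal: retry later */

	put(overlay->flip_request);
	overlay->flip_request = NULL;

	if (overlay->flip_tail)
		overlay->flip_tail(overlay);
	return 0;
}
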
 /* overlay needs to be disable in OCMD reg */
 static int intel_overlay_on(struct intel_overlay *overlay)
 {
@@ -252,9 +272,9 @@ static int intel_overlay_on(struct intel_overlay *overlay)
 	intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
 	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
 	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	__intel_ring_advance(ring);
 
-	return intel_overlay_do_wait_request(overlay, NULL);
+	return intel_overlay_do_wait_request(overlay, ring, NULL);
 }
 
 /* overlay needs to be enabled in OCMD reg */
@@ -284,15 +304,18 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
 
 	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
 	intel_ring_emit(ring, flip_addr);
-	intel_ring_advance(ring);
+	__intel_ring_advance(ring);
 
-	return i915_add_request(ring, &overlay->last_flip_req);
+	return intel_overlay_add_request(overlay, ring, NULL);
 }
 
 static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
 {
 	struct drm_i915_gem_object *obj = overlay->old_vid_bo;
 
+	i915_gem_track_fb(obj, NULL,
+			  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
+
 	i915_gem_object_ggtt_unpin(obj);
 	drm_gem_object_unreference(&obj->base);
 
@@ -352,33 +375,9 @@ static int intel_overlay_off(struct intel_overlay *overlay)
 		intel_ring_emit(ring, flip_addr);
 		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
 	}
-	intel_ring_advance(ring);
-
-	return intel_overlay_do_wait_request(overlay, intel_overlay_off_tail);
-}
-
-/* recover from an interruption due to a signal
- * We have to be careful not to repeat work forever an make forward progess. */
-static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
-{
-	struct drm_device *dev = overlay->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
-	int ret;
-
-	if (overlay->last_flip_req == 0)
-		return 0;
+	__intel_ring_advance(ring);
 
-	ret = i915_wait_seqno(ring, overlay->last_flip_req);
-	if (ret)
-		return ret;
-	i915_gem_retire_requests(dev);
-
-	if (overlay->flip_tail)
-		overlay->flip_tail(overlay);
-
-	overlay->last_flip_req = 0;
-	return 0;
+	return intel_overlay_do_wait_request(overlay, ring, intel_overlay_off_tail);
 }
 
 /* Wait for pending overlay flip and release old frame.
@@ -387,10 +386,8 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
  */
 static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 {
-	struct drm_device *dev = overlay->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
-	int ret;
+	struct drm_i915_private *dev_priv = to_i915(overlay->dev);
+	int ret = 0;
 
 	/* Only wait if there is actually an old frame to release to
 	 * guarantee forward progress.
@@ -399,6 +396,8 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 		return 0;
 
 	if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
+		struct intel_engine_cs *ring = &dev_priv->ring[RCS];
+
 		/* synchronous slowpath */
 		ret = intel_ring_begin(ring, 2);
 		if (ret)
@@ -406,20 +405,14 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 
 		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
 		intel_ring_emit(ring, MI_NOOP);
-		intel_ring_advance(ring);
+		__intel_ring_advance(ring);
 
-		ret = intel_overlay_do_wait_request(overlay,
+		ret = intel_overlay_do_wait_request(overlay, ring,
 						    intel_overlay_release_old_vid_tail);
-		if (ret)
-			return ret;
-	}
-
-	intel_overlay_release_old_vid_tail(overlay);
+	} else
+		intel_overlay_release_old_vid_tail(overlay);
 
-
-	i915_gem_track_fb(overlay->old_vid_bo, NULL,
-			  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
-	return 0;
+	return ret;
 }
 
 struct put_image_params {
@@ -821,12 +814,7 @@ int intel_overlay_switch_off(struct intel_overlay *overlay)
 	iowrite32(0, &regs->OCMD);
 	intel_overlay_unmap_regs(overlay, regs);
 
-	ret = intel_overlay_off(overlay);
-	if (ret != 0)
-		return ret;
-
-	intel_overlay_off_tail(overlay);
-	return 0;
+	return intel_overlay_off(overlay);
 }
 
 static int check_overlay_possible_on_crtc(struct intel_overlay *overlay,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 13543f8..ee656ea 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -750,7 +750,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,
 					   PIPE_CONTROL_FLUSH_ENABLE);
 		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
 		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
-		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
+		intel_ring_emit(signaller, intel_ring_get_seqno(signaller));
 		intel_ring_emit(signaller, 0);
 		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
 					   MI_SEMAPHORE_TARGET(waiter->id));
@@ -787,7 +787,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,
 		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
 					   MI_FLUSH_DW_USE_GTT);
 		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
-		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
+		intel_ring_emit(signaller, intel_ring_get_seqno(signaller));
 		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
 					   MI_SEMAPHORE_TARGET(waiter->id));
 		intel_ring_emit(signaller, 0);
@@ -818,7 +818,7 @@ static int gen6_signal(struct intel_engine_cs *signaller,
 		if (mbox_reg != GEN6_NOSYNC) {
 			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
 			intel_ring_emit(signaller, mbox_reg);
-			intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
+			intel_ring_emit(signaller, intel_ring_get_seqno(signaller));
 		}
 	}
 
@@ -853,7 +853,7 @@ gen6_add_request(struct intel_engine_cs *ring)
 
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
+	intel_ring_emit(ring, intel_ring_get_seqno(ring));
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	__intel_ring_advance(ring);
 
@@ -971,7 +971,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
 			PIPE_CONTROL_WRITE_FLUSH |
 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
 	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
+	intel_ring_emit(ring, intel_ring_get_seqno(ring));
 	intel_ring_emit(ring, 0);
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
 	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
@@ -990,7 +990,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
 			PIPE_CONTROL_NOTIFY);
 	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
+	intel_ring_emit(ring, intel_ring_get_seqno(ring));
 	intel_ring_emit(ring, 0);
 	__intel_ring_advance(ring);
 
@@ -998,21 +998,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
 }
 
 static u32
-gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
-{
-	/* Workaround to force correct ordering between irq and seqno writes on
-	 * ivb (and maybe also on snb) by reading from a CS register (like
-	 * ACTHD) before reading the status page. */
-	if (!lazy_coherency) {
-		struct drm_i915_private *dev_priv = ring->dev->dev_private;
-		POSTING_READ(RING_ACTHD(ring->mmio_base));
-	}
-
-	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
-}
-
-static u32
-ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
+ring_get_seqno(struct intel_engine_cs *ring)
 {
 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 }
@@ -1024,7 +1010,7 @@ ring_set_seqno(struct intel_engine_cs *ring, u32 seqno)
 }
 
 static u32
-pc_render_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
+pc_render_get_seqno(struct intel_engine_cs *ring)
 {
 	return ring->scratch.cpu_page[0];
 }
@@ -1230,7 +1216,7 @@ i9xx_add_request(struct intel_engine_cs *ring)
 
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
+	intel_ring_emit(ring, intel_ring_get_seqno(ring));
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	__intel_ring_advance(ring);
 
@@ -1247,6 +1233,11 @@ gen6_ring_get_irq(struct intel_engine_cs *ring)
 	if (!dev->irq_enabled)
 	       return false;
 
+	/* It looks like we need to prevent the gt from suspending while waiting
+	 * for a notify irq, otherwise irqs seem to get lost on at least the
+	 * blt/bsd rings on ivb. */
+	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
+
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
 	if (ring->irq_refcount++ == 0) {
 		if (HAS_L3_DPF(dev) && ring->id == RCS)
@@ -1278,6 +1269,8 @@ gen6_ring_put_irq(struct intel_engine_cs *ring)
 		gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
 	}
 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
+
+	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
 }
 
 static bool
@@ -1610,7 +1603,8 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 	}
 
 	ring->dev = dev;
-	INIT_LIST_HEAD(&ring->active_list);
+	INIT_LIST_HEAD(&ring->read_list);
+	INIT_LIST_HEAD(&ring->write_list);
 	INIT_LIST_HEAD(&ring->request_list);
 	ringbuf->size = 32 * PAGE_SIZE;
 	ringbuf->ring = ring;
@@ -1671,8 +1665,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
 	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
 
 	intel_destroy_ringbuffer_obj(ringbuf);
-	ring->preallocated_lazy_request = NULL;
-	ring->outstanding_lazy_seqno = 0;
+	ring->preallocated_request = NULL;
 
 	if (ring->cleanup)
 		ring->cleanup(ring);
@@ -1688,8 +1681,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
 static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
 {
 	struct intel_ringbuffer *ringbuf = ring->buffer;
-	struct drm_i915_gem_request *request;
-	u32 seqno = 0;
+	struct i915_gem_request *rq;
 	int ret;
 
 	if (ringbuf->last_retired_head != -1) {
@@ -1701,18 +1693,14 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
 			return 0;
 	}
 
-	list_for_each_entry(request, &ring->request_list, list) {
-		if (__intel_ring_space(request->tail, ringbuf->tail,
-				       ringbuf->size) >= n) {
-			seqno = request->seqno;
+	list_for_each_entry(rq, &ring->request_list, list)
+		if (__intel_ring_space(rq->tail, ringbuf->tail, ringbuf->size) >= n)
 			break;
-		}
-	}
 
-	if (seqno == 0)
+	if (rq == list_entry(&ring->request_list, typeof(*rq), list))
 		return -ENOSPC;
 
-	ret = i915_wait_seqno(ring, seqno);
+	ret = i915_wait_request(rq);
 	if (ret)
 		return ret;
 
@@ -1729,6 +1717,7 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_ringbuffer *ringbuf = ring->buffer;
+	unsigned reset_counter;
 	unsigned long end;
 	int ret;
 
@@ -1739,6 +1728,13 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 	/* force the tail write in case we have been skipping them */
 	__intel_ring_advance(ring);
 
+	reset_counter = 0;
+	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
+				   dev_priv->mm.interruptible,
+				   &reset_counter);
+	if (ret)
+		return ret;
+
 	/* With GEM the hangcheck timer should kick us out of the loop,
 	 * leaving it early runs the risk of corrupting GEM state (due
 	 * to running on almost untested codepaths). But on resume
@@ -1755,6 +1751,12 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 			break;
 		}
 
+		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
+					   dev_priv->mm.interruptible,
+					   &reset_counter);
+		if (ret)
+			return ret;
+
 		if (!drm_core_check_feature(dev, DRIVER_MODESET) &&
 		    dev->primary->master) {
 			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
@@ -1764,16 +1766,6 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 
 		msleep(1);
 
-		if (dev_priv->mm.interruptible && signal_pending(current)) {
-			ret = -ERESTARTSYS;
-			break;
-		}
-
-		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-					   dev_priv->mm.interruptible);
-		if (ret)
-			break;
-
 		if (time_after(jiffies, end)) {
 			ret = -EBUSY;
 			break;
@@ -1808,12 +1800,11 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
 
 int intel_ring_idle(struct intel_engine_cs *ring)
 {
-	u32 seqno;
 	int ret;
 
 	/* We need to add any requests required to flush the objects and ring */
-	if (ring->outstanding_lazy_seqno) {
-		ret = i915_add_request(ring, NULL);
+	if (ring->preallocated_request) {
+		ret = i915_add_request(ring);
 		if (ret)
 			return ret;
 	}
@@ -1822,30 +1813,46 @@ int intel_ring_idle(struct intel_engine_cs *ring)
 	if (list_empty(&ring->request_list))
 		return 0;
 
-	seqno = list_entry(ring->request_list.prev,
-			   struct drm_i915_gem_request,
-			   list)->seqno;
-
-	return i915_wait_seqno(ring, seqno);
+	return i915_wait_request(container_of(ring->request_list.prev,
+					      struct i915_gem_request,
+					      list));
 }
 
-static int
-intel_ring_alloc_seqno(struct intel_engine_cs *ring)
+int
+intel_ring_alloc_request(struct intel_engine_cs *ring)
 {
-	if (ring->outstanding_lazy_seqno)
+	struct drm_i915_private *dev_priv = to_i915(ring->dev);
+	struct i915_gem_request *rq;
+	int ret;
+
+	if (ring->preallocated_request)
 		return 0;
 
-	if (ring->preallocated_lazy_request == NULL) {
-		struct drm_i915_gem_request *request;
+	rq = kmalloc(sizeof(*rq), GFP_KERNEL);
+	if (rq == NULL)
+		return -ENOMEM;
 
-		request = kmalloc(sizeof(*request), GFP_KERNEL);
-		if (request == NULL)
-			return -ENOMEM;
+	rq->reset_counter = 0;
+	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
+				   dev_priv->mm.interruptible,
+				   &rq->reset_counter);
+	if (ret)
+		goto err;
 
-		ring->preallocated_lazy_request = request;
-	}
+	ret = i915_gem_get_seqno(ring->dev, &rq->seqno);
+	if (ret)
+		goto err;
 
-	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
+	kref_init(&rq->kref);
+	rq->ring = ring;
+	rq->completed = false;
+
+	ring->preallocated_request = rq;
+	return 0;
+
+err:
+	kfree(rq);
+	return ret;
 }
 
 static int __intel_ring_prepare(struct intel_engine_cs *ring,
@@ -1872,20 +1879,20 @@ static int __intel_ring_prepare(struct intel_engine_cs *ring,
 int intel_ring_begin(struct intel_engine_cs *ring,
 		     int num_dwords)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	int ret;
 
-	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-				   dev_priv->mm.interruptible);
+	/* Preallocate the olr before touching the ring, */
+	ret = intel_ring_alloc_request(ring);
 	if (ret)
 		return ret;
 
+	/* and by holding the seqno before we prepare, we prevent recursion */
 	ret = __intel_ring_prepare(ring, num_dwords * sizeof(uint32_t));
 	if (ret)
 		return ret;
 
-	/* Preallocate the olr before touching the ring */
-	ret = intel_ring_alloc_seqno(ring);
+	/* but we may flush the seqno during prepare. */
+	ret = intel_ring_alloc_request(ring);
 	if (ret)
 		return ret;
 
@@ -1920,7 +1927,7 @@ void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	BUG_ON(ring->outstanding_lazy_seqno);
+	BUG_ON(ring->preallocated_request);
 
 	if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
 		I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
@@ -2140,7 +2147,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 		ring->irq_get = gen8_ring_get_irq;
 		ring->irq_put = gen8_ring_put_irq;
 		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
-		ring->get_seqno = gen6_ring_get_seqno;
+		ring->get_seqno = ring_get_seqno;
 		ring->set_seqno = ring_set_seqno;
 		if (i915_semaphore_is_enabled(dev)) {
 			WARN_ON(!dev_priv->semaphore_obj);
@@ -2156,7 +2163,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 		ring->irq_get = gen6_ring_get_irq;
 		ring->irq_put = gen6_ring_put_irq;
 		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
-		ring->get_seqno = gen6_ring_get_seqno;
+		ring->get_seqno = ring_get_seqno;
 		ring->set_seqno = ring_set_seqno;
 		if (i915_semaphore_is_enabled(dev)) {
 			ring->semaphore.sync_to = gen6_ring_sync;
@@ -2297,7 +2304,8 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
 	ring->cleanup = render_ring_cleanup;
 
 	ring->dev = dev;
-	INIT_LIST_HEAD(&ring->active_list);
+	INIT_LIST_HEAD(&ring->read_list);
+	INIT_LIST_HEAD(&ring->write_list);
 	INIT_LIST_HEAD(&ring->request_list);
 
 	ringbuf->size = size;
@@ -2345,7 +2353,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
 			ring->write_tail = gen6_bsd_ring_write_tail;
 		ring->flush = gen6_bsd_ring_flush;
 		ring->add_request = gen6_add_request;
-		ring->get_seqno = gen6_ring_get_seqno;
+		ring->get_seqno = ring_get_seqno;
 		ring->set_seqno = ring_set_seqno;
 		if (INTEL_INFO(dev)->gen >= 8) {
 			ring->irq_enable_mask =
@@ -2423,7 +2431,7 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev)
 	ring->mmio_base = GEN8_BSD2_RING_BASE;
 	ring->flush = gen6_bsd_ring_flush;
 	ring->add_request = gen6_add_request;
-	ring->get_seqno = gen6_ring_get_seqno;
+	ring->get_seqno = ring_get_seqno;
 	ring->set_seqno = ring_set_seqno;
 	ring->irq_enable_mask =
 			GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
@@ -2453,7 +2461,7 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
 	ring->write_tail = ring_write_tail;
 	ring->flush = gen6_ring_flush;
 	ring->add_request = gen6_add_request;
-	ring->get_seqno = gen6_ring_get_seqno;
+	ring->get_seqno = ring_get_seqno;
 	ring->set_seqno = ring_set_seqno;
 	if (INTEL_INFO(dev)->gen >= 8) {
 		ring->irq_enable_mask =
@@ -2510,7 +2518,7 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
 	ring->write_tail = ring_write_tail;
 	ring->flush = gen6_ring_flush;
 	ring->add_request = gen6_add_request;
-	ring->get_seqno = gen6_ring_get_seqno;
+	ring->get_seqno = ring_get_seqno;
 	ring->set_seqno = ring_set_seqno;
 
 	if (INTEL_INFO(dev)->gen >= 8) {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 24437da..eb4875a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -126,6 +126,7 @@ struct  intel_engine_cs {
 		VCS2
 	} id;
 #define I915_NUM_RINGS 5
+#define I915_NUM_RING_BITS 4
 #define LAST_USER_RING (VECS + 1)
 	u32		mmio_base;
 	struct		drm_device *dev;
@@ -153,8 +154,7 @@ struct  intel_engine_cs {
 	 * seen value is good enough. Note that the seqno will always be
 	 * monotonic, even if not coherent.
 	 */
-	u32		(*get_seqno)(struct intel_engine_cs *ring,
-				     bool lazy_coherency);
+	u32		(*get_seqno)(struct intel_engine_cs *ring);
 	void		(*set_seqno)(struct intel_engine_cs *ring,
 				     u32 seqno);
 	int		(*dispatch_execbuffer)(struct intel_engine_cs *ring,
@@ -242,7 +242,7 @@ struct  intel_engine_cs {
 	 *
 	 * A reference is held on the buffer while on this list.
 	 */
-	struct list_head active_list;
+	struct list_head read_list, write_list, fence_list;
 
 	/**
 	 * List of breadcrumbs associated with GPU requests currently
@@ -253,8 +253,7 @@ struct  intel_engine_cs {
 	/**
 	 * Do we have some not yet emitted requests outstanding?
 	 */
-	struct drm_i915_gem_request *preallocated_lazy_request;
-	u32 outstanding_lazy_seqno;
+	struct i915_gem_request *preallocated_request;
 	bool gpu_caches_dirty;
 	bool fbc_dirty;
 
@@ -395,6 +394,7 @@ int intel_ring_space(struct intel_ringbuffer *ringbuf);
 bool intel_ring_stopped(struct intel_engine_cs *ring);
 void __intel_ring_advance(struct intel_engine_cs *ring);
 
+int __must_check intel_ring_alloc_request(struct intel_engine_cs *ring);
 int __must_check intel_ring_idle(struct intel_engine_cs *ring);
 void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno);
 int intel_ring_flush_all_caches(struct intel_engine_cs *ring);
@@ -417,12 +417,15 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
 	return ringbuf->tail;
 }
 
-static inline u32 intel_ring_get_seqno(struct intel_engine_cs *ring)
+static inline struct i915_gem_request *intel_ring_get_request(struct intel_engine_cs *ring)
 {
-	BUG_ON(ring->outstanding_lazy_seqno == 0);
-	return ring->outstanding_lazy_seqno;
+	BUG_ON(ring->preallocated_request == 0);
+	return ring->preallocated_request;
 }
 
+/* C - the bringer of joy */
+#define intel_ring_get_seqno(ring) intel_ring_get_request(ring)->seqno
+
 static inline void i915_trace_irq_get(struct intel_engine_cs *ring, u32 seqno)
 {
 	if (ring->trace_irq_seqno == 0 && ring->irq_get(ring))
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/5] drm/i915: Print captured bo for all VM in error state
  2014-08-12 19:05 [PATCH 1/5] drm/i915: Print captured bo for all VM in error state Chris Wilson
                   ` (3 preceding siblings ...)
  2014-08-12 19:05 ` [PATCH 5/5] drm/i915: s/seqno/request/ tracking inside objects Chris Wilson
@ 2014-08-13 14:50 ` Mika Kuoppala
  2014-08-14  6:50   ` Chris Wilson
  4 siblings, 1 reply; 20+ messages in thread
From: Mika Kuoppala @ 2014-08-13 14:50 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> The current error state harks back to the era of just a single VM. For
> full-ppgtt, we capture every bo on every VM. It behoves us to then print
> every bo for every VM, which we currently fail to do and so miss vital
> information in the error state.
>
> v2: Use the vma address rather than -1!
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Offsets can collide between different vm areas.

If we also add the vm index to the captured batchbuffer objects,
we could print it as part of the offset, '%d:0x%x'. That would easily
identify the vm, and we would immediately see which vm was active on a ring.

-Mika
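
A rough sketch of the suggested output (editorial illustration only; the
vm_idx field is hypothetical and would have to be recorded at capture time,
while err_printf() and error->ring[i].batchbuffer->gtt_offset are used as in
the existing i915_gpu_error.c code):

	/* print the batch as "vm:offset", e.g. "1:0x00a43000" */
	err_printf(m, "  batchbuffer = %d:0x%08x\n",
		   error->ring[i].vm_idx,	/* hypothetical field */
		   error->ring[i].batchbuffer->gtt_offset);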

> ---
>  drivers/gpu/drm/i915/i915_drv.h       |  2 +
>  drivers/gpu/drm/i915/i915_gpu_error.c | 80 ++++++++++++++++++++++++-----------
>  2 files changed, 58 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 1bf2cea..e0dcd70 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -396,6 +396,7 @@ struct drm_i915_error_state {
>  		pid_t pid;
>  		char comm[TASK_COMM_LEN];
>  	} ring[I915_NUM_RINGS];
> +
>  	struct drm_i915_error_buffer {
>  		u32 size;
>  		u32 name;
> @@ -414,6 +415,7 @@ struct drm_i915_error_state {
>  	} **active_bo, **pinned_bo;
>  
>  	u32 *active_bo_count, *pinned_bo_count;
> +	u32 vm_count;
>  };
>  
>  struct intel_connector;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index fc11ac6..35e70d5 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -192,10 +192,10 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
>  				struct drm_i915_error_buffer *err,
>  				int count)
>  {
> -	err_printf(m, "%s [%d]:\n", name, count);
> +	err_printf(m, "  %s [%d]:\n", name, count);
>  
>  	while (count--) {
> -		err_printf(m, "  %08x %8u %02x %02x %x %x",
> +		err_printf(m, "    %08x %8u %02x %02x %x %x",
>  			   err->gtt_offset,
>  			   err->size,
>  			   err->read_domains,
> @@ -393,15 +393,17 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
>  		i915_ring_error_state(m, dev, &error->ring[i]);
>  	}
>  
> -	if (error->active_bo)
> +	for (i = 0; i < error->vm_count; i++) {
> +		err_printf(m, "vm[%d]\n", i);
> +
>  		print_error_buffers(m, "Active",
> -				    error->active_bo[0],
> -				    error->active_bo_count[0]);
> +				    error->active_bo[i],
> +				    error->active_bo_count[i]);
>  
> -	if (error->pinned_bo)
>  		print_error_buffers(m, "Pinned",
> -				    error->pinned_bo[0],
> -				    error->pinned_bo_count[0]);
> +				    error->pinned_bo[i],
> +				    error->pinned_bo_count[i]);
> +	}
>  
>  	for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
>  		obj = error->ring[i].batchbuffer;
> @@ -644,13 +646,15 @@ unwind:
>  				       (src)->base.size>>PAGE_SHIFT)
>  
>  static void capture_bo(struct drm_i915_error_buffer *err,
> -		       struct drm_i915_gem_object *obj)
> +		       struct i915_vma *vma)
>  {
> +	struct drm_i915_gem_object *obj = vma->obj;
> +
>  	err->size = obj->base.size;
>  	err->name = obj->base.name;
>  	err->rseqno = obj->last_read_seqno;
>  	err->wseqno = obj->last_write_seqno;
> -	err->gtt_offset = i915_gem_obj_ggtt_offset(obj);
> +	err->gtt_offset = vma->node.start;
>  	err->read_domains = obj->base.read_domains;
>  	err->write_domain = obj->base.write_domain;
>  	err->fence_reg = obj->fence_reg;
> @@ -674,7 +678,7 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err,
>  	int i = 0;
>  
>  	list_for_each_entry(vma, head, mm_list) {
> -		capture_bo(err++, vma->obj);
> +		capture_bo(err++, vma);
>  		if (++i == count)
>  			break;
>  	}
> @@ -683,21 +687,27 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err,
>  }
>  
>  static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
> -			     int count, struct list_head *head)
> +			     int count, struct list_head *head,
> +			     struct i915_address_space *vm)
>  {
>  	struct drm_i915_gem_object *obj;
> -	int i = 0;
> +	struct drm_i915_error_buffer * const first = err;
> +	struct drm_i915_error_buffer * const last = err + count;
>  
>  	list_for_each_entry(obj, head, global_list) {
> -		if (!i915_gem_obj_is_pinned(obj))
> -			continue;
> +		struct i915_vma *vma;
>  
> -		capture_bo(err++, obj);
> -		if (++i == count)
> +		if (err == last)
>  			break;
> +
> +		list_for_each_entry(vma, &obj->vma_list, vma_link)
> +			if (vma->vm == vm && vma->pin_count > 0) {
> +				capture_bo(err++, vma);
> +				break;
> +			}
>  	}
>  
> -	return i;
> +	return err - first;
>  }
>  
>  /* Generate a semi-unique error code. The code is not meant to have meaning, The
> @@ -1053,9 +1063,14 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
>  	list_for_each_entry(vma, &vm->active_list, mm_list)
>  		i++;
>  	error->active_bo_count[ndx] = i;
> -	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
> -		if (i915_gem_obj_is_pinned(obj))
> -			i++;
> +
> +	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
> +		list_for_each_entry(vma, &obj->vma_list, vma_link)
> +			if (vma->vm == vm && vma->pin_count > 0) {
> +				i++;
> +				break;
> +			}
> +	}
>  	error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx];
>  
>  	if (i) {
> @@ -1074,7 +1089,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
>  		error->pinned_bo_count[ndx] =
>  			capture_pinned_bo(pinned_bo,
>  					  error->pinned_bo_count[ndx],
> -					  &dev_priv->mm.bound_list);
> +					  &dev_priv->mm.bound_list, vm);
>  	error->active_bo[ndx] = active_bo;
>  	error->pinned_bo[ndx] = pinned_bo;
>  }
> @@ -1095,8 +1110,25 @@ static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
>  	error->pinned_bo_count = kcalloc(cnt, sizeof(*error->pinned_bo_count),
>  					 GFP_ATOMIC);
>  
> -	list_for_each_entry(vm, &dev_priv->vm_list, global_link)
> -		i915_gem_capture_vm(dev_priv, error, vm, i++);
> +	if (error->active_bo == NULL ||
> +	    error->pinned_bo == NULL ||
> +	    error->active_bo_count == NULL ||
> +	    error->pinned_bo_count == NULL) {
> +		kfree(error->active_bo);
> +		kfree(error->active_bo_count);
> +		kfree(error->pinned_bo);
> +		kfree(error->pinned_bo_count);
> +
> +		error->active_bo = NULL;
> +		error->active_bo_count = NULL;
> +		error->pinned_bo = NULL;
> +		error->pinned_bo_count = NULL;
> +	} else {
> +		list_for_each_entry(vm, &dev_priv->vm_list, global_link)
> +			i915_gem_capture_vm(dev_priv, error, vm, i++);
> +
> +		error->vm_count = cnt;
> +	}
>  }
>  
>  /* Capture all registers which don't fit into another category. */
> -- 
> 1.9.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/5] drm/i915: Print captured bo for all VM in error state
  2014-08-13 14:50 ` [PATCH 1/5] drm/i915: Print captured bo for all VM in error state Mika Kuoppala
@ 2014-08-14  6:50   ` Chris Wilson
  2014-08-14 10:18     ` Mika Kuoppala
  0 siblings, 1 reply; 20+ messages in thread
From: Chris Wilson @ 2014-08-14  6:50 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

On Wed, Aug 13, 2014 at 05:50:38PM +0300, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > The current error state harks back to the era of just a single VM. For
> > full-ppgtt, we capture every bo on every VM. It behoves us to then print
> > every bo for every VM, which we currently fail to do and so miss vital
> > information in the error state.
> >
> > v2: Use the vma address rather than -1!
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Offsets can collide between different vm areas.
> 
> If we also add the vm index to the captured batchbuffer objects,
> we could print it as part of the offset, '%d:0x%x'. That would easily
> identify the vm, and we would immediately see which vm was active on a ring.

The offsets are printed out per-vm. You want to be more specific in your
complaint. Based on earlier discussion, I think you just want to know
the guilty vm.
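
(For illustration only, a minimal follow-up sketch: map an address space back
to the index used for the vm[%d] headings, so the ring error state could name
the vm the hung batch ran in. The helper name is invented; vm_list and
global_link are the fields already walked in i915_gem_capture_buffers().)

	static int vm_to_index(struct drm_i915_private *dev_priv,
			       struct i915_address_space *vm)
	{
		struct i915_address_space *it;
		int i = 0;

		list_for_each_entry(it, &dev_priv->vm_list, global_link) {
			if (it == vm)
				return i;
			i++;
		}

		return -1; /* not captured in the error state */
	}
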
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/5] drm/i915: Print captured bo for all VM in error state
  2014-08-14  6:50   ` Chris Wilson
@ 2014-08-14 10:18     ` Mika Kuoppala
  2014-08-14 15:03       ` Daniel Vetter
  0 siblings, 1 reply; 20+ messages in thread
From: Mika Kuoppala @ 2014-08-14 10:18 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> On Wed, Aug 13, 2014 at 05:50:38PM +0300, Mika Kuoppala wrote:
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > The current error state harks back to the era of just a single VM. For
>> > full-ppgtt, we capture every bo on every VM. It behoves us to then print
>> > every bo for every VM, which we currently fail to do and so miss vital
>> > information in the error state.
>> >
>> > v2: Use the vma address rather than -1!
>> >
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> 
>> Offsets can collide between different vm areas.
>> 
>> If we also add the vm index to the captured batchbuffer objects,
>> we could print it as part of the offset, '%d:0x%x'. That would easily
>> identify the vm, and we would immediately see which vm was active on a ring.
>
> The offsets are printed out per-vm. You want to be more specific in your
> complaint. Based on earlier discussion, I think you just want to know
> the guilty vm.
> -Chris

Yes. And it can be done as a follow-up too.

1/5:
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/5] drm/i915: Do not access stolen memory directly by the CPU, even for error capture
  2014-08-12 19:05 ` [PATCH 2/5] drm/i915: Do not access stolen memory directly by the CPU, even for error capture Chris Wilson
@ 2014-08-14 14:51   ` Mika Kuoppala
  2014-08-14 19:35     ` Chris Wilson
  2014-08-15 11:11   ` Mika Kuoppala
  1 sibling, 1 reply; 20+ messages in thread
From: Mika Kuoppala @ 2014-08-14 14:51 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> For stolen pages, since it is verboten to access them directly on many
> architectures, we have to read them through the GTT aperture. If they
> are not accessible through the aperture, then we have to abort.
>
> This was complicated by
>
> commit 8b6124a633d8095b0c8364f585edff9c59568a96
> Author: Chris Wilson <chris@chris-wilson.co.uk>
> Date:   Thu Jan 30 14:38:16 2014 +0000
>
>     drm/i915: Don't access snooped pages through the GTT (even for error capture)
>
> and the desire to use stolen memory for ringbuffers, contexts and
> batches in the future.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gpu_error.c | 50 ++++++++++++++++++++++-------------
>  1 file changed, 31 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 35e70d5..6d280c07 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -561,10 +561,11 @@ static struct drm_i915_error_object *
>  i915_error_object_create_sized(struct drm_i915_private *dev_priv,
>  			       struct drm_i915_gem_object *src,
>  			       struct i915_address_space *vm,
> -			       const int num_pages)
> +			       int num_pages)
>  {
>  	struct drm_i915_error_object *dst;
> -	int i;
> +	bool use_ggtt;
> +	int i = 0;
>  	u32 reloc_offset;
>  
>  	if (src == NULL || src->pages == NULL)
> @@ -574,8 +575,32 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
>  	if (dst == NULL)
>  		return NULL;
>  
> -	reloc_offset = dst->gtt_offset = i915_gem_obj_offset(src, vm);
> -	for (i = 0; i < num_pages; i++) {
> +	dst->gtt_offset = i915_gem_obj_offset(src, vm);
> +
> +	reloc_offset = dst->gtt_offset;
> +	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
> +		    i915_is_ggtt(vm) &&
> +		    src->has_global_gtt_mapping &&
> +		    reloc_offset + num_pages * PAGE_SIZE <= dev_priv->gtt.mappable_end);
> +
> +	/* Cannot access stolen address directly, try to use the aperture */
> +	if (src->stolen) {
> +		use_ggtt = true;
> +
> +		if (!src->has_global_gtt_mapping)
> +			goto unwind;
> +
> +		reloc_offset = i915_gem_obj_ggtt_offset(src);
> +		if (reloc_offset + num_pages * PAGE_SIZE > dev_priv->gtt.mappable_end)
> +			goto unwind;
> +	}
> +
> +	/* Cannot access snooped pages through the aperture */
> +	if (use_ggtt && src->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv->dev))
> +		goto unwind;

Why do we need to bail out if we don't have LLC?


> +	dst->page_count = num_pages;
> +	while (num_pages--) {
>  		unsigned long flags;
>  		void *d;
>  
> @@ -584,10 +609,7 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
>  			goto unwind;
>  
>  		local_irq_save(flags);
> -		if (src->cache_level == I915_CACHE_NONE &&
> -		    reloc_offset < dev_priv->gtt.mappable_end &&
> -		    src->has_global_gtt_mapping &&
> -		    i915_is_ggtt(vm)) {
> +		if (use_ggtt) {
>  			void __iomem *s;
>  
>  			/* Simply ignore tiling or any overlapping fence.
> @@ -599,14 +621,6 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
>  						     reloc_offset);
>  			memcpy_fromio(d, s, PAGE_SIZE);
>  			io_mapping_unmap_atomic(s);
> -		} else if (src->stolen) {
> -			unsigned long offset;
> -
> -			offset = dev_priv->mm.stolen_base;
> -			offset += src->stolen->start;
> -			offset += i << PAGE_SHIFT;
> -
> -			memcpy_fromio(d, (void __iomem *) offset, PAGE_SIZE);
>  		} else {
>  			struct page *page;
>  			void *s;
> @@ -623,11 +637,9 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
>  		}
>  		local_irq_restore(flags);
>  
> -		dst->pages[i] = d;
> -
> +		dst->pages[i++] = d;
>  		reloc_offset += PAGE_SIZE;
>  	}
> -	dst->page_count = num_pages;
>  
>  	return dst;
>  
> -- 
> 1.9.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/5] drm/i915: Print captured bo for all VM in error state
  2014-08-14 10:18     ` Mika Kuoppala
@ 2014-08-14 15:03       ` Daniel Vetter
  0 siblings, 0 replies; 20+ messages in thread
From: Daniel Vetter @ 2014-08-14 15:03 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

On Thu, Aug 14, 2014 at 01:18:46PM +0300, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > On Wed, Aug 13, 2014 at 05:50:38PM +0300, Mika Kuoppala wrote:
> >> Chris Wilson <chris@chris-wilson.co.uk> writes:
> >> 
> >> > The current error state harks back to the era of just a single VM. For
> >> > full-ppgtt, we capture every bo on every VM. It behoves us to then print
> >> > every bo for every VM, which we currently fail to do and so miss vital
> >> > information in the error state.
> >> >
> >> > v2: Use the vma address rather than -1!
> >> >
> >> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >> 
> >> Offsets can collide between different vm areas.
> >> 
> >> If we add vm index also to the captured batchbuffer objects,
> >> we could print it part of the offset '%d:0x%x' that would easily
> >> identify vm and we would immediately see what vm was active on a ring.
> >
> > The offsets are printed out per-vm. You want to be more specific in your
> > complaint. Based on earlier discussion, I think you just want to know
> > the guilty vm.
> > -Chris
> 
> Yes. And it can be done as a follow up too.
> 
> 1/5:
> Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

Queued for -next, thanks for the patch.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/5] drm/i915: Do not access stolen memory directly by the CPU, even for error capture
  2014-08-14 14:51   ` Mika Kuoppala
@ 2014-08-14 19:35     ` Chris Wilson
  0 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2014-08-14 19:35 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

On Thu, Aug 14, 2014 at 05:51:48PM +0300, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > For stolen pages, since it is verboten to access them directly on many
> > architectures, we have to read them through the GTT aperture. If they
> > are not accessible through the aperture, then we have to abort.
> >
> > This was complicated by
> >
> > commit 8b6124a633d8095b0c8364f585edff9c59568a96
> > Author: Chris Wilson <chris@chris-wilson.co.uk>
> > Date:   Thu Jan 30 14:38:16 2014 +0000
> >
> >     drm/i915: Don't access snooped pages through the GTT (even for error capture)
> >
> > and the desire to use stolen memory for ringbuffers, contexts and
> > batches in the future.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_gpu_error.c | 50 ++++++++++++++++++++++-------------
> >  1 file changed, 31 insertions(+), 19 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> > index 35e70d5..6d280c07 100644
> > --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> > @@ -561,10 +561,11 @@ static struct drm_i915_error_object *
> >  i915_error_object_create_sized(struct drm_i915_private *dev_priv,
> >  			       struct drm_i915_gem_object *src,
> >  			       struct i915_address_space *vm,
> > -			       const int num_pages)
> > +			       int num_pages)
> >  {
> >  	struct drm_i915_error_object *dst;
> > -	int i;
> > +	bool use_ggtt;
> > +	int i = 0;
> >  	u32 reloc_offset;
> >  
> >  	if (src == NULL || src->pages == NULL)
> > @@ -574,8 +575,32 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
> >  	if (dst == NULL)
> >  		return NULL;
> >  
> > -	reloc_offset = dst->gtt_offset = i915_gem_obj_offset(src, vm);
> > -	for (i = 0; i < num_pages; i++) {
> > +	dst->gtt_offset = i915_gem_obj_offset(src, vm);
> > +
> > +	reloc_offset = dst->gtt_offset;
> > +	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
> > +		    i915_is_ggtt(vm) &&
> > +		    src->has_global_gtt_mapping &&
> > +		    reloc_offset + num_pages * PAGE_SIZE <= dev_priv->gtt.mappable_end);
> > +
> > +	/* Cannot access stolen address directly, try to use the aperture */
> > +	if (src->stolen) {
> > +		use_ggtt = true;
> > +
> > +		if (!src->has_global_gtt_mapping)
> > +			goto unwind;
> > +
> > +		reloc_offset = i915_gem_obj_ggtt_offset(src);
> > +		if (reloc_offset + num_pages * PAGE_SIZE > dev_priv->gtt.mappable_end)
> > +			goto unwind;
> > +	}
> > +
> > +	/* Cannot access snooped pages through the aperture */
> > +	if (use_ggtt && src->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv->dev))
> > +		goto unwind;
> 
> Why do we need to bail out if we don't have LLC?

It is verboten to access snooped PTEs through the GTT. (As in it will
hang some machines.)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/5] drm/i915: Do not access stolen memory directly by the CPU, even for error capture
  2014-08-12 19:05 ` [PATCH 2/5] drm/i915: Do not access stolen memory directly by the CPU, even for error capture Chris Wilson
  2014-08-14 14:51   ` Mika Kuoppala
@ 2014-08-15 11:11   ` Mika Kuoppala
  2014-08-15 18:07     ` Mika Kuoppala
  1 sibling, 1 reply; 20+ messages in thread
From: Mika Kuoppala @ 2014-08-15 11:11 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> For stolen pages, since it is verboten to access them directly on many
> architectures, we have to read them through the GTT aperture. If they
> are not accessible through the aperture, then we have to abort.
>
> This was complicated by
>
> commit 8b6124a633d8095b0c8364f585edff9c59568a96
> Author: Chris Wilson <chris@chris-wilson.co.uk>
> Date:   Thu Jan 30 14:38:16 2014 +0000
>
>     drm/i915: Don't access snooped pages through the GTT (even for error capture)
>
> and the desire to use stolen memory for ringbuffers, contexts and
> batches in the future.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gpu_error.c | 50 ++++++++++++++++++++++-------------
>  1 file changed, 31 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 35e70d5..6d280c07 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -561,10 +561,11 @@ static struct drm_i915_error_object *
>  i915_error_object_create_sized(struct drm_i915_private *dev_priv,
>  			       struct drm_i915_gem_object *src,
>  			       struct i915_address_space *vm,
> -			       const int num_pages)
> +			       int num_pages)
>  {
>  	struct drm_i915_error_object *dst;
> -	int i;
> +	bool use_ggtt;
> +	int i = 0;
>  	u32 reloc_offset;
>  
>  	if (src == NULL || src->pages == NULL)
> @@ -574,8 +575,32 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
>  	if (dst == NULL)
>  		return NULL;
>  
> -	reloc_offset = dst->gtt_offset = i915_gem_obj_offset(src, vm);
> -	for (i = 0; i < num_pages; i++) {
> +	dst->gtt_offset = i915_gem_obj_offset(src, vm);
> +
> +	reloc_offset = dst->gtt_offset;
> +	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
                     
Take this cache level check out so that we end up doing the
snoopable check for non-stolen objects too?

-Mika

> +		    i915_is_ggtt(vm) &&
> +		    src->has_global_gtt_mapping &&
> +		    reloc_offset + num_pages * PAGE_SIZE <= dev_priv->gtt.mappable_end);
> +
> +	/* Cannot access stolen address directly, try to use the aperture */
> +	if (src->stolen) {
> +		use_ggtt = true;
> +
> +		if (!src->has_global_gtt_mapping)
> +			goto unwind;
> +
> +		reloc_offset = i915_gem_obj_ggtt_offset(src);
> +		if (reloc_offset + num_pages * PAGE_SIZE > dev_priv->gtt.mappable_end)
> +			goto unwind;
> +	}
> +
> +	/* Cannot access snooped pages through the aperture */
> +	if (use_ggtt && src->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv->dev))
> +		goto unwind;
> +
> +	dst->page_count = num_pages;
> +	while (num_pages--) {
>  		unsigned long flags;
>  		void *d;
>  
> @@ -584,10 +609,7 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
>  			goto unwind;
>  
>  		local_irq_save(flags);
> -		if (src->cache_level == I915_CACHE_NONE &&
> -		    reloc_offset < dev_priv->gtt.mappable_end &&
> -		    src->has_global_gtt_mapping &&
> -		    i915_is_ggtt(vm)) {
> +		if (use_ggtt) {
>  			void __iomem *s;
>  
>  			/* Simply ignore tiling or any overlapping fence.
> @@ -599,14 +621,6 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
>  						     reloc_offset);
>  			memcpy_fromio(d, s, PAGE_SIZE);
>  			io_mapping_unmap_atomic(s);
> -		} else if (src->stolen) {
> -			unsigned long offset;
> -
> -			offset = dev_priv->mm.stolen_base;
> -			offset += src->stolen->start;
> -			offset += i << PAGE_SHIFT;
> -
> -			memcpy_fromio(d, (void __iomem *) offset, PAGE_SIZE);
>  		} else {
>  			struct page *page;
>  			void *s;
> @@ -623,11 +637,9 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
>  		}
>  		local_irq_restore(flags);
>  
> -		dst->pages[i] = d;
> -
> +		dst->pages[i++] = d;
>  		reloc_offset += PAGE_SIZE;
>  	}
> -	dst->page_count = num_pages;
>  
>  	return dst;
>  
> -- 
> 1.9.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/5] drm/i915: Do not access stolen memory directly by the CPU, even for error capture
  2014-08-15 11:11   ` Mika Kuoppala
@ 2014-08-15 18:07     ` Mika Kuoppala
  0 siblings, 0 replies; 20+ messages in thread
From: Mika Kuoppala @ 2014-08-15 18:07 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Mika Kuoppala <mika.kuoppala@linux.intel.com> writes:

> Chris Wilson <chris@chris-wilson.co.uk> writes:
>
>> For stolen pages, since it is verboten to access them directly on many
>> architectures, we have to read them through the GTT aperture. If they
>> are not accessible through the aperture, then we have to abort.
>>
>> This was complicated by
>>
>> commit 8b6124a633d8095b0c8364f585edff9c59568a96
>> Author: Chris Wilson <chris@chris-wilson.co.uk>
>> Date:   Thu Jan 30 14:38:16 2014 +0000
>>
>>     drm/i915: Don't access snooped pages through the GTT (even for error capture)
>>
>> and the desire to use stolen memory for ringbuffers, contexts and
>> batches in the future.
>>
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> ---
>>  drivers/gpu/drm/i915/i915_gpu_error.c | 50 ++++++++++++++++++++++-------------
>>  1 file changed, 31 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
>> index 35e70d5..6d280c07 100644
>> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
>> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
>> @@ -561,10 +561,11 @@ static struct drm_i915_error_object *
>>  i915_error_object_create_sized(struct drm_i915_private *dev_priv,
>>  			       struct drm_i915_gem_object *src,
>>  			       struct i915_address_space *vm,
>> -			       const int num_pages)
>> +			       int num_pages)
>>  {
>>  	struct drm_i915_error_object *dst;
>> -	int i;
>> +	bool use_ggtt;
>> +	int i = 0;
>>  	u32 reloc_offset;
>>  
>>  	if (src == NULL || src->pages == NULL)
>> @@ -574,8 +575,32 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
>>  	if (dst == NULL)
>>  		return NULL;
>>  
>> -	reloc_offset = dst->gtt_offset = i915_gem_obj_offset(src, vm);
>> -	for (i = 0; i < num_pages; i++) {
>> +	dst->gtt_offset = i915_gem_obj_offset(src, vm);
>> +
>> +	reloc_offset = dst->gtt_offset;
>> +	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
>                      
> Take this cache level check out so that we end up doing the
> snoopable check for non-stolen objects too?

This _is_ the snoopable check for non-stolen objects.
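
(Editorial sketch of the path selection in this hunk, assuming the conditions
are lifted into a helper; the function name and the -EIO return are invented,
the tests are those of the quoted diff, and the switch of reloc_offset to the
GGTT offset for stolen objects is glossed over.)

	/* 1 = read via the GTT aperture, 0 = read via the CPU,
	 * -EIO = the object cannot be captured at all. */
	static int error_capture_path(struct drm_i915_private *dev_priv,
				      struct drm_i915_gem_object *src,
				      struct i915_address_space *vm,
				      u32 offset, int num_pages)
	{
		/* For non-stolen objects this cache_level test is itself the
		 * snoop check: only uncached GGTT mappings inside the
		 * mappable aperture are read back through the GTT. */
		bool use_ggtt = src->cache_level == I915_CACHE_NONE &&
				i915_is_ggtt(vm) &&
				src->has_global_gtt_mapping &&
				offset + num_pages * PAGE_SIZE <= dev_priv->gtt.mappable_end;

		/* Stolen memory may not be touched directly by the CPU, so
		 * the aperture is the only option... */
		if (src->stolen) {
			use_ggtt = true;
			if (!src->has_global_gtt_mapping ||
			    offset + num_pages * PAGE_SIZE > dev_priv->gtt.mappable_end)
				return -EIO;
		}

		/* ...and snooped pages must not be read through the aperture
		 * on !HAS_LLC machines, hence the bail-out. */
		if (use_ggtt && src->cache_level != I915_CACHE_NONE &&
		    !HAS_LLC(dev_priv->dev))
			return -EIO;

		return use_ggtt;
	}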

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>


> -Mika
>
>> +		    i915_is_ggtt(vm) &&
>> +		    src->has_global_gtt_mapping &&
>> +		    reloc_offset + num_pages * PAGE_SIZE <= dev_priv->gtt.mappable_end);
>> +
>> +	/* Cannot access stolen address directly, try to use the aperture */
>> +	if (src->stolen) {
>> +		use_ggtt = true;
>> +
>> +		if (!src->has_global_gtt_mapping)
>> +			goto unwind;
>> +
>> +		reloc_offset = i915_gem_obj_ggtt_offset(src);
>> +		if (reloc_offset + num_pages * PAGE_SIZE > dev_priv->gtt.mappable_end)
>> +			goto unwind;
>> +	}
>> +
>> +	/* Cannot access snooped pages through the aperture */
>> +	if (use_ggtt && src->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv->dev))
>> +		goto unwind;
>> +
>> +	dst->page_count = num_pages;
>> +	while (num_pages--) {
>>  		unsigned long flags;
>>  		void *d;
>>  
>> @@ -584,10 +609,7 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
>>  			goto unwind;
>>  
>>  		local_irq_save(flags);
>> -		if (src->cache_level == I915_CACHE_NONE &&
>> -		    reloc_offset < dev_priv->gtt.mappable_end &&
>> -		    src->has_global_gtt_mapping &&
>> -		    i915_is_ggtt(vm)) {
>> +		if (use_ggtt) {
>>  			void __iomem *s;
>>  
>>  			/* Simply ignore tiling or any overlapping fence.
>> @@ -599,14 +621,6 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
>>  						     reloc_offset);
>>  			memcpy_fromio(d, s, PAGE_SIZE);
>>  			io_mapping_unmap_atomic(s);
>> -		} else if (src->stolen) {
>> -			unsigned long offset;
>> -
>> -			offset = dev_priv->mm.stolen_base;
>> -			offset += src->stolen->start;
>> -			offset += i << PAGE_SHIFT;
>> -
>> -			memcpy_fromio(d, (void __iomem *) offset, PAGE_SIZE);
>>  		} else {
>>  			struct page *page;
>>  			void *s;
>> @@ -623,11 +637,9 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
>>  		}
>>  		local_irq_restore(flags);
>>  
>> -		dst->pages[i] = d;
>> -
>> +		dst->pages[i++] = d;
>>  		reloc_offset += PAGE_SIZE;
>>  	}
>> -	dst->page_count = num_pages;
>>  
>>  	return dst;
>>  
>> -- 
>> 1.9.1
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/5] drm/i915: Remove num_pages parameter to i915_error_object_create()
  2014-08-12 19:05 ` [PATCH 3/5] drm/i915: Remove num_pages parameter to i915_error_object_create() Chris Wilson
@ 2014-08-15 18:07   ` Mika Kuoppala
  0 siblings, 0 replies; 20+ messages in thread
From: Mika Kuoppala @ 2014-08-15 18:07 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> For cleanliness, i915_error_object_create() was written to handle the
> NULL pointer in a central location. The macro that wrapped it and passed
> it a num_pages to use, was not safe. As we now never limit the num_pages
> to use (we did so at one point to only capture the first page of the
> context), we can remove the redundant macro and be NULL safe again.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
> Cc: John Harrison <John.C.Harrison@Intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> ---
>  drivers/gpu/drm/i915/i915_gpu_error.c | 25 ++++++++++---------------
>  1 file changed, 10 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 6d280c07..726e6b1 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -558,12 +558,12 @@ static void i915_error_state_free(struct kref *error_ref)
>  }
>  
>  static struct drm_i915_error_object *
> -i915_error_object_create_sized(struct drm_i915_private *dev_priv,
> -			       struct drm_i915_gem_object *src,
> -			       struct i915_address_space *vm,
> -			       int num_pages)
> +i915_error_object_create(struct drm_i915_private *dev_priv,
> +			 struct drm_i915_gem_object *src,
> +			 struct i915_address_space *vm)
>  {
>  	struct drm_i915_error_object *dst;
> +	int num_pages;
>  	bool use_ggtt;
>  	int i = 0;
>  	u32 reloc_offset;
> @@ -571,6 +571,8 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
>  	if (src == NULL || src->pages == NULL)
>  		return NULL;
>  
> +	num_pages = src->base.size >> PAGE_SHIFT;
> +
>  	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC);
>  	if (dst == NULL)
>  		return NULL;
> @@ -649,13 +651,8 @@ unwind:
>  	kfree(dst);
>  	return NULL;
>  }
> -#define i915_error_object_create(dev_priv, src, vm) \
> -	i915_error_object_create_sized((dev_priv), (src), (vm), \
> -				       (src)->base.size>>PAGE_SHIFT)
> -
>  #define i915_error_ggtt_object_create(dev_priv, src) \
> -	i915_error_object_create_sized((dev_priv), (src), &(dev_priv)->gtt.base, \
> -				       (src)->base.size>>PAGE_SHIFT)
> +	i915_error_object_create((dev_priv), (src), &(dev_priv)->gtt.base)
>  
>  static void capture_bo(struct drm_i915_error_buffer *err,
>  		       struct i915_vma *vma)
> @@ -1004,8 +1001,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  							 request->batch_obj,
>  							 vm);
>  
> -			if (HAS_BROKEN_CS_TLB(dev_priv->dev) &&
> -			    ring->scratch.obj)
> +			if (HAS_BROKEN_CS_TLB(dev_priv->dev))
>  				error->ring[i].wa_batchbuffer =
>  					i915_error_ggtt_object_create(dev_priv,
>  							     ring->scratch.obj);
> @@ -1027,9 +1023,8 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  		error->ring[i].ringbuffer =
>  			i915_error_ggtt_object_create(dev_priv, ring->buffer->obj);
>  
> -		if (ring->status_page.obj)
> -			error->ring[i].hws_page =
> -				i915_error_ggtt_object_create(dev_priv, ring->status_page.obj);
> +		error->ring[i].hws_page =
> +			i915_error_ggtt_object_create(dev_priv, ring->status_page.obj);
>  
>  		i915_gem_record_active_context(ring, error, &error->ring[i]);
>  
> -- 
> 1.9.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 4/5] drm/i915: Suppress a WARN on reading an object back for a GPU hang
  2014-08-12 19:05 ` [PATCH 4/5] drm/i915: Suppress a WARN on reading an object back for a GPU hang Chris Wilson
@ 2014-08-15 18:09   ` Mika Kuoppala
  2014-08-25 21:27     ` Daniel Vetter
  0 siblings, 1 reply; 20+ messages in thread
From: Mika Kuoppala @ 2014-08-15 18:09 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> ---
>  drivers/gpu/drm/i915/i915_gpu_error.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 726e6b1..1e05414 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -577,7 +577,10 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
>  	if (dst == NULL)
>  		return NULL;
>  
> -	dst->gtt_offset = i915_gem_obj_offset(src, vm);
> +	if (i915_gem_obj_bound(src, vm))
> +		dst->gtt_offset = i915_gem_obj_offset(src, vm);
> +	else
> +		dst->gtt_offset = -1;
>  
>  	reloc_offset = dst->gtt_offset;
>  	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
> -- 
> 1.9.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 4/5] drm/i915: Suppress a WARN on reading an object back for a GPU hang
  2014-08-15 18:09   ` Mika Kuoppala
@ 2014-08-25 21:27     ` Daniel Vetter
  0 siblings, 0 replies; 20+ messages in thread
From: Daniel Vetter @ 2014-08-25 21:27 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

On Fri, Aug 15, 2014 at 09:09:10PM +0300, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

Ok, pulled in the error state fixes Mika reviewed, thanks. Now the big
question: Is the s/seqno/request/ patch ready for prime-time?

... I'll probably know the answer once I'm through with my intel-gfx
backlog ...

Cheers, Daniel
> 
> > ---
> >  drivers/gpu/drm/i915/i915_gpu_error.c | 5 ++++-
> >  1 file changed, 4 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> > index 726e6b1..1e05414 100644
> > --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> > @@ -577,7 +577,10 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
> >  	if (dst == NULL)
> >  		return NULL;
> >  
> > -	dst->gtt_offset = i915_gem_obj_offset(src, vm);
> > +	if (i915_gem_obj_bound(src, vm))
> > +		dst->gtt_offset = i915_gem_obj_offset(src, vm);
> > +	else
> > +		dst->gtt_offset = -1;
> >  
> >  	reloc_offset = dst->gtt_offset;
> >  	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
> > -- 
> > 1.9.1
> >
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 5/5] drm/i915: s/seqno/request/ tracking inside objects
  2014-08-12 19:05 ` [PATCH 5/5] drm/i915: s/seqno/request/ tracking inside objects Chris Wilson
@ 2014-08-27  9:55   ` Daniel Vetter
  2014-08-27 10:39     ` Chris Wilson
  0 siblings, 1 reply; 20+ messages in thread
From: Daniel Vetter @ 2014-08-27  9:55 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Daniel Vetter, intel-gfx, Brad Volkin

On Tue, Aug 12, 2014 at 08:05:51PM +0100, Chris Wilson wrote:
> At the heart of this change is that the seqno is too low-level an
> abstraction to handle the growing complexities of command tracking, both
> with the introduction of multiple command queues with execbuffer and the
> potential for reordering with a scheduler. On top of the seqno we have
> the request. Conceptually this is just a fence, but it also has
> substantial bookkeeping of its own in order to track the context and
> batch in flight, for example. It is the central structure upon which we
> can extend with dependency tracking et al.
> 
> As regards the objects, they were using the seqno as a simple fence,
> upon which we check or even wait for command completion. This patch
> exchanges that seqno/ring pair with the request itself. For the
> majority, lifetime of the request is ordered by how we retire objects
> then requests. However, both the unlocked waits and probing elsewhere do
> not tie into the normal request lifetimes and so we need to introduce a
> kref. Extending the objects to use the request as the fence naturally
> extends to segregating read/write fence tracking. This has significance
> for it reduces the number of semaphores we need to emit, reducing the
> likelihood of #54226, and improving performance overall.
> 
> v2: Rebase and split out the orthogonal tweaks.
> 
> A silly thing happened with this patch. It seemed to nullify our earlier
> seqno-vs-interrupt w/a. I could not spot why, but gen6+ started to fail
> with missed interrupts (a good test of our robustness handling). So I
> ripped out the existing ACTHD read and replaced it with a RING_HEAD to
> manually check whether the request is complete. That also had the nice
> consequence of forcing __wait_request() to being the central arbiter of
> request completion.
> 
> The keener-eyed reviewer will also spot that the reset_counter is moved
> into the request, simplifying __wait_request() callsites and reducing the
> number of atomic reads by virtue of moving the check for a pending GPU
> reset to the endpoints of GPU access.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Oscar Mateo <oscar.mateo@intel.com>
> Cc: Brad Volkin <bradley.d.volkin@intel.com>
> Cc: "Kukanova, Svetlana" <svetlana.kukanova@intel.com>

So I've tried to split this up and totally failed. An incomplete list of
things I didn't manage to untangle:

- The mmio flip refactoring.
- The overlay request tracking refactoring.
- The switch to multiple parallel readers with the resulting cascading
  changes all over.
- The missed irq w/a prep changes. It's easy to split out the change to
  re-add the rc6 reference and to ditch the ACT_HEAD read, but the commit
  message talks about instead reading the RING_HEAD, and I just didn't
  spot the changes relevant to that in this big diff. Was probably looking
  in the wrong place.
- The move_to_active/retire refactoring. There's a pile of code movement
  in there, but I couldn't really spot what's just refactoring and what is
  a real change needed for the s/seqno/request/ change.
- De-duping some of the logical_ring_ functions. Spotted because it
  conflicted (but was easy to hack around), still this shouldn't really be
  part of this.

Things I've spotted which could be split out but amount to a decent
rewrite of the patch:
- Getting at the ring of the last write to an object. Although I guess
  without the multi-reader stuff and the pageflip refactoring that would
  pretty much disappear.
- Probably similar helpers for seqno if we don't switch to parallel writes
  in the same patch.

Splitting out the renames was easy, but that reduced the diff by less than
5% in size. So it didn't help in reviewing the patch at all.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c          |  29 +-
>  drivers/gpu/drm/i915/i915_dma.c              |   2 +
>  drivers/gpu/drm/i915/i915_drv.h              | 121 ++--
>  drivers/gpu/drm/i915/i915_gem.c              | 850 +++++++++++++++++----------
>  drivers/gpu/drm/i915/i915_gem_context.c      |  19 +-
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  37 +-
>  drivers/gpu/drm/i915/i915_gem_render_state.c |   5 +-
>  drivers/gpu/drm/i915/i915_gem_tiling.c       |   2 +-
>  drivers/gpu/drm/i915/i915_gpu_error.c        |  36 +-
>  drivers/gpu/drm/i915/i915_irq.c              |  28 +-
>  drivers/gpu/drm/i915/i915_trace.h            |   4 +-
>  drivers/gpu/drm/i915/intel_display.c         | 151 ++---
>  drivers/gpu/drm/i915/intel_drv.h             |   8 +-
>  drivers/gpu/drm/i915/intel_lrc.c             | 115 +---
>  drivers/gpu/drm/i915/intel_overlay.c         | 118 ++--
>  drivers/gpu/drm/i915/intel_ringbuffer.c      | 164 +++---
>  drivers/gpu/drm/i915/intel_ringbuffer.h      |  19 +-
>  17 files changed, 922 insertions(+), 786 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index d42db6b..604a73a 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -122,10 +122,11 @@ static inline const char *get_global_flag(struct drm_i915_gem_object *obj)
>  static void
>  describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>  {
> +	struct i915_gem_request *rq = i915_gem_object_last_read(obj);
>  	struct i915_vma *vma;
>  	int pin_count = 0;
>  
> -	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %u %u %u%s%s%s",
> +	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
>  		   &obj->base,
>  		   get_pin_flag(obj),
>  		   get_tiling_flag(obj),
> @@ -133,9 +134,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>  		   obj->base.size / 1024,
>  		   obj->base.read_domains,
>  		   obj->base.write_domain,
> -		   obj->last_read_seqno,
> -		   obj->last_write_seqno,
> -		   obj->last_fenced_seqno,
> +		   i915_request_seqno(rq),
> +		   i915_request_seqno(obj->last_write.request),
> +		   i915_request_seqno(obj->last_fence.request),
>  		   i915_cache_level_str(obj->cache_level),
>  		   obj->dirty ? " dirty" : "",
>  		   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
> @@ -168,8 +169,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>  		*t = '\0';
>  		seq_printf(m, " (%s mappable)", s);
>  	}
> -	if (obj->ring != NULL)
> -		seq_printf(m, " (%s)", obj->ring->name);
> +	if (rq)
> +		seq_printf(m, " (%s)", rq->ring->name);
>  	if (obj->frontbuffer_bits)
>  		seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
>  }
> @@ -336,7 +337,7 @@ static int per_file_stats(int id, void *ptr, void *data)
>  			if (ppgtt->file_priv != stats->file_priv)
>  				continue;
>  
> -			if (obj->ring) /* XXX per-vma statistic */
> +			if (obj->active) /* XXX per-vma statistic */
>  				stats->active += obj->base.size;
>  			else
>  				stats->inactive += obj->base.size;
> @@ -346,7 +347,7 @@ static int per_file_stats(int id, void *ptr, void *data)
>  	} else {
>  		if (i915_gem_obj_ggtt_bound(obj)) {
>  			stats->global += obj->base.size;
> -			if (obj->ring)
> +			if (obj->active)
>  				stats->active += obj->base.size;
>  			else
>  				stats->inactive += obj->base.size;
> @@ -574,7 +575,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
>  	struct drm_device *dev = node->minor->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_engine_cs *ring;
> -	struct drm_i915_gem_request *gem_request;
> +	struct i915_gem_request *rq;
>  	int ret, count, i;
>  
>  	ret = mutex_lock_interruptible(&dev->struct_mutex);
> @@ -587,12 +588,10 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
>  			continue;
>  
>  		seq_printf(m, "%s requests:\n", ring->name);
> -		list_for_each_entry(gem_request,
> -				    &ring->request_list,
> -				    list) {
> +		list_for_each_entry(rq, &ring->request_list, list) {
>  			seq_printf(m, "    %d @ %d\n",
> -				   gem_request->seqno,
> -				   (int) (jiffies - gem_request->emitted_jiffies));
> +				   rq->seqno,
> +				   (int)(jiffies - rq->emitted_jiffies));
>  		}
>  		count++;
>  	}
> @@ -609,7 +608,7 @@ static void i915_ring_seqno_info(struct seq_file *m,
>  {
>  	if (ring->get_seqno) {
>  		seq_printf(m, "Current sequence (%s): %u\n",
> -			   ring->name, ring->get_seqno(ring, false));
> +			   ring->name, ring->get_seqno(ring));
>  	}
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 04dd611..ba7f15c 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1598,6 +1598,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
>  	/* For the ugly agnostic INTEL_INFO macro */
>  	BUILD_BUG_ON(sizeof(*dev_priv) == sizeof(*dev));
>  
> +	BUILD_BUG_ON(I915_NUM_RINGS >= (1 << I915_NUM_RING_BITS));
> +
>  	dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL);
>  	if (dev_priv == NULL)
>  		return -ENOMEM;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index e0dcd70..c3563a0 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -191,6 +191,7 @@ enum hpd_pin {
>  
>  struct drm_i915_private;
>  struct i915_mmu_object;
> +struct i915_gem_request;
>  
>  enum intel_dpll_id {
>  	DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
> @@ -1740,16 +1741,15 @@ struct drm_i915_gem_object {
>  	struct drm_mm_node *stolen;
>  	struct list_head global_list;
>  
> -	struct list_head ring_list;
>  	/** Used in execbuf to temporarily hold a ref */
>  	struct list_head obj_exec_link;
>  
>  	/**
>  	 * This is set if the object is on the active lists (has pending
> -	 * rendering and so a non-zero seqno), and is not set if it i s on
> -	 * inactive (ready to be unbound) list.
> +	 * rendering and so a submitted request), and is not set if it is on
> +	 * inactive (ready to be unbound) list. We track activity per engine.
>  	 */
> -	unsigned int active:1;
> +	unsigned int active:I915_NUM_RING_BITS;
>  
>  	/**
>  	 * This is set if the object has been written to since last bound
> @@ -1817,13 +1817,11 @@ struct drm_i915_gem_object {
>  	void *dma_buf_vmapping;
>  	int vmapping_count;
>  
> -	struct intel_engine_cs *ring;
> -
> -	/** Breadcrumb of last rendering to the buffer. */
> -	uint32_t last_read_seqno;
> -	uint32_t last_write_seqno;
> -	/** Breadcrumb of last fenced GPU access to the buffer. */
> -	uint32_t last_fenced_seqno;
> +	/** Breadcrumbs of last rendering to the buffer. */
> +	struct {
> +		struct i915_gem_request *request;
> +		struct list_head ring_list;
> +	} last_write, last_read[I915_NUM_RINGS], last_fence;
>  
>  	/** Current tiling stride for the object, if it's tiled. */
>  	uint32_t stride;
> @@ -1856,6 +1854,8 @@ struct drm_i915_gem_object {
>  };
>  #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
>  
> +struct i915_gem_request *i915_gem_object_last_read(struct drm_i915_gem_object *obj);
> +
>  void i915_gem_track_fb(struct drm_i915_gem_object *old,
>  		       struct drm_i915_gem_object *new,
>  		       unsigned frontbuffer_bits);
> @@ -1870,10 +1870,14 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
>   * sequence-number comparisons on buffer last_rendering_seqnos, and associate
>   * an emission time with seqnos for tracking how far ahead of the GPU we are.
>   */
> -struct drm_i915_gem_request {
> +struct i915_gem_request {
> +	struct kref kref;
> +
>  	/** On Which ring this request was generated */
>  	struct intel_engine_cs *ring;
>  
> +	unsigned reset_counter;
> +
>  	/** GEM sequence number associated with this request. */
>  	uint32_t seqno;
>  
> @@ -1898,8 +1902,64 @@ struct drm_i915_gem_request {
>  	struct drm_i915_file_private *file_priv;
>  	/** file_priv list entry for this request */
>  	struct list_head client_list;
> +
> +	bool completed:1;
>  };
>  
> +static inline struct intel_engine_cs *i915_request_ring(struct i915_gem_request *rq)
> +{
> +	return rq ? rq->ring : NULL;
> +}
> +
> +static inline int i915_request_ring_id(struct i915_gem_request *rq)
> +{
> +	return rq ? rq->ring->id : -1;
> +}
> +
> +static inline u32 i915_request_seqno(struct i915_gem_request *rq)
> +{
> +	return rq ? rq->seqno : 0;
> +}
> +
> +/**
> + * Returns true if seq1 is later than seq2.
> + */
> +static inline bool
> +__i915_seqno_passed(uint32_t seq1, uint32_t seq2)
> +{
> +	return (int32_t)(seq1 - seq2) >= 0;
> +}
> +
> +static inline bool
> +i915_request_complete(struct i915_gem_request *rq)
> +{
> +	if (!rq->completed &&
> +	    __i915_seqno_passed(rq->ring->get_seqno(rq->ring),
> +				rq->seqno))
> +		rq->completed = true;
> +	return rq->completed;
> +}
> +
> +static inline struct i915_gem_request *
> +i915_request_get(struct i915_gem_request *rq)
> +{
> +	if (rq)
> +		kref_get(&rq->kref);
> +	return rq;
> +}
> +
> +void __i915_request_free(struct kref *kref);
> +
> +struct i915_gem_request *i915_gem_seqno_to_request(struct intel_engine_cs *ring,
> +						   u32 seqno);
> +
> +static inline void
> +i915_request_put(struct i915_gem_request *rq)
> +{
> +	if (rq)
> +		kref_put(&rq->kref, __i915_request_free);
> +}
> +
>  struct drm_i915_file_private {
>  	struct drm_i915_private *dev_priv;
>  	struct drm_file *file;
> @@ -2368,22 +2428,18 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>  
>  int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
>  int i915_gem_object_sync(struct drm_i915_gem_object *obj,
> -			 struct intel_engine_cs *to);
> +			 struct intel_engine_cs *to,
> +			 bool readonly);
>  void i915_vma_move_to_active(struct i915_vma *vma,
> -			     struct intel_engine_cs *ring);
> +			     struct intel_engine_cs *ring,
> +			     unsigned fenced);
> +#define VMA_IS_FENCED 0x1
> +#define VMA_HAS_FENCE 0x2
>  int i915_gem_dumb_create(struct drm_file *file_priv,
>  			 struct drm_device *dev,
>  			 struct drm_mode_create_dumb *args);
>  int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
>  		      uint32_t handle, uint64_t *offset);
> -/**
> - * Returns true if seq1 is later than seq2.
> - */
> -static inline bool
> -i915_seqno_passed(uint32_t seq1, uint32_t seq2)
> -{
> -	return (int32_t)(seq1 - seq2) >= 0;
> -}
>  
>  int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
>  int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
> @@ -2393,14 +2449,15 @@ int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
>  bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj);
>  void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj);
>  
> -struct drm_i915_gem_request *
> +struct i915_gem_request *
>  i915_gem_find_active_request(struct intel_engine_cs *ring);
>  
>  bool i915_gem_retire_requests(struct drm_device *dev);
>  void i915_gem_retire_requests_ring(struct intel_engine_cs *ring);
>  int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
> -				      bool interruptible);
> -int __must_check i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno);
> +				      bool interruptible,
> +				      unsigned *reset_counter);
> +int __must_check i915_gem_check_olr(struct i915_gem_request *rq);
>  
>  static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
>  {
> @@ -2443,12 +2500,12 @@ int __must_check i915_gpu_idle(struct drm_device *dev);
>  int __must_check i915_gem_suspend(struct drm_device *dev);
>  int __i915_add_request(struct intel_engine_cs *ring,
>  		       struct drm_file *file,
> -		       struct drm_i915_gem_object *batch_obj,
> -		       u32 *seqno);
> -#define i915_add_request(ring, seqno) \
> -	__i915_add_request(ring, NULL, NULL, seqno)
> -int __must_check i915_wait_seqno(struct intel_engine_cs *ring,
> -				 uint32_t seqno);
> +		       struct drm_i915_gem_object *batch_obj);
> +#define i915_add_request(ring) \
> +	__i915_add_request(ring, NULL, NULL)
> +int __must_check i915_wait_request(struct i915_gem_request *rq);
> +int __i915_request_wait(struct i915_gem_request *rq,
> +			bool interruptible);
>  int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
>  int __must_check
>  i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
> @@ -2776,8 +2833,6 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data,
>  int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data,
>  			       struct drm_file *file);
>  
> -void intel_notify_mmio_flip(struct intel_engine_cs *ring);
> -
>  /* overlay */
>  extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
>  extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 6c2f0b8..9c8c881 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -44,9 +44,6 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *o
>  static __must_check int
>  i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
>  			       bool readonly);
> -static void
> -i915_gem_object_retire(struct drm_i915_gem_object *obj);
> -
>  static void i915_gem_write_fence(struct drm_device *dev, int reg,
>  				 struct drm_i915_gem_object *obj);
>  static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
> @@ -108,6 +105,85 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
>  	spin_unlock(&dev_priv->mm.object_stat_lock);
>  }
>  
> +static void
> +i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
> +{
> +	intel_fb_obj_flush(obj, true);
> +	obj->last_write.request = NULL;
> +	list_del_init(&obj->last_write.ring_list);
> +}
> +
> +static void
> +i915_gem_object_retire__fence(struct drm_i915_gem_object *obj)
> +{
> +	obj->last_fence.request = NULL;
> +	list_del_init(&obj->last_fence.ring_list);
> +}
> +
> +static void
> +i915_gem_object_retire__read(struct drm_i915_gem_object *obj,
> +			     struct intel_engine_cs *ring)
> +{
> +	struct i915_vma *vma;
> +
> +	BUG_ON(obj->active == 0);
> +	BUG_ON(obj->base.write_domain);
> +
> +	obj->last_read[ring->id].request = NULL;
> +	list_del_init(&obj->last_read[ring->id].ring_list);
> +
> +	if (--obj->active)
> +		return;
> +
> +	BUG_ON(obj->last_write.request);
> +	BUG_ON(obj->last_fence.request);
> +
> +	list_for_each_entry(vma, &obj->vma_list, vma_link) {
> +		if (!list_empty(&vma->mm_list))
> +			list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
> +	}
> +
> +	drm_gem_object_unreference(&obj->base);
> +
> +	WARN_ON(i915_verify_lists(dev));
> +}
> +
> +static void
> +i915_gem_object_retire(struct drm_i915_gem_object *obj)
> +{
> +	struct i915_gem_request *rq;
> +	int i;
> +
> +	if (!obj->active)
> +		return;
> +
> +	rq = obj->last_write.request;
> +	if (rq && i915_request_complete(rq))
> +		i915_gem_object_retire__write(obj);
> +
> +	rq = obj->last_fence.request;
> +	if (rq && i915_request_complete(rq))
> +		i915_gem_object_retire__fence(obj);
> +
> +	for (i = 0; i < I915_NUM_RINGS; i++) {
> +		rq = obj->last_read[i].request;
> +		if (rq && i915_request_complete(rq)) {
> +			/* Although we just checked these above, the hardware
> +			 * may have just completed them in the interval and
> +			 * to keep the request lifetimes correct, we must
> +			 * retire write/fence before read.
> +			 */
> +			if (i915_request_ring_id(obj->last_write.request) == i)
> +				i915_gem_object_retire__write(obj);
> +
> +			if (i915_request_ring_id(obj->last_fence.request) == i)
> +				i915_gem_object_retire__fence(obj);
> +
> +			i915_gem_object_retire__read(obj, rq->ring);
> +		}
> +	}
> +}
> +
>  static int
>  i915_gem_wait_for_error(struct i915_gpu_error *error)
>  {
> @@ -1073,9 +1149,12 @@ unlock:
>  
>  int
>  i915_gem_check_wedge(struct i915_gpu_error *error,
> -		     bool interruptible)
> +		     bool interruptible,
> +		     unsigned *reset_counter)
>  {
> -	if (i915_reset_in_progress(error)) {
> +	unsigned wedge = atomic_read(&error->reset_counter);
> +
> +	if (wedge & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED)) {
>  		/* Non-interruptible callers can't handle -EAGAIN, hence return
>  		 * -EIO unconditionally for these. */
>  		if (!interruptible)
> @@ -1088,6 +1167,10 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
>  		return -EAGAIN;
>  	}
>  
> +	if (*reset_counter && *reset_counter != wedge)
> +		return -EAGAIN;
> +
> +	*reset_counter = wedge;
>  	return 0;
>  }
>  
> @@ -1096,15 +1179,15 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
>   * equal.
>   */
>  int
> -i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno)
> +i915_gem_check_olr(struct i915_gem_request *rq)
>  {
>  	int ret;
>  
> -	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> +	BUG_ON(!mutex_is_locked(&rq->ring->dev->struct_mutex));
>  
>  	ret = 0;
> -	if (seqno == ring->outstanding_lazy_seqno)
> -		ret = i915_add_request(ring, NULL);
> +	if (rq == rq->ring->preallocated_request)
> +		ret = i915_add_request(rq->ring);
>  
>  	return ret;
>  }
> @@ -1128,32 +1211,50 @@ static bool can_wait_boost(struct drm_i915_file_private *file_priv)
>  	return !atomic_xchg(&file_priv->rps_wait_boost, true);
>  }
>  
> +static bool __i915_request_complete__wa(struct i915_gem_request *rq)
> +{
> +	struct intel_engine_cs *ring = rq->ring;
> +	struct drm_i915_private *dev_priv = to_i915(ring->dev);
> +	unsigned head, tail;
> +
> +	if (i915_request_complete(rq))
> +		return true;
> +
> +	/* Sadly not all architectures are coherent wrt the seqno
> +	 * write being visible before the CPU is woken up by the
> +	 * interrupt. In order to avoid going to sleep without seeing
> +	 * the last seqno and never waking up again, we explicitly check
> +	 * whether the ring has advanced past our request. The uncached
> +	 * register read (which requires waking the GT up) is pure brute
> +	 * force, and only just enough.
> +	 */
> +	head = __intel_ring_space(I915_READ_HEAD(ring) & HEAD_ADDR,
> +				  ring->buffer->tail, ring->buffer->size);
> +	tail = __intel_ring_space(rq->tail,
> +				  ring->buffer->tail, ring->buffer->size);
> +	if (head >= tail)
> +		rq->completed = true;
> +
> +	return rq->completed;
> +}
> +
>  /**
> - * __wait_seqno - wait until execution of seqno has finished
> - * @ring: the ring expected to report seqno
> - * @seqno: duh!
> - * @reset_counter: reset sequence associated with the given seqno
> + * __wait_request - wait until execution of the request has finished
> + * @rq: the request to wait upon
>   * @interruptible: do an interruptible wait (normally yes)
>   * @timeout: in - how long to wait (NULL forever); out - how much time remaining
>   *
> - * Note: It is of utmost importance that the passed in seqno and reset_counter
> - * values have been read by the caller in an smp safe manner. Where read-side
> - * locks are involved, it is sufficient to read the reset_counter before
> - * unlocking the lock that protects the seqno. For lockless tricks, the
> - * reset_counter _must_ be read before, and an appropriate smp_rmb must be
> - * inserted.
> - *
> - * Returns 0 if the seqno was found within the alloted time. Else returns the
> + * Returns 0 if the request was completed within the allotted time. Else returns the
>   * errno with remaining time filled in timeout argument.
>   */
> -static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
> -			unsigned reset_counter,
> -			bool interruptible,
> -			struct timespec *timeout,
> -			struct drm_i915_file_private *file_priv)
> +static int __wait_request(struct i915_gem_request *rq,
> +			  bool interruptible,
> +			  struct timespec *timeout,
> +			  struct drm_i915_file_private *file_priv)
>  {
> +	struct intel_engine_cs *ring = rq->ring;
>  	struct drm_device *dev = ring->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_i915_private *dev_priv = to_i915(dev);
>  	const bool irq_test_in_progress =
>  		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
>  	struct timespec before, now;
> @@ -1163,7 +1264,7 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
>  
>  	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
>  
> -	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
> +	if (i915_request_complete(rq))
>  		return 0;
>  
>  	timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
> @@ -1180,7 +1281,7 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
>  		return -ENODEV;
>  
>  	/* Record current time in case interrupted by signal, or wedged */
> -	trace_i915_gem_request_wait_begin(ring, seqno);
> +	trace_i915_gem_request_wait_begin(ring, rq->seqno);
>  	getrawmonotonic(&before);
>  	for (;;) {
>  		struct timer_list timer;
> @@ -1190,19 +1291,12 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
>  
>  		/* We need to check whether any gpu reset happened in between
>  		 * the caller grabbing the seqno and now ... */
> -		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
> -			/* ... but upgrade the -EAGAIN to an -EIO if the gpu
> -			 * is truely gone. */
> -			ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
> -			if (ret == 0)
> -				ret = -EAGAIN;
> +		ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible, &rq->reset_counter);
> +		if (ret)
>  			break;
> -		}
>  
> -		if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) {
> -			ret = 0;
> +		if (__i915_request_complete__wa(rq))
>  			break;
> -		}
>  
>  		if (interruptible && signal_pending(current)) {
>  			ret = -ERESTARTSYS;
> @@ -1231,7 +1325,7 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
>  		}
>  	}
>  	getrawmonotonic(&now);
> -	trace_i915_gem_request_wait_end(ring, seqno);
> +	trace_i915_gem_request_wait_end(ring, rq->seqno);
>  
>  	if (!irq_test_in_progress)
>  		ring->irq_put(ring);
> @@ -1253,46 +1347,28 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
>   * request and object lists appropriately for that event.
>   */
>  int
> -i915_wait_seqno(struct intel_engine_cs *ring, uint32_t seqno)
> +i915_wait_request(struct i915_gem_request *rq)
>  {
> -	struct drm_device *dev = ring->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	bool interruptible = dev_priv->mm.interruptible;
> +	struct drm_device *dev = rq->ring->dev;
> +	struct drm_i915_private *dev_priv = to_i915(dev);
>  	int ret;
>  
> -	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
> -	BUG_ON(seqno == 0);
> -
> -	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
> -	if (ret)
> -		return ret;
> +	if (WARN_ON(!mutex_is_locked(&dev->struct_mutex)))
> +		return -EINVAL;
>  
> -	ret = i915_gem_check_olr(ring, seqno);
> +	ret = i915_gem_check_olr(rq);
>  	if (ret)
>  		return ret;
>  
> -	return __wait_seqno(ring, seqno,
> -			    atomic_read(&dev_priv->gpu_error.reset_counter),
> -			    interruptible, NULL, NULL);
> +	return __wait_request(rq, dev_priv->mm.interruptible,
> +			      NULL, NULL);
>  }
>  
> -static int
> -i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
> -				     struct intel_engine_cs *ring)
> +int
> +__i915_request_wait(struct i915_gem_request *rq,
> +		    bool interruptible)
>  {
> -	if (!obj->active)
> -		return 0;
> -
> -	/* Manually manage the write flush as we may have not yet
> -	 * retired the buffer.
> -	 *
> -	 * Note that the last_write_seqno is always the earlier of
> -	 * the two (read/write) seqno, so if we haved successfully waited,
> -	 * we know we have passed the last write.
> -	 */
> -	obj->last_write_seqno = 0;
> -
> -	return 0;
> +	return __wait_request(rq, interruptible, NULL, NULL);
>  }
>  
>  /**
> @@ -1303,19 +1379,27 @@ static __must_check int
>  i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
>  			       bool readonly)
>  {
> -	struct intel_engine_cs *ring = obj->ring;
> -	u32 seqno;
> -	int ret;
> +	int i, ret;
>  
> -	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
> -	if (seqno == 0)
> -		return 0;
> +	if (readonly) {
> +		if (obj->last_write.request == NULL)
> +			return 0;
>  
> -	ret = i915_wait_seqno(ring, seqno);
> -	if (ret)
> -		return ret;
> +		ret = i915_wait_request(obj->last_write.request);
> +		if (ret)
> +			return ret;
> +	} else {
> +		for (i = 0; i < I915_NUM_RINGS; i++) {
> +			if (obj->last_read[i].request == NULL)
> +				continue;
>  
> -	return i915_gem_object_wait_rendering__tail(obj, ring);
> +			ret = i915_wait_request(obj->last_read[i].request);
> +			if (ret)
> +				return ret;
> +		}
> +	}
> +
> +	return 0;
>  }
>  
>  /* A nonblocking variant of the above wait. This is a highly dangerous routine
> @@ -1328,34 +1412,42 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
>  {
>  	struct drm_device *dev = obj->base.dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_engine_cs *ring = obj->ring;
> -	unsigned reset_counter;
> -	u32 seqno;
> -	int ret;
> +	struct i915_gem_request *rq[I915_NUM_RINGS] = {};
> +	int i, n, ret;
>  
>  	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
>  	BUG_ON(!dev_priv->mm.interruptible);
>  
> -	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
> -	if (seqno == 0)
> +	n = 0;
> +	if (readonly) {
> +		if (obj->last_write.request)
> +			rq[n++] = i915_request_get(obj->last_write.request);
> +	} else {
> +		for (i = 0; i < I915_NUM_RINGS; i++)
> +			if (obj->last_read[i].request)
> +				rq[n++] = i915_request_get(obj->last_read[i].request);
> +	}
> +	if (n == 0)
>  		return 0;
>  
> -	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
> -	if (ret)
> -		return ret;
> -
> -	ret = i915_gem_check_olr(ring, seqno);
> -	if (ret)
> -		return ret;
> +	for (i = 0; i < n; i++) {
> +		ret = i915_gem_check_olr(rq[i]);
> +		if (ret)
> +			goto out;
> +	}
>  
> -	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
>  	mutex_unlock(&dev->struct_mutex);
> -	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file_priv);
> +
> +	for (i = 0; ret == 0 && i < n; i++)
> +		ret = __wait_request(rq[i], true, NULL, file_priv);
> +
>  	mutex_lock(&dev->struct_mutex);
> -	if (ret)
> -		return ret;
>  
> -	return i915_gem_object_wait_rendering__tail(obj, ring);
> +out:
> +	for (i = 0; i < n; i++)
> +		i915_request_put(rq[i]);
> +
> +	return ret;
>  }
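
The snapshot-then-wait dance here (take references under struct_mutex,
drop the lock, wait, then put) is exactly what the new kref on
i915_gem_request buys: the request stays valid even if retirement drops
the object's last_read/last_write pointers while we sleep. A contrived,
single-threaded sketch of that lifetime rule, using toy names rather
than the driver's API:

#include <stdio.h>
#include <stdlib.h>

/* Toy refcounted request; toy_get()/toy_put() stand in for
 * i915_request_get()/i915_request_put() purely for illustration. */
struct toy_request {
	int refcount;
	int completed;
};

static struct toy_request *toy_get(struct toy_request *rq)
{
	if (rq)
		rq->refcount++;
	return rq;
}

static void toy_put(struct toy_request *rq)
{
	if (rq && --rq->refcount == 0) {
		printf("request freed\n");
		free(rq);
	}
}

int main(void)
{
	/* The object owns one reference through obj->last_read[]. */
	struct toy_request *last_read = calloc(1, sizeof(*last_read));
	struct toy_request *snapshot;

	if (!last_read)
		return 1;
	last_read->refcount = 1;

	/* Nonblocking wait: snapshot the request and grab a reference
	 * before "dropping the lock". */
	snapshot = toy_get(last_read);

	/* Meanwhile retirement runs and the object lets go of its copy. */
	toy_put(last_read);
	last_read = NULL;

	/* Our snapshot is still safe to dereference and wait on ... */
	snapshot->completed = 1;
	printf("snapshot completed=%d\n", snapshot->completed);

	/* ... and only the final put actually frees the request. */
	toy_put(snapshot);
	return 0;
}

The same pattern is reused by i915_gem_wait_ioctl() further down.
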
>  
>  /**
> @@ -2157,81 +2249,57 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>  	return 0;
>  }
>  
> -static void
> -i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
> -			       struct intel_engine_cs *ring)
> -{
> -	u32 seqno = intel_ring_get_seqno(ring);
> -
> -	BUG_ON(ring == NULL);
> -	if (obj->ring != ring && obj->last_write_seqno) {
> -		/* Keep the seqno relative to the current ring */
> -		obj->last_write_seqno = seqno;
> -	}
> -	obj->ring = ring;
> -
> -	/* Add a reference if we're newly entering the active list. */
> -	if (!obj->active) {
> -		drm_gem_object_reference(&obj->base);
> -		obj->active = 1;
> -	}
> -
> -	list_move_tail(&obj->ring_list, &ring->active_list);
> -
> -	obj->last_read_seqno = seqno;
> -}
> -
>  void i915_vma_move_to_active(struct i915_vma *vma,
> -			     struct intel_engine_cs *ring)
> +			     struct intel_engine_cs *ring,
> +			     unsigned fenced)
>  {
> -	list_move_tail(&vma->mm_list, &vma->vm->active_list);
> -	return i915_gem_object_move_to_active(vma->obj, ring);
> -}
> -
> -static void
> -i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
> -{
> -	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> -	struct i915_address_space *vm;
> -	struct i915_vma *vma;
> +	struct drm_i915_gem_object *obj = vma->obj;
> +	struct i915_gem_request *rq = intel_ring_get_request(ring);
> +	u32 old_read = obj->base.read_domains;
> +	u32 old_write = obj->base.write_domain;
>  
> -	BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
> -	BUG_ON(!obj->active);
> +	BUG_ON(rq == NULL);
>  
> -	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
> -		vma = i915_gem_obj_to_vma(obj, vm);
> -		if (vma && !list_empty(&vma->mm_list))
> -			list_move_tail(&vma->mm_list, &vm->inactive_list);
> -	}
> -
> -	intel_fb_obj_flush(obj, true);
> +	obj->base.write_domain = obj->base.pending_write_domain;
> +	if (obj->base.write_domain == 0)
> +		obj->base.pending_read_domains |= obj->base.read_domains;
> +	obj->base.read_domains = obj->base.pending_read_domains;
>  
> -	list_del_init(&obj->ring_list);
> -	obj->ring = NULL;
> +	obj->base.pending_read_domains = 0;
> +	obj->base.pending_write_domain = 0;
>  
> -	obj->last_read_seqno = 0;
> -	obj->last_write_seqno = 0;
> -	obj->base.write_domain = 0;
> +	trace_i915_gem_object_change_domain(obj, old_read, old_write);
> +	if (obj->base.read_domains == 0)
> +		return;
>  
> -	obj->last_fenced_seqno = 0;
> +	/* Add a reference if we're newly entering the active list. */
> +	if (obj->last_read[ring->id].request == NULL && obj->active++ == 0)
> +		drm_gem_object_reference(&obj->base);
>  
> -	obj->active = 0;
> -	drm_gem_object_unreference(&obj->base);
> +	obj->last_read[ring->id].request = rq;
> +	list_move_tail(&obj->last_read[ring->id].ring_list, &ring->read_list);
>  
> -	WARN_ON(i915_verify_lists(dev));
> -}
> +	if (obj->base.write_domain) {
> +		obj->dirty = 1;
> +		obj->last_write.request = rq;
> +		list_move_tail(&obj->last_write.ring_list, &ring->write_list);
> +		intel_fb_obj_invalidate(obj, ring);
>  
> -static void
> -i915_gem_object_retire(struct drm_i915_gem_object *obj)
> -{
> -	struct intel_engine_cs *ring = obj->ring;
> +		/* update for the implicit flush after a batch */
> +		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
> +	}
>  
> -	if (ring == NULL)
> -		return;
> +	if (fenced & VMA_IS_FENCED) {
> +		obj->last_fence.request = rq;
> +		list_move_tail(&obj->last_fence.ring_list, &ring->fence_list);
> +		if (fenced & VMA_HAS_FENCE) {
> +			struct drm_i915_private *dev_priv = to_i915(ring->dev);
> +			list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
> +					&dev_priv->mm.fence_list);
> +		}
> +	}
>  
> -	if (i915_seqno_passed(ring->get_seqno(ring, true),
> -			      obj->last_read_seqno))
> -		i915_gem_object_move_to_inactive(obj);
> +	list_move_tail(&vma->mm_list, &vma->vm->active_list);
>  }
>  
>  static int
> @@ -2306,11 +2374,10 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
>  
>  int __i915_add_request(struct intel_engine_cs *ring,
>  		       struct drm_file *file,
> -		       struct drm_i915_gem_object *obj,
> -		       u32 *out_seqno)
> +		       struct drm_i915_gem_object *obj)
>  {
>  	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> -	struct drm_i915_gem_request *request;
> +	struct i915_gem_request *rq;
>  	u32 request_ring_position, request_start;
>  	int ret;
>  
> @@ -2326,10 +2393,16 @@ int __i915_add_request(struct intel_engine_cs *ring,
>  	if (ret)
>  		return ret;
>  
> -	request = ring->preallocated_lazy_request;
> -	if (WARN_ON(request == NULL))
> +	rq = ring->preallocated_request;
> +	if (WARN_ON(rq == NULL))
>  		return -ENOMEM;
>  
> +	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
> +				   dev_priv->mm.interruptible,
> +				   &rq->reset_counter);
> +	if (ret)
> +		return ret;
> +
>  	/* Record the position of the start of the request so that
>  	 * should we detect the updated seqno part-way through the
>  	 * GPU processing the request, we never over-estimate the
> @@ -2341,10 +2414,8 @@ int __i915_add_request(struct intel_engine_cs *ring,
>  	if (ret)
>  		return ret;
>  
> -	request->seqno = intel_ring_get_seqno(ring);
> -	request->ring = ring;
> -	request->head = request_start;
> -	request->tail = request_ring_position;
> +	rq->head = request_start;
> +	rq->tail = request_ring_position;
>  
>  	/* Whilst this request exists, batch_obj will be on the
>  	 * active_list, and so will hold the active reference. Only when this
> @@ -2352,32 +2423,31 @@ int __i915_add_request(struct intel_engine_cs *ring,
>  	 * inactive_list and lose its active reference. Hence we do not need
>  	 * to explicitly hold another reference here.
>  	 */
> -	request->batch_obj = obj;
> +	rq->batch_obj = obj;
>  
>  	/* Hold a reference to the current context so that we can inspect
>  	 * it later in case a hangcheck error event fires.
>  	 */
> -	request->ctx = ring->last_context;
> -	if (request->ctx)
> -		i915_gem_context_reference(request->ctx);
> +	rq->ctx = ring->last_context;
> +	if (rq->ctx)
> +		i915_gem_context_reference(rq->ctx);
>  
> -	request->emitted_jiffies = jiffies;
> -	list_add_tail(&request->list, &ring->request_list);
> -	request->file_priv = NULL;
> +	rq->emitted_jiffies = jiffies;
> +	list_add_tail(&rq->list, &ring->request_list);
> +	rq->file_priv = NULL;
>  
>  	if (file) {
>  		struct drm_i915_file_private *file_priv = file->driver_priv;
>  
>  		spin_lock(&file_priv->mm.lock);
> -		request->file_priv = file_priv;
> -		list_add_tail(&request->client_list,
> +		rq->file_priv = file_priv;
> +		list_add_tail(&rq->client_list,
>  			      &file_priv->mm.request_list);
>  		spin_unlock(&file_priv->mm.lock);
>  	}
>  
> -	trace_i915_gem_request_add(ring, request->seqno);
> -	ring->outstanding_lazy_seqno = 0;
> -	ring->preallocated_lazy_request = NULL;
> +	trace_i915_gem_request_add(ring, rq->seqno);
> +	ring->preallocated_request = NULL;
>  
>  	if (!dev_priv->ums.mm_suspended) {
>  		i915_queue_hangcheck(ring->dev);
> @@ -2389,22 +2459,20 @@ int __i915_add_request(struct intel_engine_cs *ring,
>  		intel_mark_busy(dev_priv->dev);
>  	}
>  
> -	if (out_seqno)
> -		*out_seqno = request->seqno;
>  	return 0;
>  }
>  
>  static inline void
> -i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
> +i915_gem_request_remove_from_client(struct i915_gem_request *rq)
>  {
> -	struct drm_i915_file_private *file_priv = request->file_priv;
> +	struct drm_i915_file_private *file_priv = rq->file_priv;
>  
>  	if (!file_priv)
>  		return;
>  
>  	spin_lock(&file_priv->mm.lock);
> -	list_del(&request->client_list);
> -	request->file_priv = NULL;
> +	list_del(&rq->client_list);
> +	rq->file_priv = NULL;
>  	spin_unlock(&file_priv->mm.lock);
>  }
>  
> @@ -2452,30 +2520,37 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
>  	}
>  }
>  
> -static void i915_gem_free_request(struct drm_i915_gem_request *request)
> +void __i915_request_free(struct kref *kref)
>  {
> -	list_del(&request->list);
> -	i915_gem_request_remove_from_client(request);
> +	struct i915_gem_request *rq = container_of(kref, struct i915_gem_request, kref);
> +	kfree(rq);
> +}
>  
> -	if (request->ctx)
> -		i915_gem_context_unreference(request->ctx);
> +static void i915_request_retire(struct i915_gem_request *rq)
> +{
> +	rq->completed = true;
>  
> -	kfree(request);
> +	list_del(&rq->list);
> +	i915_gem_request_remove_from_client(rq);
> +
> +	if (rq->ctx) {
> +		i915_gem_context_unreference(rq->ctx);
> +		rq->ctx = NULL;
> +	}
> +
> +	i915_request_put(rq);
>  }
>  
> -struct drm_i915_gem_request *
> +struct i915_gem_request *
>  i915_gem_find_active_request(struct intel_engine_cs *ring)
>  {
> -	struct drm_i915_gem_request *request;
> -	u32 completed_seqno;
> -
> -	completed_seqno = ring->get_seqno(ring, false);
> +	struct i915_gem_request *rq;
>  
> -	list_for_each_entry(request, &ring->request_list, list) {
> -		if (i915_seqno_passed(completed_seqno, request->seqno))
> +	list_for_each_entry(rq, &ring->request_list, list) {
> +		if (i915_request_complete(rq))
>  			continue;
>  
> -		return request;
> +		return rq;
>  	}
>  
>  	return NULL;
> @@ -2484,33 +2559,53 @@ i915_gem_find_active_request(struct intel_engine_cs *ring)
>  static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
>  				       struct intel_engine_cs *ring)
>  {
> -	struct drm_i915_gem_request *request;
> +	struct i915_gem_request *rq;
>  	bool ring_hung;
>  
> -	request = i915_gem_find_active_request(ring);
> +	rq = i915_gem_find_active_request(ring);
>  
> -	if (request == NULL)
> +	if (rq == NULL)
>  		return;
>  
>  	ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
>  
> -	i915_set_reset_status(dev_priv, request->ctx, ring_hung);
> +	i915_set_reset_status(dev_priv, rq->ctx, ring_hung);
>  
> -	list_for_each_entry_continue(request, &ring->request_list, list)
> -		i915_set_reset_status(dev_priv, request->ctx, false);
> +	list_for_each_entry_continue(rq, &ring->request_list, list)
> +		i915_set_reset_status(dev_priv, rq->ctx, false);
>  }
>  
>  static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
>  					struct intel_engine_cs *ring)
>  {
> -	while (!list_empty(&ring->active_list)) {
> +	while (!list_empty(&ring->write_list)) {
>  		struct drm_i915_gem_object *obj;
>  
> -		obj = list_first_entry(&ring->active_list,
> +		obj = list_first_entry(&ring->write_list,
>  				       struct drm_i915_gem_object,
> -				       ring_list);
> +				       last_write.ring_list);
>  
> -		i915_gem_object_move_to_inactive(obj);
> +		i915_gem_object_retire__write(obj);
> +	}
> +
> +	while (!list_empty(&ring->fence_list)) {
> +		struct drm_i915_gem_object *obj;
> +
> +		obj = list_first_entry(&ring->fence_list,
> +				       struct drm_i915_gem_object,
> +				       last_fence.ring_list);
> +
> +		i915_gem_object_retire__fence(obj);
> +	}
> +
> +	while (!list_empty(&ring->read_list)) {
> +		struct drm_i915_gem_object *obj;
> +
> +		obj = list_first_entry(&ring->read_list,
> +				       struct drm_i915_gem_object,
> +				       last_read[ring->id].ring_list);
> +
> +		i915_gem_object_retire__read(obj, ring);
>  	}
>  
>  	/*
> @@ -2521,19 +2616,18 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
>  	 * the request.
>  	 */
>  	while (!list_empty(&ring->request_list)) {
> -		struct drm_i915_gem_request *request;
> +		struct i915_gem_request *rq;
>  
> -		request = list_first_entry(&ring->request_list,
> -					   struct drm_i915_gem_request,
> -					   list);
> +		rq = list_first_entry(&ring->request_list,
> +				      struct i915_gem_request,
> +				      list);
>  
> -		i915_gem_free_request(request);
> +		i915_request_retire(rq);
>  	}
>  
>  	/* These may not have been flush before the reset, do so now */
> -	kfree(ring->preallocated_lazy_request);
> -	ring->preallocated_lazy_request = NULL;
> -	ring->outstanding_lazy_seqno = 0;
> +	kfree(ring->preallocated_request);
> +	ring->preallocated_request = NULL;
>  }
>  
>  void i915_gem_restore_fences(struct drm_device *dev)
> @@ -2592,49 +2686,77 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
>  
>  	WARN_ON(i915_verify_lists(ring->dev));
>  
> -	seqno = ring->get_seqno(ring, true);
> +	seqno = ring->get_seqno(ring);
>  
>  	/* Move any buffers on the active list that are no longer referenced
>  	 * by the ringbuffer to the flushing/inactive lists as appropriate,
>  	 * before we free the context associated with the requests.
>  	 */
> -	while (!list_empty(&ring->active_list)) {
> +	while (!list_empty(&ring->write_list)) {
> +		struct drm_i915_gem_object *obj;
> +
> +		obj = list_first_entry(&ring->write_list,
> +				       struct drm_i915_gem_object,
> +				       last_write.ring_list);
> +
> +		if (!__i915_seqno_passed(seqno,
> +					 obj->last_write.request->seqno))
> +			break;
> +
> +		i915_gem_object_retire__write(obj);
> +	}
> +
> +	while (!list_empty(&ring->fence_list)) {
>  		struct drm_i915_gem_object *obj;
>  
> -		obj = list_first_entry(&ring->active_list,
> -				      struct drm_i915_gem_object,
> -				      ring_list);
> +		obj = list_first_entry(&ring->fence_list,
> +				       struct drm_i915_gem_object,
> +				       last_fence.ring_list);
>  
> -		if (!i915_seqno_passed(seqno, obj->last_read_seqno))
> +		if (!__i915_seqno_passed(seqno,
> +					 obj->last_fence.request->seqno))
>  			break;
>  
> -		i915_gem_object_move_to_inactive(obj);
> +		i915_gem_object_retire__fence(obj);
>  	}
>  
> +	while (!list_empty(&ring->read_list)) {
> +		struct drm_i915_gem_object *obj;
> +
> +		obj = list_first_entry(&ring->read_list,
> +				       struct drm_i915_gem_object,
> +				       last_read[ring->id].ring_list);
> +
> +		if (!__i915_seqno_passed(seqno,
> +					 obj->last_read[ring->id].request->seqno))
> +			break;
> +
> +		i915_gem_object_retire__read(obj, ring);
> +	}
>  
>  	while (!list_empty(&ring->request_list)) {
> -		struct drm_i915_gem_request *request;
> +		struct i915_gem_request *rq;
>  
> -		request = list_first_entry(&ring->request_list,
> -					   struct drm_i915_gem_request,
> -					   list);
> +		rq = list_first_entry(&ring->request_list,
> +				      struct i915_gem_request,
> +				      list);
>  
> -		if (!i915_seqno_passed(seqno, request->seqno))
> +		if (!__i915_seqno_passed(seqno, rq->seqno))
>  			break;
>  
> -		trace_i915_gem_request_retire(ring, request->seqno);
> +		trace_i915_gem_request_retire(ring, rq->seqno);
>  		/* We know the GPU must have read the request to have
>  		 * sent us the seqno + interrupt, so use the position
>  		 * of tail of the request to update the last known position
>  		 * of the GPU head.
>  		 */
> -		ring->buffer->last_retired_head = request->tail;
> +		ring->buffer->last_retired_head = rq->tail;
>  
> -		i915_gem_free_request(request);
> +		i915_request_retire(rq);
>  	}
>  
>  	if (unlikely(ring->trace_irq_seqno &&
> -		     i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
> +		     __i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
>  		ring->irq_put(ring);
>  		ring->trace_irq_seqno = 0;
>  	}
> @@ -2699,14 +2821,23 @@ i915_gem_idle_work_handler(struct work_struct *work)
>  static int
>  i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
>  {
> -	int ret;
> +	int i;
>  
> -	if (obj->active) {
> -		ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
> +	if (!obj->active)
> +		return 0;
> +
> +	for (i = 0; i < I915_NUM_RINGS; i++) {
> +		struct i915_gem_request *rq = obj->last_read[i].request;
> +		int ret;
> +
> +		if (rq == NULL)
> +			continue;
> +
> +		ret = i915_gem_check_olr(rq);
>  		if (ret)
>  			return ret;
>  
> -		i915_gem_retire_requests_ring(obj->ring);
> +		i915_gem_retire_requests_ring(rq->ring);
>  	}
>  
>  	return 0;
> @@ -2737,14 +2868,11 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
>  int
>  i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  {
> -	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct drm_i915_gem_wait *args = data;
>  	struct drm_i915_gem_object *obj;
> -	struct intel_engine_cs *ring = NULL;
>  	struct timespec timeout_stack, *timeout = NULL;
> -	unsigned reset_counter;
> -	u32 seqno = 0;
> -	int ret = 0;
> +	struct i915_gem_request *rq[I915_NUM_RINGS] = {};
> +	int i, n, ret = 0;
>  
>  	if (args->timeout_ns >= 0) {
>  		timeout_stack = ns_to_timespec(args->timeout_ns);
> @@ -2766,13 +2894,8 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  	if (ret)
>  		goto out;
>  
> -	if (obj->active) {
> -		seqno = obj->last_read_seqno;
> -		ring = obj->ring;
> -	}
> -
> -	if (seqno == 0)
> -		 goto out;
> +	if (!obj->active)
> +		goto out;
>  
>  	/* Do this after OLR check to make sure we make forward progress polling
>  	 * on this IOCTL with a 0 timeout (like busy ioctl)
> @@ -2782,11 +2905,23 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  		goto out;
>  	}
>  
> +	for (i = n = 0; i < I915_NUM_RINGS; i++) {
> +		if (obj->last_read[i].request == NULL)
> +			continue;
> +
> +		rq[n++] = i915_request_get(obj->last_read[i].request);
> +	}
> +
>  	drm_gem_object_unreference(&obj->base);
> -	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
>  	mutex_unlock(&dev->struct_mutex);
>  
> -	ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv);
> +	for (i = 0; i < n; i++) {
> +		if (ret == 0)
> +			ret = __wait_request(rq[i], true, timeout, file->driver_priv);
> +
> +		i915_request_put(rq[i]);
> +	}
> +
>  	if (timeout)
>  		args->timeout_ns = timespec_to_ns(timeout);
>  	return ret;
> @@ -2797,6 +2932,41 @@ out:
>  	return ret;
>  }
>  
> +static int
> +i915_request_sync(struct i915_gem_request *rq,
> +		  struct intel_engine_cs *to,
> +		  struct drm_i915_gem_object *obj)
> +{
> +	int ret, idx;
> +
> +	if (to == NULL)
> +		return i915_wait_request(rq);
> +
> +	idx = intel_ring_sync_index(rq->ring, to);
> +	if (rq->seqno <= rq->ring->semaphore.sync_seqno[idx])
> +		return 0;
> +
> +	ret = i915_gem_check_olr(rq);
> +	if (ret)
> +		return ret;
> +
> +	if (!i915_request_complete(rq)) {
> +		trace_i915_gem_ring_sync_to(rq->ring, to, rq->seqno);
> +		ret = to->semaphore.sync_to(to, rq->ring, rq->seqno);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	/* We must recheck obj->last_read[] because sync_to()
> +	 * might have just caused seqno wrap under
> +	 * the radar.
> +	 */
> +	if (obj->last_read[rq->ring->id].request == rq)
> +		rq->ring->semaphore.sync_seqno[idx] = rq->seqno;
> +
> +	return 0;
> +}
> +
>  /**
>   * i915_gem_object_sync - sync an object to a ring.
>   *
> @@ -2811,40 +2981,36 @@ out:
>   */
>  int
>  i915_gem_object_sync(struct drm_i915_gem_object *obj,
> -		     struct intel_engine_cs *to)
> +		     struct intel_engine_cs *to,
> +		     bool readonly)
>  {
> -	struct intel_engine_cs *from = obj->ring;
> -	u32 seqno;
> -	int ret, idx;
> -
> -	if (from == NULL || to == from)
> -		return 0;
> +	struct i915_gem_request *rq;
> +	struct intel_engine_cs *semaphore;
> +	int ret = 0, i;
>  
> -	if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
> -		return i915_gem_object_wait_rendering(obj, false);
> +	semaphore = NULL;
> +	if (i915_semaphore_is_enabled(obj->base.dev))
> +		semaphore = to;
>  
> -	idx = intel_ring_sync_index(from, to);
> -
> -	seqno = obj->last_read_seqno;
> -	/* Optimization: Avoid semaphore sync when we are sure we already
> -	 * waited for an object with higher seqno */
> -	if (seqno <= from->semaphore.sync_seqno[idx])
> -		return 0;
> -
> -	ret = i915_gem_check_olr(obj->ring, seqno);
> -	if (ret)
> -		return ret;
> +	if (readonly) {
> +		rq = obj->last_write.request;
> +		if (rq != NULL && to != rq->ring)
> +			ret = i915_request_sync(rq, semaphore, obj);
> +	} else {
> +		for (i = 0; i < I915_NUM_RINGS; i++) {
> +			rq = obj->last_read[i].request;
> +			if (rq == NULL || to == rq->ring)
> +				continue;
>  
> -	trace_i915_gem_ring_sync_to(from, to, seqno);
> -	ret = to->semaphore.sync_to(to, from, seqno);
> -	if (!ret)
> -		/* We use last_read_seqno because sync_to()
> -		 * might have just caused seqno wrap under
> -		 * the radar.
> -		 */
> -		from->semaphore.sync_seqno[idx] = obj->last_read_seqno;
> +			ret = i915_request_sync(rq, semaphore, obj);
> +			if (ret)
> +				break;
> +		}
> +	}
>  
> +	i915_gem_object_retire(obj);
>  	return ret;
> +
>  }
>  
>  static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
> @@ -3150,14 +3316,16 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
>  static int
>  i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
>  {
> -	if (obj->last_fenced_seqno) {
> -		int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
> -		if (ret)
> -			return ret;
> +	int ret;
>  
> -		obj->last_fenced_seqno = 0;
> -	}
> +	if (obj->last_fence.request == NULL)
> +		return 0;
>  
> +	ret = i915_wait_request(obj->last_fence.request);
> +	if (ret)
> +		return ret;
> +
> +	i915_gem_object_retire__fence(obj);
>  	return 0;
>  }
>  
> @@ -3822,11 +3990,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  	bool was_pin_display;
>  	int ret;
>  
> -	if (pipelined != obj->ring) {
> -		ret = i915_gem_object_sync(obj, pipelined);
> -		if (ret)
> -			return ret;
> -	}
> +	ret = i915_gem_object_sync(obj, pipelined, true);
> +	if (ret)
> +		return ret;
>  
>  	/* Mark the pin_display early so that we account for the
>  	 * display coherency whilst setting up the cache domains.
> @@ -3974,38 +4140,35 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct drm_i915_file_private *file_priv = file->driver_priv;
>  	unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
> -	struct drm_i915_gem_request *request;
> -	struct intel_engine_cs *ring = NULL;
> -	unsigned reset_counter;
> -	u32 seqno = 0;
> +	struct i915_gem_request *rq, *iter;
>  	int ret;
>  
>  	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
>  	if (ret)
>  		return ret;
>  
> -	ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
> -	if (ret)
> -		return ret;
> +	/* used for querying whether the GPU is wedged by legacy userspace */
> +	if (i915_terminally_wedged(&dev_priv->gpu_error))
> +		return -EIO;
>  
>  	spin_lock(&file_priv->mm.lock);
> -	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
> -		if (time_after_eq(request->emitted_jiffies, recent_enough))
> +	rq = NULL;
> +	list_for_each_entry(iter, &file_priv->mm.request_list, client_list) {
> +		if (time_after_eq(iter->emitted_jiffies, recent_enough))
>  			break;
> -
> -		ring = request->ring;
> -		seqno = request->seqno;
> +		rq = iter;
>  	}
> -	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
> +	rq = i915_request_get(rq);
>  	spin_unlock(&file_priv->mm.lock);
>  
> -	if (seqno == 0)
> +	if (rq == NULL)
>  		return 0;
>  
> -	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL);
> +	ret = __wait_request(rq, true, NULL, NULL);
>  	if (ret == 0)
>  		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
>  
> +	i915_request_put(rq);
>  	return ret;
>  }
>  
> @@ -4219,7 +4382,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>  {
>  	struct drm_i915_gem_busy *args = data;
>  	struct drm_i915_gem_object *obj;
> -	int ret;
> +	int ret, i;
>  
>  	ret = i915_mutex_lock_interruptible(dev);
>  	if (ret)
> @@ -4238,10 +4401,16 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>  	 */
>  	ret = i915_gem_object_flush_active(obj);
>  
> -	args->busy = obj->active;
> -	if (obj->ring) {
> +	args->busy = 0;
> +	if (obj->active) {
>  		BUILD_BUG_ON(I915_NUM_RINGS > 16);
> -		args->busy |= intel_ring_flag(obj->ring) << 16;
> +		args->busy |= 1;
> +		for (i = 0; i < I915_NUM_RINGS; i++)  {
> +			if (obj->last_read[i].request == NULL)
> +				continue;
> +
> +			args->busy |= 1 << (16 + i);
> +		}
>  	}
>  
>  	drm_gem_object_unreference(&obj->base);
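
With per-ring read tracking, busy now reports bit 0 for "active on any
ring" plus one bit per ring at position 16+ring, instead of only the
single ring the object was last on. A rough sketch of how userspace
could decode the new layout (ring numbering here just mirrors the
kernel's ring ids; treat it as illustrative):

#include <stdint.h>
#include <stdio.h>

/* Decode drm_i915_gem_busy.busy as reported after this patch:
 *   bit 0      - the object is still active
 *   bit 16 + n - ring n has an outstanding read of the object
 */
static void decode_busy(uint32_t busy)
{
	int ring;

	if (!(busy & 1)) {
		printf("idle\n");
		return;
	}

	for (ring = 0; ring < 16; ring++)
		if (busy & (1u << (16 + ring)))
			printf("busy on ring %d\n", ring);
}

int main(void)
{
	decode_busy(0);			/* idle */
	decode_busy(1u | 1u << 16);	/* busy on ring 0 */
	return 0;
}
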
> @@ -4307,8 +4476,13 @@ unlock:
>  void i915_gem_object_init(struct drm_i915_gem_object *obj,
>  			  const struct drm_i915_gem_object_ops *ops)
>  {
> +	int i;
> +
>  	INIT_LIST_HEAD(&obj->global_list);
> -	INIT_LIST_HEAD(&obj->ring_list);
> +	INIT_LIST_HEAD(&obj->last_fence.ring_list);
> +	INIT_LIST_HEAD(&obj->last_write.ring_list);
> +	for (i = 0; i < I915_NUM_RINGS; i++)
> +		INIT_LIST_HEAD(&obj->last_read[i].ring_list);
>  	INIT_LIST_HEAD(&obj->obj_exec_link);
>  	INIT_LIST_HEAD(&obj->vma_list);
>  
> @@ -4876,7 +5050,9 @@ i915_gem_lastclose(struct drm_device *dev)
>  static void
>  init_ring_lists(struct intel_engine_cs *ring)
>  {
> -	INIT_LIST_HEAD(&ring->active_list);
> +	INIT_LIST_HEAD(&ring->read_list);
> +	INIT_LIST_HEAD(&ring->write_list);
> +	INIT_LIST_HEAD(&ring->fence_list);
>  	INIT_LIST_HEAD(&ring->request_list);
>  }
>  
> @@ -4972,13 +5148,13 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
>  	 */
>  	spin_lock(&file_priv->mm.lock);
>  	while (!list_empty(&file_priv->mm.request_list)) {
> -		struct drm_i915_gem_request *request;
> +		struct i915_gem_request *rq;
>  
> -		request = list_first_entry(&file_priv->mm.request_list,
> -					   struct drm_i915_gem_request,
> -					   client_list);
> -		list_del(&request->client_list);
> -		request->file_priv = NULL;
> +		rq = list_first_entry(&file_priv->mm.request_list,
> +				      struct i915_gem_request,
> +				      client_list);
> +		list_del(&rq->client_list);
> +		rq->file_priv = NULL;
>  	}
>  	spin_unlock(&file_priv->mm.lock);
>  }
> @@ -5266,3 +5442,37 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
>  
>  	return vma;
>  }
> +
> +struct i915_gem_request *i915_gem_object_last_read(struct drm_i915_gem_object *obj)
> +{
> +	u32 seqno = 0;
> +	struct i915_gem_request *rq = NULL;
> +	int i;
> +
> +	/* This is approximate as seqnos cannot be compared across rings */
> +	for (i = 0; i < I915_NUM_RINGS; i++) {
> +		if (obj->last_read[i].request == NULL)
> +			continue;
> +
> +		if (__i915_seqno_passed(obj->last_read[i].request->seqno, seqno))
> +			rq = obj->last_read[i].request, seqno = rq->seqno;
> +	}
> +
> +	return rq;
> +}
> +
> +struct i915_gem_request *i915_gem_seqno_to_request(struct intel_engine_cs *ring,
> +						   u32 seqno)
> +{
> +	struct i915_gem_request *rq;
> +
> +	list_for_each_entry(rq, &ring->request_list, list) {
> +		if (rq->seqno == seqno)
> +			return rq;
> +
> +		if (__i915_seqno_passed(seqno, rq->seqno))
> +			break;
> +	}
> +
> +	return NULL;
> +}
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 9683e62..5cc1e98 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -303,13 +303,9 @@ void i915_gem_context_reset(struct drm_device *dev)
>  		if (!lctx)
>  			continue;
>  
> -		if (dctx->legacy_hw_ctx.rcs_state && i == RCS) {
> +		if (dctx->legacy_hw_ctx.rcs_state && i == RCS)
>  			WARN_ON(i915_gem_obj_ggtt_pin(dctx->legacy_hw_ctx.rcs_state,
>  						      get_context_alignment(dev), 0));
> -			/* Fake a finish/inactive */
> -			dctx->legacy_hw_ctx.rcs_state->base.write_domain = 0;
> -			dctx->legacy_hw_ctx.rcs_state->active = 0;
> -		}
>  
>  		if (lctx->legacy_hw_ctx.rcs_state && i == RCS)
>  			i915_gem_object_ggtt_unpin(lctx->legacy_hw_ctx.rcs_state);
> @@ -385,7 +381,6 @@ void i915_gem_context_fini(struct drm_device *dev)
>  		WARN_ON(!dev_priv->ring[RCS].last_context);
>  		if (dev_priv->ring[RCS].last_context == dctx) {
>  			/* Fake switch to NULL context */
> -			WARN_ON(dctx->legacy_hw_ctx.rcs_state->active);
>  			i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state);
>  			i915_gem_context_unreference(dctx);
>  			dev_priv->ring[RCS].last_context = NULL;
> @@ -613,8 +608,11 @@ static int do_switch(struct intel_engine_cs *ring,
>  	 * MI_SET_CONTEXT instead of when the next seqno has completed.
>  	 */
>  	if (from != NULL) {
> -		from->legacy_hw_ctx.rcs_state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> -		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), ring);
> +		struct drm_i915_gem_object *from_obj = from->legacy_hw_ctx.rcs_state;
> +
> +		from_obj->base.pending_read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> +		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from_obj), ring, 0);
> +
>  		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
>  		 * whole damn pipeline, we don't need to explicitly mark the
>  		 * object dirty. The only exception is that the context must be
> @@ -622,11 +620,10 @@ static int do_switch(struct intel_engine_cs *ring,
>  		 * able to defer doing this until we know the object would be
>  		 * swapped, but there is no way to do that yet.
>  		 */
> -		from->legacy_hw_ctx.rcs_state->dirty = 1;
> -		BUG_ON(from->legacy_hw_ctx.rcs_state->ring != ring);
> +		from_obj->dirty = 1;
>  
>  		/* obj is kept alive until the next request by its active ref */
> -		i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
> +		i915_gem_object_ggtt_unpin(from_obj);
>  		i915_gem_context_unreference(from);
>  	}
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 1a0611b..13a2f13 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -832,7 +832,8 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
>  
>  	list_for_each_entry(vma, vmas, exec_list) {
>  		struct drm_i915_gem_object *obj = vma->obj;
> -		ret = i915_gem_object_sync(obj, ring);
> +
> +		ret = i915_gem_object_sync(obj, ring, obj->base.pending_write_domain == 0);
>  		if (ret)
>  			return ret;
>  
> @@ -946,40 +947,20 @@ void
>  i915_gem_execbuffer_move_to_active(struct list_head *vmas,
>  				   struct intel_engine_cs *ring)
>  {
> -	u32 seqno = intel_ring_get_seqno(ring);
>  	struct i915_vma *vma;
>  
>  	list_for_each_entry(vma, vmas, exec_list) {
>  		struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> -		struct drm_i915_gem_object *obj = vma->obj;
> -		u32 old_read = obj->base.read_domains;
> -		u32 old_write = obj->base.write_domain;
> -
> -		obj->base.write_domain = obj->base.pending_write_domain;
> -		if (obj->base.write_domain == 0)
> -			obj->base.pending_read_domains |= obj->base.read_domains;
> -		obj->base.read_domains = obj->base.pending_read_domains;
> -
> -		i915_vma_move_to_active(vma, ring);
> -		if (obj->base.write_domain) {
> -			obj->dirty = 1;
> -			obj->last_write_seqno = seqno;
> +		unsigned fenced;
>  
> -			intel_fb_obj_invalidate(obj, ring);
> -
> -			/* update for the implicit flush after a batch */
> -			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
> -		}
> +		fenced = 0;
>  		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
> -			obj->last_fenced_seqno = seqno;
> -			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
> -				struct drm_i915_private *dev_priv = to_i915(ring->dev);
> -				list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
> -					       &dev_priv->mm.fence_list);
> -			}
> +			fenced |= VMA_IS_FENCED;
> +			if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
> +				fenced |= VMA_HAS_FENCE;
>  		}
>  
> -		trace_i915_gem_object_change_domain(obj, old_read, old_write);
> +		i915_vma_move_to_active(vma, ring, fenced);
>  	}
>  }
>  
> @@ -993,7 +974,7 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
>  	ring->gpu_caches_dirty = true;
>  
>  	/* Add a breadcrumb for the completion of the batch buffer */
> -	(void)__i915_add_request(ring, file, obj, NULL);
> +	(void)__i915_add_request(ring, file, obj);
>  }
>  
>  static int
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index e60be3f..fc1223c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -159,9 +159,10 @@ int i915_gem_render_state_init(struct intel_engine_cs *ring)
>  	if (ret)
>  		goto out;
>  
> -	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
> +	so.obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
> +	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring, 0);
>  
> -	ret = __i915_add_request(ring, NULL, so.obj, NULL);
> +	ret = __i915_add_request(ring, NULL, so.obj);
>  	/* __i915_add_request moves object to inactive if it fails */
>  out:
>  	render_state_fini(&so);
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index 7e623bf..a45651d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -376,7 +376,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
>  
>  		if (ret == 0) {
>  			obj->fence_dirty =
> -				obj->last_fenced_seqno ||
> +				obj->last_fence.request ||
>  				obj->fence_reg != I915_FENCE_REG_NONE;
>  
>  			obj->tiling_mode = args->tiling_mode;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 1e05414..fb1041f 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -661,11 +661,12 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>  		       struct i915_vma *vma)
>  {
>  	struct drm_i915_gem_object *obj = vma->obj;
> +	struct i915_gem_request *rq = i915_gem_object_last_read(obj);
>  
>  	err->size = obj->base.size;
>  	err->name = obj->base.name;
> -	err->rseqno = obj->last_read_seqno;
> -	err->wseqno = obj->last_write_seqno;
> +	err->rseqno = i915_request_seqno(rq);
> +	err->wseqno = i915_request_seqno(obj->last_write.request);
>  	err->gtt_offset = vma->node.start;
>  	err->read_domains = obj->base.read_domains;
>  	err->write_domain = obj->base.write_domain;
> @@ -679,7 +680,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>  	err->dirty = obj->dirty;
>  	err->purgeable = obj->madv != I915_MADV_WILLNEED;
>  	err->userptr = obj->userptr.mm != NULL;
> -	err->ring = obj->ring ? obj->ring->id : -1;
> +	err->ring = i915_request_ring_id(rq);
>  	err->cache_level = obj->cache_level;
>  }
>  
> @@ -877,8 +878,8 @@ static void i915_record_ring_state(struct drm_device *dev,
>  
>  	ering->waiting = waitqueue_active(&ring->irq_queue);
>  	ering->instpm = I915_READ(RING_INSTPM(ring->mmio_base));
> -	ering->seqno = ring->get_seqno(ring, false);
>  	ering->acthd = intel_ring_get_active_head(ring);
> +	ering->seqno = ring->get_seqno(ring);
>  	ering->head = I915_READ_HEAD(ring);
>  	ering->tail = I915_READ_TAIL(ring);
>  	ering->ctl = I915_READ_CTL(ring);
> @@ -972,7 +973,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  				  struct drm_i915_error_state *error)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct drm_i915_gem_request *request;
> +	struct i915_gem_request *rq;
>  	int i, count;
>  
>  	for (i = 0; i < I915_NUM_RINGS; i++) {
> @@ -987,13 +988,12 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  
>  		i915_record_ring_state(dev, error, ring, &error->ring[i]);
>  
> -		request = i915_gem_find_active_request(ring);
> -		if (request) {
> +		rq = i915_gem_find_active_request(ring);
> +		if (rq) {
>  			struct i915_address_space *vm;
>  
> -			vm = request->ctx && request->ctx->ppgtt ?
> -				&request->ctx->ppgtt->base :
> -				&dev_priv->gtt.base;
> +			vm = rq->ctx && rq->ctx->ppgtt ?
> +				&rq->ctx->ppgtt->base : &dev_priv->gtt.base;
>  
>  			/* We need to copy these to an anonymous buffer
>  			 * as the simplest method to avoid being overwritten
> @@ -1001,7 +1001,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  			 */
>  			error->ring[i].batchbuffer =
>  				i915_error_object_create(dev_priv,
> -							 request->batch_obj,
> +							 rq->batch_obj,
>  							 vm);
>  
>  			if (HAS_BROKEN_CS_TLB(dev_priv->dev))
> @@ -1009,11 +1009,11 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  					i915_error_ggtt_object_create(dev_priv,
>  							     ring->scratch.obj);
>  
> -			if (request->file_priv) {
> +			if (rq->file_priv) {
>  				struct task_struct *task;
>  
>  				rcu_read_lock();
> -				task = pid_task(request->file_priv->file->pid,
> +				task = pid_task(rq->file_priv->file->pid,
>  						PIDTYPE_PID);
>  				if (task) {
>  					strcpy(error->ring[i].comm, task->comm);
> @@ -1032,7 +1032,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  		i915_gem_record_active_context(ring, error, &error->ring[i]);
>  
>  		count = 0;
> -		list_for_each_entry(request, &ring->request_list, list)
> +		list_for_each_entry(rq, &ring->request_list, list)
>  			count++;
>  
>  		error->ring[i].num_requests = count;
> @@ -1045,13 +1045,13 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  		}
>  
>  		count = 0;
> -		list_for_each_entry(request, &ring->request_list, list) {
> +		list_for_each_entry(rq, &ring->request_list, list) {
>  			struct drm_i915_error_request *erq;
>  
>  			erq = &error->ring[i].requests[count++];
> -			erq->seqno = request->seqno;
> -			erq->jiffies = request->emitted_jiffies;
> -			erq->tail = request->tail;
> +			erq->seqno = rq->seqno;
> +			erq->jiffies = rq->emitted_jiffies;
> +			erq->tail = rq->tail;
>  		}
>  	}
>  }
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index b1bb88f..2dab019 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1265,9 +1265,6 @@ static void notify_ring(struct drm_device *dev,
>  
>  	trace_i915_gem_request_complete(ring);
>  
> -	if (drm_core_check_feature(dev, DRIVER_MODESET))
> -		intel_notify_mmio_flip(ring);
> -
>  	wake_up_all(&ring->irq_queue);
>  	i915_queue_hangcheck(dev);
>  }
> @@ -3041,18 +3038,15 @@ static void gen8_disable_vblank(struct drm_device *dev, int pipe)
>  	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
>  }
>  
> -static u32
> -ring_last_seqno(struct intel_engine_cs *ring)
> -{
> -	return list_entry(ring->request_list.prev,
> -			  struct drm_i915_gem_request, list)->seqno;
> -}
> -
>  static bool
> -ring_idle(struct intel_engine_cs *ring, u32 seqno)
> +ring_idle(struct intel_engine_cs *ring)
>  {
> -	return (list_empty(&ring->request_list) ||
> -		i915_seqno_passed(seqno, ring_last_seqno(ring)));
> +	if (list_empty(&ring->request_list))
> +		return true;
> +
> +	return i915_request_complete(list_entry(ring->request_list.prev,
> +						struct i915_gem_request,
> +						list));
>  }
>  
>  static bool
> @@ -3155,6 +3149,7 @@ static int semaphore_passed(struct intel_engine_cs *ring)
>  {
>  	struct drm_i915_private *dev_priv = ring->dev->dev_private;
>  	struct intel_engine_cs *signaller;
> +	struct i915_gem_request *rq;
>  	u32 seqno;
>  
>  	ring->hangcheck.deadlock++;
> @@ -3167,7 +3162,8 @@ static int semaphore_passed(struct intel_engine_cs *ring)
>  	if (signaller->hangcheck.deadlock >= I915_NUM_RINGS)
>  		return -1;
>  
> -	if (i915_seqno_passed(signaller->get_seqno(signaller, false), seqno))
> +	rq = i915_gem_seqno_to_request(ring, seqno);
> +	if (rq == NULL || i915_request_complete(rq))
>  		return 1;
>  
>  	/* cursory check for an unkickable deadlock */
> @@ -3268,11 +3264,11 @@ static void i915_hangcheck_elapsed(unsigned long data)
>  
>  		semaphore_clear_deadlocks(dev_priv);
>  
> -		seqno = ring->get_seqno(ring, false);
>  		acthd = intel_ring_get_active_head(ring);
> +		seqno = ring->get_seqno(ring);
>  
>  		if (ring->hangcheck.seqno == seqno) {
> -			if (ring_idle(ring, seqno)) {
> +			if (ring_idle(ring)) {
>  				ring->hangcheck.action = HANGCHECK_IDLE;
>  
>  				if (waitqueue_active(&ring->irq_queue)) {
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index f5aa006..0072d17 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -365,7 +365,7 @@ TRACE_EVENT(i915_gem_ring_dispatch,
>  	    TP_fast_assign(
>  			   __entry->dev = ring->dev->primary->index;
>  			   __entry->ring = ring->id;
> -			   __entry->seqno = seqno;
> +			   __entry->seqno = intel_ring_get_seqno(ring);
>  			   __entry->flags = flags;
>  			   i915_trace_irq_get(ring, seqno);
>  			   ),
> @@ -435,7 +435,7 @@ TRACE_EVENT(i915_gem_request_complete,
>  	    TP_fast_assign(
>  			   __entry->dev = ring->dev->primary->index;
>  			   __entry->ring = ring->id;
> -			   __entry->seqno = ring->get_seqno(ring, false);
> +			   __entry->seqno = ring->get_seqno(ring);
>  			   ),
>  
>  	    TP_printk("dev=%u, ring=%u, seqno=%u",
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index a1cf052..4432fe8 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -9065,6 +9065,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
>  	BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
>  	atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
>  
> +	i915_request_put(work->flip_queued_request);
>  	kfree(work);
>  }
>  
> @@ -9455,7 +9456,7 @@ static bool use_mmio_flip(struct intel_engine_cs *ring,
>  	else if (i915.enable_execlists)
>  		return true;
>  	else
> -		return ring != obj->ring;
> +		return ring != i915_request_ring(obj->last_write.request);
>  }
>  
>  static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
> @@ -9486,94 +9487,54 @@ static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
>  	POSTING_READ(DSPSURF(intel_crtc->plane));
>  }
>  
> -static int intel_postpone_flip(struct drm_i915_gem_object *obj)
> -{
> -	struct intel_engine_cs *ring;
> -	int ret;
> -
> -	lockdep_assert_held(&obj->base.dev->struct_mutex);
> -
> -	if (!obj->last_write_seqno)
> -		return 0;
> -
> -	ring = obj->ring;
> -
> -	if (i915_seqno_passed(ring->get_seqno(ring, true),
> -			      obj->last_write_seqno))
> -		return 0;
> -
> -	ret = i915_gem_check_olr(ring, obj->last_write_seqno);
> -	if (ret)
> -		return ret;
> -
> -	if (WARN_ON(!ring->irq_get(ring)))
> -		return 0;
> -
> -	return 1;
> -}
> +struct flip_work {
> +	struct work_struct work;
> +	struct i915_gem_request *rq;
> +	struct intel_crtc *crtc;
> +};
>  
> -void intel_notify_mmio_flip(struct intel_engine_cs *ring)
> +static void intel_mmio_flip_work(struct work_struct *work)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(ring->dev);
> -	struct intel_crtc *intel_crtc;
> -	unsigned long irq_flags;
> -	u32 seqno;
> -
> -	seqno = ring->get_seqno(ring, false);
> -
> -	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
> -	for_each_intel_crtc(ring->dev, intel_crtc) {
> -		struct intel_mmio_flip *mmio_flip;
> +	struct flip_work *flip = container_of(work, struct flip_work, work);
>  
> -		mmio_flip = &intel_crtc->mmio_flip;
> -		if (mmio_flip->seqno == 0)
> -			continue;
> -
> -		if (ring->id != mmio_flip->ring_id)
> -			continue;
> +	if (__i915_request_wait(flip->rq, false) == 0)
> +		intel_do_mmio_flip(flip->crtc);
>  
> -		if (i915_seqno_passed(seqno, mmio_flip->seqno)) {
> -			intel_do_mmio_flip(intel_crtc);
> -			mmio_flip->seqno = 0;
> -			ring->irq_put(ring);
> -		}
> -	}
> -	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
> +	i915_request_put(flip->rq);
> +	kfree(flip);
>  }
>  
> -static int intel_queue_mmio_flip(struct drm_device *dev,
> -				 struct drm_crtc *crtc,
> -				 struct drm_framebuffer *fb,
> -				 struct drm_i915_gem_object *obj,
> -				 struct intel_engine_cs *ring,
> -				 uint32_t flags)
> +static int intel_queue_mmio_flip(struct intel_crtc *crtc,
> +				 struct i915_gem_request *rq)
>  {
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> -	unsigned long irq_flags;
> +	struct flip_work *flip;
>  	int ret;
>  
> -	if (WARN_ON(intel_crtc->mmio_flip.seqno))
> +	if (WARN_ON(crtc->mmio_flip))
>  		return -EBUSY;
>  
> -	ret = intel_postpone_flip(obj);
> -	if (ret < 0)
> +	if (rq == NULL) {
> +		intel_do_mmio_flip(crtc);
> +		return 0;
> +	}
> +
> +	ret = i915_gem_check_olr(rq);
> +	if (ret)
>  		return ret;
> -	if (ret == 0) {
> -		intel_do_mmio_flip(intel_crtc);
> +
> +	if (i915_request_complete(rq)) {
> +		intel_do_mmio_flip(crtc);
>  		return 0;
>  	}
>  
> -	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
> -	intel_crtc->mmio_flip.seqno = obj->last_write_seqno;
> -	intel_crtc->mmio_flip.ring_id = obj->ring->id;
> -	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
> +	flip = kmalloc(sizeof(*flip), GFP_KERNEL);
> +	if (flip == NULL)
> +		return -ENOMEM;
>  
> -	/*
> -	 * Double check to catch cases where irq fired before
> -	 * mmio flip data was ready
> -	 */
> -	intel_notify_mmio_flip(obj->ring);
> +	INIT_WORK(&flip->work, intel_mmio_flip_work);
> +	flip->rq = i915_request_get(rq);
> +	flip->crtc = crtc;
> +	schedule_work(&flip->work);
>  	return 0;
>  }
>  
> @@ -9587,6 +9548,7 @@ static int intel_default_queue_flip(struct drm_device *dev,
>  	return -ENODEV;
>  }
>  
> +
>  static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  				struct drm_framebuffer *fb,
>  				struct drm_pending_vblank_event *event,
> @@ -9600,6 +9562,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  	enum pipe pipe = intel_crtc->pipe;
>  	struct intel_unpin_work *work;
>  	struct intel_engine_cs *ring;
> +	struct i915_gem_request *rq;
>  	unsigned long flags;
>  	int ret;
>  
> @@ -9684,28 +9647,44 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  	} else if (IS_IVYBRIDGE(dev)) {
>  		ring = &dev_priv->ring[BCS];
>  	} else if (INTEL_INFO(dev)->gen >= 7) {
> -		ring = obj->ring;
> +		ring = i915_request_ring(obj->last_write.request);
>  		if (ring == NULL || ring->id != RCS)
>  			ring = &dev_priv->ring[BCS];
>  	} else {
>  		ring = &dev_priv->ring[RCS];
>  	}
>  
> -	ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
> -	if (ret)
> -		goto cleanup_pending;
> +	if (use_mmio_flip(ring, obj)) {
> +		rq = obj->last_write.request;
>  
> -	work->gtt_offset =
> -		i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset;
> +		ret = intel_pin_and_fence_fb_obj(dev, obj, i915_request_ring(rq));
> +		if (ret)
> +			goto cleanup_pending;
> +
> +		work->gtt_offset =
> +			i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset;
> +
> +		ret = intel_queue_mmio_flip(intel_crtc, rq);
> +		if (ret)
> +			goto cleanup_unpin;
> +	} else {
> +		ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
> +		if (ret)
> +			goto cleanup_pending;
> +
> +		work->gtt_offset =
> +			i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset;
>  
> -	if (use_mmio_flip(ring, obj))
> -		ret = intel_queue_mmio_flip(dev, crtc, fb, obj, ring,
> -					    page_flip_flags);
> -	else
>  		ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, ring,
> -				page_flip_flags);
> -	if (ret)
> -		goto cleanup_unpin;
> +						   page_flip_flags);
> +		if (ret)
> +			goto cleanup_unpin;
> +
> +		rq = intel_ring_get_request(ring);
> +	}
> +
> +	work->flip_queued_request = i915_request_get(rq);
> +	work->enable_stall_check = true;
>  
>  	i915_gem_track_fb(work->old_fb_obj, obj,
>  			  INTEL_FRONTBUFFER_PRIMARY(pipe));
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 1b3d1d7..617af38 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -372,11 +372,6 @@ struct intel_pipe_wm {
>  	bool sprites_scaled;
>  };
>  
> -struct intel_mmio_flip {
> -	u32 seqno;
> -	u32 ring_id;
> -};
> -
>  struct intel_crtc {
>  	struct drm_crtc base;
>  	enum pipe pipe;
> @@ -426,7 +421,7 @@ struct intel_crtc {
>  	} wm;
>  
>  	int scanline_offset;
> -	struct intel_mmio_flip mmio_flip;
> +	struct i915_gem_request *mmio_flip;
>  };
>  
>  struct intel_plane_wm_parameters {
> @@ -657,6 +652,7 @@ struct intel_unpin_work {
>  #define INTEL_FLIP_COMPLETE	2
>  	u32 flip_count;
>  	u32 gtt_offset;
> +	struct i915_gem_request *flip_queued_request;
>  	bool enable_stall_check;
>  };
>  
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 6b5f416..bbcc0e6 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -122,7 +122,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
>  	list_for_each_entry(vma, vmas, exec_list) {
>  		struct drm_i915_gem_object *obj = vma->obj;
>  
> -		ret = i915_gem_object_sync(obj, ring);
> +		ret = i915_gem_object_sync(obj, ring, obj->base.pending_write_domain == 0);
>  		if (ret)
>  			return ret;
>  
> @@ -262,30 +262,11 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
>  	/* TODO: how to submit a context to the ELSP is not here yet */
>  }
>  
> -static int logical_ring_alloc_seqno(struct intel_engine_cs *ring)
> -{
> -	if (ring->outstanding_lazy_seqno)
> -		return 0;
> -
> -	if (ring->preallocated_lazy_request == NULL) {
> -		struct drm_i915_gem_request *request;
> -
> -		request = kmalloc(sizeof(*request), GFP_KERNEL);
> -		if (request == NULL)
> -			return -ENOMEM;
> -
> -		ring->preallocated_lazy_request = request;
> -	}
> -
> -	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
> -}
> -
> -static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
> -				     int bytes)
> +static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
> +				       int bytes)
>  {
>  	struct intel_engine_cs *ring = ringbuf->ring;
> -	struct drm_i915_gem_request *request;
> -	u32 seqno = 0;
> +	struct i915_gem_request *rq;
>  	int ret;
>  
>  	if (ringbuf->last_retired_head != -1) {
> @@ -297,24 +278,20 @@ static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
>  			return 0;
>  	}
>  
> -	list_for_each_entry(request, &ring->request_list, list) {
> -		if (__intel_ring_space(request->tail, ringbuf->tail,
> -				       ringbuf->size) >= bytes) {
> -			seqno = request->seqno;
> +	list_for_each_entry(rq, &ring->request_list, list)
> +		if (__intel_ring_space(rq->tail, ringbuf->tail, ringbuf->size) >= bytes)
>  			break;
> -		}
> -	}
>  
> -	if (seqno == 0)
> +	if (rq == list_entry(&ring->request_list, typeof(*rq), list))
>  		return -ENOSPC;
>  
> -	ret = i915_wait_seqno(ring, seqno);
> +	ret = i915_wait_request(rq);
>  	if (ret)
>  		return ret;
>  
> +	i915_gem_retire_requests_ring(ring);
>  	/* TODO: make sure we update the right ringbuffer's last_retired_head
>  	 * when retiring requests */
> -	i915_gem_retire_requests_ring(ring);
>  	ringbuf->head = ringbuf->last_retired_head;
>  	ringbuf->last_retired_head = -1;
>  
> @@ -322,58 +299,6 @@ static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
>  	return 0;
>  }
>  
> -static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
> -				       int bytes)
> -{
> -	struct intel_engine_cs *ring = ringbuf->ring;
> -	struct drm_device *dev = ring->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	unsigned long end;
> -	int ret;
> -
> -	ret = logical_ring_wait_request(ringbuf, bytes);
> -	if (ret != -ENOSPC)
> -		return ret;
> -
> -	/* Force the context submission in case we have been skipping it */
> -	intel_logical_ring_advance_and_submit(ringbuf);
> -
> -	/* With GEM the hangcheck timer should kick us out of the loop,
> -	 * leaving it early runs the risk of corrupting GEM state (due
> -	 * to running on almost untested codepaths). But on resume
> -	 * timers don't work yet, so prevent a complete hang in that
> -	 * case by choosing an insanely large timeout. */
> -	end = jiffies + 60 * HZ;
> -
> -	do {
> -		ringbuf->head = I915_READ_HEAD(ring);
> -		ringbuf->space = intel_ring_space(ringbuf);
> -		if (ringbuf->space >= bytes) {
> -			ret = 0;
> -			break;
> -		}
> -
> -		msleep(1);
> -
> -		if (dev_priv->mm.interruptible && signal_pending(current)) {
> -			ret = -ERESTARTSYS;
> -			break;
> -		}
> -
> -		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
> -					   dev_priv->mm.interruptible);
> -		if (ret)
> -			break;
> -
> -		if (time_after(jiffies, end)) {
> -			ret = -EBUSY;
> -			break;
> -		}
> -	} while (1);
> -
> -	return ret;
> -}
> -
>  static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf)
>  {
>  	uint32_t __iomem *virt;
> @@ -419,21 +344,14 @@ static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes)
>  int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
>  {
>  	struct intel_engine_cs *ring = ringbuf->ring;
> -	struct drm_device *dev = ring->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
>  	int ret;
>  
> -	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
> -				   dev_priv->mm.interruptible);
> -	if (ret)
> -		return ret;
> -
>  	ret = logical_ring_prepare(ringbuf, num_dwords * sizeof(uint32_t));
>  	if (ret)
>  		return ret;
>  
>  	/* Preallocate the olr before touching the ring */
> -	ret = logical_ring_alloc_seqno(ring);
> +	ret = intel_ring_alloc_request(ring);
>  	if (ret)
>  		return ret;
>  
> @@ -620,7 +538,7 @@ static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf,
>  	return 0;
>  }
>  
> -static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
> +static u32 gen8_get_seqno(struct intel_engine_cs *ring)
>  {
>  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
>  }
> @@ -648,7 +566,7 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
>  				(ring->status_page.gfx_addr +
>  				(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)));
>  	intel_logical_ring_emit(ringbuf, 0);
> -	intel_logical_ring_emit(ringbuf, ring->outstanding_lazy_seqno);
> +	intel_logical_ring_emit(ringbuf, intel_ring_get_seqno(ring));
>  	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
>  	intel_logical_ring_emit(ringbuf, MI_NOOP);
>  	intel_logical_ring_advance_and_submit(ringbuf);
> @@ -665,8 +583,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
>  
>  	intel_logical_ring_stop(ring);
>  	WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
> -	ring->preallocated_lazy_request = NULL;
> -	ring->outstanding_lazy_seqno = 0;
> +
> +	kfree(ring->preallocated_request);
> +	ring->preallocated_request = NULL;
>  
>  	if (ring->cleanup)
>  		ring->cleanup(ring);
> @@ -689,7 +608,9 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
>  	ring->buffer = NULL;
>  
>  	ring->dev = dev;
> -	INIT_LIST_HEAD(&ring->active_list);
> +	INIT_LIST_HEAD(&ring->read_list);
> +	INIT_LIST_HEAD(&ring->write_list);
> +	INIT_LIST_HEAD(&ring->fence_list);
>  	INIT_LIST_HEAD(&ring->request_list);
>  	init_waitqueue_head(&ring->irq_queue);
>  
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index dc2f4f2..42ebbf9 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -182,7 +182,7 @@ struct intel_overlay {
>  	u32 flip_addr;
>  	struct drm_i915_gem_object *reg_bo;
>  	/* flip handling */
> -	uint32_t last_flip_req;
> +	struct i915_gem_request *flip_request;
>  	void (*flip_tail)(struct intel_overlay *);
>  };
>  
> @@ -208,29 +208,49 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
>  		io_mapping_unmap(regs);
>  }
>  
> -static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
> -					 void (*tail)(struct intel_overlay *))
> +/* recover from an interruption due to a signal
> + * We have to be careful not to repeat work forever and make forward progress. */
> +static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
>  {
> -	struct drm_device *dev = overlay->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
>  	int ret;
>  
> -	BUG_ON(overlay->last_flip_req);
> -	ret = i915_add_request(ring, &overlay->last_flip_req);
> -	if (ret)
> -		return ret;
> +	if (overlay->flip_request == NULL)
> +		return 0;
>  
> -	overlay->flip_tail = tail;
> -	ret = i915_wait_seqno(ring, overlay->last_flip_req);
> +	ret = i915_wait_request(overlay->flip_request);
>  	if (ret)
>  		return ret;
> -	i915_gem_retire_requests(dev);
>  
> -	overlay->last_flip_req = 0;
> +	i915_request_put(overlay->flip_request);
> +	overlay->flip_request = NULL;
> +
> +	i915_gem_retire_requests(overlay->dev);
> +
> +	if (overlay->flip_tail)
> +		overlay->flip_tail(overlay);
> +
>  	return 0;
>  }
>  
> +static int intel_overlay_add_request(struct intel_overlay *overlay,
> +				     struct intel_engine_cs *ring,
> +				     void (*tail)(struct intel_overlay *))
> +{
> +	BUG_ON(overlay->flip_request);
> +	overlay->flip_request = i915_request_get(intel_ring_get_request(ring));
> +	overlay->flip_tail = tail;
> +
> +	return i915_add_request(ring);
> +}
> +
> +static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
> +					 struct intel_engine_cs *ring,
> +					 void (*tail)(struct intel_overlay *))
> +{
> +	intel_overlay_add_request(overlay, ring, tail);
> +	return intel_overlay_recover_from_interrupt(overlay);
> +}
> +
>  /* overlay needs to be disable in OCMD reg */
>  static int intel_overlay_on(struct intel_overlay *overlay)
>  {
> @@ -252,9 +272,9 @@ static int intel_overlay_on(struct intel_overlay *overlay)
>  	intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
>  	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
>  	intel_ring_emit(ring, MI_NOOP);
> -	intel_ring_advance(ring);
> +	__intel_ring_advance(ring);
>  
> -	return intel_overlay_do_wait_request(overlay, NULL);
> +	return intel_overlay_do_wait_request(overlay, ring, NULL);
>  }
>  
>  /* overlay needs to be enabled in OCMD reg */
> @@ -284,15 +304,18 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
>  
>  	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
>  	intel_ring_emit(ring, flip_addr);
> -	intel_ring_advance(ring);
> +	__intel_ring_advance(ring);
>  
> -	return i915_add_request(ring, &overlay->last_flip_req);
> +	return intel_overlay_add_request(overlay, ring, NULL);
>  }
>  
>  static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
>  {
>  	struct drm_i915_gem_object *obj = overlay->old_vid_bo;
>  
> +	i915_gem_track_fb(obj, NULL,
> +			  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
> +
>  	i915_gem_object_ggtt_unpin(obj);
>  	drm_gem_object_unreference(&obj->base);
>  
> @@ -352,33 +375,9 @@ static int intel_overlay_off(struct intel_overlay *overlay)
>  		intel_ring_emit(ring, flip_addr);
>  		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
>  	}
> -	intel_ring_advance(ring);
> -
> -	return intel_overlay_do_wait_request(overlay, intel_overlay_off_tail);
> -}
> -
> -/* recover from an interruption due to a signal
> - * We have to be careful not to repeat work forever an make forward progess. */
> -static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
> -{
> -	struct drm_device *dev = overlay->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> -	int ret;
> -
> -	if (overlay->last_flip_req == 0)
> -		return 0;
> +	__intel_ring_advance(ring);
>  
> -	ret = i915_wait_seqno(ring, overlay->last_flip_req);
> -	if (ret)
> -		return ret;
> -	i915_gem_retire_requests(dev);
> -
> -	if (overlay->flip_tail)
> -		overlay->flip_tail(overlay);
> -
> -	overlay->last_flip_req = 0;
> -	return 0;
> +	return intel_overlay_do_wait_request(overlay, ring, intel_overlay_off_tail);
>  }
>  
>  /* Wait for pending overlay flip and release old frame.
> @@ -387,10 +386,8 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
>   */
>  static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
>  {
> -	struct drm_device *dev = overlay->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> -	int ret;
> +	struct drm_i915_private *dev_priv = to_i915(overlay->dev);
> +	int ret = 0;
>  
>  	/* Only wait if there is actually an old frame to release to
>  	 * guarantee forward progress.
> @@ -399,6 +396,8 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
>  		return 0;
>  
>  	if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
> +		struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> +
>  		/* synchronous slowpath */
>  		ret = intel_ring_begin(ring, 2);
>  		if (ret)
> @@ -406,20 +405,14 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
>  
>  		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
>  		intel_ring_emit(ring, MI_NOOP);
> -		intel_ring_advance(ring);
> +		__intel_ring_advance(ring);
>  
> -		ret = intel_overlay_do_wait_request(overlay,
> +		ret = intel_overlay_do_wait_request(overlay, ring,
>  						    intel_overlay_release_old_vid_tail);
> -		if (ret)
> -			return ret;
> -	}
> -
> -	intel_overlay_release_old_vid_tail(overlay);
> +	} else
> +		intel_overlay_release_old_vid_tail(overlay);
>  
> -
> -	i915_gem_track_fb(overlay->old_vid_bo, NULL,
> -			  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
> -	return 0;
> +	return ret;
>  }
>  
>  struct put_image_params {
> @@ -821,12 +814,7 @@ int intel_overlay_switch_off(struct intel_overlay *overlay)
>  	iowrite32(0, &regs->OCMD);
>  	intel_overlay_unmap_regs(overlay, regs);
>  
> -	ret = intel_overlay_off(overlay);
> -	if (ret != 0)
> -		return ret;
> -
> -	intel_overlay_off_tail(overlay);
> -	return 0;
> +	return intel_overlay_off(overlay);
>  }
>  
>  static int check_overlay_possible_on_crtc(struct intel_overlay *overlay,
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 13543f8..ee656ea 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -750,7 +750,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,
>  					   PIPE_CONTROL_FLUSH_ENABLE);
>  		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
>  		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
> -		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
> +		intel_ring_emit(signaller, intel_ring_get_seqno(signaller));
>  		intel_ring_emit(signaller, 0);
>  		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
>  					   MI_SEMAPHORE_TARGET(waiter->id));
> @@ -787,7 +787,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,
>  		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
>  					   MI_FLUSH_DW_USE_GTT);
>  		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
> -		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
> +		intel_ring_emit(signaller, intel_ring_get_seqno(signaller));
>  		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
>  					   MI_SEMAPHORE_TARGET(waiter->id));
>  		intel_ring_emit(signaller, 0);
> @@ -818,7 +818,7 @@ static int gen6_signal(struct intel_engine_cs *signaller,
>  		if (mbox_reg != GEN6_NOSYNC) {
>  			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
>  			intel_ring_emit(signaller, mbox_reg);
> -			intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
> +			intel_ring_emit(signaller, intel_ring_get_seqno(signaller));
>  		}
>  	}
>  
> @@ -853,7 +853,7 @@ gen6_add_request(struct intel_engine_cs *ring)
>  
>  	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
>  	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
> -	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
> +	intel_ring_emit(ring, intel_ring_get_seqno(ring));
>  	intel_ring_emit(ring, MI_USER_INTERRUPT);
>  	__intel_ring_advance(ring);
>  
> @@ -971,7 +971,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
>  			PIPE_CONTROL_WRITE_FLUSH |
>  			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
>  	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
> -	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
> +	intel_ring_emit(ring, intel_ring_get_seqno(ring));
>  	intel_ring_emit(ring, 0);
>  	PIPE_CONTROL_FLUSH(ring, scratch_addr);
>  	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
> @@ -990,7 +990,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
>  			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
>  			PIPE_CONTROL_NOTIFY);
>  	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
> -	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
> +	intel_ring_emit(ring, intel_ring_get_seqno(ring));
>  	intel_ring_emit(ring, 0);
>  	__intel_ring_advance(ring);
>  
> @@ -998,21 +998,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
>  }
>  
>  static u32
> -gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
> -{
> -	/* Workaround to force correct ordering between irq and seqno writes on
> -	 * ivb (and maybe also on snb) by reading from a CS register (like
> -	 * ACTHD) before reading the status page. */
> -	if (!lazy_coherency) {
> -		struct drm_i915_private *dev_priv = ring->dev->dev_private;
> -		POSTING_READ(RING_ACTHD(ring->mmio_base));
> -	}
> -
> -	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
> -}
> -
> -static u32
> -ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
> +ring_get_seqno(struct intel_engine_cs *ring)
>  {
>  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
>  }
> @@ -1024,7 +1010,7 @@ ring_set_seqno(struct intel_engine_cs *ring, u32 seqno)
>  }
>  
>  static u32
> -pc_render_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
> +pc_render_get_seqno(struct intel_engine_cs *ring)
>  {
>  	return ring->scratch.cpu_page[0];
>  }
> @@ -1230,7 +1216,7 @@ i9xx_add_request(struct intel_engine_cs *ring)
>  
>  	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
>  	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
> -	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
> +	intel_ring_emit(ring, intel_ring_get_seqno(ring));
>  	intel_ring_emit(ring, MI_USER_INTERRUPT);
>  	__intel_ring_advance(ring);
>  
> @@ -1247,6 +1233,11 @@ gen6_ring_get_irq(struct intel_engine_cs *ring)
>  	if (!dev->irq_enabled)
>  	       return false;
>  
> +	/* It looks like we need to prevent the gt from suspending while waiting
> +	 * for a notify irq, otherwise irqs seem to get lost on at least the
> +	 * blt/bsd rings on ivb. */
> +	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
> +
>  	spin_lock_irqsave(&dev_priv->irq_lock, flags);
>  	if (ring->irq_refcount++ == 0) {
>  		if (HAS_L3_DPF(dev) && ring->id == RCS)
> @@ -1278,6 +1269,8 @@ gen6_ring_put_irq(struct intel_engine_cs *ring)
>  		gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
>  	}
>  	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
> +
> +	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
>  }
>  
>  static bool
> @@ -1610,7 +1603,8 @@ static int intel_init_ring_buffer(struct drm_device *dev,
>  	}
>  
>  	ring->dev = dev;
> -	INIT_LIST_HEAD(&ring->active_list);
> +	INIT_LIST_HEAD(&ring->read_list);
> +	INIT_LIST_HEAD(&ring->write_list);
>  	INIT_LIST_HEAD(&ring->request_list);
>  	ringbuf->size = 32 * PAGE_SIZE;
>  	ringbuf->ring = ring;
> @@ -1671,8 +1665,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
>  	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
>  
>  	intel_destroy_ringbuffer_obj(ringbuf);
> -	ring->preallocated_lazy_request = NULL;
> -	ring->outstanding_lazy_seqno = 0;
> +	ring->preallocated_request = NULL;
>  
>  	if (ring->cleanup)
>  		ring->cleanup(ring);
> @@ -1688,8 +1681,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
>  static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
>  {
>  	struct intel_ringbuffer *ringbuf = ring->buffer;
> -	struct drm_i915_gem_request *request;
> -	u32 seqno = 0;
> +	struct i915_gem_request *rq;
>  	int ret;
>  
>  	if (ringbuf->last_retired_head != -1) {
> @@ -1701,18 +1693,14 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
>  			return 0;
>  	}
>  
> -	list_for_each_entry(request, &ring->request_list, list) {
> -		if (__intel_ring_space(request->tail, ringbuf->tail,
> -				       ringbuf->size) >= n) {
> -			seqno = request->seqno;
> +	list_for_each_entry(rq, &ring->request_list, list)
> +		if (__intel_ring_space(rq->tail, ringbuf->tail, ringbuf->size) >= n)
>  			break;
> -		}
> -	}
>  
> -	if (seqno == 0)
> +	if (rq == list_entry(&ring->request_list, typeof(*rq), list))
>  		return -ENOSPC;
>  
> -	ret = i915_wait_seqno(ring, seqno);
> +	ret = i915_wait_request(rq);
>  	if (ret)
>  		return ret;
>  
> @@ -1729,6 +1717,7 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
>  	struct drm_device *dev = ring->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_ringbuffer *ringbuf = ring->buffer;
> +	unsigned reset_counter;
>  	unsigned long end;
>  	int ret;
>  
> @@ -1739,6 +1728,13 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
>  	/* force the tail write in case we have been skipping them */
>  	__intel_ring_advance(ring);
>  
> +	reset_counter = 0;
> +	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
> +				   dev_priv->mm.interruptible,
> +				   &reset_counter);
> +	if (ret)
> +		return ret;
> +
>  	/* With GEM the hangcheck timer should kick us out of the loop,
>  	 * leaving it early runs the risk of corrupting GEM state (due
>  	 * to running on almost untested codepaths). But on resume
> @@ -1755,6 +1751,12 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
>  			break;
>  		}
>  
> +		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
> +					   dev_priv->mm.interruptible,
> +					   &reset_counter);
> +		if (ret)
> +			return ret;
> +
>  		if (!drm_core_check_feature(dev, DRIVER_MODESET) &&
>  		    dev->primary->master) {
>  			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
> @@ -1764,16 +1766,6 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
>  
>  		msleep(1);
>  
> -		if (dev_priv->mm.interruptible && signal_pending(current)) {
> -			ret = -ERESTARTSYS;
> -			break;
> -		}
> -
> -		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
> -					   dev_priv->mm.interruptible);
> -		if (ret)
> -			break;
> -
>  		if (time_after(jiffies, end)) {
>  			ret = -EBUSY;
>  			break;
> @@ -1808,12 +1800,11 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
>  
>  int intel_ring_idle(struct intel_engine_cs *ring)
>  {
> -	u32 seqno;
>  	int ret;
>  
>  	/* We need to add any requests required to flush the objects and ring */
> -	if (ring->outstanding_lazy_seqno) {
> -		ret = i915_add_request(ring, NULL);
> +	if (ring->preallocated_request) {
> +		ret = i915_add_request(ring);
>  		if (ret)
>  			return ret;
>  	}
> @@ -1822,30 +1813,46 @@ int intel_ring_idle(struct intel_engine_cs *ring)
>  	if (list_empty(&ring->request_list))
>  		return 0;
>  
> -	seqno = list_entry(ring->request_list.prev,
> -			   struct drm_i915_gem_request,
> -			   list)->seqno;
> -
> -	return i915_wait_seqno(ring, seqno);
> +	return i915_wait_request(container_of(ring->request_list.prev,
> +					      struct i915_gem_request,
> +					      list));
>  }
>  
> -static int
> -intel_ring_alloc_seqno(struct intel_engine_cs *ring)
> +int
> +intel_ring_alloc_request(struct intel_engine_cs *ring)
>  {
> -	if (ring->outstanding_lazy_seqno)
> +	struct drm_i915_private *dev_priv = to_i915(ring->dev);
> +	struct i915_gem_request *rq;
> +	int ret;
> +
> +	if (ring->preallocated_request)
>  		return 0;
>  
> -	if (ring->preallocated_lazy_request == NULL) {
> -		struct drm_i915_gem_request *request;
> +	rq = kmalloc(sizeof(*rq), GFP_KERNEL);
> +	if (rq == NULL)
> +		return -ENOMEM;
>  
> -		request = kmalloc(sizeof(*request), GFP_KERNEL);
> -		if (request == NULL)
> -			return -ENOMEM;
> +	rq->reset_counter = 0;
> +	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
> +				   dev_priv->mm.interruptible,
> +				   &rq->reset_counter);
> +	if (ret)
> +		goto err;
>  
> -		ring->preallocated_lazy_request = request;
> -	}
> +	ret = i915_gem_get_seqno(ring->dev, &rq->seqno);
> +	if (ret)
> +		goto err;
>  
> -	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
> +	kref_init(&rq->kref);
> +	rq->ring = ring;
> +	rq->completed = false;
> +
> +	ring->preallocated_request = rq;
> +	return 0;
> +
> +err:
> +	kfree(rq);
> +	return ret;
>  }
>  
>  static int __intel_ring_prepare(struct intel_engine_cs *ring,
> @@ -1872,20 +1879,20 @@ static int __intel_ring_prepare(struct intel_engine_cs *ring,
>  int intel_ring_begin(struct intel_engine_cs *ring,
>  		     int num_dwords)
>  {
> -	struct drm_i915_private *dev_priv = ring->dev->dev_private;
>  	int ret;
>  
> -	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
> -				   dev_priv->mm.interruptible);
> +	/* Preallocate the olr before touching the ring, */
> +	ret = intel_ring_alloc_request(ring);
>  	if (ret)
>  		return ret;
>  
> +	/* and by holding the seqno before we prepare we prevent recursion */
>  	ret = __intel_ring_prepare(ring, num_dwords * sizeof(uint32_t));
>  	if (ret)
>  		return ret;
>  
> -	/* Preallocate the olr before touching the ring */
> -	ret = intel_ring_alloc_seqno(ring);
> +	/* but we may flush the seqno during prepare. */
> +	ret = intel_ring_alloc_request(ring);
>  	if (ret)
>  		return ret;
>  
> @@ -1920,7 +1927,7 @@ void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
>  	struct drm_device *dev = ring->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  
> -	BUG_ON(ring->outstanding_lazy_seqno);
> +	BUG_ON(ring->preallocated_request);
>  
>  	if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
>  		I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
> @@ -2140,7 +2147,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>  		ring->irq_get = gen8_ring_get_irq;
>  		ring->irq_put = gen8_ring_put_irq;
>  		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
> -		ring->get_seqno = gen6_ring_get_seqno;
> +		ring->get_seqno = ring_get_seqno;
>  		ring->set_seqno = ring_set_seqno;
>  		if (i915_semaphore_is_enabled(dev)) {
>  			WARN_ON(!dev_priv->semaphore_obj);
> @@ -2156,7 +2163,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>  		ring->irq_get = gen6_ring_get_irq;
>  		ring->irq_put = gen6_ring_put_irq;
>  		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
> -		ring->get_seqno = gen6_ring_get_seqno;
> +		ring->get_seqno = ring_get_seqno;
>  		ring->set_seqno = ring_set_seqno;
>  		if (i915_semaphore_is_enabled(dev)) {
>  			ring->semaphore.sync_to = gen6_ring_sync;
> @@ -2297,7 +2304,8 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
>  	ring->cleanup = render_ring_cleanup;
>  
>  	ring->dev = dev;
> -	INIT_LIST_HEAD(&ring->active_list);
> +	INIT_LIST_HEAD(&ring->read_list);
> +	INIT_LIST_HEAD(&ring->write_list);
>  	INIT_LIST_HEAD(&ring->request_list);
>  
>  	ringbuf->size = size;
> @@ -2345,7 +2353,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
>  			ring->write_tail = gen6_bsd_ring_write_tail;
>  		ring->flush = gen6_bsd_ring_flush;
>  		ring->add_request = gen6_add_request;
> -		ring->get_seqno = gen6_ring_get_seqno;
> +		ring->get_seqno = ring_get_seqno;
>  		ring->set_seqno = ring_set_seqno;
>  		if (INTEL_INFO(dev)->gen >= 8) {
>  			ring->irq_enable_mask =
> @@ -2423,7 +2431,7 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev)
>  	ring->mmio_base = GEN8_BSD2_RING_BASE;
>  	ring->flush = gen6_bsd_ring_flush;
>  	ring->add_request = gen6_add_request;
> -	ring->get_seqno = gen6_ring_get_seqno;
> +	ring->get_seqno = ring_get_seqno;
>  	ring->set_seqno = ring_set_seqno;
>  	ring->irq_enable_mask =
>  			GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
> @@ -2453,7 +2461,7 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
>  	ring->write_tail = ring_write_tail;
>  	ring->flush = gen6_ring_flush;
>  	ring->add_request = gen6_add_request;
> -	ring->get_seqno = gen6_ring_get_seqno;
> +	ring->get_seqno = ring_get_seqno;
>  	ring->set_seqno = ring_set_seqno;
>  	if (INTEL_INFO(dev)->gen >= 8) {
>  		ring->irq_enable_mask =
> @@ -2510,7 +2518,7 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
>  	ring->write_tail = ring_write_tail;
>  	ring->flush = gen6_ring_flush;
>  	ring->add_request = gen6_add_request;
> -	ring->get_seqno = gen6_ring_get_seqno;
> +	ring->get_seqno = ring_get_seqno;
>  	ring->set_seqno = ring_set_seqno;
>  
>  	if (INTEL_INFO(dev)->gen >= 8) {
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 24437da..eb4875a 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -126,6 +126,7 @@ struct  intel_engine_cs {
>  		VCS2
>  	} id;
>  #define I915_NUM_RINGS 5
> +#define I915_NUM_RING_BITS 4
>  #define LAST_USER_RING (VECS + 1)
>  	u32		mmio_base;
>  	struct		drm_device *dev;
> @@ -153,8 +154,7 @@ struct  intel_engine_cs {
>  	 * seen value is good enough. Note that the seqno will always be
>  	 * monotonic, even if not coherent.
>  	 */
> -	u32		(*get_seqno)(struct intel_engine_cs *ring,
> -				     bool lazy_coherency);
> +	u32		(*get_seqno)(struct intel_engine_cs *ring);
>  	void		(*set_seqno)(struct intel_engine_cs *ring,
>  				     u32 seqno);
>  	int		(*dispatch_execbuffer)(struct intel_engine_cs *ring,
> @@ -242,7 +242,7 @@ struct  intel_engine_cs {
>  	 *
>  	 * A reference is held on the buffer while on this list.
>  	 */
> -	struct list_head active_list;
> +	struct list_head read_list, write_list, fence_list;
>  
>  	/**
>  	 * List of breadcrumbs associated with GPU requests currently
> @@ -253,8 +253,7 @@ struct  intel_engine_cs {
>  	/**
>  	 * Do we have some not yet emitted requests outstanding?
>  	 */
> -	struct drm_i915_gem_request *preallocated_lazy_request;
> -	u32 outstanding_lazy_seqno;
> +	struct i915_gem_request *preallocated_request;
>  	bool gpu_caches_dirty;
>  	bool fbc_dirty;
>  
> @@ -395,6 +394,7 @@ int intel_ring_space(struct intel_ringbuffer *ringbuf);
>  bool intel_ring_stopped(struct intel_engine_cs *ring);
>  void __intel_ring_advance(struct intel_engine_cs *ring);
>  
> +int __must_check intel_ring_alloc_request(struct intel_engine_cs *ring);
>  int __must_check intel_ring_idle(struct intel_engine_cs *ring);
>  void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno);
>  int intel_ring_flush_all_caches(struct intel_engine_cs *ring);
> @@ -417,12 +417,15 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
>  	return ringbuf->tail;
>  }
>  
> -static inline u32 intel_ring_get_seqno(struct intel_engine_cs *ring)
> +static inline struct i915_gem_request *intel_ring_get_request(struct intel_engine_cs *ring)
>  {
> -	BUG_ON(ring->outstanding_lazy_seqno == 0);
> -	return ring->outstanding_lazy_seqno;
> +	BUG_ON(ring->preallocated_request == 0);
> +	return ring->preallocated_request;
>  }
>  
> +/* C - the bringer of joy */
> +#define intel_ring_get_seqno(ring) intel_ring_get_request(ring)->seqno
> +
>  static inline void i915_trace_irq_get(struct intel_engine_cs *ring, u32 seqno)
>  {
>  	if (ring->trace_irq_seqno == 0 && ring->irq_get(ring))
> -- 
> 1.9.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 5/5] drm/i915: s/seqno/request/ tracking inside objects
  2014-08-27  9:55   ` Daniel Vetter
@ 2014-08-27 10:39     ` Chris Wilson
  2014-09-02 10:06       ` John Harrison
  0 siblings, 1 reply; 20+ messages in thread
From: Chris Wilson @ 2014-08-27 10:39 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Daniel Vetter, intel-gfx, Brad Volkin

On Wed, Aug 27, 2014 at 11:55:34AM +0200, Daniel Vetter wrote:
> On Tue, Aug 12, 2014 at 08:05:51PM +0100, Chris Wilson wrote:
> > At the heart of this change is that the seqno is a too low level of an
> > abstraction to handle the growing complexities of command tracking, both
> > with the introduction of multiple command queues with execbuffer and the
> > potential for reordering with a scheduler. On top of the seqno we have
> > the request. Conceptually this is just a fence, but it also has
> > substantial bookkeeping of its own in order to track the context and
> > batch in flight, for example. It is the central structure upon which we
> > can extend with dependency tracking et al.
> > 
> > As regards the objects, they were using the seqno as a simple fence,
> > upon which we check or even wait for command completion. This patch
> > exchanges that seqno/ring pair with the request itself. For the
> > majority, lifetime of the request is ordered by how we retire objects
> > then requests. However, both the unlocked waits and probing elsewhere do
> > not tie into the normal request lifetimes and so we need to introduce a
> > kref. Extending the objects to use the request as the fence naturally
> > extends to segregating read/write fence tracking. This has significance
> > for it reduces the number of semaphores we need to emit, reducing the
> > likelihood of #54226, and improving performance overall.
> > 
> > v2: Rebase and split out the orthogonal tweaks.
> > 
> > A silly happened with this patch. It seemed to nullify our earlier
> > seqno-vs-interrupt w/a. I could not spot why, but gen6+ started to fail
> > with missed interrupts (a good test of our robustness handling). So I
> > ripped out the existing ACTHD read and replaced it with a RING_HEAD to
> > manually check whether the request is complete. That also had the nice
> > consequence of forcing __wait_request() to be the central arbiter of
> > request completion.
> > 
> > The keener-eyed reviewer will also spot that the reset_counter is moved
> > into the request, simplifying __wait_request() callsites and reducing the
> > number of atomic reads by virtue of moving the check for a pending GPU
> > reset to the endpoints of GPU access.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> > Cc: Oscar Mateo <oscar.mateo@intel.com>
> > Cc: Brad Volkin <bradley.d.volkin@intel.com>
> > Cc: "Kukanova, Svetlana" <svetlana.kukanova@intel.com>
> 
> So I've tried to split this up and totally failed. Non-complete list of
> things I didn't manage to untangle:
> 
> - The mmio flip refactoring.

Yeah, that's a fairly instrumental part of the patch. It's not
complicated but it does benefit a lot from using requests to both make
the decision cleaner and the tracking correct.
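
For anyone skimming past the diff, this is roughly what it reduces to: the
decision collapses to comparing against the engine of the last write request,
and the tracking to holding a reference on that request for the async worker.
A condensed sketch of the hunks above (ignoring the module override and the
execlists special case):

static bool use_mmio_flip(struct intel_engine_cs *ring,
			  struct drm_i915_gem_object *obj)
{
	/* Flip via mmio when the last writer is a different engine to the
	 * one we would otherwise queue the flip on. */
	return ring != i915_request_ring(obj->last_write.request);
}

static void intel_mmio_flip_work(struct work_struct *work)
{
	struct flip_work *flip = container_of(work, struct flip_work, work);

	/* The worker owns a reference, so the request cannot be freed by
	 * retirement whilst we sleep on it. */
	if (__i915_request_wait(flip->rq, false) == 0)
		intel_do_mmio_flip(flip->crtc);

	i915_request_put(flip->rq);
	kfree(flip);
}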

> - The overlay request tracking refactoring.

Again, the api changes allow the code to be compacted.
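
For instance, with the overlay keeping a struct i915_gem_request pointer
instead of a (ring, seqno) pair, the recovery path condenses to waiting on
that request and dropping the reference. This is the shape from the patch
above, with the retire and flip_tail handling elided:

static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
{
	int ret;

	if (overlay->flip_request == NULL)
		return 0;

	ret = i915_wait_request(overlay->flip_request);
	if (ret)
		return ret;

	/* Drop the reference taken when the flip was queued. */
	i915_request_put(overlay->flip_request);
	overlay->flip_request = NULL;

	return 0;
}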

> - The switch to multiple parallel readers with the resulting cascading
>   changes all over.

I thought that was fairly isolated to the gem object. It's glossed over
in errors/debugfs for simplicity, which deserves to be fixed given a
compact representation of all the requests, and so would kill the icky
code to find the "last" request.
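
To be concrete about the object side: the bookkeeping amounts to one write
fence plus a read fence per engine, something along these lines. (Only
last_write.request appears verbatim in the patch; the other names below are
made up for illustration.)

struct i915_gem_object_fences {
	/* exclusive fence: the last request that wrote to the object */
	struct {
		struct i915_gem_request *request;
	} last_write;

	/* shared fences: the last request to read the object on each engine */
	struct {
		struct i915_gem_request *request;
	} last_read[I915_NUM_RINGS];
};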

> - The missed irq w/a prep changes. It's easy to split out the change to
>   re-add the rc6 reference and to ditch the ACT_HEAD read, but the commit
>   message talks about instead reading the RING_HEAD, and I just didn't
>   spot the changes relevant to that in this big diff. Was probably looking
>   in the wrong place.

I did mention that I tried that earlier on the ml, but missed saying in
the changelog that the forcewake reference didn't unbreak the old w/a.

> - The move_to_active/retire refactoring. There's a pile of code movement
>   in there, but I couldn't spot really what's just refactoring and what is
>   real changed needed for the s/seqno/request/ change.

move_to_active(): everything, since that now operates on the request.
retire(): not a lot changes there, just the extra requests being tracked and
the strict lifetime ordering of the references the object holds on its
requests.
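
On the retire side the lifetime rule boils down to something like the
following. (A sketch only: the helper name is hypothetical, and it assumes
the object takes its own reference on the last_write request.)

static void
i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
{
	/* Objects are retired before their requests, so this reference is
	 * only dropped once nothing can look at last_write any more. */
	i915_request_put(obj->last_write.request);
	obj->last_write.request = NULL;
}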

> - De-duping some of the logical_ring_ functions. Spotted because it
>   conflicted (but was easy to hack around), still this shouldn't really be
>   part of this.

> Things I've spotted which could be split out but amount to a decent
> rewrite of the patch:
> - Getting at the ring of the last write to an object. Although I guess
>   without the multi-reader stuff and the pageflip refactoring that would
>   pretty much disappear.

Why? Who uses the ring of the last_write request? We compare engines in
pageflip but that's about it.
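
In the pageflip path that amounts to choosing the engine for the flip, e.g.
the gen7+ ring selection, which is a one-liner once wrapped up (sketch, with
a made-up helper name):

static struct intel_engine_cs *
pick_flip_ring(struct drm_i915_private *dev_priv,
	       struct drm_i915_gem_object *obj)
{
	/* Reuse the engine of the last write if it was the render ring,
	 * otherwise fall back to the blitter. */
	struct intel_engine_cs *ring = i915_request_ring(obj->last_write.request);

	if (ring == NULL || ring->id != RCS)
		ring = &dev_priv->ring[BCS];

	return ring;
}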

> - Probably similar helpers for seqno if we don't switch to parallel writes
>   in the same patch.
> 
> Splitting out the renames was easy, but that reduced the diff by less than
> 5% in size. So didn't help in reviewing the patch at all.

The actual rename patch is larger than this one (v2).
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 5/5] drm/i915: s/seqno/request/ tracking inside objects
  2014-08-27 10:39     ` Chris Wilson
@ 2014-09-02 10:06       ` John Harrison
  2014-09-06  9:12         ` Chris Wilson
  0 siblings, 1 reply; 20+ messages in thread
From: John Harrison @ 2014-09-02 10:06 UTC (permalink / raw)
  To: intel-gfx

Hello,

Is this patch going to be split up into more manageable pieces? I tried 
to apply it to a tree fetched yesterday and got a very large number of 
conflicts. I don't know whether that is because more execlist patches 
have been merged or if it is other random changes that have broken it or 
if I am just missing earlier patches in the set.

The patch has been sent with subjects of '[PATCH]', '[PATCH 5/5]' and 
'[PATCH 3/3]'. However, all three emails seem to be the same humongous 
single part patch and I can't find any 0/3, 4/5, etc. emails. Am I 
missing some prep work patches without which the final monster patch is 
never going to apply?

Thanks,
John.


On 27/08/2014 11:39, Chris Wilson wrote:
> On Wed, Aug 27, 2014 at 11:55:34AM +0200, Daniel Vetter wrote:
>> On Tue, Aug 12, 2014 at 08:05:51PM +0100, Chris Wilson wrote:
>>> At the heart of this change is that the seqno is a too low level of an
>>> abstraction to handle the growing complexities of command tracking, both
>>> with the introduction of multiple command queues with execbuffer and the
>>> potential for reordering with a scheduler. On top of the seqno we have
>>> the request. Conceptually this is just a fence, but it also has
>>> substantial bookkeeping of its own in order to track the context and
>>> batch in flight, for example. It is the central structure upon which we
>>> can extend with dependency tracking et al.
>>>
>>> As regards the objects, they were using the seqno as a simple fence,
>>> upon which we check or even wait for command completion. This patch
>>> exchanges that seqno/ring pair with the request itself. For the
>>> majority, lifetime of the request is ordered by how we retire objects
>>> then requests. However, both the unlocked waits and probing elsewhere do
>>> not tie into the normal request lifetimes and so we need to introduce a
>>> kref. Extending the objects to use the request as the fence naturally
>>> extends to segregating read/write fence tracking. This has significance
>>> for it reduces the number of semaphores we need to emit, reducing the
>>> likelihood of #54226, and improving performance overall.
>>>
>>> v2: Rebase and split out the orthogonal tweaks.
>>>
>>> A silly happened with this patch. It seemed to nullify our earlier
>>> seqno-vs-interrupt w/a. I could not spot why, but gen6+ started to fail
>>> with missed interrupts (a good test of our robustness handling). So I
>>> ripped out the existing ACTHD read and replaced it with a RING_HEAD to
>>> manually check whether the request is complete. That also had the nice
>>> consequence of forcing __wait_request() to be the central arbiter of
>>> request completion.
>>>
>>> The keener-eyed reviewer will also spot that the reset_counter is moved
>>> into the request, simplifying __wait_request() callsites and reducing the
>>> number of atomic reads by virtue of moving the check for a pending GPU
>>> reset to the endpoints of GPU access.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
>>> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>>> Cc: Oscar Mateo <oscar.mateo@intel.com>
>>> Cc: Brad Volkin <bradley.d.volkin@intel.com>
>>> Cc: "Kukanova, Svetlana" <svetlana.kukanova@intel.com>
>> So I've tried to split this up and totally failed. Non-complete list of
>> things I didn't manage to untangle:
>>
>> - The mmio flip refactoring.
> Yeah, that's a fairly instrumental part of the patch. It's not
> complicated but it does benefit a lot from using requests to both make
> the decision cleaner and the tracking correct.
>
>> - The overlay request tracking refactoring.
> Again, the api changes allow the code to be compacted.
>
>> - The switch to multiple parallel readers with the resulting cascading
>>    changes all over.
> I thought that was fairly isolated to the gem object. It's glossed over
> in errors/debugfs for simplicity, which deserves to be fixed given a
> compact representation of all the requests, and so would kill the icky
> code to find the "last" request.
>
>> - The missed irq w/a prep changes. It's easy to split out the change to
>>    re-add the rc6 reference and to ditch the ACT_HEAD read, but the commit
>>    message talks about instead reading the RING_HEAD, and I just didn't
>>    spot the changes relevant to that in this big diff. Was probably looking
>>    in the wrong place.
> I did mention that I tried that earlier on the ml, but missed saying in
> the changelog that the forcewake reference didn't unbreak the old w/a.
>
>> - The move_to_active/retire refactoring. There's a pile of code movement
>>    in there, but I couldn't spot really what's just refactoring and what is
>>    real changed needed for the s/seqno/request/ change.
> move_to_active(): everything, since that now operates on the request.
> retire(): not a lot changes there, just the extra requests being tracked
> and the strict lifetime ordering of the references the object holds on
> its requests.
>
>> - De-duping some of the logical_ring_ functions. Spotted because it
>>    conflicted (but was easy to hack around), still this shouldn't really be
>>    part of this.
>> Things I've spotted which could be split out but amount to a decent
>> rewrite of the patch:
>> - Getting at the ring of the last write to an object. Although I guess
>>    without the multi-reader stuff and the pageflip refactoring that would
>>    pretty much disappear.
> Why? Who uses the ring of the last_write request? We compare engines in
> pageflip but that's about it.
>
>> - Probably similar helpers for seqno if we don't switch to parallel writes
>>    in the same patch.
>>
>> Splitting out the renames was easy, but that reduced the diff by less than
>> 5% in size. So didn't help in reviewing the patch at all.
> The actual rename patch is larger than this one (v2).
> -Chris
>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 5/5] drm/i915: s/seqno/request/ tracking inside objects
  2014-09-02 10:06       ` John Harrison
@ 2014-09-06  9:12         ` Chris Wilson
  0 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2014-09-06  9:12 UTC (permalink / raw)
  To: John Harrison; +Cc: intel-gfx

On Tue, Sep 02, 2014 at 11:06:29AM +0100, John Harrison wrote:
> Hello,
> 
> Is this patch going to be split up into more manageable pieces? I
> tried to apply it to a tree fetched yesterday and got a very large
> number of conflicts. I don't know whether that is because more
> execlist patches have been merged or if it is other random changes
> that have broken it or if I am just missing earlier patches in the
> set.
> 
> The patch has been sent with subjects of '[PATCH]', '[PATCH 5/5]'
> and '[PATCH 3/3]'. However, all three emails seem to be the same
> humongous single part patch and I can't find any 0/3, 4/5, etc.
> emails. Am I missing some prep work patches without which the final
> monster patch is never going to apply?

The earlier patches were already upstream, but then execlists caused
further conflicts.

There's a fairly mechanical and mundane API conversion spread over
i915_gem*.c which should be easy to skim over. The most subtle part is
defining the order in which engine, contexts and rings are created and
enabled. The patch splits out the setup and enabling so that rings can
be created as children of both engines and contects, and so that the
resume path is clearly defined and split out from the setup. Given the
new api, we can then de-duplicate all the execlist code spread across
i915_gem*.c, and part of that is moving more engine specific code out of
gem. Finally, requests work as fences.

The problem is that, as I see it, defining requests to be independent of
the submission mechanism requires changes in how the rings are accessed
right the way through to how the requests are used themselves, and for
the most part there is no intermediate step. But as usually happens I
cannot see the wood for the trees.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2014-09-06  9:12 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-08-12 19:05 [PATCH 1/5] drm/i915: Print captured bo for all VM in error state Chris Wilson
2014-08-12 19:05 ` [PATCH 2/5] drm/i915: Do not access stolen memory directly by the CPU, even for error capture Chris Wilson
2014-08-14 14:51   ` Mika Kuoppala
2014-08-14 19:35     ` Chris Wilson
2014-08-15 11:11   ` Mika Kuoppala
2014-08-15 18:07     ` Mika Kuoppala
2014-08-12 19:05 ` [PATCH 3/5] drm/i915: Remove num_pages parameter to i915_error_object_create() Chris Wilson
2014-08-15 18:07   ` Mika Kuoppala
2014-08-12 19:05 ` [PATCH 4/5] drm/i915: Suppress a WARN on reading an object back for a GPU hang Chris Wilson
2014-08-15 18:09   ` Mika Kuoppala
2014-08-25 21:27     ` Daniel Vetter
2014-08-12 19:05 ` [PATCH 5/5] drm/i915: s/seqno/request/ tracking inside objects Chris Wilson
2014-08-27  9:55   ` Daniel Vetter
2014-08-27 10:39     ` Chris Wilson
2014-09-02 10:06       ` John Harrison
2014-09-06  9:12         ` Chris Wilson
2014-08-13 14:50 ` [PATCH 1/5] drm/i915: Print captured bo for all VM in error state Mika Kuoppala
2014-08-14  6:50   ` Chris Wilson
2014-08-14 10:18     ` Mika Kuoppala
2014-08-14 15:03       ` Daniel Vetter
