* [PATCH] drm/i915: Remove OLR
@ 2014-12-19 14:41 John.C.Harrison
  2014-12-19 14:43 ` John Harrison
  2015-01-06 13:52 ` Daniel Vetter
  0 siblings, 2 replies; 3+ messages in thread
From: John.C.Harrison @ 2014-12-19 14:41 UTC (permalink / raw)
  To: Intel-GFX

From: John Harrison <John.C.Harrison@Intel.com>

The outstanding lazy request mechanism does not really work well with
a GPU scheduler. The scheduler expects each work packet, i.e. request
structure, to be a complete entity and to belong to one and only one
submitter, whereas the lazy mechanism allows work from lots of
different places to all be lumped together into a single request. It
also means that work is floating around in the system unowned and
untracked at various random points in time. This all causes headaches
for the scheduler.

This patch removes the need for the outstanding lazy request. It
converts all functions which would otherwise be relying on the OLR to
manage the request explicitly: either by allocating, passing and
submitting the request if they are the top level owner, or by simply
taking a request in as a parameter rather than pulling it out of the
magic global variable if they are a client. The OLR itself is left in,
along with a bunch of sanity-check asserts that it matches the request
being passed in as a parameter. However, it should now be safe to
remove completely.
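
Roughly, the new top-level calling pattern looks like the sketch
below. This is a simplified illustration of what the i915_gpu_idle()
conversion further down does (error handling trimmed), not code taken
verbatim from the diff:

	struct drm_i915_gem_request *req;
	int ret;

	/* The owner allocates the request explicitly up front... */
	ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
	if (ret)
		return ret;

	/* ...passes it to the workers, which now take a request rather
	 * than a ring... */
	ret = i915_switch_context(req, ring->default_context);
	if (ret) {
		i915_gem_request_unreference(req);
		return ret;
	}

	/* ...and submits it when done, instead of leaving an OLR
	 * floating around for someone else to flush. */
	ret = i915_add_request_no_flush(req);
	if (ret)
		i915_gem_request_unreference(req);
	return ret;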

Note that this patch is not intended as a final, shipping, isn't it
gorgeous, end product. It is merely a quick hack, done as the simplest
way to actually work out what the real sequence of events and the real
ownership of work is in certain circumstances, most particularly with
display and overlay work. However, I would like to get agreement that
it is a good direction to go in and that removing the OLR would be a
good thing. Or, to put it another way, is it worth me trying to break
this patch into a set of manageable items or do I just abandon it and
give up?

Note also that the patch is based on a tree including the scheduler
prep-work patches posted earlier. So it will not apply to a clean
nightly tree.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h              |   29 ++--
 drivers/gpu/drm/i915/i915_gem.c              |  182 ++++++++++++--------
 drivers/gpu/drm/i915/i915_gem_context.c      |   69 +++-----
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   |   62 +++----
 drivers/gpu/drm/i915/i915_gem_gtt.c          |   64 ++++----
 drivers/gpu/drm/i915/i915_gem_gtt.h          |    3 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c |   10 +-
 drivers/gpu/drm/i915/i915_gem_render_state.h |    2 +-
 drivers/gpu/drm/i915/intel_display.c         |   68 ++++----
 drivers/gpu/drm/i915/intel_lrc.c             |  145 +++++++++-------
 drivers/gpu/drm/i915/intel_lrc.h             |    8 +-
 drivers/gpu/drm/i915/intel_overlay.c         |   58 ++++---
 drivers/gpu/drm/i915/intel_pm.c              |   33 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c      |  228 ++++++++++++++------------
 drivers/gpu/drm/i915/intel_ringbuffer.h      |   38 ++---
 15 files changed, 553 insertions(+), 446 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 511f55f..7b4309e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -513,7 +513,7 @@ struct drm_i915_display_funcs {
 	int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc,
 			  struct drm_framebuffer *fb,
 			  struct drm_i915_gem_object *obj,
-			  struct intel_engine_cs *ring,
+			  struct drm_i915_gem_request *req,
 			  uint32_t flags);
 	void (*update_primary_plane)(struct drm_crtc *crtc,
 				     struct drm_framebuffer *fb,
@@ -1796,7 +1796,8 @@ struct drm_i915_private {
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
 	struct {
 		int (*alloc_request)(struct intel_engine_cs *ring,
-				     struct intel_context *ctx);
+				     struct intel_context *ctx,
+				     struct drm_i915_gem_request **req_out);
 		int (*do_execbuf)(struct i915_execbuffer_params *params,
 				  struct drm_i915_gem_execbuffer2 *args,
 				  struct list_head *vmas);
@@ -2511,10 +2512,10 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_priv);
 void i915_gem_execbuffer_move_to_active(struct list_head *vmas,
-					struct intel_engine_cs *ring);
+					struct drm_i915_gem_request *req);
 void i915_gem_execbuffer_retire_commands(struct drm_device *dev,
 					 struct drm_file *file,
-					 struct intel_engine_cs *ring,
+					 struct drm_i915_gem_request *req,
 					 struct drm_i915_gem_object *obj);
 void i915_gem_execbuff_release_batch_obj(struct drm_i915_gem_object *batch_obj);
 int i915_gem_ringbuffer_submission(struct i915_execbuffer_params *qe,
@@ -2609,9 +2610,9 @@ int __must_check __i915_mutex_lock_interruptible(struct drm_device *dev, const c
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 #endif
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
-			 struct intel_engine_cs *to, bool add_request);
+			 struct drm_i915_gem_request *to_req);
 void i915_vma_move_to_active(struct i915_vma *vma,
-			     struct intel_engine_cs *ring);
+			     struct drm_i915_gem_request *req);
 int i915_gem_dumb_create(struct drm_file *file_priv,
 			 struct drm_device *dev,
 			 struct drm_mode_create_dumb *args);
@@ -2678,19 +2679,19 @@ int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_init(struct drm_device *dev);
 int i915_gem_init_rings(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
-int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice);
+int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice);
 void i915_gem_init_swizzling(struct drm_device *dev);
 void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
 int __must_check i915_gpu_idle(struct drm_device *dev);
 int __must_check i915_gem_suspend(struct drm_device *dev);
-int __i915_add_request(struct intel_engine_cs *ring,
+int __i915_add_request(struct drm_i915_gem_request *req,
 		       struct drm_file *file,
 		       struct drm_i915_gem_object *batch_obj,
 		       bool flush_caches);
-#define i915_add_request(ring) \
-	__i915_add_request(ring, NULL, NULL, true)
-#define i915_add_request_no_flush(ring) \
-	__i915_add_request(ring, NULL, NULL, false)
+#define i915_add_request(req) \
+	__i915_add_request(req, NULL, NULL, true)
+#define i915_add_request_no_flush(req) \
+	__i915_add_request(req, NULL, NULL, false)
 int __i915_wait_request(struct drm_i915_gem_request *req,
 			unsigned reset_counter,
 			bool interruptible,
@@ -2810,9 +2811,9 @@ int __must_check i915_gem_context_init(struct drm_device *dev);
 void i915_gem_context_fini(struct drm_device *dev);
 void i915_gem_context_reset(struct drm_device *dev);
 int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
-int i915_gem_context_enable(struct drm_i915_private *dev_priv);
+int i915_gem_context_enable(struct drm_i915_gem_request *req);
 void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
-int i915_switch_context(struct intel_engine_cs *ring,
+int i915_switch_context(struct drm_i915_gem_request *req,
 			struct intel_context *to);
 struct intel_context *
 i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1d2cbfb..dbfb4e5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1178,7 +1178,7 @@ i915_gem_check_olr(struct drm_i915_gem_request *req)
 
 	ret = 0;
 	if (req == req->ring->outstanding_lazy_request)
-		ret = i915_add_request(req->ring);
+		ret = i915_add_request(req);
 
 	return ret;
 }
@@ -2294,17 +2294,16 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 
 static void
 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
-			       struct intel_engine_cs *ring)
+			       struct drm_i915_gem_request *req)
 {
-	struct drm_i915_gem_request *req;
-	struct intel_engine_cs *old_ring;
+	struct intel_engine_cs *new_ring, *old_ring;
 
-	BUG_ON(ring == NULL);
+	BUG_ON(req == NULL);
 
-	req = intel_ring_get_request(ring);
+	new_ring = i915_gem_request_get_ring(req);
 	old_ring = i915_gem_request_get_ring(obj->last_read_req);
 
-	if (old_ring != ring && obj->last_write_req) {
+	if (old_ring != new_ring && obj->last_write_req) {
 		/* Keep the request relative to the current ring */
 		i915_gem_request_assign(&obj->last_write_req, req);
 	}
@@ -2315,17 +2314,17 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
 		obj->active = 1;
 	}
 
-	list_move_tail(&obj->ring_list, &ring->active_list);
+	list_move_tail(&obj->ring_list, &new_ring->active_list);
 
-	//printk(KERN_INFO "%s:%d> <%s> obj = %p, last_read_req <= 0x%p\n", __func__, __LINE__, ring->name, obj, req);
+	//printk(KERN_INFO "%s:%d> <%s> obj = %p, last_read_req <= 0x%p\n", __func__, __LINE__, new_ring->name, obj, req);
 	i915_gem_request_assign(&obj->last_read_req, req);
 }
 
 void i915_vma_move_to_active(struct i915_vma *vma,
-			     struct intel_engine_cs *ring)
+			     struct drm_i915_gem_request *req)
 {
 	list_move_tail(&vma->mm_list, &vma->vm->active_list);
-	return i915_gem_object_move_to_active(vma->obj, ring);
+	return i915_gem_object_move_to_active(vma->obj, req);
 }
 
 static void
@@ -2440,26 +2439,35 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
 	return 0;
 }
 
-int __i915_add_request(struct intel_engine_cs *ring,
+int __i915_add_request(struct drm_i915_gem_request *request,
 		       struct drm_file *file,
 		       struct drm_i915_gem_object *obj,
 		       bool flush_caches)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	struct drm_i915_gem_request *request;
+	struct intel_engine_cs *ring;
+	struct drm_i915_private *dev_priv;
 	struct intel_ringbuffer *ringbuf;
 	u32 request_ring_position, request_start;
 	int ret;
 
-	request = ring->outstanding_lazy_request;
+	/*printk( KERN_ERR "<%s> request %c %d:%d, OLR %c %d:%d\n",
+		request ? request->ring->name : "???",
+		request ? '=' : '?',
+		request ? request->uniq : -1,
+		request ? request->seqno : 0,
+		request->ring->outstanding_lazy_request ? '=' : '?',
+		request->ring->outstanding_lazy_request ? request->ring->outstanding_lazy_request->uniq : -1,
+		request->ring->outstanding_lazy_request ? request->ring->outstanding_lazy_request->seqno : 0);*/
+	//dump_stack();
+
 	if (WARN_ON(request == NULL))
 		return -ENOMEM;
 
-	if (i915.enable_execlists) {
-		struct intel_context *ctx = request->ctx;
-		ringbuf = ctx->engine[ring->id].ringbuf;
-	} else
-		ringbuf = ring->buffer;
+	ring = request->ring;
+	dev_priv = ring->dev->dev_private;
+	ringbuf = request->ringbuf;
+
+	WARN_ON(request != ring->outstanding_lazy_request);
 
 	request_start = intel_ring_get_tail(ringbuf);
 	/*
@@ -2471,9 +2479,9 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	 */
 	if (flush_caches) {
 		if (i915.enable_execlists)
-			ret = logical_ring_flush_all_caches(ringbuf);
+			ret = logical_ring_flush_all_caches(request);
 		else
-			ret = intel_ring_flush_all_caches(ring);
+			ret = intel_ring_flush_all_caches(request);
 		if (ret)
 			return ret;
 	}
@@ -2488,9 +2496,9 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	request_ring_position = intel_ring_get_tail(ringbuf);
 
 	if (i915.enable_execlists)
-		ret = ring->emit_request(ringbuf);
+		ret = ring->emit_request(request);
 	else
-		ret = ring->add_request(ring);
+		ret = ring->add_request(request);
 	if (ret)
 		return ret;
 
@@ -2504,7 +2512,8 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	 * inactive_list and lose its active reference. Hence we do not need
 	 * to explicitly hold another reference here.
 	 */
-	request->batch_obj = obj;
+	if (obj)
+		request->batch_obj = obj;
 
 	if (!i915.enable_execlists) {
 		/* Hold a reference to the current context so that we can inspect
@@ -2744,6 +2753,7 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 #endif
 
 	/* This may not have been flushed before the reset, so clean it now */
+	WARN_ON(ring->outstanding_lazy_request);
 	i915_gem_request_assign(&ring->outstanding_lazy_request, NULL);
 }
 
@@ -3114,8 +3124,6 @@ out:
  *
  * @obj: object which may be in use on another ring.
  * @to: ring we wish to use the object on. May be NULL.
- * @add_request: do we need to add a request to track operations
- *    submitted on ring with sync_to function
  *
  * This code is meant to abstract object synchronization with the GPU.
  * Calling with NULL implies synchronizing the object with the CPU
@@ -3125,8 +3133,9 @@ out:
  */
 int
 i915_gem_object_sync(struct drm_i915_gem_object *obj,
-		     struct intel_engine_cs *to, bool add_request)
+		     struct drm_i915_gem_request *to_req)
 {
+	struct intel_engine_cs *to = to_req->ring;
 	struct intel_engine_cs *from;
 	u32 seqno;
 	int ret, idx;
@@ -3152,7 +3161,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
 		return ret;
 
 	trace_i915_gem_ring_sync_to(from, to, obj->last_read_req);
-	ret = to->semaphore.sync_to(to, from, seqno);
+	ret = to->semaphore.sync_to(to_req, from, seqno);
 	if (!ret) {
 		/* We use last_read_req because sync_to()
 		 * might have just caused seqno wrap under
@@ -3160,8 +3169,6 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
 		 */
 		from->semaphore.sync_seqno[idx] =
 				i915_gem_request_get_seqno(obj->last_read_req);
-		if (add_request)
-			i915_add_request_no_flush(to);
 	}
 
 	return ret;
@@ -3266,18 +3273,23 @@ int i915_gpu_idle(struct drm_device *dev)
 	/* Flush everything onto the inactive list. */
 	for_each_ring(ring, dev_priv, i) {
 		if (!i915.enable_execlists) {
-			ret = i915_switch_context(ring, ring->default_context);
+			struct drm_i915_gem_request *req;
+
+			ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
 			if (ret)
 				return ret;
-		}
 
-		/* Make sure the context switch (if one actually happened)
-		 * gets wrapped up and finished rather than hanging around
-		 * and confusing things later. */
-		if (ring->outstanding_lazy_request) {
-			ret = i915_add_request(ring);
-			if (ret)
+			ret = i915_switch_context(req, ring->default_context);
+			if (ret) {
+				i915_gem_request_unreference(req);
 				return ret;
+			}
+
+			ret = i915_add_request_no_flush(req);
+			if (ret) {
+				i915_gem_request_unreference(req);
+				return ret;
+			}
 		}
 
 		ret = intel_ring_idle(ring);
@@ -4099,8 +4111,19 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	bool was_pin_display;
 	int ret;
 
-	if (pipelined != i915_gem_request_get_ring(obj->last_read_req)) {
-		ret = i915_gem_object_sync(obj, pipelined, true);
+	if (pipelined && (pipelined != i915_gem_request_get_ring(obj->last_read_req))) {
+		struct drm_i915_private *dev_priv = pipelined->dev->dev_private;
+		struct drm_i915_gem_request *req;
+
+		ret = dev_priv->gt.alloc_request(pipelined, pipelined->default_context, &req);
+		if (ret)
+			return ret;
+
+		ret = i915_gem_object_sync(obj, req);
+		if (ret)
+			return ret;
+
+		ret = i915_add_request_no_flush(req);
 		if (ret)
 			return ret;
 	}
@@ -4771,8 +4794,9 @@ err:
 	return ret;
 }
 
-int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice)
+int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
 {
+	struct intel_engine_cs *ring = req->ring;
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
@@ -4782,7 +4806,7 @@ int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice)
 	if (!HAS_L3_DPF(dev) || !remap_info)
 		return 0;
 
-	ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3);
+	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
 	if (ret)
 		return ret;
 
@@ -4962,37 +4986,67 @@ i915_gem_init_hw(struct drm_device *dev)
 	 */
 	init_unused_rings(dev);
 
+	BUG_ON(!dev_priv->ring[RCS].default_context);
+
+	ret = i915_ppgtt_init_hw(dev);
+	if (ret) {
+		DRM_ERROR("PPGTT enable failed %d\n", ret);
+		i915_gem_cleanup_ringbuffer(dev);
+		return ret;
+	}
+
 	for_each_ring(ring, dev_priv, i) {
+		struct drm_i915_gem_request *req;
+
 		ret = ring->init_hw(ring);
 		if (ret)
 			return ret;
-	}
 
-	for (i = 0; i < NUM_L3_SLICES(dev); i++)
-		i915_gem_l3_remap(&dev_priv->ring[RCS], i);
+		if (!ring->default_context)
+			continue;
 
-	/*
-	 * XXX: Contexts should only be initialized once. Doing a switch to the
-	 * default context switch however is something we'd like to do after
-	 * reset or thaw (the latter may not actually be necessary for HW, but
-	 * goes with our code better). Context switching requires rings (for
-	 * the do_switch), but before enabling PPGTT. So don't move this.
-	 */
-	ret = i915_gem_context_enable(dev_priv);
-	if (ret && ret != -EIO) {
-		DRM_ERROR("Context enable failed %d\n", ret);
-		i915_gem_cleanup_ringbuffer(dev);
+		ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
+		if (ret)
+			return ret;
 
-		return ret;
-	}
+		if (ring->id == RCS) {
+			for (i = 0; i < NUM_L3_SLICES(dev); i++)
+				i915_gem_l3_remap(req, i);
+		}
 
-	ret = i915_ppgtt_init_hw(dev);
-	if (ret && ret != -EIO) {
-		DRM_ERROR("PPGTT enable failed %d\n", ret);
-		i915_gem_cleanup_ringbuffer(dev);
+		/*
+		 * XXX: Contexts should only be initialized once. Doing a switch to the
+		 * default context switch however is something we'd like to do after
+		 * reset or thaw (the latter may not actually be necessary for HW, but
+		 * goes with our code better). Context switching requires rings (for
+		 * the do_switch), but before enabling PPGTT. So don't move this.
+		 */
+		ret = i915_gem_context_enable(req);
+		if (ret && ret != -EIO) {
+			DRM_ERROR("Context enable failed %d\n", ret);
+			i915_gem_request_unreference(req);
+			i915_gem_cleanup_ringbuffer(dev);
+
+			return ret;
+		}
+
+		ret = i915_ppgtt_init_ring(req);
+		if (ret && ret != -EIO) {
+			DRM_ERROR("PPGTT enable failed %d\n", ret);
+			i915_gem_request_unreference(req);
+			i915_gem_cleanup_ringbuffer(dev);
+		}
+
+		ret = i915_add_request_no_flush(req);
+		if (ret) {
+			DRM_ERROR("Add request failed: %d\n", ret);
+			i915_gem_request_unreference(req);
+			i915_gem_cleanup_ringbuffer(dev);
+			return ret;
+		}
 	}
 
-	return ret;
+	return 0;
 }
 
 int i915_gem_init(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index c5e1bfc..72e280b 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -401,41 +401,23 @@ void i915_gem_context_fini(struct drm_device *dev)
 	i915_gem_context_unreference(dctx);
 }
 
-int i915_gem_context_enable(struct drm_i915_private *dev_priv)
+int i915_gem_context_enable(struct drm_i915_gem_request *req)
 {
-	struct intel_engine_cs *ring;
-	int ret, i;
-
-	BUG_ON(!dev_priv->ring[RCS].default_context);
+	struct intel_engine_cs *ring = req->ring;
+	int ret;
 
 	if (i915.enable_execlists) {
-		for_each_ring(ring, dev_priv, i) {
-			if (ring->init_context) {
-				ret = ring->init_context(ring,
-						ring->default_context);
-				if (ret) {
-					DRM_ERROR("ring init context: %d\n",
-							ret);
-					return ret;
-				}
-			}
-		}
+		if (ring->init_context == NULL)
+			return 0;
 
+		ret = ring->init_context(req, ring->default_context);
 	} else
-		for_each_ring(ring, dev_priv, i) {
-			ret = i915_switch_context(ring, ring->default_context);
-			if (ret)
-				return ret;
-
-			/* Make sure the context switch (if one actually happened)
-			 * gets wrapped up and finished rather than hanging around
-			 * and confusing things later. */
-			if (ring->outstanding_lazy_request) {
-				ret = i915_add_request_no_flush(ring);
-				if (ret)
-					return ret;
-			}
-		}
+		ret = i915_switch_context(req, ring->default_context);
+
+	if (ret) {
+		DRM_ERROR("ring init context: %d\n", ret);
+		return ret;
+	}
 
 	return 0;
 }
@@ -488,10 +470,11 @@ i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id)
 }
 
 static inline int
-mi_set_context(struct intel_engine_cs *ring,
+mi_set_context(struct drm_i915_gem_request *req,
 	       struct intel_context *new_context,
 	       u32 hw_flags)
 {
+	struct intel_engine_cs *ring = req->ring;
 	u32 flags = hw_flags | MI_MM_SPACE_GTT;
 	int ret;
 
@@ -501,7 +484,7 @@ mi_set_context(struct intel_engine_cs *ring,
 	 * itlb_before_ctx_switch.
 	 */
 	if (IS_GEN6(ring->dev)) {
-		ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, 0);
+		ret = ring->flush(req, I915_GEM_GPU_DOMAINS, 0);
 		if (ret)
 			return ret;
 	}
@@ -510,7 +493,7 @@ mi_set_context(struct intel_engine_cs *ring,
 	if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8)
 		flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);
 
-	ret = intel_ring_begin(ring, 6);
+	ret = intel_ring_begin(req, 6);
 	if (ret)
 		return ret;
 
@@ -540,9 +523,10 @@ mi_set_context(struct intel_engine_cs *ring,
 	return ret;
 }
 
-static int do_switch(struct intel_engine_cs *ring,
+static int do_switch(struct drm_i915_gem_request *req,
 		     struct intel_context *to)
 {
+	struct intel_engine_cs *ring = req->ring;
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	struct intel_context *from = ring->last_context;
 	u32 hw_flags = 0;
@@ -577,7 +561,7 @@ static int do_switch(struct intel_engine_cs *ring,
 
 	if (to->ppgtt) {
 		trace_switch_mm(ring, to);
-		ret = to->ppgtt->switch_mm(to->ppgtt, ring);
+		ret = to->ppgtt->switch_mm(to->ppgtt, req);
 		if (ret)
 			goto unpin_out;
 	}
@@ -608,7 +592,7 @@ static int do_switch(struct intel_engine_cs *ring,
 	if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to))
 		hw_flags |= MI_RESTORE_INHIBIT;
 
-	ret = mi_set_context(ring, to, hw_flags);
+	ret = mi_set_context(req, to, hw_flags);
 	if (ret)
 		goto unpin_out;
 
@@ -616,7 +600,7 @@ static int do_switch(struct intel_engine_cs *ring,
 		if (!(to->remap_slice & (1<<i)))
 			continue;
 
-		ret = i915_gem_l3_remap(ring, i);
+		ret = i915_gem_l3_remap(req, i);
 		/* If it failed, try again next round */
 		if (ret)
 			DRM_DEBUG_DRIVER("L3 remapping failed\n");
@@ -632,7 +616,7 @@ static int do_switch(struct intel_engine_cs *ring,
 	 */
 	if (from != NULL) {
 		from->legacy_hw_ctx.rcs_state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), ring);
+		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), req);
 		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
 		 * whole damn pipeline, we don't need to explicitly mark the
 		 * object dirty. The only exception is that the context must be
@@ -658,12 +642,12 @@ done:
 
 	if (uninitialized) {
 		if (ring->init_context) {
-			ret = ring->init_context(ring, to);
+			ret = ring->init_context(req, to);
 			if (ret)
 				DRM_ERROR("ring init context: %d\n", ret);
 		}
 
-		ret = i915_gem_render_state_init(ring);
+		ret = i915_gem_render_state_init(req);
 		if (ret)
 			DRM_ERROR("init render state: %d\n", ret);
 	}
@@ -690,9 +674,10 @@ unpin_out:
  * switched by writing to the ELSP and requests keep a reference to their
  * context.
  */
-int i915_switch_context(struct intel_engine_cs *ring,
+int i915_switch_context(struct drm_i915_gem_request *req,
 			struct intel_context *to)
 {
+	struct intel_engine_cs *ring = req->ring;
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 
 	WARN_ON(i915.enable_execlists);
@@ -708,7 +693,7 @@ int i915_switch_context(struct intel_engine_cs *ring,
 		return 0;
 	}
 
-	return do_switch(ring, to);
+	return do_switch(req, to);
 }
 
 static bool contexts_enabled(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index ca31673..5caa2a2 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -822,7 +822,7 @@ err:
 }
 
 static int
-i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
+i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 				struct list_head *vmas)
 {
 	struct i915_vma *vma;
@@ -832,7 +832,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
 
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
-		ret = i915_gem_object_sync(obj, ring, false);
+		ret = i915_gem_object_sync(obj, req);
 		if (ret)
 			return ret;
 
@@ -843,7 +843,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
 	}
 
 	if (flush_chipset)
-		i915_gem_chipset_flush(ring->dev);
+		i915_gem_chipset_flush(req->ring->dev);
 
 	if (flush_domains & I915_GEM_DOMAIN_GTT)
 		wmb();
@@ -941,9 +941,9 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
 
 void
 i915_gem_execbuffer_move_to_active(struct list_head *vmas,
-				   struct intel_engine_cs *ring)
+				   struct drm_i915_gem_request *req)
 {
-	struct drm_i915_gem_request *req = intel_ring_get_request(ring);
+	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
 	struct i915_vma *vma;
 
 	list_for_each_entry(vma, vmas, exec_list) {
@@ -957,7 +957,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 			obj->base.pending_read_domains |= obj->base.read_domains;
 		obj->base.read_domains = obj->base.pending_read_domains;
 
-		i915_vma_move_to_active(vma, ring);
+		i915_vma_move_to_active(vma, req);
 		if (obj->base.write_domain) {
 			obj->dirty = 1;
 			i915_gem_request_assign(&obj->last_write_req, req);
@@ -983,20 +983,21 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 void
 i915_gem_execbuffer_retire_commands(struct drm_device *dev,
 				    struct drm_file *file,
-				    struct intel_engine_cs *ring,
+				    struct drm_i915_gem_request *req,
 				    struct drm_i915_gem_object *obj)
 {
 	/* Unconditionally force add_request to emit a full flush. */
-	ring->gpu_caches_dirty = true;
+	req->ring->gpu_caches_dirty = true;
 
 	/* Add a breadcrumb for the completion of the batch buffer */
-	(void)__i915_add_request(ring, file, obj, true);
+	(void)__i915_add_request(req, file, obj, true);
 }
 
 static int
 i915_reset_gen7_sol_offsets(struct drm_device *dev,
-			    struct intel_engine_cs *ring)
+			    struct drm_i915_gem_request *req)
 {
+	struct intel_engine_cs *ring = req->ring;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret, i;
 
@@ -1005,7 +1006,7 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
 		return -EINVAL;
 	}
 
-	ret = intel_ring_begin(ring, 4 * 3);
+	ret = intel_ring_begin(req, 4 * 3);
 	if (ret)
 		return ret;
 
@@ -1021,10 +1022,11 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
 }
 
 static int
-i915_emit_box(struct intel_engine_cs *ring,
+i915_emit_box(struct drm_i915_gem_request *req,
 	      struct drm_clip_rect *box,
 	      int DR1, int DR4)
 {
+	struct intel_engine_cs *ring = req->ring;
 	int ret;
 
 	if (box->y2 <= box->y1 || box->x2 <= box->x1 ||
@@ -1035,7 +1037,7 @@ i915_emit_box(struct intel_engine_cs *ring,
 	}
 
 	if (INTEL_INFO(ring->dev)->gen >= 4) {
-		ret = intel_ring_begin(ring, 4);
+		ret = intel_ring_begin(req, 4);
 		if (ret)
 			return ret;
 
@@ -1044,7 +1046,7 @@ i915_emit_box(struct intel_engine_cs *ring,
 		intel_ring_emit(ring, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16);
 		intel_ring_emit(ring, DR4);
 	} else {
-		ret = intel_ring_begin(ring, 6);
+		ret = intel_ring_begin(req, 6);
 		if (ret)
 			return ret;
 
@@ -1151,11 +1153,11 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
 		goto error;
 	}
 
-	ret = i915_gem_execbuffer_move_to_gpu(ring, vmas);
+	ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
 	if (ret)
 		goto error;
 
-	i915_gem_execbuffer_move_to_active(vmas, ring);
+	i915_gem_execbuffer_move_to_active(vmas, params->request);
 
 	/* Make sure the OLR hasn't advanced (which would indicate a flush
 	 * of the work in progress which in turn would be a Bad Thing). */
@@ -1200,18 +1202,18 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
 	/* Unconditionally invalidate gpu caches and ensure that we do flush
 	 * any residual writes from the previous batch.
 	 */
-	ret = intel_ring_invalidate_all_caches(ring);
+	ret = intel_ring_invalidate_all_caches(params->request);
 	if (ret)
 		goto error;
 
 	/* Switch to the correct context for the batch */
-	ret = i915_switch_context(ring, params->ctx);
+	ret = i915_switch_context(params->request, params->ctx);
 	if (ret)
 		goto error;
 
 	if (ring == &dev_priv->ring[RCS] &&
 			params->instp_mode != dev_priv->relative_constants_mode) {
-		ret = intel_ring_begin(ring, 4);
+		ret = intel_ring_begin(params->request, 4);
 		if (ret)
 			goto error;
 
@@ -1225,7 +1227,7 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
 	}
 
 	if (params->args_flags & I915_EXEC_GEN7_SOL_RESET) {
-		ret = i915_reset_gen7_sol_offsets(params->dev, ring);
+		ret = i915_reset_gen7_sol_offsets(params->dev, params->request);
 		if (ret)
 			goto error;
 	}
@@ -1236,29 +1238,31 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
 
 	if (params->cliprects) {
 		for (i = 0; i < params->args_num_cliprects; i++) {
-			ret = i915_emit_box(ring, &params->cliprects[i],
-					    params->args_DR1, params->args_DR4);
+			ret = i915_emit_box(params->request,
+					    &params->cliprects[i],
+					    params->args_DR1,
+					    params->args_DR4);
 			if (ret)
 				goto error;
 
-			ret = ring->dispatch_execbuffer(ring,
+			ret = ring->dispatch_execbuffer(params->request,
 							exec_start, exec_len,
 							params->dispatch_flags);
 			if (ret)
 				goto error;
 		}
 	} else {
-		ret = ring->dispatch_execbuffer(ring,
+		ret = ring->dispatch_execbuffer(params->request,
 						exec_start, exec_len,
 						params->dispatch_flags);
 		if (ret)
 			goto error;
 	}
 
-	trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), params->dispatch_flags);
+	trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
 
-	i915_gem_execbuffer_retire_commands(params->dev, params->file, ring,
-					    params->batch_obj);
+	i915_gem_execbuffer_retire_commands(params->dev, params->file,
+					    params->request, params->batch_obj);
 
 error:
 	/* intel_gpu_busy should also get a ref, so it will free when the device
@@ -1490,10 +1494,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm);
 
 	/* Allocate a request for this batch buffer nice and early. */
-	ret = dev_priv->gt.alloc_request(ring, ctx);
+	ret = dev_priv->gt.alloc_request(ring, ctx, &params->request);
 	if (ret)
 		goto err;
-	params->request = ring->outstanding_lazy_request;
+	WARN_ON(params->request != ring->outstanding_lazy_request);
 
 	/* Save assorted stuff away to pass through to *_submission_final() */
 	params->dev                     = dev;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7eead93..776776e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -213,14 +213,15 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
 }
 
 /* Broadwell Page Directory Pointer Descriptors */
-static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
-			   uint64_t val)
+static int gen8_write_pdp(struct drm_i915_gem_request *req, unsigned entry,
+			  uint64_t val)
 {
+	struct intel_engine_cs *ring = req->ring;
 	int ret;
 
 	BUG_ON(entry >= 4);
 
-	ret = intel_ring_begin(ring, 6);
+	ret = intel_ring_begin(req, 6);
 	if (ret)
 		return ret;
 
@@ -236,7 +237,7 @@ static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
 }
 
 static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
-			  struct intel_engine_cs *ring)
+			  struct drm_i915_gem_request *req)
 {
 	int i, ret;
 
@@ -245,7 +246,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
 
 	for (i = used_pd - 1; i >= 0; i--) {
 		dma_addr_t addr = ppgtt->pd_dma_addr[i];
-		ret = gen8_write_pdp(ring, i, addr);
+		ret = gen8_write_pdp(req, i, addr);
 		if (ret)
 			return ret;
 	}
@@ -710,16 +711,17 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
 }
 
 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
-			 struct intel_engine_cs *ring)
+			 struct drm_i915_gem_request *req)
 {
+	struct intel_engine_cs *ring = req->ring;
 	int ret;
 
 	/* NB: TLBs must be flushed and invalidated before a switch */
-	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+	ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
 	if (ret)
 		return ret;
 
-	ret = intel_ring_begin(ring, 6);
+	ret = intel_ring_begin(req, 6);
 	if (ret)
 		return ret;
 
@@ -735,16 +737,17 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
 }
 
 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
-			  struct intel_engine_cs *ring)
+			  struct drm_i915_gem_request *req)
 {
+	struct intel_engine_cs *ring = req->ring;
 	int ret;
 
 	/* NB: TLBs must be flushed and invalidated before a switch */
-	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+	ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
 	if (ret)
 		return ret;
 
-	ret = intel_ring_begin(ring, 6);
+	ret = intel_ring_begin(req, 6);
 	if (ret)
 		return ret;
 
@@ -758,7 +761,7 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
 
 	/* XXX: RCS is the only one to auto invalidate the TLBs? */
 	if (ring->id != RCS) {
-		ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+		ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
 		if (ret)
 			return ret;
 	}
@@ -767,8 +770,9 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
 }
 
 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
-			  struct intel_engine_cs *ring)
+			  struct drm_i915_gem_request *req)
 {
+	struct intel_engine_cs *ring = req->ring;
 	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
@@ -1125,11 +1129,6 @@ int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
 
 int i915_ppgtt_init_hw(struct drm_device *dev)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring;
-	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
-	int i, ret = 0;
-
 	/* In the case of execlists, PPGTT is enabled by the context descriptor
 	 * and the PDPs are contained within the context itself.  We don't
 	 * need to do anything here. */
@@ -1148,25 +1147,20 @@ int i915_ppgtt_init_hw(struct drm_device *dev)
 	else
 		WARN_ON(1);
 
-	if (ppgtt) {
-		for_each_ring(ring, dev_priv, i) {
-			ret = ppgtt->switch_mm(ppgtt, ring);
-			if (ret != 0)
-				return ret;
-
-			/* Make sure the context switch (if one actually happened)
-			 * gets wrapped up and finished rather than hanging around
-			 * and confusing things later. */
-			if (ring->outstanding_lazy_request) {
-				ret = i915_add_request_no_flush(ring);
-				if (ret)
-					return ret;
-			}
-		}
-	}
+	return 0;
+}
 
-	return ret;
+int i915_ppgtt_init_ring(struct drm_i915_gem_request *req)
+{
+	struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
+	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
+
+	if (!ppgtt)
+		return 0;
+
+	return ppgtt->switch_mm(ppgtt, req);
 }
+
 struct i915_hw_ppgtt *
 i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index dd849df..bee3e2a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -267,7 +267,7 @@ struct i915_hw_ppgtt {
 
 	int (*enable)(struct i915_hw_ppgtt *ppgtt);
 	int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
-			 struct intel_engine_cs *ring);
+			 struct drm_i915_gem_request *req);
 	void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
 };
 
@@ -278,6 +278,7 @@ void i915_global_gtt_cleanup(struct drm_device *dev);
 
 int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt);
 int i915_ppgtt_init_hw(struct drm_device *dev);
+int i915_ppgtt_init_ring(struct drm_i915_gem_request *req);
 void i915_ppgtt_release(struct kref *kref);
 struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev,
 					struct drm_i915_file_private *fpriv);
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index aba39c3..0e0c23fe 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -152,8 +152,9 @@ int i915_gem_render_state_prepare(struct intel_engine_cs *ring,
 	return 0;
 }
 
-int i915_gem_render_state_init(struct intel_engine_cs *ring)
+int i915_gem_render_state_init(struct drm_i915_gem_request *req)
 {
+	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
 	struct render_state so;
 	int ret;
 
@@ -164,16 +165,17 @@ int i915_gem_render_state_init(struct intel_engine_cs *ring)
 	if (so.rodata == NULL)
 		return 0;
 
-	ret = ring->dispatch_execbuffer(ring,
+	ret = ring->dispatch_execbuffer(req,
 					so.ggtt_offset,
 					so.rodata->batch_items * 4,
 					I915_DISPATCH_SECURE);
 	if (ret)
 		goto out;
 
-	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
+	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
 
-	ret = __i915_add_request(ring, NULL, so.obj, true);
+//	ret = __i915_add_request(req, NULL, so.obj, true);
+	req->batch_obj = so.obj;
 	/* __i915_add_request moves object to inactive if it fails */
 out:
 	i915_gem_render_state_fini(&so);
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h
index c44961e..7aa7372 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.h
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.h
@@ -39,7 +39,7 @@ struct render_state {
 	int gen;
 };
 
-int i915_gem_render_state_init(struct intel_engine_cs *ring);
+int i915_gem_render_state_init(struct drm_i915_gem_request *req);
 void i915_gem_render_state_fini(struct render_state *so);
 int i915_gem_render_state_prepare(struct intel_engine_cs *ring,
 				  struct render_state *so);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index f0cf421..c0b0e37 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9089,14 +9089,15 @@ static int intel_gen2_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
 				 struct drm_i915_gem_object *obj,
-				 struct intel_engine_cs *ring,
+				 struct drm_i915_gem_request *req,
 				 uint32_t flags)
 {
+	struct intel_engine_cs *ring = req->ring;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	u32 flip_mask;
 	int ret;
 
-	ret = intel_ring_begin(ring, 6);
+	ret = intel_ring_begin(req, 6);
 	if (ret)
 		return ret;
 
@@ -9116,7 +9117,7 @@ static int intel_gen2_queue_flip(struct drm_device *dev,
 	intel_ring_emit(ring, 0); /* aux display base address, unused */
 
 	intel_mark_page_flip_active(intel_crtc);
-	i915_add_request_no_flush(ring);
+	i915_add_request_no_flush(req);
 	return 0;
 }
 
@@ -9124,14 +9125,15 @@ static int intel_gen3_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
 				 struct drm_i915_gem_object *obj,
-				 struct intel_engine_cs *ring,
+				 struct drm_i915_gem_request *req,
 				 uint32_t flags)
 {
+	struct intel_engine_cs *ring = req->ring;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	u32 flip_mask;
 	int ret;
 
-	ret = intel_ring_begin(ring, 6);
+	ret = intel_ring_begin(req, 6);
 	if (ret)
 		return ret;
 
@@ -9148,7 +9150,7 @@ static int intel_gen3_queue_flip(struct drm_device *dev,
 	intel_ring_emit(ring, MI_NOOP);
 
 	intel_mark_page_flip_active(intel_crtc);
-	i915_add_request_no_flush(ring);
+	i915_add_request_no_flush(req);
 	return 0;
 }
 
@@ -9156,15 +9158,16 @@ static int intel_gen4_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
 				 struct drm_i915_gem_object *obj,
-				 struct intel_engine_cs *ring,
+				 struct drm_i915_gem_request *req,
 				 uint32_t flags)
 {
+	struct intel_engine_cs *ring = req->ring;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	uint32_t pf, pipesrc;
 	int ret;
 
-	ret = intel_ring_begin(ring, 4);
+	ret = intel_ring_begin(req, 4);
 	if (ret)
 		return ret;
 
@@ -9187,7 +9190,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev,
 	intel_ring_emit(ring, pf | pipesrc);
 
 	intel_mark_page_flip_active(intel_crtc);
-	i915_add_request_no_flush(ring);
+	i915_add_request_no_flush(req);
 	return 0;
 }
 
@@ -9195,15 +9198,16 @@ static int intel_gen6_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
 				 struct drm_i915_gem_object *obj,
-				 struct intel_engine_cs *ring,
+				 struct drm_i915_gem_request *req,
 				 uint32_t flags)
 {
+	struct intel_engine_cs *ring = req->ring;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	uint32_t pf, pipesrc;
 	int ret;
 
-	ret = intel_ring_begin(ring, 4);
+	ret = intel_ring_begin(req, 4);
 	if (ret)
 		return ret;
 
@@ -9223,7 +9227,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev,
 	intel_ring_emit(ring, pf | pipesrc);
 
 	intel_mark_page_flip_active(intel_crtc);
-	i915_add_request_no_flush(ring);
+	i915_add_request_no_flush(req);
 	return 0;
 }
 
@@ -9231,9 +9235,10 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
 				 struct drm_i915_gem_object *obj,
-				 struct intel_engine_cs *ring,
+				 struct drm_i915_gem_request *req,
 				 uint32_t flags)
 {
+	struct intel_engine_cs *ring = req->ring;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	uint32_t plane_bit = 0;
 	int len, ret;
@@ -9275,11 +9280,11 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
 	 * then do the cacheline alignment, and finally emit the
 	 * MI_DISPLAY_FLIP.
 	 */
-	ret = intel_ring_cacheline_align(ring);
+	ret = intel_ring_cacheline_align(req);
 	if (ret)
 		return ret;
 
-	ret = intel_ring_begin(ring, len);
+	ret = intel_ring_begin(req, len);
 	if (ret)
 		return ret;
 
@@ -9318,7 +9323,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
 	intel_ring_emit(ring, (MI_NOOP));
 
 	intel_mark_page_flip_active(intel_crtc);
-	i915_add_request_no_flush(ring);
+	i915_add_request_no_flush(req);
 	return 0;
 }
 
@@ -9474,9 +9479,10 @@ static int intel_gen9_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
 				 struct drm_i915_gem_object *obj,
-				 struct intel_engine_cs *ring,
+				 struct drm_i915_gem_request *req,
 				 uint32_t flags)
 {
+	struct intel_engine_cs *ring = req->ring;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	uint32_t plane = 0, stride;
 	int ret;
@@ -9508,7 +9514,7 @@ static int intel_gen9_queue_flip(struct drm_device *dev,
 		return -ENODEV;
 	}
 
-	ret = intel_ring_begin(ring, 10);
+	ret = intel_ring_begin(req, 10);
 	if (ret)
 		return ret;
 
@@ -9528,7 +9534,7 @@ static int intel_gen9_queue_flip(struct drm_device *dev,
 	intel_ring_emit(ring, intel_crtc->unpin_work->gtt_offset);
 
 	intel_mark_page_flip_active(intel_crtc);
-	i915_add_request_no_flush(ring);
+	i915_add_request_no_flush(req);
 
 	return 0;
 }
@@ -9537,7 +9543,7 @@ static int intel_default_queue_flip(struct drm_device *dev,
 				    struct drm_crtc *crtc,
 				    struct drm_framebuffer *fb,
 				    struct drm_i915_gem_object *obj,
-				    struct intel_engine_cs *ring,
+				    struct drm_i915_gem_request *req,
 				    uint32_t flags)
 {
 	return -ENODEV;
@@ -9729,22 +9735,18 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 		i915_gem_request_assign(&work->flip_queued_req,
 					obj->last_write_req);
 	} else {
-		ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, ring,
+		struct drm_i915_gem_request *req;
+
+		ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
+		if (ret)
+			return ret;
+
+		i915_gem_request_assign(&work->flip_queued_req, req);
+
+		ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, req,
 						   page_flip_flags);
 		if (ret)
 			goto cleanup_unpin;
-
-		/* Borked: need to get the seqno for the request submitted in
-		 * 'queue_flip()' above. However, either the request has been
-		 * posted already and the seqno is gone (q_f calls add_request),
-		 * or the request never gets posted and is merged into whatever
-		 * render comes along next (q_f calls ring_advance).
-		 *
-		 * On the other hand, seqnos are going away soon anyway! So
-		 * hopefully the problem will disappear...
-		 */
-		i915_gem_request_assign(&work->flip_queued_req,
-					ring->outstanding_lazy_request ? intel_ring_get_request(ring) : NULL);
 	}
 
 	work->flip_queued_vblank = drm_vblank_count(dev, intel_crtc->pipe);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 80cb87e..5077a77 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -203,6 +203,10 @@ enum {
 };
 #define GEN8_CTX_ID_SHIFT 32
 
+static int intel_logical_ring_begin(struct drm_i915_gem_request *req,
+				    int num_dwords);
+static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req,
+					      struct intel_context *ctx);
 static int intel_lr_context_pin(struct intel_engine_cs *ring,
 		struct intel_context *ctx);
 
@@ -587,9 +591,9 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
 	return 0;
 }
 
-static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
+static int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
 {
-	struct intel_engine_cs *ring = ringbuf->ring;
+	struct intel_engine_cs *ring = req->ring;
 	uint32_t flush_domains;
 	int ret;
 
@@ -597,7 +601,7 @@ static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
 	if (ring->gpu_caches_dirty)
 		flush_domains = I915_GEM_GPU_DOMAINS;
 
-	ret = ring->emit_flush(ringbuf, I915_GEM_GPU_DOMAINS, flush_domains);
+	ret = ring->emit_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
 	if (ret)
 		return ret;
 
@@ -605,10 +609,9 @@ static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
 	return 0;
 }
 
-static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
+static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
 				 struct list_head *vmas)
 {
-	struct intel_engine_cs *ring = ringbuf->ring;
 	struct i915_vma *vma;
 	uint32_t flush_domains = 0;
 	bool flush_chipset = false;
@@ -617,7 +620,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
 
-		ret = i915_gem_object_sync(obj, ring, true);
+		ret = i915_gem_object_sync(obj, req);
 		if (ret)
 			return ret;
 
@@ -657,7 +660,6 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
 	struct drm_device       *dev = params->dev;
 	struct intel_engine_cs  *ring = params->ring;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ringbuffer *ringbuf = params->ctx->engine[ring->id].ringbuf;
 	int ret;
 
 	params->instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
@@ -706,11 +708,11 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
 		return -EINVAL;
 	}
 
-	ret = execlists_move_to_gpu(ringbuf, vmas);
+	ret = execlists_move_to_gpu(params->request, vmas);
 	if (ret)
 		return ret;
 
-	i915_gem_execbuffer_move_to_active(vmas, ring);
+	i915_gem_execbuffer_move_to_active(vmas, params->request);
 
 	ret = dev_priv->gt.do_execfinal(params);
 	if (ret)
@@ -742,13 +744,13 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params)
 	/* Unconditionally invalidate gpu caches and ensure that we do flush
 	 * any residual writes from the previous batch.
 	 */
-	ret = logical_ring_invalidate_all_caches(ringbuf);
+	ret = logical_ring_invalidate_all_caches(params->request);
 	if (ret)
 		return ret;
 
 	if (ring == &dev_priv->ring[RCS] &&
 	    params->instp_mode != dev_priv->relative_constants_mode) {
-		ret = intel_logical_ring_begin(ringbuf, 4);
+		ret = intel_logical_ring_begin(params->request, 4);
 		if (ret)
 			return ret;
 
@@ -764,13 +766,14 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params)
 	exec_start = params->batch_obj_vm_offset +
 		     params->args_batch_start_offset;
 
-	ret = ring->emit_bb_start(ringbuf, exec_start, params->dispatch_flags);
+	ret = ring->emit_bb_start(params->request, exec_start, params->dispatch_flags);
 	if (ret)
 		return ret;
 
-	trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), params->dispatch_flags);
+	trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
 
-	i915_gem_execbuffer_retire_commands(params->dev, params->file, ring, params->batch_obj);
+	i915_gem_execbuffer_retire_commands(params->dev, params->file,
+					    params->request, params->batch_obj);
 
 	return 0;
 }
@@ -827,15 +830,15 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring)
 	I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
 }
 
-int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf)
+int logical_ring_flush_all_caches(struct drm_i915_gem_request *req)
 {
-	struct intel_engine_cs *ring = ringbuf->ring;
+	struct intel_engine_cs *ring = req->ring;
 	int ret;
 
 	if (!ring->gpu_caches_dirty)
 		return 0;
 
-	ret = ring->emit_flush(ringbuf, 0, I915_GEM_GPU_DOMAINS);
+	ret = ring->emit_flush(req, 0, I915_GEM_GPU_DOMAINS);
 	if (ret)
 		return ret;
 
@@ -910,13 +913,17 @@ void intel_lr_context_unpin(struct intel_engine_cs *ring,
 }
 
 int intel_logical_ring_alloc_request(struct intel_engine_cs *ring,
-				     struct intel_context *ctx)
+				     struct intel_context *ctx,
+				     struct drm_i915_gem_request **req_out)
 {
 	struct drm_i915_gem_request *request;
 	struct drm_i915_private *dev_private = ring->dev->dev_private;
 	int ret;
 
-	if (ring->outstanding_lazy_request)
+	if (!req_out)
+		return -EINVAL;
+
+	if ((*req_out = ring->outstanding_lazy_request) != NULL)
 		return 0;
 
 	request = kzalloc(sizeof(*request), GFP_KERNEL);
@@ -953,7 +960,7 @@ int intel_logical_ring_alloc_request(struct intel_engine_cs *ring,
 	i915_gem_context_reference(request->ctx);
 	request->ringbuf = ctx->engine[ring->id].ringbuf;
 
-	ring->outstanding_lazy_request = request;
+	*req_out = ring->outstanding_lazy_request = request;
 	return 0;
 }
 
@@ -1090,7 +1097,7 @@ static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes)
 /**
  * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands
  *
- * @ringbuf: Logical ringbuffer.
+ * @request: The request to start some new work for
  * @num_dwords: number of DWORDs that we plan to write to the ringbuffer.
  *
  * The ringbuffer might not be ready to accept the commands right away (maybe it needs to
@@ -1100,8 +1107,9 @@ static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes)
  *
  * Return: non-zero if the ringbuffer is not ready to be written to.
  */
-int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
+static int intel_logical_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 {
+	struct intel_ringbuffer *ringbuf = req->ringbuf;
 	struct intel_engine_cs *ring = ringbuf->ring;
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1116,38 +1124,28 @@ int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
 	if (ret)
 		return ret;
 
-	if(!ring->outstanding_lazy_request) {
-		printk(KERN_INFO "%s:%d> \x1B[31;1mring->outstanding_lazy_request = 0x%p\x1B[0m\n", __func__, __LINE__, ring->outstanding_lazy_request);
-		dump_stack();
-	}
-
-	/* Preallocate the olr before touching the ring */
-	ret = intel_logical_ring_alloc_request(ring, ringbuf->FIXME_lrc_ctx);
-	if (ret)
-		return ret;
-
 	ringbuf->space -= num_dwords * sizeof(uint32_t);
 	return 0;
 }
 
-static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring,
+static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req,
 					       struct intel_context *ctx)
 {
 	int ret, i;
-	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
-	struct drm_device *dev = ring->dev;
+	struct intel_ringbuffer *ringbuf = req->ringbuf;
+	struct drm_device *dev = req->ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct i915_workarounds *w = &dev_priv->workarounds;
 
 	if (WARN_ON(w->count == 0))
 		return 0;
 
-	ring->gpu_caches_dirty = true;
-	ret = logical_ring_flush_all_caches(ringbuf);
+	req->ring->gpu_caches_dirty = true;
+	ret = logical_ring_flush_all_caches(req);
 	if (ret)
 		return ret;
 
-	ret = intel_logical_ring_begin(ringbuf, w->count * 2 + 2);
+	ret = intel_logical_ring_begin(req, w->count * 2 + 2);
 	if (ret)
 		return ret;
 
@@ -1160,8 +1158,8 @@ static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring,
 
 	intel_logical_ring_advance(ringbuf);
 
-	ring->gpu_caches_dirty = true;
-	ret = logical_ring_flush_all_caches(ringbuf);
+	req->ring->gpu_caches_dirty = true;
+	ret = logical_ring_flush_all_caches(req);
 	if (ret)
 		return ret;
 
@@ -1210,13 +1208,14 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring)
 	return init_workarounds_ring(ring);
 }
 
-static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf,
+static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
 			      u64 offset, unsigned flags)
 {
+	struct intel_ringbuffer *ringbuf = req->ringbuf;
 	bool ppgtt = !(flags & I915_DISPATCH_SECURE);
 	int ret;
 
-	ret = intel_logical_ring_begin(ringbuf, 4);
+	ret = intel_logical_ring_begin(req, 4);
 	if (ret)
 		return ret;
 
@@ -1263,17 +1262,18 @@ static void gen8_logical_ring_put_irq(struct intel_engine_cs *ring)
 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 }
 
-static int gen8_emit_flush(struct intel_ringbuffer *ringbuf,
+static int gen8_emit_flush(struct drm_i915_gem_request *req,
 			   u32 invalidate_domains,
 			   u32 unused)
 {
+	struct intel_ringbuffer *ringbuf = req->ringbuf;
 	struct intel_engine_cs *ring = ringbuf->ring;
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t cmd;
 	int ret;
 
-	ret = intel_logical_ring_begin(ringbuf, 4);
+	ret = intel_logical_ring_begin(req, 4);
 	if (ret)
 		return ret;
 
@@ -1301,10 +1301,11 @@ static int gen8_emit_flush(struct intel_ringbuffer *ringbuf,
 	return 0;
 }
 
-static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf,
+static int gen8_emit_flush_render(struct drm_i915_gem_request *req,
 				  u32 invalidate_domains,
 				  u32 flush_domains)
 {
+	struct intel_ringbuffer *ringbuf = req->ringbuf;
 	struct intel_engine_cs *ring = ringbuf->ring;
 	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
 	u32 flags = 0;
@@ -1328,7 +1329,7 @@ static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf,
 		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
 	}
 
-	ret = intel_logical_ring_begin(ringbuf, 6);
+	ret = intel_logical_ring_begin(req, 6);
 	if (ret)
 		return ret;
 
@@ -1353,13 +1354,14 @@ static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
 	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
 }
 
-static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
+static int gen8_emit_request(struct drm_i915_gem_request *req)
 {
+	struct intel_ringbuffer *ringbuf = req->ringbuf;
 	struct intel_engine_cs *ring = ringbuf->ring;
 	u32 cmd;
 	int ret;
 
-	ret = intel_logical_ring_begin(ringbuf, 6);
+	ret = intel_logical_ring_begin(req, 6);
 	if (ret)
 		return ret;
 
@@ -1371,8 +1373,7 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
 				(ring->status_page.gfx_addr +
 				(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)));
 	intel_logical_ring_emit(ringbuf, 0);
-	intel_logical_ring_emit(ringbuf,
-		i915_gem_request_get_seqno(ring->outstanding_lazy_request));
+	intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(req));
 	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
 	intel_logical_ring_emit(ringbuf, MI_NOOP);
 	intel_logical_ring_advance_and_submit(ringbuf);
@@ -1380,16 +1381,20 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
 	return 0;
 }
 
-static int gen8_init_rcs_context(struct intel_engine_cs *ring,
-		       struct intel_context *ctx)
+static int gen8_init_rcs_context(struct drm_i915_gem_request *req,
+				 struct intel_context *ctx)
 {
 	int ret;
 
-	ret = intel_logical_ring_workarounds_emit(ring, ctx);
+	ret = intel_logical_ring_workarounds_emit(req, ctx);
 	if (ret)
 		return ret;
 
-	return intel_lr_context_render_state_init(ring, ctx);
+	ret = intel_lr_context_render_state_init(req, ctx);
+	if (ret)
+		return ret;
+
+	return 0;
 }
 
 /**
@@ -1409,6 +1414,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
 
 	intel_logical_ring_stop(ring);
 	WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
+	WARN_ON(ring->outstanding_lazy_request);
 	i915_gem_request_assign(&ring->outstanding_lazy_request, NULL);
 
 	if (ring->cleanup)
@@ -1648,10 +1654,10 @@ cleanup_render_ring:
 	return ret;
 }
 
-int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
-				       struct intel_context *ctx)
+static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req,
+					      struct intel_context *ctx)
 {
-	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
 	struct render_state so;
 	struct drm_i915_file_private *file_priv = ctx->file_priv;
 	struct drm_file *file = file_priv ? file_priv->file : NULL;
@@ -1664,15 +1670,13 @@ int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
 	if (so.rodata == NULL)
 		return 0;
 
-	ret = ring->emit_bb_start(ringbuf,
-			so.ggtt_offset,
-			I915_DISPATCH_SECURE);
+	ret = ring->emit_bb_start(req, so.ggtt_offset, I915_DISPATCH_SECURE);
 	if (ret)
 		goto out;
 
-	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
+	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
 
-	ret = __i915_add_request(ring, file, so.obj, true);
+	ret = __i915_add_request(req, file, so.obj, true);
 	/* intel_logical_ring_add_request moves object to inactive if it
 	 * fails */
 out:
@@ -1883,6 +1887,7 @@ static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
 int intel_lr_context_deferred_create(struct intel_context *ctx,
 				     struct intel_engine_cs *ring)
 {
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	const bool is_global_default_ctx = (ctx == ring->default_context);
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_gem_object *ctx_obj;
@@ -1964,13 +1969,27 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 		lrc_setup_hardware_status_page(ring, ctx_obj);
 	else if (ring->id == RCS && !ctx->rcs_initialized) {
 		if (ring->init_context) {
-			ret = ring->init_context(ring, ctx);
+			struct drm_i915_gem_request *req;
+
+			ret = dev_priv->gt.alloc_request(ring, ctx, &req);
+			if (ret)
+				return ret;
+
+			ret = ring->init_context(req, ctx);
 			if (ret) {
 				DRM_ERROR("ring init context: %d\n", ret);
+				i915_gem_request_unreference(req);
 				ctx->engine[ring->id].ringbuf = NULL;
 				ctx->engine[ring->id].state = NULL;
 				goto error;
 			}
+
+			ret = i915_add_request_no_flush(req);
+			if (ret) {
+				DRM_ERROR("ring init context: %d\n", ret);
+				i915_gem_request_unreference(req);
+				goto error;
+			}
 		}
 
 		ctx->rcs_initialized = true;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index ea083d9..a2981ba 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -35,12 +35,13 @@
 
 /* Logical Rings */
 int __must_check intel_logical_ring_alloc_request(struct intel_engine_cs *ring,
-						  struct intel_context *ctx);
+						  struct intel_context *ctx,
+						  struct drm_i915_gem_request **req_out);
 void intel_logical_ring_stop(struct intel_engine_cs *ring);
 void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
 int intel_logical_rings_init(struct drm_device *dev);
 
-int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf);
+int logical_ring_flush_all_caches(struct drm_i915_gem_request *req);
 void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf);
 /**
  * intel_logical_ring_advance() - advance the ringbuffer tail
@@ -63,11 +64,8 @@ static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf,
 	iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
 	ringbuf->tail += 4;
 }
-int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords);
 
 /* Logical Ring Contexts */
-int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
-				       struct intel_context *ctx);
 void intel_lr_context_free(struct intel_context *ctx);
 int intel_lr_context_deferred_create(struct intel_context *ctx,
 				     struct intel_engine_cs *ring);
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 973c9de..2d2ce59 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -209,17 +209,15 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
 }
 
 static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
+					 struct drm_i915_gem_request *req,
 					 void (*tail)(struct intel_overlay *))
 {
 	struct drm_device *dev = overlay->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
 	int ret;
 
 	BUG_ON(overlay->last_flip_req);
-	i915_gem_request_assign(&overlay->last_flip_req,
-					     ring->outstanding_lazy_request);
-	ret = i915_add_request(ring);
+	i915_gem_request_assign(&overlay->last_flip_req, req);
+	ret = i915_add_request(overlay->last_flip_req);
 	if (ret)
 		return ret;
 
@@ -239,6 +237,7 @@ static int intel_overlay_on(struct intel_overlay *overlay)
 	struct drm_device *dev = overlay->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
+	struct drm_i915_gem_request *req;
 	int ret;
 
 	BUG_ON(overlay->active);
@@ -246,17 +245,21 @@ static int intel_overlay_on(struct intel_overlay *overlay)
 
 	WARN_ON(IS_I830(dev) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE));
 
-	ret = intel_ring_begin(ring, 4);
+	ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
 	if (ret)
 		return ret;
 
-	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON);
-	intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
-	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	ret = intel_ring_begin(req, 4);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(req->ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON);
+	intel_ring_emit(req->ring, overlay->flip_addr | OFC_UPDATE);
+	intel_ring_emit(req->ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
+	intel_ring_emit(req->ring, MI_NOOP);
+	intel_ring_advance(req->ring);
 
-	return intel_overlay_do_wait_request(overlay, NULL);
+	return intel_overlay_do_wait_request(overlay, req, NULL);
 }
 
 /* overlay needs to be enabled in OCMD reg */
@@ -266,6 +269,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
 	struct drm_device *dev = overlay->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
+	struct drm_i915_gem_request *req;
 	u32 flip_addr = overlay->flip_addr;
 	u32 tmp;
 	int ret;
@@ -280,7 +284,11 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
 	if (tmp & (1 << 17))
 		DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);
 
-	ret = intel_ring_begin(ring, 2);
+	ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
+	if (ret)
+		return ret;
+
+	ret = intel_ring_begin(req, 2);
 	if (ret)
 		return ret;
 
@@ -289,9 +297,8 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
 	intel_ring_advance(ring);
 
 	WARN_ON(overlay->last_flip_req);
-	i915_gem_request_assign(&overlay->last_flip_req,
-					     ring->outstanding_lazy_request);
-	return i915_add_request(ring);
+	i915_gem_request_assign(&overlay->last_flip_req, req);
+	return i915_add_request(req);
 }
 
 static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
@@ -326,6 +333,7 @@ static int intel_overlay_off(struct intel_overlay *overlay)
 	struct drm_device *dev = overlay->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
+	struct drm_i915_gem_request *req;
 	u32 flip_addr = overlay->flip_addr;
 	int ret;
 
@@ -337,7 +345,11 @@ static int intel_overlay_off(struct intel_overlay *overlay)
 	 * of the hw. Do it in both cases */
 	flip_addr |= OFC_UPDATE;
 
-	ret = intel_ring_begin(ring, 6);
+	ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
+	if (ret)
+		return ret;
+
+	ret = intel_ring_begin(req, 6);
 	if (ret)
 		return ret;
 
@@ -359,7 +371,7 @@ static int intel_overlay_off(struct intel_overlay *overlay)
 	}
 	intel_ring_advance(ring);
 
-	return intel_overlay_do_wait_request(overlay, intel_overlay_off_tail);
+	return intel_overlay_do_wait_request(overlay, req, intel_overlay_off_tail);
 }
 
 /* recover from an interruption due to a signal
@@ -404,7 +416,13 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 
 	if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
 		/* synchronous slowpath */
-		ret = intel_ring_begin(ring, 2);
+		struct drm_i915_gem_request *req;
+
+		ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
+		if (ret)
+			return ret;
+
+		ret = intel_ring_begin(req, 2);
 		if (ret)
 			return ret;
 
@@ -412,7 +430,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 		intel_ring_emit(ring, MI_NOOP);
 		intel_ring_advance(ring);
 
-		ret = intel_overlay_do_wait_request(overlay,
+		ret = intel_overlay_do_wait_request(overlay, req,
 						    intel_overlay_release_old_vid_tail);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 78911e2..5905fa5 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -5506,6 +5506,7 @@ static void ironlake_enable_rc6(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
+	struct drm_i915_gem_request *req = NULL;
 	bool was_interruptible;
 	int ret;
 
@@ -5524,16 +5525,17 @@ static void ironlake_enable_rc6(struct drm_device *dev)
 	was_interruptible = dev_priv->mm.interruptible;
 	dev_priv->mm.interruptible = false;
 
+	ret = dev_priv->gt.alloc_request(ring, NULL, &req);
+	if (ret)
+		goto err;
+
 	/*
 	 * GPU can automatically power down the render unit if given a page
 	 * to save state.
 	 */
-	ret = intel_ring_begin(ring, 6);
-	if (ret) {
-		ironlake_teardown_rc6(dev);
-		dev_priv->mm.interruptible = was_interruptible;
-		return;
-	}
+	ret = intel_ring_begin(req, 6);
+	if (ret)
+		goto err;
 
 	intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN);
 	intel_ring_emit(ring, MI_SET_CONTEXT);
@@ -5547,6 +5549,11 @@ static void ironlake_enable_rc6(struct drm_device *dev)
 	intel_ring_emit(ring, MI_FLUSH);
 	intel_ring_advance(ring);
 
+	ret = i915_add_request_no_flush(req);
+	if (ret)
+		goto err;
+	req = NULL;
+
 	/*
 	 * Wait for the command parser to advance past MI_SET_CONTEXT. The HW
 	 * does an implicit flush, combined with MI_FLUSH above, it should be
@@ -5554,16 +5561,21 @@ static void ironlake_enable_rc6(struct drm_device *dev)
 	 */
 	ret = intel_ring_idle(ring);
 	dev_priv->mm.interruptible = was_interruptible;
-	if (ret) {
-		DRM_ERROR("failed to enable ironlake power savings\n");
-		ironlake_teardown_rc6(dev);
-		return;
-	}
+	if (ret)
+		goto err;
 
 	I915_WRITE(PWRCTXA, i915_gem_obj_ggtt_offset(dev_priv->ips.pwrctx) | PWRCTX_EN);
 	I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
 
 	intel_print_rc6_info(dev, GEN6_RC_CTL_RC6_ENABLE);
+	return;
+
+err:
+	DRM_ERROR("failed to enable ironlake power savings\n");
+	ironlake_teardown_rc6(dev);
+	dev_priv->mm.interruptible = was_interruptible;
+	if (req)
+		i915_gem_request_unreference(req);
 }
 
 static unsigned long intel_pxfreq(u32 vidfreq)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b60e59b..e6e7bb5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -91,10 +91,11 @@ void __intel_ring_advance(struct intel_engine_cs *ring)
 }
 
 static int
-gen2_render_ring_flush(struct intel_engine_cs *ring,
+gen2_render_ring_flush(struct drm_i915_gem_request *req,
 		       u32	invalidate_domains,
 		       u32	flush_domains)
 {
+	struct intel_engine_cs *ring = req->ring;
 	u32 cmd;
 	int ret;
 
@@ -105,7 +106,7 @@ gen2_render_ring_flush(struct intel_engine_cs *ring,
 	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
 		cmd |= MI_READ_FLUSH;
 
-	ret = intel_ring_begin(ring, 2);
+	ret = intel_ring_begin(req, 2);
 	if (ret)
 		return ret;
 
@@ -117,10 +118,11 @@ gen2_render_ring_flush(struct intel_engine_cs *ring,
 }
 
 static int
-gen4_render_ring_flush(struct intel_engine_cs *ring,
+gen4_render_ring_flush(struct drm_i915_gem_request *req,
 		       u32	invalidate_domains,
 		       u32	flush_domains)
 {
+	struct intel_engine_cs *ring = req->ring;
 	struct drm_device *dev = ring->dev;
 	u32 cmd;
 	int ret;
@@ -163,7 +165,7 @@ gen4_render_ring_flush(struct intel_engine_cs *ring,
 	    (IS_G4X(dev) || IS_GEN5(dev)))
 		cmd |= MI_INVALIDATE_ISP;
 
-	ret = intel_ring_begin(ring, 2);
+	ret = intel_ring_begin(req, 2);
 	if (ret)
 		return ret;
 
@@ -212,12 +214,13 @@ gen4_render_ring_flush(struct intel_engine_cs *ring,
  * really our business.  That leaves only stall at scoreboard.
  */
 static int
-intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)
+intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
 {
+	struct intel_engine_cs *ring = req->ring;
 	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
 	int ret;
 
-	ret = intel_ring_begin(ring, 6);
+	ret = intel_ring_begin(req, 6);
 	if (ret)
 		return ret;
 
@@ -230,7 +233,7 @@ intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
 
-	ret = intel_ring_begin(ring, 6);
+	ret = intel_ring_begin(req, 6);
 	if (ret)
 		return ret;
 
@@ -246,15 +249,16 @@ intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)
 }
 
 static int
-gen6_render_ring_flush(struct intel_engine_cs *ring,
-                         u32 invalidate_domains, u32 flush_domains)
+gen6_render_ring_flush(struct drm_i915_gem_request *req,
+                       u32 invalidate_domains, u32 flush_domains)
 {
+	struct intel_engine_cs *ring = req->ring;
 	u32 flags = 0;
 	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
 	int ret;
 
 	/* Force SNB workarounds for PIPE_CONTROL flushes */
-	ret = intel_emit_post_sync_nonzero_flush(ring);
+	ret = intel_emit_post_sync_nonzero_flush(req);
 	if (ret)
 		return ret;
 
@@ -284,7 +288,7 @@ gen6_render_ring_flush(struct intel_engine_cs *ring,
 		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
 	}
 
-	ret = intel_ring_begin(ring, 4);
+	ret = intel_ring_begin(req, 4);
 	if (ret)
 		return ret;
 
@@ -298,11 +302,12 @@ gen6_render_ring_flush(struct intel_engine_cs *ring,
 }
 
 static int
-gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
+gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
 {
+	struct intel_engine_cs *ring = req->ring;
 	int ret;
 
-	ret = intel_ring_begin(ring, 4);
+	ret = intel_ring_begin(req, 4);
 	if (ret)
 		return ret;
 
@@ -316,14 +321,15 @@ gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
 	return 0;
 }
 
-static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value)
+static int gen7_ring_fbc_flush(struct drm_i915_gem_request *req, u32 value)
 {
+	struct intel_engine_cs *ring = req->ring;
 	int ret;
 
 	if (!ring->fbc_dirty)
 		return 0;
 
-	ret = intel_ring_begin(ring, 6);
+	ret = intel_ring_begin(req, 6);
 	if (ret)
 		return ret;
 	/* WaFbcNukeOn3DBlt:ivb/hsw */
@@ -340,9 +346,10 @@ static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value)
 }
 
 static int
-gen7_render_ring_flush(struct intel_engine_cs *ring,
+gen7_render_ring_flush(struct drm_i915_gem_request *req,
 		       u32 invalidate_domains, u32 flush_domains)
 {
+	struct intel_engine_cs *ring = req->ring;
 	u32 flags = 0;
 	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
 	int ret;
@@ -381,10 +388,10 @@ gen7_render_ring_flush(struct intel_engine_cs *ring,
 		/* Workaround: we must issue a pipe_control with CS-stall bit
 		 * set before a pipe_control command that has the state cache
 		 * invalidate bit set. */
-		gen7_render_ring_cs_stall_wa(ring);
+		gen7_render_ring_cs_stall_wa(req);
 	}
 
-	ret = intel_ring_begin(ring, 4);
+	ret = intel_ring_begin(req, 4);
 	if (ret)
 		return ret;
 
@@ -395,18 +402,19 @@ gen7_render_ring_flush(struct intel_engine_cs *ring,
 	intel_ring_advance(ring);
 
 	if (!invalidate_domains && flush_domains)
-		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
+		return gen7_ring_fbc_flush(req, FBC_REND_NUKE);
 
 	return 0;
 }
 
 static int
-gen8_emit_pipe_control(struct intel_engine_cs *ring,
+gen8_emit_pipe_control(struct drm_i915_gem_request *req,
 		       u32 flags, u32 scratch_addr)
 {
+	struct intel_engine_cs *ring = req->ring;
 	int ret;
 
-	ret = intel_ring_begin(ring, 6);
+	ret = intel_ring_begin(req, 6);
 	if (ret)
 		return ret;
 
@@ -422,11 +430,11 @@ gen8_emit_pipe_control(struct intel_engine_cs *ring,
 }
 
 static int
-gen8_render_ring_flush(struct intel_engine_cs *ring,
+gen8_render_ring_flush(struct drm_i915_gem_request *req,
 		       u32 invalidate_domains, u32 flush_domains)
 {
 	u32 flags = 0;
-	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+	u32 scratch_addr = req->ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
 	int ret;
 
 	flags |= PIPE_CONTROL_CS_STALL;
@@ -446,7 +454,7 @@ gen8_render_ring_flush(struct intel_engine_cs *ring,
 		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
 
 		/* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
-		ret = gen8_emit_pipe_control(ring,
+		ret = gen8_emit_pipe_control(req,
 					     PIPE_CONTROL_CS_STALL |
 					     PIPE_CONTROL_STALL_AT_SCOREBOARD,
 					     0);
@@ -454,12 +462,12 @@ gen8_render_ring_flush(struct intel_engine_cs *ring,
 			return ret;
 	}
 
-	ret = gen8_emit_pipe_control(ring, flags, scratch_addr);
+	ret = gen8_emit_pipe_control(req, flags, scratch_addr);
 	if (ret)
 		return ret;
 
 	if (!invalidate_domains && flush_domains)
-		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
+		return gen7_ring_fbc_flush(req, FBC_REND_NUKE);
 
 	return 0;
 }
@@ -670,9 +678,10 @@ err:
 	return ret;
 }
 
-static int intel_ring_workarounds_emit(struct intel_engine_cs *ring,
+static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req,
 				       struct intel_context *ctx)
 {
+	struct intel_engine_cs *ring = req->ring;
 	int ret, i;
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -682,11 +691,11 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring,
 		return 0;
 
 	ring->gpu_caches_dirty = true;
-	ret = intel_ring_flush_all_caches(ring);
+	ret = intel_ring_flush_all_caches(req);
 	if (ret)
 		return ret;
 
-	ret = intel_ring_begin(ring, (w->count * 2 + 2));
+	ret = intel_ring_begin(req, (w->count * 2 + 2));
 	if (ret)
 		return ret;
 
@@ -700,7 +709,7 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring,
 	intel_ring_advance(ring);
 
 	ring->gpu_caches_dirty = true;
-	ret = intel_ring_flush_all_caches(ring);
+	ret = intel_ring_flush_all_caches(req);
 	if (ret)
 		return ret;
 
@@ -898,10 +907,11 @@ static void render_ring_cleanup(struct intel_engine_cs *ring)
 	intel_fini_pipe_control(ring);
 }
 
-static int gen8_rcs_signal(struct intel_engine_cs *signaller,
+static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
 			   unsigned int num_dwords)
 {
 #define MBOX_UPDATE_DWORDS 8
+	struct intel_engine_cs *signaller = signaller_req->ring;
 	struct drm_device *dev = signaller->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *waiter;
@@ -911,7 +921,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,
 	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
 #undef MBOX_UPDATE_DWORDS
 
-	ret = intel_ring_begin(signaller, num_dwords);
+	ret = intel_ring_begin(signaller_req, num_dwords);
 	if (ret)
 		return ret;
 
@@ -921,8 +931,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,
 		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
 			continue;
 
-		seqno = i915_gem_request_get_seqno(
-					   signaller->outstanding_lazy_request);
+		seqno = i915_gem_request_get_seqno(signaller_req);
 		intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
 		intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
 					   PIPE_CONTROL_QW_WRITE |
@@ -939,10 +948,11 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,
 	return 0;
 }
 
-static int gen8_xcs_signal(struct intel_engine_cs *signaller,
+static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
 			   unsigned int num_dwords)
 {
 #define MBOX_UPDATE_DWORDS 6
+	struct intel_engine_cs *signaller = signaller_req->ring;
 	struct drm_device *dev = signaller->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *waiter;
@@ -952,7 +962,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,
 	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
 #undef MBOX_UPDATE_DWORDS
 
-	ret = intel_ring_begin(signaller, num_dwords);
+	ret = intel_ring_begin(signaller_req, num_dwords);
 	if (ret)
 		return ret;
 
@@ -962,8 +972,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,
 		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
 			continue;
 
-		seqno = i915_gem_request_get_seqno(
-					   signaller->outstanding_lazy_request);
+		seqno = i915_gem_request_get_seqno(signaller_req);
 		intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
 					   MI_FLUSH_DW_OP_STOREDW);
 		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
@@ -978,9 +987,10 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,
 	return 0;
 }
 
-static int gen6_signal(struct intel_engine_cs *signaller,
+static int gen6_signal(struct drm_i915_gem_request *signaller_req,
 		       unsigned int num_dwords)
 {
+	struct intel_engine_cs *signaller = signaller_req->ring;
 	struct drm_device *dev = signaller->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *useless;
@@ -991,15 +1001,14 @@ static int gen6_signal(struct intel_engine_cs *signaller,
 	num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
 #undef MBOX_UPDATE_DWORDS
 
-	ret = intel_ring_begin(signaller, num_dwords);
+	ret = intel_ring_begin(signaller_req, num_dwords);
 	if (ret)
 		return ret;
 
 	for_each_ring(useless, dev_priv, i) {
 		u32 mbox_reg = signaller->semaphore.mbox.signal[i];
 		if (mbox_reg != GEN6_NOSYNC) {
-			u32 seqno = i915_gem_request_get_seqno(
-					   signaller->outstanding_lazy_request);
+			u32 seqno = i915_gem_request_get_seqno(signaller_req);
 			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
 			intel_ring_emit(signaller, mbox_reg);
 			intel_ring_emit(signaller, seqno);
@@ -1016,29 +1025,28 @@ static int gen6_signal(struct intel_engine_cs *signaller,
 /**
  * gen6_add_request - Update the semaphore mailbox registers
  *
- * @ring - ring that is adding a request
- * @seqno - return seqno stuck into the ring
+ * @req - request to write to the ring
  *
  * Update the mailbox registers in the *other* rings with the current seqno.
  * This acts like a signal in the canonical semaphore.
  */
 static int
-gen6_add_request(struct intel_engine_cs *ring)
+gen6_add_request(struct drm_i915_gem_request *req)
 {
+	struct intel_engine_cs *ring = req->ring;
 	int ret;
 
 	if (ring->semaphore.signal)
-		ret = ring->semaphore.signal(ring, 4);
+		ret = ring->semaphore.signal(req, 4);
 	else
-		ret = intel_ring_begin(ring, 4);
+		ret = intel_ring_begin(req, 4);
 
 	if (ret)
 		return ret;
 
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring,
-		    i915_gem_request_get_seqno(ring->outstanding_lazy_request));
+	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	__intel_ring_advance(ring);
 
@@ -1061,14 +1069,15 @@ static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,
  */
 
 static int
-gen8_ring_sync(struct intel_engine_cs *waiter,
+gen8_ring_sync(struct drm_i915_gem_request *waiter_req,
 	       struct intel_engine_cs *signaller,
 	       u32 seqno)
 {
+	struct intel_engine_cs *waiter = waiter_req->ring;
 	struct drm_i915_private *dev_priv = waiter->dev->dev_private;
 	int ret;
 
-	ret = intel_ring_begin(waiter, 4);
+	ret = intel_ring_begin(waiter_req, 4);
 	if (ret)
 		return ret;
 
@@ -1086,10 +1095,11 @@ gen8_ring_sync(struct intel_engine_cs *waiter,
 }
 
 static int
-gen6_ring_sync(struct intel_engine_cs *waiter,
+gen6_ring_sync(struct drm_i915_gem_request *waiter_req,
 	       struct intel_engine_cs *signaller,
 	       u32 seqno)
 {
+	struct intel_engine_cs *waiter = waiter_req->ring;
 	u32 dw1 = MI_SEMAPHORE_MBOX |
 		  MI_SEMAPHORE_COMPARE |
 		  MI_SEMAPHORE_REGISTER;
@@ -1104,7 +1114,7 @@ gen6_ring_sync(struct intel_engine_cs *waiter,
 
 	WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
 
-	ret = intel_ring_begin(waiter, 4);
+	ret = intel_ring_begin(waiter_req, 4);
 	if (ret)
 		return ret;
 
@@ -1135,8 +1145,9 @@ do {									\
 } while (0)
 
 static int
-pc_render_add_request(struct intel_engine_cs *ring)
+pc_render_add_request(struct drm_i915_gem_request *req)
 {
+	struct intel_engine_cs *ring = req->ring;
 	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
 	int ret;
 
@@ -1148,7 +1159,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
 	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
 	 * memory before requesting an interrupt.
 	 */
-	ret = intel_ring_begin(ring, 32);
+	ret = intel_ring_begin(req, 32);
 	if (ret)
 		return ret;
 
@@ -1156,8 +1167,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
 			PIPE_CONTROL_WRITE_FLUSH |
 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
 	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring,
-		    i915_gem_request_get_seqno(ring->outstanding_lazy_request));
+	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
 	intel_ring_emit(ring, 0);
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
 	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
@@ -1176,8 +1186,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
 			PIPE_CONTROL_NOTIFY);
 	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring,
-		    i915_gem_request_get_seqno(ring->outstanding_lazy_request));
+	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
 	intel_ring_emit(ring, 0);
 	__intel_ring_advance(ring);
 
@@ -1390,13 +1399,14 @@ void intel_ring_setup_status_page(struct intel_engine_cs *ring)
 }
 
 static int
-bsd_ring_flush(struct intel_engine_cs *ring,
+bsd_ring_flush(struct drm_i915_gem_request *req,
 	       u32     invalidate_domains,
 	       u32     flush_domains)
 {
+	struct intel_engine_cs *ring = req->ring;
 	int ret;
 
-	ret = intel_ring_begin(ring, 2);
+	ret = intel_ring_begin(req, 2);
 	if (ret)
 		return ret;
 
@@ -1407,18 +1417,18 @@ bsd_ring_flush(struct intel_engine_cs *ring,
 }
 
 static int
-i9xx_add_request(struct intel_engine_cs *ring)
+i9xx_add_request(struct drm_i915_gem_request *req)
 {
+	struct intel_engine_cs *ring = req->ring;
 	int ret;
 
-	ret = intel_ring_begin(ring, 4);
+	ret = intel_ring_begin(req, 4);
 	if (ret)
 		return ret;
 
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring,
-		    i915_gem_request_get_seqno(ring->outstanding_lazy_request));
+	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	__intel_ring_advance(ring);
 
@@ -1550,13 +1560,14 @@ gen8_ring_put_irq(struct intel_engine_cs *ring)
 }
 
 static int
-i965_dispatch_execbuffer(struct intel_engine_cs *ring,
+i965_dispatch_execbuffer(struct drm_i915_gem_request *req,
 			 u64 offset, u32 length,
 			 unsigned flags)
 {
+	struct intel_engine_cs *ring = req->ring;
 	int ret;
 
-	ret = intel_ring_begin(ring, 2);
+	ret = intel_ring_begin(req, 2);
 	if (ret)
 		return ret;
 
@@ -1575,14 +1586,15 @@ i965_dispatch_execbuffer(struct intel_engine_cs *ring,
 #define I830_TLB_ENTRIES (2)
 #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
 static int
-i830_dispatch_execbuffer(struct intel_engine_cs *ring,
-				u64 offset, u32 len,
-				unsigned flags)
+i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
+			 u64 offset, u32 len,
+			 unsigned flags)
 {
+	struct intel_engine_cs *ring = req->ring;
 	u32 cs_offset = ring->scratch.gtt_offset;
 	int ret;
 
-	ret = intel_ring_begin(ring, 6);
+	ret = intel_ring_begin(req, 6);
 	if (ret)
 		return ret;
 
@@ -1599,7 +1611,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,
 		if (len > I830_BATCH_LIMIT)
 			return -ENOSPC;
 
-		ret = intel_ring_begin(ring, 6 + 2);
+		ret = intel_ring_begin(req, 6 + 2);
 		if (ret)
 			return ret;
 
@@ -1622,7 +1634,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,
 		offset = cs_offset;
 	}
 
-	ret = intel_ring_begin(ring, 4);
+	ret = intel_ring_begin(req, 4);
 	if (ret)
 		return ret;
 
@@ -1636,13 +1648,14 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,
 }
 
 static int
-i915_dispatch_execbuffer(struct intel_engine_cs *ring,
+i915_dispatch_execbuffer(struct drm_i915_gem_request *req,
 			 u64 offset, u32 len,
 			 unsigned flags)
 {
+	struct intel_engine_cs *ring = req->ring;
 	int ret;
 
-	ret = intel_ring_begin(ring, 2);
+	ret = intel_ring_begin(req, 2);
 	if (ret)
 		return ret;
 
@@ -1885,6 +1898,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
 
 	intel_unpin_ringbuffer_obj(ringbuf);
 	intel_destroy_ringbuffer_obj(ringbuf);
+	WARN_ON(ring->outstanding_lazy_request);
 	i915_gem_request_assign(&ring->outstanding_lazy_request, NULL);
 
 	if (ring->cleanup)
@@ -2007,8 +2021,9 @@ int intel_ring_idle(struct intel_engine_cs *ring)
 	int ret;
 
 	/* We need to add any requests required to flush the objects and ring */
+	WARN_ON(ring->outstanding_lazy_request);
 	if (ring->outstanding_lazy_request) {
-		ret = i915_add_request(ring);
+		ret = i915_add_request(ring->outstanding_lazy_request);
 		if (ret)
 			return ret;
 	}
@@ -2025,13 +2040,18 @@ int intel_ring_idle(struct intel_engine_cs *ring)
 }
 
 int
-intel_ring_alloc_request(struct intel_engine_cs *ring, struct intel_context *ctx)
+intel_ring_alloc_request(struct intel_engine_cs *ring,
+			 struct intel_context *ctx,
+			 struct drm_i915_gem_request **req_out)
 {
 	int ret;
 	struct drm_i915_gem_request *request;
 	struct drm_i915_private *dev_private = ring->dev->dev_private;
 
-	if (ring->outstanding_lazy_request)
+	if (!req_out)
+		return -EINVAL;
+
+	if ((*req_out = ring->outstanding_lazy_request) != NULL)
 		return 0;
 
 	request = kzalloc(sizeof(*request), GFP_KERNEL);
@@ -2053,7 +2073,7 @@ intel_ring_alloc_request(struct intel_engine_cs *ring, struct intel_context *ctx
 	spewThisReq(request, "\x1B[32mCreated: %d:%d, ref => %d\x1B[0m", request->uniq, request->seqno, request->ref.refcount.counter);
 
 	//printk(KERN_INFO "%s:%d> <%s> OLR = 0x%p, uniq = %d, seqno = %d\n", __func__, __LINE__, ring->name, request, request->uniq, request->seqno);
-	ring->outstanding_lazy_request = request;
+	*req_out = ring->outstanding_lazy_request = request;
 	return 0;
 }
 
@@ -2078,9 +2098,10 @@ static int __intel_ring_prepare(struct intel_engine_cs *ring,
 	return 0;
 }
 
-int intel_ring_begin(struct intel_engine_cs *ring,
+int intel_ring_begin(struct drm_i915_gem_request *req,
 		     int num_dwords)
 {
+	struct intel_engine_cs *ring = req->ring;
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	int ret;
 
@@ -2093,18 +2114,14 @@ int intel_ring_begin(struct intel_engine_cs *ring,
 	if (ret)
 		return ret;
 
-	/* Preallocate the olr before touching the ring */
-	ret = intel_ring_alloc_request(ring, NULL);
-	if (ret)
-		return ret;
-
 	ring->buffer->space -= num_dwords * sizeof(uint32_t);
 	return 0;
 }
 
 /* Align the ring tail to a cacheline boundary */
-int intel_ring_cacheline_align(struct intel_engine_cs *ring)
+int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
 {
+	struct intel_engine_cs *ring = req->ring;
 	int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
 	int ret;
 
@@ -2112,7 +2129,7 @@ int intel_ring_cacheline_align(struct intel_engine_cs *ring)
 		return 0;
 
 	num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
-	ret = intel_ring_begin(ring, num_dwords);
+	ret = intel_ring_begin(req, num_dwords);
 	if (ret)
 		return ret;
 
@@ -2176,13 +2193,14 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring,
 		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
 }
 
-static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
+static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
 			       u32 invalidate, u32 flush)
 {
+	struct intel_engine_cs *ring = req->ring;
 	uint32_t cmd;
 	int ret;
 
-	ret = intel_ring_begin(ring, 4);
+	ret = intel_ring_begin(req, 4);
 	if (ret)
 		return ret;
 
@@ -2212,14 +2230,15 @@ static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
 }
 
 static int
-gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
+gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
 			      u64 offset, u32 len,
 			      unsigned flags)
 {
+	struct intel_engine_cs *ring = req->ring;
 	bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE);
 	int ret;
 
-	ret = intel_ring_begin(ring, 4);
+	ret = intel_ring_begin(req, 4);
 	if (ret)
 		return ret;
 
@@ -2234,13 +2253,14 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
 }
 
 static int
-hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
+hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
 			      u64 offset, u32 len,
 			      unsigned flags)
 {
+	struct intel_engine_cs *ring = req->ring;
 	int ret;
 
-	ret = intel_ring_begin(ring, 2);
+	ret = intel_ring_begin(req, 2);
 	if (ret)
 		return ret;
 
@@ -2256,13 +2276,14 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
 }
 
 static int
-gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
+gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
 			      u64 offset, u32 len,
 			      unsigned flags)
 {
+	struct intel_engine_cs *ring = req->ring;
 	int ret;
 
-	ret = intel_ring_begin(ring, 2);
+	ret = intel_ring_begin(req, 2);
 	if (ret)
 		return ret;
 
@@ -2278,15 +2299,16 @@ gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
 
 /* Blitter support (SandyBridge+) */
 
-static int gen6_ring_flush(struct intel_engine_cs *ring,
+static int gen6_ring_flush(struct drm_i915_gem_request *req,
 			   u32 invalidate, u32 flush)
 {
+	struct intel_engine_cs *ring = req->ring;
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t cmd;
 	int ret;
 
-	ret = intel_ring_begin(ring, 4);
+	ret = intel_ring_begin(req, 4);
 	if (ret)
 		return ret;
 
@@ -2315,7 +2337,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
 
 	if (!invalidate && flush) {
 		if (IS_GEN7(dev))
-			return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);
+			return gen7_ring_fbc_flush(req, FBC_REND_CACHE_CLEAN);
 		else if (IS_BROADWELL(dev))
 			dev_priv->fbc.need_sw_cache_clean = true;
 	}
@@ -2696,14 +2718,15 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
 }
 
 int
-intel_ring_flush_all_caches(struct intel_engine_cs *ring)
+intel_ring_flush_all_caches(struct drm_i915_gem_request *req)
 {
+	struct intel_engine_cs *ring = req->ring;
 	int ret;
 
 	if (!ring->gpu_caches_dirty)
 		return 0;
 
-	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
+	ret = ring->flush(req, 0, I915_GEM_GPU_DOMAINS);
 	if (ret)
 		return ret;
 
@@ -2714,8 +2737,9 @@ intel_ring_flush_all_caches(struct intel_engine_cs *ring)
 }
 
 int
-intel_ring_invalidate_all_caches(struct intel_engine_cs *ring)
+intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
 {
+	struct intel_engine_cs *ring = req->ring;
 	uint32_t flush_domains;
 	int ret;
 
@@ -2723,7 +2747,7 @@ intel_ring_invalidate_all_caches(struct intel_engine_cs *ring)
 	if (ring->gpu_caches_dirty)
 		flush_domains = I915_GEM_GPU_DOMAINS;
 
-	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
+	ret = ring->flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 48cbb00..a7e47ad 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -154,15 +154,15 @@ struct  intel_engine_cs {
 
 	int		(*init_hw)(struct intel_engine_cs *ring);
 
-	int		(*init_context)(struct intel_engine_cs *ring,
+	int		(*init_context)(struct drm_i915_gem_request *req,
 					struct intel_context *ctx);
 
 	void		(*write_tail)(struct intel_engine_cs *ring,
 				      u32 value);
-	int __must_check (*flush)(struct intel_engine_cs *ring,
+	int __must_check (*flush)(struct drm_i915_gem_request *req,
 				  u32	invalidate_domains,
 				  u32	flush_domains);
-	int		(*add_request)(struct intel_engine_cs *ring);
+	int		(*add_request)(struct drm_i915_gem_request *req);
 	/* Some chipsets are not quite as coherent as advertised and need
 	 * an expensive kick to force a true read of the up-to-date seqno.
 	 * However, the up-to-date seqno is not always required and the last
@@ -173,7 +173,7 @@ struct  intel_engine_cs {
 				     bool lazy_coherency);
 	void		(*set_seqno)(struct intel_engine_cs *ring,
 				     u32 seqno);
-	int		(*dispatch_execbuffer)(struct intel_engine_cs *ring,
+	int		(*dispatch_execbuffer)(struct drm_i915_gem_request *req,
 					       u64 offset, u32 length,
 					       unsigned dispatch_flags);
 #define I915_DISPATCH_SECURE 0x1
@@ -231,10 +231,10 @@ struct  intel_engine_cs {
 		};
 
 		/* AKA wait() */
-		int	(*sync_to)(struct intel_engine_cs *ring,
-				   struct intel_engine_cs *to,
+		int	(*sync_to)(struct drm_i915_gem_request *to_req,
+				   struct intel_engine_cs *from,
 				   u32 seqno);
-		int	(*signal)(struct intel_engine_cs *signaller,
+		int	(*signal)(struct drm_i915_gem_request *signaller_req,
 				  /* num_dwords needed by caller */
 				  unsigned int num_dwords);
 	} semaphore;
@@ -245,11 +245,11 @@ struct  intel_engine_cs {
 	struct list_head execlist_retired_req_list;
 	u8 next_context_status_buffer;
 	u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
-	int		(*emit_request)(struct intel_ringbuffer *ringbuf);
-	int		(*emit_flush)(struct intel_ringbuffer *ringbuf,
+	int		(*emit_request)(struct drm_i915_gem_request *req);
+	int		(*emit_flush)(struct drm_i915_gem_request *req,
 				      u32 invalidate_domains,
 				      u32 flush_domains);
-	int		(*emit_bb_start)(struct intel_ringbuffer *ringbuf,
+	int		(*emit_bb_start)(struct drm_i915_gem_request *req,
 					 u64 offset, unsigned flags);
 
 	/**
@@ -433,10 +433,11 @@ int intel_alloc_ringbuffer_obj(struct drm_device *dev,
 void intel_stop_ring_buffer(struct intel_engine_cs *ring);
 void intel_cleanup_ring_buffer(struct intel_engine_cs *ring);
 
-int __must_check intel_ring_begin(struct intel_engine_cs *ring, int n);
-int __must_check intel_ring_cacheline_align(struct intel_engine_cs *ring);
+int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n);
+int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
 int __must_check intel_ring_alloc_request(struct intel_engine_cs *ring,
-					  struct intel_context *ctx);
+					  struct intel_context *ctx,
+					  struct drm_i915_gem_request **req_out);
 static inline void intel_ring_emit(struct intel_engine_cs *ring,
 				   u32 data)
 {
@@ -457,8 +458,8 @@ void __intel_ring_advance(struct intel_engine_cs *ring);
 
 int __must_check intel_ring_idle(struct intel_engine_cs *ring);
 void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno);
-int intel_ring_flush_all_caches(struct intel_engine_cs *ring);
-int intel_ring_invalidate_all_caches(struct intel_engine_cs *ring);
+int intel_ring_flush_all_caches(struct drm_i915_gem_request *req);
+int intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req);
 
 void intel_fini_pipe_control(struct intel_engine_cs *ring);
 int intel_init_pipe_control(struct intel_engine_cs *ring);
@@ -479,11 +480,4 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
 	return ringbuf->tail;
 }
 
-static inline struct drm_i915_gem_request *
-intel_ring_get_request(struct intel_engine_cs *ring)
-{
-	BUG_ON(ring->outstanding_lazy_request == NULL);
-	return ring->outstanding_lazy_request;
-}
-
 #endif /* _INTEL_RINGBUFFER_H_ */
-- 
1.7.9.5
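
For readers skimming the diff, the ownership model boils down to the pattern
below. This is only an illustrative sketch, not code from the patch: the two
sketch_*() functions are invented stand-ins, while the helpers they call
(dev_priv->gt.alloc_request(), intel_ring_begin(req, n),
i915_add_request_no_flush(), i915_gem_request_unreference()) are the
request-based interfaces the patch converts everything to use.

/*
 * Illustrative sketch only (not part of the patch): the calling pattern
 * that replaces the OLR.  sketch_emit_something() stands in for any
 * converted client (flush, workaround emit, dispatch, ...), and
 * sketch_top_level_owner() for whoever owns the work (execbuffer,
 * display, overlay, ...).
 */
static int sketch_emit_something(struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *ring = req->ring;
	int ret;

	/* Clients take the request as an explicit parameter ... */
	ret = intel_ring_begin(req, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_NOOP);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int sketch_top_level_owner(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	struct drm_i915_gem_request *req;
	int ret;

	/* ... while the top level owner allocates it ... */
	ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
	if (ret)
		return ret;

	ret = sketch_emit_something(req);
	if (ret) {
		i915_gem_request_unreference(req);
		return ret;
	}

	/* ... and is the only place that submits it. */
	return i915_add_request_no_flush(req);
}

Clients never reach for ring->outstanding_lazy_request; the owner that
allocated the request is the only place that submits or releases it.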

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] drm/i915: Remove OLR
  2014-12-19 14:41 [PATCH] drm/i915: Remove OLR John.C.Harrison
@ 2014-12-19 14:43 ` John Harrison
  2015-01-06 13:52 ` Daniel Vetter
  1 sibling, 0 replies; 3+ messages in thread
From: John Harrison @ 2014-12-19 14:43 UTC (permalink / raw)
  To: Intel-GFX

Doh! The subject was meant to be RFC not PATCH.

On 19/12/2014 14:41, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
>
> The outstanding lazy request mechanism does not really work well with
> a GPU scheduler. The scheduler expects each work packet, i.e. request
> structure, to be a complete entity and to belong to one and only one
> submitter. Whereas the whole lazy mechanism allows lots of work from
> lots of different places to all be lumped together into a single
> request. It also means that work is floating around in the system
> unowned and untracked at various random points in time. This all
> causes headaches for the scheduler.
>
> This patch removes the need for the outstanding lazy request. It
> converts all functions which would otherwise be relying on the OLR to
> explicitly manage the request. Either by allocating, passing and
> submitting the request if they are the top level owner. Or by simply
> taking a request in as a parameter rather than pulling it out of the
> magic global variable if they are a client. The OLR itself is left in
> along with a bunch of sanity check asserts that it matches the request
> being passed in as a parameter. However, it should now be safe to
> remove completely.
>
> Note that this patch is not intended as a final, shipping, isn't it
> gorgeous, end product. It is merely a quick hack that I went through
> as being the simplest way to actually work out what the real sequence
> of events and the real ownership of work is in certain circumstances.
> Most particularly to do with display and overlay work. However, I
> would like to get agreement that it is a good direction to go in and
> that removing the OLR would be a good thing. Or, to put it another
> way, is it worth me trying to break this patch into a set of
> manageable items or do I just abandon it and give up?
>
> Note also that the patch is based on a tree including the scheduler
> prep-work patches posted earlier. So it will not apply to a clean
> nightly tree.
>
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h              |   29 ++--
>   drivers/gpu/drm/i915/i915_gem.c              |  182 ++++++++++++--------
>   drivers/gpu/drm/i915/i915_gem_context.c      |   69 +++-----
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c   |   62 +++----
>   drivers/gpu/drm/i915/i915_gem_gtt.c          |   64 ++++----
>   drivers/gpu/drm/i915/i915_gem_gtt.h          |    3 +-
>   drivers/gpu/drm/i915/i915_gem_render_state.c |   10 +-
>   drivers/gpu/drm/i915/i915_gem_render_state.h |    2 +-
>   drivers/gpu/drm/i915/intel_display.c         |   68 ++++----
>   drivers/gpu/drm/i915/intel_lrc.c             |  145 +++++++++-------
>   drivers/gpu/drm/i915/intel_lrc.h             |    8 +-
>   drivers/gpu/drm/i915/intel_overlay.c         |   58 ++++---
>   drivers/gpu/drm/i915/intel_pm.c              |   33 ++--
>   drivers/gpu/drm/i915/intel_ringbuffer.c      |  228 ++++++++++++++------------
>   drivers/gpu/drm/i915/intel_ringbuffer.h      |   38 ++---
>   15 files changed, 553 insertions(+), 446 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 511f55f..7b4309e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -513,7 +513,7 @@ struct drm_i915_display_funcs {
>   	int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc,
>   			  struct drm_framebuffer *fb,
>   			  struct drm_i915_gem_object *obj,
> -			  struct intel_engine_cs *ring,
> +			  struct drm_i915_gem_request *req,
>   			  uint32_t flags);
>   	void (*update_primary_plane)(struct drm_crtc *crtc,
>   				     struct drm_framebuffer *fb,
> @@ -1796,7 +1796,8 @@ struct drm_i915_private {
>   	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
>   	struct {
>   		int (*alloc_request)(struct intel_engine_cs *ring,
> -				     struct intel_context *ctx);
> +				     struct intel_context *ctx,
> +				     struct drm_i915_gem_request **req_out);
>   		int (*do_execbuf)(struct i915_execbuffer_params *params,
>   				  struct drm_i915_gem_execbuffer2 *args,
>   				  struct list_head *vmas);
> @@ -2511,10 +2512,10 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>   int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
>   			     struct drm_file *file_priv);
>   void i915_gem_execbuffer_move_to_active(struct list_head *vmas,
> -					struct intel_engine_cs *ring);
> +					struct drm_i915_gem_request *req);
>   void i915_gem_execbuffer_retire_commands(struct drm_device *dev,
>   					 struct drm_file *file,
> -					 struct intel_engine_cs *ring,
> +					 struct drm_i915_gem_request *req,
>   					 struct drm_i915_gem_object *obj);
>   void i915_gem_execbuff_release_batch_obj(struct drm_i915_gem_object *batch_obj);
>   int i915_gem_ringbuffer_submission(struct i915_execbuffer_params *qe,
> @@ -2609,9 +2610,9 @@ int __must_check __i915_mutex_lock_interruptible(struct drm_device *dev, const c
>   int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
>   #endif
>   int i915_gem_object_sync(struct drm_i915_gem_object *obj,
> -			 struct intel_engine_cs *to, bool add_request);
> +			 struct drm_i915_gem_request *to_req);
>   void i915_vma_move_to_active(struct i915_vma *vma,
> -			     struct intel_engine_cs *ring);
> +			     struct drm_i915_gem_request *req);
>   int i915_gem_dumb_create(struct drm_file *file_priv,
>   			 struct drm_device *dev,
>   			 struct drm_mode_create_dumb *args);
> @@ -2678,19 +2679,19 @@ int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
>   int __must_check i915_gem_init(struct drm_device *dev);
>   int i915_gem_init_rings(struct drm_device *dev);
>   int __must_check i915_gem_init_hw(struct drm_device *dev);
> -int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice);
> +int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice);
>   void i915_gem_init_swizzling(struct drm_device *dev);
>   void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
>   int __must_check i915_gpu_idle(struct drm_device *dev);
>   int __must_check i915_gem_suspend(struct drm_device *dev);
> -int __i915_add_request(struct intel_engine_cs *ring,
> +int __i915_add_request(struct drm_i915_gem_request *req,
>   		       struct drm_file *file,
>   		       struct drm_i915_gem_object *batch_obj,
>   		       bool flush_caches);
> -#define i915_add_request(ring) \
> -	__i915_add_request(ring, NULL, NULL, true)
> -#define i915_add_request_no_flush(ring) \
> -	__i915_add_request(ring, NULL, NULL, false)
> +#define i915_add_request(req) \
> +	__i915_add_request(req, NULL, NULL, true)
> +#define i915_add_request_no_flush(req) \
> +	__i915_add_request(req, NULL, NULL, false)
>   int __i915_wait_request(struct drm_i915_gem_request *req,
>   			unsigned reset_counter,
>   			bool interruptible,
> @@ -2810,9 +2811,9 @@ int __must_check i915_gem_context_init(struct drm_device *dev);
>   void i915_gem_context_fini(struct drm_device *dev);
>   void i915_gem_context_reset(struct drm_device *dev);
>   int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
> -int i915_gem_context_enable(struct drm_i915_private *dev_priv);
> +int i915_gem_context_enable(struct drm_i915_gem_request *req);
>   void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
> -int i915_switch_context(struct intel_engine_cs *ring,
> +int i915_switch_context(struct drm_i915_gem_request *req,
>   			struct intel_context *to);
>   struct intel_context *
>   i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 1d2cbfb..dbfb4e5 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1178,7 +1178,7 @@ i915_gem_check_olr(struct drm_i915_gem_request *req)
>   
>   	ret = 0;
>   	if (req == req->ring->outstanding_lazy_request)
> -		ret = i915_add_request(req->ring);
> +		ret = i915_add_request(req);
>   
>   	return ret;
>   }
> @@ -2294,17 +2294,16 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>   
>   static void
>   i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
> -			       struct intel_engine_cs *ring)
> +			       struct drm_i915_gem_request *req)
>   {
> -	struct drm_i915_gem_request *req;
> -	struct intel_engine_cs *old_ring;
> +	struct intel_engine_cs *new_ring, *old_ring;
>   
> -	BUG_ON(ring == NULL);
> +	BUG_ON(req == NULL);
>   
> -	req = intel_ring_get_request(ring);
> +	new_ring = i915_gem_request_get_ring(req);
>   	old_ring = i915_gem_request_get_ring(obj->last_read_req);
>   
> -	if (old_ring != ring && obj->last_write_req) {
> +	if (old_ring != new_ring && obj->last_write_req) {
>   		/* Keep the request relative to the current ring */
>   		i915_gem_request_assign(&obj->last_write_req, req);
>   	}
> @@ -2315,17 +2314,17 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
>   		obj->active = 1;
>   	}
>   
> -	list_move_tail(&obj->ring_list, &ring->active_list);
> +	list_move_tail(&obj->ring_list, &new_ring->active_list);
>   
> -	//printk(KERN_INFO "%s:%d> <%s> obj = %p, last_read_req <= 0x%p\n", __func__, __LINE__, ring->name, obj, req);
> +	//printk(KERN_INFO "%s:%d> <%s> obj = %p, last_read_req <= 0x%p\n", __func__, __LINE__, new_ring->name, obj, req);
>   	i915_gem_request_assign(&obj->last_read_req, req);
>   }
>   
>   void i915_vma_move_to_active(struct i915_vma *vma,
> -			     struct intel_engine_cs *ring)
> +			     struct drm_i915_gem_request *req)
>   {
>   	list_move_tail(&vma->mm_list, &vma->vm->active_list);
> -	return i915_gem_object_move_to_active(vma->obj, ring);
> +	return i915_gem_object_move_to_active(vma->obj, req);
>   }
>   
>   static void
> @@ -2440,26 +2439,35 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
>   	return 0;
>   }
>   
> -int __i915_add_request(struct intel_engine_cs *ring,
> +int __i915_add_request(struct drm_i915_gem_request *request,
>   		       struct drm_file *file,
>   		       struct drm_i915_gem_object *obj,
>   		       bool flush_caches)
>   {
> -	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> -	struct drm_i915_gem_request *request;
> +	struct intel_engine_cs *ring;
> +	struct drm_i915_private *dev_priv;
>   	struct intel_ringbuffer *ringbuf;
>   	u32 request_ring_position, request_start;
>   	int ret;
>   
> -	request = ring->outstanding_lazy_request;
> +	/*printk( KERN_ERR "<%s> request %c %d:%d, OLR %c %d:%d\n",
> +		request ? request->ring->name : "???",
> +		request ? '=' : '?',
> +		request ? request->uniq : -1,
> +		request ? request->seqno : 0,
> +		request->ring->outstanding_lazy_request ? '=' : '?',
> +		request->ring->outstanding_lazy_request ? request->ring->outstanding_lazy_request->uniq : -1,
> +		request->ring->outstanding_lazy_request ? request->ring->outstanding_lazy_request->seqno : 0);*/
> +	//dump_stack();
> +
>   	if (WARN_ON(request == NULL))
>   		return -ENOMEM;
>   
> -	if (i915.enable_execlists) {
> -		struct intel_context *ctx = request->ctx;
> -		ringbuf = ctx->engine[ring->id].ringbuf;
> -	} else
> -		ringbuf = ring->buffer;
> +	ring = request->ring;
> +	dev_priv = ring->dev->dev_private;
> +	ringbuf = request->ringbuf;
> +
> +	WARN_ON(request != ring->outstanding_lazy_request);
>   
>   	request_start = intel_ring_get_tail(ringbuf);
>   	/*
> @@ -2471,9 +2479,9 @@ int __i915_add_request(struct intel_engine_cs *ring,
>   	 */
>   	if (flush_caches) {
>   		if (i915.enable_execlists)
> -			ret = logical_ring_flush_all_caches(ringbuf);
> +			ret = logical_ring_flush_all_caches(request);
>   		else
> -			ret = intel_ring_flush_all_caches(ring);
> +			ret = intel_ring_flush_all_caches(request);
>   		if (ret)
>   			return ret;
>   	}
> @@ -2488,9 +2496,9 @@ int __i915_add_request(struct intel_engine_cs *ring,
>   	request_ring_position = intel_ring_get_tail(ringbuf);
>   
>   	if (i915.enable_execlists)
> -		ret = ring->emit_request(ringbuf);
> +		ret = ring->emit_request(request);
>   	else
> -		ret = ring->add_request(ring);
> +		ret = ring->add_request(request);
>   	if (ret)
>   		return ret;
>   
> @@ -2504,7 +2512,8 @@ int __i915_add_request(struct intel_engine_cs *ring,
>   	 * inactive_list and lose its active reference. Hence we do not need
>   	 * to explicitly hold another reference here.
>   	 */
> -	request->batch_obj = obj;
> +	if (obj)
> +		request->batch_obj = obj;
>   
>   	if (!i915.enable_execlists) {
>   		/* Hold a reference to the current context so that we can inspect
> @@ -2744,6 +2753,7 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
>   #endif
>   
>   	/* This may not have been flushed before the reset, so clean it now */
> +	WARN_ON(ring->outstanding_lazy_request);
>   	i915_gem_request_assign(&ring->outstanding_lazy_request, NULL);
>   }
>   
> @@ -3114,8 +3124,6 @@ out:
>    *
>    * @obj: object which may be in use on another ring.
>    * @to: ring we wish to use the object on. May be NULL.
> - * @add_request: do we need to add a request to track operations
> - *    submitted on ring with sync_to function
>    *
>    * This code is meant to abstract object synchronization with the GPU.
>    * Calling with NULL implies synchronizing the object with the CPU
> @@ -3125,8 +3133,9 @@ out:
>    */
>   int
>   i915_gem_object_sync(struct drm_i915_gem_object *obj,
> -		     struct intel_engine_cs *to, bool add_request)
> +		     struct drm_i915_gem_request *to_req)
>   {
> +	struct intel_engine_cs *to = to_req->ring;
>   	struct intel_engine_cs *from;
>   	u32 seqno;
>   	int ret, idx;
> @@ -3152,7 +3161,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
>   		return ret;
>   
>   	trace_i915_gem_ring_sync_to(from, to, obj->last_read_req);
> -	ret = to->semaphore.sync_to(to, from, seqno);
> +	ret = to->semaphore.sync_to(to_req, from, seqno);
>   	if (!ret) {
>   		/* We use last_read_req because sync_to()
>   		 * might have just caused seqno wrap under
> @@ -3160,8 +3169,6 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
>   		 */
>   		from->semaphore.sync_seqno[idx] =
>   				i915_gem_request_get_seqno(obj->last_read_req);
> -		if (add_request)
> -			i915_add_request_no_flush(to);
>   	}
>   
>   	return ret;
> @@ -3266,18 +3273,23 @@ int i915_gpu_idle(struct drm_device *dev)
>   	/* Flush everything onto the inactive list. */
>   	for_each_ring(ring, dev_priv, i) {
>   		if (!i915.enable_execlists) {
> -			ret = i915_switch_context(ring, ring->default_context);
> +			struct drm_i915_gem_request *req;
> +
> +			ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
>   			if (ret)
>   				return ret;
> -		}
>   
> -		/* Make sure the context switch (if one actually happened)
> -		 * gets wrapped up and finished rather than hanging around
> -		 * and confusing things later. */
> -		if (ring->outstanding_lazy_request) {
> -			ret = i915_add_request(ring);
> -			if (ret)
> +			ret = i915_switch_context(req, ring->default_context);
> +			if (ret) {
> +				i915_gem_request_unreference(req);
>   				return ret;
> +			}
> +
> +			ret = i915_add_request_no_flush(req);
> +			if (ret) {
> +				i915_gem_request_unreference(req);
> +				return ret;
> +			}
>   		}
>   
>   		ret = intel_ring_idle(ring);
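
For reference, this hunk is the request lifecycle in its purest form,
and it is the pattern every legacy-mode call site now follows: allocate
against the target context, emit the work, then either submit or drop
the reference. A condensed sketch, assuming gt.alloc_request keeps the
(ring, ctx, &req) signature used throughout this patch; do_work() is a
placeholder for whatever gets emitted (here it is i915_switch_context()):

	struct drm_i915_gem_request *req;
	int ret;

	ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
	if (ret)
		return ret;

	ret = do_work(req);				/* placeholder */
	if (ret) {
		i915_gem_request_unreference(req);	/* nothing was submitted */
		return ret;
	}

	ret = i915_add_request_no_flush(req);		/* work now owned by the request */
	if (ret) {
		i915_gem_request_unreference(req);
		return ret;
	}

The OLR used to hide both the allocation and the submission; after this
patch they are explicit at each top-level owner.
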
> @@ -4099,8 +4111,19 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>   	bool was_pin_display;
>   	int ret;
>   
> -	if (pipelined != i915_gem_request_get_ring(obj->last_read_req)) {
> -		ret = i915_gem_object_sync(obj, pipelined, true);
> +	if (pipelined && (pipelined != i915_gem_request_get_ring(obj->last_read_req))) {
> +		struct drm_i915_private *dev_priv = pipelined->dev->dev_private;
> +		struct drm_i915_gem_request *req;
> +
> +		ret = dev_priv->gt.alloc_request(pipelined, pipelined->default_context, &req);
> +		if (ret)
> +			return ret;
> +
> +		ret = i915_gem_object_sync(obj, req);
> +		if (ret)
> +			return ret;
> +
> +		ret = i915_add_request_no_flush(req);
>   		if (ret)
>   			return ret;
>   	}
> @@ -4771,8 +4794,9 @@ err:
>   	return ret;
>   }
>   
> -int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice)
> +int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	struct drm_device *dev = ring->dev;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
> @@ -4782,7 +4806,7 @@ int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice)
>   	if (!HAS_L3_DPF(dev) || !remap_info)
>   		return 0;
>   
> -	ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3);
> +	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
>   	if (ret)
>   		return ret;
>   
> @@ -4962,37 +4986,67 @@ i915_gem_init_hw(struct drm_device *dev)
>   	 */
>   	init_unused_rings(dev);
>   
> +	BUG_ON(!dev_priv->ring[RCS].default_context);
> +
> +	ret = i915_ppgtt_init_hw(dev);
> +	if (ret) {
> +		DRM_ERROR("PPGTT enable failed %d\n", ret);
> +		i915_gem_cleanup_ringbuffer(dev);
> +		return ret;
> +	}
> +
>   	for_each_ring(ring, dev_priv, i) {
> +		struct drm_i915_gem_request *req;
> +
>   		ret = ring->init_hw(ring);
>   		if (ret)
>   			return ret;
> -	}
>   
> -	for (i = 0; i < NUM_L3_SLICES(dev); i++)
> -		i915_gem_l3_remap(&dev_priv->ring[RCS], i);
> +		if (!ring->default_context)
> +			continue;
>   
> -	/*
> -	 * XXX: Contexts should only be initialized once. Doing a switch to the
> -	 * default context switch however is something we'd like to do after
> -	 * reset or thaw (the latter may not actually be necessary for HW, but
> -	 * goes with our code better). Context switching requires rings (for
> -	 * the do_switch), but before enabling PPGTT. So don't move this.
> -	 */
> -	ret = i915_gem_context_enable(dev_priv);
> -	if (ret && ret != -EIO) {
> -		DRM_ERROR("Context enable failed %d\n", ret);
> -		i915_gem_cleanup_ringbuffer(dev);
> +		ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
> +		if (ret)
> +			return ret;
>   
> -		return ret;
> -	}
> +		if (ring->id == RCS) {
> +			for (i = 0; i < NUM_L3_SLICES(dev); i++)
> +				i915_gem_l3_remap(req, i);
> +		}
>   
> -	ret = i915_ppgtt_init_hw(dev);
> -	if (ret && ret != -EIO) {
> -		DRM_ERROR("PPGTT enable failed %d\n", ret);
> -		i915_gem_cleanup_ringbuffer(dev);
> +		/*
> +		 * XXX: Contexts should only be initialized once. Doing a switch to the
> +		 * default context switch however is something we'd like to do after
> +		 * reset or thaw (the latter may not actually be necessary for HW, but
> +		 * goes with our code better). Context switching requires rings (for
> +		 * the do_switch), but before enabling PPGTT. So don't move this.
> +		 */
> +		ret = i915_gem_context_enable(req);
> +		if (ret && ret != -EIO) {
> +			DRM_ERROR("Context enable failed %d\n", ret);
> +			i915_gem_request_unreference(req);
> +			i915_gem_cleanup_ringbuffer(dev);
> +
> +			return ret;
> +		}
> +
> +		ret = i915_ppgtt_init_ring(req);
> +		if (ret && ret != -EIO) {
> +			DRM_ERROR("PPGTT enable failed %d\n", ret);
> +			i915_gem_request_unreference(req);
> +			i915_gem_cleanup_ringbuffer(dev);
> +
> +			return ret;
> +		}
> +
> +		ret = i915_add_request_no_flush(req);
> +		if (ret) {
> +			DRM_ERROR("Add request failed: %d\n", ret);
> +			i915_gem_request_unreference(req);
> +			i915_gem_cleanup_ringbuffer(dev);
> +			return ret;
> +		}
>   	}
>   
> -	return ret;
> +	return 0;
>   }
>   
>   int i915_gem_init(struct drm_device *dev)
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index c5e1bfc..72e280b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -401,41 +401,23 @@ void i915_gem_context_fini(struct drm_device *dev)
>   	i915_gem_context_unreference(dctx);
>   }
>   
> -int i915_gem_context_enable(struct drm_i915_private *dev_priv)
> +int i915_gem_context_enable(struct drm_i915_gem_request *req)
>   {
> -	struct intel_engine_cs *ring;
> -	int ret, i;
> -
> -	BUG_ON(!dev_priv->ring[RCS].default_context);
> +	struct intel_engine_cs *ring = req->ring;
> +	int ret;
>   
>   	if (i915.enable_execlists) {
> -		for_each_ring(ring, dev_priv, i) {
> -			if (ring->init_context) {
> -				ret = ring->init_context(ring,
> -						ring->default_context);
> -				if (ret) {
> -					DRM_ERROR("ring init context: %d\n",
> -							ret);
> -					return ret;
> -				}
> -			}
> -		}
> +		if (ring->init_context == NULL)
> +			return 0;
>   
> +		ret = ring->init_context(req, ring->default_context);
>   	} else
> -		for_each_ring(ring, dev_priv, i) {
> -			ret = i915_switch_context(ring, ring->default_context);
> -			if (ret)
> -				return ret;
> -
> -			/* Make sure the context switch (if one actually happened)
> -			 * gets wrapped up and finished rather than hanging around
> -			 * and confusing things later. */
> -			if (ring->outstanding_lazy_request) {
> -				ret = i915_add_request_no_flush(ring);
> -				if (ret)
> -					return ret;
> -			}
> -		}
> +		ret = i915_switch_context(req, ring->default_context);
> +
> +	if (ret) {
> +		DRM_ERROR("ring init context: %d\n", ret);
> +		return ret;
> +	}
>   
>   	return 0;
>   }
> @@ -488,10 +470,11 @@ i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id)
>   }
>   
>   static inline int
> -mi_set_context(struct intel_engine_cs *ring,
> +mi_set_context(struct drm_i915_gem_request *req,
>   	       struct intel_context *new_context,
>   	       u32 hw_flags)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	u32 flags = hw_flags | MI_MM_SPACE_GTT;
>   	int ret;
>   
> @@ -501,7 +484,7 @@ mi_set_context(struct intel_engine_cs *ring,
>   	 * itlb_before_ctx_switch.
>   	 */
>   	if (IS_GEN6(ring->dev)) {
> -		ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, 0);
> +		ret = ring->flush(req, I915_GEM_GPU_DOMAINS, 0);
>   		if (ret)
>   			return ret;
>   	}
> @@ -510,7 +493,7 @@ mi_set_context(struct intel_engine_cs *ring,
>   	if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8)
>   		flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);
>   
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(req, 6);
>   	if (ret)
>   		return ret;
>   
> @@ -540,9 +523,10 @@ mi_set_context(struct intel_engine_cs *ring,
>   	return ret;
>   }
>   
> -static int do_switch(struct intel_engine_cs *ring,
> +static int do_switch(struct drm_i915_gem_request *req,
>   		     struct intel_context *to)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	struct drm_i915_private *dev_priv = ring->dev->dev_private;
>   	struct intel_context *from = ring->last_context;
>   	u32 hw_flags = 0;
> @@ -577,7 +561,7 @@ static int do_switch(struct intel_engine_cs *ring,
>   
>   	if (to->ppgtt) {
>   		trace_switch_mm(ring, to);
> -		ret = to->ppgtt->switch_mm(to->ppgtt, ring);
> +		ret = to->ppgtt->switch_mm(to->ppgtt, req);
>   		if (ret)
>   			goto unpin_out;
>   	}
> @@ -608,7 +592,7 @@ static int do_switch(struct intel_engine_cs *ring,
>   	if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to))
>   		hw_flags |= MI_RESTORE_INHIBIT;
>   
> -	ret = mi_set_context(ring, to, hw_flags);
> +	ret = mi_set_context(req, to, hw_flags);
>   	if (ret)
>   		goto unpin_out;
>   
> @@ -616,7 +600,7 @@ static int do_switch(struct intel_engine_cs *ring,
>   		if (!(to->remap_slice & (1<<i)))
>   			continue;
>   
> -		ret = i915_gem_l3_remap(ring, i);
> +		ret = i915_gem_l3_remap(req, i);
>   		/* If it failed, try again next round */
>   		if (ret)
>   			DRM_DEBUG_DRIVER("L3 remapping failed\n");
> @@ -632,7 +616,7 @@ static int do_switch(struct intel_engine_cs *ring,
>   	 */
>   	if (from != NULL) {
>   		from->legacy_hw_ctx.rcs_state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> -		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), ring);
> +		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), req);
>   		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
>   		 * whole damn pipeline, we don't need to explicitly mark the
>   		 * object dirty. The only exception is that the context must be
> @@ -658,12 +642,12 @@ done:
>   
>   	if (uninitialized) {
>   		if (ring->init_context) {
> -			ret = ring->init_context(ring, to);
> +			ret = ring->init_context(req, to);
>   			if (ret)
>   				DRM_ERROR("ring init context: %d\n", ret);
>   		}
>   
> -		ret = i915_gem_render_state_init(ring);
> +		ret = i915_gem_render_state_init(req);
>   		if (ret)
>   			DRM_ERROR("init render state: %d\n", ret);
>   	}
> @@ -690,9 +674,10 @@ unpin_out:
>    * switched by writing to the ELSP and requests keep a reference to their
>    * context.
>    */
> -int i915_switch_context(struct intel_engine_cs *ring,
> +int i915_switch_context(struct drm_i915_gem_request *req,
>   			struct intel_context *to)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	struct drm_i915_private *dev_priv = ring->dev->dev_private;
>   
>   	WARN_ON(i915.enable_execlists);
> @@ -708,7 +693,7 @@ int i915_switch_context(struct intel_engine_cs *ring,
>   		return 0;
>   	}
>   
> -	return do_switch(ring, to);
> +	return do_switch(req, to);
>   }
>   
>   static bool contexts_enabled(struct drm_device *dev)
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index ca31673..5caa2a2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -822,7 +822,7 @@ err:
>   }
>   
>   static int
> -i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
> +i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
>   				struct list_head *vmas)
>   {
>   	struct i915_vma *vma;
> @@ -832,7 +832,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
>   
>   	list_for_each_entry(vma, vmas, exec_list) {
>   		struct drm_i915_gem_object *obj = vma->obj;
> -		ret = i915_gem_object_sync(obj, ring, false);
> +		ret = i915_gem_object_sync(obj, req);
>   		if (ret)
>   			return ret;
>   
> @@ -843,7 +843,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
>   	}
>   
>   	if (flush_chipset)
> -		i915_gem_chipset_flush(ring->dev);
> +		i915_gem_chipset_flush(req->ring->dev);
>   
>   	if (flush_domains & I915_GEM_DOMAIN_GTT)
>   		wmb();
> @@ -941,9 +941,9 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
>   
>   void
>   i915_gem_execbuffer_move_to_active(struct list_head *vmas,
> -				   struct intel_engine_cs *ring)
> +				   struct drm_i915_gem_request *req)
>   {
> -	struct drm_i915_gem_request *req = intel_ring_get_request(ring);
> +	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
>   	struct i915_vma *vma;
>   
>   	list_for_each_entry(vma, vmas, exec_list) {
> @@ -957,7 +957,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
>   			obj->base.pending_read_domains |= obj->base.read_domains;
>   		obj->base.read_domains = obj->base.pending_read_domains;
>   
> -		i915_vma_move_to_active(vma, ring);
> +		i915_vma_move_to_active(vma, req);
>   		if (obj->base.write_domain) {
>   			obj->dirty = 1;
>   			i915_gem_request_assign(&obj->last_write_req, req);
> @@ -983,20 +983,21 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
>   void
>   i915_gem_execbuffer_retire_commands(struct drm_device *dev,
>   				    struct drm_file *file,
> -				    struct intel_engine_cs *ring,
> +				    struct drm_i915_gem_request *req,
>   				    struct drm_i915_gem_object *obj)
>   {
>   	/* Unconditionally force add_request to emit a full flush. */
> -	ring->gpu_caches_dirty = true;
> +	req->ring->gpu_caches_dirty = true;
>   
>   	/* Add a breadcrumb for the completion of the batch buffer */
> -	(void)__i915_add_request(ring, file, obj, true);
> +	(void)__i915_add_request(req, file, obj, true);
>   }
>   
>   static int
>   i915_reset_gen7_sol_offsets(struct drm_device *dev,
> -			    struct intel_engine_cs *ring)
> +			    struct drm_i915_gem_request *req)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	int ret, i;
>   
> @@ -1005,7 +1006,7 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
>   		return -EINVAL;
>   	}
>   
> -	ret = intel_ring_begin(ring, 4 * 3);
> +	ret = intel_ring_begin(req, 4 * 3);
>   	if (ret)
>   		return ret;
>   
> @@ -1021,10 +1022,11 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
>   }
>   
>   static int
> -i915_emit_box(struct intel_engine_cs *ring,
> +i915_emit_box(struct drm_i915_gem_request *req,
>   	      struct drm_clip_rect *box,
>   	      int DR1, int DR4)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	int ret;
>   
>   	if (box->y2 <= box->y1 || box->x2 <= box->x1 ||
> @@ -1035,7 +1037,7 @@ i915_emit_box(struct intel_engine_cs *ring,
>   	}
>   
>   	if (INTEL_INFO(ring->dev)->gen >= 4) {
> -		ret = intel_ring_begin(ring, 4);
> +		ret = intel_ring_begin(req, 4);
>   		if (ret)
>   			return ret;
>   
> @@ -1044,7 +1046,7 @@ i915_emit_box(struct intel_engine_cs *ring,
>   		intel_ring_emit(ring, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16);
>   		intel_ring_emit(ring, DR4);
>   	} else {
> -		ret = intel_ring_begin(ring, 6);
> +		ret = intel_ring_begin(req, 6);
>   		if (ret)
>   			return ret;
>   
> @@ -1151,11 +1153,11 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
>   		goto error;
>   	}
>   
> -	ret = i915_gem_execbuffer_move_to_gpu(ring, vmas);
> +	ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
>   	if (ret)
>   		goto error;
>   
> -	i915_gem_execbuffer_move_to_active(vmas, ring);
> +	i915_gem_execbuffer_move_to_active(vmas, params->request);
>   
>   	/* Make sure the OLR hasn't advanced (which would indicate a flush
>   	 * of the work in progress which in turn would be a Bad Thing). */
> @@ -1200,18 +1202,18 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
>   	/* Unconditionally invalidate gpu caches and ensure that we do flush
>   	 * any residual writes from the previous batch.
>   	 */
> -	ret = intel_ring_invalidate_all_caches(ring);
> +	ret = intel_ring_invalidate_all_caches(params->request);
>   	if (ret)
>   		goto error;
>   
>   	/* Switch to the correct context for the batch */
> -	ret = i915_switch_context(ring, params->ctx);
> +	ret = i915_switch_context(params->request, params->ctx);
>   	if (ret)
>   		goto error;
>   
>   	if (ring == &dev_priv->ring[RCS] &&
>   			params->instp_mode != dev_priv->relative_constants_mode) {
> -		ret = intel_ring_begin(ring, 4);
> +		ret = intel_ring_begin(params->request, 4);
>   		if (ret)
>   			goto error;
>   
> @@ -1225,7 +1227,7 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
>   	}
>   
>   	if (params->args_flags & I915_EXEC_GEN7_SOL_RESET) {
> -		ret = i915_reset_gen7_sol_offsets(params->dev, ring);
> +		ret = i915_reset_gen7_sol_offsets(params->dev, params->request);
>   		if (ret)
>   			goto error;
>   	}
> @@ -1236,29 +1238,31 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
>   
>   	if (params->cliprects) {
>   		for (i = 0; i < params->args_num_cliprects; i++) {
> -			ret = i915_emit_box(ring, &params->cliprects[i],
> -					    params->args_DR1, params->args_DR4);
> +			ret = i915_emit_box(params->request,
> +					    &params->cliprects[i],
> +					    params->args_DR1,
> +					    params->args_DR4);
>   			if (ret)
>   				goto error;
>   
> -			ret = ring->dispatch_execbuffer(ring,
> +			ret = ring->dispatch_execbuffer(params->request,
>   							exec_start, exec_len,
>   							params->dispatch_flags);
>   			if (ret)
>   				goto error;
>   		}
>   	} else {
> -		ret = ring->dispatch_execbuffer(ring,
> +		ret = ring->dispatch_execbuffer(params->request,
>   						exec_start, exec_len,
>   						params->dispatch_flags);
>   		if (ret)
>   			goto error;
>   	}
>   
> -	trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), params->dispatch_flags);
> +	trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
>   
> -	i915_gem_execbuffer_retire_commands(params->dev, params->file, ring,
> -					    params->batch_obj);
> +	i915_gem_execbuffer_retire_commands(params->dev, params->file,
> +					    params->request, params->batch_obj);
>   
>   error:
>   	/* intel_gpu_busy should also get a ref, so it will free when the device
> @@ -1490,10 +1494,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>   		params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm);
>   
>   	/* Allocate a request for this batch buffer nice and early. */
> -	ret = dev_priv->gt.alloc_request(ring, ctx);
> +	ret = dev_priv->gt.alloc_request(ring, ctx, &params->request);
>   	if (ret)
>   		goto err;
> -	params->request = ring->outstanding_lazy_request;
> +	WARN_ON(params->request != ring->outstanding_lazy_request);
>   
>   	/* Save assorted stuff away to pass through to *_submission_final() */
>   	params->dev                     = dev;
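
The execbuffer path is the clearest illustration of the ownership model:
the request is allocated once, early, parked in the params block, and
then threaded through every stage. Roughly (function names as in this
diff, flow heavily condensed):

	/* i915_gem_do_execbuffer() */
	ret = dev_priv->gt.alloc_request(ring, ctx, &params->request);

	/* i915_gem_ringbuffer_submission() */
	i915_gem_execbuffer_move_to_gpu(params->request, vmas);
	i915_gem_execbuffer_move_to_active(vmas, params->request);

	/* i915_gem_ringbuffer_submission_final() */
	intel_ring_invalidate_all_caches(params->request);
	i915_switch_context(params->request, params->ctx);
	ring->dispatch_execbuffer(params->request, exec_start, exec_len,
				  params->dispatch_flags);
	i915_gem_execbuffer_retire_commands(params->dev, params->file,
					    params->request, params->batch_obj);

The WARN_ON against ring->outstanding_lazy_request is only there to show
that the explicit request and the OLR still agree while the OLR is kept
around for sanity checking.
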
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 7eead93..776776e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -213,14 +213,15 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
>   }
>   
>   /* Broadwell Page Directory Pointer Descriptors */
> -static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
> -			   uint64_t val)
> +static int gen8_write_pdp(struct drm_i915_gem_request *req, unsigned entry,
> +			  uint64_t val)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	int ret;
>   
>   	BUG_ON(entry >= 4);
>   
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(req, 6);
>   	if (ret)
>   		return ret;
>   
> @@ -236,7 +237,7 @@ static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
>   }
>   
>   static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -			  struct intel_engine_cs *ring)
> +			  struct drm_i915_gem_request *req)
>   {
>   	int i, ret;
>   
> @@ -245,7 +246,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
>   
>   	for (i = used_pd - 1; i >= 0; i--) {
>   		dma_addr_t addr = ppgtt->pd_dma_addr[i];
> -		ret = gen8_write_pdp(ring, i, addr);
> +		ret = gen8_write_pdp(req, i, addr);
>   		if (ret)
>   			return ret;
>   	}
> @@ -710,16 +711,17 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
>   }
>   
>   static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -			 struct intel_engine_cs *ring)
> +			 struct drm_i915_gem_request *req)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	int ret;
>   
>   	/* NB: TLBs must be flushed and invalidated before a switch */
> -	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
> +	ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
>   	if (ret)
>   		return ret;
>   
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(req, 6);
>   	if (ret)
>   		return ret;
>   
> @@ -735,16 +737,17 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
>   }
>   
>   static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -			  struct intel_engine_cs *ring)
> +			  struct drm_i915_gem_request *req)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	int ret;
>   
>   	/* NB: TLBs must be flushed and invalidated before a switch */
> -	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
> +	ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
>   	if (ret)
>   		return ret;
>   
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(req, 6);
>   	if (ret)
>   		return ret;
>   
> @@ -758,7 +761,7 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
>   
>   	/* XXX: RCS is the only one to auto invalidate the TLBs? */
>   	if (ring->id != RCS) {
> -		ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
> +		ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
>   		if (ret)
>   			return ret;
>   	}
> @@ -767,8 +770,9 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
>   }
>   
>   static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -			  struct intel_engine_cs *ring)
> +			  struct drm_i915_gem_request *req)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	struct drm_device *dev = ppgtt->base.dev;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   
> @@ -1125,11 +1129,6 @@ int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
>   
>   int i915_ppgtt_init_hw(struct drm_device *dev)
>   {
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_engine_cs *ring;
> -	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
> -	int i, ret = 0;
> -
>   	/* In the case of execlists, PPGTT is enabled by the context descriptor
>   	 * and the PDPs are contained within the context itself.  We don't
>   	 * need to do anything here. */
> @@ -1148,25 +1147,20 @@ int i915_ppgtt_init_hw(struct drm_device *dev)
>   	else
>   		WARN_ON(1);
>   
> -	if (ppgtt) {
> -		for_each_ring(ring, dev_priv, i) {
> -			ret = ppgtt->switch_mm(ppgtt, ring);
> -			if (ret != 0)
> -				return ret;
> -
> -			/* Make sure the context switch (if one actually happened)
> -			 * gets wrapped up and finished rather than hanging around
> -			 * and confusing things later. */
> -			if (ring->outstanding_lazy_request) {
> -				ret = i915_add_request_no_flush(ring);
> -				if (ret)
> -					return ret;
> -			}
> -		}
> -	}
> +	return 0;
> +}
>   
> -	return ret;
> +int i915_ppgtt_init_ring(struct drm_i915_gem_request *req)
> +{
> +	struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
> +	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
> +
> +	if (!ppgtt)
> +		return 0;
> +
> +	return ppgtt->switch_mm(ppgtt, req);
>   }
> +
>   struct i915_hw_ppgtt *
>   i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
>   {
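
With switch_mm() taking a request, i915_ppgtt_init_hw() is left doing
register setup only and the per-ring page-directory load moves into
i915_ppgtt_init_ring(), called from the init loop with the request it
already holds. Any switch_mm implementation now looks roughly like the
hsw/gen7 ones above; a sketch with a hypothetical name, just to show
where the request goes:

	static int example_mm_switch(struct i915_hw_ppgtt *ppgtt,
				     struct drm_i915_gem_request *req)
	{
		struct intel_engine_cs *ring = req->ring;
		int ret;

		ret = intel_ring_begin(req, 6);	/* ring space billed to req */
		if (ret)
			return ret;

		/* emit the PP_DIR/PDP writes with intel_ring_emit(ring, ...) */
		intel_ring_advance(ring);
		return 0;
	}
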
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index dd849df..bee3e2a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -267,7 +267,7 @@ struct i915_hw_ppgtt {
>   
>   	int (*enable)(struct i915_hw_ppgtt *ppgtt);
>   	int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
> -			 struct intel_engine_cs *ring);
> +			 struct drm_i915_gem_request *req);
>   	void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
>   };
>   
> @@ -278,6 +278,7 @@ void i915_global_gtt_cleanup(struct drm_device *dev);
>   
>   int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt);
>   int i915_ppgtt_init_hw(struct drm_device *dev);
> +int i915_ppgtt_init_ring(struct drm_i915_gem_request *req);
>   void i915_ppgtt_release(struct kref *kref);
>   struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev,
>   					struct drm_i915_file_private *fpriv);
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index aba39c3..0e0c23fe 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -152,8 +152,9 @@ int i915_gem_render_state_prepare(struct intel_engine_cs *ring,
>   	return 0;
>   }
>   
> -int i915_gem_render_state_init(struct intel_engine_cs *ring)
> +int i915_gem_render_state_init(struct drm_i915_gem_request *req)
>   {
> +	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
>   	struct render_state so;
>   	int ret;
>   
> @@ -164,16 +165,17 @@ int i915_gem_render_state_init(struct intel_engine_cs *ring)
>   	if (so.rodata == NULL)
>   		return 0;
>   
> -	ret = ring->dispatch_execbuffer(ring,
> +	ret = ring->dispatch_execbuffer(req,
>   					so.ggtt_offset,
>   					so.rodata->batch_items * 4,
>   					I915_DISPATCH_SECURE);
>   	if (ret)
>   		goto out;
>   
> -	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
> +	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
>   
> -	ret = __i915_add_request(ring, NULL, so.obj, true);
> +//	ret = __i915_add_request(req, NULL, so.obj, true);
> +	req->batch_obj = so.obj;
>   	/* __i915_add_request moves object to inactive if it fails */
>   out:
>   	i915_gem_render_state_fini(&so);
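
Worth calling out: the render state batch no longer submits a request of
its own. It just tags the batch object onto the request it was given and
leaves submission to whoever owns that request, since __i915_add_request()
now only overwrites batch_obj when it is passed a non-NULL object.
Condensed:

	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
	req->batch_obj = so.obj;	/* picked up when the owner adds the request */
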
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h
> index c44961e..7aa7372 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.h
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.h
> @@ -39,7 +39,7 @@ struct render_state {
>   	int gen;
>   };
>   
> -int i915_gem_render_state_init(struct intel_engine_cs *ring);
> +int i915_gem_render_state_init(struct drm_i915_gem_request *req);
>   void i915_gem_render_state_fini(struct render_state *so);
>   int i915_gem_render_state_prepare(struct intel_engine_cs *ring,
>   				  struct render_state *so);
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index f0cf421..c0b0e37 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -9089,14 +9089,15 @@ static int intel_gen2_queue_flip(struct drm_device *dev,
>   				 struct drm_crtc *crtc,
>   				 struct drm_framebuffer *fb,
>   				 struct drm_i915_gem_object *obj,
> -				 struct intel_engine_cs *ring,
> +				 struct drm_i915_gem_request *req,
>   				 uint32_t flags)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>   	u32 flip_mask;
>   	int ret;
>   
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(req, 6);
>   	if (ret)
>   		return ret;
>   
> @@ -9116,7 +9117,7 @@ static int intel_gen2_queue_flip(struct drm_device *dev,
>   	intel_ring_emit(ring, 0); /* aux display base address, unused */
>   
>   	intel_mark_page_flip_active(intel_crtc);
> -	i915_add_request_no_flush(ring);
> +	i915_add_request_no_flush(req);
>   	return 0;
>   }
>   
> @@ -9124,14 +9125,15 @@ static int intel_gen3_queue_flip(struct drm_device *dev,
>   				 struct drm_crtc *crtc,
>   				 struct drm_framebuffer *fb,
>   				 struct drm_i915_gem_object *obj,
> -				 struct intel_engine_cs *ring,
> +				 struct drm_i915_gem_request *req,
>   				 uint32_t flags)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>   	u32 flip_mask;
>   	int ret;
>   
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(req, 6);
>   	if (ret)
>   		return ret;
>   
> @@ -9148,7 +9150,7 @@ static int intel_gen3_queue_flip(struct drm_device *dev,
>   	intel_ring_emit(ring, MI_NOOP);
>   
>   	intel_mark_page_flip_active(intel_crtc);
> -	i915_add_request_no_flush(ring);
> +	i915_add_request_no_flush(req);
>   	return 0;
>   }
>   
> @@ -9156,15 +9158,16 @@ static int intel_gen4_queue_flip(struct drm_device *dev,
>   				 struct drm_crtc *crtc,
>   				 struct drm_framebuffer *fb,
>   				 struct drm_i915_gem_object *obj,
> -				 struct intel_engine_cs *ring,
> +				 struct drm_i915_gem_request *req,
>   				 uint32_t flags)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>   	uint32_t pf, pipesrc;
>   	int ret;
>   
> -	ret = intel_ring_begin(ring, 4);
> +	ret = intel_ring_begin(req, 4);
>   	if (ret)
>   		return ret;
>   
> @@ -9187,7 +9190,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev,
>   	intel_ring_emit(ring, pf | pipesrc);
>   
>   	intel_mark_page_flip_active(intel_crtc);
> -	i915_add_request_no_flush(ring);
> +	i915_add_request_no_flush(req);
>   	return 0;
>   }
>   
> @@ -9195,15 +9198,16 @@ static int intel_gen6_queue_flip(struct drm_device *dev,
>   				 struct drm_crtc *crtc,
>   				 struct drm_framebuffer *fb,
>   				 struct drm_i915_gem_object *obj,
> -				 struct intel_engine_cs *ring,
> +				 struct drm_i915_gem_request *req,
>   				 uint32_t flags)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>   	uint32_t pf, pipesrc;
>   	int ret;
>   
> -	ret = intel_ring_begin(ring, 4);
> +	ret = intel_ring_begin(req, 4);
>   	if (ret)
>   		return ret;
>   
> @@ -9223,7 +9227,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev,
>   	intel_ring_emit(ring, pf | pipesrc);
>   
>   	intel_mark_page_flip_active(intel_crtc);
> -	i915_add_request_no_flush(ring);
> +	i915_add_request_no_flush(req);
>   	return 0;
>   }
>   
> @@ -9231,9 +9235,10 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
>   				 struct drm_crtc *crtc,
>   				 struct drm_framebuffer *fb,
>   				 struct drm_i915_gem_object *obj,
> -				 struct intel_engine_cs *ring,
> +				 struct drm_i915_gem_request *req,
>   				 uint32_t flags)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>   	uint32_t plane_bit = 0;
>   	int len, ret;
> @@ -9275,11 +9280,11 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
>   	 * then do the cacheline alignment, and finally emit the
>   	 * MI_DISPLAY_FLIP.
>   	 */
> -	ret = intel_ring_cacheline_align(ring);
> +	ret = intel_ring_cacheline_align(req);
>   	if (ret)
>   		return ret;
>   
> -	ret = intel_ring_begin(ring, len);
> +	ret = intel_ring_begin(req, len);
>   	if (ret)
>   		return ret;
>   
> @@ -9318,7 +9323,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
>   	intel_ring_emit(ring, (MI_NOOP));
>   
>   	intel_mark_page_flip_active(intel_crtc);
> -	i915_add_request_no_flush(ring);
> +	i915_add_request_no_flush(req);
>   	return 0;
>   }
>   
> @@ -9474,9 +9479,10 @@ static int intel_gen9_queue_flip(struct drm_device *dev,
>   				 struct drm_crtc *crtc,
>   				 struct drm_framebuffer *fb,
>   				 struct drm_i915_gem_object *obj,
> -				 struct intel_engine_cs *ring,
> +				 struct drm_i915_gem_request *req,
>   				 uint32_t flags)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>   	uint32_t plane = 0, stride;
>   	int ret;
> @@ -9508,7 +9514,7 @@ static int intel_gen9_queue_flip(struct drm_device *dev,
>   		return -ENODEV;
>   	}
>   
> -	ret = intel_ring_begin(ring, 10);
> +	ret = intel_ring_begin(req, 10);
>   	if (ret)
>   		return ret;
>   
> @@ -9528,7 +9534,7 @@ static int intel_gen9_queue_flip(struct drm_device *dev,
>   	intel_ring_emit(ring, intel_crtc->unpin_work->gtt_offset);
>   
>   	intel_mark_page_flip_active(intel_crtc);
> -	i915_add_request_no_flush(ring);
> +	i915_add_request_no_flush(req);
>   
>   	return 0;
>   }
> @@ -9537,7 +9543,7 @@ static int intel_default_queue_flip(struct drm_device *dev,
>   				    struct drm_crtc *crtc,
>   				    struct drm_framebuffer *fb,
>   				    struct drm_i915_gem_object *obj,
> -				    struct intel_engine_cs *ring,
> +				    struct drm_i915_gem_request *req,
>   				    uint32_t flags)
>   {
>   	return -ENODEV;
> @@ -9729,22 +9735,18 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>   		i915_gem_request_assign(&work->flip_queued_req,
>   					obj->last_write_req);
>   	} else {
> -		ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, ring,
> +		struct drm_i915_gem_request *req;
> +
> +		ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
> +		if (ret)
> +			return ret;
> +
> +		i915_gem_request_assign(&work->flip_queued_req, req);
> +
> +		ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, req,
>   						   page_flip_flags);
>   		if (ret)
>   			goto cleanup_unpin;
> -
> -		/* Borked: need to get the seqno for the request submitted in
> -		 * 'queue_flip()' above. However, either the request has been
> -		 * posted already and the seqno is gone (q_f calls add_request),
> -		 * or the request never gets posted and is merged into whatever
> -		 * render comes along next (q_f calls ring_advance).
> -		 *
> -		 * On the other hand, seqnos are going away soon anyway! So
> -		 * hopefully the problem will disappear...
> -		 */
> -		i915_gem_request_assign(&work->flip_queued_req,
> -					ring->outstanding_lazy_request ? intel_ring_get_request(ring) : NULL);
>   	}
>   
>   	work->flip_queued_vblank = drm_vblank_count(dev, intel_crtc->pipe);
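
This also resolves the "Borked" comment being deleted above: the flip
request now exists before queue_flip() runs, so flip_queued_req can be
assigned up front instead of being fished out of the OLR afterwards.
Condensed (names as in the diff):

	ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
	if (ret)
		return ret;

	i915_gem_request_assign(&work->flip_queued_req, req);

	ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, req,
					   page_flip_flags);
	/* every queue_flip() implementation now finishes with
	 * i915_add_request_no_flush(req) */
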
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 80cb87e..5077a77 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -203,6 +203,10 @@ enum {
>   };
>   #define GEN8_CTX_ID_SHIFT 32
>   
> +static int intel_logical_ring_begin(struct drm_i915_gem_request *req,
> +				    int num_dwords);
> +static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req,
> +					      struct intel_context *ctx);
>   static int intel_lr_context_pin(struct intel_engine_cs *ring,
>   		struct intel_context *ctx);
>   
> @@ -587,9 +591,9 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
>   	return 0;
>   }
>   
> -static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
> +static int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
>   {
> -	struct intel_engine_cs *ring = ringbuf->ring;
> +	struct intel_engine_cs *ring = req->ring;
>   	uint32_t flush_domains;
>   	int ret;
>   
> @@ -597,7 +601,7 @@ static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
>   	if (ring->gpu_caches_dirty)
>   		flush_domains = I915_GEM_GPU_DOMAINS;
>   
> -	ret = ring->emit_flush(ringbuf, I915_GEM_GPU_DOMAINS, flush_domains);
> +	ret = ring->emit_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
>   	if (ret)
>   		return ret;
>   
> @@ -605,10 +609,9 @@ static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
>   	return 0;
>   }
>   
> -static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
> +static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
>   				 struct list_head *vmas)
>   {
> -	struct intel_engine_cs *ring = ringbuf->ring;
>   	struct i915_vma *vma;
>   	uint32_t flush_domains = 0;
>   	bool flush_chipset = false;
> @@ -617,7 +620,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
>   	list_for_each_entry(vma, vmas, exec_list) {
>   		struct drm_i915_gem_object *obj = vma->obj;
>   
> -		ret = i915_gem_object_sync(obj, ring, true);
> +		ret = i915_gem_object_sync(obj, req);
>   		if (ret)
>   			return ret;
>   
> @@ -657,7 +660,6 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
>   	struct drm_device       *dev = params->dev;
>   	struct intel_engine_cs  *ring = params->ring;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_ringbuffer *ringbuf = params->ctx->engine[ring->id].ringbuf;
>   	int ret;
>   
>   	params->instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
> @@ -706,11 +708,11 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
>   		return -EINVAL;
>   	}
>   
> -	ret = execlists_move_to_gpu(ringbuf, vmas);
> +	ret = execlists_move_to_gpu(params->request, vmas);
>   	if (ret)
>   		return ret;
>   
> -	i915_gem_execbuffer_move_to_active(vmas, ring);
> +	i915_gem_execbuffer_move_to_active(vmas, params->request);
>   
>   	ret = dev_priv->gt.do_execfinal(params);
>   	if (ret)
> @@ -742,13 +744,13 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params)
>   	/* Unconditionally invalidate gpu caches and ensure that we do flush
>   	 * any residual writes from the previous batch.
>   	 */
> -	ret = logical_ring_invalidate_all_caches(ringbuf);
> +	ret = logical_ring_invalidate_all_caches(params->request);
>   	if (ret)
>   		return ret;
>   
>   	if (ring == &dev_priv->ring[RCS] &&
>   	    params->instp_mode != dev_priv->relative_constants_mode) {
> -		ret = intel_logical_ring_begin(ringbuf, 4);
> +		ret = intel_logical_ring_begin(params->request, 4);
>   		if (ret)
>   			return ret;
>   
> @@ -764,13 +766,14 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params)
>   	exec_start = params->batch_obj_vm_offset +
>   		     params->args_batch_start_offset;
>   
> -	ret = ring->emit_bb_start(ringbuf, exec_start, params->dispatch_flags);
> +	ret = ring->emit_bb_start(params->request, exec_start, params->dispatch_flags);
>   	if (ret)
>   		return ret;
>   
> -	trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), params->dispatch_flags);
> +	trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
>   
> -	i915_gem_execbuffer_retire_commands(params->dev, params->file, ring, params->batch_obj);
> +	i915_gem_execbuffer_retire_commands(params->dev, params->file,
> +					    params->request, params->batch_obj);
>   
>   	return 0;
>   }
> @@ -827,15 +830,15 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring)
>   	I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
>   }
>   
> -int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf)
> +int logical_ring_flush_all_caches(struct drm_i915_gem_request *req)
>   {
> -	struct intel_engine_cs *ring = ringbuf->ring;
> +	struct intel_engine_cs *ring = req->ring;
>   	int ret;
>   
>   	if (!ring->gpu_caches_dirty)
>   		return 0;
>   
> -	ret = ring->emit_flush(ringbuf, 0, I915_GEM_GPU_DOMAINS);
> +	ret = ring->emit_flush(req, 0, I915_GEM_GPU_DOMAINS);
>   	if (ret)
>   		return ret;
>   
> @@ -910,13 +913,17 @@ void intel_lr_context_unpin(struct intel_engine_cs *ring,
>   }
>   
>   int intel_logical_ring_alloc_request(struct intel_engine_cs *ring,
> -				     struct intel_context *ctx)
> +				     struct intel_context *ctx,
> +				     struct drm_i915_gem_request **req_out)
>   {
>   	struct drm_i915_gem_request *request;
>   	struct drm_i915_private *dev_private = ring->dev->dev_private;
>   	int ret;
>   
> -	if (ring->outstanding_lazy_request)
> +	if (!req_out)
> +		return -EINVAL;
> +
> +	if ((*req_out = ring->outstanding_lazy_request) != NULL)
>   		return 0;
>   
>   	request = kzalloc(sizeof(*request), GFP_KERNEL);
> @@ -953,7 +960,7 @@ int intel_logical_ring_alloc_request(struct intel_engine_cs *ring,
>   	i915_gem_context_reference(request->ctx);
>   	request->ringbuf = ctx->engine[ring->id].ringbuf;
>   
> -	ring->outstanding_lazy_request = request;
> +	*req_out = ring->outstanding_lazy_request = request;
>   	return 0;
>   }
>   
> @@ -1090,7 +1097,7 @@ static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes)
>   /**
>    * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands
>    *
> - * @ringbuf: Logical ringbuffer.
> + * @request: The request to start some new work for
>    * @num_dwords: number of DWORDs that we plan to write to the ringbuffer.
>    *
>    * The ringbuffer might not be ready to accept the commands right away (maybe it needs to
> @@ -1100,8 +1107,9 @@ static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes)
>    *
>    * Return: non-zero if the ringbuffer is not ready to be written to.
>    */
> -int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
> +static int intel_logical_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
>   {
> +	struct intel_ringbuffer *ringbuf = req->ringbuf;
>   	struct intel_engine_cs *ring = ringbuf->ring;
>   	struct drm_device *dev = ring->dev;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -1116,38 +1124,28 @@ int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
>   	if (ret)
>   		return ret;
>   
> -	if(!ring->outstanding_lazy_request) {
> -		printk(KERN_INFO "%s:%d> \x1B[31;1mring->outstanding_lazy_request = 0x%p\x1B[0m\n", __func__, __LINE__, ring->outstanding_lazy_request);
> -		dump_stack();
> -	}
> -
> -	/* Preallocate the olr before touching the ring */
> -	ret = intel_logical_ring_alloc_request(ring, ringbuf->FIXME_lrc_ctx);
> -	if (ret)
> -		return ret;
> -
>   	ringbuf->space -= num_dwords * sizeof(uint32_t);
>   	return 0;
>   }
>   
> -static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring,
> +static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req,
>   					       struct intel_context *ctx)
>   {
>   	int ret, i;
> -	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
> -	struct drm_device *dev = ring->dev;
> +	struct intel_ringbuffer *ringbuf = req->ringbuf;
> +	struct drm_device *dev = req->ring->dev;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct i915_workarounds *w = &dev_priv->workarounds;
>   
>   	if (WARN_ON(w->count == 0))
>   		return 0;
>   
> -	ring->gpu_caches_dirty = true;
> -	ret = logical_ring_flush_all_caches(ringbuf);
> +	req->ring->gpu_caches_dirty = true;
> +	ret = logical_ring_flush_all_caches(req);
>   	if (ret)
>   		return ret;
>   
> -	ret = intel_logical_ring_begin(ringbuf, w->count * 2 + 2);
> +	ret = intel_logical_ring_begin(req, w->count * 2 + 2);
>   	if (ret)
>   		return ret;
>   
> @@ -1160,8 +1158,8 @@ static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring,
>   
>   	intel_logical_ring_advance(ringbuf);
>   
> -	ring->gpu_caches_dirty = true;
> -	ret = logical_ring_flush_all_caches(ringbuf);
> +	req->ring->gpu_caches_dirty = true;
> +	ret = logical_ring_flush_all_caches(req);
>   	if (ret)
>   		return ret;
>   
> @@ -1210,13 +1208,14 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring)
>   	return init_workarounds_ring(ring);
>   }
>   
> -static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf,
> +static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
>   			      u64 offset, unsigned flags)
>   {
> +	struct intel_ringbuffer *ringbuf = req->ringbuf;
>   	bool ppgtt = !(flags & I915_DISPATCH_SECURE);
>   	int ret;
>   
> -	ret = intel_logical_ring_begin(ringbuf, 4);
> +	ret = intel_logical_ring_begin(req, 4);
>   	if (ret)
>   		return ret;
>   
> @@ -1263,17 +1262,18 @@ static void gen8_logical_ring_put_irq(struct intel_engine_cs *ring)
>   	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
>   }
>   
> -static int gen8_emit_flush(struct intel_ringbuffer *ringbuf,
> +static int gen8_emit_flush(struct drm_i915_gem_request *req,
>   			   u32 invalidate_domains,
>   			   u32 unused)
>   {
> +	struct intel_ringbuffer *ringbuf = req->ringbuf;
>   	struct intel_engine_cs *ring = ringbuf->ring;
>   	struct drm_device *dev = ring->dev;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	uint32_t cmd;
>   	int ret;
>   
> -	ret = intel_logical_ring_begin(ringbuf, 4);
> +	ret = intel_logical_ring_begin(req, 4);
>   	if (ret)
>   		return ret;
>   
> @@ -1301,10 +1301,11 @@ static int gen8_emit_flush(struct intel_ringbuffer *ringbuf,
>   	return 0;
>   }
>   
> -static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf,
> +static int gen8_emit_flush_render(struct drm_i915_gem_request *req,
>   				  u32 invalidate_domains,
>   				  u32 flush_domains)
>   {
> +	struct intel_ringbuffer *ringbuf = req->ringbuf;
>   	struct intel_engine_cs *ring = ringbuf->ring;
>   	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
>   	u32 flags = 0;
> @@ -1328,7 +1329,7 @@ static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf,
>   		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
>   	}
>   
> -	ret = intel_logical_ring_begin(ringbuf, 6);
> +	ret = intel_logical_ring_begin(req, 6);
>   	if (ret)
>   		return ret;
>   
> @@ -1353,13 +1354,14 @@ static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
>   	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
>   }
>   
> -static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
> +static int gen8_emit_request(struct drm_i915_gem_request *req)
>   {
> +	struct intel_ringbuffer *ringbuf = req->ringbuf;
>   	struct intel_engine_cs *ring = ringbuf->ring;
>   	u32 cmd;
>   	int ret;
>   
> -	ret = intel_logical_ring_begin(ringbuf, 6);
> +	ret = intel_logical_ring_begin(req, 6);
>   	if (ret)
>   		return ret;
>   
> @@ -1371,8 +1373,7 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
>   				(ring->status_page.gfx_addr +
>   				(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)));
>   	intel_logical_ring_emit(ringbuf, 0);
> -	intel_logical_ring_emit(ringbuf,
> -		i915_gem_request_get_seqno(ring->outstanding_lazy_request));
> +	intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(req));
>   	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
>   	intel_logical_ring_emit(ringbuf, MI_NOOP);
>   	intel_logical_ring_advance_and_submit(ringbuf);
> @@ -1380,16 +1381,20 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
>   	return 0;
>   }
>   
> -static int gen8_init_rcs_context(struct intel_engine_cs *ring,
> -		       struct intel_context *ctx)
> +static int gen8_init_rcs_context(struct drm_i915_gem_request *req,
> +				 struct intel_context *ctx)
>   {
>   	int ret;
>   
> -	ret = intel_logical_ring_workarounds_emit(ring, ctx);
> +	ret = intel_logical_ring_workarounds_emit(req, ctx);
>   	if (ret)
>   		return ret;
>   
> -	return intel_lr_context_render_state_init(ring, ctx);
> +	ret = intel_lr_context_render_state_init(req, ctx);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
>   }
>   
>   /**
> @@ -1409,6 +1414,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
>   
>   	intel_logical_ring_stop(ring);
>   	WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
> +	WARN_ON(ring->outstanding_lazy_request);
>   	i915_gem_request_assign(&ring->outstanding_lazy_request, NULL);
>   
>   	if (ring->cleanup)
> @@ -1648,10 +1654,10 @@ cleanup_render_ring:
>   	return ret;
>   }
>   
> -int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
> -				       struct intel_context *ctx)
> +static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req,
> +					      struct intel_context *ctx)
>   {
> -	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
> +	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
>   	struct render_state so;
>   	struct drm_i915_file_private *file_priv = ctx->file_priv;
>   	struct drm_file *file = file_priv ? file_priv->file : NULL;
> @@ -1664,15 +1670,13 @@ int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
>   	if (so.rodata == NULL)
>   		return 0;
>   
> -	ret = ring->emit_bb_start(ringbuf,
> -			so.ggtt_offset,
> -			I915_DISPATCH_SECURE);
> +	ret = ring->emit_bb_start(req, so.ggtt_offset, I915_DISPATCH_SECURE);
>   	if (ret)
>   		goto out;
>   
> -	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
> +	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
>   
> -	ret = __i915_add_request(ring, file, so.obj, true);
> +	ret = __i915_add_request(req, file, so.obj, true);
>   	/* intel_logical_ring_add_request moves object to inactive if it
>   	 * fails */
>   out:
> @@ -1883,6 +1887,7 @@ static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
>   int intel_lr_context_deferred_create(struct intel_context *ctx,
>   				     struct intel_engine_cs *ring)
>   {
> +	struct drm_i915_private *dev_priv = ring->dev->dev_private;
>   	const bool is_global_default_ctx = (ctx == ring->default_context);
>   	struct drm_device *dev = ring->dev;
>   	struct drm_i915_gem_object *ctx_obj;
> @@ -1964,13 +1969,27 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>   		lrc_setup_hardware_status_page(ring, ctx_obj);
>   	else if (ring->id == RCS && !ctx->rcs_initialized) {
>   		if (ring->init_context) {
> -			ret = ring->init_context(ring, ctx);
> +			struct drm_i915_gem_request *req;
> +
> +			ret = dev_priv->gt.alloc_request(ring, ctx, &req);
> +			if (ret)
> +				return ret;
> +
> +			ret = ring->init_context(req, ctx);
>   			if (ret) {
>   				DRM_ERROR("ring init context: %d\n", ret);
> +				i915_gem_request_unreference(req);
>   				ctx->engine[ring->id].ringbuf = NULL;
>   				ctx->engine[ring->id].state = NULL;
>   				goto error;
>   			}
> +
> +			ret = i915_add_request_no_flush(req);
> +			if (ret) {
> +				DRM_ERROR("Add request failed: %d\n", ret);
> +				i915_gem_request_unreference(req);
> +				goto error;
> +			}
>   		}
>   
>   		ctx->rcs_initialized = true;
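
On the execlist side the allocation helper now hands the request back
through req_out instead of leaving it parked on the ring, and
intel_logical_ring_begin() no longer conjures an OLR behind the caller's
back. The deferred-create path above ends up with the same shape as the
legacy callers (sketch, condensed from the hunk; the real code also
unwinds ctx->engine state on failure):

	struct drm_i915_gem_request *req;

	ret = dev_priv->gt.alloc_request(ring, ctx, &req);
	if (ret)
		return ret;

	ret = ring->init_context(req, ctx);	/* emits via req->ringbuf */
	if (ret) {
		i915_gem_request_unreference(req);
		return ret;
	}

	ret = i915_add_request_no_flush(req);
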
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index ea083d9..a2981ba 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -35,12 +35,13 @@
>   
>   /* Logical Rings */
>   int __must_check intel_logical_ring_alloc_request(struct intel_engine_cs *ring,
> -						  struct intel_context *ctx);
> +						  struct intel_context *ctx,
> +						  struct drm_i915_gem_request **req_out);
>   void intel_logical_ring_stop(struct intel_engine_cs *ring);
>   void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
>   int intel_logical_rings_init(struct drm_device *dev);
>   
> -int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf);
> +int logical_ring_flush_all_caches(struct drm_i915_gem_request *req);
>   void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf);
>   /**
>    * intel_logical_ring_advance() - advance the ringbuffer tail
> @@ -63,11 +64,8 @@ static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf,
>   	iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
>   	ringbuf->tail += 4;
>   }
> -int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords);
>   
>   /* Logical Ring Contexts */
> -int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
> -				       struct intel_context *ctx);
>   void intel_lr_context_free(struct intel_context *ctx);
>   int intel_lr_context_deferred_create(struct intel_context *ctx,
>   				     struct intel_engine_cs *ring);
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index 973c9de..2d2ce59 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -209,17 +209,15 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
>   }
>   
>   static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
> +					 struct drm_i915_gem_request *req,
>   					 void (*tail)(struct intel_overlay *))
>   {
>   	struct drm_device *dev = overlay->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
>   	int ret;
>   
>   	BUG_ON(overlay->last_flip_req);
> -	i915_gem_request_assign(&overlay->last_flip_req,
> -					     ring->outstanding_lazy_request);
> -	ret = i915_add_request(ring);
> +	i915_gem_request_assign(&overlay->last_flip_req, req);
> +	ret = i915_add_request(overlay->last_flip_req);
>   	if (ret)
>   		return ret;
>   
> @@ -239,6 +237,7 @@ static int intel_overlay_on(struct intel_overlay *overlay)
>   	struct drm_device *dev = overlay->dev;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> +	struct drm_i915_gem_request *req;
>   	int ret;
>   
>   	BUG_ON(overlay->active);
> @@ -246,17 +245,21 @@ static int intel_overlay_on(struct intel_overlay *overlay)
>   
>   	WARN_ON(IS_I830(dev) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE));
>   
> -	ret = intel_ring_begin(ring, 4);
> +	ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
>   	if (ret)
>   		return ret;
>   
> -	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON);
> -	intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
> -	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
> -	intel_ring_emit(ring, MI_NOOP);
> -	intel_ring_advance(ring);
> +	ret = intel_ring_begin(req, 4);
> +	if (ret)
> +		return ret;
> +
> +	intel_ring_emit(req->ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON);
> +	intel_ring_emit(req->ring, overlay->flip_addr | OFC_UPDATE);
> +	intel_ring_emit(req->ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
> +	intel_ring_emit(req->ring, MI_NOOP);
> +	intel_ring_advance(req->ring);
>   
> -	return intel_overlay_do_wait_request(overlay, NULL);
> +	return intel_overlay_do_wait_request(overlay, req, NULL);
>   }
>   
>   /* overlay needs to be enabled in OCMD reg */
> @@ -266,6 +269,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
>   	struct drm_device *dev = overlay->dev;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> +	struct drm_i915_gem_request *req;
>   	u32 flip_addr = overlay->flip_addr;
>   	u32 tmp;
>   	int ret;
> @@ -280,7 +284,11 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
>   	if (tmp & (1 << 17))
>   		DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);
>   
> -	ret = intel_ring_begin(ring, 2);
> +	ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
> +	if (ret)
> +		return ret;
> +
> +	ret = intel_ring_begin(req, 2);
>   	if (ret)
>   		return ret;
>   
> @@ -289,9 +297,8 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
>   	intel_ring_advance(ring);
>   
>   	WARN_ON(overlay->last_flip_req);
> -	i915_gem_request_assign(&overlay->last_flip_req,
> -					     ring->outstanding_lazy_request);
> -	return i915_add_request(ring);
> +	i915_gem_request_assign(&overlay->last_flip_req, req);
> +	return i915_add_request(req);
>   }
>   
>   static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
> @@ -326,6 +333,7 @@ static int intel_overlay_off(struct intel_overlay *overlay)
>   	struct drm_device *dev = overlay->dev;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> +	struct drm_i915_gem_request *req;
>   	u32 flip_addr = overlay->flip_addr;
>   	int ret;
>   
> @@ -337,7 +345,11 @@ static int intel_overlay_off(struct intel_overlay *overlay)
>   	 * of the hw. Do it in both cases */
>   	flip_addr |= OFC_UPDATE;
>   
> -	ret = intel_ring_begin(ring, 6);
> +	ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
> +	if (ret)
> +		return ret;
> +
> +	ret = intel_ring_begin(req, 6);
>   	if (ret)
>   		return ret;
>   
> @@ -359,7 +371,7 @@ static int intel_overlay_off(struct intel_overlay *overlay)
>   	}
>   	intel_ring_advance(ring);
>   
> -	return intel_overlay_do_wait_request(overlay, intel_overlay_off_tail);
> +	return intel_overlay_do_wait_request(overlay, req, intel_overlay_off_tail);
>   }
>   
>   /* recover from an interruption due to a signal
> @@ -404,7 +416,13 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
>   
>   	if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
>   		/* synchronous slowpath */
> -		ret = intel_ring_begin(ring, 2);
> +		struct drm_i915_gem_request *req;
> +
> +		ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
> +		if (ret)
> +			return ret;
> +
> +		ret = intel_ring_begin(req, 2);
>   		if (ret)
>   			return ret;
>   
> @@ -412,7 +430,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
>   		intel_ring_emit(ring, MI_NOOP);
>   		intel_ring_advance(ring);
>   
> -		ret = intel_overlay_do_wait_request(overlay,
> +		ret = intel_overlay_do_wait_request(overlay, req,
>   						    intel_overlay_release_old_vid_tail);
>   		if (ret)
>   			return ret;
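
The overlay conversion follows the same recipe: each operation allocates
its own request, emits into it, and either hands it to
intel_overlay_do_wait_request(), which assigns last_flip_req and submits,
or calls i915_add_request() directly. Roughly (names as in the diff;
n_dwords and tail stand in for the per-operation values):

	ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
	if (ret)
		return ret;

	ret = intel_ring_begin(req, n_dwords);
	if (ret)
		return ret;

	intel_ring_emit(req->ring, ...);
	intel_ring_advance(req->ring);

	return intel_overlay_do_wait_request(overlay, req, tail);
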
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 78911e2..5905fa5 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -5506,6 +5506,7 @@ static void ironlake_enable_rc6(struct drm_device *dev)
>   {
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> +	struct drm_i915_gem_request *req = NULL;
>   	bool was_interruptible;
>   	int ret;
>   
> @@ -5524,16 +5525,17 @@ static void ironlake_enable_rc6(struct drm_device *dev)
>   	was_interruptible = dev_priv->mm.interruptible;
>   	dev_priv->mm.interruptible = false;
>   
> +	ret = dev_priv->gt.alloc_request(ring, NULL, &req);
> +	if (ret)
> +		goto err;
> +
>   	/*
>   	 * GPU can automatically power down the render unit if given a page
>   	 * to save state.
>   	 */
> -	ret = intel_ring_begin(ring, 6);
> -	if (ret) {
> -		ironlake_teardown_rc6(dev);
> -		dev_priv->mm.interruptible = was_interruptible;
> -		return;
> -	}
> +	ret = intel_ring_begin(req, 6);
> +	if (ret)
> +		goto err;
>   
>   	intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN);
>   	intel_ring_emit(ring, MI_SET_CONTEXT);
> @@ -5547,6 +5549,11 @@ static void ironlake_enable_rc6(struct drm_device *dev)
>   	intel_ring_emit(ring, MI_FLUSH);
>   	intel_ring_advance(ring);
>   
> +	ret = i915_add_request_no_flush(req);
> +	if (ret)
> +		goto err;
> +	req = NULL;
> +
>   	/*
>   	 * Wait for the command parser to advance past MI_SET_CONTEXT. The HW
>   	 * does an implicit flush, combined with MI_FLUSH above, it should be
> @@ -5554,16 +5561,20 @@ static void ironlake_enable_rc6(struct drm_device *dev)
>   	 */
>   	ret = intel_ring_idle(ring);
>   	dev_priv->mm.interruptible = was_interruptible;
> -	if (ret) {
> -		DRM_ERROR("failed to enable ironlake power savings\n");
> -		ironlake_teardown_rc6(dev);
> -		return;
> -	}
> +	if (ret)
> +		goto err;
>   
>   	I915_WRITE(PWRCTXA, i915_gem_obj_ggtt_offset(dev_priv->ips.pwrctx) | PWRCTX_EN);
>   	I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
>   
>   	intel_print_rc6_info(dev, GEN6_RC_CTL_RC6_ENABLE);
> +
> +err:
> +	DRM_ERROR("failed to enable ironlake power savings\n");
> +	ironlake_teardown_rc6(dev);
> +	dev_priv->mm.interruptible = was_interruptible;
> +	if (req)
> +		i915_gem_request_unreference(req);
>   }
>   
>   static unsigned long intel_pxfreq(u32 vidfreq)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index b60e59b..e6e7bb5 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -91,10 +91,11 @@ void __intel_ring_advance(struct intel_engine_cs *ring)
>   }
>   
>   static int
> -gen2_render_ring_flush(struct intel_engine_cs *ring,
> +gen2_render_ring_flush(struct drm_i915_gem_request *req,
>   		       u32	invalidate_domains,
>   		       u32	flush_domains)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	u32 cmd;
>   	int ret;
>   
> @@ -105,7 +106,7 @@ gen2_render_ring_flush(struct intel_engine_cs *ring,
>   	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
>   		cmd |= MI_READ_FLUSH;
>   
> -	ret = intel_ring_begin(ring, 2);
> +	ret = intel_ring_begin(req, 2);
>   	if (ret)
>   		return ret;
>   
> @@ -117,10 +118,11 @@ gen2_render_ring_flush(struct intel_engine_cs *ring,
>   }
>   
>   static int
> -gen4_render_ring_flush(struct intel_engine_cs *ring,
> +gen4_render_ring_flush(struct drm_i915_gem_request *req,
>   		       u32	invalidate_domains,
>   		       u32	flush_domains)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	struct drm_device *dev = ring->dev;
>   	u32 cmd;
>   	int ret;
> @@ -163,7 +165,7 @@ gen4_render_ring_flush(struct intel_engine_cs *ring,
>   	    (IS_G4X(dev) || IS_GEN5(dev)))
>   		cmd |= MI_INVALIDATE_ISP;
>   
> -	ret = intel_ring_begin(ring, 2);
> +	ret = intel_ring_begin(req, 2);
>   	if (ret)
>   		return ret;
>   
> @@ -212,12 +214,13 @@ gen4_render_ring_flush(struct intel_engine_cs *ring,
>    * really our business.  That leaves only stall at scoreboard.
>    */
>   static int
> -intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)
> +intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
>   	int ret;
>   
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(req, 6);
>   	if (ret)
>   		return ret;
>   
> @@ -230,7 +233,7 @@ intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)
>   	intel_ring_emit(ring, MI_NOOP);
>   	intel_ring_advance(ring);
>   
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(req, 6);
>   	if (ret)
>   		return ret;
>   
> @@ -246,15 +249,16 @@ intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)
>   }
>   
>   static int
> -gen6_render_ring_flush(struct intel_engine_cs *ring,
> -                         u32 invalidate_domains, u32 flush_domains)
> +gen6_render_ring_flush(struct drm_i915_gem_request *req,
> +                       u32 invalidate_domains, u32 flush_domains)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	u32 flags = 0;
>   	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
>   	int ret;
>   
>   	/* Force SNB workarounds for PIPE_CONTROL flushes */
> -	ret = intel_emit_post_sync_nonzero_flush(ring);
> +	ret = intel_emit_post_sync_nonzero_flush(req);
>   	if (ret)
>   		return ret;
>   
> @@ -284,7 +288,7 @@ gen6_render_ring_flush(struct intel_engine_cs *ring,
>   		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
>   	}
>   
> -	ret = intel_ring_begin(ring, 4);
> +	ret = intel_ring_begin(req, 4);
>   	if (ret)
>   		return ret;
>   
> @@ -298,11 +302,12 @@ gen6_render_ring_flush(struct intel_engine_cs *ring,
>   }
>   
>   static int
> -gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
> +gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	int ret;
>   
> -	ret = intel_ring_begin(ring, 4);
> +	ret = intel_ring_begin(req, 4);
>   	if (ret)
>   		return ret;
>   
> @@ -316,14 +321,15 @@ gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
>   	return 0;
>   }
>   
> -static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value)
> +static int gen7_ring_fbc_flush(struct drm_i915_gem_request *req, u32 value)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	int ret;
>   
>   	if (!ring->fbc_dirty)
>   		return 0;
>   
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(req, 6);
>   	if (ret)
>   		return ret;
>   	/* WaFbcNukeOn3DBlt:ivb/hsw */
> @@ -340,9 +346,10 @@ static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value)
>   }
>   
>   static int
> -gen7_render_ring_flush(struct intel_engine_cs *ring,
> +gen7_render_ring_flush(struct drm_i915_gem_request *req,
>   		       u32 invalidate_domains, u32 flush_domains)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	u32 flags = 0;
>   	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
>   	int ret;
> @@ -381,10 +388,10 @@ gen7_render_ring_flush(struct intel_engine_cs *ring,
>   		/* Workaround: we must issue a pipe_control with CS-stall bit
>   		 * set before a pipe_control command that has the state cache
>   		 * invalidate bit set. */
> -		gen7_render_ring_cs_stall_wa(ring);
> +		gen7_render_ring_cs_stall_wa(req);
>   	}
>   
> -	ret = intel_ring_begin(ring, 4);
> +	ret = intel_ring_begin(req, 4);
>   	if (ret)
>   		return ret;
>   
> @@ -395,18 +402,19 @@ gen7_render_ring_flush(struct intel_engine_cs *ring,
>   	intel_ring_advance(ring);
>   
>   	if (!invalidate_domains && flush_domains)
> -		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
> +		return gen7_ring_fbc_flush(req, FBC_REND_NUKE);
>   
>   	return 0;
>   }
>   
>   static int
> -gen8_emit_pipe_control(struct intel_engine_cs *ring,
> +gen8_emit_pipe_control(struct drm_i915_gem_request *req,
>   		       u32 flags, u32 scratch_addr)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	int ret;
>   
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(req, 6);
>   	if (ret)
>   		return ret;
>   
> @@ -422,11 +430,11 @@ gen8_emit_pipe_control(struct intel_engine_cs *ring,
>   }
>   
>   static int
> -gen8_render_ring_flush(struct intel_engine_cs *ring,
> +gen8_render_ring_flush(struct drm_i915_gem_request *req,
>   		       u32 invalidate_domains, u32 flush_domains)
>   {
>   	u32 flags = 0;
> -	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
> +	u32 scratch_addr = req->ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
>   	int ret;
>   
>   	flags |= PIPE_CONTROL_CS_STALL;
> @@ -446,7 +454,7 @@ gen8_render_ring_flush(struct intel_engine_cs *ring,
>   		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
>   
>   		/* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
> -		ret = gen8_emit_pipe_control(ring,
> +		ret = gen8_emit_pipe_control(req,
>   					     PIPE_CONTROL_CS_STALL |
>   					     PIPE_CONTROL_STALL_AT_SCOREBOARD,
>   					     0);
> @@ -454,12 +462,12 @@ gen8_render_ring_flush(struct intel_engine_cs *ring,
>   			return ret;
>   	}
>   
> -	ret = gen8_emit_pipe_control(ring, flags, scratch_addr);
> +	ret = gen8_emit_pipe_control(req, flags, scratch_addr);
>   	if (ret)
>   		return ret;
>   
>   	if (!invalidate_domains && flush_domains)
> -		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
> +		return gen7_ring_fbc_flush(req, FBC_REND_NUKE);
>   
>   	return 0;
>   }
> @@ -670,9 +678,10 @@ err:
>   	return ret;
>   }
>   
> -static int intel_ring_workarounds_emit(struct intel_engine_cs *ring,
> +static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req,
>   				       struct intel_context *ctx)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	int ret, i;
>   	struct drm_device *dev = ring->dev;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -682,11 +691,11 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring,
>   		return 0;
>   
>   	ring->gpu_caches_dirty = true;
> -	ret = intel_ring_flush_all_caches(ring);
> +	ret = intel_ring_flush_all_caches(req);
>   	if (ret)
>   		return ret;
>   
> -	ret = intel_ring_begin(ring, (w->count * 2 + 2));
> +	ret = intel_ring_begin(req, (w->count * 2 + 2));
>   	if (ret)
>   		return ret;
>   
> @@ -700,7 +709,7 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring,
>   	intel_ring_advance(ring);
>   
>   	ring->gpu_caches_dirty = true;
> -	ret = intel_ring_flush_all_caches(ring);
> +	ret = intel_ring_flush_all_caches(req);
>   	if (ret)
>   		return ret;
>   
> @@ -898,10 +907,11 @@ static void render_ring_cleanup(struct intel_engine_cs *ring)
>   	intel_fini_pipe_control(ring);
>   }
>   
> -static int gen8_rcs_signal(struct intel_engine_cs *signaller,
> +static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
>   			   unsigned int num_dwords)
>   {
>   #define MBOX_UPDATE_DWORDS 8
> +	struct intel_engine_cs *signaller = signaller_req->ring;
>   	struct drm_device *dev = signaller->dev;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct intel_engine_cs *waiter;
> @@ -911,7 +921,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,
>   	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
>   #undef MBOX_UPDATE_DWORDS
>   
> -	ret = intel_ring_begin(signaller, num_dwords);
> +	ret = intel_ring_begin(signaller_req, num_dwords);
>   	if (ret)
>   		return ret;
>   
> @@ -921,8 +931,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,
>   		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
>   			continue;
>   
> -		seqno = i915_gem_request_get_seqno(
> -					   signaller->outstanding_lazy_request);
> +		seqno = i915_gem_request_get_seqno(signaller_req);
>   		intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
>   		intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
>   					   PIPE_CONTROL_QW_WRITE |
> @@ -939,10 +948,11 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,
>   	return 0;
>   }
>   
> -static int gen8_xcs_signal(struct intel_engine_cs *signaller,
> +static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
>   			   unsigned int num_dwords)
>   {
>   #define MBOX_UPDATE_DWORDS 6
> +	struct intel_engine_cs *signaller = signaller_req->ring;
>   	struct drm_device *dev = signaller->dev;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct intel_engine_cs *waiter;
> @@ -952,7 +962,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,
>   	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
>   #undef MBOX_UPDATE_DWORDS
>   
> -	ret = intel_ring_begin(signaller, num_dwords);
> +	ret = intel_ring_begin(signaller_req, num_dwords);
>   	if (ret)
>   		return ret;
>   
> @@ -962,8 +972,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,
>   		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
>   			continue;
>   
> -		seqno = i915_gem_request_get_seqno(
> -					   signaller->outstanding_lazy_request);
> +		seqno = i915_gem_request_get_seqno(signaller_req);
>   		intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
>   					   MI_FLUSH_DW_OP_STOREDW);
>   		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
> @@ -978,9 +987,10 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,
>   	return 0;
>   }
>   
> -static int gen6_signal(struct intel_engine_cs *signaller,
> +static int gen6_signal(struct drm_i915_gem_request *signaller_req,
>   		       unsigned int num_dwords)
>   {
> +	struct intel_engine_cs *signaller = signaller_req->ring;
>   	struct drm_device *dev = signaller->dev;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct intel_engine_cs *useless;
> @@ -991,15 +1001,14 @@ static int gen6_signal(struct intel_engine_cs *signaller,
>   	num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
>   #undef MBOX_UPDATE_DWORDS
>   
> -	ret = intel_ring_begin(signaller, num_dwords);
> +	ret = intel_ring_begin(signaller_req, num_dwords);
>   	if (ret)
>   		return ret;
>   
>   	for_each_ring(useless, dev_priv, i) {
>   		u32 mbox_reg = signaller->semaphore.mbox.signal[i];
>   		if (mbox_reg != GEN6_NOSYNC) {
> -			u32 seqno = i915_gem_request_get_seqno(
> -					   signaller->outstanding_lazy_request);
> +			u32 seqno = i915_gem_request_get_seqno(signaller_req);
>   			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
>   			intel_ring_emit(signaller, mbox_reg);
>   			intel_ring_emit(signaller, seqno);
> @@ -1016,29 +1025,28 @@ static int gen6_signal(struct intel_engine_cs *signaller,
>   /**
>    * gen6_add_request - Update the semaphore mailbox registers
>    *
> - * @ring - ring that is adding a request
> - * @seqno - return seqno stuck into the ring
> + * @request - request to write to the ring
>    *
>    * Update the mailbox registers in the *other* rings with the current seqno.
>    * This acts like a signal in the canonical semaphore.
>    */
>   static int
> -gen6_add_request(struct intel_engine_cs *ring)
> +gen6_add_request(struct drm_i915_gem_request *req)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	int ret;
>   
>   	if (ring->semaphore.signal)
> -		ret = ring->semaphore.signal(ring, 4);
> +		ret = ring->semaphore.signal(req, 4);
>   	else
> -		ret = intel_ring_begin(ring, 4);
> +		ret = intel_ring_begin(req, 4);
>   
>   	if (ret)
>   		return ret;
>   
>   	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
>   	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
> -	intel_ring_emit(ring,
> -		    i915_gem_request_get_seqno(ring->outstanding_lazy_request));
> +	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
>   	intel_ring_emit(ring, MI_USER_INTERRUPT);
>   	__intel_ring_advance(ring);
>   
> @@ -1061,14 +1069,15 @@ static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,
>    */
>   
>   static int
> -gen8_ring_sync(struct intel_engine_cs *waiter,
> +gen8_ring_sync(struct drm_i915_gem_request *waiter_req,
>   	       struct intel_engine_cs *signaller,
>   	       u32 seqno)
>   {
> +	struct intel_engine_cs *waiter = waiter_req->ring;
>   	struct drm_i915_private *dev_priv = waiter->dev->dev_private;
>   	int ret;
>   
> -	ret = intel_ring_begin(waiter, 4);
> +	ret = intel_ring_begin(waiter_req, 4);
>   	if (ret)
>   		return ret;
>   
> @@ -1086,10 +1095,11 @@ gen8_ring_sync(struct intel_engine_cs *waiter,
>   }
>   
>   static int
> -gen6_ring_sync(struct intel_engine_cs *waiter,
> +gen6_ring_sync(struct drm_i915_gem_request *waiter_req,
>   	       struct intel_engine_cs *signaller,
>   	       u32 seqno)
>   {
> +	struct intel_engine_cs *waiter = waiter_req->ring;
>   	u32 dw1 = MI_SEMAPHORE_MBOX |
>   		  MI_SEMAPHORE_COMPARE |
>   		  MI_SEMAPHORE_REGISTER;
> @@ -1104,7 +1114,7 @@ gen6_ring_sync(struct intel_engine_cs *waiter,
>   
>   	WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
>   
> -	ret = intel_ring_begin(waiter, 4);
> +	ret = intel_ring_begin(waiter_req, 4);
>   	if (ret)
>   		return ret;
>   
> @@ -1135,8 +1145,9 @@ do {									\
>   } while (0)
>   
>   static int
> -pc_render_add_request(struct intel_engine_cs *ring)
> +pc_render_add_request(struct drm_i915_gem_request *req)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
>   	int ret;
>   
> @@ -1148,7 +1159,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
>   	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
>   	 * memory before requesting an interrupt.
>   	 */
> -	ret = intel_ring_begin(ring, 32);
> +	ret = intel_ring_begin(req, 32);
>   	if (ret)
>   		return ret;
>   
> @@ -1156,8 +1167,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
>   			PIPE_CONTROL_WRITE_FLUSH |
>   			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
>   	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
> -	intel_ring_emit(ring,
> -		    i915_gem_request_get_seqno(ring->outstanding_lazy_request));
> +	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
>   	intel_ring_emit(ring, 0);
>   	PIPE_CONTROL_FLUSH(ring, scratch_addr);
>   	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
> @@ -1176,8 +1186,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
>   			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
>   			PIPE_CONTROL_NOTIFY);
>   	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
> -	intel_ring_emit(ring,
> -		    i915_gem_request_get_seqno(ring->outstanding_lazy_request));
> +	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
>   	intel_ring_emit(ring, 0);
>   	__intel_ring_advance(ring);
>   
> @@ -1390,13 +1399,14 @@ void intel_ring_setup_status_page(struct intel_engine_cs *ring)
>   }
>   
>   static int
> -bsd_ring_flush(struct intel_engine_cs *ring,
> +bsd_ring_flush(struct drm_i915_gem_request *req,
>   	       u32     invalidate_domains,
>   	       u32     flush_domains)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	int ret;
>   
> -	ret = intel_ring_begin(ring, 2);
> +	ret = intel_ring_begin(req, 2);
>   	if (ret)
>   		return ret;
>   
> @@ -1407,18 +1417,18 @@ bsd_ring_flush(struct intel_engine_cs *ring,
>   }
>   
>   static int
> -i9xx_add_request(struct intel_engine_cs *ring)
> +i9xx_add_request(struct drm_i915_gem_request *req)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	int ret;
>   
> -	ret = intel_ring_begin(ring, 4);
> +	ret = intel_ring_begin(req, 4);
>   	if (ret)
>   		return ret;
>   
>   	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
>   	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
> -	intel_ring_emit(ring,
> -		    i915_gem_request_get_seqno(ring->outstanding_lazy_request));
> +	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
>   	intel_ring_emit(ring, MI_USER_INTERRUPT);
>   	__intel_ring_advance(ring);
>   
> @@ -1550,13 +1560,14 @@ gen8_ring_put_irq(struct intel_engine_cs *ring)
>   }
>   
>   static int
> -i965_dispatch_execbuffer(struct intel_engine_cs *ring,
> +i965_dispatch_execbuffer(struct drm_i915_gem_request *req,
>   			 u64 offset, u32 length,
>   			 unsigned flags)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	int ret;
>   
> -	ret = intel_ring_begin(ring, 2);
> +	ret = intel_ring_begin(req, 2);
>   	if (ret)
>   		return ret;
>   
> @@ -1575,14 +1586,15 @@ i965_dispatch_execbuffer(struct intel_engine_cs *ring,
>   #define I830_TLB_ENTRIES (2)
>   #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
>   static int
> -i830_dispatch_execbuffer(struct intel_engine_cs *ring,
> -				u64 offset, u32 len,
> -				unsigned flags)
> +i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
> +			 u64 offset, u32 len,
> +			 unsigned flags)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	u32 cs_offset = ring->scratch.gtt_offset;
>   	int ret;
>   
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(req, 6);
>   	if (ret)
>   		return ret;
>   
> @@ -1599,7 +1611,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,
>   		if (len > I830_BATCH_LIMIT)
>   			return -ENOSPC;
>   
> -		ret = intel_ring_begin(ring, 6 + 2);
> +		ret = intel_ring_begin(req, 6 + 2);
>   		if (ret)
>   			return ret;
>   
> @@ -1622,7 +1634,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,
>   		offset = cs_offset;
>   	}
>   
> -	ret = intel_ring_begin(ring, 4);
> +	ret = intel_ring_begin(req, 4);
>   	if (ret)
>   		return ret;
>   
> @@ -1636,13 +1648,14 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,
>   }
>   
>   static int
> -i915_dispatch_execbuffer(struct intel_engine_cs *ring,
> +i915_dispatch_execbuffer(struct drm_i915_gem_request *req,
>   			 u64 offset, u32 len,
>   			 unsigned flags)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	int ret;
>   
> -	ret = intel_ring_begin(ring, 2);
> +	ret = intel_ring_begin(req, 2);
>   	if (ret)
>   		return ret;
>   
> @@ -1885,6 +1898,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
>   
>   	intel_unpin_ringbuffer_obj(ringbuf);
>   	intel_destroy_ringbuffer_obj(ringbuf);
> +	WARN_ON(ring->outstanding_lazy_request);
>   	i915_gem_request_assign(&ring->outstanding_lazy_request, NULL);
>   
>   	if (ring->cleanup)
> @@ -2007,8 +2021,9 @@ int intel_ring_idle(struct intel_engine_cs *ring)
>   	int ret;
>   
>   	/* We need to add any requests required to flush the objects and ring */
> +	WARN_ON(ring->outstanding_lazy_request);
>   	if (ring->outstanding_lazy_request) {
> -		ret = i915_add_request(ring);
> +		ret = i915_add_request(ring->outstanding_lazy_request);
>   		if (ret)
>   			return ret;
>   	}
> @@ -2025,13 +2040,18 @@ int intel_ring_idle(struct intel_engine_cs *ring)
>   }
>   
>   int
> -intel_ring_alloc_request(struct intel_engine_cs *ring, struct intel_context *ctx)
> +intel_ring_alloc_request(struct intel_engine_cs *ring,
> +			 struct intel_context *ctx,
> +			 struct drm_i915_gem_request **req_out)
>   {
>   	int ret;
>   	struct drm_i915_gem_request *request;
>   	struct drm_i915_private *dev_private = ring->dev->dev_private;
>   
> -	if (ring->outstanding_lazy_request)
> +	if (!req_out)
> +		return -EINVAL;
> +
> +	if ((*req_out = ring->outstanding_lazy_request) != NULL)
>   		return 0;
>   
>   	request = kzalloc(sizeof(*request), GFP_KERNEL);
> @@ -2053,7 +2073,7 @@ intel_ring_alloc_request(struct intel_engine_cs *ring, struct intel_context *ctx
>   	spewThisReq(request, "\x1B[32mCreated: %d:%d, ref => %d\x1B[0m", request->uniq, request->seqno, request->ref.refcount.counter);
>   
>   	//printk(KERN_INFO "%s:%d> <%s> OLR = 0x%p, uniq = %d, seqno = %d\n", __func__, __LINE__, ring->name, request, request->uniq, request->seqno);
> -	ring->outstanding_lazy_request = request;
> +	*req_out = ring->outstanding_lazy_request = request;
>   	return 0;
>   }
>   
> @@ -2078,9 +2098,10 @@ static int __intel_ring_prepare(struct intel_engine_cs *ring,
>   	return 0;
>   }
>   
> -int intel_ring_begin(struct intel_engine_cs *ring,
> +int intel_ring_begin(struct drm_i915_gem_request *req,
>   		     int num_dwords)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	struct drm_i915_private *dev_priv = ring->dev->dev_private;
>   	int ret;
>   
> @@ -2093,18 +2114,14 @@ int intel_ring_begin(struct intel_engine_cs *ring,
>   	if (ret)
>   		return ret;
>   
> -	/* Preallocate the olr before touching the ring */
> -	ret = intel_ring_alloc_request(ring, NULL);
> -	if (ret)
> -		return ret;
> -
>   	ring->buffer->space -= num_dwords * sizeof(uint32_t);
>   	return 0;
>   }
>   
>   /* Align the ring tail to a cacheline boundary */
> -int intel_ring_cacheline_align(struct intel_engine_cs *ring)
> +int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
>   	int ret;
>   
> @@ -2112,7 +2129,7 @@ int intel_ring_cacheline_align(struct intel_engine_cs *ring)
>   		return 0;
>   
>   	num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
> -	ret = intel_ring_begin(ring, num_dwords);
> +	ret = intel_ring_begin(req, num_dwords);
>   	if (ret)
>   		return ret;
>   
> @@ -2176,13 +2193,14 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring,
>   		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
>   }
>   
> -static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
> +static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
>   			       u32 invalidate, u32 flush)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	uint32_t cmd;
>   	int ret;
>   
> -	ret = intel_ring_begin(ring, 4);
> +	ret = intel_ring_begin(req, 4);
>   	if (ret)
>   		return ret;
>   
> @@ -2212,14 +2230,15 @@ static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
>   }
>   
>   static int
> -gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
> +gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
>   			      u64 offset, u32 len,
>   			      unsigned flags)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE);
>   	int ret;
>   
> -	ret = intel_ring_begin(ring, 4);
> +	ret = intel_ring_begin(req, 4);
>   	if (ret)
>   		return ret;
>   
> @@ -2234,13 +2253,14 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
>   }
>   
>   static int
> -hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
> +hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
>   			      u64 offset, u32 len,
>   			      unsigned flags)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	int ret;
>   
> -	ret = intel_ring_begin(ring, 2);
> +	ret = intel_ring_begin(req, 2);
>   	if (ret)
>   		return ret;
>   
> @@ -2256,13 +2276,14 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
>   }
>   
>   static int
> -gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
> +gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
>   			      u64 offset, u32 len,
>   			      unsigned flags)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	int ret;
>   
> -	ret = intel_ring_begin(ring, 2);
> +	ret = intel_ring_begin(req, 2);
>   	if (ret)
>   		return ret;
>   
> @@ -2278,15 +2299,16 @@ gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
>   
>   /* Blitter support (SandyBridge+) */
>   
> -static int gen6_ring_flush(struct intel_engine_cs *ring,
> +static int gen6_ring_flush(struct drm_i915_gem_request *req,
>   			   u32 invalidate, u32 flush)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	struct drm_device *dev = ring->dev;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	uint32_t cmd;
>   	int ret;
>   
> -	ret = intel_ring_begin(ring, 4);
> +	ret = intel_ring_begin(req, 4);
>   	if (ret)
>   		return ret;
>   
> @@ -2315,7 +2337,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
>   
>   	if (!invalidate && flush) {
>   		if (IS_GEN7(dev))
> -			return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);
> +			return gen7_ring_fbc_flush(req, FBC_REND_CACHE_CLEAN);
>   		else if (IS_BROADWELL(dev))
>   			dev_priv->fbc.need_sw_cache_clean = true;
>   	}
> @@ -2696,14 +2718,15 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
>   }
>   
>   int
> -intel_ring_flush_all_caches(struct intel_engine_cs *ring)
> +intel_ring_flush_all_caches(struct drm_i915_gem_request *req)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	int ret;
>   
>   	if (!ring->gpu_caches_dirty)
>   		return 0;
>   
> -	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
> +	ret = ring->flush(req, 0, I915_GEM_GPU_DOMAINS);
>   	if (ret)
>   		return ret;
>   
> @@ -2714,8 +2737,9 @@ intel_ring_flush_all_caches(struct intel_engine_cs *ring)
>   }
>   
>   int
> -intel_ring_invalidate_all_caches(struct intel_engine_cs *ring)
> +intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
>   {
> +	struct intel_engine_cs *ring = req->ring;
>   	uint32_t flush_domains;
>   	int ret;
>   
> @@ -2723,7 +2747,7 @@ intel_ring_invalidate_all_caches(struct intel_engine_cs *ring)
>   	if (ring->gpu_caches_dirty)
>   		flush_domains = I915_GEM_GPU_DOMAINS;
>   
> -	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
> +	ret = ring->flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
>   	if (ret)
>   		return ret;
>   
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 48cbb00..a7e47ad 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -154,15 +154,15 @@ struct  intel_engine_cs {
>   
>   	int		(*init_hw)(struct intel_engine_cs *ring);
>   
> -	int		(*init_context)(struct intel_engine_cs *ring,
> +	int		(*init_context)(struct drm_i915_gem_request *req,
>   					struct intel_context *ctx);
>   
>   	void		(*write_tail)(struct intel_engine_cs *ring,
>   				      u32 value);
> -	int __must_check (*flush)(struct intel_engine_cs *ring,
> +	int __must_check (*flush)(struct drm_i915_gem_request *req,
>   				  u32	invalidate_domains,
>   				  u32	flush_domains);
> -	int		(*add_request)(struct intel_engine_cs *ring);
> +	int		(*add_request)(struct drm_i915_gem_request *req);
>   	/* Some chipsets are not quite as coherent as advertised and need
>   	 * an expensive kick to force a true read of the up-to-date seqno.
>   	 * However, the up-to-date seqno is not always required and the last
> @@ -173,7 +173,7 @@ struct  intel_engine_cs {
>   				     bool lazy_coherency);
>   	void		(*set_seqno)(struct intel_engine_cs *ring,
>   				     u32 seqno);
> -	int		(*dispatch_execbuffer)(struct intel_engine_cs *ring,
> +	int		(*dispatch_execbuffer)(struct drm_i915_gem_request *req,
>   					       u64 offset, u32 length,
>   					       unsigned dispatch_flags);
>   #define I915_DISPATCH_SECURE 0x1
> @@ -231,10 +231,10 @@ struct  intel_engine_cs {
>   		};
>   
>   		/* AKA wait() */
> -		int	(*sync_to)(struct intel_engine_cs *ring,
> -				   struct intel_engine_cs *to,
> +		int	(*sync_to)(struct drm_i915_gem_request *to_req,
> +				   struct intel_engine_cs *from,
>   				   u32 seqno);
> -		int	(*signal)(struct intel_engine_cs *signaller,
> +		int	(*signal)(struct drm_i915_gem_request *signaller_req,
>   				  /* num_dwords needed by caller */
>   				  unsigned int num_dwords);
>   	} semaphore;
> @@ -245,11 +245,11 @@ struct  intel_engine_cs {
>   	struct list_head execlist_retired_req_list;
>   	u8 next_context_status_buffer;
>   	u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
> -	int		(*emit_request)(struct intel_ringbuffer *ringbuf);
> -	int		(*emit_flush)(struct intel_ringbuffer *ringbuf,
> +	int		(*emit_request)(struct drm_i915_gem_request *req);
> +	int		(*emit_flush)(struct drm_i915_gem_request *req,
>   				      u32 invalidate_domains,
>   				      u32 flush_domains);
> -	int		(*emit_bb_start)(struct intel_ringbuffer *ringbuf,
> +	int		(*emit_bb_start)(struct drm_i915_gem_request *req,
>   					 u64 offset, unsigned flags);
>   
>   	/**
> @@ -433,10 +433,11 @@ int intel_alloc_ringbuffer_obj(struct drm_device *dev,
>   void intel_stop_ring_buffer(struct intel_engine_cs *ring);
>   void intel_cleanup_ring_buffer(struct intel_engine_cs *ring);
>   
> -int __must_check intel_ring_begin(struct intel_engine_cs *ring, int n);
> -int __must_check intel_ring_cacheline_align(struct intel_engine_cs *ring);
> +int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n);
> +int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
>   int __must_check intel_ring_alloc_request(struct intel_engine_cs *ring,
> -					  struct intel_context *ctx);
> +					  struct intel_context *ctx,
> +					  struct drm_i915_gem_request **req_out);
>   static inline void intel_ring_emit(struct intel_engine_cs *ring,
>   				   u32 data)
>   {
> @@ -457,8 +458,8 @@ void __intel_ring_advance(struct intel_engine_cs *ring);
>   
>   int __must_check intel_ring_idle(struct intel_engine_cs *ring);
>   void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno);
> -int intel_ring_flush_all_caches(struct intel_engine_cs *ring);
> -int intel_ring_invalidate_all_caches(struct intel_engine_cs *ring);
> +int intel_ring_flush_all_caches(struct drm_i915_gem_request *req);
> +int intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req);
>   
>   void intel_fini_pipe_control(struct intel_engine_cs *ring);
>   int intel_init_pipe_control(struct intel_engine_cs *ring);
> @@ -479,11 +480,4 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
>   	return ringbuf->tail;
>   }
>   
> -static inline struct drm_i915_gem_request *
> -intel_ring_get_request(struct intel_engine_cs *ring)
> -{
> -	BUG_ON(ring->outstanding_lazy_request == NULL);
> -	return ring->outstanding_lazy_request;
> -}
> -
>   #endif /* _INTEL_RINGBUFFER_H_ */

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] drm/i915: Remove OLR
  2014-12-19 14:41 [PATCH] drm/i915: Remove OLR John.C.Harrison
  2014-12-19 14:43 ` John Harrison
@ 2015-01-06 13:52 ` Daniel Vetter
  1 sibling, 0 replies; 3+ messages in thread
From: Daniel Vetter @ 2015-01-06 13:52 UTC (permalink / raw)
  To: John.C.Harrison; +Cc: Intel-GFX

On Fri, Dec 19, 2014 at 02:41:05PM +0000, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
> 
> The outstanding lazy request mechanism does not really work well with
> a GPU scheduler. The scheduler expects each work packet, i.e. request
> structure, to be a complete entity and to belong to one and only one
> submitter. Whereas the whole lazy mechanism allows lots of work from
> lots of different places to all be lumped together into a single
> request. It also means that work is floating around in the system
> unowned and untracked at various random points in time. This all
> causes headaches for the scheduler.
> 
> This patch removes the need for the outstanding lazy request. It
> converts all functions which would otherwise be relying on the OLR to
> explicitly manage the request. Either by allocating, passing and
> submitting the request if they are the top level owner. Or by simply
> taking a request in as a parameter rather than pulling it out of the
> magic global variable if they are a client. The OLR itself is left in
> along with a bunch of sanity check asserts that it matches the request
> being passed in as a parameter. However, it should now be safe to
> remove completely.
> 
> Note that this patch is not intended as a final, shipping, isn't it
> gorgeous, end product. It is merely a quick hack that I went through
> as being the simplest way to actually work out what the real sequence
> of events and the real ownership of work is in certain circumstances.
> Most particularly to do with display and overlay work. However, I
> would like to get agreement that it is a good direction to go in and
> that removing the OLR would be a good thing. Or, to put it another
> way, is it worth me trying to break this patch into a set of
> manageable items or do I just abandon it and give up?
> 
> Note also that the patch is based on a tree including the scheduler
> prep-work patches posted earlier. So it will not apply to a clean
> nightly tree.
> 
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>

Summarizing offline discussions from a meeting about John's rfc here:

I definitely like where this is going: using requests as the primary
object to submit work to the gpu should simplify our code a lot. And
getting rid of the olr will remove a lot of the accidental complexity in
gem. I also looked at some of the details here with John, specifically
how he shuffles the init_hw functions around a bit so that one request
wraps all the ring init (default ctx, ppgtt, l3 remapping).
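
Roughly, the per-ring init flow would then look something like the
sketch below. This is a minimal sketch with a made-up function name,
not the exact code from the patch; it only uses the interfaces the
patch converts (the gt.alloc_request vfunc, i915_gem_context_enable(req),
i915_ppgtt_init_ring(req), i915_gem_l3_remap(req, slice) and
i915_add_request_no_flush(req)):

/*
 * Minimal sketch (not the patch's exact code): one request wraps all of
 * a ring's init work instead of relying on the OLR to collect it.
 */
static int init_rings_one_request_each(struct drm_device *dev)
{
    struct drm_i915_private *dev_priv = dev->dev_private;
    struct intel_engine_cs *ring;
    int ret, i, slice;

    for_each_ring(ring, dev_priv, i) {
        struct drm_i915_gem_request *req;

        ret = ring->init_hw(ring);
        if (ret)
            return ret;

        /* the top-level owner explicitly allocates the request */
        ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
        if (ret)
            return ret;

        /* all init work for this ring is emitted into that one request */
        ret = i915_gem_context_enable(req);
        if (ret == 0)
            ret = i915_ppgtt_init_ring(req);
        if (ret == 0 && ring->id == RCS) {
            for (slice = 0; slice < NUM_L3_SLICES(dev); slice++)
                i915_gem_l3_remap(req, slice);
        }

        if (ret) {
            i915_gem_request_unreference(req);
            return ret;
        }

        /* the owner submits its own request; no OLR is left behind */
        ret = i915_add_request_no_flush(req);
        if (ret)
            return ret;
    }

    return 0;
}

The key point is that the owner of the work allocates, fills and
submits the request itself, so nothing ever needs to be picked up later
via the outstanding lazy request.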

For the details it'd be good to discuss this all with Chris since he's got
a working poc for this, just to make sure you know about all the dragons
potentially lurking around.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_drv.h              |   29 ++--
>  drivers/gpu/drm/i915/i915_gem.c              |  182 ++++++++++++--------
>  drivers/gpu/drm/i915/i915_gem_context.c      |   69 +++-----
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c   |   62 +++----
>  drivers/gpu/drm/i915/i915_gem_gtt.c          |   64 ++++----
>  drivers/gpu/drm/i915/i915_gem_gtt.h          |    3 +-
>  drivers/gpu/drm/i915/i915_gem_render_state.c |   10 +-
>  drivers/gpu/drm/i915/i915_gem_render_state.h |    2 +-
>  drivers/gpu/drm/i915/intel_display.c         |   68 ++++----
>  drivers/gpu/drm/i915/intel_lrc.c             |  145 +++++++++-------
>  drivers/gpu/drm/i915/intel_lrc.h             |    8 +-
>  drivers/gpu/drm/i915/intel_overlay.c         |   58 ++++---
>  drivers/gpu/drm/i915/intel_pm.c              |   33 ++--
>  drivers/gpu/drm/i915/intel_ringbuffer.c      |  228 ++++++++++++++------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h      |   38 ++---
>  15 files changed, 553 insertions(+), 446 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 511f55f..7b4309e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -513,7 +513,7 @@ struct drm_i915_display_funcs {
>  	int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc,
>  			  struct drm_framebuffer *fb,
>  			  struct drm_i915_gem_object *obj,
> -			  struct intel_engine_cs *ring,
> +			  struct drm_i915_gem_request *req,
>  			  uint32_t flags);
>  	void (*update_primary_plane)(struct drm_crtc *crtc,
>  				     struct drm_framebuffer *fb,
> @@ -1796,7 +1796,8 @@ struct drm_i915_private {
>  	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
>  	struct {
>  		int (*alloc_request)(struct intel_engine_cs *ring,
> -				     struct intel_context *ctx);
> +				     struct intel_context *ctx,
> +				     struct drm_i915_gem_request **req_out);
>  		int (*do_execbuf)(struct i915_execbuffer_params *params,
>  				  struct drm_i915_gem_execbuffer2 *args,
>  				  struct list_head *vmas);
> @@ -2511,10 +2512,10 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>  int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
>  			     struct drm_file *file_priv);
>  void i915_gem_execbuffer_move_to_active(struct list_head *vmas,
> -					struct intel_engine_cs *ring);
> +					struct drm_i915_gem_request *req);
>  void i915_gem_execbuffer_retire_commands(struct drm_device *dev,
>  					 struct drm_file *file,
> -					 struct intel_engine_cs *ring,
> +					 struct drm_i915_gem_request *req,
>  					 struct drm_i915_gem_object *obj);
>  void i915_gem_execbuff_release_batch_obj(struct drm_i915_gem_object *batch_obj);
>  int i915_gem_ringbuffer_submission(struct i915_execbuffer_params *qe,
> @@ -2609,9 +2610,9 @@ int __must_check __i915_mutex_lock_interruptible(struct drm_device *dev, const c
>  int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
>  #endif
>  int i915_gem_object_sync(struct drm_i915_gem_object *obj,
> -			 struct intel_engine_cs *to, bool add_request);
> +			 struct drm_i915_gem_request *to_req);
>  void i915_vma_move_to_active(struct i915_vma *vma,
> -			     struct intel_engine_cs *ring);
> +			     struct drm_i915_gem_request *req);
>  int i915_gem_dumb_create(struct drm_file *file_priv,
>  			 struct drm_device *dev,
>  			 struct drm_mode_create_dumb *args);
> @@ -2678,19 +2679,19 @@ int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
>  int __must_check i915_gem_init(struct drm_device *dev);
>  int i915_gem_init_rings(struct drm_device *dev);
>  int __must_check i915_gem_init_hw(struct drm_device *dev);
> -int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice);
> +int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice);
>  void i915_gem_init_swizzling(struct drm_device *dev);
>  void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
>  int __must_check i915_gpu_idle(struct drm_device *dev);
>  int __must_check i915_gem_suspend(struct drm_device *dev);
> -int __i915_add_request(struct intel_engine_cs *ring,
> +int __i915_add_request(struct drm_i915_gem_request *req,
>  		       struct drm_file *file,
>  		       struct drm_i915_gem_object *batch_obj,
>  		       bool flush_caches);
> -#define i915_add_request(ring) \
> -	__i915_add_request(ring, NULL, NULL, true)
> -#define i915_add_request_no_flush(ring) \
> -	__i915_add_request(ring, NULL, NULL, false)
> +#define i915_add_request(req) \
> +	__i915_add_request(req, NULL, NULL, true)
> +#define i915_add_request_no_flush(req) \
> +	__i915_add_request(req, NULL, NULL, false)
>  int __i915_wait_request(struct drm_i915_gem_request *req,
>  			unsigned reset_counter,
>  			bool interruptible,
> @@ -2810,9 +2811,9 @@ int __must_check i915_gem_context_init(struct drm_device *dev);
>  void i915_gem_context_fini(struct drm_device *dev);
>  void i915_gem_context_reset(struct drm_device *dev);
>  int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
> -int i915_gem_context_enable(struct drm_i915_private *dev_priv);
> +int i915_gem_context_enable(struct drm_i915_gem_request *req);
>  void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
> -int i915_switch_context(struct intel_engine_cs *ring,
> +int i915_switch_context(struct drm_i915_gem_request *req,
>  			struct intel_context *to);
>  struct intel_context *
>  i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 1d2cbfb..dbfb4e5 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1178,7 +1178,7 @@ i915_gem_check_olr(struct drm_i915_gem_request *req)
>  
>  	ret = 0;
>  	if (req == req->ring->outstanding_lazy_request)
> -		ret = i915_add_request(req->ring);
> +		ret = i915_add_request(req);
>  
>  	return ret;
>  }
> @@ -2294,17 +2294,16 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>  
>  static void
>  i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
> -			       struct intel_engine_cs *ring)
> +			       struct drm_i915_gem_request *req)
>  {
> -	struct drm_i915_gem_request *req;
> -	struct intel_engine_cs *old_ring;
> +	struct intel_engine_cs *new_ring, *old_ring;
>  
> -	BUG_ON(ring == NULL);
> +	BUG_ON(req == NULL);
>  
> -	req = intel_ring_get_request(ring);
> +	new_ring = i915_gem_request_get_ring(req);
>  	old_ring = i915_gem_request_get_ring(obj->last_read_req);
>  
> -	if (old_ring != ring && obj->last_write_req) {
> +	if (old_ring != new_ring && obj->last_write_req) {
>  		/* Keep the request relative to the current ring */
>  		i915_gem_request_assign(&obj->last_write_req, req);
>  	}
> @@ -2315,17 +2314,17 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
>  		obj->active = 1;
>  	}
>  
> -	list_move_tail(&obj->ring_list, &ring->active_list);
> +	list_move_tail(&obj->ring_list, &new_ring->active_list);
>  
> -	//printk(KERN_INFO "%s:%d> <%s> obj = %p, last_read_req <= 0x%p\n", __func__, __LINE__, ring->name, obj, req);
> +	//printk(KERN_INFO "%s:%d> <%s> obj = %p, last_read_req <= 0x%p\n", __func__, __LINE__, new_ring->name, obj, req);
>  	i915_gem_request_assign(&obj->last_read_req, req);
>  }
>  
>  void i915_vma_move_to_active(struct i915_vma *vma,
> -			     struct intel_engine_cs *ring)
> +			     struct drm_i915_gem_request *req)
>  {
>  	list_move_tail(&vma->mm_list, &vma->vm->active_list);
> -	return i915_gem_object_move_to_active(vma->obj, ring);
> +	return i915_gem_object_move_to_active(vma->obj, req);
>  }
>  
>  static void
> @@ -2440,26 +2439,35 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
>  	return 0;
>  }
>  
> -int __i915_add_request(struct intel_engine_cs *ring,
> +int __i915_add_request(struct drm_i915_gem_request *request,
>  		       struct drm_file *file,
>  		       struct drm_i915_gem_object *obj,
>  		       bool flush_caches)
>  {
> -	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> -	struct drm_i915_gem_request *request;
> +	struct intel_engine_cs *ring;
> +	struct drm_i915_private *dev_priv;
>  	struct intel_ringbuffer *ringbuf;
>  	u32 request_ring_position, request_start;
>  	int ret;
>  
> -	request = ring->outstanding_lazy_request;
> +	/*printk( KERN_ERR "<%s> request %c %d:%d, OLR %c %d:%d\n",
> +		request ? request->ring->name : "???",
> +		request ? '=' : '?',
> +		request ? request->uniq : -1,
> +		request ? request->seqno : 0,
> +		request->ring->outstanding_lazy_request ? '=' : '?',
> +		request->ring->outstanding_lazy_request ? request->ring->outstanding_lazy_request->uniq : -1,
> +		request->ring->outstanding_lazy_request ? request->ring->outstanding_lazy_request->seqno : 0);*/
> +	//dump_stack();
> +
>  	if (WARN_ON(request == NULL))
>  		return -ENOMEM;
>  
> -	if (i915.enable_execlists) {
> -		struct intel_context *ctx = request->ctx;
> -		ringbuf = ctx->engine[ring->id].ringbuf;
> -	} else
> -		ringbuf = ring->buffer;
> +	ring = request->ring;
> +	dev_priv = ring->dev->dev_private;
> +	ringbuf = request->ringbuf;
> +
> +	WARN_ON(request != ring->outstanding_lazy_request);
>  
>  	request_start = intel_ring_get_tail(ringbuf);
>  	/*
> @@ -2471,9 +2479,9 @@ int __i915_add_request(struct intel_engine_cs *ring,
>  	 */
>  	if (flush_caches) {
>  		if (i915.enable_execlists)
> -			ret = logical_ring_flush_all_caches(ringbuf);
> +			ret = logical_ring_flush_all_caches(request);
>  		else
> -			ret = intel_ring_flush_all_caches(ring);
> +			ret = intel_ring_flush_all_caches(request);
>  		if (ret)
>  			return ret;
>  	}
> @@ -2488,9 +2496,9 @@ int __i915_add_request(struct intel_engine_cs *ring,
>  	request_ring_position = intel_ring_get_tail(ringbuf);
>  
>  	if (i915.enable_execlists)
> -		ret = ring->emit_request(ringbuf);
> +		ret = ring->emit_request(request);
>  	else
> -		ret = ring->add_request(ring);
> +		ret = ring->add_request(request);
>  	if (ret)
>  		return ret;
>  
> @@ -2504,7 +2512,8 @@ int __i915_add_request(struct intel_engine_cs *ring,
>  	 * inactive_list and lose its active reference. Hence we do not need
>  	 * to explicitly hold another reference here.
>  	 */
> -	request->batch_obj = obj;
> +	if (obj)
> +		request->batch_obj = obj;
>  
>  	if (!i915.enable_execlists) {
>  		/* Hold a reference to the current context so that we can inspect
> @@ -2744,6 +2753,7 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
>  #endif
>  
>  	/* This may not have been flushed before the reset, so clean it now */
> +	WARN_ON(ring->outstanding_lazy_request);
>  	i915_gem_request_assign(&ring->outstanding_lazy_request, NULL);
>  }
>  
> @@ -3114,8 +3124,6 @@ out:
>   *
>   * @obj: object which may be in use on another ring.
>   * @to: ring we wish to use the object on. May be NULL.
> - * @add_request: do we need to add a request to track operations
> - *    submitted on ring with sync_to function
>   *
>   * This code is meant to abstract object synchronization with the GPU.
>   * Calling with NULL implies synchronizing the object with the CPU
> @@ -3125,8 +3133,9 @@ out:
>   */
>  int
>  i915_gem_object_sync(struct drm_i915_gem_object *obj,
> -		     struct intel_engine_cs *to, bool add_request)
> +		     struct drm_i915_gem_request *to_req)
>  {
> +	struct intel_engine_cs *to = to_req->ring;
>  	struct intel_engine_cs *from;
>  	u32 seqno;
>  	int ret, idx;
> @@ -3152,7 +3161,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
>  		return ret;
>  
>  	trace_i915_gem_ring_sync_to(from, to, obj->last_read_req);
> -	ret = to->semaphore.sync_to(to, from, seqno);
> +	ret = to->semaphore.sync_to(to_req, from, seqno);
>  	if (!ret) {
>  		/* We use last_read_req because sync_to()
>  		 * might have just caused seqno wrap under
> @@ -3160,8 +3169,6 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
>  		 */
>  		from->semaphore.sync_seqno[idx] =
>  				i915_gem_request_get_seqno(obj->last_read_req);
> -		if (add_request)
> -			i915_add_request_no_flush(to);
>  	}
>  
>  	return ret;
> @@ -3266,18 +3273,23 @@ int i915_gpu_idle(struct drm_device *dev)
>  	/* Flush everything onto the inactive list. */
>  	for_each_ring(ring, dev_priv, i) {
>  		if (!i915.enable_execlists) {
> -			ret = i915_switch_context(ring, ring->default_context);
> +			struct drm_i915_gem_request *req;
> +
> +			ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
>  			if (ret)
>  				return ret;
> -		}
>  
> -		/* Make sure the context switch (if one actually happened)
> -		 * gets wrapped up and finished rather than hanging around
> -		 * and confusing things later. */
> -		if (ring->outstanding_lazy_request) {
> -			ret = i915_add_request(ring);
> -			if (ret)
> +			ret = i915_switch_context(req, ring->default_context);
> +			if (ret) {
> +				i915_gem_request_unreference(req);
>  				return ret;
> +			}
> +
> +			ret = i915_add_request_no_flush(req);
> +			if (ret) {
> +				i915_gem_request_unreference(req);
> +				return ret;
> +			}
>  		}
>  
>  		ret = intel_ring_idle(ring);
> @@ -4099,8 +4111,19 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  	bool was_pin_display;
>  	int ret;
>  
> -	if (pipelined != i915_gem_request_get_ring(obj->last_read_req)) {
> -		ret = i915_gem_object_sync(obj, pipelined, true);
> +	if (pipelined && (pipelined != i915_gem_request_get_ring(obj->last_read_req))) {
> +		struct drm_i915_private *dev_priv = pipelined->dev->dev_private;
> +		struct drm_i915_gem_request *req;
> +
> +		ret = dev_priv->gt.alloc_request(pipelined, pipelined->default_context, &req);
> +		if (ret)
> +			return ret;
> +
> +		ret = i915_gem_object_sync(obj, req);
> +		if (ret)
> +			return ret;
> +
> +		ret = i915_add_request_no_flush(req);
>  		if (ret)
>  			return ret;
>  	}
> @@ -4771,8 +4794,9 @@ err:
>  	return ret;
>  }
>  
> -int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice)
> +int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	struct drm_device *dev = ring->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
> @@ -4782,7 +4806,7 @@ int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice)
>  	if (!HAS_L3_DPF(dev) || !remap_info)
>  		return 0;
>  
> -	ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3);
> +	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
>  	if (ret)
>  		return ret;
>  
> @@ -4962,37 +4986,67 @@ i915_gem_init_hw(struct drm_device *dev)
>  	 */
>  	init_unused_rings(dev);
>  
> +	BUG_ON(!dev_priv->ring[RCS].default_context);
> +
> +	ret = i915_ppgtt_init_hw(dev);
> +	if (ret) {
> +		DRM_ERROR("PPGTT enable failed %d\n", ret);
> +		i915_gem_cleanup_ringbuffer(dev);
> +		return ret;
> +	}
> +
>  	for_each_ring(ring, dev_priv, i) {
> +		struct drm_i915_gem_request *req;
> +
>  		ret = ring->init_hw(ring);
>  		if (ret)
>  			return ret;
> -	}
>  
> -	for (i = 0; i < NUM_L3_SLICES(dev); i++)
> -		i915_gem_l3_remap(&dev_priv->ring[RCS], i);
> +		if (!ring->default_context)
> +			continue;
>  
> -	/*
> -	 * XXX: Contexts should only be initialized once. Doing a switch to the
> -	 * default context switch however is something we'd like to do after
> -	 * reset or thaw (the latter may not actually be necessary for HW, but
> -	 * goes with our code better). Context switching requires rings (for
> -	 * the do_switch), but before enabling PPGTT. So don't move this.
> -	 */
> -	ret = i915_gem_context_enable(dev_priv);
> -	if (ret && ret != -EIO) {
> -		DRM_ERROR("Context enable failed %d\n", ret);
> -		i915_gem_cleanup_ringbuffer(dev);
> +		ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
> +		if (ret)
> +			return ret;
>  
> -		return ret;
> -	}
> +		if (ring->id == RCS) {
> +			for (i = 0; i < NUM_L3_SLICES(dev); i++)
> +				i915_gem_l3_remap(req, i);
> +		}
>  
> -	ret = i915_ppgtt_init_hw(dev);
> -	if (ret && ret != -EIO) {
> -		DRM_ERROR("PPGTT enable failed %d\n", ret);
> -		i915_gem_cleanup_ringbuffer(dev);
> +		/*
> +		 * XXX: Contexts should only be initialized once. Doing a switch to the
> +		 * default context switch however is something we'd like to do after
> +		 * reset or thaw (the latter may not actually be necessary for HW, but
> +		 * goes with our code better). Context switching requires rings (for
> +		 * the do_switch), but before enabling PPGTT. So don't move this.
> +		 */
> +		ret = i915_gem_context_enable(req);
> +		if (ret && ret != -EIO) {
> +			DRM_ERROR("Context enable failed %d\n", ret);
> +			i915_gem_request_unreference(req);
> +			i915_gem_cleanup_ringbuffer(dev);
> +
> +			return ret;
> +		}
> +
> +		ret = i915_ppgtt_init_ring(req);
> +		if (ret && ret != -EIO) {
> +			DRM_ERROR("PPGTT enable failed %d\n", ret);
> +			i915_gem_request_unreference(req);
> +			i915_gem_cleanup_ringbuffer(dev);
> +		}
> +
> +		ret = i915_add_request_no_flush(req);
> +		if (ret) {
> +			DRM_ERROR("Add request failed: %d\n", ret);
> +			i915_gem_request_unreference(req);
> +			i915_gem_cleanup_ringbuffer(dev);
> +			return ret;
> +		}
>  	}
>  
> -	return ret;
> +	return 0;
>  }
>  
>  int i915_gem_init(struct drm_device *dev)
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index c5e1bfc..72e280b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -401,41 +401,23 @@ void i915_gem_context_fini(struct drm_device *dev)
>  	i915_gem_context_unreference(dctx);
>  }
>  
> -int i915_gem_context_enable(struct drm_i915_private *dev_priv)
> +int i915_gem_context_enable(struct drm_i915_gem_request *req)
>  {
> -	struct intel_engine_cs *ring;
> -	int ret, i;
> -
> -	BUG_ON(!dev_priv->ring[RCS].default_context);
> +	struct intel_engine_cs *ring = req->ring;
> +	int ret;
>  
>  	if (i915.enable_execlists) {
> -		for_each_ring(ring, dev_priv, i) {
> -			if (ring->init_context) {
> -				ret = ring->init_context(ring,
> -						ring->default_context);
> -				if (ret) {
> -					DRM_ERROR("ring init context: %d\n",
> -							ret);
> -					return ret;
> -				}
> -			}
> -		}
> +		if (ring->init_context == NULL)
> +			return 0;
>  
> +		ret = ring->init_context(req, ring->default_context);
>  	} else
> -		for_each_ring(ring, dev_priv, i) {
> -			ret = i915_switch_context(ring, ring->default_context);
> -			if (ret)
> -				return ret;
> -
> -			/* Make sure the context switch (if one actually happened)
> -			 * gets wrapped up and finished rather than hanging around
> -			 * and confusing things later. */
> -			if (ring->outstanding_lazy_request) {
> -				ret = i915_add_request_no_flush(ring);
> -				if (ret)
> -					return ret;
> -			}
> -		}
> +		ret = i915_switch_context(req, ring->default_context);
> +
> +	if (ret) {
> +		DRM_ERROR("ring init context: %d\n", ret);
> +		return ret;
> +	}
>  
>  	return 0;
>  }
> @@ -488,10 +470,11 @@ i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id)
>  }
>  
>  static inline int
> -mi_set_context(struct intel_engine_cs *ring,
> +mi_set_context(struct drm_i915_gem_request *req,
>  	       struct intel_context *new_context,
>  	       u32 hw_flags)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	u32 flags = hw_flags | MI_MM_SPACE_GTT;
>  	int ret;
>  
> @@ -501,7 +484,7 @@ mi_set_context(struct intel_engine_cs *ring,
>  	 * itlb_before_ctx_switch.
>  	 */
>  	if (IS_GEN6(ring->dev)) {
> -		ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, 0);
> +		ret = ring->flush(req, I915_GEM_GPU_DOMAINS, 0);
>  		if (ret)
>  			return ret;
>  	}
> @@ -510,7 +493,7 @@ mi_set_context(struct intel_engine_cs *ring,
>  	if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8)
>  		flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);
>  
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(req, 6);
>  	if (ret)
>  		return ret;
>  
> @@ -540,9 +523,10 @@ mi_set_context(struct intel_engine_cs *ring,
>  	return ret;
>  }
>  
> -static int do_switch(struct intel_engine_cs *ring,
> +static int do_switch(struct drm_i915_gem_request *req,
>  		     struct intel_context *to)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	struct drm_i915_private *dev_priv = ring->dev->dev_private;
>  	struct intel_context *from = ring->last_context;
>  	u32 hw_flags = 0;
> @@ -577,7 +561,7 @@ static int do_switch(struct intel_engine_cs *ring,
>  
>  	if (to->ppgtt) {
>  		trace_switch_mm(ring, to);
> -		ret = to->ppgtt->switch_mm(to->ppgtt, ring);
> +		ret = to->ppgtt->switch_mm(to->ppgtt, req);
>  		if (ret)
>  			goto unpin_out;
>  	}
> @@ -608,7 +592,7 @@ static int do_switch(struct intel_engine_cs *ring,
>  	if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to))
>  		hw_flags |= MI_RESTORE_INHIBIT;
>  
> -	ret = mi_set_context(ring, to, hw_flags);
> +	ret = mi_set_context(req, to, hw_flags);
>  	if (ret)
>  		goto unpin_out;
>  
> @@ -616,7 +600,7 @@ static int do_switch(struct intel_engine_cs *ring,
>  		if (!(to->remap_slice & (1<<i)))
>  			continue;
>  
> -		ret = i915_gem_l3_remap(ring, i);
> +		ret = i915_gem_l3_remap(req, i);
>  		/* If it failed, try again next round */
>  		if (ret)
>  			DRM_DEBUG_DRIVER("L3 remapping failed\n");
> @@ -632,7 +616,7 @@ static int do_switch(struct intel_engine_cs *ring,
>  	 */
>  	if (from != NULL) {
>  		from->legacy_hw_ctx.rcs_state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> -		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), ring);
> +		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), req);
>  		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
>  		 * whole damn pipeline, we don't need to explicitly mark the
>  		 * object dirty. The only exception is that the context must be
> @@ -658,12 +642,12 @@ done:
>  
>  	if (uninitialized) {
>  		if (ring->init_context) {
> -			ret = ring->init_context(ring, to);
> +			ret = ring->init_context(req, to);
>  			if (ret)
>  				DRM_ERROR("ring init context: %d\n", ret);
>  		}
>  
> -		ret = i915_gem_render_state_init(ring);
> +		ret = i915_gem_render_state_init(req);
>  		if (ret)
>  			DRM_ERROR("init render state: %d\n", ret);
>  	}
> @@ -690,9 +674,10 @@ unpin_out:
>   * switched by writing to the ELSP and requests keep a reference to their
>   * context.
>   */
> -int i915_switch_context(struct intel_engine_cs *ring,
> +int i915_switch_context(struct drm_i915_gem_request *req,
>  			struct intel_context *to)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	struct drm_i915_private *dev_priv = ring->dev->dev_private;
>  
>  	WARN_ON(i915.enable_execlists);
> @@ -708,7 +693,7 @@ int i915_switch_context(struct intel_engine_cs *ring,
>  		return 0;
>  	}
>  
> -	return do_switch(ring, to);
> +	return do_switch(req, to);
>  }
>  
>  static bool contexts_enabled(struct drm_device *dev)
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index ca31673..5caa2a2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -822,7 +822,7 @@ err:
>  }
>  
>  static int
> -i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
> +i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
>  				struct list_head *vmas)
>  {
>  	struct i915_vma *vma;
> @@ -832,7 +832,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
>  
>  	list_for_each_entry(vma, vmas, exec_list) {
>  		struct drm_i915_gem_object *obj = vma->obj;
> -		ret = i915_gem_object_sync(obj, ring, false);
> +		ret = i915_gem_object_sync(obj, req);
>  		if (ret)
>  			return ret;
>  
> @@ -843,7 +843,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
>  	}
>  
>  	if (flush_chipset)
> -		i915_gem_chipset_flush(ring->dev);
> +		i915_gem_chipset_flush(req->ring->dev);
>  
>  	if (flush_domains & I915_GEM_DOMAIN_GTT)
>  		wmb();
> @@ -941,9 +941,9 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
>  
>  void
>  i915_gem_execbuffer_move_to_active(struct list_head *vmas,
> -				   struct intel_engine_cs *ring)
> +				   struct drm_i915_gem_request *req)
>  {
> -	struct drm_i915_gem_request *req = intel_ring_get_request(ring);
> +	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
>  	struct i915_vma *vma;
>  
>  	list_for_each_entry(vma, vmas, exec_list) {
> @@ -957,7 +957,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
>  			obj->base.pending_read_domains |= obj->base.read_domains;
>  		obj->base.read_domains = obj->base.pending_read_domains;
>  
> -		i915_vma_move_to_active(vma, ring);
> +		i915_vma_move_to_active(vma, req);
>  		if (obj->base.write_domain) {
>  			obj->dirty = 1;
>  			i915_gem_request_assign(&obj->last_write_req, req);
> @@ -983,20 +983,21 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
>  void
>  i915_gem_execbuffer_retire_commands(struct drm_device *dev,
>  				    struct drm_file *file,
> -				    struct intel_engine_cs *ring,
> +				    struct drm_i915_gem_request *req,
>  				    struct drm_i915_gem_object *obj)
>  {
>  	/* Unconditionally force add_request to emit a full flush. */
> -	ring->gpu_caches_dirty = true;
> +	req->ring->gpu_caches_dirty = true;
>  
>  	/* Add a breadcrumb for the completion of the batch buffer */
> -	(void)__i915_add_request(ring, file, obj, true);
> +	(void)__i915_add_request(req, file, obj, true);
>  }
>  
>  static int
>  i915_reset_gen7_sol_offsets(struct drm_device *dev,
> -			    struct intel_engine_cs *ring)
> +			    struct drm_i915_gem_request *req)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	int ret, i;
>  
> @@ -1005,7 +1006,7 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
>  		return -EINVAL;
>  	}
>  
> -	ret = intel_ring_begin(ring, 4 * 3);
> +	ret = intel_ring_begin(req, 4 * 3);
>  	if (ret)
>  		return ret;
>  
> @@ -1021,10 +1022,11 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
>  }
>  
>  static int
> -i915_emit_box(struct intel_engine_cs *ring,
> +i915_emit_box(struct drm_i915_gem_request *req,
>  	      struct drm_clip_rect *box,
>  	      int DR1, int DR4)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	int ret;
>  
>  	if (box->y2 <= box->y1 || box->x2 <= box->x1 ||
> @@ -1035,7 +1037,7 @@ i915_emit_box(struct intel_engine_cs *ring,
>  	}
>  
>  	if (INTEL_INFO(ring->dev)->gen >= 4) {
> -		ret = intel_ring_begin(ring, 4);
> +		ret = intel_ring_begin(req, 4);
>  		if (ret)
>  			return ret;
>  
> @@ -1044,7 +1046,7 @@ i915_emit_box(struct intel_engine_cs *ring,
>  		intel_ring_emit(ring, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16);
>  		intel_ring_emit(ring, DR4);
>  	} else {
> -		ret = intel_ring_begin(ring, 6);
> +		ret = intel_ring_begin(req, 6);
>  		if (ret)
>  			return ret;
>  
> @@ -1151,11 +1153,11 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
>  		goto error;
>  	}
>  
> -	ret = i915_gem_execbuffer_move_to_gpu(ring, vmas);
> +	ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
>  	if (ret)
>  		goto error;
>  
> -	i915_gem_execbuffer_move_to_active(vmas, ring);
> +	i915_gem_execbuffer_move_to_active(vmas, params->request);
>  
>  	/* Make sure the OLR hasn't advanced (which would indicate a flush
>  	 * of the work in progress which in turn would be a Bad Thing). */
> @@ -1200,18 +1202,18 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
>  	/* Unconditionally invalidate gpu caches and ensure that we do flush
>  	 * any residual writes from the previous batch.
>  	 */
> -	ret = intel_ring_invalidate_all_caches(ring);
> +	ret = intel_ring_invalidate_all_caches(params->request);
>  	if (ret)
>  		goto error;
>  
>  	/* Switch to the correct context for the batch */
> -	ret = i915_switch_context(ring, params->ctx);
> +	ret = i915_switch_context(params->request, params->ctx);
>  	if (ret)
>  		goto error;
>  
>  	if (ring == &dev_priv->ring[RCS] &&
>  			params->instp_mode != dev_priv->relative_constants_mode) {
> -		ret = intel_ring_begin(ring, 4);
> +		ret = intel_ring_begin(params->request, 4);
>  		if (ret)
>  			goto error;
>  
> @@ -1225,7 +1227,7 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
>  	}
>  
>  	if (params->args_flags & I915_EXEC_GEN7_SOL_RESET) {
> -		ret = i915_reset_gen7_sol_offsets(params->dev, ring);
> +		ret = i915_reset_gen7_sol_offsets(params->dev, params->request);
>  		if (ret)
>  			goto error;
>  	}
> @@ -1236,29 +1238,31 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
>  
>  	if (params->cliprects) {
>  		for (i = 0; i < params->args_num_cliprects; i++) {
> -			ret = i915_emit_box(ring, &params->cliprects[i],
> -					    params->args_DR1, params->args_DR4);
> +			ret = i915_emit_box(params->request,
> +					    &params->cliprects[i],
> +					    params->args_DR1,
> +					    params->args_DR4);
>  			if (ret)
>  				goto error;
>  
> -			ret = ring->dispatch_execbuffer(ring,
> +			ret = ring->dispatch_execbuffer(params->request,
>  							exec_start, exec_len,
>  							params->dispatch_flags);
>  			if (ret)
>  				goto error;
>  		}
>  	} else {
> -		ret = ring->dispatch_execbuffer(ring,
> +		ret = ring->dispatch_execbuffer(params->request,
>  						exec_start, exec_len,
>  						params->dispatch_flags);
>  		if (ret)
>  			goto error;
>  	}
>  
> -	trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), params->dispatch_flags);
> +	trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
>  
> -	i915_gem_execbuffer_retire_commands(params->dev, params->file, ring,
> -					    params->batch_obj);
> +	i915_gem_execbuffer_retire_commands(params->dev, params->file,
> +					    params->request, params->batch_obj);
>  
>  error:
>  	/* intel_gpu_busy should also get a ref, so it will free when the device
> @@ -1490,10 +1494,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  		params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm);
>  
>  	/* Allocate a request for this batch buffer nice and early. */
> -	ret = dev_priv->gt.alloc_request(ring, ctx);
> +	ret = dev_priv->gt.alloc_request(ring, ctx, &params->request);
>  	if (ret)
>  		goto err;
> -	params->request = ring->outstanding_lazy_request;
> +	WARN_ON(params->request != ring->outstanding_lazy_request);
>  
>  	/* Save assorted stuff away to pass through to *_submission_final() */
>  	params->dev                     = dev;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 7eead93..776776e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -213,14 +213,15 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
>  }
>  
>  /* Broadwell Page Directory Pointer Descriptors */
> -static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
> -			   uint64_t val)
> +static int gen8_write_pdp(struct drm_i915_gem_request *req, unsigned entry,
> +			  uint64_t val)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	int ret;
>  
>  	BUG_ON(entry >= 4);
>  
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(req, 6);
>  	if (ret)
>  		return ret;
>  
> @@ -236,7 +237,7 @@ static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
>  }
>  
>  static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -			  struct intel_engine_cs *ring)
> +			  struct drm_i915_gem_request *req)
>  {
>  	int i, ret;
>  
> @@ -245,7 +246,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  
>  	for (i = used_pd - 1; i >= 0; i--) {
>  		dma_addr_t addr = ppgtt->pd_dma_addr[i];
> -		ret = gen8_write_pdp(ring, i, addr);
> +		ret = gen8_write_pdp(req, i, addr);
>  		if (ret)
>  			return ret;
>  	}
> @@ -710,16 +711,17 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
>  }
>  
>  static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -			 struct intel_engine_cs *ring)
> +			 struct drm_i915_gem_request *req)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	int ret;
>  
>  	/* NB: TLBs must be flushed and invalidated before a switch */
> -	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
> +	ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
>  	if (ret)
>  		return ret;
>  
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(req, 6);
>  	if (ret)
>  		return ret;
>  
> @@ -735,16 +737,17 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  }
>  
>  static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -			  struct intel_engine_cs *ring)
> +			  struct drm_i915_gem_request *req)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	int ret;
>  
>  	/* NB: TLBs must be flushed and invalidated before a switch */
> -	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
> +	ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
>  	if (ret)
>  		return ret;
>  
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(req, 6);
>  	if (ret)
>  		return ret;
>  
> @@ -758,7 +761,7 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  
>  	/* XXX: RCS is the only one to auto invalidate the TLBs? */
>  	if (ring->id != RCS) {
> -		ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
> +		ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
>  		if (ret)
>  			return ret;
>  	}
> @@ -767,8 +770,9 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  }
>  
>  static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -			  struct intel_engine_cs *ring)
> +			  struct drm_i915_gem_request *req)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	struct drm_device *dev = ppgtt->base.dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  
> @@ -1125,11 +1129,6 @@ int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
>  
>  int i915_ppgtt_init_hw(struct drm_device *dev)
>  {
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_engine_cs *ring;
> -	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
> -	int i, ret = 0;
> -
>  	/* In the case of execlists, PPGTT is enabled by the context descriptor
>  	 * and the PDPs are contained within the context itself.  We don't
>  	 * need to do anything here. */
> @@ -1148,25 +1147,20 @@ int i915_ppgtt_init_hw(struct drm_device *dev)
>  	else
>  		WARN_ON(1);
>  
> -	if (ppgtt) {
> -		for_each_ring(ring, dev_priv, i) {
> -			ret = ppgtt->switch_mm(ppgtt, ring);
> -			if (ret != 0)
> -				return ret;
> -
> -			/* Make sure the context switch (if one actually happened)
> -			 * gets wrapped up and finished rather than hanging around
> -			 * and confusing things later. */
> -			if (ring->outstanding_lazy_request) {
> -				ret = i915_add_request_no_flush(ring);
> -				if (ret)
> -					return ret;
> -			}
> -		}
> -	}
> +	return 0;
> +}
>  
> -	return ret;
> +int i915_ppgtt_init_ring(struct drm_i915_gem_request *req)
> +{
> +	struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
> +	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
> +
> +	if (!ppgtt)
> +		return 0;
> +
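> +	/* switch_mm() emits the page-directory load into the caller's request. */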
> +	return ppgtt->switch_mm(ppgtt, req);
>  }
> +
>  struct i915_hw_ppgtt *
>  i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
>  {
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index dd849df..bee3e2a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -267,7 +267,7 @@ struct i915_hw_ppgtt {
>  
>  	int (*enable)(struct i915_hw_ppgtt *ppgtt);
>  	int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
> -			 struct intel_engine_cs *ring);
> +			 struct drm_i915_gem_request *req);
>  	void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
>  };
>  
> @@ -278,6 +278,7 @@ void i915_global_gtt_cleanup(struct drm_device *dev);
>  
>  int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt);
>  int i915_ppgtt_init_hw(struct drm_device *dev);
> +int i915_ppgtt_init_ring(struct drm_i915_gem_request *req);
>  void i915_ppgtt_release(struct kref *kref);
>  struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev,
>  					struct drm_i915_file_private *fpriv);
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index aba39c3..0e0c23fe 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -152,8 +152,9 @@ int i915_gem_render_state_prepare(struct intel_engine_cs *ring,
>  	return 0;
>  }
>  
> -int i915_gem_render_state_init(struct intel_engine_cs *ring)
> +int i915_gem_render_state_init(struct drm_i915_gem_request *req)
>  {
> +	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
>  	struct render_state so;
>  	int ret;
>  
> @@ -164,16 +165,17 @@ int i915_gem_render_state_init(struct intel_engine_cs *ring)
>  	if (so.rodata == NULL)
>  		return 0;
>  
> -	ret = ring->dispatch_execbuffer(ring,
> +	ret = ring->dispatch_execbuffer(req,
>  					so.ggtt_offset,
>  					so.rodata->batch_items * 4,
>  					I915_DISPATCH_SECURE);
>  	if (ret)
>  		goto out;
>  
> -	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
> +	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
>  
> -	ret = __i915_add_request(ring, NULL, so.obj, true);
> +	/* Submission is deferred to the caller of this function; just record the batch object. */
> +	req->batch_obj = so.obj;
>  	/* __i915_add_request moves object to inactive if it fails */
>  out:
>  	i915_gem_render_state_fini(&so);
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h
> index c44961e..7aa7372 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.h
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.h
> @@ -39,7 +39,7 @@ struct render_state {
>  	int gen;
>  };
>  
> -int i915_gem_render_state_init(struct intel_engine_cs *ring);
> +int i915_gem_render_state_init(struct drm_i915_gem_request *req);
>  void i915_gem_render_state_fini(struct render_state *so);
>  int i915_gem_render_state_prepare(struct intel_engine_cs *ring,
>  				  struct render_state *so);
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index f0cf421..c0b0e37 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -9089,14 +9089,15 @@ static int intel_gen2_queue_flip(struct drm_device *dev,
>  				 struct drm_crtc *crtc,
>  				 struct drm_framebuffer *fb,
>  				 struct drm_i915_gem_object *obj,
> -				 struct intel_engine_cs *ring,
> +				 struct drm_i915_gem_request *req,
>  				 uint32_t flags)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>  	u32 flip_mask;
>  	int ret;
>  
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(req, 6);
>  	if (ret)
>  		return ret;
>  
> @@ -9116,7 +9117,7 @@ static int intel_gen2_queue_flip(struct drm_device *dev,
>  	intel_ring_emit(ring, 0); /* aux display base address, unused */
>  
>  	intel_mark_page_flip_active(intel_crtc);
> -	i915_add_request_no_flush(ring);
> +	i915_add_request_no_flush(req);
>  	return 0;
>  }
>  
> @@ -9124,14 +9125,15 @@ static int intel_gen3_queue_flip(struct drm_device *dev,
>  				 struct drm_crtc *crtc,
>  				 struct drm_framebuffer *fb,
>  				 struct drm_i915_gem_object *obj,
> -				 struct intel_engine_cs *ring,
> +				 struct drm_i915_gem_request *req,
>  				 uint32_t flags)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>  	u32 flip_mask;
>  	int ret;
>  
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(req, 6);
>  	if (ret)
>  		return ret;
>  
> @@ -9148,7 +9150,7 @@ static int intel_gen3_queue_flip(struct drm_device *dev,
>  	intel_ring_emit(ring, MI_NOOP);
>  
>  	intel_mark_page_flip_active(intel_crtc);
> -	i915_add_request_no_flush(ring);
> +	i915_add_request_no_flush(req);
>  	return 0;
>  }
>  
> @@ -9156,15 +9158,16 @@ static int intel_gen4_queue_flip(struct drm_device *dev,
>  				 struct drm_crtc *crtc,
>  				 struct drm_framebuffer *fb,
>  				 struct drm_i915_gem_object *obj,
> -				 struct intel_engine_cs *ring,
> +				 struct drm_i915_gem_request *req,
>  				 uint32_t flags)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>  	uint32_t pf, pipesrc;
>  	int ret;
>  
> -	ret = intel_ring_begin(ring, 4);
> +	ret = intel_ring_begin(req, 4);
>  	if (ret)
>  		return ret;
>  
> @@ -9187,7 +9190,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev,
>  	intel_ring_emit(ring, pf | pipesrc);
>  
>  	intel_mark_page_flip_active(intel_crtc);
> -	i915_add_request_no_flush(ring);
> +	i915_add_request_no_flush(req);
>  	return 0;
>  }
>  
> @@ -9195,15 +9198,16 @@ static int intel_gen6_queue_flip(struct drm_device *dev,
>  				 struct drm_crtc *crtc,
>  				 struct drm_framebuffer *fb,
>  				 struct drm_i915_gem_object *obj,
> -				 struct intel_engine_cs *ring,
> +				 struct drm_i915_gem_request *req,
>  				 uint32_t flags)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>  	uint32_t pf, pipesrc;
>  	int ret;
>  
> -	ret = intel_ring_begin(ring, 4);
> +	ret = intel_ring_begin(req, 4);
>  	if (ret)
>  		return ret;
>  
> @@ -9223,7 +9227,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev,
>  	intel_ring_emit(ring, pf | pipesrc);
>  
>  	intel_mark_page_flip_active(intel_crtc);
> -	i915_add_request_no_flush(ring);
> +	i915_add_request_no_flush(req);
>  	return 0;
>  }
>  
> @@ -9231,9 +9235,10 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
>  				 struct drm_crtc *crtc,
>  				 struct drm_framebuffer *fb,
>  				 struct drm_i915_gem_object *obj,
> -				 struct intel_engine_cs *ring,
> +				 struct drm_i915_gem_request *req,
>  				 uint32_t flags)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>  	uint32_t plane_bit = 0;
>  	int len, ret;
> @@ -9275,11 +9280,11 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
>  	 * then do the cacheline alignment, and finally emit the
>  	 * MI_DISPLAY_FLIP.
>  	 */
> -	ret = intel_ring_cacheline_align(ring);
> +	ret = intel_ring_cacheline_align(req);
>  	if (ret)
>  		return ret;
>  
> -	ret = intel_ring_begin(ring, len);
> +	ret = intel_ring_begin(req, len);
>  	if (ret)
>  		return ret;
>  
> @@ -9318,7 +9323,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
>  	intel_ring_emit(ring, (MI_NOOP));
>  
>  	intel_mark_page_flip_active(intel_crtc);
> -	i915_add_request_no_flush(ring);
> +	i915_add_request_no_flush(req);
>  	return 0;
>  }
>  
> @@ -9474,9 +9479,10 @@ static int intel_gen9_queue_flip(struct drm_device *dev,
>  				 struct drm_crtc *crtc,
>  				 struct drm_framebuffer *fb,
>  				 struct drm_i915_gem_object *obj,
> -				 struct intel_engine_cs *ring,
> +				 struct drm_i915_gem_request *req,
>  				 uint32_t flags)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>  	uint32_t plane = 0, stride;
>  	int ret;
> @@ -9508,7 +9514,7 @@ static int intel_gen9_queue_flip(struct drm_device *dev,
>  		return -ENODEV;
>  	}
>  
> -	ret = intel_ring_begin(ring, 10);
> +	ret = intel_ring_begin(req, 10);
>  	if (ret)
>  		return ret;
>  
> @@ -9528,7 +9534,7 @@ static int intel_gen9_queue_flip(struct drm_device *dev,
>  	intel_ring_emit(ring, intel_crtc->unpin_work->gtt_offset);
>  
>  	intel_mark_page_flip_active(intel_crtc);
> -	i915_add_request_no_flush(ring);
> +	i915_add_request_no_flush(req);
>  
>  	return 0;
>  }
> @@ -9537,7 +9543,7 @@ static int intel_default_queue_flip(struct drm_device *dev,
>  				    struct drm_crtc *crtc,
>  				    struct drm_framebuffer *fb,
>  				    struct drm_i915_gem_object *obj,
> -				    struct intel_engine_cs *ring,
> +				    struct drm_i915_gem_request *req,
>  				    uint32_t flags)
>  {
>  	return -ENODEV;
> @@ -9729,22 +9735,18 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  		i915_gem_request_assign(&work->flip_queued_req,
>  					obj->last_write_req);
>  	} else {
> -		ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, ring,
> +		struct drm_i915_gem_request *req;
> +
> +		ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
> +		if (ret)
> +			return ret;
> +
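> +		/* Track the flip by its request so completion can be checked later. */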
> +		i915_gem_request_assign(&work->flip_queued_req, req);
> +
> +		ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, req,
>  						   page_flip_flags);
>  		if (ret)
>  			goto cleanup_unpin;
> -
> -		/* Borked: need to get the seqno for the request submitted in
> -		 * 'queue_flip()' above. However, either the request has been
> -		 * posted already and the seqno is gone (q_f calls add_request),
> -		 * or the request never gets posted and is merged into whatever
> -		 * render comes along next (q_f calls ring_advance).
> -		 *
> -		 * On the other hand, seqnos are going away soon anyway! So
> -		 * hopefully the problem will disappear...
> -		 */
> -		i915_gem_request_assign(&work->flip_queued_req,
> -					ring->outstanding_lazy_request ? intel_ring_get_request(ring) : NULL);
>  	}
>  
>  	work->flip_queued_vblank = drm_vblank_count(dev, intel_crtc->pipe);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 80cb87e..5077a77 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -203,6 +203,10 @@ enum {
>  };
>  #define GEN8_CTX_ID_SHIFT 32
>  
> +static int intel_logical_ring_begin(struct drm_i915_gem_request *req,
> +				    int num_dwords);
> +static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req,
> +					      struct intel_context *ctx);
>  static int intel_lr_context_pin(struct intel_engine_cs *ring,
>  		struct intel_context *ctx);
>  
> @@ -587,9 +591,9 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
>  	return 0;
>  }
>  
> -static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
> +static int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
>  {
> -	struct intel_engine_cs *ring = ringbuf->ring;
> +	struct intel_engine_cs *ring = req->ring;
>  	uint32_t flush_domains;
>  	int ret;
>  
> @@ -597,7 +601,7 @@ static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
>  	if (ring->gpu_caches_dirty)
>  		flush_domains = I915_GEM_GPU_DOMAINS;
>  
> -	ret = ring->emit_flush(ringbuf, I915_GEM_GPU_DOMAINS, flush_domains);
> +	ret = ring->emit_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
>  	if (ret)
>  		return ret;
>  
> @@ -605,10 +609,9 @@ static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
>  	return 0;
>  }
>  
> -static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
> +static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
>  				 struct list_head *vmas)
>  {
> -	struct intel_engine_cs *ring = ringbuf->ring;
>  	struct i915_vma *vma;
>  	uint32_t flush_domains = 0;
>  	bool flush_chipset = false;
> @@ -617,7 +620,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
>  	list_for_each_entry(vma, vmas, exec_list) {
>  		struct drm_i915_gem_object *obj = vma->obj;
>  
> -		ret = i915_gem_object_sync(obj, ring, true);
> +		ret = i915_gem_object_sync(obj, req);
>  		if (ret)
>  			return ret;
>  
> @@ -657,7 +660,6 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
>  	struct drm_device       *dev = params->dev;
>  	struct intel_engine_cs  *ring = params->ring;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_ringbuffer *ringbuf = params->ctx->engine[ring->id].ringbuf;
>  	int ret;
>  
>  	params->instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
> @@ -706,11 +708,11 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
>  		return -EINVAL;
>  	}
>  
> -	ret = execlists_move_to_gpu(ringbuf, vmas);
> +	ret = execlists_move_to_gpu(params->request, vmas);
>  	if (ret)
>  		return ret;
>  
> -	i915_gem_execbuffer_move_to_active(vmas, ring);
> +	i915_gem_execbuffer_move_to_active(vmas, params->request);
>  
>  	ret = dev_priv->gt.do_execfinal(params);
>  	if (ret)
> @@ -742,13 +744,13 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params)
>  	/* Unconditionally invalidate gpu caches and ensure that we do flush
>  	 * any residual writes from the previous batch.
>  	 */
> -	ret = logical_ring_invalidate_all_caches(ringbuf);
> +	ret = logical_ring_invalidate_all_caches(params->request);
>  	if (ret)
>  		return ret;
>  
>  	if (ring == &dev_priv->ring[RCS] &&
>  	    params->instp_mode != dev_priv->relative_constants_mode) {
> -		ret = intel_logical_ring_begin(ringbuf, 4);
> +		ret = intel_logical_ring_begin(params->request, 4);
>  		if (ret)
>  			return ret;
>  
> @@ -764,13 +766,14 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params)
>  	exec_start = params->batch_obj_vm_offset +
>  		     params->args_batch_start_offset;
>  
> -	ret = ring->emit_bb_start(ringbuf, exec_start, params->dispatch_flags);
> +	ret = ring->emit_bb_start(params->request, exec_start, params->dispatch_flags);
>  	if (ret)
>  		return ret;
>  
> -	trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), params->dispatch_flags);
> +	trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
>  
> -	i915_gem_execbuffer_retire_commands(params->dev, params->file, ring, params->batch_obj);
> +	i915_gem_execbuffer_retire_commands(params->dev, params->file,
> +					    params->request, params->batch_obj);
>  
>  	return 0;
>  }
> @@ -827,15 +830,15 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring)
>  	I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
>  }
>  
> -int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf)
> +int logical_ring_flush_all_caches(struct drm_i915_gem_request *req)
>  {
> -	struct intel_engine_cs *ring = ringbuf->ring;
> +	struct intel_engine_cs *ring = req->ring;
>  	int ret;
>  
>  	if (!ring->gpu_caches_dirty)
>  		return 0;
>  
> -	ret = ring->emit_flush(ringbuf, 0, I915_GEM_GPU_DOMAINS);
> +	ret = ring->emit_flush(req, 0, I915_GEM_GPU_DOMAINS);
>  	if (ret)
>  		return ret;
>  
> @@ -910,13 +913,17 @@ void intel_lr_context_unpin(struct intel_engine_cs *ring,
>  }
>  
>  int intel_logical_ring_alloc_request(struct intel_engine_cs *ring,
> -				     struct intel_context *ctx)
> +				     struct intel_context *ctx,
> +				     struct drm_i915_gem_request **req_out)
>  {
>  	struct drm_i915_gem_request *request;
>  	struct drm_i915_private *dev_private = ring->dev->dev_private;
>  	int ret;
>  
> -	if (ring->outstanding_lazy_request)
> +	if (!req_out)
> +		return -EINVAL;
> +
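> +	/* Reuse any outstanding request on this ring rather than allocating a new one. */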
> +	if ((*req_out = ring->outstanding_lazy_request) != NULL)
>  		return 0;
>  
>  	request = kzalloc(sizeof(*request), GFP_KERNEL);
> @@ -953,7 +960,7 @@ int intel_logical_ring_alloc_request(struct intel_engine_cs *ring,
>  	i915_gem_context_reference(request->ctx);
>  	request->ringbuf = ctx->engine[ring->id].ringbuf;
>  
> -	ring->outstanding_lazy_request = request;
> +	*req_out = ring->outstanding_lazy_request = request;
>  	return 0;
>  }
>  
> @@ -1090,7 +1097,7 @@ static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes)
>  /**
>   * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands
>   *
> - * @ringbuf: Logical ringbuffer.
> + * @req: The request to start some new work for
>   * @num_dwords: number of DWORDs that we plan to write to the ringbuffer.
>   *
>   * The ringbuffer might not be ready to accept the commands right away (maybe it needs to
> @@ -1100,8 +1107,9 @@ static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes)
>   *
>   * Return: non-zero if the ringbuffer is not ready to be written to.
>   */
> -int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
> +static int intel_logical_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
>  {
> +	struct intel_ringbuffer *ringbuf = req->ringbuf;
>  	struct intel_engine_cs *ring = ringbuf->ring;
>  	struct drm_device *dev = ring->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -1116,38 +1124,28 @@ int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
>  	if (ret)
>  		return ret;
>  
> -	if(!ring->outstanding_lazy_request) {
> -		printk(KERN_INFO "%s:%d> \x1B[31;1mring->outstanding_lazy_request = 0x%p\x1B[0m\n", __func__, __LINE__, ring->outstanding_lazy_request);
> -		dump_stack();
> -	}
> -
> -	/* Preallocate the olr before touching the ring */
> -	ret = intel_logical_ring_alloc_request(ring, ringbuf->FIXME_lrc_ctx);
> -	if (ret)
> -		return ret;
> -
>  	ringbuf->space -= num_dwords * sizeof(uint32_t);
>  	return 0;
>  }
>  
> -static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring,
> +static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req,
>  					       struct intel_context *ctx)
>  {
>  	int ret, i;
> -	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
> -	struct drm_device *dev = ring->dev;
> +	struct intel_ringbuffer *ringbuf = req->ringbuf;
> +	struct drm_device *dev = req->ring->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct i915_workarounds *w = &dev_priv->workarounds;
>  
>  	if (WARN_ON(w->count == 0))
>  		return 0;
>  
> -	ring->gpu_caches_dirty = true;
> -	ret = logical_ring_flush_all_caches(ringbuf);
> +	req->ring->gpu_caches_dirty = true;
> +	ret = logical_ring_flush_all_caches(req);
>  	if (ret)
>  		return ret;
>  
> -	ret = intel_logical_ring_begin(ringbuf, w->count * 2 + 2);
> +	ret = intel_logical_ring_begin(req, w->count * 2 + 2);
>  	if (ret)
>  		return ret;
>  
> @@ -1160,8 +1158,8 @@ static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring,
>  
>  	intel_logical_ring_advance(ringbuf);
>  
> -	ring->gpu_caches_dirty = true;
> -	ret = logical_ring_flush_all_caches(ringbuf);
> +	req->ring->gpu_caches_dirty = true;
> +	ret = logical_ring_flush_all_caches(req);
>  	if (ret)
>  		return ret;
>  
> @@ -1210,13 +1208,14 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring)
>  	return init_workarounds_ring(ring);
>  }
>  
> -static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf,
> +static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
>  			      u64 offset, unsigned flags)
>  {
> +	struct intel_ringbuffer *ringbuf = req->ringbuf;
>  	bool ppgtt = !(flags & I915_DISPATCH_SECURE);
>  	int ret;
>  
> -	ret = intel_logical_ring_begin(ringbuf, 4);
> +	ret = intel_logical_ring_begin(req, 4);
>  	if (ret)
>  		return ret;
>  
> @@ -1263,17 +1262,18 @@ static void gen8_logical_ring_put_irq(struct intel_engine_cs *ring)
>  	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
>  }
>  
> -static int gen8_emit_flush(struct intel_ringbuffer *ringbuf,
> +static int gen8_emit_flush(struct drm_i915_gem_request *req,
>  			   u32 invalidate_domains,
>  			   u32 unused)
>  {
> +	struct intel_ringbuffer *ringbuf = req->ringbuf;
>  	struct intel_engine_cs *ring = ringbuf->ring;
>  	struct drm_device *dev = ring->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	uint32_t cmd;
>  	int ret;
>  
> -	ret = intel_logical_ring_begin(ringbuf, 4);
> +	ret = intel_logical_ring_begin(req, 4);
>  	if (ret)
>  		return ret;
>  
> @@ -1301,10 +1301,11 @@ static int gen8_emit_flush(struct intel_ringbuffer *ringbuf,
>  	return 0;
>  }
>  
> -static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf,
> +static int gen8_emit_flush_render(struct drm_i915_gem_request *req,
>  				  u32 invalidate_domains,
>  				  u32 flush_domains)
>  {
> +	struct intel_ringbuffer *ringbuf = req->ringbuf;
>  	struct intel_engine_cs *ring = ringbuf->ring;
>  	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
>  	u32 flags = 0;
> @@ -1328,7 +1329,7 @@ static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf,
>  		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
>  	}
>  
> -	ret = intel_logical_ring_begin(ringbuf, 6);
> +	ret = intel_logical_ring_begin(req, 6);
>  	if (ret)
>  		return ret;
>  
> @@ -1353,13 +1354,14 @@ static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
>  	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
>  }
>  
> -static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
> +static int gen8_emit_request(struct drm_i915_gem_request *req)
>  {
> +	struct intel_ringbuffer *ringbuf = req->ringbuf;
>  	struct intel_engine_cs *ring = ringbuf->ring;
>  	u32 cmd;
>  	int ret;
>  
> -	ret = intel_logical_ring_begin(ringbuf, 6);
> +	ret = intel_logical_ring_begin(req, 6);
>  	if (ret)
>  		return ret;
>  
> @@ -1371,8 +1373,7 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
>  				(ring->status_page.gfx_addr +
>  				(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)));
>  	intel_logical_ring_emit(ringbuf, 0);
> -	intel_logical_ring_emit(ringbuf,
> -		i915_gem_request_get_seqno(ring->outstanding_lazy_request));
> +	intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(req));
>  	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
>  	intel_logical_ring_emit(ringbuf, MI_NOOP);
>  	intel_logical_ring_advance_and_submit(ringbuf);
> @@ -1380,16 +1381,20 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
>  	return 0;
>  }
>  
> -static int gen8_init_rcs_context(struct intel_engine_cs *ring,
> -		       struct intel_context *ctx)
> +static int gen8_init_rcs_context(struct drm_i915_gem_request *req,
> +				 struct intel_context *ctx)
>  {
>  	int ret;
>  
> -	ret = intel_logical_ring_workarounds_emit(ring, ctx);
> +	ret = intel_logical_ring_workarounds_emit(req, ctx);
>  	if (ret)
>  		return ret;
>  
> -	return intel_lr_context_render_state_init(ring, ctx);
> +	ret = intel_lr_context_render_state_init(req, ctx);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
>  }
>  
>  /**
> @@ -1409,6 +1414,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
>  
>  	intel_logical_ring_stop(ring);
>  	WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
> +	WARN_ON(ring->outstanding_lazy_request);
>  	i915_gem_request_assign(&ring->outstanding_lazy_request, NULL);
>  
>  	if (ring->cleanup)
> @@ -1648,10 +1654,10 @@ cleanup_render_ring:
>  	return ret;
>  }
>  
> -int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
> -				       struct intel_context *ctx)
> +static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req,
> +					      struct intel_context *ctx)
>  {
> -	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
> +	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
>  	struct render_state so;
>  	struct drm_i915_file_private *file_priv = ctx->file_priv;
>  	struct drm_file *file = file_priv ? file_priv->file : NULL;
> @@ -1664,15 +1670,13 @@ int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
>  	if (so.rodata == NULL)
>  		return 0;
>  
> -	ret = ring->emit_bb_start(ringbuf,
> -			so.ggtt_offset,
> -			I915_DISPATCH_SECURE);
> +	ret = ring->emit_bb_start(req, so.ggtt_offset, I915_DISPATCH_SECURE);
>  	if (ret)
>  		goto out;
>  
> -	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
> +	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
>  
> -	ret = __i915_add_request(ring, file, so.obj, true);
> +	ret = __i915_add_request(req, file, so.obj, true);
>  	/* intel_logical_ring_add_request moves object to inactive if it
>  	 * fails */
>  out:
> @@ -1883,6 +1887,7 @@ static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
>  int intel_lr_context_deferred_create(struct intel_context *ctx,
>  				     struct intel_engine_cs *ring)
>  {
> +	struct drm_i915_private *dev_priv = ring->dev->dev_private;
>  	const bool is_global_default_ctx = (ctx == ring->default_context);
>  	struct drm_device *dev = ring->dev;
>  	struct drm_i915_gem_object *ctx_obj;
> @@ -1964,13 +1969,27 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>  		lrc_setup_hardware_status_page(ring, ctx_obj);
>  	else if (ring->id == RCS && !ctx->rcs_initialized) {
>  		if (ring->init_context) {
> -			ret = ring->init_context(ring, ctx);
> +			struct drm_i915_gem_request *req;
> +
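> +			/* init_context() now emits into a request of its own, submitted below. */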
> +			ret = dev_priv->gt.alloc_request(ring, ctx, &req);
> +			if (ret)
> +				return ret;
> +
> +			ret = ring->init_context(req, ctx);
>  			if (ret) {
>  				DRM_ERROR("ring init context: %d\n", ret);
> +				i915_gem_request_unreference(req);
>  				ctx->engine[ring->id].ringbuf = NULL;
>  				ctx->engine[ring->id].state = NULL;
>  				goto error;
>  			}
> +
> +			ret = i915_add_request_no_flush(req);
> +			if (ret) {
> +				DRM_ERROR("add request failed: %d\n", ret);
> +				i915_gem_request_unreference(req);
> +				goto error;
> +			}
>  		}
>  
>  		ctx->rcs_initialized = true;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index ea083d9..a2981ba 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -35,12 +35,13 @@
>  
>  /* Logical Rings */
>  int __must_check intel_logical_ring_alloc_request(struct intel_engine_cs *ring,
> -						  struct intel_context *ctx);
> +						  struct intel_context *ctx,
> +						  struct drm_i915_gem_request **req_out);
>  void intel_logical_ring_stop(struct intel_engine_cs *ring);
>  void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
>  int intel_logical_rings_init(struct drm_device *dev);
>  
> -int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf);
> +int logical_ring_flush_all_caches(struct drm_i915_gem_request *req);
>  void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf);
>  /**
>   * intel_logical_ring_advance() - advance the ringbuffer tail
> @@ -63,11 +64,8 @@ static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf,
>  	iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
>  	ringbuf->tail += 4;
>  }
> -int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords);
>  
>  /* Logical Ring Contexts */
> -int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
> -				       struct intel_context *ctx);
>  void intel_lr_context_free(struct intel_context *ctx);
>  int intel_lr_context_deferred_create(struct intel_context *ctx,
>  				     struct intel_engine_cs *ring);
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index 973c9de..2d2ce59 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -209,17 +209,15 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
>  }
>  
>  static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
> +					 struct drm_i915_gem_request *req,
>  					 void (*tail)(struct intel_overlay *))
>  {
>  	struct drm_device *dev = overlay->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
>  	int ret;
>  
>  	BUG_ON(overlay->last_flip_req);
> -	i915_gem_request_assign(&overlay->last_flip_req,
> -					     ring->outstanding_lazy_request);
> -	ret = i915_add_request(ring);
> +	i915_gem_request_assign(&overlay->last_flip_req, req);
> +	ret = i915_add_request(overlay->last_flip_req);
>  	if (ret)
>  		return ret;
>  
> @@ -239,6 +237,7 @@ static int intel_overlay_on(struct intel_overlay *overlay)
>  	struct drm_device *dev = overlay->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> +	struct drm_i915_gem_request *req;
>  	int ret;
>  
>  	BUG_ON(overlay->active);
> @@ -246,17 +245,21 @@ static int intel_overlay_on(struct intel_overlay *overlay)
>  
>  	WARN_ON(IS_I830(dev) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE));
>  
> -	ret = intel_ring_begin(ring, 4);
> +	ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
>  	if (ret)
>  		return ret;
>  
> -	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON);
> -	intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
> -	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
> -	intel_ring_emit(ring, MI_NOOP);
> -	intel_ring_advance(ring);
> +	ret = intel_ring_begin(req, 4);
> +	if (ret)
> +		return ret;
> +
> +	intel_ring_emit(req->ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON);
> +	intel_ring_emit(req->ring, overlay->flip_addr | OFC_UPDATE);
> +	intel_ring_emit(req->ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
> +	intel_ring_emit(req->ring, MI_NOOP);
> +	intel_ring_advance(req->ring);
>  
> -	return intel_overlay_do_wait_request(overlay, NULL);
> +	return intel_overlay_do_wait_request(overlay, req, NULL);
>  }
>  
>  /* overlay needs to be enabled in OCMD reg */
> @@ -266,6 +269,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
>  	struct drm_device *dev = overlay->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> +	struct drm_i915_gem_request *req;
>  	u32 flip_addr = overlay->flip_addr;
>  	u32 tmp;
>  	int ret;
> @@ -280,7 +284,11 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
>  	if (tmp & (1 << 17))
>  		DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);
>  
> -	ret = intel_ring_begin(ring, 2);
> +	ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
> +	if (ret)
> +		return ret;
> +
> +	ret = intel_ring_begin(req, 2);
>  	if (ret)
>  		return ret;
>  
> @@ -289,9 +297,8 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
>  	intel_ring_advance(ring);
>  
>  	WARN_ON(overlay->last_flip_req);
> -	i915_gem_request_assign(&overlay->last_flip_req,
> -					     ring->outstanding_lazy_request);
> -	return i915_add_request(ring);
> +	i915_gem_request_assign(&overlay->last_flip_req, req);
> +	return i915_add_request(req);
>  }
>  
>  static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
> @@ -326,6 +333,7 @@ static int intel_overlay_off(struct intel_overlay *overlay)
>  	struct drm_device *dev = overlay->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> +	struct drm_i915_gem_request *req;
>  	u32 flip_addr = overlay->flip_addr;
>  	int ret;
>  
> @@ -337,7 +345,11 @@ static int intel_overlay_off(struct intel_overlay *overlay)
>  	 * of the hw. Do it in both cases */
>  	flip_addr |= OFC_UPDATE;
>  
> -	ret = intel_ring_begin(ring, 6);
> +	ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
> +	if (ret)
> +		return ret;
> +
> +	ret = intel_ring_begin(req, 6);
>  	if (ret)
>  		return ret;
>  
> @@ -359,7 +371,7 @@ static int intel_overlay_off(struct intel_overlay *overlay)
>  	}
>  	intel_ring_advance(ring);
>  
> -	return intel_overlay_do_wait_request(overlay, intel_overlay_off_tail);
> +	return intel_overlay_do_wait_request(overlay, req, intel_overlay_off_tail);
>  }
>  
>  /* recover from an interruption due to a signal
> @@ -404,7 +416,13 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
>  
>  	if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
>  		/* synchronous slowpath */
> -		ret = intel_ring_begin(ring, 2);
> +		struct drm_i915_gem_request *req;
> +
> +		ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
> +		if (ret)
> +			return ret;
> +
> +		ret = intel_ring_begin(req, 2);
>  		if (ret)
>  			return ret;
>  
> @@ -412,7 +430,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
>  		intel_ring_emit(ring, MI_NOOP);
>  		intel_ring_advance(ring);
>  
> -		ret = intel_overlay_do_wait_request(overlay,
> +		ret = intel_overlay_do_wait_request(overlay, req,
>  						    intel_overlay_release_old_vid_tail);
>  		if (ret)
>  			return ret;
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 78911e2..5905fa5 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -5506,6 +5506,7 @@ static void ironlake_enable_rc6(struct drm_device *dev)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> +	struct drm_i915_gem_request *req = NULL;
>  	bool was_interruptible;
>  	int ret;
>  
> @@ -5524,16 +5525,17 @@ static void ironlake_enable_rc6(struct drm_device *dev)
>  	was_interruptible = dev_priv->mm.interruptible;
>  	dev_priv->mm.interruptible = false;
>  
> +	ret = dev_priv->gt.alloc_request(ring, NULL, &req);
> +	if (ret)
> +		goto err;
> +
>  	/*
>  	 * GPU can automatically power down the render unit if given a page
>  	 * to save state.
>  	 */
> -	ret = intel_ring_begin(ring, 6);
> -	if (ret) {
> -		ironlake_teardown_rc6(dev);
> -		dev_priv->mm.interruptible = was_interruptible;
> -		return;
> -	}
> +	ret = intel_ring_begin(req, 6);
> +	if (ret)
> +		goto err;
>  
>  	intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN);
>  	intel_ring_emit(ring, MI_SET_CONTEXT);
> @@ -5547,6 +5549,11 @@ static void ironlake_enable_rc6(struct drm_device *dev)
>  	intel_ring_emit(ring, MI_FLUSH);
>  	intel_ring_advance(ring);
>  
> +	ret = i915_add_request_no_flush(req);
> +	if (ret)
> +		goto err;
> +	req = NULL;
> +
>  	/*
>  	 * Wait for the command parser to advance past MI_SET_CONTEXT. The HW
>  	 * does an implicit flush, combined with MI_FLUSH above, it should be
> @@ -5554,16 +5561,20 @@ static void ironlake_enable_rc6(struct drm_device *dev)
>  	 */
>  	ret = intel_ring_idle(ring);
>  	dev_priv->mm.interruptible = was_interruptible;
> -	if (ret) {
> -		DRM_ERROR("failed to enable ironlake power savings\n");
> -		ironlake_teardown_rc6(dev);
> -		return;
> -	}
> +	if (ret)
> +		goto err;
>  
>  	I915_WRITE(PWRCTXA, i915_gem_obj_ggtt_offset(dev_priv->ips.pwrctx) | PWRCTX_EN);
>  	I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
>  
>  	intel_print_rc6_info(dev, GEN6_RC_CTL_RC6_ENABLE);
> +
> +	return;
> +
> +err:
> +	DRM_ERROR("failed to enable ironlake power savings\n");
> +	ironlake_teardown_rc6(dev);
> +	dev_priv->mm.interruptible = was_interruptible;
> +	if (req)
> +		i915_gem_request_unreference(req);
>  }
>  
>  static unsigned long intel_pxfreq(u32 vidfreq)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index b60e59b..e6e7bb5 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -91,10 +91,11 @@ void __intel_ring_advance(struct intel_engine_cs *ring)
>  }
>  
>  static int
> -gen2_render_ring_flush(struct intel_engine_cs *ring,
> +gen2_render_ring_flush(struct drm_i915_gem_request *req,
>  		       u32	invalidate_domains,
>  		       u32	flush_domains)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	u32 cmd;
>  	int ret;
>  
> @@ -105,7 +106,7 @@ gen2_render_ring_flush(struct intel_engine_cs *ring,
>  	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
>  		cmd |= MI_READ_FLUSH;
>  
> -	ret = intel_ring_begin(ring, 2);
> +	ret = intel_ring_begin(req, 2);
>  	if (ret)
>  		return ret;
>  
> @@ -117,10 +118,11 @@ gen2_render_ring_flush(struct intel_engine_cs *ring,
>  }
>  
>  static int
> -gen4_render_ring_flush(struct intel_engine_cs *ring,
> +gen4_render_ring_flush(struct drm_i915_gem_request *req,
>  		       u32	invalidate_domains,
>  		       u32	flush_domains)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	struct drm_device *dev = ring->dev;
>  	u32 cmd;
>  	int ret;
> @@ -163,7 +165,7 @@ gen4_render_ring_flush(struct intel_engine_cs *ring,
>  	    (IS_G4X(dev) || IS_GEN5(dev)))
>  		cmd |= MI_INVALIDATE_ISP;
>  
> -	ret = intel_ring_begin(ring, 2);
> +	ret = intel_ring_begin(req, 2);
>  	if (ret)
>  		return ret;
>  
> @@ -212,12 +214,13 @@ gen4_render_ring_flush(struct intel_engine_cs *ring,
>   * really our business.  That leaves only stall at scoreboard.
>   */
>  static int
> -intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)
> +intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
>  	int ret;
>  
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(req, 6);
>  	if (ret)
>  		return ret;
>  
> @@ -230,7 +233,7 @@ intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)
>  	intel_ring_emit(ring, MI_NOOP);
>  	intel_ring_advance(ring);
>  
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(req, 6);
>  	if (ret)
>  		return ret;
>  
> @@ -246,15 +249,16 @@ intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)
>  }
>  
>  static int
> -gen6_render_ring_flush(struct intel_engine_cs *ring,
> -                         u32 invalidate_domains, u32 flush_domains)
> +gen6_render_ring_flush(struct drm_i915_gem_request *req,
> +                       u32 invalidate_domains, u32 flush_domains)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	u32 flags = 0;
>  	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
>  	int ret;
>  
>  	/* Force SNB workarounds for PIPE_CONTROL flushes */
> -	ret = intel_emit_post_sync_nonzero_flush(ring);
> +	ret = intel_emit_post_sync_nonzero_flush(req);
>  	if (ret)
>  		return ret;
>  
> @@ -284,7 +288,7 @@ gen6_render_ring_flush(struct intel_engine_cs *ring,
>  		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
>  	}
>  
> -	ret = intel_ring_begin(ring, 4);
> +	ret = intel_ring_begin(req, 4);
>  	if (ret)
>  		return ret;
>  
> @@ -298,11 +302,12 @@ gen6_render_ring_flush(struct intel_engine_cs *ring,
>  }
>  
>  static int
> -gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
> +gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	int ret;
>  
> -	ret = intel_ring_begin(ring, 4);
> +	ret = intel_ring_begin(req, 4);
>  	if (ret)
>  		return ret;
>  
> @@ -316,14 +321,15 @@ gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
>  	return 0;
>  }
>  
> -static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value)
> +static int gen7_ring_fbc_flush(struct drm_i915_gem_request *req, u32 value)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	int ret;
>  
>  	if (!ring->fbc_dirty)
>  		return 0;
>  
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(req, 6);
>  	if (ret)
>  		return ret;
>  	/* WaFbcNukeOn3DBlt:ivb/hsw */
> @@ -340,9 +346,10 @@ static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value)
>  }
>  
>  static int
> -gen7_render_ring_flush(struct intel_engine_cs *ring,
> +gen7_render_ring_flush(struct drm_i915_gem_request *req,
>  		       u32 invalidate_domains, u32 flush_domains)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	u32 flags = 0;
>  	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
>  	int ret;
> @@ -381,10 +388,10 @@ gen7_render_ring_flush(struct intel_engine_cs *ring,
>  		/* Workaround: we must issue a pipe_control with CS-stall bit
>  		 * set before a pipe_control command that has the state cache
>  		 * invalidate bit set. */
> -		gen7_render_ring_cs_stall_wa(ring);
> +		gen7_render_ring_cs_stall_wa(req);
>  	}
>  
> -	ret = intel_ring_begin(ring, 4);
> +	ret = intel_ring_begin(req, 4);
>  	if (ret)
>  		return ret;
>  
> @@ -395,18 +402,19 @@ gen7_render_ring_flush(struct intel_engine_cs *ring,
>  	intel_ring_advance(ring);
>  
>  	if (!invalidate_domains && flush_domains)
> -		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
> +		return gen7_ring_fbc_flush(req, FBC_REND_NUKE);
>  
>  	return 0;
>  }
>  
>  static int
> -gen8_emit_pipe_control(struct intel_engine_cs *ring,
> +gen8_emit_pipe_control(struct drm_i915_gem_request *req,
>  		       u32 flags, u32 scratch_addr)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	int ret;
>  
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(req, 6);
>  	if (ret)
>  		return ret;
>  
> @@ -422,11 +430,11 @@ gen8_emit_pipe_control(struct intel_engine_cs *ring,
>  }
>  
>  static int
> -gen8_render_ring_flush(struct intel_engine_cs *ring,
> +gen8_render_ring_flush(struct drm_i915_gem_request *req,
>  		       u32 invalidate_domains, u32 flush_domains)
>  {
>  	u32 flags = 0;
> -	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
> +	u32 scratch_addr = req->ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
>  	int ret;
>  
>  	flags |= PIPE_CONTROL_CS_STALL;
> @@ -446,7 +454,7 @@ gen8_render_ring_flush(struct intel_engine_cs *ring,
>  		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
>  
>  		/* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
> -		ret = gen8_emit_pipe_control(ring,
> +		ret = gen8_emit_pipe_control(req,
>  					     PIPE_CONTROL_CS_STALL |
>  					     PIPE_CONTROL_STALL_AT_SCOREBOARD,
>  					     0);
> @@ -454,12 +462,12 @@ gen8_render_ring_flush(struct intel_engine_cs *ring,
>  			return ret;
>  	}
>  
> -	ret = gen8_emit_pipe_control(ring, flags, scratch_addr);
> +	ret = gen8_emit_pipe_control(req, flags, scratch_addr);
>  	if (ret)
>  		return ret;
>  
>  	if (!invalidate_domains && flush_domains)
> -		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
> +		return gen7_ring_fbc_flush(req, FBC_REND_NUKE);
>  
>  	return 0;
>  }
> @@ -670,9 +678,10 @@ err:
>  	return ret;
>  }
>  
> -static int intel_ring_workarounds_emit(struct intel_engine_cs *ring,
> +static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req,
>  				       struct intel_context *ctx)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	int ret, i;
>  	struct drm_device *dev = ring->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -682,11 +691,11 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring,
>  		return 0;
>  
>  	ring->gpu_caches_dirty = true;
> -	ret = intel_ring_flush_all_caches(ring);
> +	ret = intel_ring_flush_all_caches(req);
>  	if (ret)
>  		return ret;
>  
> -	ret = intel_ring_begin(ring, (w->count * 2 + 2));
> +	ret = intel_ring_begin(req, (w->count * 2 + 2));
>  	if (ret)
>  		return ret;
>  
> @@ -700,7 +709,7 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring,
>  	intel_ring_advance(ring);
>  
>  	ring->gpu_caches_dirty = true;
> -	ret = intel_ring_flush_all_caches(ring);
> +	ret = intel_ring_flush_all_caches(req);
>  	if (ret)
>  		return ret;
>  
> @@ -898,10 +907,11 @@ static void render_ring_cleanup(struct intel_engine_cs *ring)
>  	intel_fini_pipe_control(ring);
>  }
>  
> -static int gen8_rcs_signal(struct intel_engine_cs *signaller,
> +static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
>  			   unsigned int num_dwords)
>  {
>  #define MBOX_UPDATE_DWORDS 8
> +	struct intel_engine_cs *signaller = signaller_req->ring;
>  	struct drm_device *dev = signaller->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_engine_cs *waiter;
> @@ -911,7 +921,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,
>  	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
>  #undef MBOX_UPDATE_DWORDS
>  
> -	ret = intel_ring_begin(signaller, num_dwords);
> +	ret = intel_ring_begin(signaller_req, num_dwords);
>  	if (ret)
>  		return ret;
>  
> @@ -921,8 +931,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,
>  		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
>  			continue;
>  
> -		seqno = i915_gem_request_get_seqno(
> -					   signaller->outstanding_lazy_request);
> +		seqno = i915_gem_request_get_seqno(signaller_req);
>  		intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
>  		intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
>  					   PIPE_CONTROL_QW_WRITE |
> @@ -939,10 +948,11 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,
>  	return 0;
>  }
>  
> -static int gen8_xcs_signal(struct intel_engine_cs *signaller,
> +static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
>  			   unsigned int num_dwords)
>  {
>  #define MBOX_UPDATE_DWORDS 6
> +	struct intel_engine_cs *signaller = signaller_req->ring;
>  	struct drm_device *dev = signaller->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_engine_cs *waiter;
> @@ -952,7 +962,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,
>  	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
>  #undef MBOX_UPDATE_DWORDS
>  
> -	ret = intel_ring_begin(signaller, num_dwords);
> +	ret = intel_ring_begin(signaller_req, num_dwords);
>  	if (ret)
>  		return ret;
>  
> @@ -962,8 +972,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,
>  		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
>  			continue;
>  
> -		seqno = i915_gem_request_get_seqno(
> -					   signaller->outstanding_lazy_request);
> +		seqno = i915_gem_request_get_seqno(signaller_req);
>  		intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
>  					   MI_FLUSH_DW_OP_STOREDW);
>  		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
> @@ -978,9 +987,10 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,
>  	return 0;
>  }
>  
> -static int gen6_signal(struct intel_engine_cs *signaller,
> +static int gen6_signal(struct drm_i915_gem_request *signaller_req,
>  		       unsigned int num_dwords)
>  {
> +	struct intel_engine_cs *signaller = signaller_req->ring;
>  	struct drm_device *dev = signaller->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_engine_cs *useless;
> @@ -991,15 +1001,14 @@ static int gen6_signal(struct intel_engine_cs *signaller,
>  	num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
>  #undef MBOX_UPDATE_DWORDS
>  
> -	ret = intel_ring_begin(signaller, num_dwords);
> +	ret = intel_ring_begin(signaller_req, num_dwords);
>  	if (ret)
>  		return ret;
>  
>  	for_each_ring(useless, dev_priv, i) {
>  		u32 mbox_reg = signaller->semaphore.mbox.signal[i];
>  		if (mbox_reg != GEN6_NOSYNC) {
> -			u32 seqno = i915_gem_request_get_seqno(
> -					   signaller->outstanding_lazy_request);
> +			u32 seqno = i915_gem_request_get_seqno(signaller_req);
>  			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
>  			intel_ring_emit(signaller, mbox_reg);
>  			intel_ring_emit(signaller, seqno);
> @@ -1016,29 +1025,28 @@ static int gen6_signal(struct intel_engine_cs *signaller,
>  /**
>   * gen6_add_request - Update the semaphore mailbox registers
>   *
> - * @ring - ring that is adding a request
> - * @seqno - return seqno stuck into the ring
> + * @req - request to write to the ring
>   *
>   * Update the mailbox registers in the *other* rings with the current seqno.
>   * This acts like a signal in the canonical semaphore.
>   */
>  static int
> -gen6_add_request(struct intel_engine_cs *ring)
> +gen6_add_request(struct drm_i915_gem_request *req)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	int ret;
>  
>  	if (ring->semaphore.signal)
> -		ret = ring->semaphore.signal(ring, 4);
> +		ret = ring->semaphore.signal(req, 4);
>  	else
> -		ret = intel_ring_begin(ring, 4);
> +		ret = intel_ring_begin(req, 4);
>  
>  	if (ret)
>  		return ret;
>  
>  	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
>  	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
> -	intel_ring_emit(ring,
> -		    i915_gem_request_get_seqno(ring->outstanding_lazy_request));
> +	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
>  	intel_ring_emit(ring, MI_USER_INTERRUPT);
>  	__intel_ring_advance(ring);
>  
> @@ -1061,14 +1069,15 @@ static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,
>   */
>  
>  static int
> -gen8_ring_sync(struct intel_engine_cs *waiter,
> +gen8_ring_sync(struct drm_i915_gem_request *waiter_req,
>  	       struct intel_engine_cs *signaller,
>  	       u32 seqno)
>  {
> +	struct intel_engine_cs *waiter = waiter_req->ring;
>  	struct drm_i915_private *dev_priv = waiter->dev->dev_private;
>  	int ret;
>  
> -	ret = intel_ring_begin(waiter, 4);
> +	ret = intel_ring_begin(waiter_req, 4);
>  	if (ret)
>  		return ret;
>  
> @@ -1086,10 +1095,11 @@ gen8_ring_sync(struct intel_engine_cs *waiter,
>  }
>  
>  static int
> -gen6_ring_sync(struct intel_engine_cs *waiter,
> +gen6_ring_sync(struct drm_i915_gem_request *waiter_req,
>  	       struct intel_engine_cs *signaller,
>  	       u32 seqno)
>  {
> +	struct intel_engine_cs *waiter = waiter_req->ring;
>  	u32 dw1 = MI_SEMAPHORE_MBOX |
>  		  MI_SEMAPHORE_COMPARE |
>  		  MI_SEMAPHORE_REGISTER;
> @@ -1104,7 +1114,7 @@ gen6_ring_sync(struct intel_engine_cs *waiter,
>  
>  	WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
>  
> -	ret = intel_ring_begin(waiter, 4);
> +	ret = intel_ring_begin(waiter_req, 4);
>  	if (ret)
>  		return ret;
>  
> @@ -1135,8 +1145,9 @@ do {									\
>  } while (0)
>  
>  static int
> -pc_render_add_request(struct intel_engine_cs *ring)
> +pc_render_add_request(struct drm_i915_gem_request *req)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
>  	int ret;
>  
> @@ -1148,7 +1159,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
>  	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
>  	 * memory before requesting an interrupt.
>  	 */
> -	ret = intel_ring_begin(ring, 32);
> +	ret = intel_ring_begin(req, 32);
>  	if (ret)
>  		return ret;
>  
> @@ -1156,8 +1167,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
>  			PIPE_CONTROL_WRITE_FLUSH |
>  			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
>  	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
> -	intel_ring_emit(ring,
> -		    i915_gem_request_get_seqno(ring->outstanding_lazy_request));
> +	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
>  	intel_ring_emit(ring, 0);
>  	PIPE_CONTROL_FLUSH(ring, scratch_addr);
>  	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
> @@ -1176,8 +1186,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
>  			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
>  			PIPE_CONTROL_NOTIFY);
>  	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
> -	intel_ring_emit(ring,
> -		    i915_gem_request_get_seqno(ring->outstanding_lazy_request));
> +	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
>  	intel_ring_emit(ring, 0);
>  	__intel_ring_advance(ring);
>  
> @@ -1390,13 +1399,14 @@ void intel_ring_setup_status_page(struct intel_engine_cs *ring)
>  }
>  
>  static int
> -bsd_ring_flush(struct intel_engine_cs *ring,
> +bsd_ring_flush(struct drm_i915_gem_request *req,
>  	       u32     invalidate_domains,
>  	       u32     flush_domains)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	int ret;
>  
> -	ret = intel_ring_begin(ring, 2);
> +	ret = intel_ring_begin(req, 2);
>  	if (ret)
>  		return ret;
>  
> @@ -1407,18 +1417,18 @@ bsd_ring_flush(struct intel_engine_cs *ring,
>  }
>  
>  static int
> -i9xx_add_request(struct intel_engine_cs *ring)
> +i9xx_add_request(struct drm_i915_gem_request *req)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	int ret;
>  
> -	ret = intel_ring_begin(ring, 4);
> +	ret = intel_ring_begin(req, 4);
>  	if (ret)
>  		return ret;
>  
>  	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
>  	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
> -	intel_ring_emit(ring,
> -		    i915_gem_request_get_seqno(ring->outstanding_lazy_request));
> +	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
>  	intel_ring_emit(ring, MI_USER_INTERRUPT);
>  	__intel_ring_advance(ring);
>  
> @@ -1550,13 +1560,14 @@ gen8_ring_put_irq(struct intel_engine_cs *ring)
>  }
>  
>  static int
> -i965_dispatch_execbuffer(struct intel_engine_cs *ring,
> +i965_dispatch_execbuffer(struct drm_i915_gem_request *req,
>  			 u64 offset, u32 length,
>  			 unsigned flags)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	int ret;
>  
> -	ret = intel_ring_begin(ring, 2);
> +	ret = intel_ring_begin(req, 2);
>  	if (ret)
>  		return ret;
>  
> @@ -1575,14 +1586,15 @@ i965_dispatch_execbuffer(struct intel_engine_cs *ring,
>  #define I830_TLB_ENTRIES (2)
>  #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
>  static int
> -i830_dispatch_execbuffer(struct intel_engine_cs *ring,
> -				u64 offset, u32 len,
> -				unsigned flags)
> +i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
> +			 u64 offset, u32 len,
> +			 unsigned flags)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	u32 cs_offset = ring->scratch.gtt_offset;
>  	int ret;
>  
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(req, 6);
>  	if (ret)
>  		return ret;
>  
> @@ -1599,7 +1611,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,
>  		if (len > I830_BATCH_LIMIT)
>  			return -ENOSPC;
>  
> -		ret = intel_ring_begin(ring, 6 + 2);
> +		ret = intel_ring_begin(req, 6 + 2);
>  		if (ret)
>  			return ret;
>  
> @@ -1622,7 +1634,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,
>  		offset = cs_offset;
>  	}
>  
> -	ret = intel_ring_begin(ring, 4);
> +	ret = intel_ring_begin(req, 4);
>  	if (ret)
>  		return ret;
>  
> @@ -1636,13 +1648,14 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,
>  }
>  
>  static int
> -i915_dispatch_execbuffer(struct intel_engine_cs *ring,
> +i915_dispatch_execbuffer(struct drm_i915_gem_request *req,
>  			 u64 offset, u32 len,
>  			 unsigned flags)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	int ret;
>  
> -	ret = intel_ring_begin(ring, 2);
> +	ret = intel_ring_begin(req, 2);
>  	if (ret)
>  		return ret;
>  
> @@ -1885,6 +1898,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
>  
>  	intel_unpin_ringbuffer_obj(ringbuf);
>  	intel_destroy_ringbuffer_obj(ringbuf);
> +	WARN_ON(ring->outstanding_lazy_request);
>  	i915_gem_request_assign(&ring->outstanding_lazy_request, NULL);
>  
>  	if (ring->cleanup)
> @@ -2007,8 +2021,9 @@ int intel_ring_idle(struct intel_engine_cs *ring)
>  	int ret;
>  
>  	/* We need to add any requests required to flush the objects and ring */
> +	WARN_ON(ring->outstanding_lazy_request);
>  	if (ring->outstanding_lazy_request) {
> -		ret = i915_add_request(ring);
> +		ret = i915_add_request(ring->outstanding_lazy_request);
>  		if (ret)
>  			return ret;
>  	}
> @@ -2025,13 +2040,18 @@ int intel_ring_idle(struct intel_engine_cs *ring)
>  }
>  
>  int
> -intel_ring_alloc_request(struct intel_engine_cs *ring, struct intel_context *ctx)
> +intel_ring_alloc_request(struct intel_engine_cs *ring,
> +			 struct intel_context *ctx,
> +			 struct drm_i915_gem_request **req_out)
>  {
>  	int ret;
>  	struct drm_i915_gem_request *request;
>  	struct drm_i915_private *dev_private = ring->dev->dev_private;
>  
> -	if (ring->outstanding_lazy_request)
> +	if (!req_out)
> +		return -EINVAL;
> +
> +	if ((*req_out = ring->outstanding_lazy_request) != NULL)
>  		return 0;
>  
>  	request = kzalloc(sizeof(*request), GFP_KERNEL);
> @@ -2053,7 +2073,7 @@ intel_ring_alloc_request(struct intel_engine_cs *ring, struct intel_context *ctx
>  	spewThisReq(request, "\x1B[32mCreated: %d:%d, ref => %d\x1B[0m", request->uniq, request->seqno, request->ref.refcount.counter);
>  
>  	//printk(KERN_INFO "%s:%d> <%s> OLR = 0x%p, uniq = %d, seqno = %d\n", __func__, __LINE__, ring->name, request, request->uniq, request->seqno);
> -	ring->outstanding_lazy_request = request;
> +	*req_out = ring->outstanding_lazy_request = request;
>  	return 0;
>  }
>  
> @@ -2078,9 +2098,10 @@ static int __intel_ring_prepare(struct intel_engine_cs *ring,
>  	return 0;
>  }
>  
> -int intel_ring_begin(struct intel_engine_cs *ring,
> +int intel_ring_begin(struct drm_i915_gem_request *req,
>  		     int num_dwords)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	struct drm_i915_private *dev_priv = ring->dev->dev_private;
>  	int ret;
>  
> @@ -2093,18 +2114,14 @@ int intel_ring_begin(struct intel_engine_cs *ring,
>  	if (ret)
>  		return ret;
>  
> -	/* Preallocate the olr before touching the ring */
> -	ret = intel_ring_alloc_request(ring, NULL);
> -	if (ret)
> -		return ret;
> -
>  	ring->buffer->space -= num_dwords * sizeof(uint32_t);
>  	return 0;
>  }
>  
>  /* Align the ring tail to a cacheline boundary */
> -int intel_ring_cacheline_align(struct intel_engine_cs *ring)
> +int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
>  	int ret;
>  
> @@ -2112,7 +2129,7 @@ int intel_ring_cacheline_align(struct intel_engine_cs *ring)
>  		return 0;
>  
>  	num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
> -	ret = intel_ring_begin(ring, num_dwords);
> +	ret = intel_ring_begin(req, num_dwords);
>  	if (ret)
>  		return ret;
>  
> @@ -2176,13 +2193,14 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring,
>  		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
>  }
>  
> -static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
> +static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
>  			       u32 invalidate, u32 flush)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	uint32_t cmd;
>  	int ret;
>  
> -	ret = intel_ring_begin(ring, 4);
> +	ret = intel_ring_begin(req, 4);
>  	if (ret)
>  		return ret;
>  
> @@ -2212,14 +2230,15 @@ static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
>  }
>  
>  static int
> -gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
> +gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
>  			      u64 offset, u32 len,
>  			      unsigned flags)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE);
>  	int ret;
>  
> -	ret = intel_ring_begin(ring, 4);
> +	ret = intel_ring_begin(req, 4);
>  	if (ret)
>  		return ret;
>  
> @@ -2234,13 +2253,14 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
>  }
>  
>  static int
> -hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
> +hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
>  			      u64 offset, u32 len,
>  			      unsigned flags)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	int ret;
>  
> -	ret = intel_ring_begin(ring, 2);
> +	ret = intel_ring_begin(req, 2);
>  	if (ret)
>  		return ret;
>  
> @@ -2256,13 +2276,14 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
>  }
>  
>  static int
> -gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
> +gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
>  			      u64 offset, u32 len,
>  			      unsigned flags)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	int ret;
>  
> -	ret = intel_ring_begin(ring, 2);
> +	ret = intel_ring_begin(req, 2);
>  	if (ret)
>  		return ret;
>  
> @@ -2278,15 +2299,16 @@ gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
>  
>  /* Blitter support (SandyBridge+) */
>  
> -static int gen6_ring_flush(struct intel_engine_cs *ring,
> +static int gen6_ring_flush(struct drm_i915_gem_request *req,
>  			   u32 invalidate, u32 flush)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	struct drm_device *dev = ring->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	uint32_t cmd;
>  	int ret;
>  
> -	ret = intel_ring_begin(ring, 4);
> +	ret = intel_ring_begin(req, 4);
>  	if (ret)
>  		return ret;
>  
> @@ -2315,7 +2337,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
>  
>  	if (!invalidate && flush) {
>  		if (IS_GEN7(dev))
> -			return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);
> +			return gen7_ring_fbc_flush(req, FBC_REND_CACHE_CLEAN);
>  		else if (IS_BROADWELL(dev))
>  			dev_priv->fbc.need_sw_cache_clean = true;
>  	}
> @@ -2696,14 +2718,15 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
>  }
>  
>  int
> -intel_ring_flush_all_caches(struct intel_engine_cs *ring)
> +intel_ring_flush_all_caches(struct drm_i915_gem_request *req)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	int ret;
>  
>  	if (!ring->gpu_caches_dirty)
>  		return 0;
>  
> -	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
> +	ret = ring->flush(req, 0, I915_GEM_GPU_DOMAINS);
>  	if (ret)
>  		return ret;
>  
> @@ -2714,8 +2737,9 @@ intel_ring_flush_all_caches(struct intel_engine_cs *ring)
>  }
>  
>  int
> -intel_ring_invalidate_all_caches(struct intel_engine_cs *ring)
> +intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
>  {
> +	struct intel_engine_cs *ring = req->ring;
>  	uint32_t flush_domains;
>  	int ret;
>  
> @@ -2723,7 +2747,7 @@ intel_ring_invalidate_all_caches(struct intel_engine_cs *ring)
>  	if (ring->gpu_caches_dirty)
>  		flush_domains = I915_GEM_GPU_DOMAINS;
>  
> -	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
> +	ret = ring->flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
>  	if (ret)
>  		return ret;
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 48cbb00..a7e47ad 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -154,15 +154,15 @@ struct  intel_engine_cs {
>  
>  	int		(*init_hw)(struct intel_engine_cs *ring);
>  
> -	int		(*init_context)(struct intel_engine_cs *ring,
> +	int		(*init_context)(struct drm_i915_gem_request *req,
>  					struct intel_context *ctx);
>  
>  	void		(*write_tail)(struct intel_engine_cs *ring,
>  				      u32 value);
> -	int __must_check (*flush)(struct intel_engine_cs *ring,
> +	int __must_check (*flush)(struct drm_i915_gem_request *req,
>  				  u32	invalidate_domains,
>  				  u32	flush_domains);
> -	int		(*add_request)(struct intel_engine_cs *ring);
> +	int		(*add_request)(struct drm_i915_gem_request *req);
>  	/* Some chipsets are not quite as coherent as advertised and need
>  	 * an expensive kick to force a true read of the up-to-date seqno.
>  	 * However, the up-to-date seqno is not always required and the last
> @@ -173,7 +173,7 @@ struct  intel_engine_cs {
>  				     bool lazy_coherency);
>  	void		(*set_seqno)(struct intel_engine_cs *ring,
>  				     u32 seqno);
> -	int		(*dispatch_execbuffer)(struct intel_engine_cs *ring,
> +	int		(*dispatch_execbuffer)(struct drm_i915_gem_request *req,
>  					       u64 offset, u32 length,
>  					       unsigned dispatch_flags);
>  #define I915_DISPATCH_SECURE 0x1
> @@ -231,10 +231,10 @@ struct  intel_engine_cs {
>  		};
>  
>  		/* AKA wait() */
> -		int	(*sync_to)(struct intel_engine_cs *ring,
> -				   struct intel_engine_cs *to,
> +		int	(*sync_to)(struct drm_i915_gem_request *to_req,
> +				   struct intel_engine_cs *from,
>  				   u32 seqno);
> -		int	(*signal)(struct intel_engine_cs *signaller,
> +		int	(*signal)(struct drm_i915_gem_request *signaller_req,
>  				  /* num_dwords needed by caller */
>  				  unsigned int num_dwords);
>  	} semaphore;
> @@ -245,11 +245,11 @@ struct  intel_engine_cs {
>  	struct list_head execlist_retired_req_list;
>  	u8 next_context_status_buffer;
>  	u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
> -	int		(*emit_request)(struct intel_ringbuffer *ringbuf);
> -	int		(*emit_flush)(struct intel_ringbuffer *ringbuf,
> +	int		(*emit_request)(struct drm_i915_gem_request *req);
> +	int		(*emit_flush)(struct drm_i915_gem_request *req,
>  				      u32 invalidate_domains,
>  				      u32 flush_domains);
> -	int		(*emit_bb_start)(struct intel_ringbuffer *ringbuf,
> +	int		(*emit_bb_start)(struct drm_i915_gem_request *req,
>  					 u64 offset, unsigned flags);
>  
>  	/**
> @@ -433,10 +433,11 @@ int intel_alloc_ringbuffer_obj(struct drm_device *dev,
>  void intel_stop_ring_buffer(struct intel_engine_cs *ring);
>  void intel_cleanup_ring_buffer(struct intel_engine_cs *ring);
>  
> -int __must_check intel_ring_begin(struct intel_engine_cs *ring, int n);
> -int __must_check intel_ring_cacheline_align(struct intel_engine_cs *ring);
> +int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n);
> +int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
>  int __must_check intel_ring_alloc_request(struct intel_engine_cs *ring,
> -					  struct intel_context *ctx);
> +					  struct intel_context *ctx,
> +					  struct drm_i915_gem_request **req_out);
>  static inline void intel_ring_emit(struct intel_engine_cs *ring,
>  				   u32 data)
>  {
> @@ -457,8 +458,8 @@ void __intel_ring_advance(struct intel_engine_cs *ring);
>  
>  int __must_check intel_ring_idle(struct intel_engine_cs *ring);
>  void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno);
> -int intel_ring_flush_all_caches(struct intel_engine_cs *ring);
> -int intel_ring_invalidate_all_caches(struct intel_engine_cs *ring);
> +int intel_ring_flush_all_caches(struct drm_i915_gem_request *req);
> +int intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req);
>  
>  void intel_fini_pipe_control(struct intel_engine_cs *ring);
>  int intel_init_pipe_control(struct intel_engine_cs *ring);
> @@ -479,11 +480,4 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
>  	return ringbuf->tail;
>  }
>  
> -static inline struct drm_i915_gem_request *
> -intel_ring_get_request(struct intel_engine_cs *ring)
> -{
> -	BUG_ON(ring->outstanding_lazy_request == NULL);
> -	return ring->outstanding_lazy_request;
> -}
> -
>  #endif /* _INTEL_RINGBUFFER_H_ */
> -- 
> 1.7.9.5
> 
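As a rough sketch of the calling convention everything is being
converted over to (function names are taken from the diff above, but
'ring' and 'ctx' are assumed to already be in scope, and this is
illustrative rather than an excerpt from the patch), a top-level owner
would now look something like:

	struct drm_i915_gem_request *req;
	int ret;

	/* The owner allocates the request explicitly up front... */
	ret = intel_ring_alloc_request(ring, ctx, &req);
	if (ret)
		return ret;

	/* ...and passes it down rather than relying on the OLR. */
	ret = intel_ring_begin(req, 4);
	if (ret)
		return ret;

	intel_ring_emit(req->ring, MI_NOOP);
	intel_ring_emit(req->ring, MI_NOOP);
	intel_ring_emit(req->ring, MI_NOOP);
	intel_ring_emit(req->ring, MI_NOOP);
	intel_ring_advance(req->ring);

	/* The owner, not the lazy request machinery, submits the work. */
	ret = i915_add_request(req);

Clients that merely emit commands just take the request as a
parameter, as the converted flush/dispatch functions above do.
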
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
