* Derive requests from dma-buf fence
@ 2016-07-15 10:11 Chris Wilson
  2016-07-15 10:11 ` [PATCH 1/9] drm/i915: Flush logical context image out to memory upon suspend Chris Wilson
                   ` (9 more replies)
  0 siblings, 10 replies; 29+ messages in thread
From: Chris Wilson @ 2016-07-15 10:11 UTC (permalink / raw)
  To: intel-gfx

This is the minimal conversion to derive struct i915_gem_request from
dma-buf fences. They are still only used privately (i.e. the requests
are not attached to the dma-buf or any reservation object so not exposed
to any other driver or userspace) and so we defer the issue of RCU
access to the request until later - until we have annotations over our
own access to the request so that we can catch RCU issues.

This series just looks at the issues surrounding using requests as
fences, plus a bonus patch aimed at BAT/bsw.
-Chris
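
The core of the conversion (a rough sketch distilled from later patches in
the series, using the v4.7-era struct fence from linux/fence.h) is to embed
the fence as the base member of the request, so that a fence pointer handed
out to core code can be upcast back to its containing request:

	struct i915_gem_request {
		struct fence fence;	/* base: refcount, seqno, wait machinery */
		spinlock_t lock;	/* lock passed to fence_init() */
		/* ... existing request state (engine, ring, ctx, ...) ... */
	};

	static inline struct i915_gem_request *
	to_request(struct fence *fence)
	{
		/* Only valid for fences embedded in a request. */
		return container_of(fence, struct i915_gem_request, fence);
	}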


* [PATCH 1/9] drm/i915: Flush logical context image out to memory upon suspend
  2016-07-15 10:11 Derive requests from dma-buf fence Chris Wilson
@ 2016-07-15 10:11 ` Chris Wilson
  2016-07-15 10:41   ` Mika Kuoppala
  2016-07-15 10:11 ` [PATCH 2/9] drm/i915: Move GEM request routines to i915_gem_request.c Chris Wilson
                   ` (8 subsequent siblings)
  9 siblings, 1 reply; 29+ messages in thread
From: Chris Wilson @ 2016-07-15 10:11 UTC (permalink / raw)
  To: intel-gfx

Before suspend, and especially before building the hibernation image, we
need the context image to be coherent in memory. To do this we require
that we perform a context switch to a disposable context (i.e. the
dev_priv->kernel_context) - when that switch is complete, all other
context images will be complete. This leaves the kernel_context image as
incomplete, but fortunately that is disposable and we can do a quick
fixup of the logical state after resuming.
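
Note that in execlists mode the switch happens implicitly when a request
targeting the kernel context is submitted, so the explicit call to
i915_switch_context() is only needed on the legacy ringbuffer path. Roughly,
per engine (a gloss on switch_to_kernel_context() in the diff below, with
error handling elided):

	req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
	if (!i915.enable_execlists)
		ret = i915_switch_context(req); /* legacy: emits MI_SET_CONTEXT */
	i915_add_request_no_flush(req);         /* execlists: switch on submission */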

Testcase: igt/gem_exec_suspend # bsw
References: https://bugs.freedesktop.org/show_bug.cgi?id=96526
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c |  4 +---
 drivers/gpu/drm/i915/i915_drv.h |  1 +
 drivers/gpu/drm/i915/i915_gem.c | 46 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 15440123e38d..c5b7b8e0678a 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1590,9 +1590,7 @@ static int i915_drm_resume(struct drm_device *dev)
 
 	intel_csr_ucode_resume(dev_priv);
 
-	mutex_lock(&dev->struct_mutex);
-	i915_gem_restore_gtt_mappings(dev);
-	mutex_unlock(&dev->struct_mutex);
+	i915_gem_resume(dev);
 
 	i915_restore_state(dev);
 	intel_opregion_setup(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1ec523d29789..e73c0fc84c73 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3384,6 +3384,7 @@ void i915_gem_init_swizzling(struct drm_device *dev);
 void i915_gem_cleanup_engines(struct drm_device *dev);
 int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv);
 int __must_check i915_gem_suspend(struct drm_device *dev);
+void i915_gem_resume(struct drm_device *dev);
 void __i915_add_request(struct drm_i915_gem_request *req,
 			struct drm_i915_gem_object *batch_obj,
 			bool flush_caches);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index cf0e8aa8035c..8b42a5101f11 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4974,6 +4974,35 @@ i915_gem_stop_engines(struct drm_device *dev)
 		dev_priv->gt.stop_engine(engine);
 }
 
+static int switch_to_kernel_context(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+
+	for_each_engine(engine, dev_priv) {
+		struct drm_i915_gem_request *req;
+		int ret;
+
+		if (engine->last_context == NULL)
+			continue;
+
+		if (engine->last_context == dev_priv->kernel_context)
+			continue;
+
+		req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
+		if (IS_ERR(req))
+			return PTR_ERR(req);
+
+		ret = 0;
+		if (!i915.enable_execlists)
+			ret = i915_switch_context(req);
+		i915_add_request_no_flush(req);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 int
 i915_gem_suspend(struct drm_device *dev)
 {
@@ -4983,6 +5012,10 @@ i915_gem_suspend(struct drm_device *dev)
 	intel_suspend_gt_powersave(dev_priv);
 
 	mutex_lock(&dev->struct_mutex);
+	ret = switch_to_kernel_context(dev_priv);
+	if (ret)
+		goto err;
+
 	ret = i915_gem_wait_for_idle(dev_priv);
 	if (ret)
 		goto err;
@@ -5009,6 +5042,19 @@ err:
 	return ret;
 }
 
+void i915_gem_resume(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+
+	mutex_lock(&dev->struct_mutex);
+
+	if (i915.enable_execlists)
+		intel_lr_context_reset(dev_priv, dev_priv->kernel_context);
+
+	i915_gem_restore_gtt_mappings(dev);
+	mutex_unlock(&dev->struct_mutex);
+}
+
 void i915_gem_init_swizzling(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-- 
2.8.1


* [PATCH 2/9] drm/i915: Move GEM request routines to i915_gem_request.c
  2016-07-15 10:11 Derive requests from dma-buf fence Chris Wilson
  2016-07-15 10:11 ` [PATCH 1/9] drm/i915: Flush logical context image out to memory upon suspend Chris Wilson
@ 2016-07-15 10:11 ` Chris Wilson
  2016-07-15 10:41   ` Joonas Lahtinen
  2016-07-15 10:43   ` Mika Kuoppala
  2016-07-15 10:11 ` [PATCH 3/9] drm/i915: Retire oldest completed request before allocating next Chris Wilson
                   ` (7 subsequent siblings)
  9 siblings, 2 replies; 29+ messages in thread
From: Chris Wilson @ 2016-07-15 10:11 UTC (permalink / raw)
  To: intel-gfx

Migrate the request operations out of the main body of i915_gem.c and
into their own C file for easier expansion.

v2: Move __i915_add_request() across as well

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile           |   1 +
 drivers/gpu/drm/i915/i915_drv.h         | 209 +---------
 drivers/gpu/drm/i915/i915_gem.c         | 655 +------------------------------
 drivers/gpu/drm/i915/i915_gem_request.c | 658 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_request.h | 238 ++++++++++++
 5 files changed, 905 insertions(+), 856 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_request.c
 create mode 100644 drivers/gpu/drm/i915/i915_gem_request.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 75318ebb8d25..6092f0ea24df 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -33,6 +33,7 @@ i915-y += i915_cmd_parser.o \
 	  i915_gem_gtt.o \
 	  i915_gem.o \
 	  i915_gem_render_state.o \
+	  i915_gem_request.o \
 	  i915_gem_shrinker.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e73c0fc84c73..a4767c198413 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -61,6 +61,7 @@
 #include "i915_gem.h"
 #include "i915_gem_gtt.h"
 #include "i915_gem_render_state.h"
+#include "i915_gem_request.h"
 
 #include "intel_gvt.h"
 
@@ -2365,171 +2366,6 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg)
 	     (((__iter).curr += PAGE_SIZE) < (__iter).max) ||		\
 	     ((__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0))
 
-/**
- * Request queue structure.
- *
- * The request queue allows us to note sequence numbers that have been emitted
- * and may be associated with active buffers to be retired.
- *
- * By keeping this list, we can avoid having to do questionable sequence
- * number comparisons on buffer last_read|write_seqno. It also allows an
- * emission time to be associated with the request for tracking how far ahead
- * of the GPU the submission is.
- *
- * The requests are reference counted, so upon creation they should have an
- * initial reference taken using kref_init
- */
-struct drm_i915_gem_request {
-	struct kref ref;
-
-	/** On which ring this request was generated */
-	struct drm_i915_private *i915;
-	struct intel_engine_cs *engine;
-	struct intel_signal_node signaling;
-
-	 /** GEM sequence number associated with the previous request,
-	  * when the HWS breadcrumb is equal to this the GPU is processing
-	  * this request.
-	  */
-	u32 previous_seqno;
-
-	 /** GEM sequence number associated with this request,
-	  * when the HWS breadcrumb is equal or greater than this the GPU
-	  * has finished processing this request.
-	  */
-	u32 seqno;
-
-	/** Position in the ringbuffer of the start of the request */
-	u32 head;
-
-	/**
-	 * Position in the ringbuffer of the start of the postfix.
-	 * This is required to calculate the maximum available ringbuffer
-	 * space without overwriting the postfix.
-	 */
-	 u32 postfix;
-
-	/** Position in the ringbuffer of the end of the whole request */
-	u32 tail;
-
-	/** Preallocate space in the ringbuffer for the emitting the request */
-	u32 reserved_space;
-
-	/**
-	 * Context and ring buffer related to this request
-	 * Contexts are refcounted, so when this request is associated with a
-	 * context, we must increment the context's refcount, to guarantee that
-	 * it persists while any request is linked to it. Requests themselves
-	 * are also refcounted, so the request will only be freed when the last
-	 * reference to it is dismissed, and the code in
-	 * i915_gem_request_free() will then decrement the refcount on the
-	 * context.
-	 */
-	struct i915_gem_context *ctx;
-	struct intel_ringbuffer *ringbuf;
-
-	/**
-	 * Context related to the previous request.
-	 * As the contexts are accessed by the hardware until the switch is
-	 * completed to a new context, the hardware may still be writing
-	 * to the context object after the breadcrumb is visible. We must
-	 * not unpin/unbind/prune that object whilst still active and so
-	 * we keep the previous context pinned until the following (this)
-	 * request is retired.
-	 */
-	struct i915_gem_context *previous_context;
-
-	/** Batch buffer related to this request if any (used for
-	    error state dump only) */
-	struct drm_i915_gem_object *batch_obj;
-
-	/** Time at which this request was emitted, in jiffies. */
-	unsigned long emitted_jiffies;
-
-	/** global list entry for this request */
-	struct list_head list;
-
-	struct drm_i915_file_private *file_priv;
-	/** file_priv list entry for this request */
-	struct list_head client_list;
-
-	/** process identifier submitting this request */
-	struct pid *pid;
-
-	/**
-	 * The ELSP only accepts two elements at a time, so we queue
-	 * context/tail pairs on a given queue (ring->execlist_queue) until the
-	 * hardware is available. The queue serves a double purpose: we also use
-	 * it to keep track of the up to 2 contexts currently in the hardware
-	 * (usually one in execution and the other queued up by the GPU): We
-	 * only remove elements from the head of the queue when the hardware
-	 * informs us that an element has been completed.
-	 *
-	 * All accesses to the queue are mediated by a spinlock
-	 * (ring->execlist_lock).
-	 */
-
-	/** Execlist link in the submission queue.*/
-	struct list_head execlist_link;
-
-	/** Execlists no. of times this request has been sent to the ELSP */
-	int elsp_submitted;
-
-	/** Execlists context hardware id. */
-	unsigned ctx_hw_id;
-};
-
-struct drm_i915_gem_request * __must_check
-i915_gem_request_alloc(struct intel_engine_cs *engine,
-		       struct i915_gem_context *ctx);
-void i915_gem_request_free(struct kref *req_ref);
-int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
-				   struct drm_file *file);
-
-static inline uint32_t
-i915_gem_request_get_seqno(struct drm_i915_gem_request *req)
-{
-	return req ? req->seqno : 0;
-}
-
-static inline struct intel_engine_cs *
-i915_gem_request_get_engine(struct drm_i915_gem_request *req)
-{
-	return req ? req->engine : NULL;
-}
-
-static inline struct drm_i915_gem_request *
-i915_gem_request_reference(struct drm_i915_gem_request *req)
-{
-	if (req)
-		kref_get(&req->ref);
-	return req;
-}
-
-static inline void
-i915_gem_request_unreference(struct drm_i915_gem_request *req)
-{
-	kref_put(&req->ref, i915_gem_request_free);
-}
-
-static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
-					   struct drm_i915_gem_request *src)
-{
-	if (src)
-		i915_gem_request_reference(src);
-
-	if (*pdst)
-		i915_gem_request_unreference(*pdst);
-
-	*pdst = src;
-}
-
-/*
- * XXX: i915_gem_request_completed should be here but currently needs the
- * definition of i915_seqno_passed() which is below. It will be moved in
- * a later patch when the call to i915_seqno_passed() is obsoleted...
- */
-
 /*
  * A command that requires special handling by the command parser.
  */
@@ -3297,37 +3133,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 		       struct drm_i915_gem_object *new,
 		       unsigned frontbuffer_bits);
 
-/**
- * Returns true if seq1 is later than seq2.
- */
-static inline bool
-i915_seqno_passed(uint32_t seq1, uint32_t seq2)
-{
-	return (int32_t)(seq1 - seq2) >= 0;
-}
-
-static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req)
-{
-	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
-				 req->previous_seqno);
-}
-
-static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req)
-{
-	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
-				 req->seqno);
-}
-
-bool __i915_spin_request(const struct drm_i915_gem_request *request,
-			 int state, unsigned long timeout_us);
-static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
-				     int state, unsigned long timeout_us)
-{
-	return (i915_gem_request_started(request) &&
-		__i915_spin_request(request, state, timeout_us));
-}
-
-int __must_check i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno);
 int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
 
 struct drm_i915_gem_request *
@@ -3385,18 +3190,6 @@ void i915_gem_cleanup_engines(struct drm_device *dev);
 int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv);
 int __must_check i915_gem_suspend(struct drm_device *dev);
 void i915_gem_resume(struct drm_device *dev);
-void __i915_add_request(struct drm_i915_gem_request *req,
-			struct drm_i915_gem_object *batch_obj,
-			bool flush_caches);
-#define i915_add_request(req) \
-	__i915_add_request(req, NULL, true)
-#define i915_add_request_no_flush(req) \
-	__i915_add_request(req, NULL, false)
-int __i915_wait_request(struct drm_i915_gem_request *req,
-			bool interruptible,
-			s64 *timeout,
-			struct intel_rps_client *rps);
-int __must_check i915_wait_request(struct drm_i915_gem_request *req);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 int __must_check
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8b42a5101f11..10e5db3f294e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1325,365 +1325,6 @@ put_rpm:
 	return ret;
 }
 
-static int
-i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
-{
-	if (__i915_terminally_wedged(reset_counter))
-		return -EIO;
-
-	if (__i915_reset_in_progress(reset_counter)) {
-		/* Non-interruptible callers can't handle -EAGAIN, hence return
-		 * -EIO unconditionally for these. */
-		if (!interruptible)
-			return -EIO;
-
-		return -EAGAIN;
-	}
-
-	return 0;
-}
-
-static unsigned long local_clock_us(unsigned *cpu)
-{
-	unsigned long t;
-
-	/* Cheaply and approximately convert from nanoseconds to microseconds.
-	 * The result and subsequent calculations are also defined in the same
-	 * approximate microseconds units. The principal source of timing
-	 * error here is from the simple truncation.
-	 *
-	 * Note that local_clock() is only defined wrt to the current CPU;
-	 * the comparisons are no longer valid if we switch CPUs. Instead of
-	 * blocking preemption for the entire busywait, we can detect the CPU
-	 * switch and use that as indicator of system load and a reason to
-	 * stop busywaiting, see busywait_stop().
-	 */
-	*cpu = get_cpu();
-	t = local_clock() >> 10;
-	put_cpu();
-
-	return t;
-}
-
-static bool busywait_stop(unsigned long timeout, unsigned cpu)
-{
-	unsigned this_cpu;
-
-	if (time_after(local_clock_us(&this_cpu), timeout))
-		return true;
-
-	return this_cpu != cpu;
-}
-
-bool __i915_spin_request(const struct drm_i915_gem_request *req,
-			 int state, unsigned long timeout_us)
-{
-	unsigned cpu;
-
-	/* When waiting for high frequency requests, e.g. during synchronous
-	 * rendering split between the CPU and GPU, the finite amount of time
-	 * required to set up the irq and wait upon it limits the response
-	 * rate. By busywaiting on the request completion for a short while we
-	 * can service the high frequency waits as quickly as possible. However,
-	 * if it is a slow request, we want to sleep as quickly as possible.
-	 * The tradeoff between waiting and sleeping is roughly the time it
-	 * takes to sleep on a request, on the order of a microsecond.
-	 */
-
-	timeout_us += local_clock_us(&cpu);
-	do {
-		if (i915_gem_request_completed(req))
-			return true;
-
-		if (signal_pending_state(state, current))
-			break;
-
-		if (busywait_stop(timeout_us, cpu))
-			break;
-
-		cpu_relax_lowlatency();
-	} while (!need_resched());
-
-	return false;
-}
-
-/**
- * __i915_wait_request - wait until execution of request has finished
- * @req: duh!
- * @interruptible: do an interruptible wait (normally yes)
- * @timeout: in - how long to wait (NULL forever); out - how much time remaining
- * @rps: RPS client
- *
- * Note: It is of utmost importance that the passed in seqno and reset_counter
- * values have been read by the caller in an smp safe manner. Where read-side
- * locks are involved, it is sufficient to read the reset_counter before
- * unlocking the lock that protects the seqno. For lockless tricks, the
- * reset_counter _must_ be read before, and an appropriate smp_rmb must be
- * inserted.
- *
- * Returns 0 if the request was found within the allotted time. Else returns the
- * errno with remaining time filled in timeout argument.
- */
-int __i915_wait_request(struct drm_i915_gem_request *req,
-			bool interruptible,
-			s64 *timeout,
-			struct intel_rps_client *rps)
-{
-	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
-	DEFINE_WAIT(reset);
-	struct intel_wait wait;
-	unsigned long timeout_remain;
-	s64 before = 0; /* Only to silence a compiler warning. */
-	int ret = 0;
-
-	might_sleep();
-
-	if (list_empty(&req->list))
-		return 0;
-
-	if (i915_gem_request_completed(req))
-		return 0;
-
-	timeout_remain = MAX_SCHEDULE_TIMEOUT;
-	if (timeout) {
-		if (WARN_ON(*timeout < 0))
-			return -EINVAL;
-
-		if (*timeout == 0)
-			return -ETIME;
-
-		timeout_remain = nsecs_to_jiffies_timeout(*timeout);
-
-		/*
-		 * Record current time in case interrupted by signal, or wedged.
-		 */
-		before = ktime_get_raw_ns();
-	}
-
-	trace_i915_gem_request_wait_begin(req);
-
-	/* This client is about to stall waiting for the GPU. In many cases
-	 * this is undesirable and limits the throughput of the system, as
-	 * many clients cannot continue processing user input/output whilst
-	 * blocked. RPS autotuning may take tens of milliseconds to respond
-	 * to the GPU load and thus incurs additional latency for the client.
-	 * We can circumvent that by promoting the GPU frequency to maximum
-	 * before we wait. This makes the GPU throttle up much more quickly
-	 * (good for benchmarks and user experience, e.g. window animations),
-	 * but at a cost of spending more power processing the workload
-	 * (bad for battery). Not all clients even want their results
-	 * immediately and for them we should just let the GPU select its own
-	 * frequency to maximise efficiency. To prevent a single client from
-	 * forcing the clocks too high for the whole system, we only allow
-	 * each client to waitboost once in a busy period.
-	 */
-	if (INTEL_INFO(req->i915)->gen >= 6)
-		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
-
-	/* Optimistic spin for the next ~jiffie before touching IRQs */
-	if (i915_spin_request(req, state, 5))
-		goto complete;
-
-	set_current_state(state);
-	add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
-
-	intel_wait_init(&wait, req->seqno);
-	if (intel_engine_add_wait(req->engine, &wait))
-		/* In order to check that we haven't missed the interrupt
-		 * as we enabled it, we need to kick ourselves to do a
-		 * coherent check on the seqno before we sleep.
-		 */
-		goto wakeup;
-
-	for (;;) {
-		if (signal_pending_state(state, current)) {
-			ret = -ERESTARTSYS;
-			break;
-		}
-
-		timeout_remain = io_schedule_timeout(timeout_remain);
-		if (timeout_remain == 0) {
-			ret = -ETIME;
-			break;
-		}
-
-		if (intel_wait_complete(&wait))
-			break;
-
-		set_current_state(state);
-
-wakeup:
-		/* Carefully check if the request is complete, giving time
-		 * for the seqno to be visible following the interrupt.
-		 * We also have to check in case we are kicked by the GPU
-		 * reset in order to drop the struct_mutex.
-		 */
-		if (__i915_request_irq_complete(req))
-			break;
-
-		/* Only spin if we know the GPU is processing this request */
-		if (i915_spin_request(req, state, 2))
-			break;
-	}
-	remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
-
-	intel_engine_remove_wait(req->engine, &wait);
-	__set_current_state(TASK_RUNNING);
-complete:
-	trace_i915_gem_request_wait_end(req);
-
-	if (timeout) {
-		s64 tres = *timeout - (ktime_get_raw_ns() - before);
-
-		*timeout = tres < 0 ? 0 : tres;
-
-		/*
-		 * Apparently ktime isn't accurate enough and occasionally has a
-		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
-		 * things up to make the test happy. We allow up to 1 jiffy.
-		 *
-		 * This is a regression from the timespec->ktime conversion.
-		 */
-		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
-			*timeout = 0;
-	}
-
-	if (rps && req->seqno == req->engine->last_submitted_seqno) {
-		/* The GPU is now idle and this client has stalled.
-		 * Since no other client has submitted a request in the
-		 * meantime, assume that this client is the only one
-		 * supplying work to the GPU but is unable to keep that
-		 * work supplied because it is waiting. Since the GPU is
-		 * then never kept fully busy, RPS autoclocking will
-		 * keep the clocks relatively low, causing further delays.
-		 * Compensate by giving the synchronous client credit for
-		 * a waitboost next time.
-		 */
-		spin_lock(&req->i915->rps.client_lock);
-		list_del_init(&rps->link);
-		spin_unlock(&req->i915->rps.client_lock);
-	}
-
-	return ret;
-}
-
-int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
-				   struct drm_file *file)
-{
-	struct drm_i915_file_private *file_priv;
-
-	WARN_ON(!req || !file || req->file_priv);
-
-	if (!req || !file)
-		return -EINVAL;
-
-	if (req->file_priv)
-		return -EINVAL;
-
-	file_priv = file->driver_priv;
-
-	spin_lock(&file_priv->mm.lock);
-	req->file_priv = file_priv;
-	list_add_tail(&req->client_list, &file_priv->mm.request_list);
-	spin_unlock(&file_priv->mm.lock);
-
-	req->pid = get_pid(task_pid(current));
-
-	return 0;
-}
-
-static inline void
-i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
-{
-	struct drm_i915_file_private *file_priv = request->file_priv;
-
-	if (!file_priv)
-		return;
-
-	spin_lock(&file_priv->mm.lock);
-	list_del(&request->client_list);
-	request->file_priv = NULL;
-	spin_unlock(&file_priv->mm.lock);
-
-	put_pid(request->pid);
-	request->pid = NULL;
-}
-
-static void i915_gem_request_retire(struct drm_i915_gem_request *request)
-{
-	trace_i915_gem_request_retire(request);
-
-	/* We know the GPU must have read the request to have
-	 * sent us the seqno + interrupt, so use the position
-	 * of tail of the request to update the last known position
-	 * of the GPU head.
-	 *
-	 * Note this requires that we are always called in request
-	 * completion order.
-	 */
-	request->ringbuf->last_retired_head = request->postfix;
-
-	list_del_init(&request->list);
-	i915_gem_request_remove_from_client(request);
-
-	if (request->previous_context) {
-		if (i915.enable_execlists)
-			intel_lr_context_unpin(request->previous_context,
-					       request->engine);
-	}
-
-	i915_gem_context_unreference(request->ctx);
-	i915_gem_request_unreference(request);
-}
-
-static void
-__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
-{
-	struct intel_engine_cs *engine = req->engine;
-	struct drm_i915_gem_request *tmp;
-
-	lockdep_assert_held(&engine->i915->drm.struct_mutex);
-
-	if (list_empty(&req->list))
-		return;
-
-	do {
-		tmp = list_first_entry(&engine->request_list,
-				       typeof(*tmp), list);
-
-		i915_gem_request_retire(tmp);
-	} while (tmp != req);
-
-	WARN_ON(i915_verify_lists(engine->dev));
-}
-
-/**
- * Waits for a request to be signaled, and cleans up the
- * request and object lists appropriately for that event.
- * @req: request to wait on
- */
-int
-i915_wait_request(struct drm_i915_gem_request *req)
-{
-	struct drm_i915_private *dev_priv = req->i915;
-	bool interruptible;
-	int ret;
-
-	interruptible = dev_priv->mm.interruptible;
-
-	BUG_ON(!mutex_is_locked(&dev_priv->drm.struct_mutex));
-
-	ret = __i915_wait_request(req, interruptible, NULL, NULL);
-	if (ret)
-		return ret;
-
-	/* If the GPU hung, we want to keep the requests to find the guilty. */
-	if (!i915_reset_in_progress(&dev_priv->gpu_error))
-		__i915_gem_request_retire__upto(req);
-
-	return 0;
-}
-
 /**
  * Ensures that all rendering to the object has completed and the object is
  * safe to unbind from the GTT or access from the CPU.
@@ -1740,7 +1381,7 @@ i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
 		i915_gem_object_retire__write(obj);
 
 	if (!i915_reset_in_progress(&req->i915->gpu_error))
-		__i915_gem_request_retire__upto(req);
+		i915_gem_request_retire_upto(req);
 }
 
 /* A nonblocking variant of the above wait. This is a highly dangerous routine
@@ -2761,193 +2402,6 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
 	drm_gem_object_unreference(&obj->base);
 }
 
-static int
-i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
-{
-	struct intel_engine_cs *engine;
-	int ret;
-
-	/* Carefully retire all requests without writing to the rings */
-	for_each_engine(engine, dev_priv) {
-		ret = intel_engine_idle(engine);
-		if (ret)
-			return ret;
-	}
-	i915_gem_retire_requests(dev_priv);
-
-	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
-	if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) {
-		while (intel_kick_waiters(dev_priv) ||
-		       intel_kick_signalers(dev_priv))
-			yield();
-	}
-
-	/* Finally reset hw state */
-	for_each_engine(engine, dev_priv)
-		intel_ring_init_seqno(engine, seqno);
-
-	return 0;
-}
-
-int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
-{
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	int ret;
-
-	if (seqno == 0)
-		return -EINVAL;
-
-	/* HWS page needs to be set less than what we
-	 * will inject to ring
-	 */
-	ret = i915_gem_init_seqno(dev_priv, seqno - 1);
-	if (ret)
-		return ret;
-
-	/* Carefully set the last_seqno value so that wrap
-	 * detection still works
-	 */
-	dev_priv->next_seqno = seqno;
-	dev_priv->last_seqno = seqno - 1;
-	if (dev_priv->last_seqno == 0)
-		dev_priv->last_seqno--;
-
-	return 0;
-}
-
-int
-i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
-{
-	/* reserve 0 for non-seqno */
-	if (dev_priv->next_seqno == 0) {
-		int ret = i915_gem_init_seqno(dev_priv, 0);
-		if (ret)
-			return ret;
-
-		dev_priv->next_seqno = 1;
-	}
-
-	*seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
-	return 0;
-}
-
-static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	dev_priv->gt.active_engines |= intel_engine_flag(engine);
-	if (dev_priv->gt.awake)
-		return;
-
-	intel_runtime_pm_get_noresume(dev_priv);
-	dev_priv->gt.awake = true;
-
-	intel_enable_gt_powersave(dev_priv);
-	i915_update_gfx_val(dev_priv);
-	if (INTEL_GEN(dev_priv) >= 6)
-		gen6_rps_busy(dev_priv);
-
-	queue_delayed_work(dev_priv->wq,
-			   &dev_priv->gt.retire_work,
-			   round_jiffies_up_relative(HZ));
-}
-
-/*
- * NB: This function is not allowed to fail. Doing so would mean that the
- * request is not being tracked for completion but the work itself is
- * going to happen on the hardware. This would be a Bad Thing(tm).
- */
-void __i915_add_request(struct drm_i915_gem_request *request,
-			struct drm_i915_gem_object *obj,
-			bool flush_caches)
-{
-	struct intel_engine_cs *engine;
-	struct intel_ringbuffer *ringbuf;
-	u32 request_start;
-	u32 reserved_tail;
-	int ret;
-
-	if (WARN_ON(request == NULL))
-		return;
-
-	engine = request->engine;
-	ringbuf = request->ringbuf;
-
-	/*
-	 * To ensure that this call will not fail, space for its emissions
-	 * should already have been reserved in the ring buffer. Let the ring
-	 * know that it is time to use that space up.
-	 */
-	request_start = intel_ring_get_tail(ringbuf);
-	reserved_tail = request->reserved_space;
-	request->reserved_space = 0;
-
-	/*
-	 * Emit any outstanding flushes - execbuf can fail to emit the flush
-	 * after having emitted the batchbuffer command. Hence we need to fix
-	 * things up similar to emitting the lazy request. The difference here
-	 * is that the flush _must_ happen before the next request, no matter
-	 * what.
-	 */
-	if (flush_caches) {
-		if (i915.enable_execlists)
-			ret = logical_ring_flush_all_caches(request);
-		else
-			ret = intel_ring_flush_all_caches(request);
-		/* Not allowed to fail! */
-		WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
-	}
-
-	trace_i915_gem_request_add(request);
-
-	request->head = request_start;
-
-	/* Whilst this request exists, batch_obj will be on the
-	 * active_list, and so will hold the active reference. Only when this
- * request is retired will the batch_obj be moved onto the
-	 * inactive_list and lose its active reference. Hence we do not need
-	 * to explicitly hold another reference here.
-	 */
-	request->batch_obj = obj;
-
-	/* Seal the request and mark it as pending execution. Note that
-	 * we may inspect this state, without holding any locks, during
-	 * hangcheck. Hence we apply the barrier to ensure that we do not
-	 * see a more recent value in the hws than we are tracking.
-	 */
-	request->emitted_jiffies = jiffies;
-	request->previous_seqno = engine->last_submitted_seqno;
-	smp_store_mb(engine->last_submitted_seqno, request->seqno);
-	list_add_tail(&request->list, &engine->request_list);
-
-	/* Record the position of the start of the request so that
-	 * should we detect the updated seqno part-way through the
-	 * GPU processing the request, we never over-estimate the
-	 * position of the head.
-	 */
-	request->postfix = intel_ring_get_tail(ringbuf);
-
-	if (i915.enable_execlists)
-		ret = engine->emit_request(request);
-	else {
-		ret = engine->add_request(request);
-
-		request->tail = intel_ring_get_tail(ringbuf);
-	}
-	/* Not allowed to fail! */
-	WARN(ret, "emit|add_request failed: %d!\n", ret);
-	/* Sanity check that the reserved size was large enough. */
-	ret = intel_ring_get_tail(ringbuf) - request_start;
-	if (ret < 0)
-		ret += ringbuf->size;
-	WARN_ONCE(ret > reserved_tail,
-		  "Not enough space reserved (%d bytes) "
-		  "for adding the request (%d bytes)\n",
-		  reserved_tail, ret);
-
-	i915_gem_mark_busy(engine);
-}
-
 static bool i915_context_is_banned(const struct i915_gem_context *ctx)
 {
 	unsigned long elapsed;
@@ -2979,101 +2433,6 @@ static void i915_set_reset_status(struct i915_gem_context *ctx,
 	}
 }
 
-void i915_gem_request_free(struct kref *req_ref)
-{
-	struct drm_i915_gem_request *req = container_of(req_ref,
-						 typeof(*req), ref);
-	kmem_cache_free(req->i915->requests, req);
-}
-
-static inline int
-__i915_gem_request_alloc(struct intel_engine_cs *engine,
-			 struct i915_gem_context *ctx,
-			 struct drm_i915_gem_request **req_out)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error);
-	struct drm_i915_gem_request *req;
-	int ret;
-
-	if (!req_out)
-		return -EINVAL;
-
-	*req_out = NULL;
-
-	/* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
-	 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
-	 * and restart.
-	 */
-	ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
-	if (ret)
-		return ret;
-
-	req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
-	if (req == NULL)
-		return -ENOMEM;
-
-	ret = i915_gem_get_seqno(engine->i915, &req->seqno);
-	if (ret)
-		goto err;
-
-	kref_init(&req->ref);
-	req->i915 = dev_priv;
-	req->engine = engine;
-	req->ctx  = ctx;
-	i915_gem_context_reference(req->ctx);
-
-	/*
-	 * Reserve space in the ring buffer for all the commands required to
-	 * eventually emit this request. This is to guarantee that the
-	 * i915_add_request() call can't fail. Note that the reserve may need
-	 * to be redone if the request is not actually submitted straight
-	 * away, e.g. because a GPU scheduler has deferred it.
-	 */
-	req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
-
-	if (i915.enable_execlists)
-		ret = intel_logical_ring_alloc_request_extras(req);
-	else
-		ret = intel_ring_alloc_request_extras(req);
-	if (ret)
-		goto err_ctx;
-
-	*req_out = req;
-	return 0;
-
-err_ctx:
-	i915_gem_context_unreference(ctx);
-err:
-	kmem_cache_free(dev_priv->requests, req);
-	return ret;
-}
-
-/**
- * i915_gem_request_alloc - allocate a request structure
- *
- * @engine: engine that we wish to issue the request on.
- * @ctx: context that the request will be associated with.
- *       This can be NULL if the request is not directly related to
- *       any specific user context, in which case this function will
- *       choose an appropriate context to use.
- *
- * Returns a pointer to the allocated request if successful,
- * or an error code if not.
- */
-struct drm_i915_gem_request *
-i915_gem_request_alloc(struct intel_engine_cs *engine,
-		       struct i915_gem_context *ctx)
-{
-	struct drm_i915_gem_request *req;
-	int err;
-
-	if (ctx == NULL)
-		ctx = engine->i915->kernel_context;
-	err = __i915_gem_request_alloc(engine, ctx, &req);
-	return err ? ERR_PTR(err) : req;
-}
-
 struct drm_i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *engine)
 {
@@ -3147,14 +2506,14 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 	 * implicit references on things like e.g. ppgtt address spaces through
 	 * the request.
 	 */
-	while (!list_empty(&engine->request_list)) {
+	if (!list_empty(&engine->request_list)) {
 		struct drm_i915_gem_request *request;
 
-		request = list_first_entry(&engine->request_list,
-					   struct drm_i915_gem_request,
-					   list);
+		request = list_last_entry(&engine->request_list,
+					  struct drm_i915_gem_request,
+					  list);
 
-		i915_gem_request_retire(request);
+		i915_gem_request_retire_upto(request);
 	}
 
 	/* Having flushed all requests from all queues, we know that all
@@ -3222,7 +2581,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
 		if (!i915_gem_request_completed(request))
 			break;
 
-		i915_gem_request_retire(request);
+		i915_gem_request_retire_upto(request);
 	}
 
 	/* Move any buffers on the active list that are no longer referenced
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
new file mode 100644
index 000000000000..9e9aa6b725f7
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -0,0 +1,658 @@
+/*
+ * Copyright © 2008-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "i915_drv.h"
+
+int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
+				   struct drm_file *file)
+{
+	struct drm_i915_private *dev_private;
+	struct drm_i915_file_private *file_priv;
+
+	WARN_ON(!req || !file || req->file_priv);
+
+	if (!req || !file)
+		return -EINVAL;
+
+	if (req->file_priv)
+		return -EINVAL;
+
+	dev_private = req->i915;
+	file_priv = file->driver_priv;
+
+	spin_lock(&file_priv->mm.lock);
+	req->file_priv = file_priv;
+	list_add_tail(&req->client_list, &file_priv->mm.request_list);
+	spin_unlock(&file_priv->mm.lock);
+
+	req->pid = get_pid(task_pid(current));
+
+	return 0;
+}
+
+static inline void
+i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
+{
+	struct drm_i915_file_private *file_priv = request->file_priv;
+
+	if (!file_priv)
+		return;
+
+	spin_lock(&file_priv->mm.lock);
+	list_del(&request->client_list);
+	request->file_priv = NULL;
+	spin_unlock(&file_priv->mm.lock);
+
+	put_pid(request->pid);
+	request->pid = NULL;
+}
+
+static void i915_gem_request_retire(struct drm_i915_gem_request *request)
+{
+	trace_i915_gem_request_retire(request);
+	list_del_init(&request->list);
+
+	/* We know the GPU must have read the request to have
+	 * sent us the seqno + interrupt, so use the position
+	 * of tail of the request to update the last known position
+	 * of the GPU head.
+	 *
+	 * Note this requires that we are always called in request
+	 * completion order.
+	 */
+	request->ringbuf->last_retired_head = request->postfix;
+
+	i915_gem_request_remove_from_client(request);
+
+	if (request->previous_context) {
+		if (i915.enable_execlists)
+			intel_lr_context_unpin(request->previous_context,
+					       request->engine);
+	}
+
+	i915_gem_context_unreference(request->ctx);
+	i915_gem_request_unreference(request);
+}
+
+void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
+{
+	struct intel_engine_cs *engine = req->engine;
+	struct drm_i915_gem_request *tmp;
+
+	lockdep_assert_held(&req->i915->drm.struct_mutex);
+
+	if (list_empty(&req->list))
+		return;
+
+	do {
+		tmp = list_first_entry(&engine->request_list,
+				       typeof(*tmp), list);
+
+		i915_gem_request_retire(tmp);
+	} while (tmp != req);
+
+	WARN_ON(i915_verify_lists(engine->dev));
+}
+
+static int i915_gem_check_wedge(unsigned int reset_counter, bool interruptible)
+{
+	if (__i915_terminally_wedged(reset_counter))
+		return -EIO;
+
+	if (__i915_reset_in_progress(reset_counter)) {
+		/* Non-interruptible callers can't handle -EAGAIN, hence return
+		 * -EIO unconditionally for these.
+		 */
+		if (!interruptible)
+			return -EIO;
+
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
+{
+	struct intel_engine_cs *engine;
+	int ret;
+
+	/* Carefully retire all requests without writing to the rings */
+	for_each_engine(engine, dev_priv) {
+		ret = intel_engine_idle(engine);
+		if (ret)
+			return ret;
+	}
+	i915_gem_retire_requests(dev_priv);
+
+	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
+	if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) {
+		while (intel_kick_waiters(dev_priv) ||
+		       intel_kick_signalers(dev_priv))
+			yield();
+	}
+
+	/* Finally reset hw state */
+	for_each_engine(engine, dev_priv)
+		intel_ring_init_seqno(engine, seqno);
+
+	return 0;
+}
+
+int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	int ret;
+
+	if (seqno == 0)
+		return -EINVAL;
+
+	/* HWS page needs to be set less than what we
+	 * will inject to ring
+	 */
+	ret = i915_gem_init_seqno(dev_priv, seqno - 1);
+	if (ret)
+		return ret;
+
+	/* Carefully set the last_seqno value so that wrap
+	 * detection still works
+	 */
+	dev_priv->next_seqno = seqno;
+	dev_priv->last_seqno = seqno - 1;
+	if (dev_priv->last_seqno == 0)
+		dev_priv->last_seqno--;
+
+	return 0;
+}
+
+static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
+{
+	/* reserve 0 for non-seqno */
+	if (unlikely(dev_priv->next_seqno == 0)) {
+		int ret;
+
+		ret = i915_gem_init_seqno(dev_priv, 0);
+		if (ret)
+			return ret;
+
+		dev_priv->next_seqno = 1;
+	}
+
+	*seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
+	return 0;
+}
+
+static inline int
+__i915_gem_request_alloc(struct intel_engine_cs *engine,
+			 struct i915_gem_context *ctx,
+			 struct drm_i915_gem_request **req_out)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error);
+	struct drm_i915_gem_request *req;
+	int ret;
+
+	if (!req_out)
+		return -EINVAL;
+
+	*req_out = NULL;
+
+	/* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
+	 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
+	 * and restart.
+	 */
+	ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
+	if (ret)
+		return ret;
+
+	req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	ret = i915_gem_get_seqno(dev_priv, &req->seqno);
+	if (ret)
+		goto err;
+
+	kref_init(&req->ref);
+	req->i915 = dev_priv;
+	req->engine = engine;
+	req->ctx = ctx;
+	i915_gem_context_reference(ctx);
+
+	/*
+	 * Reserve space in the ring buffer for all the commands required to
+	 * eventually emit this request. This is to guarantee that the
+	 * i915_add_request() call can't fail. Note that the reserve may need
+	 * to be redone if the request is not actually submitted straight
+	 * away, e.g. because a GPU scheduler has deferred it.
+	 */
+	req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
+
+	if (i915.enable_execlists)
+		ret = intel_logical_ring_alloc_request_extras(req);
+	else
+		ret = intel_ring_alloc_request_extras(req);
+	if (ret)
+		goto err_ctx;
+
+	*req_out = req;
+	return 0;
+
+err_ctx:
+	i915_gem_context_unreference(ctx);
+err:
+	kmem_cache_free(dev_priv->requests, req);
+	return ret;
+}
+
+/**
+ * i915_gem_request_alloc - allocate a request structure
+ *
+ * @engine: engine that we wish to issue the request on.
+ * @ctx: context that the request will be associated with.
+ *       This can be NULL if the request is not directly related to
+ *       any specific user context, in which case this function will
+ *       choose an appropriate context to use.
+ *
+ * Returns a pointer to the allocated request if successful,
+ * or an error code if not.
+ */
+struct drm_i915_gem_request *
+i915_gem_request_alloc(struct intel_engine_cs *engine,
+		       struct i915_gem_context *ctx)
+{
+	struct drm_i915_gem_request *req;
+	int err;
+
+	if (!ctx)
+		ctx = engine->i915->kernel_context;
+	err = __i915_gem_request_alloc(engine, ctx, &req);
+	return err ? ERR_PTR(err) : req;
+}
+
+static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+
+	dev_priv->gt.active_engines |= intel_engine_flag(engine);
+	if (dev_priv->gt.awake)
+		return;
+
+	intel_runtime_pm_get_noresume(dev_priv);
+	dev_priv->gt.awake = true;
+
+	intel_enable_gt_powersave(dev_priv);
+	i915_update_gfx_val(dev_priv);
+	if (INTEL_GEN(dev_priv) >= 6)
+		gen6_rps_busy(dev_priv);
+
+	queue_delayed_work(dev_priv->wq,
+			   &dev_priv->gt.retire_work,
+			   round_jiffies_up_relative(HZ));
+}
+
+/*
+ * NB: This function is not allowed to fail. Doing so would mean that the
+ * request is not being tracked for completion but the work itself is
+ * going to happen on the hardware. This would be a Bad Thing(tm).
+ */
+void __i915_add_request(struct drm_i915_gem_request *request,
+			struct drm_i915_gem_object *obj,
+			bool flush_caches)
+{
+	struct intel_engine_cs *engine;
+	struct intel_ringbuffer *ringbuf;
+	u32 request_start;
+	u32 reserved_tail;
+	int ret;
+
+	if (WARN_ON(!request))
+		return;
+
+	engine = request->engine;
+	ringbuf = request->ringbuf;
+
+	/*
+	 * To ensure that this call will not fail, space for its emissions
+	 * should already have been reserved in the ring buffer. Let the ring
+	 * know that it is time to use that space up.
+	 */
+	request_start = intel_ring_get_tail(ringbuf);
+	reserved_tail = request->reserved_space;
+	request->reserved_space = 0;
+
+	/*
+	 * Emit any outstanding flushes - execbuf can fail to emit the flush
+	 * after having emitted the batchbuffer command. Hence we need to fix
+	 * things up similar to emitting the lazy request. The difference here
+	 * is that the flush _must_ happen before the next request, no matter
+	 * what.
+	 */
+	if (flush_caches) {
+		if (i915.enable_execlists)
+			ret = logical_ring_flush_all_caches(request);
+		else
+			ret = intel_ring_flush_all_caches(request);
+		/* Not allowed to fail! */
+		WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
+	}
+
+	trace_i915_gem_request_add(request);
+
+	request->head = request_start;
+
+	/* Whilst this request exists, batch_obj will be on the
+	 * active_list, and so will hold the active reference. Only when this
+	 * request is retired will the batch_obj be moved onto the
+	 * inactive_list and lose its active reference. Hence we do not need
+	 * to explicitly hold another reference here.
+	 */
+	request->batch_obj = obj;
+
+	/* Seal the request and mark it as pending execution. Note that
+	 * we may inspect this state, without holding any locks, during
+	 * hangcheck. Hence we apply the barrier to ensure that we do not
+	 * see a more recent value in the hws than we are tracking.
+	 */
+	request->emitted_jiffies = jiffies;
+	request->previous_seqno = engine->last_submitted_seqno;
+	smp_store_mb(engine->last_submitted_seqno, request->seqno);
+	list_add_tail(&request->list, &engine->request_list);
+
+	/* Record the position of the start of the request so that
+	 * should we detect the updated seqno part-way through the
+	 * GPU processing the request, we never over-estimate the
+	 * position of the head.
+	 */
+	request->postfix = intel_ring_get_tail(ringbuf);
+
+	if (i915.enable_execlists) {
+		ret = engine->emit_request(request);
+	} else {
+		ret = engine->add_request(request);
+
+		request->tail = intel_ring_get_tail(ringbuf);
+	}
+	/* Not allowed to fail! */
+	WARN(ret, "emit|add_request failed: %d!\n", ret);
+	/* Sanity check that the reserved size was large enough. */
+	ret = intel_ring_get_tail(ringbuf) - request_start;
+	if (ret < 0)
+		ret += ringbuf->size;
+	WARN_ONCE(ret > reserved_tail,
+		  "Not enough space reserved (%d bytes) "
+		  "for adding the request (%d bytes)\n",
+		  reserved_tail, ret);
+
+	i915_gem_mark_busy(engine);
+}
+
+static unsigned long local_clock_us(unsigned int *cpu)
+{
+	unsigned long t;
+
+	/* Cheaply and approximately convert from nanoseconds to microseconds.
+	 * The result and subsequent calculations are also defined in the same
+	 * approximate microseconds units. The principal source of timing
+	 * error here is from the simple truncation.
+	 *
+	 * Note that local_clock() is only defined wrt to the current CPU;
+	 * the comparisons are no longer valid if we switch CPUs. Instead of
+	 * blocking preemption for the entire busywait, we can detect the CPU
+	 * switch and use that as indicator of system load and a reason to
+	 * stop busywaiting, see busywait_stop().
+	 */
+	*cpu = get_cpu();
+	t = local_clock() >> 10;
+	put_cpu();
+
+	return t;
+}
+
+static bool busywait_stop(unsigned long timeout, unsigned int cpu)
+{
+	unsigned int this_cpu;
+
+	if (time_after(local_clock_us(&this_cpu), timeout))
+		return true;
+
+	return this_cpu != cpu;
+}
+
+bool __i915_spin_request(const struct drm_i915_gem_request *req,
+			 int state, unsigned long timeout_us)
+{
+	unsigned int cpu;
+
+	/* When waiting for high frequency requests, e.g. during synchronous
+	 * rendering split between the CPU and GPU, the finite amount of time
+	 * required to set up the irq and wait upon it limits the response
+	 * rate. By busywaiting on the request completion for a short while we
+	 * can service the high frequency waits as quickly as possible. However,
+	 * if it is a slow request, we want to sleep as quickly as possible.
+	 * The tradeoff between waiting and sleeping is roughly the time it
+	 * takes to sleep on a request, on the order of a microsecond.
+	 */
+
+	timeout_us += local_clock_us(&cpu);
+	do {
+		if (i915_gem_request_completed(req))
+			return true;
+
+		if (signal_pending_state(state, current))
+			break;
+
+		if (busywait_stop(timeout_us, cpu))
+			break;
+
+		cpu_relax_lowlatency();
+	} while (!need_resched());
+
+	return false;
+}
+
+/**
+ * __i915_wait_request - wait until execution of request has finished
+ * @req: duh!
+ * @interruptible: do an interruptible wait (normally yes)
+ * @timeout: in - how long to wait (NULL forever); out - how much time remaining
+ * @rps: client to charge for RPS boosting
+ *
+ * Note: It is of utmost importance that the passed in seqno and reset_counter
+ * values have been read by the caller in an smp safe manner. Where read-side
+ * locks are involved, it is sufficient to read the reset_counter before
+ * unlocking the lock that protects the seqno. For lockless tricks, the
+ * reset_counter _must_ be read before, and an appropriate smp_rmb must be
+ * inserted.
+ *
+ * Returns 0 if the request was found within the allotted time. Else returns the
+ * errno with remaining time filled in timeout argument.
+ */
+int __i915_wait_request(struct drm_i915_gem_request *req,
+			bool interruptible,
+			s64 *timeout,
+			struct intel_rps_client *rps)
+{
+	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
+	DEFINE_WAIT(reset);
+	struct intel_wait wait;
+	unsigned long timeout_remain;
+	int ret = 0;
+
+	might_sleep();
+
+	if (list_empty(&req->list))
+		return 0;
+
+	if (i915_gem_request_completed(req))
+		return 0;
+
+	timeout_remain = MAX_SCHEDULE_TIMEOUT;
+	if (timeout) {
+		if (WARN_ON(*timeout < 0))
+			return -EINVAL;
+
+		if (*timeout == 0)
+			return -ETIME;
+
+		/* Record current time in case interrupted, or wedged */
+		timeout_remain = nsecs_to_jiffies_timeout(*timeout);
+		*timeout += ktime_get_raw_ns();
+	}
+
+	trace_i915_gem_request_wait_begin(req);
+
+	/* This client is about to stall waiting for the GPU. In many cases
+	 * this is undesirable and limits the throughput of the system, as
+	 * many clients cannot continue processing user input/output whilst
+	 * blocked. RPS autotuning may take tens of milliseconds to respond
+	 * to the GPU load and thus incurs additional latency for the client.
+	 * We can circumvent that by promoting the GPU frequency to maximum
+	 * before we wait. This makes the GPU throttle up much more quickly
+	 * (good for benchmarks and user experience, e.g. window animations),
+	 * but at a cost of spending more power processing the workload
+	 * (bad for battery). Not all clients even want their results
+	 * immediately and for them we should just let the GPU select its own
+	 * frequency to maximise efficiency. To prevent a single client from
+	 * forcing the clocks too high for the whole system, we only allow
+	 * each client to waitboost once in a busy period.
+	 */
+	if (INTEL_GEN(req->i915) >= 6)
+		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
+
+	/* Optimistic spin for the next ~jiffie before touching IRQs */
+	if (i915_spin_request(req, state, 5))
+		goto complete;
+
+	set_current_state(state);
+	add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
+
+	intel_wait_init(&wait, req->seqno);
+	if (intel_engine_add_wait(req->engine, &wait))
+		/* In order to check that we haven't missed the interrupt
+		 * as we enabled it, we need to kick ourselves to do a
+		 * coherent check on the seqno before we sleep.
+		 */
+		goto wakeup;
+
+	for (;;) {
+		if (signal_pending_state(state, current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+
+		timeout_remain = io_schedule_timeout(timeout_remain);
+		if (timeout_remain == 0) {
+			ret = -ETIME;
+			break;
+		}
+
+		if (intel_wait_complete(&wait))
+			break;
+
+		set_current_state(state);
+
+wakeup:
+		/* Carefully check if the request is complete, giving time
+		 * for the seqno to be visible following the interrupt.
+		 * We also have to check in case we are kicked by the GPU
+		 * reset in order to drop the struct_mutex.
+		 */
+		if (__i915_request_irq_complete(req))
+			break;
+
+		/* Only spin if we know the GPU is processing this request */
+		if (i915_spin_request(req, state, 2))
+			break;
+	}
+	remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
+
+	intel_engine_remove_wait(req->engine, &wait);
+	__set_current_state(TASK_RUNNING);
+complete:
+	trace_i915_gem_request_wait_end(req);
+
+	if (timeout) {
+		*timeout -= ktime_get_raw_ns();
+		if (*timeout < 0)
+			*timeout = 0;
+
+		/*
+		 * Apparently ktime isn't accurate enough and occasionally has a
+		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
+		 * things up to make the test happy. We allow up to 1 jiffy.
+		 *
+		 * This is a regression from the timespec->ktime conversion.
+		 */
+		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
+			*timeout = 0;
+	}
+
+	if (rps && req->seqno == req->engine->last_submitted_seqno) {
+		/* The GPU is now idle and this client has stalled.
+		 * Since no other client has submitted a request in the
+		 * meantime, assume that this client is the only one
+		 * supplying work to the GPU but is unable to keep that
+		 * work supplied because it is waiting. Since the GPU is
+		 * then never kept fully busy, RPS autoclocking will
+		 * keep the clocks relatively low, causing further delays.
+		 * Compensate by giving the synchronous client credit for
+		 * a waitboost next time.
+		 */
+		spin_lock(&req->i915->rps.client_lock);
+		list_del_init(&rps->link);
+		spin_unlock(&req->i915->rps.client_lock);
+	}
+
+	return ret;
+}
+
+/**
+ * Waits for a request to be signaled, and cleans up the
+ * request and object lists appropriately for that event.
+ */
+int i915_wait_request(struct drm_i915_gem_request *req)
+{
+	int ret;
+
+	GEM_BUG_ON(!req);
+	lockdep_assert_held(&req->i915->drm.struct_mutex);
+
+	ret = __i915_wait_request(req, req->i915->mm.interruptible, NULL, NULL);
+	if (ret)
+		return ret;
+
+	/* If the GPU hung, we want to keep the requests to find the guilty. */
+	if (!i915_reset_in_progress(&req->i915->gpu_error))
+		i915_gem_request_retire_upto(req);
+
+	return 0;
+}
+
+void i915_gem_request_free(struct kref *req_ref)
+{
+	struct drm_i915_gem_request *req =
+		container_of(req_ref, typeof(*req), ref);
+	kmem_cache_free(req->i915->requests, req);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
new file mode 100644
index 000000000000..ea700befcc28
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -0,0 +1,238 @@
+/*
+ * Copyright © 2008-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef I915_GEM_REQUEST_H
+#define I915_GEM_REQUEST_H
+
+/**
+ * Request queue structure.
+ *
+ * The request queue allows us to note sequence numbers that have been emitted
+ * and may be associated with active buffers to be retired.
+ *
+ * By keeping this list, we can avoid having to do questionable sequence
+ * number comparisons on buffer last_read|write_seqno. It also allows an
+ * emission time to be associated with the request for tracking how far ahead
+ * of the GPU the submission is.
+ *
+ * The requests are reference counted, so upon creation they should have an
+ * initial reference taken using kref_init
+ */
+struct drm_i915_gem_request {
+	struct kref ref;
+
+	/** On which ring this request was generated */
+	struct drm_i915_private *i915;
+
+	/**
+	 * Context and ring buffer related to this request
+	 * Contexts are refcounted, so when this request is associated with a
+	 * context, we must increment the context's refcount, to guarantee that
+	 * it persists while any request is linked to it. Requests themselves
+	 * are also refcounted, so the request will only be freed when the last
+	 * reference to it is dismissed, and the code in
+	 * i915_gem_request_free() will then decrement the refcount on the
+	 * context.
+	 */
+	struct i915_gem_context *ctx;
+	struct intel_engine_cs *engine;
+	struct intel_ringbuffer *ringbuf;
+	struct intel_signal_node signaling;
+
+	/** GEM sequence number associated with the previous request,
+	 * when the HWS breadcrumb is equal to this, the GPU is processing
+	 * this request.
+	 */
+	u32 previous_seqno;
+
+	/** GEM sequence number associated with this request,
+	 * when the HWS breadcrumb is equal to or greater than this, the GPU
+	 * has finished processing this request.
+	 */
+	u32 seqno;
+
+	/** Position in the ringbuffer of the start of the request */
+	u32 head;
+
+	/**
+	 * Position in the ringbuffer of the start of the postfix.
+	 * This is required to calculate the maximum available ringbuffer
+	 * space without overwriting the postfix.
+	 */
+	u32 postfix;
+
+	/** Position in the ringbuffer of the end of the whole request */
+	u32 tail;
+
+	/** Preallocated space in the ringbuffer for emitting the request */
+	u32 reserved_space;
+
+	/**
+	 * Context related to the previous request.
+	 * As the contexts are accessed by the hardware until the switch is
+	 * completed to a new context, the hardware may still be writing
+	 * to the context object after the breadcrumb is visible. We must
+	 * not unpin/unbind/prune that object whilst still active and so
+	 * we keep the previous context pinned until the following (this)
+	 * request is retired.
+	 */
+	struct i915_gem_context *previous_context;
+
+	/** Batch buffer related to this request if any (used for
+	 * error state dump only).
+	 */
+	struct drm_i915_gem_object *batch_obj;
+
+	/** Time at which this request was emitted, in jiffies. */
+	unsigned long emitted_jiffies;
+
+	/** global list entry for this request */
+	struct list_head list;
+
+	struct drm_i915_file_private *file_priv;
+	/** file_priv list entry for this request */
+	struct list_head client_list;
+
+	/** process identifier submitting this request */
+	struct pid *pid;
+
+	/**
+	 * The ELSP only accepts two elements at a time, so we queue
+	 * context/tail pairs on a given queue (ring->execlist_queue) until the
+	 * hardware is available. The queue serves a double purpose: we also use
+	 * it to keep track of the up to 2 contexts currently in the hardware
+	 * (usually one in execution and the other queued up by the GPU): We
+	 * only remove elements from the head of the queue when the hardware
+	 * informs us that an element has been completed.
+	 *
+	 * All accesses to the queue are mediated by a spinlock
+	 * (ring->execlist_lock).
+	 */
+
+	/** Execlist link in the submission queue. */
+	struct list_head execlist_link;
+
+	/** Execlists no. of times this request has been sent to the ELSP */
+	int elsp_submitted;
+
+	/** Execlists context hardware id. */
+	unsigned int ctx_hw_id;
+};
+
+struct drm_i915_gem_request * __must_check
+i915_gem_request_alloc(struct intel_engine_cs *engine,
+		       struct i915_gem_context *ctx);
+void i915_gem_request_free(struct kref *req_ref);
+int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
+				   struct drm_file *file);
+void i915_gem_request_retire_upto(struct drm_i915_gem_request *req);
+
+static inline u32
+i915_gem_request_get_seqno(struct drm_i915_gem_request *req)
+{
+	return req ? req->seqno : 0;
+}
+
+static inline struct intel_engine_cs *
+i915_gem_request_get_engine(struct drm_i915_gem_request *req)
+{
+	return req ? req->engine : NULL;
+}
+
+static inline struct drm_i915_gem_request *
+i915_gem_request_reference(struct drm_i915_gem_request *req)
+{
+	if (req)
+		kref_get(&req->ref);
+	return req;
+}
+
+static inline void
+i915_gem_request_unreference(struct drm_i915_gem_request *req)
+{
+	kref_put(&req->ref, i915_gem_request_free);
+}
+
+static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
+					   struct drm_i915_gem_request *src)
+{
+	if (src)
+		i915_gem_request_reference(src);
+
+	if (*pdst)
+		i915_gem_request_unreference(*pdst);
+
+	*pdst = src;
+}
+
+void __i915_add_request(struct drm_i915_gem_request *req,
+			struct drm_i915_gem_object *batch_obj,
+			bool flush_caches);
+#define i915_add_request(req) \
+	__i915_add_request(req, NULL, true)
+#define i915_add_request_no_flush(req) \
+	__i915_add_request(req, NULL, false)
+
+struct intel_rps_client;
+
+int __i915_wait_request(struct drm_i915_gem_request *req,
+			bool interruptible,
+			s64 *timeout,
+			struct intel_rps_client *rps);
+int __must_check i915_wait_request(struct drm_i915_gem_request *req);
+
+static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
+
+/**
+ * Returns true if seq1 is later than or equal to seq2 (wraparound-safe).
+ */
+static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
+{
+	return (s32)(seq1 - seq2) >= 0;
+}
+
+static inline bool
+i915_gem_request_started(const struct drm_i915_gem_request *req)
+{
+	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
+				 req->previous_seqno);
+}
+
+static inline bool
+i915_gem_request_completed(const struct drm_i915_gem_request *req)
+{
+	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
+				 req->seqno);
+}
+
+bool __i915_spin_request(const struct drm_i915_gem_request *request,
+			 int state, unsigned long timeout_us);
+static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
+				     int state, unsigned long timeout_us)
+{
+	return (i915_gem_request_started(request) &&
+		__i915_spin_request(request, state, timeout_us));
+}
+
+#endif /* I915_GEM_REQUEST_H */
-- 
2.8.1

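The wraparound-safe seqno comparison in the new header is worth a worked
example: the subtraction is evaluated modulo 2^32 and then interpreted
as signed, so "later" stays correct across the u32 wrap. A minimal
standalone sketch of the same arithmetic (hypothetical userspace C, not
part of the patch):

/* Mirrors the comparison in i915_seqno_passed() above. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool seqno_passed(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) >= 0;
}

int main(void)
{
	assert(seqno_passed(2, 1));		/* later */
	assert(seqno_passed(5, 5));		/* equal also passes */
	assert(!seqno_passed(1, 2));		/* earlier */
	assert(seqno_passed(3, 0xfffffffeu));	/* across the u32 wrap */
	return 0;
}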

* [PATCH 3/9] drm/i915: Retire oldest completed request before allocating next
  2016-07-15 10:11 Derive requests from dma-buf fence Chris Wilson
  2016-07-15 10:11 ` [PATCH 1/9] drm/i915: Flush logical context image out to memory upon suspend Chris Wilson
  2016-07-15 10:11 ` [PATCH 2/9] drm/i915: Move GEM request routines to i915_gem_request.c Chris Wilson
@ 2016-07-15 10:11 ` Chris Wilson
  2016-07-15 10:11 ` [PATCH 4/9] drm/i915: Mark all current requests as complete before resetting them Chris Wilson
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2016-07-15 10:11 UTC (permalink / raw)
  To: intel-gfx

In order to keep the memory allocated for requests reasonably tight, try
to reuse the oldest request (so long as it is completed and has no
external references) for the next allocation.

v2: Throw in a comment to hopefully make sure no one mistakes the
optimistic retirement of the oldest request for simply stealing it.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 9e9aa6b725f7..5cbb11ece60a 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -226,6 +226,14 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
 	if (ret)
 		return ret;
 
+	/* Move the oldest request to the slab-cache (if not in use!) */
+	if (!list_empty(&engine->request_list)) {
+		req = list_first_entry(&engine->request_list,
+				       typeof(*req), list);
+		if (i915_gem_request_completed(req))
+			i915_gem_request_retire(req);
+	}
+
 	req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
 	if (!req)
 		return -ENOMEM;
-- 
2.8.1

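The pattern above generalises: before allocating a new tracking
structure, opportunistically release the oldest one if it is provably
finished, so the allocator can hand the same memory straight back. A
minimal userspace sketch of the idea (hypothetical names, not the
driver code):

#include <stdbool.h>
#include <stdlib.h>

struct req {
	struct req *next;
	bool completed;
};

static struct req *oldest;	/* head of an oldest-first list */

static struct req *req_alloc(void)
{
	/* Recycle the oldest entry first, mirroring the slab-cache
	 * reuse in __i915_gem_request_alloc() above.
	 */
	if (oldest && oldest->completed) {
		struct req *done = oldest;

		oldest = done->next;
		free(done);
	}
	return calloc(1, sizeof(struct req));
}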

* [PATCH 4/9] drm/i915: Mark all current requests as complete before resetting them
  2016-07-15 10:11 Derive requests from dma-buf fence Chris Wilson
                   ` (2 preceding siblings ...)
  2016-07-15 10:11 ` [PATCH 3/9] drm/i915: Retire oldest completed request before allocating next Chris Wilson
@ 2016-07-15 10:11 ` Chris Wilson
  2016-07-15 10:11 ` [PATCH 5/9] drm/i915: Derive GEM requests from dma-fence Chris Wilson
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2016-07-15 10:11 UTC (permalink / raw)
  To: intel-gfx

Currently, following a GPU reset after a hang, we retire all the
requests and only then mark them as complete. If we instead mark them
as complete first, we both keep the normal retirement order (completed
first, then retired) and provide a small optimisation for concurrent
lookups.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 10e5db3f294e..40cc3da68611 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2486,6 +2486,12 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 		i915_gem_object_retire__read(obj, engine->id);
 	}
 
+	/* Mark all pending requests as complete so that any concurrent
+	 * (lockless) lookup doesn't try to wait upon the request as we
+	 * reset it.
+	 */
+	intel_ring_init_seqno(engine, engine->last_submitted_seqno);
+
 	/*
 	 * Clear the execlists queue up before freeing the requests, as those
 	 * are the ones that keep the context and ringbuffer backing objects
@@ -2528,8 +2534,6 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 		intel_ring_update_space(buffer);
 	}
 
-	intel_ring_init_seqno(engine, engine->last_submitted_seqno);
-
 	engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
 }
 
-- 
2.8.1

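The reordering matters because a lockless waiter checks completion
before it sleeps. A rough userspace model of the race being closed
(illustrative only; the atomic stands in for the HWS breadcrumb):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static _Atomic uint32_t hws_seqno;	/* models the HWS breadcrumb */

static bool request_completed(uint32_t seqno)
{
	return (int32_t)(atomic_load(&hws_seqno) - seqno) >= 0;
}

static void reset_engine(uint32_t last_submitted_seqno)
{
	/* Publish completion of everything submitted so far BEFORE
	 * tearing requests down; a concurrent request_completed()
	 * then reports true instead of waiting on a reset request.
	 */
	atomic_store(&hws_seqno, last_submitted_seqno);
	/* ... now clear the execlists queue and retire requests ... */
}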

* [PATCH 5/9] drm/i915: Derive GEM requests from dma-fence
  2016-07-15 10:11 Derive requests from dma-buf fence Chris Wilson
                   ` (3 preceding siblings ...)
  2016-07-15 10:11 ` [PATCH 4/9] drm/i915: Mark all current requests as complete before resetting them Chris Wilson
@ 2016-07-15 10:11 ` Chris Wilson
  2016-07-15 10:11 ` [PATCH 6/9] drm/i915: Disable waitboosting for fence_wait() Chris Wilson
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2016-07-15 10:11 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, Jesse Barnes

dma-buf provides a generic fence class for interoperation between
drivers. Internally we use the request structure as a fence, and so with
only a little bit of interfacing we can rebase those requests on top of
dma-buf fences. This will allow us, in the future, to pass those fences
back to userspace or between drivers.

v2: The fence_context needs to be globally unique, not just unique to
this device.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |   2 +-
 drivers/gpu/drm/i915/i915_gem_request.c    | 113 ++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_gem_request.h    |  43 +++++++----
 drivers/gpu/drm/i915/i915_gpu_error.c      |   2 +-
 drivers/gpu/drm/i915/i915_guc_submission.c |   4 +-
 drivers/gpu/drm/i915/i915_trace.h          |  10 +--
 drivers/gpu/drm/i915/intel_breadcrumbs.c   |   7 +-
 drivers/gpu/drm/i915/intel_engine_cs.c     |   2 +
 drivers/gpu/drm/i915/intel_lrc.c           |   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c    |  10 +--
 drivers/gpu/drm/i915/intel_ringbuffer.h    |   1 +
 11 files changed, 150 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 90aef4540193..55fd3d9cc448 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -768,7 +768,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
 			if (req->pid)
 				task = pid_task(req->pid, PIDTYPE_PID);
 			seq_printf(m, "    %x @ %d: %s [%d]\n",
-				   req->seqno,
+				   req->fence.seqno,
 				   (int) (jiffies - req->emitted_jiffies),
 				   task ? task->comm : "<unknown>",
 				   task ? task->pid : -1);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 5cbb11ece60a..6528536878f2 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -24,6 +24,95 @@
 
 #include "i915_drv.h"
 
+static const char *i915_fence_get_driver_name(struct fence *fence)
+{
+	return "i915";
+}
+
+static const char *i915_fence_get_timeline_name(struct fence *fence)
+{
+	/* Timelines are bound by eviction to a VM. However, since
+	 * we only have a global seqno at the moment, we only have
+	 * a single timeline. Note that each timeline will have
+	 * multiple execution contexts (fence contexts) as we allow
+	 * engines within a single timeline to execute in parallel.
+	 */
+	return "global";
+}
+
+static bool i915_fence_signaled(struct fence *fence)
+{
+	return i915_gem_request_completed(to_request(fence));
+}
+
+static bool i915_fence_enable_signaling(struct fence *fence)
+{
+	if (i915_fence_signaled(fence))
+		return false;
+
+	intel_engine_enable_signaling(to_request(fence));
+	return true;
+}
+
+static signed long i915_fence_wait(struct fence *fence,
+				   bool interruptible,
+				   signed long timeout_jiffies)
+{
+	s64 timeout_ns, *timeout;
+	int ret;
+
+	if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
+		timeout_ns = jiffies_to_nsecs(timeout_jiffies);
+		timeout = &timeout_ns;
+	} else {
+		timeout = NULL;
+	}
+
+	ret = __i915_wait_request(to_request(fence),
+				  interruptible, timeout,
+				  NULL);
+	if (ret == -ETIME)
+		return 0;
+
+	if (ret < 0)
+		return ret;
+
+	if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT)
+		timeout_jiffies = nsecs_to_jiffies(timeout_ns);
+
+	return timeout_jiffies;
+}
+
+static void i915_fence_value_str(struct fence *fence, char *str, int size)
+{
+	snprintf(str, size, "%u", fence->seqno);
+}
+
+static void i915_fence_timeline_value_str(struct fence *fence, char *str,
+					  int size)
+{
+	snprintf(str, size, "%u",
+		 intel_engine_get_seqno(to_request(fence)->engine));
+}
+
+static void i915_fence_release(struct fence *fence)
+{
+	struct drm_i915_gem_request *req = to_request(fence);
+
+	kmem_cache_free(req->i915->requests, req);
+}
+
+const struct fence_ops i915_fence_ops = {
+	.get_driver_name = i915_fence_get_driver_name,
+	.get_timeline_name = i915_fence_get_timeline_name,
+	.enable_signaling = i915_fence_enable_signaling,
+	.signaled = i915_fence_signaled,
+	.wait = i915_fence_wait,
+	.release = i915_fence_release,
+	.fence_value_str = i915_fence_value_str,
+	.timeline_value_str = i915_fence_timeline_value_str,
+};
+
 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
 				   struct drm_file *file)
 {
@@ -211,6 +300,7 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
 	struct drm_i915_private *dev_priv = engine->i915;
 	unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error);
 	struct drm_i915_gem_request *req;
+	u32 seqno;
 	int ret;
 
 	if (!req_out)
@@ -238,11 +328,17 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
 	if (!req)
 		return -ENOMEM;
 
-	ret = i915_gem_get_seqno(dev_priv, &req->seqno);
+	ret = i915_gem_get_seqno(dev_priv, &seqno);
 	if (ret)
 		goto err;
 
-	kref_init(&req->ref);
+	spin_lock_init(&req->lock);
+	fence_init(&req->fence,
+		   &i915_fence_ops,
+		   &req->lock,
+		   engine->fence_context,
+		   seqno);
+
 	req->i915 = dev_priv;
 	req->engine = engine;
 	req->ctx = ctx;
@@ -385,7 +481,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 	 */
 	request->emitted_jiffies = jiffies;
 	request->previous_seqno = engine->last_submitted_seqno;
-	smp_store_mb(engine->last_submitted_seqno, request->seqno);
+	smp_store_mb(engine->last_submitted_seqno, request->fence.seqno);
 	list_add_tail(&request->list, &engine->request_list);
 
 	/* Record the position of the start of the request so that
@@ -556,7 +652,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	set_current_state(state);
 	add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
 
-	intel_wait_init(&wait, req->seqno);
+	intel_wait_init(&wait, req->fence.seqno);
 	if (intel_engine_add_wait(req->engine, &wait))
 		/* In order to check that we haven't missed the interrupt
 		 * as we enabled it, we need to kick ourselves to do a
@@ -617,7 +713,7 @@ complete:
 			*timeout = 0;
 	}
 
-	if (rps && req->seqno == req->engine->last_submitted_seqno) {
+	if (rps && req->fence.seqno == req->engine->last_submitted_seqno) {
 		/* The GPU is now idle and this client has stalled.
 		 * Since no other client has submitted a request in the
 		 * meantime, assume that this client is the only one
@@ -657,10 +753,3 @@ int i915_wait_request(struct drm_i915_gem_request *req)
 
 	return 0;
 }
-
-void i915_gem_request_free(struct kref *req_ref)
-{
-	struct drm_i915_gem_request *req =
-		container_of(req_ref, typeof(*req), ref);
-	kmem_cache_free(req->i915->requests, req);
-}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index ea700befcc28..6f2c820785f3 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -25,6 +25,10 @@
 #ifndef I915_GEM_REQUEST_H
 #define I915_GEM_REQUEST_H
 
+#include <linux/fence.h>
+
+#include "i915_gem.h"
+
 /**
  * Request queue structure.
  *
@@ -36,11 +40,11 @@
  * emission time to be associated with the request for tracking how far ahead
  * of the GPU the submission is.
  *
- * The requests are reference counted, so upon creation they should have an
- * initial reference taken using kref_init
+ * The requests are reference counted.
  */
 struct drm_i915_gem_request {
-	struct kref ref;
+	struct fence fence;
+	spinlock_t lock;
 
 	/** On which ring this request was generated */
 	struct drm_i915_private *i915;
@@ -66,12 +70,6 @@ struct drm_i915_gem_request {
 	 */
 	u32 previous_seqno;
 
-	/** GEM sequence number associated with this request,
-	 * when the HWS breadcrumb is equal to or greater than this, the GPU
-	 * has finished processing this request.
-	 */
-	u32 seqno;
-
 	/** Position in the ringbuffer of the start of the request */
 	u32 head;
 
@@ -140,10 +138,16 @@ struct drm_i915_gem_request {
 	unsigned int ctx_hw_id;
 };
 
+extern const struct fence_ops i915_fence_ops;
+
+static inline bool fence_is_i915(struct fence *fence)
+{
+	return fence->ops == &i915_fence_ops;
+}
+
 struct drm_i915_gem_request * __must_check
 i915_gem_request_alloc(struct intel_engine_cs *engine,
 		       struct i915_gem_context *ctx);
-void i915_gem_request_free(struct kref *req_ref);
 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
 				   struct drm_file *file);
 void i915_gem_request_retire_upto(struct drm_i915_gem_request *req);
@@ -151,7 +155,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req);
 static inline u32
 i915_gem_request_get_seqno(struct drm_i915_gem_request *req)
 {
-	return req ? req->seqno : 0;
+	return req ? req->fence.seqno : 0;
 }
 
 static inline struct intel_engine_cs *
@@ -161,17 +165,24 @@ i915_gem_request_get_engine(struct drm_i915_gem_request *req)
 }
 
 static inline struct drm_i915_gem_request *
+to_request(struct fence *fence)
+{
+	/* We assume that NULL fence/request are interoperable */
+	BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0);
+	GEM_BUG_ON(fence && !fence_is_i915(fence));
+	return container_of(fence, struct drm_i915_gem_request, fence);
+}
+
+static inline struct drm_i915_gem_request *
 i915_gem_request_reference(struct drm_i915_gem_request *req)
 {
-	if (req)
-		kref_get(&req->ref);
-	return req;
+	return to_request(fence_get(&req->fence));
 }
 
 static inline void
 i915_gem_request_unreference(struct drm_i915_gem_request *req)
 {
-	kref_put(&req->ref, i915_gem_request_free);
+	fence_put(&req->fence);
 }
 
 static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
@@ -223,7 +234,7 @@ static inline bool
 i915_gem_request_completed(const struct drm_i915_gem_request *req)
 {
 	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
-				 req->seqno);
+				 req->fence.seqno);
 }
 
 bool __i915_spin_request(const struct drm_i915_gem_request *request,
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 9d73d2216adc..6daaf4ecd2da 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1182,7 +1182,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 			}
 
 			erq = &error->ring[i].requests[count++];
-			erq->seqno = request->seqno;
+			erq->seqno = request->fence.seqno;
 			erq->jiffies = request->emitted_jiffies;
 			erq->tail = request->postfix;
 		}
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 2112e029db6a..1cc5de129e76 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -506,7 +506,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
 							     rq->engine);
 
 	wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
-	wqi->fence_id = rq->seqno;
+	wqi->fence_id = rq->fence.seqno;
 
 	kunmap_atomic(base);
 }
@@ -601,7 +601,7 @@ int i915_guc_submit(struct drm_i915_gem_request *rq)
 		client->b_fail += 1;
 
 	guc->submissions[engine_id] += 1;
-	guc->last_seqno[engine_id] = rq->seqno;
+	guc->last_seqno[engine_id] = rq->fence.seqno;
 
 	return b_ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 534154e05fbe..007112d1e049 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -465,7 +465,7 @@ TRACE_EVENT(i915_gem_ring_sync_to,
 			   __entry->dev = from->i915->drm.primary->index;
 			   __entry->sync_from = from->id;
 			   __entry->sync_to = to_req->engine->id;
-			   __entry->seqno = i915_gem_request_get_seqno(req);
+			   __entry->seqno = req->fence.seqno;
 			   ),
 
 	    TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u",
@@ -488,9 +488,9 @@ TRACE_EVENT(i915_gem_ring_dispatch,
 	    TP_fast_assign(
 			   __entry->dev = req->i915->drm.primary->index;
 			   __entry->ring = req->engine->id;
-			   __entry->seqno = req->seqno;
+			   __entry->seqno = req->fence.seqno;
 			   __entry->flags = flags;
-			   intel_engine_enable_signaling(req);
+			   fence_enable_sw_signaling(&req->fence);
 			   ),
 
 	    TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x",
@@ -533,7 +533,7 @@ DECLARE_EVENT_CLASS(i915_gem_request,
 	    TP_fast_assign(
 			   __entry->dev = req->i915->drm.primary->index;
 			   __entry->ring = req->engine->id;
-			   __entry->seqno = req->seqno;
+			   __entry->seqno = req->fence.seqno;
 			   ),
 
 	    TP_printk("dev=%u, ring=%u, seqno=%u",
@@ -595,7 +595,7 @@ TRACE_EVENT(i915_gem_request_wait_begin,
 	    TP_fast_assign(
 			   __entry->dev = req->i915->drm.primary->index;
 			   __entry->ring = req->engine->id;
-			   __entry->seqno = req->seqno;
+			   __entry->seqno = req->fence.seqno;
 			   __entry->blocking =
 				     mutex_is_locked(&req->i915->drm.struct_mutex);
 			   ),
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index b074f3d6d127..32ada41dedfb 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -436,6 +436,7 @@ static int intel_breadcrumbs_signaler(void *arg)
 			 */
 			intel_engine_remove_wait(engine,
 						 &request->signaling.wait);
+			fence_signal(&request->fence);
 
 			/* Find the next oldest signal. Note that as we have
 			 * not been holding the lock, another client may
@@ -482,7 +483,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 	}
 
 	request->signaling.wait.tsk = b->signaler;
-	request->signaling.wait.seqno = request->seqno;
+	request->signaling.wait.seqno = request->fence.seqno;
 	i915_gem_request_reference(request);
 
 	/* First add ourselves into the list of waiters, but register our
@@ -504,8 +505,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 	p = &b->signals.rb_node;
 	while (*p) {
 		parent = *p;
-		if (i915_seqno_passed(request->seqno,
-				      to_signaler(parent)->seqno)) {
+		if (i915_seqno_passed(request->fence.seqno,
+				      to_signaler(parent)->fence.seqno)) {
 			p = &parent->rb_right;
 			first = false;
 		} else {
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index e3c9f04ea51e..f4a35ec78481 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -182,6 +182,8 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
 	INIT_LIST_HEAD(&engine->execlist_queue);
 	spin_lock_init(&engine->execlist_lock);
 
+	engine->fence_context = fence_context_alloc(1);
+
 	intel_engine_init_hangcheck(engine);
 	i915_gem_batch_pool_init(&engine->i915->drm, &engine->batch_pool);
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b6af635e3a0f..b82c12e38e4b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1827,7 +1827,7 @@ static int gen8_emit_request(struct drm_i915_gem_request *request)
 				intel_hws_seqno_address(request->engine) |
 				MI_FLUSH_DW_USE_GTT);
 	intel_logical_ring_emit(ringbuf, 0);
-	intel_logical_ring_emit(ringbuf, request->seqno);
+	intel_logical_ring_emit(ringbuf, request->fence.seqno);
 	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
 	intel_logical_ring_emit(ringbuf, MI_NOOP);
 	return intel_logical_ring_advance_and_submit(request);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 94c8ef461721..af0bd71e3a36 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1348,7 +1348,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
 					   PIPE_CONTROL_CS_STALL);
 		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
 		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
-		intel_ring_emit(signaller, signaller_req->seqno);
+		intel_ring_emit(signaller, signaller_req->fence.seqno);
 		intel_ring_emit(signaller, 0);
 		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
 					   MI_SEMAPHORE_TARGET(waiter->hw_id));
@@ -1386,7 +1386,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
 		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
 					   MI_FLUSH_DW_USE_GTT);
 		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
-		intel_ring_emit(signaller, signaller_req->seqno);
+		intel_ring_emit(signaller, signaller_req->fence.seqno);
 		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
 					   MI_SEMAPHORE_TARGET(waiter->hw_id));
 		intel_ring_emit(signaller, 0);
@@ -1419,7 +1419,7 @@ static int gen6_signal(struct drm_i915_gem_request *signaller_req,
 		if (i915_mmio_reg_valid(mbox_reg)) {
 			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
 			intel_ring_emit_reg(signaller, mbox_reg);
-			intel_ring_emit(signaller, signaller_req->seqno);
+			intel_ring_emit(signaller, signaller_req->fence.seqno);
 		}
 	}
 
@@ -1455,7 +1455,7 @@ gen6_add_request(struct drm_i915_gem_request *req)
 	intel_ring_emit(engine, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(engine,
 			I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(engine, req->seqno);
+	intel_ring_emit(engine, req->fence.seqno);
 	intel_ring_emit(engine, MI_USER_INTERRUPT);
 	__intel_ring_advance(engine);
 
@@ -1704,7 +1704,7 @@ i9xx_add_request(struct drm_i915_gem_request *req)
 	intel_ring_emit(engine, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(engine,
 			I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(engine, req->seqno);
+	intel_ring_emit(engine, req->fence.seqno);
 	intel_ring_emit(engine, MI_USER_INTERRUPT);
 	__intel_ring_advance(engine);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index df7587ab1ba7..5cbafc00bfd5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -146,6 +146,7 @@ struct intel_engine_cs {
 	unsigned int exec_id;
 	unsigned int hw_id;
 	unsigned int guc_id; /* XXX same as hw_id? */
+	u64 fence_context;
 	u32		mmio_base;
 	unsigned int irq_shift;
 	struct intel_ringbuffer *buffer;
-- 
2.8.1

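Embedding a struct fence means any core-kernel consumer can now treat a
request through the generic API without knowing about i915, which is
the point of the exercise even though the fences stay private for now.
A hedged sketch of such a consumer, assuming only the stock
linux/fence.h interface:

#include <linux/fence.h>

/* Works on any fence, i915-backed or otherwise. */
static long wait_on_fence(struct fence *fence)
{
	if (fence_is_signaled(fence))
		return 0;	/* fast path: no signaling setup */

	/* fence_wait() returns 0 on success or a negative error. */
	return fence_wait(fence, true /* interruptible */);
}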

* [PATCH 6/9] drm/i915: Disable waitboosting for fence_wait()
  2016-07-15 10:11 Derive requests from dma-buf fence Chris Wilson
                   ` (4 preceding siblings ...)
  2016-07-15 10:11 ` [PATCH 5/9] drm/i915: Derive GEM requests from dma-fence Chris Wilson
@ 2016-07-15 10:11 ` Chris Wilson
  2016-07-15 10:47   ` Joonas Lahtinen
                     ` (2 more replies)
  2016-07-15 10:11 ` [PATCH 7/9] drm/i915: Disable waitboosting for mmioflips/semaphores Chris Wilson
                   ` (3 subsequent siblings)
  9 siblings, 3 replies; 29+ messages in thread
From: Chris Wilson @ 2016-07-15 10:11 UTC (permalink / raw)
  To: intel-gfx

We want to restrict waitboosting to known process contexts, where we can
track which clients are receiving waitboosts and prevent excessive
waste of power. For fence_wait() we do not have any client tracking,
which leaves it open to abuse.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 7 ++++---
 drivers/gpu/drm/i915/i915_gem_request.h | 1 +
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 6528536878f2..bfec4a88707d 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -70,7 +70,7 @@ static signed long i915_fence_wait(struct fence *fence,
 
 	ret = __i915_wait_request(to_request(fence),
 				  interruptible, timeout,
-				  NULL);
+				  NO_WAITBOOST);
 	if (ret == -ETIME)
 		return 0;
 
@@ -642,7 +642,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	 * forcing the clocks too high for the whole system, we only allow
 	 * each client to waitboost once in a busy period.
 	 */
-	if (INTEL_GEN(req->i915) >= 6)
+	if (!IS_ERR(rps) && INTEL_GEN(req->i915) >= 6)
 		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
 
 	/* Optimistic spin for the next ~jiffie before touching IRQs */
@@ -713,7 +713,8 @@ complete:
 			*timeout = 0;
 	}
 
-	if (rps && req->fence.seqno == req->engine->last_submitted_seqno) {
+	if (!IS_ERR_OR_NULL(rps) &&
+	    req->fence.seqno == req->engine->last_submitted_seqno) {
 		/* The GPU is now idle and this client has stalled.
 		 * Since no other client has submitted a request in the
 		 * meantime, assume that this client is the only one
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 6f2c820785f3..f7f497a07893 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -206,6 +206,7 @@ void __i915_add_request(struct drm_i915_gem_request *req,
 	__i915_add_request(req, NULL, false)
 
 struct intel_rps_client;
+#define NO_WAITBOOST ERR_PTR(-1)
 
 int __i915_wait_request(struct drm_i915_gem_request *req,
 			bool interruptible,
-- 
2.8.1

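The NO_WAITBOOST definition works because ERR_PTR-style poisoning gives
a third pointer state alongside NULL ("anonymous, may boost") and a
real client. A small hypothetical userspace rendering of the same
three-way convention:

#include <stdio.h>

struct rps_client { int boosts; };

/* Poisoned value: distinct from NULL and from any valid pointer. */
#define NO_BOOST ((struct rps_client *)-1)

static void maybe_boost(struct rps_client *client)
{
	if (client == NO_BOOST)
		return;			/* caller opted out entirely */

	if (client)
		client->boosts++;	/* known client: account it */
	else
		printf("anonymous kernel boost\n");
}

int main(void)
{
	struct rps_client c = { 0 };

	maybe_boost(&c);	/* accounted against the client */
	maybe_boost(NULL);	/* anonymous, still boosts */
	maybe_boost(NO_BOOST);	/* never boosts */
	return 0;
}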

* [PATCH 7/9] drm/i915: Disable waitboosting for mmioflips/semaphores
  2016-07-15 10:11 Derive requests from dma-buf fence Chris Wilson
                   ` (5 preceding siblings ...)
  2016-07-15 10:11 ` [PATCH 6/9] drm/i915: Disable waitboosting for fence_wait() Chris Wilson
@ 2016-07-15 10:11 ` Chris Wilson
  2016-07-15 11:08   ` Mika Kuoppala
  2016-07-15 12:26   ` Mika Kuoppala
  2016-07-15 10:11 ` [PATCH 8/9] drm/i915: Wait on external rendering for GEM objects Chris Wilson
                   ` (2 subsequent siblings)
  9 siblings, 2 replies; 29+ messages in thread
From: Chris Wilson @ 2016-07-15 10:11 UTC (permalink / raw)
  To: intel-gfx

Since commit a6f766f39751 ("drm/i915: Limit ring synchronisation (sw
sempahores) RPS boosts") and commit bcafc4e38b6a ("drm/i915: Limit mmio
flip RPS boosts") we have limited the waitboosting for semaphores and
flips. Ideally we do not want to boost in either of these instances as no
consumer is waiting upon the results. With the introduction of NO_WAITBOOST
in the previous patch, we can finally disable these needless boosts.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c  | 8 +-------
 drivers/gpu/drm/i915/i915_drv.h      | 2 --
 drivers/gpu/drm/i915/i915_gem.c      | 2 +-
 drivers/gpu/drm/i915/intel_display.c | 2 +-
 drivers/gpu/drm/i915/intel_pm.c      | 2 --
 5 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 55fd3d9cc448..618f8cf210fc 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2465,13 +2465,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 			   list_empty(&file_priv->rps.link) ? "" : ", active");
 		rcu_read_unlock();
 	}
-	seq_printf(m, "Semaphore boosts: %d%s\n",
-		   dev_priv->rps.semaphores.boosts,
-		   list_empty(&dev_priv->rps.semaphores.link) ? "" : ", active");
-	seq_printf(m, "MMIO flip boosts: %d%s\n",
-		   dev_priv->rps.mmioflips.boosts,
-		   list_empty(&dev_priv->rps.mmioflips.link) ? "" : ", active");
-	seq_printf(m, "Kernel boosts: %d\n", dev_priv->rps.boosts);
+	seq_printf(m, "Kernel (anonymous) boosts: %d\n", dev_priv->rps.boosts);
 	spin_unlock(&dev_priv->rps.client_lock);
 	mutex_unlock(&dev->filelist_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a4767c198413..08d9c081e9c8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1195,8 +1195,6 @@ struct intel_gen6_power_mgmt {
 	struct delayed_work autoenable_work;
 	unsigned boosts;
 
-	struct intel_rps_client semaphores, mmioflips;
-
 	/* manual wa residency calculations */
 	struct intel_rps_ei up_ei, down_ei;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 40cc3da68611..2ec78aa7279e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2849,7 +2849,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
 		ret = __i915_wait_request(from_req,
 					  i915->mm.interruptible,
 					  NULL,
-					  &i915->rps.semaphores);
+					  NO_WAITBOOST);
 		if (ret)
 			return ret;
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 9337d3a7b83d..813c9c3b32ca 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -11473,7 +11473,7 @@ static void intel_mmio_flip_work_func(struct work_struct *w)
 	if (work->flip_queued_req)
 		WARN_ON(__i915_wait_request(work->flip_queued_req,
 					    false, NULL,
-					    &dev_priv->rps.mmioflips));
+					    NO_WAITBOOST));
 
 	/* For framebuffer backed by dmabuf, wait for fence */
 	resv = i915_gem_object_get_dmabuf_resv(obj);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index fa6b341c2792..a1bf5f8fbb1c 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -7810,8 +7810,6 @@ void intel_pm_setup(struct drm_device *dev)
 	INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work,
 			  __intel_autoenable_gt_powersave);
 	INIT_LIST_HEAD(&dev_priv->rps.clients);
-	INIT_LIST_HEAD(&dev_priv->rps.semaphores.link);
-	INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);
 
 	dev_priv->pm.suspended = false;
 	atomic_set(&dev_priv->pm.wakeref_count, 0);
-- 
2.8.1


* [PATCH 8/9] drm/i915: Wait on external rendering for GEM objects
  2016-07-15 10:11 Derive requests from dma-buf fence Chris Wilson
                   ` (6 preceding siblings ...)
  2016-07-15 10:11 ` [PATCH 7/9] drm/i915: Disable waitboosting for mmioflips/semaphores Chris Wilson
@ 2016-07-15 10:11 ` Chris Wilson
  2016-07-18 10:18   ` Chris Wilson
  2016-07-15 10:11 ` [PATCH 9/9] drm/i915: Mark imported dma-buf objects as being coherent Chris Wilson
  2016-07-15 10:48 ` ✗ Ro.CI.BAT: failure for series starting with [1/9] drm/i915: Flush logical context image out to memory upon suspend Patchwork
  9 siblings, 1 reply; 29+ messages in thread
From: Chris Wilson @ 2016-07-15 10:11 UTC (permalink / raw)
  To: intel-gfx

When transitioning to the GTT or CPU domain we wait on all rendering
from i915 to complete (with the optimisation of allowing concurrent read
access by both the GPU and client). We don't yet ensure all rendering
from third parties (tracked by implicit fences on the dma-buf) is
complete. Since implicitly tracked rendering by third parties will
ignore our cache-domain tracking, we always have to wait upon rendering
from third parties when transitioning to direct access to the backing
store. We still rely on clients notifying us of cache domain changes
(i.e. they need to move to the GTT read or write domain after doing a CPU
access before letting the third party render again).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 42 ++++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2ec78aa7279e..c9ff9e3f348a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -29,10 +29,12 @@
 #include <drm/drm_vma_manager.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
+#include "i915_gem_dmabuf.h"
 #include "i915_vgpu.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include "intel_mocs.h"
+#include <linux/reservation.h>
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
@@ -511,6 +513,10 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 	if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
 		return -EINVAL;
 
+	ret = i915_gem_object_wait_rendering(obj, true);
+	if (ret)
+		return ret;
+
 	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
 		/* If we're not in the cpu read domain, set ourself into the gtt
 		 * read domain and manually flush cachelines (if required). This
@@ -518,9 +524,6 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 		 * anyway again before the next pread happens. */
 		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
 							obj->cache_level);
-		ret = i915_gem_object_wait_rendering(obj, true);
-		if (ret)
-			return ret;
 	}
 
 	ret = i915_gem_object_get_pages(obj);
@@ -1132,15 +1135,16 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 
 	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 
+	ret = i915_gem_object_wait_rendering(obj, false);
+	if (ret)
+		return ret;
+
 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
 		/* If we're not in the cpu write domain, set ourself into the gtt
 		 * write domain and manually flush cachelines (if required). This
 		 * optimizes for the case when the gpu will use the data
 		 * right away and we therefore have to clflush anyway. */
 		needs_clflush_after = cpu_write_needs_clflush(obj);
-		ret = i915_gem_object_wait_rendering(obj, false);
-		if (ret)
-			return ret;
 	}
 	/* Same trick applies to invalidate partially written cachelines read
 	 * before writing. */
@@ -1335,11 +1339,9 @@ int
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 			       bool readonly)
 {
+	struct reservation_object *resv;
 	int ret, i;
 
-	if (!obj->active)
-		return 0;
-
 	if (readonly) {
 		if (obj->last_write_req != NULL) {
 			ret = i915_wait_request(obj->last_write_req);
@@ -1366,6 +1368,16 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 		GEM_BUG_ON(obj->active);
 	}
 
+	resv = i915_gem_object_get_dmabuf_resv(obj);
+	if (resv) {
+		long err;
+
+		err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
+							  MAX_SCHEDULE_TIMEOUT);
+		if (err < 0)
+			return err;
+	}
+
 	return 0;
 }
 
@@ -3402,13 +3414,13 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	struct i915_vma *vma;
 	int ret;
 
-	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
-		return 0;
-
 	ret = i915_gem_object_wait_rendering(obj, !write);
 	if (ret)
 		return ret;
 
+	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
+		return 0;
+
 	/* Flush and acquire obj->pages so that we are coherent through
 	 * direct access in memory with previous cached writes through
 	 * shmemfs and that our cache domain tracking remains valid.
@@ -3752,13 +3764,13 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	uint32_t old_write_domain, old_read_domains;
 	int ret;
 
-	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
-		return 0;
-
 	ret = i915_gem_object_wait_rendering(obj, !write);
 	if (ret)
 		return ret;
 
+	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+		return 0;
+
 	i915_gem_object_flush_gtt_write_domain(obj);
 
 	old_write_domain = obj->base.write_domain;
-- 
2.8.1

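For anyone wiring up a similar importer path, the reservation wait used
above follows the usual convention: wait on all fences before a write,
but only on the exclusive fence before a read. A hedged kernel-style
sketch using only the stock reservation API (nothing i915-specific):

#include <linux/reservation.h>
#include <linux/sched.h>

static int wait_for_external_rendering(struct reservation_object *resv,
				       bool write)
{
	long ret;

	ret = reservation_object_wait_timeout_rcu(resv,
						  write, /* wait_all */
						  true,  /* interruptible */
						  MAX_SCHEDULE_TIMEOUT);
	return ret < 0 ? ret : 0;
}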

* [PATCH 9/9] drm/i915: Mark imported dma-buf objects as being coherent
  2016-07-15 10:11 Derive requests from dma-buf fence Chris Wilson
                   ` (7 preceding siblings ...)
  2016-07-15 10:11 ` [PATCH 8/9] drm/i915: Wait on external rendering for GEM objects Chris Wilson
@ 2016-07-15 10:11 ` Chris Wilson
  2016-07-15 11:33   ` Mika Kuoppala
  2016-07-15 10:48 ` ✗ Ro.CI.BAT: failure for series starting with [1/9] drm/i915: Flush logical context image out to memory upon suspend Patchwork
  9 siblings, 1 reply; 29+ messages in thread
From: Chris Wilson @ 2016-07-15 10:11 UTC (permalink / raw)
  To: intel-gfx

A foreign dma-buf does not share our cache domain tracking, and we rely
on the producer ensuring cache coherency. Marking such imported objects
as being in the CPU domain is therefore incorrect.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_dmabuf.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 80bbe43a2e92..c7d734248ed0 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -299,6 +299,8 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 	drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
 	i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
 	obj->base.import_attach = attach;
+	obj->base.read_domains = I915_GEM_DOMAIN_GTT;
+	obj->base.write_domain = 0;
 
 	return &obj->base;
 
-- 
2.8.1


* Re: [PATCH 1/9] drm/i915: Flush logical context image out to memory upon suspend
  2016-07-15 10:11 ` [PATCH 1/9] drm/i915: Flush logical context image out to memory upon suspend Chris Wilson
@ 2016-07-15 10:41   ` Mika Kuoppala
  2016-07-15 11:11     ` Chris Wilson
  0 siblings, 1 reply; 29+ messages in thread
From: Mika Kuoppala @ 2016-07-15 10:41 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Before suspend, and especially before building the hibernation image, we
> need to context image to be coherent in memory. To do this we require
> that we perform a context switch to a disposable context (i.e. the
> dev_priv->kernel_context) - when that switch is complete, all other
> context images will be complete. This leaves the kernel_context image as
> incomplete, but fortunately that is disposable and we can do a quick
> fixup of the logical state after resuming.
>
> Testcase: igt/gem_exec_suspend # bsw
> References: https://bugs.freedesktop.org/show_bug.cgi?id=96526
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.c |  4 +---
>  drivers/gpu/drm/i915/i915_drv.h |  1 +
>  drivers/gpu/drm/i915/i915_gem.c | 46 +++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 48 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 15440123e38d..c5b7b8e0678a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1590,9 +1590,7 @@ static int i915_drm_resume(struct drm_device *dev)
>  
>  	intel_csr_ucode_resume(dev_priv);
>  
> -	mutex_lock(&dev->struct_mutex);
> -	i915_gem_restore_gtt_mappings(dev);
> -	mutex_unlock(&dev->struct_mutex);
> +	i915_gem_resume(dev);
>  
>  	i915_restore_state(dev);
>  	intel_opregion_setup(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 1ec523d29789..e73c0fc84c73 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3384,6 +3384,7 @@ void i915_gem_init_swizzling(struct drm_device *dev);
>  void i915_gem_cleanup_engines(struct drm_device *dev);
>  int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv);
>  int __must_check i915_gem_suspend(struct drm_device *dev);
> +void i915_gem_resume(struct drm_device *dev);
>  void __i915_add_request(struct drm_i915_gem_request *req,
>  			struct drm_i915_gem_object *batch_obj,
>  			bool flush_caches);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index cf0e8aa8035c..8b42a5101f11 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4974,6 +4974,35 @@ i915_gem_stop_engines(struct drm_device *dev)
>  		dev_priv->gt.stop_engine(engine);
>  }
>  
> +static int switch_to_kernel_context(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_engine_cs *engine;
> +
> +	for_each_engine(engine, dev_priv) {
> +		struct drm_i915_gem_request *req;
> +		int ret;
> +
> +		if (engine->last_context == NULL)
> +			continue;
> +
> +		if (engine->last_context == dev_priv->kernel_context)
> +			continue;
> +
> +		req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
> +		if (IS_ERR(req))
> +			return PTR_ERR(req);
> +
> +		ret = 0;
> +		if (!i915.enable_execlists)
> +			ret = i915_switch_context(req);
> +		i915_add_request_no_flush(req);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	return 0;
> +}
> +

Why do you want to keep this separate? Although the evict one is a
little bit different, and as I didn't see any perf implications: why
not consolidate the evict one and this one into an i915_gem_idle which
would park to the default context and wait for idle?

Other than that, this all looks fine.

-Mika



>  int
>  i915_gem_suspend(struct drm_device *dev)
>  {
> @@ -4983,6 +5012,10 @@ i915_gem_suspend(struct drm_device *dev)
>  	intel_suspend_gt_powersave(dev_priv);
>  
>  	mutex_lock(&dev->struct_mutex);
> +	ret = switch_to_kernel_context(dev_priv);
> +	if (ret)
> +		goto err;
> +
>  	ret = i915_gem_wait_for_idle(dev_priv);
>  	if (ret)
>  		goto err;
> @@ -5009,6 +5042,19 @@ err:
>  	return ret;
>  }
>  
> +void i915_gem_resume(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(dev);
> +
> +	mutex_lock(&dev->struct_mutex);
> +
> +	if (i915.enable_execlists)
> +		intel_lr_context_reset(dev_priv, dev_priv->kernel_context);
> +
> +	i915_gem_restore_gtt_mappings(dev);
> +	mutex_unlock(&dev->struct_mutex);
> +}
> +
>  void i915_gem_init_swizzling(struct drm_device *dev)
>  {
>  	struct drm_i915_private *dev_priv = to_i915(dev);
> -- 
> 2.8.1
>

* Re: [PATCH 2/9] drm/i915: Move GEM request routines to i915_gem_request.c
  2016-07-15 10:11 ` [PATCH 2/9] drm/i915: Move GEM request routines to i915_gem_request.c Chris Wilson
@ 2016-07-15 10:41   ` Joonas Lahtinen
  2016-07-15 10:43   ` Mika Kuoppala
  1 sibling, 0 replies; 29+ messages in thread
From: Joonas Lahtinen @ 2016-07-15 10:41 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-07-15 at 11:11 +0100, Chris Wilson wrote:
> Migrate the request operations out of the main body of i915_gem.c and
> into their own C file for easier expansion.
> 
> v2: Move __i915_add_request() across as well
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/Makefile           |   1 +
>  drivers/gpu/drm/i915/i915_drv.h         | 209 +---------
>  drivers/gpu/drm/i915/i915_gem.c         | 655 +------------------------------
>  drivers/gpu/drm/i915/i915_gem_request.c | 658 ++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem_request.h | 238 ++++++++++++
>  5 files changed, 905 insertions(+), 856 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/i915_gem_request.c
>  create mode 100644 drivers/gpu/drm/i915/i915_gem_request.h
> 

Assuming pure code motion, 

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Might wanna CC people who are working with the moved functions.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

* Re: [PATCH 2/9] drm/i915: Move GEM request routines to i915_gem_request.c
  2016-07-15 10:11 ` [PATCH 2/9] drm/i915: Move GEM request routines to i915_gem_request.c Chris Wilson
  2016-07-15 10:41   ` Joonas Lahtinen
@ 2016-07-15 10:43   ` Mika Kuoppala
  1 sibling, 0 replies; 29+ messages in thread
From: Mika Kuoppala @ 2016-07-15 10:43 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Migrate the request operations out of the main body of i915_gem.c and
> into their own C file for easier expansion.
>
> v2: Move __i915_add_request() across as well
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Acked-by: Mika Kuoppala <mika.kuoppala@intel.com>

> ---
>  drivers/gpu/drm/i915/Makefile           |   1 +
>  drivers/gpu/drm/i915/i915_drv.h         | 209 +---------
>  drivers/gpu/drm/i915/i915_gem.c         | 655 +------------------------------
>  drivers/gpu/drm/i915/i915_gem_request.c | 658 ++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem_request.h | 238 ++++++++++++
>  5 files changed, 905 insertions(+), 856 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/i915_gem_request.c
>  create mode 100644 drivers/gpu/drm/i915/i915_gem_request.h
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 75318ebb8d25..6092f0ea24df 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -33,6 +33,7 @@ i915-y += i915_cmd_parser.o \
>  	  i915_gem_gtt.o \
>  	  i915_gem.o \
>  	  i915_gem_render_state.o \
> +	  i915_gem_request.o \
>  	  i915_gem_shrinker.o \
>  	  i915_gem_stolen.o \
>  	  i915_gem_tiling.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index e73c0fc84c73..a4767c198413 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -61,6 +61,7 @@
>  #include "i915_gem.h"
>  #include "i915_gem_gtt.h"
>  #include "i915_gem_render_state.h"
> +#include "i915_gem_request.h"
>  
>  #include "intel_gvt.h"
>  
> @@ -2365,171 +2366,6 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg)
>  	     (((__iter).curr += PAGE_SIZE) < (__iter).max) ||		\
>  	     ((__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0))
>  
> -/**
> - * Request queue structure.
> - *
> - * The request queue allows us to note sequence numbers that have been emitted
> - * and may be associated with active buffers to be retired.
> - *
> - * By keeping this list, we can avoid having to do questionable sequence
> - * number comparisons on buffer last_read|write_seqno. It also allows an
> - * emission time to be associated with the request for tracking how far ahead
> - * of the GPU the submission is.
> - *
> - * The requests are reference counted, so upon creation they should have an
> - * initial reference taken using kref_init
> - */
> -struct drm_i915_gem_request {
> -	struct kref ref;
> -
> -	/** On Which ring this request was generated */
> -	struct drm_i915_private *i915;
> -	struct intel_engine_cs *engine;
> -	struct intel_signal_node signaling;
> -
> -	 /** GEM sequence number associated with the previous request,
> -	  * when the HWS breadcrumb is equal to this the GPU is processing
> -	  * this request.
> -	  */
> -	u32 previous_seqno;
> -
> -	 /** GEM sequence number associated with this request,
> -	  * when the HWS breadcrumb is equal or greater than this the GPU
> -	  * has finished processing this request.
> -	  */
> -	u32 seqno;
> -
> -	/** Position in the ringbuffer of the start of the request */
> -	u32 head;
> -
> -	/**
> -	 * Position in the ringbuffer of the start of the postfix.
> -	 * This is required to calculate the maximum available ringbuffer
> -	 * space without overwriting the postfix.
> -	 */
> -	 u32 postfix;
> -
> -	/** Position in the ringbuffer of the end of the whole request */
> -	u32 tail;
> -
> -	/** Preallocate space in the ringbuffer for emitting the request */
> -	u32 reserved_space;
> -
> -	/**
> -	 * Context and ring buffer related to this request
> -	 * Contexts are refcounted, so when this request is associated with a
> -	 * context, we must increment the context's refcount, to guarantee that
> -	 * it persists while any request is linked to it. Requests themselves
> -	 * are also refcounted, so the request will only be freed when the last
> -	 * reference to it is dismissed, and the code in
> -	 * i915_gem_request_free() will then decrement the refcount on the
> -	 * context.
> -	 */
> -	struct i915_gem_context *ctx;
> -	struct intel_ringbuffer *ringbuf;
> -
> -	/**
> -	 * Context related to the previous request.
> -	 * As the contexts are accessed by the hardware until the switch is
> -	 * completed to a new context, the hardware may still be writing
> -	 * to the context object after the breadcrumb is visible. We must
> -	 * not unpin/unbind/prune that object whilst still active and so
> -	 * we keep the previous context pinned until the following (this)
> -	 * request is retired.
> -	 */
> -	struct i915_gem_context *previous_context;
> -
> -	/** Batch buffer related to this request if any (used for
> -	    error state dump only) */
> -	struct drm_i915_gem_object *batch_obj;
> -
> -	/** Time at which this request was emitted, in jiffies. */
> -	unsigned long emitted_jiffies;
> -
> -	/** global list entry for this request */
> -	struct list_head list;
> -
> -	struct drm_i915_file_private *file_priv;
> -	/** file_priv list entry for this request */
> -	struct list_head client_list;
> -
> -	/** process identifier submitting this request */
> -	struct pid *pid;
> -
> -	/**
> -	 * The ELSP only accepts two elements at a time, so we queue
> -	 * context/tail pairs on a given queue (ring->execlist_queue) until the
> -	 * hardware is available. The queue serves a double purpose: we also use
> -	 * it to keep track of the up to 2 contexts currently in the hardware
> -	 * (usually one in execution and the other queued up by the GPU): We
> -	 * only remove elements from the head of the queue when the hardware
> -	 * informs us that an element has been completed.
> -	 *
> -	 * All accesses to the queue are mediated by a spinlock
> -	 * (ring->execlist_lock).
> -	 */
> -
> -	/** Execlist link in the submission queue. */
> -	struct list_head execlist_link;
> -
> -	/** Execlists no. of times this request has been sent to the ELSP */
> -	int elsp_submitted;
> -
> -	/** Execlists context hardware id. */
> -	unsigned ctx_hw_id;
> -};
> -
> -struct drm_i915_gem_request * __must_check
> -i915_gem_request_alloc(struct intel_engine_cs *engine,
> -		       struct i915_gem_context *ctx);
> -void i915_gem_request_free(struct kref *req_ref);
> -int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
> -				   struct drm_file *file);
> -
> -static inline uint32_t
> -i915_gem_request_get_seqno(struct drm_i915_gem_request *req)
> -{
> -	return req ? req->seqno : 0;
> -}
> -
> -static inline struct intel_engine_cs *
> -i915_gem_request_get_engine(struct drm_i915_gem_request *req)
> -{
> -	return req ? req->engine : NULL;
> -}
> -
> -static inline struct drm_i915_gem_request *
> -i915_gem_request_reference(struct drm_i915_gem_request *req)
> -{
> -	if (req)
> -		kref_get(&req->ref);
> -	return req;
> -}
> -
> -static inline void
> -i915_gem_request_unreference(struct drm_i915_gem_request *req)
> -{
> -	kref_put(&req->ref, i915_gem_request_free);
> -}
> -
> -static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
> -					   struct drm_i915_gem_request *src)
> -{
> -	if (src)
> -		i915_gem_request_reference(src);
> -
> -	if (*pdst)
> -		i915_gem_request_unreference(*pdst);
> -
> -	*pdst = src;
> -}
> -
> -/*
> - * XXX: i915_gem_request_completed should be here but currently needs the
> - * definition of i915_seqno_passed() which is below. It will be moved in
> - * a later patch when the call to i915_seqno_passed() is obsoleted...
> - */
> -
>  /*
>   * A command that requires special handling by the command parser.
>   */
> @@ -3297,37 +3133,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
>  		       struct drm_i915_gem_object *new,
>  		       unsigned frontbuffer_bits);
>  
> -/**
> - * Returns true if seq1 is later than seq2.
> - */
> -static inline bool
> -i915_seqno_passed(uint32_t seq1, uint32_t seq2)
> -{
> -	return (int32_t)(seq1 - seq2) >= 0;
> -}
> -
> -static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req)
> -{
> -	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
> -				 req->previous_seqno);
> -}
> -
> -static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req)
> -{
> -	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
> -				 req->seqno);
> -}
> -
> -bool __i915_spin_request(const struct drm_i915_gem_request *request,
> -			 int state, unsigned long timeout_us);
> -static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
> -				     int state, unsigned long timeout_us)
> -{
> -	return (i915_gem_request_started(request) &&
> -		__i915_spin_request(request, state, timeout_us));
> -}
> -
> -int __must_check i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno);
>  int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
>  
>  struct drm_i915_gem_request *
> @@ -3385,18 +3190,6 @@ void i915_gem_cleanup_engines(struct drm_device *dev);
>  int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv);
>  int __must_check i915_gem_suspend(struct drm_device *dev);
>  void i915_gem_resume(struct drm_device *dev);
> -void __i915_add_request(struct drm_i915_gem_request *req,
> -			struct drm_i915_gem_object *batch_obj,
> -			bool flush_caches);
> -#define i915_add_request(req) \
> -	__i915_add_request(req, NULL, true)
> -#define i915_add_request_no_flush(req) \
> -	__i915_add_request(req, NULL, false)
> -int __i915_wait_request(struct drm_i915_gem_request *req,
> -			bool interruptible,
> -			s64 *timeout,
> -			struct intel_rps_client *rps);
> -int __must_check i915_wait_request(struct drm_i915_gem_request *req);
>  int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
>  int __must_check
>  i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 8b42a5101f11..10e5db3f294e 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1325,365 +1325,6 @@ put_rpm:
>  	return ret;
>  }
>  
> -static int
> -i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
> -{
> -	if (__i915_terminally_wedged(reset_counter))
> -		return -EIO;
> -
> -	if (__i915_reset_in_progress(reset_counter)) {
> -		/* Non-interruptible callers can't handle -EAGAIN, hence return
> -		 * -EIO unconditionally for these. */
> -		if (!interruptible)
> -			return -EIO;
> -
> -		return -EAGAIN;
> -	}
> -
> -	return 0;
> -}
> -
> -static unsigned long local_clock_us(unsigned *cpu)
> -{
> -	unsigned long t;
> -
> -	/* Cheaply and approximately convert from nanoseconds to microseconds.
> -	 * The result and subsequent calculations are also defined in the same
> -	 * approximate microseconds units. The principal source of timing
> -	 * error here is from the simple truncation.
> -	 *
> -	 * Note that local_clock() is only defined wrt the current CPU;
> -	 * the comparisons are no longer valid if we switch CPUs. Instead of
> -	 * blocking preemption for the entire busywait, we can detect the CPU
> -	 * switch and use that as indicator of system load and a reason to
> -	 * stop busywaiting, see busywait_stop().
> -	 */
> -	*cpu = get_cpu();
> -	t = local_clock() >> 10;
> -	put_cpu();
> -
> -	return t;
> -}
> -
> -static bool busywait_stop(unsigned long timeout, unsigned cpu)
> -{
> -	unsigned this_cpu;
> -
> -	if (time_after(local_clock_us(&this_cpu), timeout))
> -		return true;
> -
> -	return this_cpu != cpu;
> -}
> -
> -bool __i915_spin_request(const struct drm_i915_gem_request *req,
> -			 int state, unsigned long timeout_us)
> -{
> -	unsigned cpu;
> -
> -	/* When waiting for high frequency requests, e.g. during synchronous
> -	 * rendering split between the CPU and GPU, the finite amount of time
> -	 * required to set up the irq and wait upon it limits the response
> -	 * rate. By busywaiting on the request completion for a short while we
> -	 * can service the high frequency waits as quickly as possible. However,
> -	 * if it is a slow request, we want to sleep as quickly as possible.
> -	 * The tradeoff between waiting and sleeping is roughly the time it
> -	 * takes to sleep on a request, on the order of a microsecond.
> -	 */
> -
> -	timeout_us += local_clock_us(&cpu);
> -	do {
> -		if (i915_gem_request_completed(req))
> -			return true;
> -
> -		if (signal_pending_state(state, current))
> -			break;
> -
> -		if (busywait_stop(timeout_us, cpu))
> -			break;
> -
> -		cpu_relax_lowlatency();
> -	} while (!need_resched());
> -
> -	return false;
> -}
> -
> -/**
> - * __i915_wait_request - wait until execution of request has finished
> - * @req: duh!
> - * @interruptible: do an interruptible wait (normally yes)
> - * @timeout: in - how long to wait (NULL forever); out - how much time remaining
> - * @rps: RPS client
> - *
> - * Note: It is of utmost importance that the passed in seqno and reset_counter
> - * values have been read by the caller in an smp safe manner. Where read-side
> - * locks are involved, it is sufficient to read the reset_counter before
> - * unlocking the lock that protects the seqno. For lockless tricks, the
> - * reset_counter _must_ be read before, and an appropriate smp_rmb must be
> - * inserted.
> - *
> - * Returns 0 if the request was found within the allotted time. Else returns the
> - * errno with remaining time filled in timeout argument.
> - */
> -int __i915_wait_request(struct drm_i915_gem_request *req,
> -			bool interruptible,
> -			s64 *timeout,
> -			struct intel_rps_client *rps)
> -{
> -	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
> -	DEFINE_WAIT(reset);
> -	struct intel_wait wait;
> -	unsigned long timeout_remain;
> -	s64 before = 0; /* Only to silence a compiler warning. */
> -	int ret = 0;
> -
> -	might_sleep();
> -
> -	if (list_empty(&req->list))
> -		return 0;
> -
> -	if (i915_gem_request_completed(req))
> -		return 0;
> -
> -	timeout_remain = MAX_SCHEDULE_TIMEOUT;
> -	if (timeout) {
> -		if (WARN_ON(*timeout < 0))
> -			return -EINVAL;
> -
> -		if (*timeout == 0)
> -			return -ETIME;
> -
> -		timeout_remain = nsecs_to_jiffies_timeout(*timeout);
> -
> -		/*
> -		 * Record current time in case interrupted by signal, or wedged.
> -		 */
> -		before = ktime_get_raw_ns();
> -	}
> -
> -	trace_i915_gem_request_wait_begin(req);
> -
> -	/* This client is about to stall waiting for the GPU. In many cases
> -	 * this is undesirable and limits the throughput of the system, as
> -	 * many clients cannot continue processing user input/output whilst
> -	 * blocked. RPS autotuning may take tens of milliseconds to respond
> -	 * to the GPU load and thus incurs additional latency for the client.
> -	 * We can circumvent that by promoting the GPU frequency to maximum
> -	 * before we wait. This makes the GPU throttle up much more quickly
> -	 * (good for benchmarks and user experience, e.g. window animations),
> -	 * but at a cost of spending more power processing the workload
> -	 * (bad for battery). Not all clients even want their results
> -	 * immediately and for them we should just let the GPU select its own
> -	 * frequency to maximise efficiency. To prevent a single client from
> -	 * forcing the clocks too high for the whole system, we only allow
> -	 * each client to waitboost once in a busy period.
> -	 */
> -	if (INTEL_INFO(req->i915)->gen >= 6)
> -		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
> -
> -	/* Optimistic spin for the next ~jiffie before touching IRQs */
> -	if (i915_spin_request(req, state, 5))
> -		goto complete;
> -
> -	set_current_state(state);
> -	add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
> -
> -	intel_wait_init(&wait, req->seqno);
> -	if (intel_engine_add_wait(req->engine, &wait))
> -		/* In order to check that we haven't missed the interrupt
> -		 * as we enabled it, we need to kick ourselves to do a
> -		 * coherent check on the seqno before we sleep.
> -		 */
> -		goto wakeup;
> -
> -	for (;;) {
> -		if (signal_pending_state(state, current)) {
> -			ret = -ERESTARTSYS;
> -			break;
> -		}
> -
> -		timeout_remain = io_schedule_timeout(timeout_remain);
> -		if (timeout_remain == 0) {
> -			ret = -ETIME;
> -			break;
> -		}
> -
> -		if (intel_wait_complete(&wait))
> -			break;
> -
> -		set_current_state(state);
> -
> -wakeup:
> -		/* Carefully check if the request is complete, giving time
> -		 * for the seqno to be visible following the interrupt.
> -		 * We also have to check in case we are kicked by the GPU
> -		 * reset in order to drop the struct_mutex.
> -		 */
> -		if (__i915_request_irq_complete(req))
> -			break;
> -
> -		/* Only spin if we know the GPU is processing this request */
> -		if (i915_spin_request(req, state, 2))
> -			break;
> -	}
> -	remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
> -
> -	intel_engine_remove_wait(req->engine, &wait);
> -	__set_current_state(TASK_RUNNING);
> -complete:
> -	trace_i915_gem_request_wait_end(req);
> -
> -	if (timeout) {
> -		s64 tres = *timeout - (ktime_get_raw_ns() - before);
> -
> -		*timeout = tres < 0 ? 0 : tres;
> -
> -		/*
> -		 * Apparently ktime isn't accurate enough and occasionally has a
> -		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
> -		 * things up to make the test happy. We allow up to 1 jiffy.
> -		 *
> -		 * This is a regression from the timespec->ktime conversion.
> -		 */
> -		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
> -			*timeout = 0;
> -	}
> -
> -	if (rps && req->seqno == req->engine->last_submitted_seqno) {
> -		/* The GPU is now idle and this client has stalled.
> -		 * Since no other client has submitted a request in the
> -		 * meantime, assume that this client is the only one
> -		 * supplying work to the GPU but is unable to keep that
> -		 * work supplied because it is waiting. Since the GPU is
> -		 * then never kept fully busy, RPS autoclocking will
> -		 * keep the clocks relatively low, causing further delays.
> -		 * Compensate by giving the synchronous client credit for
> -		 * a waitboost next time.
> -		 */
> -		spin_lock(&req->i915->rps.client_lock);
> -		list_del_init(&rps->link);
> -		spin_unlock(&req->i915->rps.client_lock);
> -	}
> -
> -	return ret;
> -}
> -
> -int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
> -				   struct drm_file *file)
> -{
> -	struct drm_i915_file_private *file_priv;
> -
> -	WARN_ON(!req || !file || req->file_priv);
> -
> -	if (!req || !file)
> -		return -EINVAL;
> -
> -	if (req->file_priv)
> -		return -EINVAL;
> -
> -	file_priv = file->driver_priv;
> -
> -	spin_lock(&file_priv->mm.lock);
> -	req->file_priv = file_priv;
> -	list_add_tail(&req->client_list, &file_priv->mm.request_list);
> -	spin_unlock(&file_priv->mm.lock);
> -
> -	req->pid = get_pid(task_pid(current));
> -
> -	return 0;
> -}
> -
> -static inline void
> -i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
> -{
> -	struct drm_i915_file_private *file_priv = request->file_priv;
> -
> -	if (!file_priv)
> -		return;
> -
> -	spin_lock(&file_priv->mm.lock);
> -	list_del(&request->client_list);
> -	request->file_priv = NULL;
> -	spin_unlock(&file_priv->mm.lock);
> -
> -	put_pid(request->pid);
> -	request->pid = NULL;
> -}
> -
> -static void i915_gem_request_retire(struct drm_i915_gem_request *request)
> -{
> -	trace_i915_gem_request_retire(request);
> -
> -	/* We know the GPU must have read the request to have
> -	 * sent us the seqno + interrupt, so use the position
> -	 * of tail of the request to update the last known position
> -	 * of the GPU head.
> -	 *
> -	 * Note this requires that we are always called in request
> -	 * completion order.
> -	 */
> -	request->ringbuf->last_retired_head = request->postfix;
> -
> -	list_del_init(&request->list);
> -	i915_gem_request_remove_from_client(request);
> -
> -	if (request->previous_context) {
> -		if (i915.enable_execlists)
> -			intel_lr_context_unpin(request->previous_context,
> -					       request->engine);
> -	}
> -
> -	i915_gem_context_unreference(request->ctx);
> -	i915_gem_request_unreference(request);
> -}
> -
> -static void
> -__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
> -{
> -	struct intel_engine_cs *engine = req->engine;
> -	struct drm_i915_gem_request *tmp;
> -
> -	lockdep_assert_held(&engine->i915->drm.struct_mutex);
> -
> -	if (list_empty(&req->list))
> -		return;
> -
> -	do {
> -		tmp = list_first_entry(&engine->request_list,
> -				       typeof(*tmp), list);
> -
> -		i915_gem_request_retire(tmp);
> -	} while (tmp != req);
> -
> -	WARN_ON(i915_verify_lists(engine->dev));
> -}
> -
> -/**
> - * Waits for a request to be signaled, and cleans up the
> - * request and object lists appropriately for that event.
> - * @req: request to wait on
> - */
> -int
> -i915_wait_request(struct drm_i915_gem_request *req)
> -{
> -	struct drm_i915_private *dev_priv = req->i915;
> -	bool interruptible;
> -	int ret;
> -
> -	interruptible = dev_priv->mm.interruptible;
> -
> -	BUG_ON(!mutex_is_locked(&dev_priv->drm.struct_mutex));
> -
> -	ret = __i915_wait_request(req, interruptible, NULL, NULL);
> -	if (ret)
> -		return ret;
> -
> -	/* If the GPU hung, we want to keep the requests to find the guilty. */
> -	if (!i915_reset_in_progress(&dev_priv->gpu_error))
> -		__i915_gem_request_retire__upto(req);
> -
> -	return 0;
> -}
> -
>  /**
>   * Ensures that all rendering to the object has completed and the object is
>   * safe to unbind from the GTT or access from the CPU.
> @@ -1740,7 +1381,7 @@ i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
>  		i915_gem_object_retire__write(obj);
>  
>  	if (!i915_reset_in_progress(&req->i915->gpu_error))
> -		__i915_gem_request_retire__upto(req);
> +		i915_gem_request_retire_upto(req);
>  }
>  
>  /* A nonblocking variant of the above wait. This is a highly dangerous routine
> @@ -2761,193 +2402,6 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
>  	drm_gem_object_unreference(&obj->base);
>  }
>  
> -static int
> -i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
> -{
> -	struct intel_engine_cs *engine;
> -	int ret;
> -
> -	/* Carefully retire all requests without writing to the rings */
> -	for_each_engine(engine, dev_priv) {
> -		ret = intel_engine_idle(engine);
> -		if (ret)
> -			return ret;
> -	}
> -	i915_gem_retire_requests(dev_priv);
> -
> -	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
> -	if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) {
> -		while (intel_kick_waiters(dev_priv) ||
> -		       intel_kick_signalers(dev_priv))
> -			yield();
> -	}
> -
> -	/* Finally reset hw state */
> -	for_each_engine(engine, dev_priv)
> -		intel_ring_init_seqno(engine, seqno);
> -
> -	return 0;
> -}
> -
> -int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
> -{
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> -	int ret;
> -
> -	if (seqno == 0)
> -		return -EINVAL;
> -
> -	/* HWS page needs to be set less than what we
> -	 * will inject to ring
> -	 */
> -	ret = i915_gem_init_seqno(dev_priv, seqno - 1);
> -	if (ret)
> -		return ret;
> -
> -	/* Carefully set the last_seqno value so that wrap
> -	 * detection still works
> -	 */
> -	dev_priv->next_seqno = seqno;
> -	dev_priv->last_seqno = seqno - 1;
> -	if (dev_priv->last_seqno == 0)
> -		dev_priv->last_seqno--;
> -
> -	return 0;
> -}
> -
> -int
> -i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
> -{
> -	/* reserve 0 for non-seqno */
> -	if (dev_priv->next_seqno == 0) {
> -		int ret = i915_gem_init_seqno(dev_priv, 0);
> -		if (ret)
> -			return ret;
> -
> -		dev_priv->next_seqno = 1;
> -	}
> -
> -	*seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
> -	return 0;
> -}
> -
> -static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
> -{
> -	struct drm_i915_private *dev_priv = engine->i915;
> -
> -	dev_priv->gt.active_engines |= intel_engine_flag(engine);
> -	if (dev_priv->gt.awake)
> -		return;
> -
> -	intel_runtime_pm_get_noresume(dev_priv);
> -	dev_priv->gt.awake = true;
> -
> -	intel_enable_gt_powersave(dev_priv);
> -	i915_update_gfx_val(dev_priv);
> -	if (INTEL_GEN(dev_priv) >= 6)
> -		gen6_rps_busy(dev_priv);
> -
> -	queue_delayed_work(dev_priv->wq,
> -			   &dev_priv->gt.retire_work,
> -			   round_jiffies_up_relative(HZ));
> -}
> -
> -/*
> - * NB: This function is not allowed to fail. Doing so would mean the
> - * request is not being tracked for completion but the work itself is
> - * going to happen on the hardware. This would be a Bad Thing(tm).
> - */
> -void __i915_add_request(struct drm_i915_gem_request *request,
> -			struct drm_i915_gem_object *obj,
> -			bool flush_caches)
> -{
> -	struct intel_engine_cs *engine;
> -	struct intel_ringbuffer *ringbuf;
> -	u32 request_start;
> -	u32 reserved_tail;
> -	int ret;
> -
> -	if (WARN_ON(request == NULL))
> -		return;
> -
> -	engine = request->engine;
> -	ringbuf = request->ringbuf;
> -
> -	/*
> -	 * To ensure that this call will not fail, space for its emissions
> -	 * should already have been reserved in the ring buffer. Let the ring
> -	 * know that it is time to use that space up.
> -	 */
> -	request_start = intel_ring_get_tail(ringbuf);
> -	reserved_tail = request->reserved_space;
> -	request->reserved_space = 0;
> -
> -	/*
> -	 * Emit any outstanding flushes - execbuf can fail to emit the flush
> -	 * after having emitted the batchbuffer command. Hence we need to fix
> -	 * things up similar to emitting the lazy request. The difference here
> -	 * is that the flush _must_ happen before the next request, no matter
> -	 * what.
> -	 */
> -	if (flush_caches) {
> -		if (i915.enable_execlists)
> -			ret = logical_ring_flush_all_caches(request);
> -		else
> -			ret = intel_ring_flush_all_caches(request);
> -		/* Not allowed to fail! */
> -		WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
> -	}
> -
> -	trace_i915_gem_request_add(request);
> -
> -	request->head = request_start;
> -
> -	/* Whilst this request exists, batch_obj will be on the
> -	 * active_list, and so will hold the active reference. Only when this
> -	 * request is retired will the batch_obj be moved onto the
> -	 * inactive_list and lose its active reference. Hence we do not need
> -	 * to explicitly hold another reference here.
> -	 */
> -	request->batch_obj = obj;
> -
> -	/* Seal the request and mark it as pending execution. Note that
> -	 * we may inspect this state, without holding any locks, during
> -	 * hangcheck. Hence we apply the barrier to ensure that we do not
> -	 * see a more recent value in the hws than we are tracking.
> -	 */
> -	request->emitted_jiffies = jiffies;
> -	request->previous_seqno = engine->last_submitted_seqno;
> -	smp_store_mb(engine->last_submitted_seqno, request->seqno);
> -	list_add_tail(&request->list, &engine->request_list);
> -
> -	/* Record the position of the start of the request so that
> -	 * should we detect the updated seqno part-way through the
> -	 * GPU processing the request, we never over-estimate the
> -	 * position of the head.
> -	 */
> -	request->postfix = intel_ring_get_tail(ringbuf);
> -
> -	if (i915.enable_execlists)
> -		ret = engine->emit_request(request);
> -	else {
> -		ret = engine->add_request(request);
> -
> -		request->tail = intel_ring_get_tail(ringbuf);
> -	}
> -	/* Not allowed to fail! */
> -	WARN(ret, "emit|add_request failed: %d!\n", ret);
> -	/* Sanity check that the reserved size was large enough. */
> -	ret = intel_ring_get_tail(ringbuf) - request_start;
> -	if (ret < 0)
> -		ret += ringbuf->size;
> -	WARN_ONCE(ret > reserved_tail,
> -		  "Not enough space reserved (%d bytes) "
> -		  "for adding the request (%d bytes)\n",
> -		  reserved_tail, ret);
> -
> -	i915_gem_mark_busy(engine);
> -}
> -
>  static bool i915_context_is_banned(const struct i915_gem_context *ctx)
>  {
>  	unsigned long elapsed;
> @@ -2979,101 +2433,6 @@ static void i915_set_reset_status(struct i915_gem_context *ctx,
>  	}
>  }
>  
> -void i915_gem_request_free(struct kref *req_ref)
> -{
> -	struct drm_i915_gem_request *req = container_of(req_ref,
> -						 typeof(*req), ref);
> -	kmem_cache_free(req->i915->requests, req);
> -}
> -
> -static inline int
> -__i915_gem_request_alloc(struct intel_engine_cs *engine,
> -			 struct i915_gem_context *ctx,
> -			 struct drm_i915_gem_request **req_out)
> -{
> -	struct drm_i915_private *dev_priv = engine->i915;
> -	unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error);
> -	struct drm_i915_gem_request *req;
> -	int ret;
> -
> -	if (!req_out)
> -		return -EINVAL;
> -
> -	*req_out = NULL;
> -
> -	/* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
> -	 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
> -	 * and restart.
> -	 */
> -	ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
> -	if (ret)
> -		return ret;
> -
> -	req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
> -	if (req == NULL)
> -		return -ENOMEM;
> -
> -	ret = i915_gem_get_seqno(engine->i915, &req->seqno);
> -	if (ret)
> -		goto err;
> -
> -	kref_init(&req->ref);
> -	req->i915 = dev_priv;
> -	req->engine = engine;
> -	req->ctx  = ctx;
> -	i915_gem_context_reference(req->ctx);
> -
> -	/*
> -	 * Reserve space in the ring buffer for all the commands required to
> -	 * eventually emit this request. This is to guarantee that the
> -	 * i915_add_request() call can't fail. Note that the reserve may need
> -	 * to be redone if the request is not actually submitted straight
> -	 * away, e.g. because a GPU scheduler has deferred it.
> -	 */
> -	req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
> -
> -	if (i915.enable_execlists)
> -		ret = intel_logical_ring_alloc_request_extras(req);
> -	else
> -		ret = intel_ring_alloc_request_extras(req);
> -	if (ret)
> -		goto err_ctx;
> -
> -	*req_out = req;
> -	return 0;
> -
> -err_ctx:
> -	i915_gem_context_unreference(ctx);
> -err:
> -	kmem_cache_free(dev_priv->requests, req);
> -	return ret;
> -}
> -
> -/**
> - * i915_gem_request_alloc - allocate a request structure
> - *
> - * @engine: engine that we wish to issue the request on.
> - * @ctx: context that the request will be associated with.
> - *       This can be NULL if the request is not directly related to
> - *       any specific user context, in which case this function will
> - *       choose an appropriate context to use.
> - *
> - * Returns a pointer to the allocated request if successful,
> - * or an error code if not.
> - */
> -struct drm_i915_gem_request *
> -i915_gem_request_alloc(struct intel_engine_cs *engine,
> -		       struct i915_gem_context *ctx)
> -{
> -	struct drm_i915_gem_request *req;
> -	int err;
> -
> -	if (ctx == NULL)
> -		ctx = engine->i915->kernel_context;
> -	err = __i915_gem_request_alloc(engine, ctx, &req);
> -	return err ? ERR_PTR(err) : req;
> -}
> -
>  struct drm_i915_gem_request *
>  i915_gem_find_active_request(struct intel_engine_cs *engine)
>  {
> @@ -3147,14 +2506,14 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
>  	 * implicit references on things like e.g. ppgtt address spaces through
>  	 * the request.
>  	 */
> -	while (!list_empty(&engine->request_list)) {
> +	if (!list_empty(&engine->request_list)) {
>  		struct drm_i915_gem_request *request;
>  
> -		request = list_first_entry(&engine->request_list,
> -					   struct drm_i915_gem_request,
> -					   list);
> +		request = list_last_entry(&engine->request_list,
> +					  struct drm_i915_gem_request,
> +					  list);
>  
> -		i915_gem_request_retire(request);
> +		i915_gem_request_retire_upto(request);
>  	}
>  
>  	/* Having flushed all requests from all queues, we know that all
> @@ -3222,7 +2581,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
>  		if (!i915_gem_request_completed(request))
>  			break;
>  
> -		i915_gem_request_retire(request);
> +		i915_gem_request_retire_upto(request);
>  	}
>  
>  	/* Move any buffers on the active list that are no longer referenced
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> new file mode 100644
> index 000000000000..9e9aa6b725f7
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -0,0 +1,658 @@
> +/*
> + * Copyright © 2008-2015 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include "i915_drv.h"
> +
> +int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
> +				   struct drm_file *file)
> +{
> +	struct drm_i915_file_private *file_priv;
> +
> +	WARN_ON(!req || !file || req->file_priv);
> +
> +	if (!req || !file)
> +		return -EINVAL;
> +
> +	if (req->file_priv)
> +		return -EINVAL;
> +
> +	file_priv = file->driver_priv;
> +
> +	spin_lock(&file_priv->mm.lock);
> +	req->file_priv = file_priv;
> +	list_add_tail(&req->client_list, &file_priv->mm.request_list);
> +	spin_unlock(&file_priv->mm.lock);
> +
> +	req->pid = get_pid(task_pid(current));
> +
> +	return 0;
> +}
> +
> +static inline void
> +i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
> +{
> +	struct drm_i915_file_private *file_priv = request->file_priv;
> +
> +	if (!file_priv)
> +		return;
> +
> +	spin_lock(&file_priv->mm.lock);
> +	list_del(&request->client_list);
> +	request->file_priv = NULL;
> +	spin_unlock(&file_priv->mm.lock);
> +
> +	put_pid(request->pid);
> +	request->pid = NULL;
> +}
> +
> +static void i915_gem_request_retire(struct drm_i915_gem_request *request)
> +{
> +	trace_i915_gem_request_retire(request);
> +	list_del_init(&request->list);
> +
> +	/* We know the GPU must have read the request to have
> +	 * sent us the seqno + interrupt, so use the position
> +	 * of tail of the request to update the last known position
> +	 * of the GPU head.
> +	 *
> +	 * Note this requires that we are always called in request
> +	 * completion order.
> +	 */
> +	request->ringbuf->last_retired_head = request->postfix;
> +
> +	i915_gem_request_remove_from_client(request);
> +
> +	if (request->previous_context) {
> +		if (i915.enable_execlists)
> +			intel_lr_context_unpin(request->previous_context,
> +					       request->engine);
> +	}
> +
> +	i915_gem_context_unreference(request->ctx);
> +	i915_gem_request_unreference(request);
> +}
> +
> +void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
> +{
> +	struct intel_engine_cs *engine = req->engine;
> +	struct drm_i915_gem_request *tmp;
> +
> +	lockdep_assert_held(&req->i915->drm.struct_mutex);
> +
> +	if (list_empty(&req->list))
> +		return;
> +
> +	do {
> +		tmp = list_first_entry(&engine->request_list,
> +				       typeof(*tmp), list);
> +
> +		i915_gem_request_retire(tmp);
> +	} while (tmp != req);
> +
> +	WARN_ON(i915_verify_lists(engine->dev));
> +}
> +
> +static int i915_gem_check_wedge(unsigned int reset_counter, bool interruptible)
> +{
> +	if (__i915_terminally_wedged(reset_counter))
> +		return -EIO;
> +
> +	if (__i915_reset_in_progress(reset_counter)) {
> +		/* Non-interruptible callers can't handle -EAGAIN, hence return
> +		 * -EIO unconditionally for these.
> +		 */
> +		if (!interruptible)
> +			return -EIO;
> +
> +		return -EAGAIN;
> +	}
> +
> +	return 0;
> +}
> +
> +static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
> +{
> +	struct intel_engine_cs *engine;
> +	int ret;
> +
> +	/* Carefully retire all requests without writing to the rings */
> +	for_each_engine(engine, dev_priv) {
> +		ret = intel_engine_idle(engine);
> +		if (ret)
> +			return ret;
> +	}
> +	i915_gem_retire_requests(dev_priv);
> +
> +	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
> +	if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) {
> +		while (intel_kick_waiters(dev_priv) ||
> +		       intel_kick_signalers(dev_priv))
> +			yield();
> +	}
> +
> +	/* Finally reset hw state */
> +	for_each_engine(engine, dev_priv)
> +		intel_ring_init_seqno(engine, seqno);
> +
> +	return 0;
> +}
> +
> +int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(dev);
> +	int ret;
> +
> +	if (seqno == 0)
> +		return -EINVAL;
> +
> +	/* HWS page needs to be set less than what we
> +	 * will inject to ring
> +	 */
> +	ret = i915_gem_init_seqno(dev_priv, seqno - 1);
> +	if (ret)
> +		return ret;
> +
> +	/* Carefully set the last_seqno value so that wrap
> +	 * detection still works
> +	 */
> +	dev_priv->next_seqno = seqno;
> +	dev_priv->last_seqno = seqno - 1;
> +	if (dev_priv->last_seqno == 0)
> +		dev_priv->last_seqno--;
> +
> +	return 0;
> +}
> +
> +static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
> +{
> +	/* reserve 0 for non-seqno */
> +	if (unlikely(dev_priv->next_seqno == 0)) {
> +		int ret;
> +
> +		ret = i915_gem_init_seqno(dev_priv, 0);
> +		if (ret)
> +			return ret;
> +
> +		dev_priv->next_seqno = 1;
> +	}
> +
> +	*seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
> +	return 0;
> +}
> +
> +static inline int
> +__i915_gem_request_alloc(struct intel_engine_cs *engine,
> +			 struct i915_gem_context *ctx,
> +			 struct drm_i915_gem_request **req_out)
> +{
> +	struct drm_i915_private *dev_priv = engine->i915;
> +	unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error);
> +	struct drm_i915_gem_request *req;
> +	int ret;
> +
> +	if (!req_out)
> +		return -EINVAL;
> +
> +	*req_out = NULL;
> +
> +	/* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
> +	 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
> +	 * and restart.
> +	 */
> +	ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
> +	if (ret)
> +		return ret;
> +
> +	req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
> +	if (!req)
> +		return -ENOMEM;
> +
> +	ret = i915_gem_get_seqno(dev_priv, &req->seqno);
> +	if (ret)
> +		goto err;
> +
> +	kref_init(&req->ref);
> +	req->i915 = dev_priv;
> +	req->engine = engine;
> +	req->ctx = ctx;
> +	i915_gem_context_reference(ctx);
> +
> +	/*
> +	 * Reserve space in the ring buffer for all the commands required to
> +	 * eventually emit this request. This is to guarantee that the
> +	 * i915_add_request() call can't fail. Note that the reserve may need
> +	 * to be redone if the request is not actually submitted straight
> +	 * away, e.g. because a GPU scheduler has deferred it.
> +	 */
> +	req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
> +
> +	if (i915.enable_execlists)
> +		ret = intel_logical_ring_alloc_request_extras(req);
> +	else
> +		ret = intel_ring_alloc_request_extras(req);
> +	if (ret)
> +		goto err_ctx;
> +
> +	*req_out = req;
> +	return 0;
> +
> +err_ctx:
> +	i915_gem_context_unreference(ctx);
> +err:
> +	kmem_cache_free(dev_priv->requests, req);
> +	return ret;
> +}
> +
> +/**
> + * i915_gem_request_alloc - allocate a request structure
> + *
> + * @engine: engine that we wish to issue the request on.
> + * @ctx: context that the request will be associated with.
> + *       This can be NULL if the request is not directly related to
> + *       any specific user context, in which case this function will
> + *       choose an appropriate context to use.
> + *
> + * Returns a pointer to the allocated request if successful,
> + * or an error code if not.
> + */
> +struct drm_i915_gem_request *
> +i915_gem_request_alloc(struct intel_engine_cs *engine,
> +		       struct i915_gem_context *ctx)
> +{
> +	struct drm_i915_gem_request *req;
> +	int err;
> +
> +	if (!ctx)
> +		ctx = engine->i915->kernel_context;
> +	err = __i915_gem_request_alloc(engine, ctx, &req);
> +	return err ? ERR_PTR(err) : req;
> +}
> +
> +static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
> +{
> +	struct drm_i915_private *dev_priv = engine->i915;
> +
> +	dev_priv->gt.active_engines |= intel_engine_flag(engine);
> +	if (dev_priv->gt.awake)
> +		return;
> +
> +	intel_runtime_pm_get_noresume(dev_priv);
> +	dev_priv->gt.awake = true;
> +
> +	intel_enable_gt_powersave(dev_priv);
> +	i915_update_gfx_val(dev_priv);
> +	if (INTEL_GEN(dev_priv) >= 6)
> +		gen6_rps_busy(dev_priv);
> +
> +	queue_delayed_work(dev_priv->wq,
> +			   &dev_priv->gt.retire_work,
> +			   round_jiffies_up_relative(HZ));
> +}
> +
> +/*
> + * NB: This function is not allowed to fail. Doing so would mean the
> + * request is not being tracked for completion but the work itself is
> + * going to happen on the hardware. This would be a Bad Thing(tm).
> + */
> +void __i915_add_request(struct drm_i915_gem_request *request,
> +			struct drm_i915_gem_object *obj,
> +			bool flush_caches)
> +{
> +	struct intel_engine_cs *engine;
> +	struct intel_ringbuffer *ringbuf;
> +	u32 request_start;
> +	u32 reserved_tail;
> +	int ret;
> +
> +	if (WARN_ON(!request))
> +		return;
> +
> +	engine = request->engine;
> +	ringbuf = request->ringbuf;
> +
> +	/*
> +	 * To ensure that this call will not fail, space for its emissions
> +	 * should already have been reserved in the ring buffer. Let the ring
> +	 * know that it is time to use that space up.
> +	 */
> +	request_start = intel_ring_get_tail(ringbuf);
> +	reserved_tail = request->reserved_space;
> +	request->reserved_space = 0;
> +
> +	/*
> +	 * Emit any outstanding flushes - execbuf can fail to emit the flush
> +	 * after having emitted the batchbuffer command. Hence we need to fix
> +	 * things up similar to emitting the lazy request. The difference here
> +	 * is that the flush _must_ happen before the next request, no matter
> +	 * what.
> +	 */
> +	if (flush_caches) {
> +		if (i915.enable_execlists)
> +			ret = logical_ring_flush_all_caches(request);
> +		else
> +			ret = intel_ring_flush_all_caches(request);
> +		/* Not allowed to fail! */
> +		WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
> +	}
> +
> +	trace_i915_gem_request_add(request);
> +
> +	request->head = request_start;
> +
> +	/* Whilst this request exists, batch_obj will be on the
> +	 * active_list, and so will hold the active reference. Only when this
> +	 * request is retired will the batch_obj be moved onto the
> +	 * inactive_list and lose its active reference. Hence we do not need
> +	 * to explicitly hold another reference here.
> +	 */
> +	request->batch_obj = obj;
> +
> +	/* Seal the request and mark it as pending execution. Note that
> +	 * we may inspect this state, without holding any locks, during
> +	 * hangcheck. Hence we apply the barrier to ensure that we do not
> +	 * see a more recent value in the hws than we are tracking.
> +	 */
> +	request->emitted_jiffies = jiffies;
> +	request->previous_seqno = engine->last_submitted_seqno;
> +	smp_store_mb(engine->last_submitted_seqno, request->seqno);
> +	list_add_tail(&request->list, &engine->request_list);
> +
> +	/* Record the position of the start of the request so that
> +	 * should we detect the updated seqno part-way through the
> +	 * GPU processing the request, we never over-estimate the
> +	 * position of the head.
> +	 */
> +	request->postfix = intel_ring_get_tail(ringbuf);
> +
> +	if (i915.enable_execlists) {
> +		ret = engine->emit_request(request);
> +	} else {
> +		ret = engine->add_request(request);
> +
> +		request->tail = intel_ring_get_tail(ringbuf);
> +	}
> +	/* Not allowed to fail! */
> +	WARN(ret, "emit|add_request failed: %d!\n", ret);
> +	/* Sanity check that the reserved size was large enough. */
> +	ret = intel_ring_get_tail(ringbuf) - request_start;
> +	if (ret < 0)
> +		ret += ringbuf->size;
> +	WARN_ONCE(ret > reserved_tail,
> +		  "Not enough space reserved (%d bytes) "
> +		  "for adding the request (%d bytes)\n",
> +		  reserved_tail, ret);
> +
> +	i915_gem_mark_busy(engine);
> +}
> +
> +static unsigned long local_clock_us(unsigned int *cpu)
> +{
> +	unsigned long t;
> +
> +	/* Cheaply and approximately convert from nanoseconds to microseconds.
> +	 * The result and subsequent calculations are also defined in the same
> +	 * approximate microseconds units. The principal source of timing
> +	 * error here is from the simple truncation.
> +	 *
> +	 * Note that local_clock() is only defined wrt the current CPU;
> +	 * the comparisons are no longer valid if we switch CPUs. Instead of
> +	 * blocking preemption for the entire busywait, we can detect the CPU
> +	 * switch and use that as indicator of system load and a reason to
> +	 * stop busywaiting, see busywait_stop().
> +	 */
> +	*cpu = get_cpu();
> +	t = local_clock() >> 10;
> +	put_cpu();
> +
> +	return t;
> +}
> +
> +static bool busywait_stop(unsigned long timeout, unsigned int cpu)
> +{
> +	unsigned int this_cpu;
> +
> +	if (time_after(local_clock_us(&this_cpu), timeout))
> +		return true;
> +
> +	return this_cpu != cpu;
> +}
> +
> +bool __i915_spin_request(const struct drm_i915_gem_request *req,
> +			 int state, unsigned long timeout_us)
> +{
> +	unsigned int cpu;
> +
> +	/* When waiting for high frequency requests, e.g. during synchronous
> +	 * rendering split between the CPU and GPU, the finite amount of time
> +	 * required to set up the irq and wait upon it limits the response
> +	 * rate. By busywaiting on the request completion for a short while we
> +	 * can service the high frequency waits as quickly as possible. However,
> +	 * if it is a slow request, we want to sleep as quickly as possible.
> +	 * The tradeoff between waiting and sleeping is roughly the time it
> +	 * takes to sleep on a request, on the order of a microsecond.
> +	 */
> +
> +	timeout_us += local_clock_us(&cpu);
> +	do {
> +		if (i915_gem_request_completed(req))
> +			return true;
> +
> +		if (signal_pending_state(state, current))
> +			break;
> +
> +		if (busywait_stop(timeout_us, cpu))
> +			break;
> +
> +		cpu_relax_lowlatency();
> +	} while (!need_resched());
> +
> +	return false;
> +}
> +
> +/**
> + * __i915_wait_request - wait until execution of request has finished
> + * @req: duh!
> + * @interruptible: do an interruptible wait (normally yes)
> + * @timeout: in - how long to wait (NULL forever); out - how much time remaining
> + * @rps: client to charge for RPS boosting
> + *
> + * Note: It is of utmost importance that the passed in seqno and reset_counter
> + * values have been read by the caller in an smp safe manner. Where read-side
> + * locks are involved, it is sufficient to read the reset_counter before
> + * unlocking the lock that protects the seqno. For lockless tricks, the
> + * reset_counter _must_ be read before, and an appropriate smp_rmb must be
> + * inserted.
> + *
> + * Returns 0 if the request was found within the allotted time. Else returns the
> + * errno with remaining time filled in timeout argument.
> + */
> +int __i915_wait_request(struct drm_i915_gem_request *req,
> +			bool interruptible,
> +			s64 *timeout,
> +			struct intel_rps_client *rps)
> +{
> +	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
> +	DEFINE_WAIT(reset);
> +	struct intel_wait wait;
> +	unsigned long timeout_remain;
> +	int ret = 0;
> +
> +	might_sleep();
> +
> +	if (list_empty(&req->list))
> +		return 0;
> +
> +	if (i915_gem_request_completed(req))
> +		return 0;
> +
> +	timeout_remain = MAX_SCHEDULE_TIMEOUT;
> +	if (timeout) {
> +		if (WARN_ON(*timeout < 0))
> +			return -EINVAL;
> +
> +		if (*timeout == 0)
> +			return -ETIME;
> +
> +		/* Record current time in case interrupted, or wedged */
> +		timeout_remain = nsecs_to_jiffies_timeout(*timeout);
> +		*timeout += ktime_get_raw_ns();
> +	}
> +
> +	trace_i915_gem_request_wait_begin(req);
> +
> +	/* This client is about to stall waiting for the GPU. In many cases
> +	 * this is undesirable and limits the throughput of the system, as
> +	 * many clients cannot continue processing user input/output whilst
> +	 * blocked. RPS autotuning may take tens of milliseconds to respond
> +	 * to the GPU load and thus incurs additional latency for the client.
> +	 * We can circumvent that by promoting the GPU frequency to maximum
> +	 * before we wait. This makes the GPU throttle up much more quickly
> +	 * (good for benchmarks and user experience, e.g. window animations),
> +	 * but at a cost of spending more power processing the workload
> +	 * (bad for battery). Not all clients even want their results
> +	 * immediately and for them we should just let the GPU select its own
> +	 * frequency to maximise efficiency. To prevent a single client from
> +	 * forcing the clocks too high for the whole system, we only allow
> +	 * each client to waitboost once in a busy period.
> +	 */
> +	if (INTEL_GEN(req->i915) >= 6)
> +		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
> +
> +	/* Optimistic spin for the next ~jiffie before touching IRQs */
> +	if (i915_spin_request(req, state, 5))
> +		goto complete;
> +
> +	set_current_state(state);
> +	add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
> +
> +	intel_wait_init(&wait, req->seqno);
> +	if (intel_engine_add_wait(req->engine, &wait))
> +		/* In order to check that we haven't missed the interrupt
> +		 * as we enabled it, we need to kick ourselves to do a
> +		 * coherent check on the seqno before we sleep.
> +		 */
> +		goto wakeup;
> +
> +	for (;;) {
> +		if (signal_pending_state(state, current)) {
> +			ret = -ERESTARTSYS;
> +			break;
> +		}
> +
> +		timeout_remain = io_schedule_timeout(timeout_remain);
> +		if (timeout_remain == 0) {
> +			ret = -ETIME;
> +			break;
> +		}
> +
> +		if (intel_wait_complete(&wait))
> +			break;
> +
> +		set_current_state(state);
> +
> +wakeup:
> +		/* Carefully check if the request is complete, giving time
> +		 * for the seqno to be visible following the interrupt.
> +		 * We also have to check in case we are kicked by the GPU
> +		 * reset in order to drop the struct_mutex.
> +		 */
> +		if (__i915_request_irq_complete(req))
> +			break;
> +
> +		/* Only spin if we know the GPU is processing this request */
> +		if (i915_spin_request(req, state, 2))
> +			break;
> +	}
> +	remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
> +
> +	intel_engine_remove_wait(req->engine, &wait);
> +	__set_current_state(TASK_RUNNING);
> +complete:
> +	trace_i915_gem_request_wait_end(req);
> +
> +	if (timeout) {
> +		*timeout -= ktime_get_raw_ns();
> +		if (*timeout < 0)
> +			*timeout = 0;
> +
> +		/*
> +		 * Apparently ktime isn't accurate enough and occasionally has a
> +		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
> +		 * things up to make the test happy. We allow up to 1 jiffy.
> +		 *
> +		 * This is a regression from the timespec->ktime conversion.
> +		 */
> +		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
> +			*timeout = 0;
> +	}
> +
> +	if (rps && req->seqno == req->engine->last_submitted_seqno) {
> +		/* The GPU is now idle and this client has stalled.
> +		 * Since no other client has submitted a request in the
> +		 * meantime, assume that this client is the only one
> +		 * supplying work to the GPU but is unable to keep that
> +		 * work supplied because it is waiting. Since the GPU is
> +		 * then never kept fully busy, RPS autoclocking will
> +		 * keep the clocks relatively low, causing further delays.
> +		 * Compensate by giving the synchronous client credit for
> +		 * a waitboost next time.
> +		 */
> +		spin_lock(&req->i915->rps.client_lock);
> +		list_del_init(&rps->link);
> +		spin_unlock(&req->i915->rps.client_lock);
> +	}
> +
> +	return ret;
> +}
> +
> +/**
> + * Waits for a request to be signaled, and cleans up the
> + * request and object lists appropriately for that event.
> + */
> +int i915_wait_request(struct drm_i915_gem_request *req)
> +{
> +	int ret;
> +
> +	GEM_BUG_ON(!req);
> +	lockdep_assert_held(&req->i915->drm.struct_mutex);
> +
> +	ret = __i915_wait_request(req, req->i915->mm.interruptible, NULL, NULL);
> +	if (ret)
> +		return ret;
> +
> +	/* If the GPU hung, we want to keep the requests to find the guilty. */
> +	if (!i915_reset_in_progress(&req->i915->gpu_error))
> +		i915_gem_request_retire_upto(req);
> +
> +	return 0;
> +}
> +
> +void i915_gem_request_free(struct kref *req_ref)
> +{
> +	struct drm_i915_gem_request *req =
> +		container_of(req_ref, typeof(*req), ref);
> +	kmem_cache_free(req->i915->requests, req);
> +}
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> new file mode 100644
> index 000000000000..ea700befcc28
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -0,0 +1,238 @@
> +/*
> + * Copyright © 2008-2015 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef I915_GEM_REQUEST_H
> +#define I915_GEM_REQUEST_H
> +
> +/**
> + * Request queue structure.
> + *
> + * The request queue allows us to note sequence numbers that have been emitted
> + * and may be associated with active buffers to be retired.
> + *
> + * By keeping this list, we can avoid having to do questionable sequence
> + * number comparisons on buffer last_read|write_seqno. It also allows an
> + * emission time to be associated with the request for tracking how far ahead
> + * of the GPU the submission is.
> + *
> + * The requests are reference counted, so upon creation they should have an
> + * initial reference taken using kref_init
> + */
> +struct drm_i915_gem_request {
> +	struct kref ref;
> +
> +	/** On which ring this request was generated */
> +	struct drm_i915_private *i915;
> +
> +	/**
> +	 * Context and ring buffer related to this request
> +	 * Contexts are refcounted, so when this request is associated with a
> +	 * context, we must increment the context's refcount, to guarantee that
> +	 * it persists while any request is linked to it. Requests themselves
> +	 * are also refcounted, so the request will only be freed when the last
> +	 * reference to it is dismissed, and the code in
> +	 * i915_gem_request_free() will then decrement the refcount on the
> +	 * context.
> +	 */
> +	struct i915_gem_context *ctx;
> +	struct intel_engine_cs *engine;
> +	struct intel_ringbuffer *ringbuf;
> +	struct intel_signal_node signaling;
> +
> +	/** GEM sequence number associated with the previous request,
> +	 * when the HWS breadcrumb is equal to this the GPU is processing
> +	 * this request.
> +	 */
> +	u32 previous_seqno;
> +
> +	/** GEM sequence number associated with this request,
> +	 * when the HWS breadcrumb is equal or greater than this the GPU
> +	 * has finished processing this request.
> +	 */
> +	u32 seqno;
> +
> +	/** Position in the ringbuffer of the start of the request */
> +	u32 head;
> +
> +	/**
> +	 * Position in the ringbuffer of the start of the postfix.
> +	 * This is required to calculate the maximum available ringbuffer
> +	 * space without overwriting the postfix.
> +	 */
> +	u32 postfix;
> +
> +	/** Position in the ringbuffer of the end of the whole request */
> +	u32 tail;
> +
> +	/** Preallocate space in the ringbuffer for emitting the request */
> +	u32 reserved_space;
> +
> +	/**
> +	 * Context related to the previous request.
> +	 * As the contexts are accessed by the hardware until the switch is
> +	 * completed to a new context, the hardware may still be writing
> +	 * to the context object after the breadcrumb is visible. We must
> +	 * not unpin/unbind/prune that object whilst still active and so
> +	 * we keep the previous context pinned until the following (this)
> +	 * request is retired.
> +	 */
> +	struct i915_gem_context *previous_context;
> +
> +	/** Batch buffer related to this request if any (used for
> +	 * error state dump only).
> +	 */
> +	struct drm_i915_gem_object *batch_obj;
> +
> +	/** Time at which this request was emitted, in jiffies. */
> +	unsigned long emitted_jiffies;
> +
> +	/** global list entry for this request */
> +	struct list_head list;
> +
> +	struct drm_i915_file_private *file_priv;
> +	/** file_priv list entry for this request */
> +	struct list_head client_list;
> +
> +	/** process identifier submitting this request */
> +	struct pid *pid;
> +
> +	/**
> +	 * The ELSP only accepts two elements at a time, so we queue
> +	 * context/tail pairs on a given queue (ring->execlist_queue) until the
> +	 * hardware is available. The queue serves a double purpose: we also use
> +	 * it to keep track of the up to 2 contexts currently in the hardware
> +	 * (usually one in execution and the other queued up by the GPU): We
> +	 * only remove elements from the head of the queue when the hardware
> +	 * informs us that an element has been completed.
> +	 *
> +	 * All accesses to the queue are mediated by a spinlock
> +	 * (ring->execlist_lock).
> +	 */
> +
> +	/** Execlist link in the submission queue. */
> +	struct list_head execlist_link;
> +
> +	/** Execlists no. of times this request has been sent to the ELSP */
> +	int elsp_submitted;
> +
> +	/** Execlists context hardware id. */
> +	unsigned int ctx_hw_id;
> +};
> +
> +struct drm_i915_gem_request * __must_check
> +i915_gem_request_alloc(struct intel_engine_cs *engine,
> +		       struct i915_gem_context *ctx);
> +void i915_gem_request_free(struct kref *req_ref);
> +int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
> +				   struct drm_file *file);
> +void i915_gem_request_retire_upto(struct drm_i915_gem_request *req);
> +
> +static inline u32
> +i915_gem_request_get_seqno(struct drm_i915_gem_request *req)
> +{
> +	return req ? req->seqno : 0;
> +}
> +
> +static inline struct intel_engine_cs *
> +i915_gem_request_get_engine(struct drm_i915_gem_request *req)
> +{
> +	return req ? req->engine : NULL;
> +}
> +
> +static inline struct drm_i915_gem_request *
> +i915_gem_request_reference(struct drm_i915_gem_request *req)
> +{
> +	if (req)
> +		kref_get(&req->ref);
> +	return req;
> +}
> +
> +static inline void
> +i915_gem_request_unreference(struct drm_i915_gem_request *req)
> +{
> +	kref_put(&req->ref, i915_gem_request_free);
> +}
> +
> +static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
> +					   struct drm_i915_gem_request *src)
> +{
> +	if (src)
> +		i915_gem_request_reference(src);
> +
> +	if (*pdst)
> +		i915_gem_request_unreference(*pdst);
> +
> +	*pdst = src;
> +}
> +
> +void __i915_add_request(struct drm_i915_gem_request *req,
> +			struct drm_i915_gem_object *batch_obj,
> +			bool flush_caches);
> +#define i915_add_request(req) \
> +	__i915_add_request(req, NULL, true)
> +#define i915_add_request_no_flush(req) \
> +	__i915_add_request(req, NULL, false)
> +
> +struct intel_rps_client;
> +
> +int __i915_wait_request(struct drm_i915_gem_request *req,
> +			bool interruptible,
> +			s64 *timeout,
> +			struct intel_rps_client *rps);
> +int __must_check i915_wait_request(struct drm_i915_gem_request *req);
> +
> +static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
> +
> +/**
> + * Returns true if seq1 is at or after seq2, accounting for wraparound.
> + */
> +static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
> +{
> +	return (s32)(seq1 - seq2) >= 0;
> +}
> +
> +static inline bool
> +i915_gem_request_started(const struct drm_i915_gem_request *req)
> +{
> +	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
> +				 req->previous_seqno);
> +}
> +
> +static inline bool
> +i915_gem_request_completed(const struct drm_i915_gem_request *req)
> +{
> +	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
> +				 req->seqno);
> +}
> +
> +bool __i915_spin_request(const struct drm_i915_gem_request *request,
> +			 int state, unsigned long timeout_us);
> +static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
> +				     int state, unsigned long timeout_us)
> +{
> +	return (i915_gem_request_started(request) &&
> +		__i915_spin_request(request, state, timeout_us));
> +}
> +
> +#endif /* I915_GEM_REQUEST_H */
> -- 
> 2.8.1
>
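(Editor's note: a minimal stand-alone sketch, not part of the patch, of why
the signed subtraction in i915_seqno_passed() above stays correct when the
32-bit seqno counter wraps; all names below are illustrative.)

	#include <assert.h>
	#include <stdint.h>

	/* Same trick as i915_seqno_passed(): reinterpret the unsigned
	 * difference as s32, so "later" survives wraparound as long as the
	 * two seqnos are within 2^31 of each other.
	 */
	static int seqno_passed(uint32_t seq1, uint32_t seq2)
	{
		return (int32_t)(seq1 - seq2) >= 0;
	}

	int main(void)
	{
		assert(seqno_passed(2, 1));	/* ordinary ordering */
		assert(!seqno_passed(1, 2));
		assert(seqno_passed(5, 5));	/* equality counts as passed */
		/* Across the wrap: 3 was emitted after 0xfffffffe even though
		 * it is numerically smaller; 3 - 0xfffffffe == 5 as u32, still
		 * a small positive value when viewed as s32.
		 */
		assert(seqno_passed(3, 0xfffffffeu));
		assert(!seqno_passed(0xfffffffeu, 3));
		return 0;
	}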

^ permalink raw reply	[flat|nested] 29+ messages in thread
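(Editor's note: the execlist_link comment in the header above describes a
queue in which at most two context/tail pairs are resident in the ELSP at
once, and elements are removed only from the head as the hardware reports
completions. Below is a tiny stand-alone model of just that discipline; it
is an illustration, not the driver's actual submission code.)

	#include <assert.h>

	/* Toy queue: tail grows on submission, head grows only when the
	 * "hardware" completes an element. The up-to-two oldest entries
	 * model the pair currently resident in the ELSP.
	 */
	struct toy_queue { int head, tail; };

	static int in_hw(const struct toy_queue *q)
	{
		int depth = q->tail - q->head;
		return depth > 2 ? 2 : depth;
	}

	static void submit(struct toy_queue *q)   { q->tail++; }
	static void complete(struct toy_queue *q) { q->head++; }

	int main(void)
	{
		struct toy_queue q = { 0, 0 };

		submit(&q); submit(&q); submit(&q);
		assert(in_hw(&q) == 2);	/* third submission waits in software */
		complete(&q);
		assert(in_hw(&q) == 2);	/* next element moves into the ELSP */
		complete(&q); complete(&q);
		assert(in_hw(&q) == 0);
		return 0;
	}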

* Re: [PATCH 6/9] drm/i915: Disable waitboosting for fence_wait()
  2016-07-15 10:11 ` [PATCH 6/9] drm/i915: Disable waitboosting for fence_wait() Chris Wilson
@ 2016-07-15 10:47   ` Joonas Lahtinen
  2016-07-15 11:08     ` Chris Wilson
  2016-07-15 10:49   ` Mika Kuoppala
  2016-07-15 11:53   ` [PATCH v2] " Chris Wilson
  2 siblings, 1 reply; 29+ messages in thread
From: Joonas Lahtinen @ 2016-07-15 10:47 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-07-15 at 11:11 +0100, Chris Wilson wrote:
> +#define NO_WAITBOOST ERR_PTR(-1)

This does not seem to be a very common idiom in the kernel; maybe just
add flags instead.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

^ permalink raw reply	[flat|nested] 29+ messages in thread

* ✗ Ro.CI.BAT: failure for series starting with [1/9] drm/i915: Flush logical context image out to memory upon suspend
  2016-07-15 10:11 Derive requests from dma-buf fence Chris Wilson
                   ` (8 preceding siblings ...)
  2016-07-15 10:11 ` [PATCH 9/9] drm/i915: Mark imported dma-buf objects as being coherent Chris Wilson
@ 2016-07-15 10:48 ` Patchwork
  9 siblings, 0 replies; 29+ messages in thread
From: Patchwork @ 2016-07-15 10:48 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/9] drm/i915: Flush logical context image out to memory upon suspend
URL   : https://patchwork.freedesktop.org/series/9922/
State : failure

== Summary ==

Series 9922v1 Series without cover letter
http://patchwork.freedesktop.org/api/1.0/series/9922/revisions/1/mbox

Test drv_module_reload_basic:
                skip       -> PASS       (ro-ivb-i7-3770)
Test gem_exec_suspend:
        Subgroup basic-s3:
                dmesg-warn -> PASS       (ro-bdw-i7-5557U)
Test gem_sync:
        Subgroup basic-store-each:
                pass       -> DMESG-FAIL (ro-bdw-i7-5600u)
Test kms_cursor_legacy:
        Subgroup basic-flip-vs-cursor:
                dmesg-warn -> PASS       (ro-byt-n2820)
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-a:
                skip       -> DMESG-WARN (ro-bdw-i7-5557U)
Test vgem_basic:
        Subgroup debugfs:
                incomplete -> PASS       (ro-snb-i7-2620M)

fi-kbl-qkkr      total:241  pass:174  dwarn:26  dfail:0   fail:10  skip:31 
fi-snb-i7-2600   total:241  pass:190  dwarn:0   dfail:0   fail:7   skip:44 
ro-bdw-i5-5250u  total:241  pass:213  dwarn:4   dfail:0   fail:7   skip:17 
ro-bdw-i7-5557U  total:241  pass:214  dwarn:1   dfail:0   fail:7   skip:19 
ro-bdw-i7-5600u  total:241  pass:198  dwarn:0   dfail:1   fail:7   skip:35 
ro-byt-n2820     total:241  pass:191  dwarn:0   dfail:0   fail:8   skip:42 
ro-hsw-i3-4010u  total:241  pass:206  dwarn:0   dfail:0   fail:7   skip:28 
ro-hsw-i7-4770r  total:241  pass:206  dwarn:0   dfail:0   fail:7   skip:28 
ro-ilk-i7-620lm  total:241  pass:166  dwarn:0   dfail:0   fail:8   skip:67 
ro-ilk1-i5-650   total:236  pass:166  dwarn:0   dfail:0   fail:8   skip:62 
ro-ivb-i7-3770   total:241  pass:197  dwarn:0   dfail:0   fail:7   skip:37 
ro-skl3-i5-6260u total:241  pass:217  dwarn:1   dfail:0   fail:7   skip:16 
ro-snb-i7-2620M  total:241  pass:188  dwarn:0   dfail:0   fail:8   skip:45 

Results at /archive/results/CI_IGT_test/RO_Patchwork_1500/

c01b445 drm-intel-nightly: 2016y-07m-15d-07h-02m-07s UTC integration manifest
acd076e drm/i915: Mark imported dma-buf objects as being coherent
ab7e428f drm/i915: Wait on external rendering for GEM objects
bf0ac62 drm/i915: Disable waitboosting for mmioflips/semaphores
fdf757d drm/i915: Disable waitboosting for fence_wait()
4b17dce drm/i915: Derive GEM requests from dma-fence
880dc6c drm/i915: Mark all current requests as complete before resetting them
b7f8075 drm/i915: Retire oldest completed request before allocating next
13b5f9c drm/i915: Move GEM request routines to i915_gem_request.c
af13418 drm/i915: Flush logical context image out to memory upon suspend

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 6/9] drm/i915: Disable waitboosting for fence_wait()
  2016-07-15 10:11 ` [PATCH 6/9] drm/i915: Disable waitboosting for fence_wait() Chris Wilson
  2016-07-15 10:47   ` Joonas Lahtinen
@ 2016-07-15 10:49   ` Mika Kuoppala
  2016-07-15 11:06     ` Chris Wilson
  2016-07-15 11:53   ` [PATCH v2] " Chris Wilson
  2 siblings, 1 reply; 29+ messages in thread
From: Mika Kuoppala @ 2016-07-15 10:49 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> We want to restrict waitboosting to known process contexts, where we can
> track which clients are receiving waitboosts and prevent excessive power
> wasting. For fence_wait() we do not have any client tracking and so that
> leaves it open to abuse.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_request.c | 7 ++++---
>  drivers/gpu/drm/i915/i915_gem_request.h | 1 +
>  2 files changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 6528536878f2..bfec4a88707d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -70,7 +70,7 @@ static signed long i915_fence_wait(struct fence *fence,
>  
>  	ret = __i915_wait_request(to_request(fence),
>  				  interruptible, timeout,
> -				  NULL);
> +				  NO_WAITBOOST);
>  	if (ret == -ETIME)
>  		return 0;
>  
> @@ -642,7 +642,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
>  	 * forcing the clocks too high for the whole system, we only allow
>  	 * each client to waitboost once in a busy period.
>  	 */
> -	if (INTEL_GEN(req->i915) >= 6)
> +	if (!IS_ERR(rps) && INTEL_GEN(req->i915) >= 6)
>  		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
>

The intention here is to boost even if the rps client is NULL?
Assuming so, could you elaborate why?

Thanks,
-Mika


>  	/* Optimistic spin for the next ~jiffie before touching IRQs */
> @@ -713,7 +713,8 @@ complete:
>  			*timeout = 0;
>  	}
>  
> -	if (rps && req->fence.seqno == req->engine->last_submitted_seqno) {
> +	if (!IS_ERR_OR_NULL(rps) &&
> +	    req->fence.seqno == req->engine->last_submitted_seqno) {
>  		/* The GPU is now idle and this client has stalled.
>  		 * Since no other client has submitted a request in the
>  		 * meantime, assume that this client is the only one
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 6f2c820785f3..f7f497a07893 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -206,6 +206,7 @@ void __i915_add_request(struct drm_i915_gem_request *req,
>  	__i915_add_request(req, NULL, false)
>  
>  struct intel_rps_client;
> +#define NO_WAITBOOST ERR_PTR(-1)
>  
>  int __i915_wait_request(struct drm_i915_gem_request *req,
>  			bool interruptible,
> -- 
> 2.8.1
>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 6/9] drm/i915: Disable waitboosting for fence_wait()
  2016-07-15 10:49   ` Mika Kuoppala
@ 2016-07-15 11:06     ` Chris Wilson
  0 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2016-07-15 11:06 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

On Fri, Jul 15, 2016 at 01:49:44PM +0300, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > We want to restrict waitboosting to known process contexts, where we can
> > track which clients are receiving waitboosts and prevent excessive power
> > wasting. For fence_wait() we do not have any client tracking and so that
> > leaves it open to abuse.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_gem_request.c | 7 ++++---
> >  drivers/gpu/drm/i915/i915_gem_request.h | 1 +
> >  2 files changed, 5 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> > index 6528536878f2..bfec4a88707d 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_request.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> > @@ -70,7 +70,7 @@ static signed long i915_fence_wait(struct fence *fence,
> >  
> >  	ret = __i915_wait_request(to_request(fence),
> >  				  interruptible, timeout,
> > -				  NULL);
> > +				  NO_WAITBOOST);
> >  	if (ret == -ETIME)
> >  		return 0;
> >  
> > @@ -642,7 +642,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
> >  	 * forcing the clocks too high for the whole system, we only allow
> >  	 * each client to waitboost once in a busy period.
> >  	 */
> > -	if (INTEL_GEN(req->i915) >= 6)
> > +	if (!IS_ERR(rps) && INTEL_GEN(req->i915) >= 6)
> >  		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
> >
> 
> The intention here is to boost even if the rps client is NULL?
> Assuming so, could you elaborate why?

Yes, NULL is the kernel request for rps boosting, not associated with
any client (and may be affecting multiple clients at any one time).
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 6/9] drm/i915: Disable waitboosting for fence_wait()
  2016-07-15 10:47   ` Joonas Lahtinen
@ 2016-07-15 11:08     ` Chris Wilson
  2016-07-15 11:49       ` Chris Wilson
  0 siblings, 1 reply; 29+ messages in thread
From: Chris Wilson @ 2016-07-15 11:08 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Fri, Jul 15, 2016 at 01:47:34PM +0300, Joonas Lahtinen wrote:
> On pe, 2016-07-15 at 11:11 +0100, Chris Wilson wrote:
> > +#define NO_WAITBOOST ERR_PTR(-1)
> 
> > This does not seem to be a very common idiom in the kernel; maybe just
> > add flags instead.

I find the special values for the client (including the existing NULL
client) much more informative.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 7/9] drm/i915: Disable waitboosting for mmioflips/semaphores
  2016-07-15 10:11 ` [PATCH 7/9] drm/i915: Disable waitboosting for mmioflips/semaphores Chris Wilson
@ 2016-07-15 11:08   ` Mika Kuoppala
  2016-07-15 12:50     ` Chris Wilson
  2016-07-15 12:26   ` Mika Kuoppala
  1 sibling, 1 reply; 29+ messages in thread
From: Mika Kuoppala @ 2016-07-15 11:08 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Since commit a6f766f39751 ("drm/i915: Limit ring synchronisation (sw
> sempahores) RPS boosts") and commit bcafc4e38b6a ("drm/i915: Limit mmio
> flip RPS boosts") we have limited the waitboosting for semaphores and
> flips. Ideally we do not want to boost in either of these instances as no
> consumer is waiting upon the results. With the introduction of
> NO_WAITBOOST

Is consumer a synonym for a userspace client in this context?

> in the previous patch, we can finally disable these needless boosts.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Seems to do what it advertises. Might need to change the NO_WAITBOOST
define though, as Joonas pointed out.

-Mika

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c  | 8 +-------
>  drivers/gpu/drm/i915/i915_drv.h      | 2 --
>  drivers/gpu/drm/i915/i915_gem.c      | 2 +-
>  drivers/gpu/drm/i915/intel_display.c | 2 +-
>  drivers/gpu/drm/i915/intel_pm.c      | 2 --
>  5 files changed, 3 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 55fd3d9cc448..618f8cf210fc 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2465,13 +2465,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>  			   list_empty(&file_priv->rps.link) ? "" : ", active");
>  		rcu_read_unlock();
>  	}
> -	seq_printf(m, "Semaphore boosts: %d%s\n",
> -		   dev_priv->rps.semaphores.boosts,
> -		   list_empty(&dev_priv->rps.semaphores.link) ? "" : ", active");
> -	seq_printf(m, "MMIO flip boosts: %d%s\n",
> -		   dev_priv->rps.mmioflips.boosts,
> -		   list_empty(&dev_priv->rps.mmioflips.link) ? "" : ", active");
> -	seq_printf(m, "Kernel boosts: %d\n", dev_priv->rps.boosts);
> +	seq_printf(m, "Kernel (anonymous) boosts: %d\n", dev_priv->rps.boosts);
>  	spin_unlock(&dev_priv->rps.client_lock);
>  	mutex_unlock(&dev->filelist_mutex);
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a4767c198413..08d9c081e9c8 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1195,8 +1195,6 @@ struct intel_gen6_power_mgmt {
>  	struct delayed_work autoenable_work;
>  	unsigned boosts;
>  
> -	struct intel_rps_client semaphores, mmioflips;
> -
>  	/* manual wa residency calculations */
>  	struct intel_rps_ei up_ei, down_ei;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 40cc3da68611..2ec78aa7279e 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2849,7 +2849,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
>  		ret = __i915_wait_request(from_req,
>  					  i915->mm.interruptible,
>  					  NULL,
> -					  &i915->rps.semaphores);
> +					  NO_WAITBOOST);
>  		if (ret)
>  			return ret;
>  
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 9337d3a7b83d..813c9c3b32ca 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -11473,7 +11473,7 @@ static void intel_mmio_flip_work_func(struct work_struct *w)
>  	if (work->flip_queued_req)
>  		WARN_ON(__i915_wait_request(work->flip_queued_req,
>  					    false, NULL,
> -					    &dev_priv->rps.mmioflips));
> +					    NO_WAITBOOST));
>  
>  	/* For framebuffer backed by dmabuf, wait for fence */
>  	resv = i915_gem_object_get_dmabuf_resv(obj);
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index fa6b341c2792..a1bf5f8fbb1c 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -7810,8 +7810,6 @@ void intel_pm_setup(struct drm_device *dev)
>  	INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work,
>  			  __intel_autoenable_gt_powersave);
>  	INIT_LIST_HEAD(&dev_priv->rps.clients);
> -	INIT_LIST_HEAD(&dev_priv->rps.semaphores.link);
> -	INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);
>  
>  	dev_priv->pm.suspended = false;
>  	atomic_set(&dev_priv->pm.wakeref_count, 0);
> -- 
> 2.8.1
>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/9] drm/i915: Flush logical context image out to memory upon suspend
  2016-07-15 10:41   ` Mika Kuoppala
@ 2016-07-15 11:11     ` Chris Wilson
  0 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2016-07-15 11:11 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

On Fri, Jul 15, 2016 at 01:41:33PM +0300, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Before suspend, and especially before building the hibernation image, we
> > need the context image to be coherent in memory. To do this we require
> > that we perform a context switch to a disposable context (i.e. the
> > dev_priv->kernel_context) - when that switch is complete, all other
> > context images will be complete. This leaves the kernel_context image as
> > incomplete, but fortunately that is disposable and we can do a quick
> > fixup of the logical state after resuming.
> >
> > Testcase: igt/gem_exec_suspend # bsw
> > References: https://bugs.freedesktop.org/show_bug.cgi?id=96526
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.c |  4 +---
> >  drivers/gpu/drm/i915/i915_drv.h |  1 +
> >  drivers/gpu/drm/i915/i915_gem.c | 46 +++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 48 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> > index 15440123e38d..c5b7b8e0678a 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -1590,9 +1590,7 @@ static int i915_drm_resume(struct drm_device *dev)
> >  
> >  	intel_csr_ucode_resume(dev_priv);
> >  
> > -	mutex_lock(&dev->struct_mutex);
> > -	i915_gem_restore_gtt_mappings(dev);
> > -	mutex_unlock(&dev->struct_mutex);
> > +	i915_gem_resume(dev);
> >  
> >  	i915_restore_state(dev);
> >  	intel_opregion_setup(dev_priv);
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 1ec523d29789..e73c0fc84c73 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -3384,6 +3384,7 @@ void i915_gem_init_swizzling(struct drm_device *dev);
> >  void i915_gem_cleanup_engines(struct drm_device *dev);
> >  int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv);
> >  int __must_check i915_gem_suspend(struct drm_device *dev);
> > +void i915_gem_resume(struct drm_device *dev);
> >  void __i915_add_request(struct drm_i915_gem_request *req,
> >  			struct drm_i915_gem_object *batch_obj,
> >  			bool flush_caches);
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index cf0e8aa8035c..8b42a5101f11 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -4974,6 +4974,35 @@ i915_gem_stop_engines(struct drm_device *dev)
> >  		dev_priv->gt.stop_engine(engine);
> >  }
> >  
> > +static int switch_to_kernel_context(struct drm_i915_private *dev_priv)
> > +{
> > +	struct intel_engine_cs *engine;
> > +
> > +	for_each_engine(engine, dev_priv) {
> > +		struct drm_i915_gem_request *req;
> > +		int ret;
> > +
> > +		if (engine->last_context == NULL)
> > +			continue;
> > +
> > +		if (engine->last_context == dev_priv->kernel_context)
> > +			continue;
> > +
> > +		req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
> > +		if (IS_ERR(req))
> > +			return PTR_ERR(req);
> > +
> > +		ret = 0;
> > +		if (!i915.enable_execlists)
> > +			ret = i915_switch_context(req);
> > +		i915_add_request_no_flush(req);
> > +		if (ret)
> > +			return ret;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> 
> Why do you want to keep this separate? Although the evict one is a
> little bit different, and as I didn't see any perf implications: why
> not consolidate the evict one and this one into an i915_gem_idle()
> which would park to the default context and wait for idle?

I found i915_gem_idle() to be easily confused with the idle functions we
call upon idling. And later on you will see that i915_gem_park() itself
is not common to anything but suspend. So I don't feel like introducing
that as a nebulous concept. But with a functional change to evict noted,
we can share the switch-context code...
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 9/9] drm/i915: Mark imported dma-buf objects as being coherent
  2016-07-15 10:11 ` [PATCH 9/9] drm/i915: Mark imported dma-buf objects as being coherent Chris Wilson
@ 2016-07-15 11:33   ` Mika Kuoppala
  0 siblings, 0 replies; 29+ messages in thread
From: Mika Kuoppala @ 2016-07-15 11:33 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> A foreign dma-buf does not share our cache domain tracking, and we rely
> on the producer ensuring cache coherency. Marking them as being in the
> CPU domain is incorrect.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_dmabuf.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index 80bbe43a2e92..c7d734248ed0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -299,6 +299,8 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
>  	drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
>  	i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
>  	obj->base.import_attach = attach;
> +	obj->base.read_domains = I915_GEM_DOMAIN_GTT;

I was a bit confused about the coherency guarantees of this _GTT.

But after chatting with Chris I am convinced that, when it comes to
coherency for other guests and sharing, this is it.

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

-Mika

> +	obj->base.write_domain = 0;
>  
>  	return &obj->base;
>  
> -- 
> 2.8.1
>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 6/9] drm/i915: Disable waitboosting for fence_wait()
  2016-07-15 11:08     ` Chris Wilson
@ 2016-07-15 11:49       ` Chris Wilson
  2016-07-15 12:07         ` Joonas Lahtinen
  0 siblings, 1 reply; 29+ messages in thread
From: Chris Wilson @ 2016-07-15 11:49 UTC (permalink / raw)
  To: Joonas Lahtinen, intel-gfx

On Fri, Jul 15, 2016 at 12:08:05PM +0100, Chris Wilson wrote:
> On Fri, Jul 15, 2016 at 01:47:34PM +0300, Joonas Lahtinen wrote:
> > On pe, 2016-07-15 at 11:11 +0100, Chris Wilson wrote:
> > > +#define NO_WAITBOOST ERR_PTR(-1)
> > 
> > This does not seem to be a very common idiom in the kernel; maybe just
> > add flags instead.
> 
> I find the special values for the client (including the existing NULL
> client) much more informative.

Is it more pleasant with...?

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index bfec4a88707d..aedc15d46463 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -713,7 +713,7 @@ complete:
                        *timeout = 0;
        }
 
-       if (!IS_ERR_OR_NULL(rps) &&
+       if (IS_RPS_CLIENT(rps) &&
            req->fence.seqno == req->engine->last_submitted_seqno) {
                /* The GPU is now idle and this client has stalled.
                 * Since no other client has submitted a request in the
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index f7f497a07893..b3fe80d4eb63 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -207,6 +207,7 @@ void __i915_add_request(struct drm_i915_gem_request *req,
 
 struct intel_rps_client;
 #define NO_WAITBOOST ERR_PTR(-1)
+#define IS_RPS_CLIENT(p) (!IS_ERR_OR_NULL(p))

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2] drm/i915: Disable waitboosting for fence_wait()
  2016-07-15 10:11 ` [PATCH 6/9] drm/i915: Disable waitboosting for fence_wait() Chris Wilson
  2016-07-15 10:47   ` Joonas Lahtinen
  2016-07-15 10:49   ` Mika Kuoppala
@ 2016-07-15 11:53   ` Chris Wilson
  2016-07-15 12:25     ` Mika Kuoppala
  2 siblings, 1 reply; 29+ messages in thread
From: Chris Wilson @ 2016-07-15 11:53 UTC (permalink / raw)
  To: intel-gfx

We want to restrict waitboosting to known process contexts, where we can
track which clients are receiving waitboosts and prevent excessive power
wasting. For fence_wait() we do not have any client tracking and so that
leaves it open to abuse.

v2: Hide the IS_ERR_OR_NULL testing for special clients

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 7 ++++---
 drivers/gpu/drm/i915/i915_gem_request.h | 3 +++
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 6528536878f2..f483e605a715 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -70,7 +70,7 @@ static signed long i915_fence_wait(struct fence *fence,
 
 	ret = __i915_wait_request(to_request(fence),
 				  interruptible, timeout,
-				  NULL);
+				  NO_WAITBOOST);
 	if (ret == -ETIME)
 		return 0;
 
@@ -642,7 +642,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	 * forcing the clocks too high for the whole system, we only allow
 	 * each client to waitboost once in a busy period.
 	 */
-	if (INTEL_GEN(req->i915) >= 6)
+	if (IS_RPS_CLIENT(rps) && INTEL_GEN(req->i915) >= 6)
 		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
 
 	/* Optimistic spin for the next ~jiffie before touching IRQs */
@@ -713,7 +713,8 @@ complete:
 			*timeout = 0;
 	}
 
-	if (rps && req->fence.seqno == req->engine->last_submitted_seqno) {
+	if (IS_RPS_USER(rps) &&
+	    req->fence.seqno == req->engine->last_submitted_seqno) {
 		/* The GPU is now idle and this client has stalled.
 		 * Since no other client has submitted a request in the
 		 * meantime, assume that this client is the only one
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 6f2c820785f3..0a01d01cac51 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -206,6 +206,9 @@ void __i915_add_request(struct drm_i915_gem_request *req,
 	__i915_add_request(req, NULL, false)
 
 struct intel_rps_client;
+#define NO_WAITBOOST ERR_PTR(-1)
+#define IS_RPS_CLIENT(p) (!IS_ERR(p))
+#define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p))
 
 int __i915_wait_request(struct drm_i915_gem_request *req,
 			bool interruptible,
-- 
2.8.1
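(Editor's note: a sketch, not new driver code, restating the three states
that the v2 macros above encode for the intel_rps_client pointer passed to
__i915_wait_request(); the helper names here are made up for illustration.)

	/* rps can be one of three things:
	 *
	 *   valid pointer - a tracked userspace client, eligible for both
	 *                   the initial boost and the idle-stall boost
	 *   NULL          - an anonymous kernel wait, which still boosts
	 *                   but has no per-client tracking
	 *   NO_WAITBOOST  - the ERR_PTR(-1) sentinel, which never boosts
	 */
	static bool may_boost(const struct intel_rps_client *rps)
	{
		return IS_RPS_CLIENT(rps); /* !IS_ERR(): valid pointer or NULL */
	}

	static bool is_tracked_user(const struct intel_rps_client *rps)
	{
		return IS_RPS_USER(rps); /* !IS_ERR_OR_NULL(): valid pointer only */
	}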

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH 6/9] drm/i915: Disable waitboosting for fence_wait()
  2016-07-15 11:49       ` Chris Wilson
@ 2016-07-15 12:07         ` Joonas Lahtinen
  0 siblings, 0 replies; 29+ messages in thread
From: Joonas Lahtinen @ 2016-07-15 12:07 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-07-15 at 12:49 +0100, Chris Wilson wrote:
> On Fri, Jul 15, 2016 at 12:08:05PM +0100, Chris Wilson wrote:
> > 
> > On Fri, Jul 15, 2016 at 01:47:34PM +0300, Joonas Lahtinen wrote:
> > > 
> > > On pe, 2016-07-15 at 11:11 +0100, Chris Wilson wrote:
> > > > 
> > > > +#define NO_WAITBOOST ERR_PTR(-1)
> > > This does not seem to be a very common idiom in the kernel; maybe just
> > > add flags instead.
> > I find the special values for the client (including the existing NULL
> > client) much more informative.
> Is it more pleasant with...?
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index bfec4a88707d..aedc15d46463 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -713,7 +713,7 @@ complete:
>                         *timeout = 0;
>         }
>  
> -       if (!IS_ERR_OR_NULL(rps) &&
> +       if (IS_RPS_CLIENT(rps) &&
>             req->fence.seqno == req->engine->last_submitted_seqno) {
>                 /* The GPU is now idle and this client has stalled.
>                  * Since no other client has submitted a request in the
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index f7f497a07893..b3fe80d4eb63 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -207,6 +207,7 @@ void __i915_add_request(struct drm_i915_gem_request *req,
>  
>  struct intel_rps_client;

I almost suggested #define RPS_CLIENT(n) ERR_PTR(-n), but this is
somewhat better already.

>  #define NO_WAITBOOST ERR_PTR(-1)
> +#define IS_RPS_CLIENT(p) (!IS_ERR_OR_NULL(p))
> 
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] drm/i915: Disable waitboosting for fence_wait()
  2016-07-15 11:53   ` [PATCH v2] " Chris Wilson
@ 2016-07-15 12:25     ` Mika Kuoppala
  0 siblings, 0 replies; 29+ messages in thread
From: Mika Kuoppala @ 2016-07-15 12:25 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> We want to restrict waitboosting to known process contexts, where we can
> track which clients are receiving waitboosts and prevent excessive power
> wasting. For fence_wait() we do not have any client tracking and so that
> leaves it open to abuse.
>
> v2: Hide the IS_ERR_OR_NULL testing for special clients
>

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_request.c | 7 ++++---
>  drivers/gpu/drm/i915/i915_gem_request.h | 3 +++
>  2 files changed, 7 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 6528536878f2..f483e605a715 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -70,7 +70,7 @@ static signed long i915_fence_wait(struct fence *fence,
>  
>  	ret = __i915_wait_request(to_request(fence),
>  				  interruptible, timeout,
> -				  NULL);
> +				  NO_WAITBOOST);
>  	if (ret == -ETIME)
>  		return 0;
>  
> @@ -642,7 +642,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
>  	 * forcing the clocks too high for the whole system, we only allow
>  	 * each client to waitboost once in a busy period.
>  	 */
> -	if (INTEL_GEN(req->i915) >= 6)
> +	if (IS_RPS_CLIENT(rps) && INTEL_GEN(req->i915) >= 6)
>  		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
>  
>  	/* Optimistic spin for the next ~jiffie before touching IRQs */
> @@ -713,7 +713,8 @@ complete:
>  			*timeout = 0;
>  	}
>  
> -	if (rps && req->fence.seqno == req->engine->last_submitted_seqno) {
> +	if (IS_RPS_USER(rps) &&
> +	    req->fence.seqno == req->engine->last_submitted_seqno) {
>  		/* The GPU is now idle and this client has stalled.
>  		 * Since no other client has submitted a request in the
>  		 * meantime, assume that this client is the only one
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 6f2c820785f3..0a01d01cac51 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -206,6 +206,9 @@ void __i915_add_request(struct drm_i915_gem_request *req,
>  	__i915_add_request(req, NULL, false)
>  
>  struct intel_rps_client;
> +#define NO_WAITBOOST ERR_PTR(-1)
> +#define IS_RPS_CLIENT(p) (!IS_ERR(p))
> +#define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p))
>  
>  int __i915_wait_request(struct drm_i915_gem_request *req,
>  			bool interruptible,
> -- 
> 2.8.1
>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 7/9] drm/i915: Disable waitboosting for mmioflips/semaphores
  2016-07-15 10:11 ` [PATCH 7/9] drm/i915: Disable waitboosting for mmioflips/semaphores Chris Wilson
  2016-07-15 11:08   ` Mika Kuoppala
@ 2016-07-15 12:26   ` Mika Kuoppala
  1 sibling, 0 replies; 29+ messages in thread
From: Mika Kuoppala @ 2016-07-15 12:26 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Since commit a6f766f39751 ("drm/i915: Limit ring synchronisation (sw
> sempahores) RPS boosts") and commit bcafc4e38b6a ("drm/i915: Limit mmio
> flip RPS boosts") we have limited the waitboosting for semaphores and
> flips. Ideally we do not want to boost in either of these instances as no
> consumer is waiting upon the results. With the introduction of NO_WAITBOOST
> in the previous patch, we can finally disable these needless boosts.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c  | 8 +-------
>  drivers/gpu/drm/i915/i915_drv.h      | 2 --
>  drivers/gpu/drm/i915/i915_gem.c      | 2 +-
>  drivers/gpu/drm/i915/intel_display.c | 2 +-
>  drivers/gpu/drm/i915/intel_pm.c      | 2 --
>  5 files changed, 3 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 55fd3d9cc448..618f8cf210fc 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2465,13 +2465,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>  			   list_empty(&file_priv->rps.link) ? "" : ", active");
>  		rcu_read_unlock();
>  	}
> -	seq_printf(m, "Semaphore boosts: %d%s\n",
> -		   dev_priv->rps.semaphores.boosts,
> -		   list_empty(&dev_priv->rps.semaphores.link) ? "" : ", active");
> -	seq_printf(m, "MMIO flip boosts: %d%s\n",
> -		   dev_priv->rps.mmioflips.boosts,
> -		   list_empty(&dev_priv->rps.mmioflips.link) ? "" : ", active");
> -	seq_printf(m, "Kernel boosts: %d\n", dev_priv->rps.boosts);
> +	seq_printf(m, "Kernel (anonymous) boosts: %d\n", dev_priv->rps.boosts);
>  	spin_unlock(&dev_priv->rps.client_lock);
>  	mutex_unlock(&dev->filelist_mutex);
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a4767c198413..08d9c081e9c8 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1195,8 +1195,6 @@ struct intel_gen6_power_mgmt {
>  	struct delayed_work autoenable_work;
>  	unsigned boosts;
>  
> -	struct intel_rps_client semaphores, mmioflips;
> -
>  	/* manual wa residency calculations */
>  	struct intel_rps_ei up_ei, down_ei;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 40cc3da68611..2ec78aa7279e 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2849,7 +2849,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
>  		ret = __i915_wait_request(from_req,
>  					  i915->mm.interruptible,
>  					  NULL,
> -					  &i915->rps.semaphores);
> +					  NO_WAITBOOST);
>  		if (ret)
>  			return ret;
>  
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 9337d3a7b83d..813c9c3b32ca 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -11473,7 +11473,7 @@ static void intel_mmio_flip_work_func(struct work_struct *w)
>  	if (work->flip_queued_req)
>  		WARN_ON(__i915_wait_request(work->flip_queued_req,
>  					    false, NULL,
> -					    &dev_priv->rps.mmioflips));
> +					    NO_WAITBOOST));
>  
>  	/* For framebuffer backed by dmabuf, wait for fence */
>  	resv = i915_gem_object_get_dmabuf_resv(obj);
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index fa6b341c2792..a1bf5f8fbb1c 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -7810,8 +7810,6 @@ void intel_pm_setup(struct drm_device *dev)
>  	INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work,
>  			  __intel_autoenable_gt_powersave);
>  	INIT_LIST_HEAD(&dev_priv->rps.clients);
> -	INIT_LIST_HEAD(&dev_priv->rps.semaphores.link);
> -	INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);
>  
>  	dev_priv->pm.suspended = false;
>  	atomic_set(&dev_priv->pm.wakeref_count, 0);
> -- 
> 2.8.1
>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 7/9] drm/i915: Disable waitboosting for mmioflips/semaphores
  2016-07-15 11:08   ` Mika Kuoppala
@ 2016-07-15 12:50     ` Chris Wilson
  0 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2016-07-15 12:50 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

On Fri, Jul 15, 2016 at 02:08:17PM +0300, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Since commit a6f766f39751 ("drm/i915: Limit ring synchronisation (sw
> > sempahores) RPS boosts") and commit bcafc4e38b6a ("drm/i915: Limit mmio
> > flip RPS boosts") we have limited the waitboosting for semaphores and
> > flips. Ideally we do not want to boost in either of these instances as no
> > consumer is waiting upon the results. With the introduction of
> > NO_WAITBOOST
> 
> Is consumer a synonym for a userspace client in this context?

Yes, s/and consumer/no userspace consumer/

Technically there is a client waiting, since we currently stall the
execbuf; I was trying to distinguish between the execbuf as the producer
being throttled for submitting too much work, and somebody waiting upon
the result who cannot make progress on other tasks.

The flips are decoupled from the user, we just want to disable the
boosting as that is explicitly handled when we detect a missed flip.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 8/9] drm/i915: Wait on external rendering for GEM objects
  2016-07-15 10:11 ` [PATCH 8/9] drm/i915: Wait on external rendering for GEM objects Chris Wilson
@ 2016-07-18 10:18   ` Chris Wilson
  2016-07-19  6:45     ` Daniel Vetter
  0 siblings, 1 reply; 29+ messages in thread
From: Chris Wilson @ 2016-07-18 10:18 UTC (permalink / raw)
  To: intel-gfx

On Fri, Jul 15, 2016 at 11:11:20AM +0100, Chris Wilson wrote:
> @@ -1366,6 +1368,16 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
>  		GEM_BUG_ON(obj->active);
>  	}
>  
> +	resv = i915_gem_object_get_dmabuf_resv(obj);
> +	if (resv) {
> +		long err;
> +
> +		err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
> +							  MAX_SCHEDULE_TIMEOUT);
> +		if (err < 0)
> +			return err;
> +	}

This introduces a potential WARN_ON into i915_gem_object_free() as the
current i915_vma_unbind() calls i915_gem_object_wait_rendering(). To
hit this path we first need to render with the GPU, have a dma-buf
attached with an unsignaled fence and then interrupt the wait. It does
get fixed later in the series (when i915_vma_unbind() only waits on the
active VMA and not all, including third-party, rendering.

It's only a WARN, so I don't think it is worth deferring it to after
the vma fix is in place.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 8/9] drm/i915: Wait on external rendering for GEM objects
  2016-07-18 10:18   ` Chris Wilson
@ 2016-07-19  6:45     ` Daniel Vetter
  0 siblings, 0 replies; 29+ messages in thread
From: Daniel Vetter @ 2016-07-19  6:45 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On Mon, Jul 18, 2016 at 11:18:27AM +0100, Chris Wilson wrote:
> On Fri, Jul 15, 2016 at 11:11:20AM +0100, Chris Wilson wrote:
> > @@ -1366,6 +1368,16 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
> >  		GEM_BUG_ON(obj->active);
> >  	}
> >  
> > +	resv = i915_gem_object_get_dmabuf_resv(obj);
> > +	if (resv) {
> > +		long err;
> > +
> > +		err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
> > +							  MAX_SCHEDULE_TIMEOUT);
> > +		if (err < 0)
> > +			return err;
> > +	}
> 
> This introduces a potential WARN_ON into i915_gem_object_free() as the
> current i915_vma_unbind() calls i915_gem_object_wait_rendering(). To
> hit this path we first need to render with the GPU, have a dma-buf
> attached with an unsignaled fence and then interrupt the wait. It does
> get fixed later in the series (when i915_vma_unbind() only waits on the
> active VMA and not all, including third-party, rendering).
> 
> It's only a WARN, so I don't think it is worth deferring it to after
> the vma fix is in place.

Adding the above in a FIXME comment right before the call to
resv_obj_wait might be a good interim idea. But yes, given that prime
buffer sharing always resulted in lockdep unhappiness around struct_mutex,
horrible tearing and other sad things, I don't think a temporary WARN_ON
is all that bad. Acked.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
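(Editor's note: a sketch of what the suggested interim FIXME might look
like, spliced into the hunk quoted above; the comment wording is
illustrative and not taken from the series.)

	resv = i915_gem_object_get_dmabuf_resv(obj);
	if (resv) {
		long err;

		/* FIXME: if this wait is interrupted, the error propagates up
		 * through i915_vma_unbind() and can trip the WARN_ON in
		 * i915_gem_object_free(); resolved later in the series once
		 * i915_vma_unbind() waits only on the active VMA.
		 */
		err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
							  MAX_SCHEDULE_TIMEOUT);
		if (err < 0)
			return err;
	}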

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2016-07-19  6:46 UTC | newest]

Thread overview: 29+ messages
2016-07-15 10:11 Derive requests from dma-buf fence Chris Wilson
2016-07-15 10:11 ` [PATCH 1/9] drm/i915: Flush logical context image out to memory upon suspend Chris Wilson
2016-07-15 10:41   ` Mika Kuoppala
2016-07-15 11:11     ` Chris Wilson
2016-07-15 10:11 ` [PATCH 2/9] drm/i915: Move GEM request routines to i915_gem_request.c Chris Wilson
2016-07-15 10:41   ` Joonas Lahtinen
2016-07-15 10:43   ` Mika Kuoppala
2016-07-15 10:11 ` [PATCH 3/9] drm/i915: Retire oldest completed request before allocating next Chris Wilson
2016-07-15 10:11 ` [PATCH 4/9] drm/i915: Mark all current requests as complete before resetting them Chris Wilson
2016-07-15 10:11 ` [PATCH 5/9] drm/i915: Derive GEM requests from dma-fence Chris Wilson
2016-07-15 10:11 ` [PATCH 6/9] drm/i915: Disable waitboosting for fence_wait() Chris Wilson
2016-07-15 10:47   ` Joonas Lahtinen
2016-07-15 11:08     ` Chris Wilson
2016-07-15 11:49       ` Chris Wilson
2016-07-15 12:07         ` Joonas Lahtinen
2016-07-15 10:49   ` Mika Kuoppala
2016-07-15 11:06     ` Chris Wilson
2016-07-15 11:53   ` [PATCH v2] " Chris Wilson
2016-07-15 12:25     ` Mika Kuoppala
2016-07-15 10:11 ` [PATCH 7/9] drm/i915: Disable waitboosting for mmioflips/semaphores Chris Wilson
2016-07-15 11:08   ` Mika Kuoppala
2016-07-15 12:50     ` Chris Wilson
2016-07-15 12:26   ` Mika Kuoppala
2016-07-15 10:11 ` [PATCH 8/9] drm/i915: Wait on external rendering for GEM objects Chris Wilson
2016-07-18 10:18   ` Chris Wilson
2016-07-19  6:45     ` Daniel Vetter
2016-07-15 10:11 ` [PATCH 9/9] drm/i915: Mark imported dma-buf objects as being coherent Chris Wilson
2016-07-15 11:33   ` Mika Kuoppala
2016-07-15 10:48 ` ✗ Ro.CI.BAT: failure for series starting with [1/9] drm/i915: Flush logical context image out to memory upon suspend Patchwork
