* [CI 01/21] drm/i915: Add a sw fence for collecting up dma fences
@ 2016-09-09 11:00 Chris Wilson
  2016-09-09 11:00 ` [CI 02/21] drm/i915: Only queue requests during execlists submission Chris Wilson
                   ` (20 more replies)
  0 siblings, 21 replies; 26+ messages in thread
From: Chris Wilson @ 2016-09-09 11:00 UTC (permalink / raw)
  To: intel-gfx

This is really a core kernel struct in disguise until we can finally
place it in kernel/. There is an immediate need for a fence collection
mechanism that is more flexible than fence-array, in particular being
able to easily drive request submission via events (and not just
interrupt-driven). The same mechanism would be useful for handling
nonblocking and asynchronous atomic modesets, parallel execution and
more, but for the time being just create a local sw fence for execbuf.
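
As a rough usage sketch (illustrative only: submit_request(), struct
my_request and the rq/dma variables are placeholders, not part of this
patch), a notify function is packed into the low bits of fence->flags,
the fence awaits any number of dma fences, and the final commit drops
the initial reference so the notification fires once every dependency
has signaled:

  /* __i915_sw_fence_call (__aligned(4)) keeps the two low bits of the
   * function pointer free for the internal flag bits */
  static int __i915_sw_fence_call
  submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
  {
  	if (state == FENCE_COMPLETE) /* every awaited fence has signaled */
  		submit_request(container_of(fence, struct my_request, submit));
  	return NOTIFY_DONE;
  }

  i915_sw_fence_init(&rq->submit, submit_notify);
  i915_sw_fence_await_dma_fence(&rq->submit, dma, 10 * HZ, GFP_KERNEL);
  i915_sw_fence_commit(&rq->submit); /* drop the initial pending count */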

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/Makefile        |   1 +
 drivers/gpu/drm/i915/i915_sw_fence.c | 362 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_sw_fence.h |  65 +++++++
 3 files changed, 428 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_sw_fence.c
 create mode 100644 drivers/gpu/drm/i915/i915_sw_fence.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index a7da24640e88..a998c2bce70a 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -16,6 +16,7 @@ i915-y := i915_drv.o \
 	  i915_params.o \
 	  i915_pci.o \
 	  i915_suspend.o \
+	  i915_sw_fence.o \
 	  i915_sysfs.o \
 	  intel_csr.o \
 	  intel_device_info.o \
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
new file mode 100644
index 000000000000..3552a336296c
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -0,0 +1,362 @@
+/*
+ * (C) Copyright 2016 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/slab.h>
+#include <linux/fence.h>
+#include <linux/reservation.h>
+
+#include "i915_sw_fence.h"
+
+static DEFINE_SPINLOCK(i915_sw_fence_lock);
+
+static int __i915_sw_fence_notify(struct i915_sw_fence *fence,
+				  enum i915_sw_fence_notify state)
+{
+	i915_sw_fence_notify_t fn;
+
+	fn = (i915_sw_fence_notify_t)(fence->flags & I915_SW_FENCE_MASK);
+	return fn(fence, state);
+}
+
+static void i915_sw_fence_free(struct kref *kref)
+{
+	struct i915_sw_fence *fence = container_of(kref, typeof(*fence), kref);
+
+	WARN_ON(atomic_read(&fence->pending) > 0);
+
+	if (fence->flags & I915_SW_FENCE_MASK)
+		__i915_sw_fence_notify(fence, FENCE_FREE);
+	else
+		kfree(fence);
+}
+
+static void i915_sw_fence_put(struct i915_sw_fence *fence)
+{
+	kref_put(&fence->kref, i915_sw_fence_free);
+}
+
+static struct i915_sw_fence *i915_sw_fence_get(struct i915_sw_fence *fence)
+{
+	kref_get(&fence->kref);
+	return fence;
+}
+
+static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
+					struct list_head *continuation)
+{
+	wait_queue_head_t *x = &fence->wait;
+	wait_queue_t *pos, *next;
+	unsigned long flags;
+
+	smp_wmb();
+	atomic_set(&fence->pending, -1); /* 0 -> -1 [done] */
+
+	/*
+	 * To prevent unbounded recursion as we traverse the graph of
+	 * i915_sw_fences, we move the task_list from this, the next ready
+	 * fence, to the tail of the original fence's task_list
+	 * (and so added to the list to be woken).
+	 */
+
+	spin_lock_irqsave_nested(&x->lock, flags, 1 + !!continuation);
+	if (continuation) {
+		list_for_each_entry_safe(pos, next, &x->task_list, task_list) {
+			if (pos->func == autoremove_wake_function)
+				pos->func(pos, TASK_NORMAL, 0, continuation);
+			else
+				list_move_tail(&pos->task_list, continuation);
+		}
+	} else {
+		LIST_HEAD(extra);
+
+		do {
+			list_for_each_entry_safe(pos, next,
+						 &x->task_list, task_list)
+				pos->func(pos, TASK_NORMAL, 0, &extra);
+
+			if (list_empty(&extra))
+				break;
+
+			list_splice_tail_init(&extra, &x->task_list);
+		} while (1);
+	}
+	spin_unlock_irqrestore(&x->lock, flags);
+}
+
+static void __i915_sw_fence_complete(struct i915_sw_fence *fence,
+				     struct list_head *continuation)
+{
+	if (!atomic_dec_and_test(&fence->pending))
+		return;
+
+	if (fence->flags & I915_SW_FENCE_MASK &&
+	    __i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE)
+		return;
+
+	__i915_sw_fence_wake_up_all(fence, continuation);
+}
+
+static void i915_sw_fence_complete(struct i915_sw_fence *fence)
+{
+	if (WARN_ON(i915_sw_fence_done(fence)))
+		return;
+
+	__i915_sw_fence_complete(fence, NULL);
+}
+
+static void i915_sw_fence_await(struct i915_sw_fence *fence)
+{
+	WARN_ON(atomic_inc_return(&fence->pending) <= 1);
+}
+
+void i915_sw_fence_init(struct i915_sw_fence *fence, i915_sw_fence_notify_t fn)
+{
+	BUG_ON((unsigned long)fn & ~I915_SW_FENCE_MASK);
+
+	init_waitqueue_head(&fence->wait);
+	kref_init(&fence->kref);
+	atomic_set(&fence->pending, 1);
+	fence->flags = (unsigned long)fn;
+}
+
+void i915_sw_fence_commit(struct i915_sw_fence *fence)
+{
+	i915_sw_fence_complete(fence);
+	i915_sw_fence_put(fence);
+}
+
+static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key)
+{
+	list_del(&wq->task_list);
+	__i915_sw_fence_complete(wq->private, key);
+	i915_sw_fence_put(wq->private);
+	return 0;
+}
+
+static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
+				    const struct i915_sw_fence * const signaler)
+{
+	wait_queue_t *wq;
+
+	if (__test_and_set_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags))
+		return false;
+
+	if (fence == signaler)
+		return true;
+
+	list_for_each_entry(wq, &fence->wait.task_list, task_list) {
+		if (wq->func != i915_sw_fence_wake)
+			continue;
+
+		if (__i915_sw_fence_check_if_after(wq->private, signaler))
+			return true;
+	}
+
+	return false;
+}
+
+static void __i915_sw_fence_clear_checked_bit(struct i915_sw_fence *fence)
+{
+	wait_queue_t *wq;
+
+	if (!__test_and_clear_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags))
+		return;
+
+	list_for_each_entry(wq, &fence->wait.task_list, task_list) {
+		if (wq->func != i915_sw_fence_wake)
+			continue;
+
+		__i915_sw_fence_clear_checked_bit(wq->private);
+	}
+}
+
+static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
+				  const struct i915_sw_fence * const signaler)
+{
+	unsigned long flags;
+	bool err;
+
+	if (!IS_ENABLED(CONFIG_I915_SW_FENCE_CHECK_DAG))
+		return false;
+
+	spin_lock_irqsave(&i915_sw_fence_lock, flags);
+	err = __i915_sw_fence_check_if_after(fence, signaler);
+	__i915_sw_fence_clear_checked_bit(fence);
+	spin_unlock_irqrestore(&i915_sw_fence_lock, flags);
+
+	return err;
+}
+
+int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
+				 struct i915_sw_fence *signaler,
+				 wait_queue_t *wq)
+{
+	unsigned long flags;
+	int pending;
+
+	if (i915_sw_fence_done(signaler))
+		return 0;
+
+	/* The dependency graph must be acyclic. */
+	if (unlikely(i915_sw_fence_check_if_after(fence, signaler)))
+		return -EINVAL;
+
+	INIT_LIST_HEAD(&wq->task_list);
+	wq->flags = 0;
+	wq->func = i915_sw_fence_wake;
+	wq->private = i915_sw_fence_get(fence);
+
+	i915_sw_fence_await(fence);
+
+	spin_lock_irqsave(&signaler->wait.lock, flags);
+	if (likely(!i915_sw_fence_done(signaler))) {
+		__add_wait_queue_tail(&signaler->wait, wq);
+		pending = 1;
+	} else {
+		i915_sw_fence_wake(wq, 0, 0, NULL);
+		pending = 0;
+	}
+	spin_unlock_irqrestore(&signaler->wait.lock, flags);
+
+	return pending;
+}
+
+struct dma_fence_cb {
+	struct fence_cb base;
+	struct i915_sw_fence *fence;
+	struct fence *dma;
+	struct timer_list timer;
+};
+
+static void timer_i915_sw_fence_wake(unsigned long data)
+{
+	struct dma_fence_cb *cb = (struct dma_fence_cb *)data;
+
+	printk(KERN_WARNING "asynchronous wait on fence %s:%s:%d timed out\n",
+	       cb->dma->ops->get_driver_name(cb->dma),
+	       cb->dma->ops->get_timeline_name(cb->dma),
+	       cb->dma->seqno);
+	fence_put(cb->dma);
+	cb->dma = NULL;
+
+	i915_sw_fence_commit(cb->fence);
+	cb->timer.function = NULL;
+}
+
+static void dma_i915_sw_fence_wake(struct fence *dma, struct fence_cb *data)
+{
+	struct dma_fence_cb *cb = container_of(data, typeof(*cb), base);
+
+	del_timer_sync(&cb->timer);
+	if (cb->timer.function)
+		i915_sw_fence_commit(cb->fence);
+	fence_put(cb->dma);
+
+	kfree(cb);
+}
+
+int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+				  struct fence *dma,
+				  unsigned long timeout,
+				  gfp_t gfp)
+{
+	struct dma_fence_cb *cb;
+	int ret;
+
+	if (fence_is_signaled(dma))
+		return 0;
+
+	cb = kmalloc(sizeof(*cb), gfp);
+	if (!cb) {
+		if (!gfpflags_allow_blocking(gfp))
+			return -ENOMEM;
+
+		return fence_wait(dma, false);
+	}
+
+	cb->fence = i915_sw_fence_get(fence);
+	i915_sw_fence_await(fence);
+
+	__setup_timer(&cb->timer,
+		      timer_i915_sw_fence_wake, (unsigned long)cb,
+		      TIMER_IRQSAFE);
+	if (timeout) {
+		cb->dma = fence_get(dma);
+		mod_timer(&cb->timer, round_jiffies_up(jiffies + timeout));
+	}
+
+	ret = fence_add_callback(dma, &cb->base, dma_i915_sw_fence_wake);
+	if (ret == 0) {
+		ret = 1;
+	} else {
+		dma_i915_sw_fence_wake(dma, &cb->base);
+		if (ret == -ENOENT) /* fence already signaled */
+			ret = 0;
+	}
+
+	return ret;
+}
+
+int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
+				    struct reservation_object *resv,
+				    const struct fence_ops *exclude,
+				    bool write,
+				    unsigned long timeout,
+				    gfp_t gfp)
+{
+	struct fence *excl;
+	int ret = 0, pending;
+
+	if (write) {
+		struct fence **shared;
+		unsigned int count, i;
+
+		ret = reservation_object_get_fences_rcu(resv,
+							&excl, &count, &shared);
+		if (ret)
+			return ret;
+
+		for (i = 0; i < count; i++) {
+			if (shared[i]->ops == exclude)
+				continue;
+
+			pending = i915_sw_fence_await_dma_fence(fence,
+								shared[i],
+								timeout,
+								gfp);
+			if (pending < 0) {
+				ret = pending;
+				break;
+			}
+
+			ret |= pending;
+		}
+
+		for (i = 0; i < count; i++)
+			fence_put(shared[i]);
+		kfree(shared);
+	} else {
+		excl = reservation_object_get_excl_rcu(resv);
+	}
+
+	if (ret >= 0 && excl && excl->ops != exclude) {
+		pending = i915_sw_fence_await_dma_fence(fence,
+							excl,
+							timeout,
+							gfp);
+		if (pending < 0)
+			ret = pending;
+		else
+			ret |= pending;
+	}
+
+	fence_put(excl);
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
new file mode 100644
index 000000000000..373141602ca4
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -0,0 +1,65 @@
+/*
+ * i915_sw_fence.h - library routines for N:M synchronisation points
+ *
+ * Copyright (C) 2016 Intel Corporation
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#ifndef _I915_SW_FENCE_H_
+#define _I915_SW_FENCE_H_
+
+#include <linux/gfp.h>
+#include <linux/kref.h>
+#include <linux/notifier.h> /* for NOTIFY_DONE */
+#include <linux/wait.h>
+
+struct completion;
+struct fence;
+struct fence_ops;
+struct reservation_object;
+
+struct i915_sw_fence {
+	wait_queue_head_t wait;
+	unsigned long flags;
+	struct kref kref;
+	atomic_t pending;
+};
+
+#define I915_SW_FENCE_CHECKED_BIT	0 /* used internally for DAG checking */
+#define I915_SW_FENCE_PRIVATE_BIT	1 /* available for use by owner */
+#define I915_SW_FENCE_MASK		(~3)
+
+enum i915_sw_fence_notify {
+	FENCE_COMPLETE,
+	FENCE_FREE
+};
+
+typedef int (*i915_sw_fence_notify_t)(struct i915_sw_fence *,
+				      enum i915_sw_fence_notify state);
+#define __i915_sw_fence_call __aligned(4)
+
+void i915_sw_fence_init(struct i915_sw_fence *fence, i915_sw_fence_notify_t fn);
+void i915_sw_fence_commit(struct i915_sw_fence *fence);
+
+int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
+				 struct i915_sw_fence *after,
+				 wait_queue_t *wq);
+int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+				  struct fence *dma,
+				  unsigned long timeout,
+				  gfp_t gfp);
+int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
+				    struct reservation_object *resv,
+				    const struct fence_ops *exclude,
+				    bool write,
+				    unsigned long timeout,
+				    gfp_t gfp);
+
+static inline bool i915_sw_fence_done(const struct i915_sw_fence *fence)
+{
+	return atomic_read(&fence->pending) < 0;
+}
+
+#endif /* _I915_SW_FENCE_H_ */
-- 
2.9.3

* [CI 02/21] drm/i915: Only queue requests during execlists submission
  2016-09-09 11:00 [CI 01/21] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
@ 2016-09-09 11:00 ` Chris Wilson
  2016-09-09 11:00 ` [CI 03/21] drm/i915: Record the position of the workarounds in the tail of the request Chris Wilson
                   ` (19 subsequent siblings)
  20 siblings, 0 replies; 26+ messages in thread
From: Chris Wilson @ 2016-09-09 11:00 UTC (permalink / raw)
  To: intel-gfx

Leave the more complicated request dequeueing to the tasklet and instead
just kick-start the tasklet if we detect we are adding the first
request.

v2: Play around with list operators until we agree upon something

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 28 ++++------------------------
 1 file changed, 4 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 92bfe47ad33c..9bfe304c5256 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -609,35 +609,15 @@ static void intel_lrc_irq_handler(unsigned long data)
 static void execlists_submit_request(struct drm_i915_gem_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
-	struct drm_i915_gem_request *cursor;
-	int num_elements = 0;
 
 	spin_lock_bh(&engine->execlist_lock);
 
-	list_for_each_entry(cursor, &engine->execlist_queue, execlist_link)
-		if (++num_elements > 2)
-			break;
-
-	if (num_elements > 2) {
-		struct drm_i915_gem_request *tail_req;
-
-		tail_req = list_last_entry(&engine->execlist_queue,
-					   struct drm_i915_gem_request,
-					   execlist_link);
-
-		if (request->ctx == tail_req->ctx) {
-			WARN(tail_req->elsp_submitted != 0,
-				"More than 2 already-submitted reqs queued\n");
-			list_del(&tail_req->execlist_link);
-			i915_gem_request_put(tail_req);
-		}
-	}
-
 	i915_gem_request_get(request);
-	list_add_tail(&request->execlist_link, &engine->execlist_queue);
 	request->ctx_hw_id = request->ctx->hw_id;
-	if (num_elements == 0)
-		execlists_unqueue(engine);
+
+	if (list_empty(&engine->execlist_queue))
+		tasklet_hi_schedule(&engine->irq_tasklet);
+	list_add_tail(&request->execlist_link, &engine->execlist_queue);
 
 	spin_unlock_bh(&engine->execlist_lock);
 }
-- 
2.9.3

* [CI 03/21] drm/i915: Record the position of the workarounds in the tail of the request
  2016-09-09 11:00 [CI 01/21] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
  2016-09-09 11:00 ` [CI 02/21] drm/i915: Only queue requests during execlists submission Chris Wilson
@ 2016-09-09 11:00 ` Chris Wilson
  2016-09-09 11:00 ` [CI 04/21] drm/i915: Compute the ELSP register location once Chris Wilson
                   ` (18 subsequent siblings)
  20 siblings, 0 replies; 26+ messages in thread
From: Chris Wilson @ 2016-09-09 11:00 UTC (permalink / raw)
  To: intel-gfx

Rather than blindly assuming how far we need to advance the tail when
resubmitting the request via the ELSP, record the position explicitly.
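
Concretely, a before/after sketch condensed from the hunks below (the
old code hard-coded the 8 bytes of the two MI_NOOP padding dwords):

  /* old: assume the resubmission padding is always 8 bytes */
  req0->tail += 8;
  req0->tail &= req0->ring->size - 1;

  /* new: remember where the workarounds ended when emitting... */
  request->wa_tail = ring->tail;
  /* ...and resubmit up to exactly that point */
  req0->tail = req0->wa_tail;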

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.h | 15 +++++++++------
 drivers/gpu/drm/i915/intel_lrc.c        |  4 ++--
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 91014de8bfbc..2faa3bb4c39b 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -88,20 +88,23 @@ struct drm_i915_gem_request {
 	 */
 	u32 previous_seqno;
 
-	/** Position in the ringbuffer of the start of the request */
+	/** Position in the ring of the start of the request */
 	u32 head;
 
 	/**
-	 * Position in the ringbuffer of the start of the postfix.
-	 * This is required to calculate the maximum available ringbuffer
-	 * space without overwriting the postfix.
+	 * Position in the ring of the start of the postfix.
+	 * This is required to calculate the maximum available ring space
+	 * without overwriting the postfix.
 	 */
 	u32 postfix;
 
-	/** Position in the ringbuffer of the end of the whole request */
+	/** Position in the ring of the end of the whole request */
 	u32 tail;
 
-	/** Preallocate space in the ringbuffer for the emitting the request */
+	/** Position in the ring of the end of any workarounds after the tail */
+	u32 wa_tail;
+
+	/** Preallocate space in the ring for the emitting the request */
 	u32 reserved_space;
 
 	/**
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 9bfe304c5256..d7fa9b3a55c3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -482,8 +482,7 @@ static void execlists_unqueue(struct intel_engine_cs *engine)
 		 * resubmit the request. See gen8_emit_request() for where we
 		 * prepare the padding after the end of the request.
 		 */
-		req0->tail += 8;
-		req0->tail &= req0->ring->size - 1;
+		req0->tail = req0->wa_tail;
 	}
 
 	execlists_elsp_submit_contexts(req0, req1);
@@ -711,6 +710,7 @@ intel_logical_ring_advance(struct drm_i915_gem_request *request)
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
+	request->wa_tail = ring->tail;
 
 	/* We keep the previous context alive until we retire the following
 	 * request. This ensures that any the context object is still pinned
-- 
2.9.3

* [CI 04/21] drm/i915: Compute the ELSP register location once
  2016-09-09 11:00 [CI 01/21] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
  2016-09-09 11:00 ` [CI 02/21] drm/i915: Only queue requests during execlists submission Chris Wilson
  2016-09-09 11:00 ` [CI 03/21] drm/i915: Record the position of the workarounds in the tail of the request Chris Wilson
@ 2016-09-09 11:00 ` Chris Wilson
  2016-09-09 11:00 ` [CI 05/21] drm/i915: Reorder submitting the requests to ELSP Chris Wilson
                   ` (17 subsequent siblings)
  20 siblings, 0 replies; 26+ messages in thread
From: Chris Wilson @ 2016-09-09 11:00 UTC (permalink / raw)
  To: intel-gfx

Similar to the issue with reading from the context status buffer (see
commit 26720ab97fea ("drm/i915: Move CSB MMIO reads out of the execlists
lock")), we frequently write to the ELSP register (4 writes per
interrupt) and know we hold the required spinlock and forcewake
throughout. We can reduce the cost of writing these registers beyond
what I915_WRITE_FW() already saves by precomputing the address of the
ELSP register. We also note that the subsequent posting read serves no
purpose here, and are happy to see it go.

v2: Address I915_WRITE mistakes in changelog

   text    data     bss     dec     hex filename
1259784    4581     576 1264941  134d2d drivers/gpu/drm/i915/i915.ko
1259720    4581     576 1264877  134ced drivers/gpu/drm/i915/i915.ko

Saves 64 bytes of address recomputation.
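
For reference, a simplified before/after of a single ELSP write (the
I915_WRITE_FW() expansion is paraphrased here):

  /* before: the mmio address is recomputed on every write */
  I915_WRITE_FW(RING_ELSP(engine), upper_32_bits(desc[1]));

  /* after: compute the address once per submission, then write directly */
  u32 __iomem *elsp =
  	dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
  writel(upper_32_bits(desc[1]), elsp);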

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d7fa9b3a55c3..a6b9033203e5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -331,10 +331,11 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
 static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
 				 struct drm_i915_gem_request *rq1)
 {
-
 	struct intel_engine_cs *engine = rq0->engine;
 	struct drm_i915_private *dev_priv = rq0->i915;
-	uint64_t desc[2];
+	u32 __iomem *elsp =
+		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+	u64 desc[2];
 
 	if (rq1) {
 		desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine);
@@ -347,15 +348,12 @@ static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
 	rq0->elsp_submitted++;
 
 	/* You must always write both descriptors in the order below. */
-	I915_WRITE_FW(RING_ELSP(engine), upper_32_bits(desc[1]));
-	I915_WRITE_FW(RING_ELSP(engine), lower_32_bits(desc[1]));
+	writel(upper_32_bits(desc[1]), elsp);
+	writel(lower_32_bits(desc[1]), elsp);
 
-	I915_WRITE_FW(RING_ELSP(engine), upper_32_bits(desc[0]));
+	writel(upper_32_bits(desc[0]), elsp);
 	/* The context is automatically loaded after the following */
-	I915_WRITE_FW(RING_ELSP(engine), lower_32_bits(desc[0]));
-
-	/* ELSP is a wo register, use another nearby reg for posting */
-	POSTING_READ_FW(RING_EXECLIST_STATUS_LO(engine));
+	writel(lower_32_bits(desc[0]), elsp);
 }
 
 static void
-- 
2.9.3

* [CI 05/21] drm/i915: Reorder submitting the requests to ELSP
  2016-09-09 11:00 [CI 01/21] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
                   ` (2 preceding siblings ...)
  2016-09-09 11:00 ` [CI 04/21] drm/i915: Compute the ELSP register location once Chris Wilson
@ 2016-09-09 11:00 ` Chris Wilson
  2016-09-09 11:00 ` [CI 06/21] drm/i915: Simplify ELSP queue request tracking Chris Wilson
                   ` (16 subsequent siblings)
  20 siblings, 0 replies; 26+ messages in thread
From: Chris Wilson @ 2016-09-09 11:00 UTC (permalink / raw)
  To: intel-gfx

Just rearrange the code to reduce churn in the next patch.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 76 ++++++++++++++++++++--------------------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a6b9033203e5..7bb743f79d18 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -328,32 +328,18 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
 	return ctx->engine[engine->id].lrc_desc;
 }
 
-static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
-				 struct drm_i915_gem_request *rq1)
+static inline void
+execlists_context_status_change(struct drm_i915_gem_request *rq,
+				unsigned long status)
 {
-	struct intel_engine_cs *engine = rq0->engine;
-	struct drm_i915_private *dev_priv = rq0->i915;
-	u32 __iomem *elsp =
-		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
-	u64 desc[2];
-
-	if (rq1) {
-		desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine);
-		rq1->elsp_submitted++;
-	} else {
-		desc[1] = 0;
-	}
-
-	desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->engine);
-	rq0->elsp_submitted++;
-
-	/* You must always write both descriptors in the order below. */
-	writel(upper_32_bits(desc[1]), elsp);
-	writel(lower_32_bits(desc[1]), elsp);
+	/*
+	 * Only used when GVT-g is enabled now. When GVT-g is disabled,
+	 * The compiler should eliminate this function as dead-code.
+	 */
+	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
+		return;
 
-	writel(upper_32_bits(desc[0]), elsp);
-	/* The context is automatically loaded after the following */
-	writel(lower_32_bits(desc[0]), elsp);
+	atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq);
 }
 
 static void
@@ -382,6 +368,34 @@ static void execlists_update_context(struct drm_i915_gem_request *rq)
 		execlists_update_context_pdps(ppgtt, reg_state);
 }
 
+static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
+				 struct drm_i915_gem_request *rq1)
+{
+	struct intel_engine_cs *engine = rq0->engine;
+	struct drm_i915_private *dev_priv = rq0->i915;
+	u32 __iomem *elsp =
+		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+	u64 desc[2];
+
+	if (rq1) {
+		desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine);
+		rq1->elsp_submitted++;
+	} else {
+		desc[1] = 0;
+	}
+
+	desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->engine);
+	rq0->elsp_submitted++;
+
+	/* You must always write both descriptors in the order below. */
+	writel(upper_32_bits(desc[1]), elsp);
+	writel(lower_32_bits(desc[1]), elsp);
+
+	writel(upper_32_bits(desc[0]), elsp);
+	/* The context is automatically loaded after the following */
+	writel(lower_32_bits(desc[0]), elsp);
+}
+
 static void execlists_elsp_submit_contexts(struct drm_i915_gem_request *rq0,
 					   struct drm_i915_gem_request *rq1)
 {
@@ -402,20 +416,6 @@ static void execlists_elsp_submit_contexts(struct drm_i915_gem_request *rq0,
 	spin_unlock_irq(&dev_priv->uncore.lock);
 }
 
-static inline void execlists_context_status_change(
-		struct drm_i915_gem_request *rq,
-		unsigned long status)
-{
-	/*
-	 * Only used when GVT-g is enabled now. When GVT-g is disabled,
-	 * The compiler should eliminate this function as dead-code.
-	 */
-	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
-		return;
-
-	atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq);
-}
-
 static void execlists_unqueue(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
-- 
2.9.3

* [CI 06/21] drm/i915: Simplify ELSP queue request tracking
  2016-09-09 11:00 [CI 01/21] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
                   ` (3 preceding siblings ...)
  2016-09-09 11:00 ` [CI 05/21] drm/i915: Reorder submitting the requests to ELSP Chris Wilson
@ 2016-09-09 11:00 ` Chris Wilson
  2016-09-09 11:00 ` [CI 07/21] drm/i915: Separate out reset flags from the reset counter Chris Wilson
                   ` (15 subsequent siblings)
  20 siblings, 0 replies; 26+ messages in thread
From: Chris Wilson @ 2016-09-09 11:00 UTC (permalink / raw)
  To: intel-gfx

Emulate the HW to track and manage the ELSP queue. A set of SW ports is
defined, and requests are assigned to these ports before being submitted
to HW. Decoupling the ELSP queue management in this way makes it easier
to clean up incomplete requests during reset recovery, especially after
an engine reset. This will become clearer in the next patch.

In the engine reset case we want to resume where we left off after
skipping the incomplete batch, which requires checking the ELSP queue,
removing elements and, in some cases, fixing up the elsp_submitted
counts. Instead of manipulating the ELSP queue directly from the reset
path, we can examine these ports, fix up the ringbuffer pointers using
the incomplete request, and restart submission after the reset.
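
The heart of the change is a pair of SW ports shadowing the two ELSP
slots (condensed from the patch below):

  struct execlist_port {
  	struct drm_i915_gem_request *request; /* last request for this ctx */
  	unsigned int count; /* ELSP submissions outstanding for this ctx */
  } execlist_port[2];

Consecutive requests for the same context are coalesced onto a single
port by advancing its RING_TAIL, and when a completion event drains
port[0], port[1] slides into its place.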

Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/1470414607-32453-3-git-send-email-arun.siluvery@linux.intel.com
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |   2 +-
 drivers/gpu/drm/i915/i915_gem.c         |  13 +-
 drivers/gpu/drm/i915/i915_gem_request.c |   1 -
 drivers/gpu/drm/i915/i915_gem_request.h |  21 +-
 drivers/gpu/drm/i915/intel_lrc.c        | 402 +++++++++++++-------------------
 drivers/gpu/drm/i915/intel_lrc.h        |   2 -
 drivers/gpu/drm/i915/intel_ringbuffer.h |   7 +-
 7 files changed, 184 insertions(+), 264 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 02b627e03dd8..98af57cd5096 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2051,7 +2051,7 @@ static int i915_execlists(struct seq_file *m, void *data)
 		status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
 		seq_printf(m, "\tStatus pointer: 0x%08X\n", status_pointer);
 
-		read_pointer = engine->next_context_status_buffer;
+		read_pointer = GEN8_CSB_READ_PTR(status_pointer);
 		write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
 		if (read_pointer > write_pointer)
 			write_pointer += GEN8_CSB_ENTRIES;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2401818171f5..b00fb8548d50 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2575,6 +2575,9 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 	struct drm_i915_gem_request *request;
 	struct intel_ring *ring;
 
+	/* Ensure irq handler finishes, and not run again. */
+	tasklet_kill(&engine->irq_tasklet);
+
 	/* Mark all pending requests as complete so that any concurrent
 	 * (lockless) lookup doesn't try and wait upon the request as we
 	 * reset it.
@@ -2588,10 +2591,12 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 	 */
 
 	if (i915.enable_execlists) {
-		/* Ensure irq handler finishes or is cancelled. */
-		tasklet_kill(&engine->irq_tasklet);
-
-		intel_execlists_cancel_requests(engine);
+		spin_lock(&engine->execlist_lock);
+		INIT_LIST_HEAD(&engine->execlist_queue);
+		i915_gem_request_put(engine->execlist_port[0].request);
+		i915_gem_request_put(engine->execlist_port[1].request);
+		memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
+		spin_unlock(&engine->execlist_lock);
 	}
 
 	/*
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 9cc08a1e43c6..ec613fd5e01c 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -402,7 +402,6 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	req->previous_context = NULL;
 	req->file_priv = NULL;
 	req->batch = NULL;
-	req->elsp_submitted = 0;
 
 	/*
 	 * Reserve space in the ring buffer for all the commands required to
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 2faa3bb4c39b..a231bd318ef0 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -137,27 +137,8 @@ struct drm_i915_gem_request {
 	/** file_priv list entry for this request */
 	struct list_head client_list;
 
-	/**
-	 * The ELSP only accepts two elements at a time, so we queue
-	 * context/tail pairs on a given queue (ring->execlist_queue) until the
-	 * hardware is available. The queue serves a double purpose: we also use
-	 * it to keep track of the up to 2 contexts currently in the hardware
-	 * (usually one in execution and the other queued up by the GPU): We
-	 * only remove elements from the head of the queue when the hardware
-	 * informs us that an element has been completed.
-	 *
-	 * All accesses to the queue are mediated by a spinlock
-	 * (ring->execlist_lock).
-	 */
-
-	/** Execlist link in the submission queue.*/
+	/** Link in the execlist submission queue, guarded by execlist_lock. */
 	struct list_head execlist_link;
-
-	/** Execlists no. of times this request has been sent to the ELSP */
-	int elsp_submitted;
-
-	/** Execlists context hardware id. */
-	unsigned int ctx_hw_id;
 };
 
 extern const struct fence_ops i915_fence_ops;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7bb743f79d18..a33687d294b5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -156,6 +156,11 @@
 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
 
+#define GEN8_CTX_STATUS_COMPLETED_MASK \
+	 (GEN8_CTX_STATUS_ACTIVE_IDLE | \
+	  GEN8_CTX_STATUS_PREEMPTED | \
+	  GEN8_CTX_STATUS_ELEMENT_SWITCH)
+
 #define CTX_LRI_HEADER_0		0x01
 #define CTX_CONTEXT_CONTROL		0x02
 #define CTX_RING_HEAD			0x04
@@ -263,12 +268,10 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 
-	if (IS_GEN8(dev_priv) || IS_GEN9(dev_priv))
-		engine->idle_lite_restore_wa = ~0;
-
-	engine->disable_lite_restore_wa = (IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) ||
-					IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) &&
-					(engine->id == VCS || engine->id == VCS2);
+	engine->disable_lite_restore_wa =
+		(IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) ||
+		 IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) &&
+		(engine->id == VCS || engine->id == VCS2);
 
 	engine->ctx_desc_template = GEN8_CTX_VALID;
 	if (IS_GEN8(dev_priv))
@@ -351,11 +354,11 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
 	ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
 }
 
-static void execlists_update_context(struct drm_i915_gem_request *rq)
+static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 {
-	struct intel_engine_cs *engine = rq->engine;
+	struct intel_context *ce = &rq->ctx->engine[rq->engine->id];
 	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
-	uint32_t *reg_state = rq->ctx->engine[engine->id].lrc_reg_state;
+	u32 *reg_state = ce->lrc_reg_state;
 
 	reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail);
 
@@ -366,26 +369,34 @@ static void execlists_update_context(struct drm_i915_gem_request *rq)
 	 */
 	if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
 		execlists_update_context_pdps(ppgtt, reg_state);
+
+	return ce->lrc_desc;
 }
 
-static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
-				 struct drm_i915_gem_request *rq1)
+static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
-	struct intel_engine_cs *engine = rq0->engine;
-	struct drm_i915_private *dev_priv = rq0->i915;
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct execlist_port *port = engine->execlist_port;
 	u32 __iomem *elsp =
 		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
 	u64 desc[2];
 
-	if (rq1) {
-		desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine);
-		rq1->elsp_submitted++;
+	if (!port[0].count)
+		execlists_context_status_change(port[0].request,
+						INTEL_CONTEXT_SCHEDULE_IN);
+	desc[0] = execlists_update_context(port[0].request);
+	engine->preempt_wa = port[0].count++; /* bdw only? fixed on skl? */
+
+	if (port[1].request) {
+		GEM_BUG_ON(port[1].count);
+		execlists_context_status_change(port[1].request,
+						INTEL_CONTEXT_SCHEDULE_IN);
+		desc[1] = execlists_update_context(port[1].request);
+		port[1].count = 1;
 	} else {
 		desc[1] = 0;
 	}
-
-	desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->engine);
-	rq0->elsp_submitted++;
+	GEM_BUG_ON(desc[0] == desc[1]);
 
 	/* You must always write both descriptors in the order below. */
 	writel(upper_32_bits(desc[1]), elsp);
@@ -396,141 +407,125 @@ static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
 	writel(lower_32_bits(desc[0]), elsp);
 }
 
-static void execlists_elsp_submit_contexts(struct drm_i915_gem_request *rq0,
-					   struct drm_i915_gem_request *rq1)
+static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
 {
-	struct drm_i915_private *dev_priv = rq0->i915;
-	unsigned int fw_domains = rq0->engine->fw_domains;
-
-	execlists_update_context(rq0);
-
-	if (rq1)
-		execlists_update_context(rq1);
+	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
+		ctx->execlists_force_single_submission);
+}
 
-	spin_lock_irq(&dev_priv->uncore.lock);
-	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
+static bool can_merge_ctx(const struct i915_gem_context *prev,
+			  const struct i915_gem_context *next)
+{
+	if (prev != next)
+		return false;
 
-	execlists_elsp_write(rq0, rq1);
+	if (ctx_single_port_submission(prev))
+		return false;
 
-	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
-	spin_unlock_irq(&dev_priv->uncore.lock);
+	return true;
 }
 
-static void execlists_unqueue(struct intel_engine_cs *engine)
+static void execlists_dequeue(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
-	struct drm_i915_gem_request *cursor, *tmp;
+	struct drm_i915_gem_request *cursor, *last;
+	struct execlist_port *port = engine->execlist_port;
+	bool submit = false;
+
+	last = port->request;
+	if (last)
+		/* WaIdleLiteRestore:bdw,skl
+		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
+		 * as we resubmit the request. See gen8_emit_request()
+		 * for where we prepare the padding after the end of the
+		 * request.
+		 */
+		last->tail = last->wa_tail;
 
-	assert_spin_locked(&engine->execlist_lock);
+	GEM_BUG_ON(port[1].request);
 
-	/*
-	 * If irqs are not active generate a warning as batches that finish
-	 * without the irqs may get lost and a GPU Hang may occur.
+	/* Hardware submission is through 2 ports. Conceptually each port
+	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
+	 * static for a context, and unique to each, so we only execute
+	 * requests belonging to a single context from each ring. RING_HEAD
+	 * is maintained by the CS in the context image, it marks the place
+	 * where it got up to last time, and through RING_TAIL we tell the CS
+	 * where we want to execute up to this time.
+	 *
+	 * In this list the requests are in order of execution. Consecutive
+	 * requests from the same context are adjacent in the ringbuffer. We
+	 * can combine these requests into a single RING_TAIL update:
+	 *
+	 *              RING_HEAD...req1...req2
+	 *                                    ^- RING_TAIL
+	 * since to execute req2 the CS must first execute req1.
+	 *
+	 * Our goal then is to point each port to the end of a consecutive
+	 * sequence of requests as being the most optimal (fewest wake ups
+	 * and context switches) submission.
 	 */
-	WARN_ON(!intel_irqs_enabled(engine->i915));
-
-	/* Try to read in pairs */
-	list_for_each_entry_safe(cursor, tmp, &engine->execlist_queue,
-				 execlist_link) {
-		if (!req0) {
-			req0 = cursor;
-		} else if (req0->ctx == cursor->ctx) {
-			/* Same ctx: ignore first request, as second request
-			 * will update tail past first request's workload */
-			cursor->elsp_submitted = req0->elsp_submitted;
-			list_del(&req0->execlist_link);
-			i915_gem_request_put(req0);
-			req0 = cursor;
-		} else {
-			if (IS_ENABLED(CONFIG_DRM_I915_GVT)) {
-				/*
-				 * req0 (after merged) ctx requires single
-				 * submission, stop picking
-				 */
-				if (req0->ctx->execlists_force_single_submission)
-					break;
-				/*
-				 * req0 ctx doesn't require single submission,
-				 * but next req ctx requires, stop picking
-				 */
-				if (cursor->ctx->execlists_force_single_submission)
-					break;
-			}
-			req1 = cursor;
-			WARN_ON(req1->elsp_submitted);
-			break;
-		}
-	}
 
-	if (unlikely(!req0))
-		return;
-
-	execlists_context_status_change(req0, INTEL_CONTEXT_SCHEDULE_IN);
-
-	if (req1)
-		execlists_context_status_change(req1,
-						INTEL_CONTEXT_SCHEDULE_IN);
-
-	if (req0->elsp_submitted & engine->idle_lite_restore_wa) {
-		/*
-		 * WaIdleLiteRestore: make sure we never cause a lite restore
-		 * with HEAD==TAIL.
+	spin_lock(&engine->execlist_lock);
+	list_for_each_entry(cursor, &engine->execlist_queue, execlist_link) {
+		/* Can we combine this request with the current port? It has to
+		 * be the same context/ringbuffer and not have any exceptions
+		 * (e.g. GVT saying never to combine contexts).
 		 *
-		 * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL as we
-		 * resubmit the request. See gen8_emit_request() for where we
-		 * prepare the padding after the end of the request.
+		 * If we can combine the requests, we can execute both by
+		 * updating the RING_TAIL to point to the end of the second
+		 * request, and so we never need to tell the hardware about
+		 * the first.
 		 */
-		req0->tail = req0->wa_tail;
+		if (last && !can_merge_ctx(cursor->ctx, last->ctx)) {
+			/* If we are on the second port and cannot combine
+			 * this request with the last, then we are done.
+			 */
+			if (port != engine->execlist_port)
+				break;
+
+			/* If GVT overrides us we only ever submit port[0],
+			 * leaving port[1] empty. Note that we also have
+			 * to be careful that we don't queue the same
+			 * context (even though a different request) to
+			 * the second port.
+			 */
+			if (ctx_single_port_submission(cursor->ctx))
+				break;
+
+			GEM_BUG_ON(last->ctx == cursor->ctx);
+
+			i915_gem_request_assign(&port->request, last);
+			port++;
+		}
+		last = cursor;
+		submit = true;
+	}
+	if (submit) {
+		/* Decouple all the requests submitted from the queue */
+		engine->execlist_queue.next = &cursor->execlist_link;
+		cursor->execlist_link.prev = &engine->execlist_queue;
+
+		i915_gem_request_assign(&port->request, last);
 	}
+	spin_unlock(&engine->execlist_lock);
 
-	execlists_elsp_submit_contexts(req0, req1);
+	if (submit)
+		execlists_submit_ports(engine);
 }
 
-static unsigned int
-execlists_check_remove_request(struct intel_engine_cs *engine, u32 ctx_id)
+static bool execlists_elsp_idle(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *head_req;
-
-	assert_spin_locked(&engine->execlist_lock);
-
-	head_req = list_first_entry_or_null(&engine->execlist_queue,
-					    struct drm_i915_gem_request,
-					    execlist_link);
-
-	if (WARN_ON(!head_req || (head_req->ctx_hw_id != ctx_id)))
-               return 0;
-
-	WARN(head_req->elsp_submitted == 0, "Never submitted head request\n");
-
-	if (--head_req->elsp_submitted > 0)
-		return 0;
-
-	execlists_context_status_change(head_req, INTEL_CONTEXT_SCHEDULE_OUT);
-
-	list_del(&head_req->execlist_link);
-	i915_gem_request_put(head_req);
-
-	return 1;
+	return !engine->execlist_port[0].request;
 }
 
-static u32
-get_context_status(struct intel_engine_cs *engine, unsigned int read_pointer,
-		   u32 *context_id)
+static bool execlists_elsp_ready(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-	u32 status;
-
-	read_pointer %= GEN8_CSB_ENTRIES;
-
-	status = I915_READ_FW(RING_CONTEXT_STATUS_BUF_LO(engine, read_pointer));
-
-	if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
-		return 0;
+	int port;
 
-	*context_id = I915_READ_FW(RING_CONTEXT_STATUS_BUF_HI(engine,
-							      read_pointer));
+	port = 1; /* wait for a free slot */
+	if (engine->disable_lite_restore_wa || engine->preempt_wa)
+		port = 0; /* wait for GPU to be idle before continuing */
 
-	return status;
+	return !engine->execlist_port[port].request;
 }
 
 /*
@@ -540,67 +535,56 @@ get_context_status(struct intel_engine_cs *engine, unsigned int read_pointer,
 static void intel_lrc_irq_handler(unsigned long data)
 {
 	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
+	struct execlist_port *port = engine->execlist_port;
 	struct drm_i915_private *dev_priv = engine->i915;
-	u32 status_pointer;
-	unsigned int read_pointer, write_pointer;
-	u32 csb[GEN8_CSB_ENTRIES][2];
-	unsigned int csb_read = 0, i;
-	unsigned int submit_contexts = 0;
 
 	intel_uncore_forcewake_get(dev_priv, engine->fw_domains);
 
-	status_pointer = I915_READ_FW(RING_CONTEXT_STATUS_PTR(engine));
-
-	read_pointer = engine->next_context_status_buffer;
-	write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
-	if (read_pointer > write_pointer)
-		write_pointer += GEN8_CSB_ENTRIES;
-
-	while (read_pointer < write_pointer) {
-		if (WARN_ON_ONCE(csb_read == GEN8_CSB_ENTRIES))
-			break;
-		csb[csb_read][0] = get_context_status(engine, ++read_pointer,
-						      &csb[csb_read][1]);
-		csb_read++;
-	}
-
-	engine->next_context_status_buffer = write_pointer % GEN8_CSB_ENTRIES;
-
-	/* Update the read pointer to the old write pointer. Manual ringbuffer
-	 * management ftw </sarcasm> */
-	I915_WRITE_FW(RING_CONTEXT_STATUS_PTR(engine),
-		      _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
-				    engine->next_context_status_buffer << 8));
-
-	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
-
-	spin_lock(&engine->execlist_lock);
+	if (!execlists_elsp_idle(engine)) {
+		u32 __iomem *csb_mmio =
+			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
+		u32 __iomem *buf =
+			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0));
+		unsigned int csb, head, tail;
+
+		csb = readl(csb_mmio);
+		head = GEN8_CSB_READ_PTR(csb);
+		tail = GEN8_CSB_WRITE_PTR(csb);
+		if (tail < head)
+			tail += GEN8_CSB_ENTRIES;
+		while (head < tail) {
+			unsigned int idx = ++head % GEN8_CSB_ENTRIES;
+			unsigned int status = readl(buf + 2 * idx);
+
+			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+				continue;
+
+			GEM_BUG_ON(port[0].count == 0);
+			if (--port[0].count == 0) {
+				GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
+				execlists_context_status_change(port[0].request,
+								INTEL_CONTEXT_SCHEDULE_OUT);
+
+				i915_gem_request_put(port[0].request);
+				port[0] = port[1];
+				memset(&port[1], 0, sizeof(port[1]));
+
+				engine->preempt_wa = false;
+			}
 
-	for (i = 0; i < csb_read; i++) {
-		if (unlikely(csb[i][0] & GEN8_CTX_STATUS_PREEMPTED)) {
-			if (csb[i][0] & GEN8_CTX_STATUS_LITE_RESTORE) {
-				if (execlists_check_remove_request(engine, csb[i][1]))
-					WARN(1, "Lite Restored request removed from queue\n");
-			} else
-				WARN(1, "Preemption without Lite Restore\n");
+			GEM_BUG_ON(port[0].count == 0 &&
+				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
 		}
 
-		if (csb[i][0] & (GEN8_CTX_STATUS_ACTIVE_IDLE |
-		    GEN8_CTX_STATUS_ELEMENT_SWITCH))
-			submit_contexts +=
-				execlists_check_remove_request(engine, csb[i][1]);
+		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
+				     GEN8_CSB_WRITE_PTR(csb) << 8),
+		       csb_mmio);
 	}
 
-	if (submit_contexts) {
-		if (!engine->disable_lite_restore_wa ||
-		    (csb[i][0] & GEN8_CTX_STATUS_ACTIVE_IDLE))
-			execlists_unqueue(engine);
-	}
+	if (execlists_elsp_ready(engine))
+		execlists_dequeue(engine);
 
-	spin_unlock(&engine->execlist_lock);
-
-	if (unlikely(submit_contexts > 2))
-		DRM_ERROR("More than two context complete events?\n");
+	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
 }
 
 static void execlists_submit_request(struct drm_i915_gem_request *request)
@@ -609,12 +593,9 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
 
 	spin_lock_bh(&engine->execlist_lock);
 
-	i915_gem_request_get(request);
-	request->ctx_hw_id = request->ctx->hw_id;
-
-	if (list_empty(&engine->execlist_queue))
-		tasklet_hi_schedule(&engine->irq_tasklet);
 	list_add_tail(&request->execlist_link, &engine->execlist_queue);
+	if (execlists_elsp_idle(engine))
+		tasklet_hi_schedule(&engine->irq_tasklet);
 
 	spin_unlock_bh(&engine->execlist_lock);
 }
@@ -721,23 +702,6 @@ intel_logical_ring_advance(struct drm_i915_gem_request *request)
 	return 0;
 }
 
-void intel_execlists_cancel_requests(struct intel_engine_cs *engine)
-{
-	struct drm_i915_gem_request *req, *tmp;
-	LIST_HEAD(cancel_list);
-
-	WARN_ON(!mutex_is_locked(&engine->i915->drm.struct_mutex));
-
-	spin_lock_bh(&engine->execlist_lock);
-	list_replace_init(&engine->execlist_queue, &cancel_list);
-	spin_unlock_bh(&engine->execlist_lock);
-
-	list_for_each_entry_safe(req, tmp, &cancel_list, execlist_link) {
-		list_del(&req->execlist_link);
-		i915_gem_request_put(req);
-	}
-}
-
 static int intel_lr_context_pin(struct i915_gem_context *ctx,
 				struct intel_engine_cs *engine)
 {
@@ -1258,7 +1222,6 @@ static void lrc_init_hws(struct intel_engine_cs *engine)
 static int gen8_init_common_ring(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
-	unsigned int next_context_status_buffer_hw;
 
 	lrc_init_hws(engine);
 
@@ -1269,32 +1232,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 	I915_WRITE(RING_MODE_GEN7(engine),
 		   _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
 		   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
-	POSTING_READ(RING_MODE_GEN7(engine));
-
-	/*
-	 * Instead of resetting the Context Status Buffer (CSB) read pointer to
-	 * zero, we need to read the write pointer from hardware and use its
-	 * value because "this register is power context save restored".
-	 * Effectively, these states have been observed:
-	 *
-	 *      | Suspend-to-idle (freeze) | Suspend-to-RAM (mem) |
-	 * BDW  | CSB regs not reset       | CSB regs reset       |
-	 * CHT  | CSB regs not reset       | CSB regs not reset   |
-	 * SKL  |         ?                |         ?            |
-	 * BXT  |         ?                |         ?            |
-	 */
-	next_context_status_buffer_hw =
-		GEN8_CSB_WRITE_PTR(I915_READ(RING_CONTEXT_STATUS_PTR(engine)));
 
-	/*
-	 * When the CSB registers are reset (also after power-up / gpu reset),
-	 * CSB write pointer is set to all 1's, which is not valid, use '5' in
-	 * this special case, so the first element read is CSB[0].
-	 */
-	if (next_context_status_buffer_hw == GEN8_CSB_PTR_MASK)
-		next_context_status_buffer_hw = (GEN8_CSB_ENTRIES - 1);
-
-	engine->next_context_status_buffer = next_context_status_buffer_hw;
 	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
 
 	intel_engine_init_hangcheck(engine);
@@ -1680,10 +1618,6 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
 	}
 	intel_lr_context_unpin(dev_priv->kernel_context, engine);
 
-	engine->idle_lite_restore_wa = 0;
-	engine->disable_lite_restore_wa = false;
-	engine->ctx_desc_template = 0;
-
 	lrc_destroy_wa_ctx_obj(engine);
 	engine->i915 = NULL;
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index a52cf57dbd40..4d70346500c2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -97,6 +97,4 @@ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv,
 				    int enable_execlists);
 void intel_execlists_enable_submission(struct drm_i915_private *dev_priv);
 
-void intel_execlists_cancel_requests(struct intel_engine_cs *engine);
-
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 84aea549de5d..2181d0a41a96 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -298,11 +298,14 @@ struct intel_engine_cs {
 	/* Execlists */
 	struct tasklet_struct irq_tasklet;
 	spinlock_t execlist_lock; /* used inside tasklet, use spin_lock_bh */
+	struct execlist_port {
+		struct drm_i915_gem_request *request;
+		unsigned int count;
+	} execlist_port[2];
 	struct list_head execlist_queue;
 	unsigned int fw_domains;
-	unsigned int next_context_status_buffer;
-	unsigned int idle_lite_restore_wa;
 	bool disable_lite_restore_wa;
+	bool preempt_wa;
 	u32 ctx_desc_template;
 
 	/**
-- 
2.9.3

* [CI 07/21] drm/i915: Separate out reset flags from the reset counter
  2016-09-09 11:00 [CI 01/21] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
                   ` (4 preceding siblings ...)
  2016-09-09 11:00 ` [CI 06/21] drm/i915: Simplify ELSP queue request tracking Chris Wilson
@ 2016-09-09 11:00 ` Chris Wilson
  2016-09-09 11:00 ` [CI 08/21] drm/i915: Drop local struct_mutex around intel_init_emon[ilk] Chris Wilson
                   ` (14 subsequent siblings)
  20 siblings, 0 replies; 26+ messages in thread
From: Chris Wilson @ 2016-09-09 11:00 UTC (permalink / raw)
  To: intel-gfx

In preparation for introducing per-engine resets, we can first untangle
the reset state from the global reset counter with which it is currently
mixed.

The loss of atomicity in updating the reset state poses a small problem
for handling the waiters. For requests, this is solved by advancing the
seqno so that a waiter waking up after the reset knows the request is
complete. For pending flips, we still rely on the increment of the
global reset epoch (as well as the reset-in-progress flag) to signify
when the hardware was reset.

The advantage, now that we do not inspect the reset state during the
reset itself (i.e. we no longer emit requests during reset), is that we
can use the atomic updates of the state flags to ensure that only one
reset worker is active.

v2: Mika spotted that I transformed the i915_gem_wait_for_error() wakeup
into a waiter wakeup.
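
A sketch of how the flag serializes the reset workers (the caller shape
here is assumed; the actual wiring is in the error-handling paths
touched by this patch):

  /* claim the reset: only one worker may proceed at a time */
  if (test_and_set_bit(I915_RESET_IN_PROGRESS, &error->flags))
  	return;

  i915_reset(dev_priv);
  /* i915_reset() clears I915_RESET_IN_PROGRESS on success, or sets
   * I915_WEDGED if recovery failed */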

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1470414607-32453-6-git-send-email-arun.siluvery@linux.intel.com
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |   9 +++
 drivers/gpu/drm/i915/i915_drv.c         |  16 ++---
 drivers/gpu/drm/i915/i915_drv.h         |  46 +++++---------
 drivers/gpu/drm/i915/i915_gem.c         |   2 +-
 drivers/gpu/drm/i915/i915_gem_request.c |  13 ++--
 drivers/gpu/drm/i915/i915_irq.c         | 103 ++++++++++++++------------------
 drivers/gpu/drm/i915/intel_display.c    |  25 +++++---
 drivers/gpu/drm/i915/intel_drv.h        |   4 +-
 8 files changed, 101 insertions(+), 117 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 98af57cd5096..2a80cd1c9351 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1287,6 +1287,15 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	enum intel_engine_id id;
 	int j;
 
+	if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
+		seq_printf(m, "Wedged\n");
+	if (test_bit(I915_RESET_IN_PROGRESS, &dev_priv->gpu_error.flags))
+		seq_printf(m, "Reset in progress\n");
+	if (waitqueue_active(&dev_priv->gpu_error.wait_queue))
+		seq_printf(m, "Waiter holding struct mutex\n");
+	if (waitqueue_active(&dev_priv->gpu_error.reset_queue))
+		seq_printf(m, "struct_mutex blocked for reset\n");
+
 	if (!i915.enable_hangcheck) {
 		seq_printf(m, "Hangcheck disabled\n");
 		return 0;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 02c34d6996ea..47a676d859db 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1579,7 +1579,7 @@ static int i915_drm_resume(struct drm_device *dev)
 	mutex_lock(&dev->struct_mutex);
 	if (i915_gem_init_hw(dev)) {
 		DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
-		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
 	}
 	mutex_unlock(&dev->struct_mutex);
 
@@ -1741,20 +1741,13 @@ int i915_reset(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
 	struct i915_gpu_error *error = &dev_priv->gpu_error;
-	unsigned reset_counter;
 	int ret;
 
 	mutex_lock(&dev->struct_mutex);
 
 	/* Clear any previous failed attempts at recovery. Time to try again. */
-	atomic_andnot(I915_WEDGED, &error->reset_counter);
-
-	/* Clear the reset-in-progress flag and increment the reset epoch. */
-	reset_counter = atomic_inc_return(&error->reset_counter);
-	if (WARN_ON(__i915_reset_in_progress(reset_counter))) {
-		ret = -EIO;
-		goto error;
-	}
+	__clear_bit(I915_WEDGED, &error->flags);
+	error->reset_count++;
 
 	pr_notice("drm/i915: Resetting chip after gpu hang\n");
 
@@ -1791,6 +1784,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
 		goto error;
 	}
 
+	clear_bit(I915_RESET_IN_PROGRESS, &error->flags);
 	mutex_unlock(&dev->struct_mutex);
 
 	/*
@@ -1805,7 +1799,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
 	return 0;
 
 error:
-	atomic_or(I915_WEDGED, &error->reset_counter);
+	set_bit(I915_WEDGED, &error->flags);
 	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f39bede7664c..dced7e72b625 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1405,9 +1405,10 @@ struct i915_gpu_error {
 	 * State variable controlling the reset flow and count
 	 *
 	 * This is a counter which gets incremented when reset is triggered,
-	 * and again when reset has been handled. So odd values (lowest bit set)
-	 * means that reset is in progress and even values that
-	 * (reset_counter >> 1):th reset was successfully completed.
+	 *
+	 * Before the reset commences, the I915_RESET_IN_PROGRESS bit is set
+	 * meaning that any waiters holding onto the struct_mutex should
+	 * relinquish the lock immediately in order for the reset to start.
 	 *
 	 * If reset is not completed successfully, the I915_WEDGED bit is
 	 * set meaning that the hardware is terminally sour and there is no
@@ -1422,10 +1423,11 @@ struct i915_gpu_error {
 	 * naturally enforces the correct ordering between the bail-out of the
 	 * waiter and the gpu reset work code.
 	 */
-	atomic_t reset_counter;
+	unsigned long reset_count;
 
-#define I915_RESET_IN_PROGRESS_FLAG	1
-#define I915_WEDGED			(1 << 31)
+	unsigned long flags;
+#define I915_RESET_IN_PROGRESS	0
+#define I915_WEDGED		(BITS_PER_LONG - 1)
 
 	/**
 	 * Waitqueue to signal when a hang is detected. Used for waiters
@@ -3241,44 +3243,24 @@ i915_gem_find_active_request(struct intel_engine_cs *engine);
 
 void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
 
-static inline u32 i915_reset_counter(struct i915_gpu_error *error)
-{
-	return atomic_read(&error->reset_counter);
-}
-
-static inline bool __i915_reset_in_progress(u32 reset)
-{
-	return unlikely(reset & I915_RESET_IN_PROGRESS_FLAG);
-}
-
-static inline bool __i915_reset_in_progress_or_wedged(u32 reset)
-{
-	return unlikely(reset & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED));
-}
-
-static inline bool __i915_terminally_wedged(u32 reset)
-{
-	return unlikely(reset & I915_WEDGED);
-}
-
 static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
 {
-	return __i915_reset_in_progress(i915_reset_counter(error));
+	return unlikely(test_bit(I915_RESET_IN_PROGRESS, &error->flags));
 }
 
-static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error)
+static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
 {
-	return __i915_reset_in_progress_or_wedged(i915_reset_counter(error));
+	return unlikely(test_bit(I915_WEDGED, &error->flags));
 }
 
-static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
+static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error)
 {
-	return __i915_terminally_wedged(i915_reset_counter(error));
+	return i915_reset_in_progress(error) | i915_terminally_wedged(error);
 }
 
 static inline u32 i915_reset_count(struct i915_gpu_error *error)
 {
-	return ((i915_reset_counter(error) & ~I915_WEDGED) + 1) / 2;
+	return READ_ONCE(error->reset_count);
 }
 
 void i915_gem_reset(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b00fb8548d50..87a4f3543f0b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4525,7 +4525,7 @@ int i915_gem_init(struct drm_device *dev)
 		 * for all other failure, such as an allocation failure, bail.
 		 */
 		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
-		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
 		ret = 0;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index ec613fd5e01c..24eb4b1b7540 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -233,16 +233,18 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
 	} while (tmp != req);
 }
 
-static int i915_gem_check_wedge(unsigned int reset_counter, bool interruptible)
+static int i915_gem_check_wedge(struct drm_i915_private *dev_priv)
 {
-	if (__i915_terminally_wedged(reset_counter))
+	struct i915_gpu_error *error = &dev_priv->gpu_error;
+
+	if (i915_terminally_wedged(error))
 		return -EIO;
 
-	if (__i915_reset_in_progress(reset_counter)) {
+	if (i915_reset_in_progress(error)) {
 		/* Non-interruptible callers can't handle -EAGAIN, hence return
 		 * -EIO unconditionally for these.
 		 */
-		if (!interruptible)
+		if (!dev_priv->mm.interruptible)
 			return -EIO;
 
 		return -EAGAIN;
@@ -331,7 +333,6 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		       struct i915_gem_context *ctx)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
-	unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error);
 	struct drm_i915_gem_request *req;
 	u32 seqno;
 	int ret;
@@ -340,7 +341,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
 	 * and restart.
 	 */
-	ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
+	ret = i915_gem_check_wedge(dev_priv);
 	if (ret)
 		return ERR_PTR(ret);
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 82358d4e0cc2..ed172d7beecb 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2501,53 +2501,41 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
 
 	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
 
+	DRM_DEBUG_DRIVER("resetting chip\n");
+	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
+
 	/*
-	 * Note that there's only one work item which does gpu resets, so we
-	 * need not worry about concurrent gpu resets potentially incrementing
-	 * error->reset_counter twice. We only need to take care of another
-	 * racing irq/hangcheck declaring the gpu dead for a second time. A
-	 * quick check for that is good enough: schedule_work ensures the
-	 * correct ordering between hang detection and this work item, and since
-	 * the reset in-progress bit is only ever set by code outside of this
-	 * work we don't need to worry about any other races.
+	 * In most cases it's guaranteed that we get here with an RPM
+	 * reference held, for example because there is a pending GPU
+	 * request that won't finish until the reset is done. This
+	 * isn't the case at least when we get here by doing a
+	 * simulated reset via debugfs, so get an RPM reference.
 	 */
-	if (i915_reset_in_progress(&dev_priv->gpu_error)) {
-		DRM_DEBUG_DRIVER("resetting chip\n");
-		kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
-
-		/*
-		 * In most cases it's guaranteed that we get here with an RPM
-		 * reference held, for example because there is a pending GPU
-		 * request that won't finish until the reset is done. This
-		 * isn't the case at least when we get here by doing a
-		 * simulated reset via debugs, so get an RPM reference.
-		 */
-		intel_runtime_pm_get(dev_priv);
+	intel_runtime_pm_get(dev_priv);
 
-		intel_prepare_reset(dev_priv);
+	intel_prepare_reset(dev_priv);
 
-		/*
-		 * All state reset _must_ be completed before we update the
-		 * reset counter, for otherwise waiters might miss the reset
-		 * pending state and not properly drop locks, resulting in
-		 * deadlocks with the reset work.
-		 */
-		ret = i915_reset(dev_priv);
+	/*
+	 * All state reset _must_ be completed before we update the
+	 * reset counter, for otherwise waiters might miss the reset
+	 * pending state and not properly drop locks, resulting in
+	 * deadlocks with the reset work.
+	 */
+	ret = i915_reset(dev_priv);
 
-		intel_finish_reset(dev_priv);
+	intel_finish_reset(dev_priv);
 
-		intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv);
 
-		if (ret == 0)
-			kobject_uevent_env(kobj,
-					   KOBJ_CHANGE, reset_done_event);
+	if (ret == 0)
+		kobject_uevent_env(kobj,
+				   KOBJ_CHANGE, reset_done_event);
 
-		/*
-		 * Note: The wake_up also serves as a memory barrier so that
-		 * waiters see the update value of the reset counter atomic_t.
-		 */
-		wake_up_all(&dev_priv->gpu_error.reset_queue);
-	}
+	/*
+	 * Note: The wake_up also serves as a memory barrier so that
+	 * waiters see the updated value of dev_priv->gpu_error.
+	 */
+	wake_up_all(&dev_priv->gpu_error.reset_queue);
 }
 
 static void i915_report_and_clear_eir(struct drm_i915_private *dev_priv)
@@ -2666,25 +2654,26 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
 	i915_capture_error_state(dev_priv, engine_mask, error_msg);
 	i915_report_and_clear_eir(dev_priv);
 
-	if (engine_mask) {
-		atomic_or(I915_RESET_IN_PROGRESS_FLAG,
-				&dev_priv->gpu_error.reset_counter);
+	if (!engine_mask)
+		return;
 
-		/*
-		 * Wakeup waiting processes so that the reset function
-		 * i915_reset_and_wakeup doesn't deadlock trying to grab
-		 * various locks. By bumping the reset counter first, the woken
-		 * processes will see a reset in progress and back off,
-		 * releasing their locks and then wait for the reset completion.
-		 * We must do this for _all_ gpu waiters that might hold locks
-		 * that the reset work needs to acquire.
-		 *
-		 * Note: The wake_up serves as the required memory barrier to
-		 * ensure that the waiters see the updated value of the reset
-		 * counter atomic_t.
-		 */
-		i915_error_wake_up(dev_priv);
-	}
+	if (test_and_set_bit(I915_RESET_IN_PROGRESS,
+			     &dev_priv->gpu_error.flags))
+		return;
+
+	/*
+	 * Wakeup waiting processes so that the reset function
+	 * i915_reset_and_wakeup doesn't deadlock trying to grab
+	 * various locks. By setting the reset-in-progress flag first, the woken
+	 * processes will see a reset in progress and back off,
+	 * releasing their locks and then wait for the reset completion.
+	 * We must do this for _all_ gpu waiters that might hold locks
+	 * that the reset work needs to acquire.
+	 *
+	 * Note: The wake_up also provides a memory barrier to ensure that the
+	 * waiters see the updated value of the reset flags.
+	 */
+	i915_error_wake_up(dev_priv);
 
 	i915_reset_and_wakeup(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 0c65212781e4..cb88e29d828d 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3648,15 +3648,26 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
 	mutex_unlock(&dev->mode_config.mutex);
 }
 
+static bool abort_flip_on_reset(struct intel_crtc *crtc)
+{
+	struct i915_gpu_error *error = &to_i915(crtc->base.dev)->gpu_error;
+
+	if (i915_reset_in_progress(error))
+		return true;
+
+	if (crtc->reset_count != i915_reset_count(error))
+		return true;
+
+	return false;
+}
+
 static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	unsigned reset_counter;
 	bool pending;
 
-	reset_counter = i915_reset_counter(&to_i915(dev)->gpu_error);
-	if (intel_crtc->reset_counter != reset_counter)
+	if (abort_flip_on_reset(intel_crtc))
 		return false;
 
 	spin_lock_irq(&dev->event_lock);
@@ -11535,10 +11546,8 @@ static bool __pageflip_finished_cs(struct intel_crtc *crtc,
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	unsigned reset_counter;
 
-	reset_counter = i915_reset_counter(&dev_priv->gpu_error);
-	if (crtc->reset_counter != reset_counter)
+	if (abort_flip_on_reset(crtc))
 		return true;
 
 	/*
@@ -12204,8 +12213,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	if (ret)
 		goto cleanup;
 
-	intel_crtc->reset_counter = i915_reset_counter(&dev_priv->gpu_error);
-	if (__i915_reset_in_progress_or_wedged(intel_crtc->reset_counter)) {
+	intel_crtc->reset_count = i915_reset_count(&dev_priv->gpu_error);
+	if (i915_reset_in_progress_or_wedged(&dev_priv->gpu_error)) {
 		ret = -EIO;
 		goto cleanup;
 	}
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 7868d5c7f347..a3b3d37f9221 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -706,8 +706,8 @@ struct intel_crtc {
 
 	struct intel_crtc_state *config;
 
-	/* reset counter value when the last flip was submitted */
-	unsigned int reset_counter;
+	/* global reset count when the last flip was submitted */
+	unsigned int reset_count;
 
 	/* Access to these should be protected by dev_priv->irq_lock. */
 	bool cpu_fifo_underrun_disabled;
-- 
2.9.3


* [CI 08/21] drm/i915: Drop local struct_mutex around intel_init_emon[ilk]
From: Chris Wilson @ 2016-09-09 11:00 UTC (permalink / raw)
  To: intel-gfx

Access to intel_init_emon() is strictly ordered by gt_powersave, so
taking struct_mutex around it is overkill (and will conflict with
callers holding struct_mutex themselves).
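
As an aside (purely illustrative, not part of the patch): struct_mutex
is not recursive, so before this change any caller that already held
the lock would deadlock, roughly:

    mutex_lock(&dev_priv->drm.struct_mutex);
    intel_autoenable_gt_powersave(dev_priv);
    /* old ilk path: mutex_lock(&dev_priv->drm.struct_mutex) again
     * -> self-deadlock on a non-recursive mutex
     */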

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/intel_pm.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 4f833a077089..6af438ffef9a 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6773,9 +6773,7 @@ void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv)
 
 	if (IS_IRONLAKE_M(dev_priv)) {
 		ironlake_enable_drps(dev_priv);
-		mutex_lock(&dev_priv->drm.struct_mutex);
 		intel_init_emon(dev_priv);
-		mutex_unlock(&dev_priv->drm.struct_mutex);
 	} else if (INTEL_INFO(dev_priv)->gen >= 6) {
 		/*
 		 * PCU communication is slow and this doesn't need to be
-- 
2.9.3


* [CI 09/21] drm/i915: Expand bool interruptible to pass flags to i915_wait_request()
From: Chris Wilson @ 2016-09-09 11:00 UTC (permalink / raw)
  To: intel-gfx

We need finer control over wakeup behaviour during i915_wait_request(),
so expand the current bool interruptible argument into a bitmask of flags.
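
As a sketch of the conversion (illustrative only; the real definition
lands in i915_gem_request.h below):

    #define I915_WAIT_INTERRUPTIBLE BIT(0)

    /* before */ ret = i915_wait_request(req, true, NULL, NULL);
    /* after  */ ret = i915_wait_request(req, I915_WAIT_INTERRUPTIBLE,
                                         NULL, NULL);

Further wait flags can then be OR'd in without touching every prototype
again.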

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c      |  2 +-
 drivers/gpu/drm/i915/i915_drv.h          |  2 +-
 drivers/gpu/drm/i915/i915_gem.c          | 16 +++++++++-------
 drivers/gpu/drm/i915/i915_gem_evict.c    |  4 ++--
 drivers/gpu/drm/i915/i915_gem_gtt.c      |  2 +-
 drivers/gpu/drm/i915/i915_gem_request.c  |  9 +++++----
 drivers/gpu/drm/i915/i915_gem_request.h  | 13 +++++++------
 drivers/gpu/drm/i915/i915_gem_shrinker.c |  4 ++--
 drivers/gpu/drm/i915/i915_gem_userptr.c  |  2 +-
 drivers/gpu/drm/i915/intel_display.c     |  8 ++++----
 drivers/gpu/drm/i915/intel_ringbuffer.c  |  3 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  4 ++--
 12 files changed, 37 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 2a80cd1c9351..620e7daa133b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4803,7 +4803,7 @@ i915_drop_caches_set(void *data, u64 val)
 		return ret;
 
 	if (val & DROP_ACTIVE) {
-		ret = i915_gem_wait_for_idle(dev_priv, true);
+		ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
 		if (ret)
 			goto unlock;
 	}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index dced7e72b625..20b7743f8ec5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3270,7 +3270,7 @@ int __must_check i915_gem_init_hw(struct drm_device *dev);
 void i915_gem_init_swizzling(struct drm_device *dev);
 void i915_gem_cleanup_engines(struct drm_device *dev);
 int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
-					bool interruptible);
+					unsigned int flags);
 int __must_check i915_gem_suspend(struct drm_device *dev);
 void i915_gem_resume(struct drm_device *dev);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 87a4f3543f0b..4617250c3000 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -386,7 +386,8 @@ __unsafe_wait_rendering(struct drm_i915_gem_object *obj,
 		int ret;
 
 		ret = i915_gem_active_wait_unlocked(&active[idx],
-						    true, NULL, rps);
+						    I915_WAIT_INTERRUPTIBLE,
+						    NULL, rps);
 		if (ret)
 			return ret;
 	}
@@ -2026,7 +2027,7 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
 	 * to claim that space for ourselves, we need to take the big
 	 * struct_mutex to free the requests+objects and allocate our slot.
 	 */
-	err = i915_gem_wait_for_idle(dev_priv, true);
+	err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
 	if (err)
 		return err;
 
@@ -2779,7 +2780,8 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	active = __I915_BO_ACTIVE(obj);
 	for_each_active(active, idx) {
 		s64 *timeout = args->timeout_ns >= 0 ? &args->timeout_ns : NULL;
-		ret = i915_gem_active_wait_unlocked(&obj->last_read[idx], true,
+		ret = i915_gem_active_wait_unlocked(&obj->last_read[idx],
+						    I915_WAIT_INTERRUPTIBLE,
 						    timeout, rps);
 		if (ret)
 			break;
@@ -2982,7 +2984,7 @@ destroy:
 }
 
 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
-			   bool interruptible)
+			   unsigned int flags)
 {
 	struct intel_engine_cs *engine;
 	int ret;
@@ -2991,7 +2993,7 @@ int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
 		if (engine->last_context == NULL)
 			continue;
 
-		ret = intel_engine_idle(engine, interruptible);
+		ret = intel_engine_idle(engine, flags);
 		if (ret)
 			return ret;
 	}
@@ -3746,7 +3748,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 	if (target == NULL)
 		return 0;
 
-	ret = i915_wait_request(target, true, NULL, NULL);
+	ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
 	i915_gem_request_put(target);
 
 	return ret;
@@ -4302,7 +4304,7 @@ int i915_gem_suspend(struct drm_device *dev)
 	if (ret)
 		goto err;
 
-	ret = i915_gem_wait_for_idle(dev_priv, true);
+	ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
 	if (ret)
 		goto err;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 815d5fbe07ac..103085246975 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -170,7 +170,7 @@ search_again:
 	if (ret)
 		return ret;
 
-	ret = i915_gem_wait_for_idle(dev_priv, true);
+	ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
 	if (ret)
 		return ret;
 
@@ -275,7 +275,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
 				return ret;
 		}
 
-		ret = i915_gem_wait_for_idle(dev_priv, true);
+		ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
 		if (ret)
 			return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e16c38086abe..9bcac52b8268 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2683,7 +2683,7 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 
 	if (unlikely(ggtt->do_idle_maps)) {
-		if (i915_gem_wait_for_idle(dev_priv, false)) {
+		if (i915_gem_wait_for_idle(dev_priv, 0)) {
 			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
 			/* Wait a bit, in hopes it avoids the hang */
 			udelay(10);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 24eb4b1b7540..f4c15f319d08 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -260,7 +260,7 @@ static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
 
 	/* Carefully retire all requests without writing to the rings */
 	for_each_engine(engine, dev_priv) {
-		ret = intel_engine_idle(engine, true);
+		ret = intel_engine_idle(engine, I915_WAIT_INTERRUPTIBLE);
 		if (ret)
 			return ret;
 	}
@@ -598,7 +598,7 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
 /**
  * i915_wait_request - wait until execution of request has finished
  * @req: duh!
- * @interruptible: do an interruptible wait (normally yes)
+ * @flags: how to wait
  * @timeout: in - how long to wait (NULL forever); out - how much time remaining
  * @rps: client to charge for RPS boosting
  *
@@ -613,11 +613,12 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
  * errno with remaining time filled in timeout argument.
  */
 int i915_wait_request(struct drm_i915_gem_request *req,
-		      bool interruptible,
+		      unsigned int flags,
 		      s64 *timeout,
 		      struct intel_rps_client *rps)
 {
-	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
+	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
+		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 	DEFINE_WAIT(reset);
 	struct intel_wait wait;
 	unsigned long timeout_remain;
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index a231bd318ef0..479896ef791e 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -218,10 +218,11 @@ struct intel_rps_client;
 #define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p))
 
 int i915_wait_request(struct drm_i915_gem_request *req,
-		      bool interruptible,
+		      unsigned int flags,
 		      s64 *timeout,
 		      struct intel_rps_client *rps)
 	__attribute__((nonnull(1)));
+#define I915_WAIT_INTERRUPTIBLE BIT(0)
 
 static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
 
@@ -575,13 +576,13 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
 	if (!request)
 		return 0;
 
-	return i915_wait_request(request, true, NULL, NULL);
+	return i915_wait_request(request, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
 }
 
 /**
  * i915_gem_active_wait_unlocked - waits until the request is completed
  * @active - the active request on which to wait
- * @interruptible - whether the wait can be woken by a userspace signal
+ * @flags - how to wait
  * @timeout - how long to wait at most
  * @rps - userspace client to charge for a waitboost
  *
@@ -602,7 +603,7 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
  */
 static inline int
 i915_gem_active_wait_unlocked(const struct i915_gem_active *active,
-			      bool interruptible,
+			      unsigned int flags,
 			      s64 *timeout,
 			      struct intel_rps_client *rps)
 {
@@ -611,7 +612,7 @@ i915_gem_active_wait_unlocked(const struct i915_gem_active *active,
 
 	request = i915_gem_active_get_unlocked(active);
 	if (request) {
-		ret = i915_wait_request(request, interruptible, timeout, rps);
+		ret = i915_wait_request(request, flags, timeout, rps);
 		i915_gem_request_put(request);
 	}
 
@@ -638,7 +639,7 @@ i915_gem_active_retire(struct i915_gem_active *active,
 	if (!request)
 		return 0;
 
-	ret = i915_wait_request(request, true, NULL, NULL);
+	ret = i915_wait_request(request, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index b80802b35353..35a05f4c51c1 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -323,7 +323,7 @@ i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv,
 	unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms);
 
 	do {
-		if (i915_gem_wait_for_idle(dev_priv, false) == 0 &&
+		if (i915_gem_wait_for_idle(dev_priv, 0) == 0 &&
 		    i915_gem_shrinker_lock(&dev_priv->drm, &slu->unlock))
 			break;
 
@@ -414,7 +414,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 		return NOTIFY_DONE;
 
 	/* Force everything onto the inactive lists */
-	ret = i915_gem_wait_for_idle(dev_priv, false);
+	ret = i915_gem_wait_for_idle(dev_priv, 0);
 	if (ret)
 		goto out;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index be54825ef3e8..e537930c64b5 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -68,7 +68,7 @@ static void wait_rendering(struct drm_i915_gem_object *obj)
 
 	for_each_active(active, idx)
 		i915_gem_active_wait_unlocked(&obj->last_read[idx],
-					      false, NULL, NULL);
+					      0, NULL, NULL);
 }
 
 static void cancel_userptr(struct work_struct *work)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index cb88e29d828d..5771ad802498 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12024,8 +12024,7 @@ static void intel_mmio_flip_work_func(struct work_struct *w)
 
 	if (work->flip_queued_req)
 		WARN_ON(i915_wait_request(work->flip_queued_req,
-					  false, NULL,
-					  NO_WAITBOOST));
+					  0, NULL, NO_WAITBOOST));
 
 	/* For framebuffer backed by dmabuf, wait for fence */
 	resv = i915_gem_object_get_dmabuf_resv(obj);
@@ -14077,7 +14076,8 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
 				continue;
 
 			ret = i915_wait_request(intel_plane_state->wait_req,
-						true, NULL, NULL);
+						I915_WAIT_INTERRUPTIBLE,
+						NULL, NULL);
 			if (ret) {
 				/* Any hang should be swallowed by the wait */
 				WARN_ON(ret == -EIO);
@@ -14295,7 +14295,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 			continue;
 
 		ret = i915_wait_request(intel_plane_state->wait_req,
-					true, NULL, NULL);
+					0, NULL, NULL);
 		/* EIO should be eaten, and we can't get interrupted in the
 		 * worker, and blocking commits have waited already. */
 		WARN_ON(ret);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index fd8fcc6ec970..a5bf18877068 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2223,7 +2223,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 	if (WARN_ON(&target->ring_link == &ring->request_list))
 		return -ENOSPC;
 
-	ret = i915_wait_request(target, true, NULL, NO_WAITBOOST);
+	ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE,
+				NULL, NO_WAITBOOST);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2181d0a41a96..18848acf5e74 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -489,11 +489,11 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine, int size);
 void intel_engine_cleanup_common(struct intel_engine_cs *engine);
 
 static inline int intel_engine_idle(struct intel_engine_cs *engine,
-				    bool interruptible)
+				    unsigned int flags)
 {
 	/* Wait upon the last request to be completed */
 	return i915_gem_active_wait_unlocked(&engine->last_request,
-					     interruptible, NULL, NULL);
+					     flags, NULL, NULL);
 }
 
 int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
-- 
2.9.3


* [CI 10/21] drm/i915: Mark up all locked waiters
From: Chris Wilson @ 2016-09-09 11:00 UTC (permalink / raw)
  To: intel-gfx

In the next patch we want to handle the reset directly from a locked
waiter in order to avoid issues with returning before the reset is
handled. To handle the reset, we must first know whether we hold the
struct_mutex. If we do not hold the struct_mutex we cannot perform the
reset, but neither do we block the reset worker (and so we can just
continue to wait for request completion); otherwise we must relinquish
the mutex.
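
Schematically, callers then declare their locking up front (a sketch,
not part of the diff):

    /* locked waiter: may have to handle the GPU reset itself */
    ret = i915_wait_request(req,
                            I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
                            NULL, NULL);

    /* unlocked waiter: the reset worker can make progress without us */
    ret = i915_wait_request(req, I915_WAIT_INTERRUPTIBLE, NULL, NULL);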

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c      |  4 +++-
 drivers/gpu/drm/i915/i915_gem.c          |  7 +++++--
 drivers/gpu/drm/i915/i915_gem_evict.c    |  8 ++++++--
 drivers/gpu/drm/i915/i915_gem_gtt.c      |  2 +-
 drivers/gpu/drm/i915/i915_gem_request.c  | 15 ++++++++++++---
 drivers/gpu/drm/i915/i915_gem_request.h  | 11 ++++++++---
 drivers/gpu/drm/i915/i915_gem_shrinker.c |  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c  |  3 ++-
 8 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 620e7daa133b..64702cc68e3a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4803,7 +4803,9 @@ i915_drop_caches_set(void *data, u64 val)
 		return ret;
 
 	if (val & DROP_ACTIVE) {
-		ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
+		ret = i915_gem_wait_for_idle(dev_priv,
+					     I915_WAIT_INTERRUPTIBLE |
+					     I915_WAIT_LOCKED);
 		if (ret)
 			goto unlock;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4617250c3000..23069a2d2850 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2802,7 +2802,8 @@ __i915_gem_object_sync(struct drm_i915_gem_request *to,
 
 	if (!i915.semaphores) {
 		ret = i915_wait_request(from,
-					from->i915->mm.interruptible,
+					from->i915->mm.interruptible |
+					I915_WAIT_LOCKED,
 					NULL,
 					NO_WAITBOOST);
 		if (ret)
@@ -4304,7 +4305,9 @@ int i915_gem_suspend(struct drm_device *dev)
 	if (ret)
 		goto err;
 
-	ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
+	ret = i915_gem_wait_for_idle(dev_priv,
+				     I915_WAIT_INTERRUPTIBLE |
+				     I915_WAIT_LOCKED);
 	if (ret)
 		goto err;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 103085246975..5b6f81c1dbca 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -170,7 +170,9 @@ search_again:
 	if (ret)
 		return ret;
 
-	ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
+	ret = i915_gem_wait_for_idle(dev_priv,
+				     I915_WAIT_INTERRUPTIBLE |
+				     I915_WAIT_LOCKED);
 	if (ret)
 		return ret;
 
@@ -275,7 +277,9 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
 				return ret;
 		}
 
-		ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
+		ret = i915_gem_wait_for_idle(dev_priv,
+					     I915_WAIT_INTERRUPTIBLE |
+					     I915_WAIT_LOCKED);
 		if (ret)
 			return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 9bcac52b8268..f3c6876da521 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2683,7 +2683,7 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 
 	if (unlikely(ggtt->do_idle_maps)) {
-		if (i915_gem_wait_for_idle(dev_priv, 0)) {
+		if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) {
 			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
 			/* Wait a bit, in hopes it avoids the hang */
 			udelay(10);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index f4c15f319d08..5f89801e6a16 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -260,7 +260,9 @@ static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
 
 	/* Carefully retire all requests without writing to the rings */
 	for_each_engine(engine, dev_priv) {
-		ret = intel_engine_idle(engine, I915_WAIT_INTERRUPTIBLE);
+		ret = intel_engine_idle(engine,
+					I915_WAIT_INTERRUPTIBLE |
+					I915_WAIT_LOCKED);
 		if (ret)
 			return ret;
 	}
@@ -625,6 +627,10 @@ int i915_wait_request(struct drm_i915_gem_request *req,
 	int ret = 0;
 
 	might_sleep();
+#if IS_ENABLED(CONFIG_LOCKDEP)
+	GEM_BUG_ON(!!lockdep_is_held(&req->i915->drm.struct_mutex) !=
+		   !!(flags & I915_WAIT_LOCKED));
+#endif
 
 	if (i915_gem_request_completed(req))
 		return 0;
@@ -667,7 +673,8 @@ int i915_wait_request(struct drm_i915_gem_request *req,
 		goto complete;
 
 	set_current_state(state);
-	add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
+	if (flags & I915_WAIT_LOCKED)
+		add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
 
 	intel_wait_init(&wait, req->fence.seqno);
 	if (intel_engine_add_wait(req->engine, &wait))
@@ -707,10 +714,12 @@ wakeup:
 		if (i915_spin_request(req, state, 2))
 			break;
 	}
-	remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
 
 	intel_engine_remove_wait(req->engine, &wait);
+	if (flags & I915_WAIT_LOCKED)
+		remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
 	__set_current_state(TASK_RUNNING);
+
 complete:
 	trace_i915_gem_request_wait_end(req);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 479896ef791e..def35721e9ed 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -222,7 +222,8 @@ int i915_wait_request(struct drm_i915_gem_request *req,
 		      s64 *timeout,
 		      struct intel_rps_client *rps)
 	__attribute__((nonnull(1)));
-#define I915_WAIT_INTERRUPTIBLE BIT(0)
+#define I915_WAIT_INTERRUPTIBLE	BIT(0)
+#define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
 
 static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
 
@@ -576,7 +577,9 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
 	if (!request)
 		return 0;
 
-	return i915_wait_request(request, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
+	return i915_wait_request(request,
+				 I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				 NULL, NULL);
 }
 
 /**
@@ -639,7 +642,9 @@ i915_gem_active_retire(struct i915_gem_active *active,
 	if (!request)
 		return 0;
 
-	ret = i915_wait_request(request, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
+	ret = i915_wait_request(request,
+				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				NULL, NULL);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 35a05f4c51c1..1c237d02f30b 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -414,7 +414,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 		return NOTIFY_DONE;
 
 	/* Force everything onto the inactive lists */
-	ret = i915_gem_wait_for_idle(dev_priv, 0);
+	ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED);
 	if (ret)
 		goto out;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index a5bf18877068..8687a84a7ff3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2223,7 +2223,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 	if (WARN_ON(&target->ring_link == &ring->request_list))
 		return -ENOSPC;
 
-	ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE,
+	ret = i915_wait_request(target,
+				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
 				NULL, NO_WAITBOOST);
 	if (ret)
 		return ret;
-- 
2.9.3


* [CI 11/21] drm/i915: Perform a direct reset of the GPU from the waiter
From: Chris Wilson @ 2016-09-09 11:00 UTC (permalink / raw)
  To: intel-gfx

If a waiter is holding the struct_mutex, then the reset worker cannot
reset the GPU until the waiter returns. We do not want to return -EAGAIN
from i915_wait_request as that breaks delicate operations like
i915_vma_unbind() which often cannot be restarted easily, and returning
-EIO is just as useless (and has in the past proven dangerous). The
remaining WARN_ON(i915_wait_request) serves as a valuable reminder that
handling errors from an indefinite wait is tricky.

We can keep the current semantic (that once a reset is complete, so too
is the request) by performing the reset ourselves if we hold the
mutex.
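
Schematically, the wait loop becomes (a simplified sketch of the logic
added to i915_wait_request() below):

    for (;;) {
        if (__i915_request_irq_complete(req))
                break;

        if (flags & I915_WAIT_LOCKED &&
            i915_reset_in_progress(&req->i915->gpu_error)) {
                i915_reset(req->i915); /* we hold the mutex, reset now */
                continue;              /* then re-check for completion */
        }

        schedule();                    /* otherwise wait for the worker */
    }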

uevent emission is still handled by the reset worker, so it may appear
slightly out of order with respect to the actual reset (and concurrent
use of the device).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c         | 11 ++++++-----
 drivers/gpu/drm/i915/i915_drv.h         | 15 +++------------
 drivers/gpu/drm/i915/i915_gem_request.c | 29 +++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_irq.c         |  2 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c |  3 ---
 5 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 47a676d859db..ff4173e6e298 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1729,6 +1729,8 @@ int i915_resume_switcheroo(struct drm_device *dev)
  * Reset the chip.  Useful if a hang is detected. Returns zero on successful
  * reset or otherwise an error code.
  *
+ * Caller must hold the struct_mutex.
+ *
  * Procedure is fairly simple:
  *   - reset the chip using the reset reg
  *   - re-init context state
@@ -1743,7 +1745,10 @@ int i915_reset(struct drm_i915_private *dev_priv)
 	struct i915_gpu_error *error = &dev_priv->gpu_error;
 	int ret;
 
-	mutex_lock(&dev->struct_mutex);
+	lockdep_assert_held(&dev->struct_mutex);
+
+	if (!test_and_clear_bit(I915_RESET_IN_PROGRESS, &error->flags))
+		return test_bit(I915_WEDGED, &error->flags) ? -EIO : 0;
 
 	/* Clear any previous failed attempts at recovery. Time to try again. */
 	__clear_bit(I915_WEDGED, &error->flags);
@@ -1784,9 +1789,6 @@ int i915_reset(struct drm_i915_private *dev_priv)
 		goto error;
 	}
 
-	clear_bit(I915_RESET_IN_PROGRESS, &error->flags);
-	mutex_unlock(&dev->struct_mutex);
-
 	/*
 	 * rps/rc6 re-init is necessary to restore state lost after the
 	 * reset and the re-install of gt irqs. Skip for ironlake per
@@ -1800,7 +1802,6 @@ int i915_reset(struct drm_i915_private *dev_priv)
 
 error:
 	set_bit(I915_WEDGED, &error->flags);
-	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 20b7743f8ec5..15f1977e356a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3863,7 +3863,9 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
 			    schedule_timeout_uninterruptible(remaining_jiffies);
 	}
 }
-static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
+
+static inline bool
+__i915_request_irq_complete(struct drm_i915_gem_request *req)
 {
 	struct intel_engine_cs *engine = req->engine;
 
@@ -3925,17 +3927,6 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
 			return true;
 	}
 
-	/* We need to check whether any gpu reset happened in between
-	 * the request being submitted and now. If a reset has occurred,
-	 * the seqno will have been advance past ours and our request
-	 * is complete. If we are in the process of handling a reset,
-	 * the request is effectively complete as the rendering will
-	 * be discarded, but we need to return in order to drop the
-	 * struct_mutex.
-	 */
-	if (i915_reset_in_progress(&req->i915->gpu_error))
-		return true;
-
 	return false;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 5f89801e6a16..64c370681a81 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -533,6 +533,16 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	engine->submit_request(request);
 }
 
+static void reset_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&q->lock, flags);
+	if (list_empty(&wait->task_list))
+		__add_wait_queue(q, wait);
+	spin_unlock_irqrestore(&q->lock, flags);
+}
+
 static unsigned long local_clock_us(unsigned int *cpu)
 {
 	unsigned long t;
@@ -710,6 +720,25 @@ wakeup:
 		if (__i915_request_irq_complete(req))
 			break;
 
+		/* If the GPU is hung, and we hold the lock, reset the GPU
+		 * and then check for completion. On a full reset, the engine's
+	 * HW seqno will be advanced past us and we are complete.
+		 * If we do a partial reset, we have to wait for the GPU to
+		 * resume and update the breadcrumb.
+		 *
+		 * If we don't hold the mutex, we can just wait for the worker
+		 * to come along and update the breadcrumb (either directly
+		 * itself, or indirectly by recovering the GPU).
+		 */
+		if (flags & I915_WAIT_LOCKED &&
+		    i915_reset_in_progress(&req->i915->gpu_error)) {
+			__set_current_state(TASK_RUNNING);
+			i915_reset(req->i915);
+			reset_wait_queue(&req->i915->gpu_error.wait_queue,
+					 &reset);
+			continue;
+		}
+
 		/* Only spin if we know the GPU is processing this request */
 		if (i915_spin_request(req, state, 2))
 			break;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index ed172d7beecb..2c7cb5041511 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2521,7 +2521,9 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
 	 * pending state and not properly drop locks, resulting in
 	 * deadlocks with the reset work.
 	 */
+	mutex_lock(&dev_priv->drm.struct_mutex);
 	ret = i915_reset(dev_priv);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	intel_finish_reset(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 8687a84a7ff3..d2d85fc869e1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2229,9 +2229,6 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 	if (ret)
 		return ret;
 
-	if (i915_reset_in_progress(&target->i915->gpu_error))
-		return -EAGAIN;
-
 	i915_gem_request_retire_upto(target);
 
 	intel_ring_update_space(ring);
-- 
2.9.3


* [CI 12/21] drm/i915: Replace wait-on-mutex with wait-on-bit in reset worker
From: Chris Wilson @ 2016-09-09 11:00 UTC (permalink / raw)
  To: intel-gfx

Since we now have a cooperative mode with a direct reset, we can avoid
the contention on struct_mutex and instead try the mutex and then sleep
on the I915_RESET_IN_PROGRESS bit. If the mutex was acquired and that
bit is cleared, all is fine. Otherwise, we sleep for a bit and try
again. In the worst case we sleep for an extra second waiting for the
mutex to be released (no one touching the GPU is allowed to take the
struct_mutex whilst the I915_RESET_IN_PROGRESS bit is set). But when we
have a direct reset, this allows us to clean up the reset worker faster.
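
The worker thus degenerates to a trylock-or-wait-on-bit loop, roughly
(see the hunk below for the real version):

    do {
        if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
                i915_reset(dev_priv); /* clears I915_RESET_IN_PROGRESS */
                mutex_unlock(&dev_priv->drm.struct_mutex);
        }
        /* else wait for a locked waiter to do the reset and wake us */
    } while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
                                 I915_RESET_IN_PROGRESS,
                                 TASK_UNINTERRUPTIBLE, HZ));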

v2: Remember to call wake_up_bit() after changing (for the faster wakeup
as promised)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c | 14 ++++++++------
 drivers/gpu/drm/i915/i915_drv.h |  2 +-
 drivers/gpu/drm/i915/i915_irq.c | 31 ++++++++++++++++++-------------
 3 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index ff4173e6e298..f2614b2f59f7 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1726,8 +1726,8 @@ int i915_resume_switcheroo(struct drm_device *dev)
  * i915_reset - reset chip after a hang
  * @dev: drm device to reset
  *
- * Reset the chip.  Useful if a hang is detected. Returns zero on successful
- * reset or otherwise an error code.
+ * Reset the chip.  Useful if a hang is detected. Marks the device as wedged
+ * on failure.
  *
  * Caller must hold the struct_mutex.
  *
@@ -1739,7 +1739,7 @@ int i915_resume_switcheroo(struct drm_device *dev)
  *   - re-init interrupt state
  *   - re-init display
  */
-int i915_reset(struct drm_i915_private *dev_priv)
+void i915_reset(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
 	struct i915_gpu_error *error = &dev_priv->gpu_error;
@@ -1748,7 +1748,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
 	lockdep_assert_held(&dev->struct_mutex);
 
 	if (!test_and_clear_bit(I915_RESET_IN_PROGRESS, &error->flags))
-		return test_bit(I915_WEDGED, &error->flags) ? -EIO : 0;
+		return;
 
 	/* Clear any previous failed attempts at recovery. Time to try again. */
 	__clear_bit(I915_WEDGED, &error->flags);
@@ -1798,11 +1798,13 @@ int i915_reset(struct drm_i915_private *dev_priv)
 	intel_sanitize_gt_powersave(dev_priv);
 	intel_autoenable_gt_powersave(dev_priv);
 
-	return 0;
+wakeup:
+	wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS);
+	return;
 
 error:
 	set_bit(I915_WEDGED, &error->flags);
-	return ret;
+	goto wakeup;
 }
 
 static int i915_pm_suspend(struct device *kdev)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 15f1977e356a..9a9f07f3574c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2884,7 +2884,7 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd,
 #endif
 extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
 extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
-extern int i915_reset(struct drm_i915_private *dev_priv);
+extern void i915_reset(struct drm_i915_private *dev_priv);
 extern int intel_guc_reset(struct drm_i915_private *dev_priv);
 extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
 extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 2c7cb5041511..ef2d40278191 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2497,7 +2497,6 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
 	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
 	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
 	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
-	int ret;
 
 	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
 
@@ -2512,24 +2511,30 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
 	 * simulated reset via debugfs, so get an RPM reference.
 	 */
 	intel_runtime_pm_get(dev_priv);
-
 	intel_prepare_reset(dev_priv);
 
-	/*
-	 * All state reset _must_ be completed before we update the
-	 * reset counter, for otherwise waiters might miss the reset
-	 * pending state and not properly drop locks, resulting in
-	 * deadlocks with the reset work.
-	 */
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	ret = i915_reset(dev_priv);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	do {
+		/*
+		 * All state reset _must_ be completed before we update the
+		 * reset counter, for otherwise waiters might miss the reset
+		 * pending state and not properly drop locks, resulting in
+		 * deadlocks with the reset work.
+		 */
+		if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
+			i915_reset(dev_priv);
+			mutex_unlock(&dev_priv->drm.struct_mutex);
+		}
 
-	intel_finish_reset(dev_priv);
+		/* We need to wait for anyone holding the lock to wakeup */
+	} while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
+				     I915_RESET_IN_PROGRESS,
+				     TASK_UNINTERRUPTIBLE,
+				     HZ));
 
+	intel_finish_reset(dev_priv);
 	intel_runtime_pm_put(dev_priv);
 
-	if (ret == 0)
+	if (!test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
 		kobject_uevent_env(kobj,
 				   KOBJ_CHANGE, reset_done_event);
 
-- 
2.9.3


* [CI 13/21] drm/i915: Update reset path to fix incomplete requests
From: Chris Wilson @ 2016-09-09 11:00 UTC (permalink / raw)
  To: intel-gfx

Update the reset path in preparation for engine reset, which requires
identifying incomplete requests and their associated contexts and
fixing up their state so that the engine can resume correctly after
the reset.

The request that caused the hang will be skipped and the head is reset
to the start of the breadcrumb. This allows us to resume from where we
left off. Since this request didn't complete normally, we also need to
clean up the ELSP queue manually. This is vital if we employ nonblocking
request submission, where we may have a web of dependencies upon the
hung request and so advancing the seqno manually is no longer trivial.
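
To sketch the scrubbing (the ring is cyclic, so the clear may wrap; the
real version is reset_request() below, with vaddr being the CPU mapping
of the ring):

    head = request->head;
    if (request->postfix < head) { /* request wraps around the ring */
        memset(vaddr + head, 0, request->ring->size - head);
        head = 0;
    }
    memset(vaddr + head, 0, request->postfix - head);
    /* zeroes decode as MI_NOOP, leaving only the breadcrumb to execute */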

ABI: gem_reset_stats / DRM_IOCTL_I915_GET_RESET_STATS

We change the way we count pending batches. Only the active context
involved in the reset is marked as either innocent or guilty, rather
than marking the entire world as pending. By inspection this only
affects igt/gem_reset_stats (which assumes implementation details) and
not piglit.

ARB_robustness gives this guidance on how we expect the user of this
interface to behave:

 * Provide a mechanism for an OpenGL application to learn about
   graphics resets that affect the context.  When a graphics reset
   occurs, the OpenGL context becomes unusable and the application
   must create a new context to continue operation. Detecting a
   graphics reset happens through an inexpensive query.

And with regards to the actual meaning of the reset values:

   Certain events can result in a reset of the GL context. Such a reset
   causes all context state to be lost. Recovery from such events
   requires recreation of all objects in the affected context. The
   current status of the graphics reset state is returned by

	enum GetGraphicsResetStatusARB();

   The symbolic constant returned indicates if the GL context has been
   in a reset state at any point since the last call to
   GetGraphicsResetStatusARB. NO_ERROR indicates that the GL context
   has not been in a reset state since the last call.
   GUILTY_CONTEXT_RESET_ARB indicates that a reset has been detected
   that is attributable to the current GL context.
   INNOCENT_CONTEXT_RESET_ARB indicates a reset has been detected that
   is not attributable to the current GL context.
   UNKNOWN_CONTEXT_RESET_ARB indicates a detected graphics reset whose
   cause is unknown.

The language here is explicit in that we must mark up the guilty batch,
but is loose enough for us to relax the innocent (i.e. pending)
accounting as only the active batches are involved with the reset.
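
For reference, a robustness-aware client is expected to poll along
these lines (a userspace sketch; recreate_context() is a hypothetical
application helper, and the query requires a context created with a
reset notification strategy):

    GLenum status = glGetGraphicsResetStatusARB();
    if (status != GL_NO_ERROR) {
        /* GUILTY/INNOCENT/UNKNOWN_CONTEXT_RESET_ARB: the context state
         * is lost either way, so recreate the context and its objects.
         */
        recreate_context();
    }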

In the future, we are looking towards single engine resetting (with
minimal locking), where it seems inappropriate to mark the entire world
as innocent since the reset occurred on a different engine. Reducing the
information available means we only have to encounter the pain once, and
also reduces the information leaking from one context to another.

v2: Legacy ringbuffer submission required a reset following hibernation,
or else we restored stale values to the RING_HEAD and walked over
stolen garbage.

v3: GuC requires replaying the requests after a reset.

v4: Restore engine IRQ after reset (so waiters will be woken!)
    Rearm hangcheck if resetting with a waiter.

Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c            |   9 +--
 drivers/gpu/drm/i915/i915_drv.h            |   5 +-
 drivers/gpu/drm/i915/i915_gem.c            | 125 +++++++++++++++++------------
 drivers/gpu/drm/i915/i915_gem_context.c    |  16 ----
 drivers/gpu/drm/i915/i915_guc_submission.c |   8 +-
 drivers/gpu/drm/i915/intel_engine_cs.c     |  15 +++-
 drivers/gpu/drm/i915/intel_lrc.c           |  49 +++++++++--
 drivers/gpu/drm/i915/intel_lrc.h           |   3 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c    |  47 +++++++----
 drivers/gpu/drm/i915/intel_ringbuffer.h    |   7 +-
 10 files changed, 185 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f2614b2f59f7..7f4e8adec8a8 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -559,7 +559,6 @@ static void i915_gem_fini(struct drm_device *dev)
 	}
 
 	mutex_lock(&dev->struct_mutex);
-	i915_gem_reset(dev);
 	i915_gem_cleanup_engines(dev);
 	i915_gem_context_fini(dev);
 	mutex_unlock(&dev->struct_mutex);
@@ -1579,7 +1578,7 @@ static int i915_drm_resume(struct drm_device *dev)
 	mutex_lock(&dev->struct_mutex);
 	if (i915_gem_init_hw(dev)) {
 		DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
-		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
+		i915_gem_set_wedged(dev_priv);
 	}
 	mutex_unlock(&dev->struct_mutex);
 
@@ -1755,9 +1754,6 @@ void i915_reset(struct drm_i915_private *dev_priv)
 	error->reset_count++;
 
 	pr_notice("drm/i915: Resetting chip after gpu hang\n");
-
-	i915_gem_reset(dev);
-
 	ret = intel_gpu_reset(dev_priv, ALL_ENGINES);
 	if (ret) {
 		if (ret != -ENODEV)
@@ -1767,6 +1763,7 @@ void i915_reset(struct drm_i915_private *dev_priv)
 		goto error;
 	}
 
+	i915_gem_reset(dev_priv);
 	intel_overlay_reset(dev_priv);
 
 	/* Ok, now get things going again... */
@@ -1803,7 +1800,7 @@ wakeup:
 	return;
 
 error:
-	set_bit(I915_WEDGED, &error->flags);
+	i915_gem_set_wedged(dev_priv);
 	goto wakeup;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9a9f07f3574c..37978d3f62ce 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2042,6 +2042,7 @@ struct drm_i915_private {
 
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
 	struct {
+		void (*resume)(struct drm_i915_private *);
 		void (*cleanup_engine)(struct intel_engine_cs *engine);
 
 		/**
@@ -3263,7 +3264,8 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
 	return READ_ONCE(error->reset_count);
 }
 
-void i915_gem_reset(struct drm_device *dev);
+void i915_gem_reset(struct drm_i915_private *dev_priv);
+void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
 bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
 int __must_check i915_gem_init(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
@@ -3392,7 +3394,6 @@ void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_context_init(struct drm_device *dev);
 void i915_gem_context_lost(struct drm_i915_private *dev_priv);
 void i915_gem_context_fini(struct drm_device *dev);
-void i915_gem_context_reset(struct drm_device *dev);
 int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
 void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
 int i915_switch_context(struct drm_i915_gem_request *req);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 23069a2d2850..674e0eaf39ea 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2555,29 +2555,85 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
 	return NULL;
 }
 
-static void i915_gem_reset_engine_status(struct intel_engine_cs *engine)
+static void reset_request(struct drm_i915_gem_request *request)
+{
+	void *vaddr = request->ring->vaddr;
+	u32 head;
+
+	/* As this request likely depends on state from the lost
+	 * context, clear out all the user operations leaving the
+	 * breadcrumb at the end (so we get the fence notifications).
+	 */
+	head = request->head;
+	if (request->postfix < head) {
+		memset(vaddr + head, 0, request->ring->size - head);
+		head = 0;
+	}
+	memset(vaddr + head, 0, request->postfix - head);
+}
+
+static void i915_gem_reset_engine(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *request;
+	struct i915_gem_context *incomplete_ctx;
 	bool ring_hung;
 
+	/* Ensure the irq handler finishes, and is not run again. */
+	tasklet_kill(&engine->irq_tasklet);
+	if (engine->irq_seqno_barrier)
+		engine->irq_seqno_barrier(engine);
+
 	request = i915_gem_find_active_request(engine);
-	if (request == NULL)
+	if (!request)
 		return;
 
 	ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
-
 	i915_set_reset_status(request->ctx, ring_hung);
+	if (!ring_hung)
+		return;
+
+	DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
+			 engine->name, request->fence.seqno);
+
+	/* Setup the CS to resume from the breadcrumb of the hung request */
+	engine->reset_hw(engine, request);
+
+	/* Users of the default context do not rely on logical state
+	 * preserved between batches. They have to emit full state on
+	 * every batch and so it is safe to execute queued requests following
+	 * the hang.
+	 *
+	 * Other contexts preserve state, now corrupt. We want to skip all
+	 * queued requests that reference the corrupt context.
+	 */
+	incomplete_ctx = request->ctx;
+	if (i915_gem_context_is_default(incomplete_ctx))
+		return;
+
 	list_for_each_entry_continue(request, &engine->request_list, link)
-		i915_set_reset_status(request->ctx, false);
+		if (request->ctx == incomplete_ctx)
+			reset_request(request);
 }
 
-static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
+void i915_gem_reset(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_gem_request *request;
-	struct intel_ring *ring;
+	struct intel_engine_cs *engine;
 
-	/* Ensure irq handler finishes, and not run again. */
-	tasklet_kill(&engine->irq_tasklet);
+	i915_gem_retire_requests(dev_priv);
+
+	for_each_engine(engine, dev_priv)
+		i915_gem_reset_engine(engine);
+
+	i915_gem_restore_fences(&dev_priv->drm);
+}
+
+static void nop_submit_request(struct drm_i915_gem_request *request)
+{
+}
+
+static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
+{
+	engine->submit_request = nop_submit_request;
 
 	/* Mark all pending requests as complete so that any concurrent
 	 * (lockless) lookup doesn't try and wait upon the request as we
@@ -2600,54 +2656,22 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 		spin_unlock(&engine->execlist_lock);
 	}
 
-	/*
-	 * We must free the requests after all the corresponding objects have
-	 * been moved off active lists. Which is the same order as the normal
-	 * retire_requests function does. This is important if object hold
-	 * implicit references on things like e.g. ppgtt address spaces through
-	 * the request.
-	 */
-	request = i915_gem_active_raw(&engine->last_request,
-				      &engine->i915->drm.struct_mutex);
-	if (request)
-		i915_gem_request_retire_upto(request);
-	GEM_BUG_ON(intel_engine_is_active(engine));
-
-	/* Having flushed all requests from all queues, we know that all
-	 * ringbuffers must now be empty. However, since we do not reclaim
-	 * all space when retiring the request (to prevent HEADs colliding
-	 * with rapid ringbuffer wraparound) the amount of available space
-	 * upon reset is less than when we start. Do one more pass over
-	 * all the ringbuffers to reset last_retired_head.
-	 */
-	list_for_each_entry(ring, &engine->buffers, link) {
-		ring->last_retired_head = ring->tail;
-		intel_ring_update_space(ring);
-	}
-
 	engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
 }
 
-void i915_gem_reset(struct drm_device *dev)
+void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_engine_cs *engine;
 
-	/*
-	 * Before we free the objects from the requests, we need to inspect
-	 * them for finding the guilty party. As the requests only borrow
-	 * their reference to the objects, the inspection must be done first.
-	 */
-	for_each_engine(engine, dev_priv)
-		i915_gem_reset_engine_status(engine);
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+	set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
 
+	i915_gem_context_lost(dev_priv);
 	for_each_engine(engine, dev_priv)
-		i915_gem_reset_engine_cleanup(engine);
+		i915_gem_cleanup_engine(engine);
 	mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
 
-	i915_gem_context_reset(dev);
-
-	i915_gem_restore_fences(dev);
+	i915_gem_retire_requests(dev_priv);
 }
 
 static void
@@ -4343,8 +4367,7 @@ void i915_gem_resume(struct drm_device *dev)
 	 * guarantee that the context image is complete. So let's just reset
 	 * it and start again.
 	 */
-	if (i915.enable_execlists)
-		intel_lr_context_reset(dev_priv, dev_priv->kernel_context);
+	dev_priv->gt.resume(dev_priv);
 
 	mutex_unlock(&dev->struct_mutex);
 }
@@ -4496,8 +4519,10 @@ int i915_gem_init(struct drm_device *dev)
 	mutex_lock(&dev->struct_mutex);
 
 	if (!i915.enable_execlists) {
+		dev_priv->gt.resume = intel_legacy_submission_resume;
 		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
 	} else {
+		dev_priv->gt.resume = intel_lr_context_resume;
 		dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
 	}
 
@@ -4530,7 +4555,7 @@ int i915_gem_init(struct drm_device *dev)
 		 * for all other failure, such as an allocation failure, bail.
 		 */
 		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
-		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
+		i915_gem_set_wedged(dev_priv);
 		ret = 0;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 35950ee46a1d..df10f4e95736 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -420,22 +420,6 @@ static void i915_gem_context_unpin(struct i915_gem_context *ctx,
 	}
 }
 
-void i915_gem_context_reset(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = to_i915(dev);
-
-	lockdep_assert_held(&dev->struct_mutex);
-
-	if (i915.enable_execlists) {
-		struct i915_gem_context *ctx;
-
-		list_for_each_entry(ctx, &dev_priv->context_list, link)
-			intel_lr_context_reset(dev_priv, ctx);
-	}
-
-	i915_gem_context_lost(dev_priv);
-}
-
 int i915_gem_context_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 77526d7f41f8..d5a4e9edccc5 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -994,6 +994,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 	struct intel_guc *guc = &dev_priv->guc;
 	struct i915_guc_client *client;
 	struct intel_engine_cs *engine;
+	struct drm_i915_gem_request *request;
 
 	/* client for execbuf submission */
 	client = guc_client_alloc(dev_priv,
@@ -1010,9 +1011,14 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 	guc_init_doorbell_hw(guc);
 
 	/* Take over from manual control of ELSP (execlists) */
-	for_each_engine(engine, dev_priv)
+	for_each_engine(engine, dev_priv) {
 		engine->submit_request = i915_guc_submit;
 
+		/* Replay the current set of previously submitted requests */
+		list_for_each_entry(request, &engine->request_list, link)
+			i915_guc_submit(request);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 2e96a86105c2..e405f1080296 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -211,6 +211,8 @@ void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
 {
 	memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
 	clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
+	if (intel_engine_has_waiter(engine))
+		i915_queue_hangcheck(engine->i915);
 }
 
 static void intel_engine_init_requests(struct intel_engine_cs *engine)
@@ -230,7 +232,6 @@ static void intel_engine_init_requests(struct intel_engine_cs *engine)
  */
 void intel_engine_setup_common(struct intel_engine_cs *engine)
 {
-	INIT_LIST_HEAD(&engine->buffers);
 	INIT_LIST_HEAD(&engine->execlist_queue);
 	spin_lock_init(&engine->execlist_lock);
 
@@ -306,6 +307,18 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 	return 0;
 }
 
+void intel_engine_reset_irq(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+
+	spin_lock_irq(&dev_priv->irq_lock);
+	if (intel_engine_has_waiter(engine))
+		engine->irq_enable(engine);
+	else
+		engine->irq_disable(engine);
+	spin_unlock_irq(&dev_priv->irq_lock);
+}
+
 /**
  * intel_engines_cleanup_common - cleans up the engine state created by
 *                                the common initializers.
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a33687d294b5..61549a623e2c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1222,11 +1222,16 @@ static void lrc_init_hws(struct intel_engine_cs *engine)
 static int gen8_init_common_ring(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
+	int ret;
+
+	ret = intel_mocs_init_engine(engine);
+	if (ret)
+		return ret;
 
 	lrc_init_hws(engine);
 
-	I915_WRITE_IMR(engine,
-		       ~(engine->irq_enable_mask | engine->irq_keep_mask));
+	intel_engine_reset_irq(engine);
+
 	I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff);
 
 	I915_WRITE(RING_MODE_GEN7(engine),
@@ -1237,7 +1242,10 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 
 	intel_engine_init_hangcheck(engine);
 
-	return intel_mocs_init_engine(engine);
+	if (!execlists_elsp_idle(engine))
+		execlists_submit_ports(engine);
+
+	return 0;
 }
 
 static int gen8_init_render_ring(struct intel_engine_cs *engine)
@@ -1273,6 +1281,36 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
 	return init_workarounds_ring(engine);
 }
 
+static void reset_common_ring(struct intel_engine_cs *engine,
+			      struct drm_i915_gem_request *request)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct execlist_port *port = engine->execlist_port;
+	struct intel_context *ce = &request->ctx->engine[engine->id];
+
+	/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
+	ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;
+	request->ring->head = request->postfix;
+	request->ring->last_retired_head = -1;
+	intel_ring_update_space(request->ring);
+
+	if (i915.enable_guc_submission)
+		return;
+
+	/* Catch up with any missed context-switch interrupts */
+	I915_WRITE(RING_CONTEXT_STATUS_PTR(engine), _MASKED_FIELD(0xffff, 0));
+	if (request->ctx != port[0].request->ctx) {
+		i915_gem_request_put(port[0].request);
+		port[0] = port[1];
+		memset(&port[1], 0, sizeof(port[1]));
+	}
+
+	/* CS is stopped, and we will resubmit both ports on resume */
+	GEM_BUG_ON(request->ctx != port[0].request->ctx);
+	port[0].count = 0;
+	port[1].count = 0;
+}
+
 static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
 {
 	struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt;
@@ -1635,6 +1673,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 {
 	/* Default vfuncs which can be overridden by each engine. */
 	engine->init_hw = gen8_init_common_ring;
+	engine->reset_hw = reset_common_ring;
 	engine->emit_flush = gen8_emit_flush;
 	engine->emit_request = gen8_emit_request;
 	engine->submit_request = execlists_submit_request;
@@ -2087,9 +2126,9 @@ error_deref_obj:
 	return ret;
 }
 
-void intel_lr_context_reset(struct drm_i915_private *dev_priv,
-			    struct i915_gem_context *ctx)
+void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 {
+	struct i915_gem_context *ctx = dev_priv->kernel_context;
 	struct intel_engine_cs *engine;
 
 	for_each_engine(engine, dev_priv) {
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 4d70346500c2..4fed8165f98a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -87,8 +87,7 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx,
 
 struct drm_i915_private;
 
-void intel_lr_context_reset(struct drm_i915_private *dev_priv,
-			    struct i915_gem_context *ctx);
+void intel_lr_context_resume(struct drm_i915_private *dev_priv);
 uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
 				     struct intel_engine_cs *engine);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index d2d85fc869e1..7a74750076c5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -564,6 +564,8 @@ static int init_ring_common(struct intel_engine_cs *engine)
 	else
 		intel_ring_setup_status_page(engine);
 
+	intel_engine_reset_irq(engine);
+
 	/* Enforce ordering by reading HEAD register back */
 	I915_READ_HEAD(engine);
 
@@ -577,34 +579,33 @@ static int init_ring_common(struct intel_engine_cs *engine)
 	if (I915_READ_HEAD(engine))
 		DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
 			  engine->name, I915_READ_HEAD(engine));
-	I915_WRITE_HEAD(engine, 0);
-	(void)I915_READ_HEAD(engine);
+
+	intel_ring_update_space(ring);
+	I915_WRITE_HEAD(engine, ring->head);
+	I915_WRITE_TAIL(engine, ring->tail);
+	(void)I915_READ_TAIL(engine);
 
 	I915_WRITE_CTL(engine,
 			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
 			| RING_VALID);
 
 	/* If the head is still not zero, the ring is dead */
-	if (wait_for((I915_READ_CTL(engine) & RING_VALID) != 0 &&
-		     I915_READ_START(engine) == i915_ggtt_offset(ring->vma) &&
-		     (I915_READ_HEAD(engine) & HEAD_ADDR) == 0, 50)) {
+	if (intel_wait_for_register_fw(dev_priv, RING_CTL(engine->mmio_base),
+				       RING_VALID, RING_VALID,
+				       50)) {
 		DRM_ERROR("%s initialization failed "
-			  "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08x]\n",
+			  "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
 			  engine->name,
 			  I915_READ_CTL(engine),
 			  I915_READ_CTL(engine) & RING_VALID,
-			  I915_READ_HEAD(engine), I915_READ_TAIL(engine),
+			  I915_READ_HEAD(engine), ring->head,
+			  I915_READ_TAIL(engine), ring->tail,
 			  I915_READ_START(engine),
 			  i915_ggtt_offset(ring->vma));
 		ret = -EIO;
 		goto out;
 	}
 
-	ring->last_retired_head = -1;
-	ring->head = I915_READ_HEAD(engine);
-	ring->tail = I915_READ_TAIL(engine) & TAIL_ADDR;
-	intel_ring_update_space(ring);
-
 	intel_engine_init_hangcheck(engine);
 
 out:
@@ -613,6 +614,15 @@ out:
 	return ret;
 }
 
+static void reset_ring_common(struct intel_engine_cs *engine,
+			      struct drm_i915_gem_request *request)
+{
+	struct intel_ring *ring = request->ring;
+
+	ring->head = request->postfix;
+	ring->last_retired_head = -1;
+}
+
 static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
 {
 	struct intel_ring *ring = req->ring;
@@ -2007,7 +2017,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 	}
 	ring->vma = vma;
 
-	list_add(&ring->link, &engine->buffers);
 	return ring;
 }
 
@@ -2015,7 +2024,6 @@ void
 intel_ring_free(struct intel_ring *ring)
 {
 	i915_vma_put(ring->vma);
-	list_del(&ring->link);
 	kfree(ring);
 }
 
@@ -2169,6 +2177,16 @@ void intel_engine_cleanup(struct intel_engine_cs *engine)
 	engine->i915 = NULL;
 }
 
+void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+
+	for_each_engine(engine, dev_priv) {
+		engine->buffer->head = engine->buffer->tail;
+		engine->buffer->last_retired_head = -1;
+	}
+}
+
 int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
 {
 	int ret;
@@ -2654,6 +2672,7 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 	intel_ring_init_semaphores(dev_priv, engine);
 
 	engine->init_hw = init_ring_common;
+	engine->reset_hw = reset_ring_common;
 
 	engine->emit_request = i9xx_emit_request;
 	if (i915.semaphores)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 18848acf5e74..32f527447310 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -87,7 +87,6 @@ struct intel_ring {
 	void *vaddr;
 
 	struct intel_engine_cs *engine;
-	struct list_head link;
 
 	struct list_head request_list;
 
@@ -157,7 +156,6 @@ struct intel_engine_cs {
 	u32		mmio_base;
 	unsigned int irq_shift;
 	struct intel_ring *buffer;
-	struct list_head buffers;
 
 	/* Rather than have every client wait upon all user interrupts,
 	 * with the herd waking after every interrupt and each doing the
@@ -211,6 +209,8 @@ struct intel_engine_cs {
 	void		(*irq_disable)(struct intel_engine_cs *engine);
 
 	int		(*init_hw)(struct intel_engine_cs *engine);
+	void		(*reset_hw)(struct intel_engine_cs *engine,
+				    struct drm_i915_gem_request *req);
 
 	int		(*init_context)(struct drm_i915_gem_request *req);
 
@@ -444,6 +444,8 @@ void intel_ring_free(struct intel_ring *ring);
 void intel_engine_stop(struct intel_engine_cs *engine);
 void intel_engine_cleanup(struct intel_engine_cs *engine);
 
+void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
+
 int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request);
 
 int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n);
@@ -482,6 +484,7 @@ int __intel_ring_space(int head, int tail, int size);
 void intel_ring_update_space(struct intel_ring *ring);
 
 void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno);
+void intel_engine_reset_irq(struct intel_engine_cs *engine);
 
 void intel_engine_setup_common(struct intel_engine_cs *engine);
 int intel_engine_init_common(struct intel_engine_cs *engine);
-- 
2.9.3

* [CI 14/21] drm/i915: Drive request submission through fence callbacks
  2016-09-09 11:00 [CI 01/21] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
                   ` (11 preceding siblings ...)
  2016-09-09 11:00 ` [CI 13/21] drm/i915: Update reset path to fix incomplete requests Chris Wilson
@ 2016-09-09 11:00 ` Chris Wilson
  2016-09-09 11:00 ` [CI 15/21] drm/i915: Reorder i915_add_request to separate the phases better Chris Wilson
                   ` (7 subsequent siblings)
  20 siblings, 0 replies; 26+ messages in thread
From: Chris Wilson @ 2016-09-09 11:00 UTC (permalink / raw)
  To: intel-gfx

Drive final request submission through a callback from the fence. This
way the request is queued until all of its dependencies are resolved,
at which point it is handed to the backend for queueing to hardware.
For now, no dependencies are ever set on the request, so the callback
fires immediately.

A side-effect of imposing a heavier, irqsafe spinlock for execlists
submission is that we lose the softirq enabling after scheduling the
execlists tasklet. To compensate, we manually kickstart the softirq by
disabling and re-enabling the bottom halves around the fence signaling.
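
A self-contained sketch of that kick, with made-up tasklet and lock
names (generic kernel code, not taken from this patch):

	#include <linux/interrupt.h>
	#include <linux/spinlock.h>

	static void demo_fn(unsigned long data)
	{
		/* runs in softirq context once the tasklet fires */
	}
	static DECLARE_TASKLET(demo_tasklet, demo_fn, 0);
	static DEFINE_SPINLOCK(demo_lock);

	static void schedule_and_kick(void)
	{
		unsigned long flags;

		spin_lock_irqsave(&demo_lock, flags);
		tasklet_hi_schedule(&demo_tasklet); /* raise the softirq */
		spin_unlock_irqrestore(&demo_lock, flags);

		/* In process context, briefly toggling bh runs any
		 * pending softirq right here instead of deferring it
		 * to the next interrupt exit (or to ksoftirqd).
		 */
		local_bh_disable();
		local_bh_enable();
	}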

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: John Harrison <john.c.harrison@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c            |  3 +++
 drivers/gpu/drm/i915/i915_gem_request.c    | 27 ++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_request.h    |  3 +++
 drivers/gpu/drm/i915/i915_guc_submission.c |  3 ++-
 drivers/gpu/drm/i915/intel_breadcrumbs.c   |  3 +++
 drivers/gpu/drm/i915/intel_lrc.c           |  5 +++--
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  8 ++++++++
 7 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 674e0eaf39ea..89a5f8d948e7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2549,6 +2549,9 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
 		if (i915_gem_request_completed(request))
 			continue;
 
+		if (!i915_sw_fence_done(&request->submit))
+			break;
+
 		return request;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 64c370681a81..074fc06ff488 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -318,6 +318,26 @@ static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
 	return 0;
 }
 
+static int __i915_sw_fence_call
+submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+	struct drm_i915_gem_request *request =
+		container_of(fence, typeof(*request), submit);
+
+	/* Will be called from irq-context when using foreign DMA fences */
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		request->engine->submit_request(request);
+		break;
+
+	case FENCE_FREE:
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
 /**
  * i915_gem_request_alloc - allocate a request structure
  *
@@ -396,6 +416,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		   engine->fence_context,
 		   seqno);
 
+	i915_sw_fence_init(&req->submit, submit_notify);
+
 	INIT_LIST_HEAD(&req->active_list);
 	req->i915 = dev_priv;
 	req->engine = engine;
@@ -530,7 +552,10 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 		  reserved_tail, ret);
 
 	i915_gem_mark_busy(engine);
-	engine->submit_request(request);
+
+	local_bh_disable();
+	i915_sw_fence_commit(&request->submit);
+	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
 }
 
 static void reset_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index def35721e9ed..e141b1cca16a 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -28,6 +28,7 @@
 #include <linux/fence.h>
 
 #include "i915_gem.h"
+#include "i915_sw_fence.h"
 
 struct intel_wait {
 	struct rb_node node;
@@ -82,6 +83,8 @@ struct drm_i915_gem_request {
 	struct intel_ring *ring;
 	struct intel_signal_node signaling;
 
+	struct i915_sw_fence submit;
+
 	/** GEM sequence number associated with the previous request,
 	 * when the HWS breadcrumb is equal to this the GPU is processing
 	 * this request.
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index d5a4e9edccc5..0eb6b71935cf 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -1016,7 +1016,8 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 
 		/* Replay the current set of previously submitted requests */
 		list_for_each_entry(request, &engine->request_list, link)
-			i915_guc_submit(request);
+			if (i915_sw_fence_done(&request->submit))
+				i915_guc_submit(request);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 2491e4c1eaf0..9bad14d22c95 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -462,7 +462,10 @@ static int intel_breadcrumbs_signaler(void *arg)
 			 */
 			intel_engine_remove_wait(engine,
 						 &request->signaling.wait);
+
+			local_bh_disable();
 			fence_signal(&request->fence);
+			local_bh_enable(); /* kick start the tasklets */
 
 			/* Find the next oldest signal. Note that as we have
 			 * not been holding the lock, another client may
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 61549a623e2c..16d7cdd11082 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -590,14 +590,15 @@ static void intel_lrc_irq_handler(unsigned long data)
 static void execlists_submit_request(struct drm_i915_gem_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
+	unsigned long flags;
 
-	spin_lock_bh(&engine->execlist_lock);
+	spin_lock_irqsave(&engine->execlist_lock, flags);
 
 	list_add_tail(&request->execlist_link, &engine->execlist_queue);
 	if (execlists_elsp_idle(engine))
 		tasklet_hi_schedule(&engine->irq_tasklet);
 
-	spin_unlock_bh(&engine->execlist_lock);
+	spin_unlock_irqrestore(&engine->execlist_lock, flags);
 }
 
 int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 32f527447310..7f64d611159b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -226,7 +226,15 @@ struct intel_engine_cs {
 #define I915_DISPATCH_PINNED BIT(1)
 #define I915_DISPATCH_RS     BIT(2)
 	int		(*emit_request)(struct drm_i915_gem_request *req);
+
+	/* Pass the request to the hardware queue (e.g. directly into
+	 * the legacy ringbuffer or to the end of an execlist).
+	 *
+	 * This is called from an atomic context with irqs disabled; must
+	 * be irq safe.
+	 */
 	void		(*submit_request)(struct drm_i915_gem_request *req);
+
 	/* Some chipsets are not quite as coherent as advertised and need
 	 * an expensive kick to force a true read of the up-to-date seqno.
 	 * However, the up-to-date seqno is not always required and the last
-- 
2.9.3

* [CI 15/21] drm/i915: Reorder i915_add_request to separate the phases better
  2016-09-09 11:00 [CI 01/21] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
                   ` (12 preceding siblings ...)
  2016-09-09 11:00 ` [CI 14/21] drm/i915: Drive request submission through fence callbacks Chris Wilson
@ 2016-09-09 11:00 ` Chris Wilson
  2016-09-09 11:00 ` [CI 16/21] drm/i915: Prepare object synchronisation for asynchronicity Chris Wilson
                   ` (6 subsequent siblings)
  20 siblings, 0 replies; 26+ messages in thread
From: Chris Wilson @ 2016-09-09 11:00 UTC (permalink / raw)
  To: intel-gfx

Let's avoid mixing the sealing of the hardware commands for the request
with the addition of the request to the software tracking.
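
Schematically, the ordering after this patch (a simplified sketch of
__i915_add_request(), not the literal code):

	/* 1: seal the hardware commands, which may still write into
	 * the ring (flushes, breadcrumb)
	 */
	engine->emit_request(request);

	/* 2: only then publish the request to the software tracking */
	request->emitted_jiffies = jiffies;
	list_add_tail(&request->link, &engine->request_list);
	list_add_tail(&request->ring_link, &ring->request_list);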

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 074fc06ff488..a149310c82ce 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -494,6 +494,8 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	u32 reserved_tail;
 	int ret;
 
+	trace_i915_gem_request_add(request);
+
 	/*
 	 * To ensure that this call will not fail, space for its emissions
 	 * should already have been reserved in the ring buffer. Let the ring
@@ -517,20 +519,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 		WARN(ret, "engine->emit_flush() failed: %d!\n", ret);
 	}
 
-	trace_i915_gem_request_add(request);
-
-	/* Seal the request and mark it as pending execution. Note that
-	 * we may inspect this state, without holding any locks, during
-	 * hangcheck. Hence we apply the barrier to ensure that we do not
-	 * see a more recent value in the hws than we are tracking.
-	 */
-	request->emitted_jiffies = jiffies;
-	request->previous_seqno = engine->last_submitted_seqno;
-	engine->last_submitted_seqno = request->fence.seqno;
-	i915_gem_active_set(&engine->last_request, request);
-	list_add_tail(&request->link, &engine->request_list);
-	list_add_tail(&request->ring_link, &ring->request_list);
-
 	/* Record the position of the start of the breadcrumb so that
 	 * should we detect the updated seqno part-way through the
 	 * GPU processing the request, we never over-estimate the
@@ -551,6 +539,18 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 		  "for adding the request (%d bytes)\n",
 		  reserved_tail, ret);
 
+	/* Seal the request and mark it as pending execution. Note that
+	 * we may inspect this state, without holding any locks, during
+	 * hangcheck. Hence we apply the barrier to ensure that we do not
+	 * see a more recent value in the hws than we are tracking.
+	 */
+	request->emitted_jiffies = jiffies;
+	request->previous_seqno = engine->last_submitted_seqno;
+	engine->last_submitted_seqno = request->fence.seqno;
+	i915_gem_active_set(&engine->last_request, request);
+	list_add_tail(&request->link, &engine->request_list);
+	list_add_tail(&request->ring_link, &ring->request_list);
+
 	i915_gem_mark_busy(engine);
 
 	local_bh_disable();
-- 
2.9.3

* [CI 16/21] drm/i915: Prepare object synchronisation for asynchronicity
  2016-09-09 11:00 [CI 01/21] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
                   ` (13 preceding siblings ...)
  2016-09-09 11:00 ` [CI 15/21] drm/i915: Reorder i915_add_request to separate the phases better Chris Wilson
@ 2016-09-09 11:00 ` Chris Wilson
  2016-09-09 11:00 ` [CI 17/21] drm/i915/guc: Prepare for nonblocking execbuf submission Chris Wilson
                   ` (5 subsequent siblings)
  20 siblings, 0 replies; 26+ messages in thread
From: Chris Wilson @ 2016-09-09 11:00 UTC (permalink / raw)
  To: intel-gfx

We are about to specialize object synchronisation to enable nonblocking
execbuf submission. First we make a copy of the current object
synchronisation routine for use by execbuffer. The general
i915_gem_object_sync() will be removed following the removal of CS
flips in the near future.
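
A sketch of the replacement call site (mirroring the execbuffer hunk
below; req and obj are assumed to be in scope):

	/* Wait for the most recent write to obj; if this request will
	 * itself write, wait for all outstanding reads as well.
	 */
	ret = i915_gem_request_await_object(req, obj,
					    obj->base.pending_write_domain);
	if (ret)
		return ret;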

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: John Harrison <john.c.harrison@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            |  2 -
 drivers/gpu/drm/i915/i915_gem.c            | 91 ------------------------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  3 +-
 drivers/gpu/drm/i915/i915_gem_request.c    | 87 ++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_request.h    |  5 ++
 drivers/gpu/drm/i915/intel_display.c       |  2 +-
 6 files changed, 95 insertions(+), 95 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 37978d3f62ce..1e2dda88a483 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3221,8 +3221,6 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
 }
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
-int i915_gem_object_sync(struct drm_i915_gem_object *obj,
-			 struct drm_i915_gem_request *to);
 void i915_vma_move_to_active(struct i915_vma *vma,
 			     struct drm_i915_gem_request *req,
 			     unsigned int flags);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 89a5f8d948e7..4b5364d477f1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2818,97 +2818,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	return ret;
 }
 
-static int
-__i915_gem_object_sync(struct drm_i915_gem_request *to,
-		       struct drm_i915_gem_request *from)
-{
-	int ret;
-
-	if (to->engine == from->engine)
-		return 0;
-
-	if (!i915.semaphores) {
-		ret = i915_wait_request(from,
-					from->i915->mm.interruptible |
-					I915_WAIT_LOCKED,
-					NULL,
-					NO_WAITBOOST);
-		if (ret)
-			return ret;
-	} else {
-		int idx = intel_engine_sync_index(from->engine, to->engine);
-		if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
-			return 0;
-
-		trace_i915_gem_ring_sync_to(to, from);
-		ret = to->engine->semaphore.sync_to(to, from);
-		if (ret)
-			return ret;
-
-		from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
-	}
-
-	return 0;
-}
-
-/**
- * i915_gem_object_sync - sync an object to a ring.
- *
- * @obj: object which may be in use on another ring.
- * @to: request we are wishing to use
- *
- * This code is meant to abstract object synchronization with the GPU.
- * Conceptually we serialise writes between engines inside the GPU.
- * We only allow one engine to write into a buffer at any time, but
- * multiple readers. To ensure each has a coherent view of memory, we must:
- *
- * - If there is an outstanding write request to the object, the new
- *   request must wait for it to complete (either CPU or in hw, requests
- *   on the same ring will be naturally ordered).
- *
- * - If we are a write request (pending_write_domain is set), the new
- *   request must wait for outstanding read requests to complete.
- *
- * Returns 0 if successful, else propagates up the lower layer error.
- */
-int
-i915_gem_object_sync(struct drm_i915_gem_object *obj,
-		     struct drm_i915_gem_request *to)
-{
-	struct i915_gem_active *active;
-	unsigned long active_mask;
-	int idx;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	active_mask = i915_gem_object_get_active(obj);
-	if (!active_mask)
-		return 0;
-
-	if (obj->base.pending_write_domain) {
-		active = obj->last_read;
-	} else {
-		active_mask = 1;
-		active = &obj->last_write;
-	}
-
-	for_each_active(active_mask, idx) {
-		struct drm_i915_gem_request *request;
-		int ret;
-
-		request = i915_gem_active_peek(&active[idx],
-					       &obj->base.dev->struct_mutex);
-		if (!request)
-			continue;
-
-		ret = __i915_gem_object_sync(to, request);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
 static void __i915_vma_iounmap(struct i915_vma *vma)
 {
 	GEM_BUG_ON(i915_vma_is_pinned(vma));
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 9432d4ce9ffb..ccaf15ba4e32 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1133,7 +1133,8 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 		struct drm_i915_gem_object *obj = vma->obj;
 
 		if (obj->flags & other_rings) {
-			ret = i915_gem_object_sync(obj, req);
+			ret = i915_gem_request_await_object
+				(req, obj, obj->base.pending_write_domain);
 			if (ret)
 				return ret;
 		}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index a149310c82ce..017cadf54d80 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -460,6 +460,93 @@ err:
 	return ERR_PTR(ret);
 }
 
+static int
+i915_gem_request_await_request(struct drm_i915_gem_request *to,
+			       struct drm_i915_gem_request *from)
+{
+	int idx, ret;
+
+	GEM_BUG_ON(to == from);
+
+	if (to->engine == from->engine)
+		return 0;
+
+	idx = intel_engine_sync_index(from->engine, to->engine);
+	if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
+		return 0;
+
+	trace_i915_gem_ring_sync_to(to, from);
+	if (!i915.semaphores) {
+		ret = i915_wait_request(from,
+					I915_WAIT_INTERRUPTIBLE |
+					I915_WAIT_LOCKED,
+					NULL, NO_WAITBOOST);
+		if (ret)
+			return ret;
+	} else {
+		ret = to->engine->semaphore.sync_to(to, from);
+		if (ret)
+			return ret;
+	}
+
+	from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
+	return 0;
+}
+
+/**
+ * i915_gem_request_await_object - set this request to (async) wait upon a bo
+ *
+ * @to: request we are wishing to use
+ * @obj: object which may be in use on another ring.
+ *
+ * This code is meant to abstract object synchronization with the GPU.
+ * Conceptually we serialise writes between engines inside the GPU.
+ * We only allow one engine to write into a buffer at any time, but
+ * multiple readers. To ensure each has a coherent view of memory, we must:
+ *
+ * - If there is an outstanding write request to the object, the new
+ *   request must wait for it to complete (either CPU or in hw, requests
+ *   on the same ring will be naturally ordered).
+ *
+ * - If we are a write request (pending_write_domain is set), the new
+ *   request must wait for outstanding read requests to complete.
+ *
+ * Returns 0 if successful, else propagates up the lower layer error.
+ */
+int
+i915_gem_request_await_object(struct drm_i915_gem_request *to,
+			      struct drm_i915_gem_object *obj,
+			      bool write)
+{
+	struct i915_gem_active *active;
+	unsigned long active_mask;
+	int idx;
+
+	if (write) {
+		active_mask = i915_gem_object_get_active(obj);
+		active = obj->last_read;
+	} else {
+		active_mask = 1;
+		active = &obj->last_write;
+	}
+
+	for_each_active(active_mask, idx) {
+		struct drm_i915_gem_request *request;
+		int ret;
+
+		request = i915_gem_active_peek(&active[idx],
+					       &obj->base.dev->struct_mutex);
+		if (!request)
+			continue;
+
+		ret = i915_gem_request_await_request(to, request);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index e141b1cca16a..883df3bdb381 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -209,6 +209,11 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
 	*pdst = src;
 }
 
+int
+i915_gem_request_await_object(struct drm_i915_gem_request *to,
+			      struct drm_i915_gem_object *obj,
+			      bool write);
+
 void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
 #define i915_add_request(req) \
 	__i915_add_request(req, true)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 5771ad802498..8d4c35d55b1b 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12275,7 +12275,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 			goto cleanup_unpin;
 		}
 
-		ret = i915_gem_object_sync(obj, request);
+		ret = i915_gem_request_await_object(request, obj, false);
 		if (ret)
 			goto cleanup_request;
 
-- 
2.9.3

* [CI 17/21] drm/i915/guc: Prepare for nonblocking execbuf submission
  2016-09-09 11:00 [CI 01/21] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
                   ` (14 preceding siblings ...)
  2016-09-09 11:00 ` [CI 16/21] drm/i915: Prepare object synchronisation for asynchronicity Chris Wilson
@ 2016-09-09 11:00 ` Chris Wilson
  2016-09-09 11:00 ` [CI 18/21] drm/i915: Ignore valid but unknown semaphores Chris Wilson
                   ` (4 subsequent siblings)
  20 siblings, 0 replies; 26+ messages in thread
From: Chris Wilson @ 2016-09-09 11:00 UTC (permalink / raw)
  To: intel-gfx

Currently the presumption is that request construction and its
submission to the GuC both happen under the same hold of struct_mutex.
We wish to relax this and separate the request construction from the
later submission to the GuC. This requires us to reserve some space in
the GuC command queue for the future submission. For the flexibility
to handle out-of-order request submission, we do not preallocate the
next slot in the GuC command queue during request construction, merely
ensure that there is enough space later.
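
The accounting pattern, reduced to its essentials with hypothetical
names (the real code operates on the GuC client's circular workqueue):

	/* at request construction: claim space without picking a slot */
	spin_lock(&q->lock);
	if (CIRC_SPACE(q->tail, q->head, q->size) - q->reserved >= ITEM_SZ) {
		q->reserved += ITEM_SZ;
		ret = 0;
	} else {
		ret = -EAGAIN; /* no room; back off and retry later */
	}
	spin_unlock(&q->lock);

	/* at submission: writing the item consumes the reservation */
	spin_lock(&q->lock);
	write_item(q, q->tail);
	q->tail = (q->tail + ITEM_SZ) & (q->size - 1);
	q->reserved -= ITEM_SZ;
	spin_unlock(&q->lock);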

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 35 ++++++++++++++++++++----------
 drivers/gpu/drm/i915/intel_guc.h           |  2 ++
 2 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 0eb6b71935cf..279a4d060288 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -432,20 +432,23 @@ int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
 {
 	const size_t wqi_size = sizeof(struct guc_wq_item);
 	struct i915_guc_client *gc = request->i915->guc.execbuf_client;
-	struct guc_process_desc *desc;
+	struct guc_process_desc *desc = gc->client_base + gc->proc_desc_offset;
 	u32 freespace;
+	int ret;
 
-	GEM_BUG_ON(gc == NULL);
-
-	desc = gc->client_base + gc->proc_desc_offset;
-
+	spin_lock(&gc->wq_lock);
 	freespace = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);
-	if (likely(freespace >= wqi_size))
-		return 0;
-
-	gc->no_wq_space += 1;
+	freespace -= gc->wq_rsvd;
+	if (likely(freespace >= wqi_size)) {
+		gc->wq_rsvd += wqi_size;
+		ret = 0;
+	} else {
+		gc->no_wq_space++;
+		ret = -EAGAIN;
+	}
+	spin_unlock(&gc->wq_lock);
 
-	return -EAGAIN;
+	return ret;
 }
 
 static void guc_add_workqueue_item(struct i915_guc_client *gc,
@@ -480,12 +483,14 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
 	 * workqueue buffer dw by dw.
 	 */
 	BUILD_BUG_ON(wqi_size != 16);
+	GEM_BUG_ON(gc->wq_rsvd < wqi_size);
 
 	/* postincrement WQ tail for next time */
 	wq_off = gc->wq_tail;
+	GEM_BUG_ON(wq_off & (wqi_size - 1));
 	gc->wq_tail += wqi_size;
 	gc->wq_tail &= gc->wq_size - 1;
-	GEM_BUG_ON(wq_off & (wqi_size - 1));
+	gc->wq_rsvd -= wqi_size;
 
 	/* WQ starts from the page after doorbell / process_desc */
 	wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT;
@@ -589,6 +594,7 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq)
 	struct i915_guc_client *client = guc->execbuf_client;
 	int b_ret;
 
+	spin_lock(&client->wq_lock);
 	guc_add_workqueue_item(client, rq);
 	b_ret = guc_ring_doorbell(client);
 
@@ -599,6 +605,7 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq)
 
 	guc->submissions[engine_id] += 1;
 	guc->last_seqno[engine_id] = rq->fence.seqno;
+	spin_unlock(&client->wq_lock);
 }
 
 /*
@@ -789,6 +796,8 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
 	/* We'll keep just the first (doorbell/proc) page permanently kmap'd. */
 	client->vma = vma;
 	client->client_base = kmap(i915_vma_first_page(vma));
+
+	spin_lock_init(&client->wq_lock);
 	client->wq_offset = GUC_DB_SIZE;
 	client->wq_size = GUC_WQ_SIZE;
 
@@ -1015,9 +1024,11 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 		engine->submit_request = i915_guc_submit;
 
 		/* Replay the current set of previously submitted requests */
-		list_for_each_entry(request, &engine->request_list, link)
+		list_for_each_entry(request, &engine->request_list, link) {
+			client->wq_rsvd += sizeof(struct guc_wq_item);
 			if (i915_sw_fence_done(&request->submit))
 				i915_guc_submit(request);
+		}
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index c97326269588..467845967e0b 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -78,9 +78,11 @@ struct i915_guc_client {
 	uint16_t doorbell_id;
 	uint16_t padding[3];		/* Maintain alignment		*/
 
+	spinlock_t wq_lock;
 	uint32_t wq_offset;
 	uint32_t wq_size;
 	uint32_t wq_tail;
+	uint32_t wq_rsvd;
 	uint32_t no_wq_space;
 	uint32_t b_fail;
 	int retcode;
-- 
2.9.3

* [CI 18/21] drm/i915: Ignore valid but unknown semaphores
  2016-09-09 11:00 [CI 01/21] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
                   ` (15 preceding siblings ...)
  2016-09-09 11:00 ` [CI 17/21] drm/i915/guc: Prepare for nonblocking execbuf submission Chris Wilson
@ 2016-09-09 11:00 ` Chris Wilson
  2016-09-09 11:00 ` [CI 19/21] drm/i915: Avoid incrementing hangcheck whilst waiting for external fence Chris Wilson
                   ` (3 subsequent siblings)
  20 siblings, 0 replies; 26+ messages in thread
From: Chris Wilson @ 2016-09-09 11:00 UTC (permalink / raw)
  To: intel-gfx

If we find a ring waiting on a semaphore for a request that has been
assigned but not yet emitted, treat the ring as valid and waiting.
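
This reuses the usual kernel convention of distinguishing three
outcomes via NULL vs ERR_PTR(); a sketch of the caller-side handling,
as in the semaphore_passed() hunk below:

	signaller = semaphore_waits_for(engine, &seqno);
	if (signaller == NULL)
		return -1;	/* no signaller found: suspect a hang */
	if (IS_ERR(signaller))
		return 0;	/* valid wait on a not-yet-emitted request */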

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index ef2d40278191..b76d45d91a84 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2831,10 +2831,10 @@ semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr,
 		}
 	}
 
-	DRM_ERROR("No signaller ring found for ring %i, ipehr 0x%08x, offset 0x%016llx\n",
-		  engine->id, ipehr, offset);
+	DRM_DEBUG_DRIVER("No signaller ring found for ring %i, ipehr 0x%08x, offset 0x%016llx\n",
+			 engine->id, ipehr, offset);
 
-	return NULL;
+	return ERR_PTR(-ENODEV);
 }
 
 static struct intel_engine_cs *
@@ -2922,6 +2922,9 @@ static int semaphore_passed(struct intel_engine_cs *engine)
 	if (signaller == NULL)
 		return -1;
 
+	if (IS_ERR(signaller))
+		return 0;
+
 	/* Prevent pathological recursion due to driver bugs */
 	if (signaller->hangcheck.deadlock >= I915_NUM_ENGINES)
 		return -1;
-- 
2.9.3

* [CI 19/21] drm/i915: Avoid incrementing hangcheck whilst waiting for external fence
  2016-09-09 11:00 [CI 01/21] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
                   ` (16 preceding siblings ...)
  2016-09-09 11:00 ` [CI 18/21] drm/i915: Ignore valid but unknown semaphores Chris Wilson
@ 2016-09-09 11:00 ` Chris Wilson
  2016-09-09 12:48   ` Mika Kuoppala
  2016-09-09 11:01 ` [CI 20/21] drm/i915: Nonblocking request submission Chris Wilson
                   ` (2 subsequent siblings)
  20 siblings, 1 reply; 26+ messages in thread
From: Chris Wilson @ 2016-09-09 11:00 UTC (permalink / raw)
  To: intel-gfx

If we are waiting upon an external fence, from the point of view of
hangcheck the engine is stuck on the last submitted seqno. Currently we
apply a small increment to the hangcheck score in order to catch a
stuck waiter or driver. Now that we both have an independent
wait-hangcheck and may be stuck waiting on an external fence, resetting
the GPU has little effect on that external fence. As we cannot advance
by resetting, skip incrementing the hangcheck score.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b76d45d91a84..8462817a7dae 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3099,10 +3099,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 		if (engine->hangcheck.seqno == seqno) {
 			if (i915_seqno_passed(seqno, submit)) {
 				engine->hangcheck.action = HANGCHECK_IDLE;
-				if (busy) {
-					/* Safeguard against driver failure */
-					engine->hangcheck.score += BUSY;
-				}
 			} else {
 				/* We always increment the hangcheck score
 				 * if the engine is busy and still processing
-- 
2.9.3

* [CI 20/21] drm/i915: Nonblocking request submission
  2016-09-09 11:00 [CI 01/21] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
                   ` (17 preceding siblings ...)
  2016-09-09 11:00 ` [CI 19/21] drm/i915: Avoid incrementing hangcheck whilst waiting for external fence Chris Wilson
@ 2016-09-09 11:01 ` Chris Wilson
  2016-09-09 11:01 ` [CI 21/21] drm/i915: Serialise execbuf operation after a dma-buf reservation object Chris Wilson
  2016-09-09 11:56 ` ✗ Fi.CI.BAT: failure for series starting with [CI,01/21] drm/i915: Add a sw fence for collecting up dma fences Patchwork
  20 siblings, 0 replies; 26+ messages in thread
From: Chris Wilson @ 2016-09-09 11:01 UTC (permalink / raw)
  To: intel-gfx

Now that we have fences in place to drive request submission, we can
employ those to queue requests after their dependencies as opposed to
stalling in the middle of an execbuf ioctl. (However, we still choose
to spin before enabling the IRQ, as that is faster, though contentious.)

v2: Do the fence ordering first, where we can still fail.
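
The spin-then-queue fallback from the hunk below, annotated (the 2 is a
busy-wait budget, in microseconds, before falling back to the
asynchronous fence wait):

	if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) {
		/* not complete within the spin budget: register an
		 * async wait on the submit fence and return without
		 * blocking the ioctl
		 */
		ret = i915_sw_fence_await_dma_fence(&to->submit,
						    &from->fence, 0,
						    GFP_KERNEL);
		if (ret < 0)
			return ret;
	}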

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 21 +++++++++++++++------
 drivers/gpu/drm/i915/i915_gem_request.h |  1 +
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 017cadf54d80..40978bc12ceb 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -477,12 +477,13 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 
 	trace_i915_gem_ring_sync_to(to, from);
 	if (!i915.semaphores) {
-		ret = i915_wait_request(from,
-					I915_WAIT_INTERRUPTIBLE |
-					I915_WAIT_LOCKED,
-					NULL, NO_WAITBOOST);
-		if (ret)
-			return ret;
+		if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) {
+			ret = i915_sw_fence_await_dma_fence(&to->submit,
+							    &from->fence, 0,
+							    GFP_KERNEL);
+			if (ret < 0)
+				return ret;
+		}
 	} else {
 		ret = to->engine->semaphore.sync_to(to, from);
 		if (ret)
@@ -577,6 +578,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 {
 	struct intel_engine_cs *engine = request->engine;
 	struct intel_ring *ring = request->ring;
+	struct drm_i915_gem_request *prev;
 	u32 request_start;
 	u32 reserved_tail;
 	int ret;
@@ -631,6 +633,13 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	 * hangcheck. Hence we apply the barrier to ensure that we do not
 	 * see a more recent value in the hws than we are tracking.
 	 */
+
+	prev = i915_gem_active_raw(&engine->last_request,
+				   &request->i915->drm.struct_mutex);
+	if (prev)
+		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
+					     &request->submitq);
+
 	request->emitted_jiffies = jiffies;
 	request->previous_seqno = engine->last_submitted_seqno;
 	engine->last_submitted_seqno = request->fence.seqno;
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 883df3bdb381..974bd7bcc801 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -84,6 +84,7 @@ struct drm_i915_gem_request {
 	struct intel_signal_node signaling;
 
 	struct i915_sw_fence submit;
+	wait_queue_t submitq;
 
 	/** GEM sequence number associated with the previous request,
 	 * when the HWS breadcrumb is equal to this the GPU is processing
-- 
2.9.3

* [CI 21/21] drm/i915: Serialise execbuf operation after a dma-buf reservation object
  2016-09-09 11:00 [CI 01/21] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
                   ` (18 preceding siblings ...)
  2016-09-09 11:01 ` [CI 20/21] drm/i915: Nonblocking request submission Chris Wilson
@ 2016-09-09 11:01 ` Chris Wilson
  2016-09-09 11:56 ` ✗ Fi.CI.BAT: failure for series starting with [CI,01/21] drm/i915: Add a sw fence for collecting up dma fences Patchwork
  20 siblings, 0 replies; 26+ messages in thread
From: Chris Wilson @ 2016-09-09 11:01 UTC (permalink / raw)
  To: intel-gfx

Now that we can wait upon fences before emitting the request, it becomes
trivial to wait upon any implicit fence provided by the dma-buf
reservation object.
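
In execbuffer this amounts to one extra await per object; a sketch of
the hunk below, annotated (as I read the helper, the write flag selects
waiting on the shared fences as well as the exclusive one, and 10*HZ
bounds the wait on foreign fences):

	resv = i915_gem_object_get_dmabuf_resv(obj);
	if (resv) {
		ret = i915_sw_fence_await_reservation
			(&req->submit, resv, &i915_fence_ops,
			 obj->base.pending_write_domain, 10*HZ,
			 GFP_KERNEL | __GFP_NOWARN);
		if (ret < 0)
			return ret;
	}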

Testcase: igt/prime_vgem/fence-wait
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: John Harrison <john.c.harrison@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index ccaf15ba4e32..33c85227643d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1131,6 +1131,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
+		struct reservation_object *resv;
 
 		if (obj->flags & other_rings) {
 			ret = i915_gem_request_await_object
@@ -1139,6 +1140,16 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 				return ret;
 		}
 
+		resv = i915_gem_object_get_dmabuf_resv(obj);
+		if (resv) {
+			ret = i915_sw_fence_await_reservation
+				(&req->submit, resv, &i915_fence_ops,
+				 obj->base.pending_write_domain, 10*HZ,
+				 GFP_KERNEL | __GFP_NOWARN);
+			if (ret < 0)
+				return ret;
+		}
+
 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
 			i915_gem_clflush_object(obj, false);
 	}
-- 
2.9.3

* ✗ Fi.CI.BAT: failure for series starting with [CI,01/21] drm/i915: Add a sw fence for collecting up dma fences
  2016-09-09 11:00 [CI 01/21] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
                   ` (19 preceding siblings ...)
  2016-09-09 11:01 ` [CI 21/21] drm/i915: Serialise execbuf operation after a dma-buf reservation object Chris Wilson
@ 2016-09-09 11:56 ` Patchwork
  2016-09-09 12:01   ` Chris Wilson
  20 siblings, 1 reply; 26+ messages in thread
From: Patchwork @ 2016-09-09 11:56 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [CI,01/21] drm/i915: Add a sw fence for collecting up dma fences
URL   : https://patchwork.freedesktop.org/series/12233/
State : failure

== Summary ==

Series 12233v1 Series without cover letter
http://patchwork.freedesktop.org/api/1.0/series/12233/revisions/1/mbox/

Test gem_cpu_reloc:
        Subgroup basic:
                pass       -> INCOMPLETE (fi-bsw-n3050)
Test gem_sync:
        Subgroup basic-store-all:
                pass       -> INCOMPLETE (fi-skl-6700k)
                pass       -> INCOMPLETE (fi-skl-6260u)
                pass       -> INCOMPLETE (fi-skl-6700hq)
                pass       -> INCOMPLETE (fi-ilk-650)
                pass       -> INCOMPLETE (fi-bdw-5557u)
Test prime_vgem:
        Subgroup basic-fence-wait-default:
                fail       -> PASS       (fi-snb-2520m)
                fail       -> PASS       (fi-byt-n2820)
                fail       -> PASS       (fi-hsw-4770k)
                fail       -> PASS       (fi-snb-2600)
                fail       -> PASS       (fi-ivb-3520m)
                fail       -> PASS       (fi-hsw-4770r)
                fail       -> PASS       (fi-ivb-3770)

fi-bdw-5557u     total:22   pass:21   dwarn:0   dfail:0   fail:0   skip:0  
fi-bsw-n3050     total:3    pass:2    dwarn:0   dfail:0   fail:0   skip:0  
fi-byt-n2820     total:254  pass:212  dwarn:0   dfail:0   fail:1   skip:41 
fi-hsw-4770k     total:254  pass:232  dwarn:0   dfail:0   fail:0   skip:22 
fi-hsw-4770r     total:254  pass:228  dwarn:0   dfail:0   fail:0   skip:26 
fi-ilk-650       total:22   pass:7    dwarn:0   dfail:0   fail:0   skip:14 
fi-ivb-3520m     total:254  pass:223  dwarn:0   dfail:0   fail:0   skip:31 
fi-ivb-3770      total:254  pass:223  dwarn:0   dfail:0   fail:0   skip:31 
fi-skl-6260u     total:22   pass:21   dwarn:0   dfail:0   fail:0   skip:0  
fi-skl-6700hq    total:22   pass:19   dwarn:0   dfail:0   fail:0   skip:2  
fi-skl-6700k     total:22   pass:19   dwarn:0   dfail:0   fail:0   skip:2  
fi-snb-2520m     total:254  pass:209  dwarn:0   dfail:0   fail:0   skip:45 
fi-snb-2600      total:254  pass:209  dwarn:0   dfail:0   fail:0   skip:45 

Results at /archive/results/CI_IGT_test/Patchwork_2499/

5986f290e25f42d3d5df390411cc43683deb1301 drm-intel-nightly: 2016y-09m-08d-09h-11m-50s UTC integration manifest
f3f19e1 drm/i915: Serialise execbuf operation after a dma-buf reservation object
8d14999 drm/i915: Nonblocking request submission
14a39dc drm/i915: Avoid incrementing hangcheck whilst waiting for external fence
e464926 drm/i915: Ignore valid but unknown semaphores
ede8492 drm/i915/guc: Prepare for nonblocking execbuf submission
5a61e9e drm/i915: Prepare object synchronisation for asynchronicity
2b2927e drm/i915: Reorder i915_add_request to separate the phases better
abbf49e drm/i915: Drive request submission through fence callbacks
3f55ce1 drm/i915: Update reset path to fix incomplete requests
4f6dddc drm/i915: Replace wait-on-mutex with wait-on-bit in reset worker
9ff768b drm/i915: Perform a direct reset of the GPU from the waiter
6e86e44 drm/i915: Mark up all locked waiters
e54b735 drm/i915: Expand bool interruptible to pass flags to i915_wait_request()
04921fce drm/i915: Drop local struct_mutex around intel_init_emon[ilk]
faea45a drm/i915: Separate out reset flags from the reset counter
c00ae80 drm/i915: Simplify ELSP queue request tracking
057e6b3 drm/i915: Reorder submitting the requests to ELSP
6e45d15 drm/i915: Compute the ELSP register location once
98fe5f0 drm/i915: Record the position of the workarounds in the tail of the request
35a116c drm/i915: Only queue requests during execlists submission
0f329a7 drm/i915: Add a sw fence for collecting up dma fences

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: ✗ Fi.CI.BAT: failure for series starting with [CI,01/21] drm/i915: Add a sw fence for collecting up dma fences
  2016-09-09 11:56 ` ✗ Fi.CI.BAT: failure for series starting with [CI,01/21] drm/i915: Add a sw fence for collecting up dma fences Patchwork
@ 2016-09-09 12:01   ` Chris Wilson
  0 siblings, 0 replies; 26+ messages in thread
From: Chris Wilson @ 2016-09-09 12:01 UTC (permalink / raw)
  To: intel-gfx

On Fri, Sep 09, 2016 at 11:56:37AM -0000, Patchwork wrote:
> == Series Details ==
> 
> Series: series starting with [CI,01/21] drm/i915: Add a sw fence for collecting up dma fences
> URL   : https://patchwork.freedesktop.org/series/12233/
> State : failure
> 
> == Summary ==
> 
> Series 12233v1 Series without cover letter
> http://patchwork.freedesktop.org/api/1.0/series/12233/revisions/1/mbox/
> 
> Test gem_cpu_reloc:
>         Subgroup basic:
>                 pass       -> INCOMPLETE (fi-bsw-n3050)
> Test gem_sync:
>         Subgroup basic-store-all:
>                 pass       -> INCOMPLETE (fi-skl-6700k)
>                 pass       -> INCOMPLETE (fi-skl-6260u)
>                 pass       -> INCOMPLETE (fi-skl-6700hq)
>                 pass       -> INCOMPLETE (fi-ilk-650)
>                 pass       -> INCOMPLETE (fi-bdw-5557u)

Success! I was hoping CI would spot the deliberate error. Saves me
having to write another test.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [CI 19/21] drm/i915: Avoid incrementing hangcheck whilst waiting for external fence
  2016-09-09 11:00 ` [CI 19/21] drm/i915: Avoid incrementing hangcheck whilst waiting for external fence Chris Wilson
@ 2016-09-09 12:48   ` Mika Kuoppala
  0 siblings, 0 replies; 26+ messages in thread
From: Mika Kuoppala @ 2016-09-09 12:48 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> If we are waiting upon an external fence, then from the point of view
> of hangcheck the engine is stuck at the last submitted seqno. Currently
> we give a small increment to the hangcheck score in order to catch a
> stuck waiter / driver. Now that we have an independent wait hangcheck,
> and may also be stuck waiting on an external fence, resetting the GPU
> has little effect on that external fence. As a reset cannot advance
> such a fence, skip incrementing the hangcheck score.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
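
To make the effect concrete, a simplified sketch of the resulting check
(illustrative; the surrounding control flow in i915_irq.c is more
involved):

	/*
	 * The seqno has not moved, yet everything submitted has
	 * completed: the engine is idle as far as hangcheck is
	 * concerned. It may merely be waiting on an external fence,
	 * which a GPU reset cannot advance, so leave the score alone.
	 */
	if (engine->hangcheck.seqno == seqno &&
	    i915_seqno_passed(seqno, submit))
		engine->hangcheck.action = HANGCHECK_IDLE;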

> ---
>  drivers/gpu/drm/i915/i915_irq.c | 4 ----
>  1 file changed, 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index b76d45d91a84..8462817a7dae 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -3099,10 +3099,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>  		if (engine->hangcheck.seqno == seqno) {
>  			if (i915_seqno_passed(seqno, submit)) {
>  				engine->hangcheck.action = HANGCHECK_IDLE;
> -				if (busy) {
> -					/* Safeguard against driver failure */
> -					engine->hangcheck.score += BUSY;
> -				}
>  			} else {
>  				/* We always increment the hangcheck score
>  				 * if the engine is busy and still processing
> -- 
> 2.9.3
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [CI 20/21] drm/i915: Nonblocking request submission
  2016-09-09 13:11 [CI 01/21] " Chris Wilson
@ 2016-09-09 13:12 ` Chris Wilson
  0 siblings, 0 replies; 26+ messages in thread
From: Chris Wilson @ 2016-09-09 13:12 UTC (permalink / raw)
  To: intel-gfx

Now that we have fences in place to drive request submission, we can
employ those to queue requests after their dependencies as opposed to
stalling in the middle of an execbuf ioctl. (However, we still choose to
spin before enabling the IRQ as that is faster - though contentious.)
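
As a sketch of the consumer side, shaped after this series' "Drive
request submission through fence callbacks" patch (illustrative; the
driver's actual notify callback does a little more bookkeeping): once
every awaited fence has signalled, the submit fence's notify hook fires
and hands the request to the engine, instead of the execbuf ioctl
stalling inline.

	static int sketch_submit_notify(struct i915_sw_fence *fence,
					enum i915_sw_fence_notify state)
	{
		/* Resolve the request embedding this submit fence. */
		struct drm_i915_gem_request *request =
			container_of(fence, typeof(*request), submit);

		/* All dependencies met: pass the request to hardware. */
		if (state == FENCE_COMPLETE)
			request->engine->submit_request(request);

		return NOTIFY_DONE;
	}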

v2: Do the fence ordering first, where we can still fail.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 21 +++++++++++++++------
 drivers/gpu/drm/i915/i915_gem_request.h |  1 +
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 017cadf54d80..40978bc12ceb 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -477,12 +477,13 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 
 	trace_i915_gem_ring_sync_to(to, from);
 	if (!i915.semaphores) {
-		ret = i915_wait_request(from,
-					I915_WAIT_INTERRUPTIBLE |
-					I915_WAIT_LOCKED,
-					NULL, NO_WAITBOOST);
-		if (ret)
-			return ret;
+		if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) {
+			ret = i915_sw_fence_await_dma_fence(&to->submit,
+							    &from->fence, 0,
+							    GFP_KERNEL);
+			if (ret < 0)
+				return ret;
+		}
 	} else {
 		ret = to->engine->semaphore.sync_to(to, from);
 		if (ret)
@@ -577,6 +578,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 {
 	struct intel_engine_cs *engine = request->engine;
 	struct intel_ring *ring = request->ring;
+	struct drm_i915_gem_request *prev;
 	u32 request_start;
 	u32 reserved_tail;
 	int ret;
@@ -631,6 +633,13 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	 * hangcheck. Hence we apply the barrier to ensure that we do not
 	 * see a more recent value in the hws than we are tracking.
 	 */
+
+	prev = i915_gem_active_raw(&engine->last_request,
+				   &request->i915->drm.struct_mutex);
+	if (prev)
+		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
+					     &request->submitq);
+
 	request->emitted_jiffies = jiffies;
 	request->previous_seqno = engine->last_submitted_seqno;
 	engine->last_submitted_seqno = request->fence.seqno;
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 883df3bdb381..974bd7bcc801 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -84,6 +84,7 @@ struct drm_i915_gem_request {
 	struct intel_signal_node signaling;
 
 	struct i915_sw_fence submit;
+	wait_queue_t submitq;
 
 	/** GEM sequence number associated with the previous request,
 	 * when the HWS breadcrumb is equal to this the GPU is processing
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [CI 20/21] drm/i915: Nonblocking request submission
  2016-09-09  7:20 [CI 01/21] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
@ 2016-09-09  7:21 ` Chris Wilson
  0 siblings, 0 replies; 26+ messages in thread
From: Chris Wilson @ 2016-09-09  7:21 UTC (permalink / raw)
  To: intel-gfx

Now that we have fences in place to drive request submission, we can
employ those to queue requests after their dependencies as opposed to
stalling in the middle of an execbuf ioctl. (However, we still choose to
spin before enabling the IRQ as that is faster - though contentious.)

v2: Do the fence ordering first, where we can still fail.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 21 +++++++++++++++------
 drivers/gpu/drm/i915/i915_gem_request.h |  1 +
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 017cadf54d80..154128a143c2 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -477,12 +477,13 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 
 	trace_i915_gem_ring_sync_to(to, from);
 	if (!i915.semaphores) {
-		ret = i915_wait_request(from,
-					I915_WAIT_INTERRUPTIBLE |
-					I915_WAIT_LOCKED,
-					NULL, NO_WAITBOOST);
-		if (ret)
-			return ret;
+		if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) {
+			ret = i915_sw_fence_await_dma_fence(&to->submit,
+							    &from->fence, 0,
+							    GFP_KERNEL);
+			if (ret < 0)
+				return ret;
+		}
 	} else {
 		ret = to->engine->semaphore.sync_to(to, from);
 		if (ret)
@@ -577,6 +578,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 {
 	struct intel_engine_cs *engine = request->engine;
 	struct intel_ring *ring = request->ring;
+	struct drm_i915_gem_request *prev;
 	u32 request_start;
 	u32 reserved_tail;
 	int ret;
@@ -631,6 +633,13 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	 * hangcheck. Hence we apply the barrier to ensure that we do not
 	 * see a more recent value in the hws than we are tracking.
 	 */
+
+	prev = i915_gem_active_raw(&engine->last_request,
+				   &request->i915->drm.struct_mutex);
+	if (prev)
+		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
+					     &request->submitq);
+
 	request->emitted_jiffies = jiffies;
 	request->previous_seqno = engine->last_submitted_seqno;
 	engine->last_submitted_seqno = request->fence.seqno;
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 883df3bdb381..974bd7bcc801 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -84,6 +84,7 @@ struct drm_i915_gem_request {
 	struct intel_signal_node signaling;
 
 	struct i915_sw_fence submit;
+	wait_queue_t submitq;
 
 	/** GEM sequence number associated with the previous request,
 	 * when the HWS breadcrumb is equal to this the GPU is processing
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2016-09-09 13:12 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-09 11:00 [CI 01/21] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
2016-09-09 11:00 ` [CI 02/21] drm/i915: Only queue requests during execlists submission Chris Wilson
2016-09-09 11:00 ` [CI 03/21] drm/i915: Record the position of the workarounds in the tail of the request Chris Wilson
2016-09-09 11:00 ` [CI 04/21] drm/i915: Compute the ELSP register location once Chris Wilson
2016-09-09 11:00 ` [CI 05/21] drm/i915: Reorder submitting the requests to ELSP Chris Wilson
2016-09-09 11:00 ` [CI 06/21] drm/i915: Simplify ELSP queue request tracking Chris Wilson
2016-09-09 11:00 ` [CI 07/21] drm/i915: Separate out reset flags from the reset counter Chris Wilson
2016-09-09 11:00 ` [CI 08/21] drm/i915: Drop local struct_mutex around intel_init_emon[ilk] Chris Wilson
2016-09-09 11:00 ` [CI 09/21] drm/i915: Expand bool interruptible to pass flags to i915_wait_request() Chris Wilson
2016-09-09 11:00 ` [CI 10/21] drm/i915: Mark up all locked waiters Chris Wilson
2016-09-09 11:00 ` [CI 11/21] drm/i915: Perform a direct reset of the GPU from the waiter Chris Wilson
2016-09-09 11:00 ` [CI 12/21] drm/i915: Replace wait-on-mutex with wait-on-bit in reset worker Chris Wilson
2016-09-09 11:00 ` [CI 13/21] drm/i915: Update reset path to fix incomplete requests Chris Wilson
2016-09-09 11:00 ` [CI 14/21] drm/i915: Drive request submission through fence callbacks Chris Wilson
2016-09-09 11:00 ` [CI 15/21] drm/i915: Reorder i915_add_request to separate the phases better Chris Wilson
2016-09-09 11:00 ` [CI 16/21] drm/i915: Prepare object synchronisation for asynchronicity Chris Wilson
2016-09-09 11:00 ` [CI 17/21] drm/i915/guc: Prepare for nonblocking execbuf submission Chris Wilson
2016-09-09 11:00 ` [CI 18/21] drm/i915: Ignore valid but unknown semaphores Chris Wilson
2016-09-09 11:00 ` [CI 19/21] drm/i915: Avoid incrementing hangcheck whilst waiting for external fence Chris Wilson
2016-09-09 12:48   ` Mika Kuoppala
2016-09-09 11:01 ` [CI 20/21] drm/i915: Nonblocking request submission Chris Wilson
2016-09-09 11:01 ` [CI 21/21] drm/i915: Serialise execbuf operation after a dma-buf reservation object Chris Wilson
2016-09-09 11:56 ` ✗ Fi.CI.BAT: failure for series starting with [CI,01/21] drm/i915: Add a sw fence for collecting up dma fences Patchwork
2016-09-09 12:01   ` Chris Wilson
  -- strict thread matches above, loose matches on Subject: below --
2016-09-09 13:11 [CI 01/21] " Chris Wilson
2016-09-09 13:12 ` [CI 20/21] drm/i915: Nonblocking request submission Chris Wilson
2016-09-09  7:20 [CI 01/21] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
2016-09-09  7:21 ` [CI 20/21] drm/i915: Nonblocking request submission Chris Wilson
